ProcFS server, by Alen Stojanov and David van Moolenbroek

This commit is contained in:
David van Moolenbroek 2010-09-14 21:25:25 +00:00
parent 3eb65448a6
commit 2c5c5c06ea
17 changed files with 1629 additions and 2 deletions

View file

@ -429,6 +429,17 @@ service bios_wini
;
};
service procfs
{
system
VIRCOPY # 15
;
vm
INFO
;
uid 0;
};
service isofs
{
system

View file

@ -21,7 +21,8 @@ INCS+= minix/a.out.h minix/bitmap.h minix/callnr.h minix/cdrom.h \
minix/fslib.h minix/ioctl.h minix/ipc.h minix/ipcconst.h \
minix/keymap.h minix/minlib.h minix/mq.h \
minix/netdriver.h minix/partition.h minix/paths.h \
minix/portio.h minix/priv.h minix/profile.h minix/queryparam.h \
minix/portio.h minix/priv.h minix/procfs.h minix/profile.h \
minix/queryparam.h \
minix/rs.h minix/safecopies.h minix/sched.h minix/sef.h minix/sound.h \
minix/spin.h minix/sys_config.h minix/sysinfo.h minix/syslib.h \
minix/sysutil.h minix/timers.h minix/tty.h minix/type.h minix/types.h \

42
include/minix/procfs.h Normal file
View file

@ -0,0 +1,42 @@
#ifndef _MINIX_PROCFS_H
#define _MINIX_PROCFS_H
/* Constants describing the fields of the single-line process information
 * record that ProcFS produces for ps/top.
 *
 * The compatibility model is as follows. The current format should be retained
 * for as long as possible; new fields can be added at the end of the line,
 * because ps/top only read as much as they know of from the start of the line.
 * Once fields (really) have to be removed, or the whole line becomes too big
 * of a mess, a completely new format string can be put in, but with an
 * increased PSINFO_VERSION at the beginning. That way, older ps/top copies
 * will not misinterpret the new fields, but rather fail cleanly.
 */
#define PSINFO_VERSION 0 /* format version, printed as the first field */
/* Process types. */
#define TYPE_TASK 'T' /* kernel task */
#define TYPE_SYSTEM 'S' /* system (privileged) process */
#define TYPE_USER 'U' /* user process */
/* General process states. */
#define STATE_SLEEP 'S' /* sleeping */
#define STATE_WAIT 'W' /* waiting */
#define STATE_ZOMBIE 'Z' /* zombie */
#define STATE_RUN 'R' /* in run-queue */
#define STATE_STOP 'T' /* stopped (traced) */
/* PM sleep states. */
#define PSTATE_NONE '-' /* not sleeping in PM */
#define PSTATE_PAUSED 'P'
#define PSTATE_WAITING 'W'
#define PSTATE_SIGSUSP 'S'
/* VFS block states. */
#define FSTATE_NONE '-' /* not blocked in VFS */
#define FSTATE_PIPE 'P'
#define FSTATE_LOCK 'L'
#define FSTATE_POPEN 'O'
#define FSTATE_SELECT 'S'
#define FSTATE_DOPEN 'D'
#define FSTATE_TASK 'T' /* blocked on another process (task/driver) */
#define FSTATE_UNKNOWN '?' /* block reason not known to ProcFS */
#endif /* _MINIX_PROCFS_H */

View file

@ -3,7 +3,8 @@
.include <bsd.own.mk>
SUBDIR= ds ext2 hgfs inet init ipc is iso9660fs mfs pfs pm rs sched vfs vm
SUBDIR= ds ext2 hgfs inet init ipc is iso9660fs \
mfs pfs pm procfs rs sched vfs vm
IMAGE_SUBDIR= ds init mfs pfs pm rs sched vfs vm

14
servers/procfs/Makefile Normal file
View file

@ -0,0 +1,14 @@
# Makefile for ProcFS server
PROG= procfs
SRCS= buf.c main.c pid.c root.c tree.c util.c
CPPFLAGS+= -I${MINIXSRCDIR} -I${MINIXSRCDIR}/servers
DPADD+= ${LIBVTREEFS} ${LIBSYS}
LDADD+= -lvtreefs -lsys
MAN=
BINDIR?= /sbin
.include <bsd.prog.mk>

68
servers/procfs/NOTES Normal file
View file

@ -0,0 +1,68 @@
Development notes regarding ProcFS. Original document by David van Moolenbroek.
SECURITY MODEL
Right now, procfs is not able to deal with security-sensitive information,
because there would be too many opportunities for rogue processes to obtain
values they shouldn't be able to get to. This is mainly due to the fact that
while procfs is running, the environment around it may change arbitrarily: for
example, a /proc/<pid>/mem file could offer access to a process's core memory,
but if a rogue process opened that file right before the victim process invokes
an exec() on a setuid binary, the rogue process could read from the victim
process's memory while a victim user provides this process with their password.
This is only one example out of many; such time-of-check/time-of-use race
conditions are inherent to the situation that procfs finds itself in: trying
to provide information about an asynchronously running system.
A little more specifically, this problem mainly comes up when system calls are
made to obtain information (long) after a certain PID directory has been
updated, which typically happens right after pulling in a new copy of the
process tables of the kernel, PM, and VFS. Returning stale information from
those tables is usually not a problem: at worst, the caller gets outdated
information about the system as it once was, after passing a security check for
that point in time. Hence, it can not obtain information it never had access
to. Using information from those tables to perform calls later, however, is
a different case. In the "mem" example above, procfs would have the old user ID
in its copy of the process tables, and yet perform on-demand sys_datacopy calls
(or something similar) to retrieve memory from the process, bypassing a check
on the then-current user ID. A similar situation already exists right now for
the /proc/<pid>/map file for example, which pulls in information on demand -
but it provides only public information anyway, just like the other files that
procfs currently exposes.
A proper solution to this problem has simply not been implemented yet. It is
possible to change the system in such a way that procfs checks whether the
target process is still in the same security state before returning information
to the caller process. This can be done either while or after obtaining the
information, depending on what is most convenient for the design of the system.
Any such solution obviously has an impact on system design and procfs'
performance, and was found not worth implementing for the first version of
procfs, since all offered information was public anyway. However, such a change
*must* be made before procfs can expose anything that provides a potential for
security breaches.
Finally, it must be kept in mind that even updating the process tables from
various other sources is not an atomic operation. There might be mismatches
between the tables. Procfs must be able to handle such occurrences with care,
from both a security perspective and a general functionality perspective.
FUTURE EXPANSIONS
It would be trivial to add a /proc/self symlink pointing to the caller's PID
directory, if the VFS-FS protocol's REQ_RDLINK request were augmented to
include the caller's PID or endpoint. However, this would be a procfs-specific
protocol change, and there does not seem to be a need for this just yet.
Even more custom protocol changes or procfs-specific backcalls would have to be
added to expose processes' current working directory, root directory,
executable path, or open files. A number of VFS parts would have to be changed
significantly to fully support all of these, possibly including an entire DNLC.
All the necessary infrastructure is there to add static (sub)directories - for
example, a /proc/net/ directory. It would be more tricky to add subdirectories
for dynamic (process) directories, for example /proc/<pid>/fd/. This would
require some changes to the VTreeFS side of the tree management. Some of the
current assumptions are documented in type.h.

128
servers/procfs/buf.c Normal file
View file

@ -0,0 +1,128 @@
/* ProcFS - buf.c - by Alen Stojanov and David van Moolenbroek */
#include "inc.h"
#include <stdarg.h>
#define BUF_SIZE 4096
PRIVATE char buf[BUF_SIZE + 1];
PRIVATE size_t off, left, used;
PRIVATE off_t skip;
/*===========================================================================*
* buf_init *
*===========================================================================*/
PUBLIC void buf_init(off_t start, size_t len)
{
	/* Reset the output buffer for a new request. Output bytes located
	 * before offset 'start' will be discarded; after those, no more than
	 * 'len' bytes are collected, and never more than BUF_SIZE.
	 */
	used = 0;
	off = 0;

	left = len;
	if (left > BUF_SIZE)
		left = BUF_SIZE;

	skip = start;
}
/*===========================================================================*
* buf_printf *
*===========================================================================*/
PUBLIC void buf_printf(char *fmt, ...)
{
	/* Add formatted text to the end of the buffer. Nothing is added once
	 * the requested number of bytes has been produced, or if formatting
	 * fails.
	 */
	va_list args;
	ssize_t len, max;

	if (left == 0)
		return;

	/* There is no way to estimate how much space the result will take, so
	 * we need to produce the string even when skipping part of the start.
	 * If part of the result is to be skipped, do not memcpy; instead, save
	 * the offset of where the result starts within the buffer.
	 *
	 * The null terminating character is not part of the result, so room
	 * must be given for it to be stored after completely filling up the
	 * requested part of the buffer.
	 */
	max = MIN(skip + left, BUF_SIZE);

	va_start(args, fmt);
	len = vsnprintf(&buf[off + used], max + 1, fmt, args);
	va_end(args);

	/* vsnprintf reports an output error with a negative value. Treat that
	 * as "no output" rather than corrupting the skip count or tripping
	 * the assertion below.
	 */
	if (len < 0)
		return;

	if (skip > 0) {
		assert(off == 0);
		assert(used == 0);

		/* The entire result falls within the part to be skipped. */
		if (skip >= len) {
			skip -= len;

			return;
		}

		/* The result was truncated at the end of the buffer, and the
		 * requested part lies entirely beyond the truncation point.
		 * There is nothing we can return; make sure we never hand out
		 * a start offset that is outside the buffer.
		 */
		if (skip >= max) {
			left = 0;

			return;
		}

		off = skip;
		if (left > BUF_SIZE - off)
			left = BUF_SIZE - off;
		len -= off;
		skip = 0;
	}

	assert(skip == 0);
	assert(len >= 0);
	assert((long) left >= 0);

	/* The result may have been truncated; count only what was stored. */
	if (len > (ssize_t) left)
		len = left;

	used += len;
	left -= len;
}
/*===========================================================================*
* buf_append *
*===========================================================================*/
PUBLIC void buf_append(char *data, size_t len)
{
	/* Copy 'len' bytes of raw data to the end of the buffer, honoring the
	 * number of bytes that are still to be skipped and the number of
	 * bytes that may still be stored.
	 */
	size_t count;

	if (left == 0)
		return;

	if (skip > 0) {
		/* All of the data lies within the skipped part. */
		if ((ssize_t) len <= skip) {
			skip -= len;

			return;
		}

		/* Drop only the leading part that is to be skipped. */
		data += skip;
		len -= skip;
		skip = 0;
	}

	count = (len > left) ? left : len;

	memcpy(buf + off + used, data, count);

	left -= count;
	used += count;
}
/*===========================================================================*
* buf_get *
*===========================================================================*/
PUBLIC size_t buf_get(char **ptr)
{
	/* Store the address of the first byte of collected output in 'ptr',
	 * and return the number of collected bytes. The trailing null
	 * character is not included in that count.
	 */
	*ptr = buf + off;

	return used;
}

33
servers/procfs/const.h Normal file
View file

@ -0,0 +1,33 @@
#ifndef _PROCFS_CONST_H
#define _PROCFS_CONST_H
/* The minimum number of inodes depends on a number of factors:
* - Each statically created inode (e.g., /proc/hz) needs an inode. As of
* writing, this requires about a dozen inodes.
* - Deleted inodes that are still in use by VFS must be retained. For deleted
* directories, all their containing directories up to the root must be
* retained as well (to allow the user to "cd .." out). VTreeFS already takes
* care of this. In the case of ProcFS, only PID-based directories can be
* deleted; no other directories are dynamically created. These directories
* currently do not contain subdirectories, either. Hence, for deleted open
* inodes, we need to reserve at most NR_VNODES inodes in the worst case.
* - In order for getdents to be able to return all PID-based directories,
* inodes must not be recycled while generating the list of these PID-based
* directories. In the worst case, this means (NR_TASKS + NR_PROCS) extra
* inodes.
* The sum of these is the bare minimum for correct operation in all possible
* circumstances. In practice, not all open files will be deleted files in
* ProcFS, and not all process slots will be in use either, so the average use
* will be a lot less. However, setting the value too low allows for a
* potential denial-of-service attack by a non-root user.
*
* For the moment, we simply set this value to something reasonable.
*/
#define NR_INODES ((NR_TASKS + NR_PROCS) * 4) /* inode count given to start_vtreefs() */
/* Various file modes. */
#define REG_ALL_MODE (S_IFREG | 0444) /* world-readable regular */
#define DIR_ALL_MODE (S_IFDIR | 0555) /* world-accessible directory */
#define LNK_ALL_MODE (S_IFLNK | 0777) /* symbolic link */
#endif /* _PROCFS_CONST_H */

15
servers/procfs/glo.h Normal file
View file

@ -0,0 +1,15 @@
#ifndef _PROCFS_GLO_H
#define _PROCFS_GLO_H
/* Global variables shared between the ProcFS source files. */
/* pid.c */
extern struct file pid_files[]; /* files created in each PID directory */
/* root.c */
extern struct file root_files[]; /* static files in the root directory */
/* tree.c */
/* The kernel table is indexed by slot number; the PM and VFS tables have no
 * entries for kernel tasks and are indexed by (slot - NR_TASKS).
 */
extern struct proc proc[NR_PROCS + NR_TASKS]; /* process table from kernel */
extern struct mproc mproc[NR_PROCS]; /* process table from PM */
extern struct fproc fproc[NR_PROCS]; /* process table from VFS */
#endif /* _PROCFS_GLO_H */

64
servers/procfs/inc.h Normal file
View file

@ -0,0 +1,64 @@
#ifndef _PROCFS_INC_H
#define _PROCFS_INC_H
#define _POSIX_SOURCE 1
#define _MINIX 1
#define _SYSTEM 1
#include <minix/config.h>
#include <ansi.h>
#include <limits.h>
#include <errno.h>
#include <signal.h>
#include <unistd.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <assert.h>
#include <fcntl.h>
#include <lib.h>
#include <timers.h>
#include <a.out.h>
#include <dirent.h>
#include <minix/callnr.h>
#include <minix/type.h>
#include <minix/const.h>
#include <minix/com.h>
#include <minix/syslib.h>
#include <minix/sysutil.h>
#include <minix/keymap.h>
#include <minix/bitmap.h>
#include <minix/vfsif.h>
#include <minix/endpoint.h>
#include <minix/sysinfo.h>
#include <minix/u64.h>
#include <minix/sysinfo.h>
#include <minix/type.h>
#include <minix/ipc.h>
#include <sys/utsname.h>
#include <sys/time.h>
#include <sys/times.h>
#include <sys/types.h>
#include <sys/param.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <machine/archtypes.h>
#include "kernel/const.h"
#include "kernel/type.h"
#include "kernel/proc.h"
#include "pm/mproc.h"
#include "vfs/const.h"
#include "vfs/fproc.h"
#include <minix/vtreefs.h>
#include <minix/procfs.h>
#include "const.h"
#include "type.h"
#include "proto.h"
#include "glo.h"
#endif /* _PROCFS_INC_H */

90
servers/procfs/main.c Normal file
View file

@ -0,0 +1,90 @@
/* ProcFS - main.c - by Alen Stojanov and David van Moolenbroek */
#include "inc.h"
FORWARD _PROTOTYPE( void init_hook, (void) );
/* The hook functions that will be called by VTreeFS. */
PRIVATE struct fs_hooks hooks = {
init_hook, /* generates the static part of the tree */
NULL, /* cleanup_hook */
lookup_hook, /* declared in proto.h, implemented in tree.c */
getdents_hook, /* declared in proto.h, implemented in tree.c */
read_hook, /* declared in proto.h, implemented in tree.c */
rdlink_hook, /* declared in proto.h, implemented in tree.c */
NULL /* message_hook */
};
/*===========================================================================*
* construct_tree *
*===========================================================================*/
PRIVATE void construct_tree(struct inode *dir, struct file *files)
{
	/* Populate directory 'dir' with one inode for each entry of the
	 * 'files' array, which ends with an entry that has a NULL name. For
	 * an entry that is a directory, its data field is taken to point to
	 * the array of files it contains, and that array is processed
	 * recursively.
	 */
	struct inode_stat stat;
	struct inode *node;
	struct file *entry;

	/* All static files share these properties; only the mode differs. */
	stat.dev = NO_DEV;
	stat.size = 0;
	stat.gid = SUPER_USER;
	stat.uid = SUPER_USER;

	entry = files;
	while (entry->name != NULL) {
		stat.mode = entry->mode;

		node = add_inode(dir, entry->name, NO_INDEX, &stat,
			(index_t) 0, (cbdata_t) entry->data);

		assert(node != NULL);

		if (S_ISDIR(entry->mode))
			construct_tree(node, (struct file *) entry->data);

		entry++;
	}
}
/*===========================================================================*
* init_hook *
*===========================================================================*/
PRIVATE void init_hook(void)
{
	/* Initialization hook: create the static files listed in root_files
	 * underneath the root directory.
	 */
	construct_tree(get_root_inode(), root_files);
}
/*===========================================================================*
* main *
*===========================================================================*/
PUBLIC int main(int argc, char *argv[])
{
/* ProcFS entry point.
*/
struct inode_stat stat;
int r;
/* Initialize some state. If we are incompatible with the kernel, exit
* immediately.
*/
if ((r = init_tree()) != OK)
return r;
/* Properties of the root directory. */
stat.mode = DIR_ALL_MODE;
stat.uid = SUPER_USER;
stat.gid = SUPER_USER;
stat.size = 0;
stat.dev = NO_DEV;
/* Start VTreeFS. This call does not return. */
start_vtreefs(&hooks, NR_INODES, &stat, NR_PROCS + NR_TASKS);
return 0;
}

417
servers/procfs/pid.c Normal file
View file

@ -0,0 +1,417 @@
/* ProcFS - pid.c - by Alen Stojanov and David van Moolenbroek */
#include "inc.h"
#include <sys/mman.h>
#include <minix/vm.h>
#define S_FRAME_SIZE 4096 /* use malloc if larger than this */
PRIVATE char s_frame[S_FRAME_SIZE]; /* static storage for process frame */
PRIVATE char *frame; /* pointer to process frame buffer */
FORWARD _PROTOTYPE( void pid_psinfo, (int slot) );
FORWARD _PROTOTYPE( void pid_cmdline, (int slot) );
FORWARD _PROTOTYPE( void pid_environ, (int slot) );
FORWARD _PROTOTYPE( void pid_map, (int slot) );
/* The files that are dynamically created in each PID directory. The data field
* contains each file's read function. Subdirectories are not yet supported.
*/
PUBLIC struct file pid_files[] = {
{ "psinfo", REG_ALL_MODE, (data_t) pid_psinfo }, /* info for ps(1)/top(1) */
{ "cmdline", REG_ALL_MODE, (data_t) pid_cmdline }, /* command line arguments */
{ "environ", REG_ALL_MODE, (data_t) pid_environ }, /* initial environment */
{ "map", REG_ALL_MODE, (data_t) pid_map }, /* memory map */
{ NULL, 0, (data_t) NULL } /* end of list: NULL name */
};
/*===========================================================================*
* is_zombie *
*===========================================================================*/
PRIVATE int is_zombie(int slot)
{
	/* Return nonzero if the given slot holds a zombie process according
	 * to PM's process table. Slots below NR_TASKS have no PM entry and
	 * are never zombies.
	 */
	if (slot < NR_TASKS)
		return 0;

	return (mproc[slot - NR_TASKS].mp_flags &
		(TRACE_ZOMBIE | ZOMBIE)) != 0;
}
/*===========================================================================*
* pid_psinfo *
*===========================================================================*/
PRIVATE void pid_psinfo(int i)
{
/* Print information used by ps(1) and top(1).
*/
int pi, task, state, type, p_state, f_state;
char name[PROC_NAME_LEN+1], *p;
struct vm_usage_info vui;
pid_t ppid;
pi = i - NR_TASKS;
task = proc[i].p_nr < 0;
/* Get the name of the process. Spaces would mess up the format.. */
if (task || mproc[i].mp_name[0] == 0)
strncpy(name, proc[i].p_name, sizeof(name) - 1);
else
strncpy(name, mproc[pi].mp_name, sizeof(name) - 1);
name[sizeof(name) - 1] = 0;
if ((p = strchr(name, ' ')) != NULL)
p[0] = 0;
/* Get the type of the process. */
if (task)
type = TYPE_TASK;
else if (mproc[i].mp_flags & PRIV_PROC)
type = TYPE_SYSTEM;
else
type = TYPE_USER;
/* Get the state of the process. */
if (!task) {
if (is_zombie(i))
state = STATE_ZOMBIE; /* zombie */
else if (mproc[pi].mp_flags & STOPPED)
state = STATE_STOP; /* stopped (traced) */
else if (proc[i].p_rts_flags == 0)
state = STATE_RUN; /* in run-queue */
else if (fp_is_blocked(&fproc[pi]) ||
(mproc[pi].mp_flags & (WAITING | PAUSED | SIGSUSPENDED)))
state = STATE_SLEEP; /* sleeping */
else
state = STATE_WAIT; /* waiting */
} else {
if (proc[i].p_rts_flags == 0)
state = STATE_RUN; /* in run-queue */
else
state = STATE_WAIT; /* other i.e. waiting */
}
/* We assume that even if a process has become a zombie, its kernel
* proc entry still contains the old (but valid) information. Currently
* this is true, but in the future we may have to filter some fields.
*/
buf_printf("%d %c %d %s %c %d %d %lu %lu %lu %lu",
PSINFO_VERSION, /* information version */
type, /* process type */
(int) proc[i].p_endpoint, /* process endpoint */
name, /* process name */
state, /* process state letter */
(int) P_BLOCKEDON(&proc[i]), /* endpt blocked on, or NONE */
(int) proc[i].p_priority, /* process priority */
(long) proc[i].p_user_time, /* user time */
(long) proc[i].p_sys_time, /* system time */
ex64hi(proc[i].p_cycles), /* execution cycles */
ex64lo(proc[i].p_cycles)
);
/* If the process is not a kernel task, we add some extra info. */
if (!task) {
memset(&vui, 0, sizeof(vui));
if (!is_zombie(i)) {
/* We don't care if this fails. It may still return
* zero memory usage for processes that don't have a
* pagetable, though. Look at vui_total instead.
*/
(void) vm_info_usage(proc[i].p_endpoint, &vui);
if (vui.vui_total == 0L) {
vui.vui_total =
(proc[i].p_memmap[T].mem_len +
proc[i].p_memmap[D].mem_len) <<
CLICK_SHIFT;
}
}
if (mproc[pi].mp_flags & PAUSED)
p_state = PSTATE_PAUSED;
else if (mproc[pi].mp_flags & WAITING)
p_state = PSTATE_WAITING;
else if (mproc[pi].mp_flags & SIGSUSPENDED)
p_state = PSTATE_SIGSUSP;
else
p_state = '-';
if (mproc[pi].mp_parent == pi)
ppid = NO_PID;
else
ppid = mproc[mproc[pi].mp_parent].mp_pid;
switch (fproc[pi].fp_blocked_on) {
case FP_BLOCKED_ON_NONE: f_state = FSTATE_NONE; break;
case FP_BLOCKED_ON_PIPE: f_state = FSTATE_PIPE; break;
case FP_BLOCKED_ON_LOCK: f_state = FSTATE_LOCK; break;
case FP_BLOCKED_ON_POPEN: f_state = FSTATE_POPEN; break;
case FP_BLOCKED_ON_SELECT: f_state = FSTATE_SELECT; break;
case FP_BLOCKED_ON_DOPEN: f_state = FSTATE_DOPEN; break;
case FP_BLOCKED_ON_OTHER: f_state = FSTATE_TASK; break;
default: f_state = FSTATE_UNKNOWN;
}
buf_printf(" %lu %lu %lu %c %d %u %u %u %d %c %d %u",
vui.vui_total, /* total memory */
vui.vui_common, /* common memory */
vui.vui_shared, /* shared memory */
p_state, /* sleep state */
ppid, /* parent PID */
mproc[pi].mp_realuid, /* real UID */
mproc[pi].mp_effuid, /* effective UID */
mproc[pi].mp_procgrp, /* process group */
mproc[pi].mp_nice, /* nice value */
f_state, /* VFS block state */
(int) (fproc[pi].fp_blocked_on == FP_BLOCKED_ON_OTHER)
? fproc[pi].fp_task : NONE, /* block proc */
fproc[pi].fp_tty /* controlling tty */
);
}
/* Newline at the end of the file. */
buf_printf("\n");
}
/*===========================================================================*
* put_frame *
*===========================================================================*/
PRIVATE void put_frame(void)
{
	/* Release the frame buffer set up by get_frame(). The static buffer
	 * needs no cleanup; anything else was allocated dynamically.
	 */
	if (frame == s_frame)
		return;

	free(frame);
}
/*===========================================================================*
* get_frame *
*===========================================================================*/
PRIVATE int get_frame(int slot, vir_bytes *basep, vir_bytes *sizep,
	size_t *nargsp)
{
	/* Get the execution frame from the top of the given process's stack.
	 * It may be very large, in which case we temporarily allocate memory
	 * for it (up to a certain size).
	 *
	 * On success, return TRUE, leave the frame contents in 'frame', and
	 * store the frame's base address in the process, its (possibly
	 * limited) size, and the argument count in 'basep', 'sizep', and
	 * 'nargsp' respectively. On failure, return FALSE; no cleanup is
	 * needed in that case.
	 */
	vir_bytes base, size;
	size_t nargs;

	if (proc[slot].p_nr < 0 || is_zombie(slot))
		return FALSE;

	/* Get the frame base address and size. Limit the size to whatever we
	 * can handle. If our static buffer is not sufficiently large to store
	 * the entire frame, allocate memory dynamically. It is then later
	 * freed by put_frame().
	 */
	base = mproc[slot - NR_TASKS].mp_frame_addr;
	size = mproc[slot - NR_TASKS].mp_frame_len;

	if (size < sizeof(size_t)) return FALSE;

	if (size > ARG_MAX) size = ARG_MAX;

	/* The buffer must have room for 'size' bytes plus one terminating
	 * null byte, which is stored at frame[size] below.
	 */
	if (size >= sizeof(s_frame)) {
		frame = malloc(size + 1);

		if (frame == NULL)
			return FALSE;
	}
	else frame = s_frame;

	/* Copy in the complete process frame. */
	if (sys_datacopy(proc[slot].p_endpoint, base,
			SELF, (vir_bytes) frame, (phys_bytes) size) != OK) {
		put_frame();

		return FALSE;
	}

	frame[size] = 0; /* terminate any last string */

	/* The frame starts with the argument count. Copy it out, as the
	 * character buffer need not be suitably aligned for direct access.
	 */
	memcpy(&nargs, frame, sizeof(nargs));

	/* The count, the argument pointers, and the pointer slot following
	 * them must all fit in the frame. The frame contents are controlled
	 * by the process, so perform this check without multiplying, as a
	 * huge count could otherwise make the computation wrap around.
	 */
	if (nargs < 1 || nargs >= (size - sizeof(size_t)) / sizeof(char *)) {
		put_frame();

		return FALSE;
	}

	*basep = base;
	*sizep = size;
	*nargsp = nargs;

	/* The caller now has to call put_frame() to clean up. */
	return TRUE;
}
/*===========================================================================*
* pid_cmdline *
*===========================================================================*/
PRIVATE void pid_cmdline(int slot)
{
/* Dump the process's command line as it is contained in the process
* itself. Each argument is terminated with a null character.
*/
vir_bytes base, size, ptr;
size_t i, len, nargs;
char **argv;
if (!get_frame(slot, &base, &size, &nargs))
return;
argv = (char **) &frame[sizeof(size_t)];
for (i = 0; i < nargs; i++) {
ptr = (vir_bytes) argv[i] - base;
/* Check for bad pointers. */
if ((long) ptr < 0L || ptr >= size)
break;
len = strlen(&frame[ptr]) + 1;
buf_append(&frame[ptr], len);
}
put_frame();
}
/*===========================================================================*
* pid_environ *
*===========================================================================*/
PRIVATE void pid_environ(int slot)
{
	/* Output the initial environment of the process, as found in the
	 * frame of the process itself. Every entry is followed by a null
	 * character.
	 */
	vir_bytes base, size, rel;
	size_t nargs, pos;
	char **entry;
	char *str;

	if (!get_frame(slot, &base, &size, &nargs))
		return;

	/* The environment pointers start after the argument count and
	 * (nargs + 1) pointers.
	 */
	pos = sizeof(size_t) + sizeof(char *) * (nargs + 1);
	entry = (char **) &frame[pos];

	/* Never read a pointer that is not fully within the frame. */
	while (pos + sizeof(char *) <= size) {
		rel = (vir_bytes) *entry;

		/* A NULL pointer marks the end of the list. */
		if (rel == 0L)
			break;

		/* Convert the pointer to an offset within our copy, and stop
		 * if it turns out to be outside the frame.
		 */
		rel -= base;

		if ((long) rel < 0L || rel >= size)
			break;

		str = &frame[rel];

		buf_append(str, strlen(str) + 1);

		pos += sizeof(char *);
		entry++;
	}

	put_frame();
}
/*===========================================================================*
* dump_regions *
*===========================================================================*/
PRIVATE int dump_regions(int slot)
{
	/* Output one line for each virtual memory region that VM reports for
	 * the process. Return the number of regions printed, or a negative
	 * value if a request to VM failed.
	 */
	struct vm_region_info vri[MAX_VRI_COUNT];
	vir_bytes next;
	int n, got, seg, total;

	total = 0;
	next = 0;

	/* Request regions in batches, until a batch is not entirely full. */
	for (;;) {
		got = vm_info_region(proc[slot].p_endpoint, vri,
			MAX_VRI_COUNT, &next);

		if (got < 0)
			return got;

		if (got == 0)
			break;

		for (n = 0; n < got; n++) {
			if (vri[n].vri_seg == T)
				seg = 'T';
			else if (vri[n].vri_seg == D)
				seg = 'D';
			else
				seg = '?';

			buf_printf("%c %08lx-%08lx %c%c%c %c\n",
				seg, vri[n].vri_addr,
				vri[n].vri_addr + vri[n].vri_length,
				(vri[n].vri_prot & PROT_READ) ? 'r' : '-',
				(vri[n].vri_prot & PROT_WRITE) ? 'w' : '-',
				(vri[n].vri_prot & PROT_EXEC) ? 'x' : '-',
				(vri[n].vri_flags & MAP_SHARED) ? 's' : 'p');

			total++;
		}

		if (got != MAX_VRI_COUNT)
			break;
	}

	return total;
}
/*===========================================================================*
* dump_segments *
*===========================================================================*/
PRIVATE void dump_segments(int slot)
{
	/* Output one line for each local memory segment in the kernel's
	 * memory map of the process.
	 */
	char *perms;
	int seg, letter;

	for (seg = 0; seg < NR_LOCAL_SEGS; seg++) {
		if (seg == T) {
			letter = 'T';
			perms = "r-x";
		} else {
			letter = 'D';

			/* An empty text segment is reported by marking the
			 * other segments as executable.
			 */
			if (proc[slot].p_memmap[T].mem_len == 0)
				perms = "rwx";
			else
				perms = "rw-";
		}

		buf_printf("%c %08lx-%08lx %s -\n",
			letter,
			proc[slot].p_memmap[seg].mem_vir << CLICK_SHIFT,
			(proc[slot].p_memmap[seg].mem_vir +
			proc[slot].p_memmap[seg].mem_len) << CLICK_SHIFT,
			perms);
	}
}
/*===========================================================================*
* pid_map *
*===========================================================================*/
PRIVATE void pid_map(int slot)
{
	/* Print a memory map of the process. Obtain the information from VM if
	 * possible; otherwise fall back on segments from the kernel.
	 */

	/* Zombies have no memory. */
	if (is_zombie(slot))
		return;

	/* VM is asked only about processes that are not kernel tasks. If it
	 * returned anything other than "no regions", we are done.
	 */
	if (proc[slot].p_nr >= 0 && dump_regions(slot) != 0)
		return;

	/* For kernel tasks, or for processes that have no regions according to
	 * VM, we assume they are not using virtual memory, and we print their
	 * segments instead.
	 */
	dump_segments(slot);
}

23
servers/procfs/proto.h Normal file
View file

@ -0,0 +1,23 @@
#ifndef _PROCFS_PROTO_H
#define _PROCFS_PROTO_H
/* Function prototypes shared between the ProcFS source files. */
/* buf.c */
/* Reset the output buffer: skip 'start' bytes, then collect up to 'len'. */
_PROTOTYPE( void buf_init, (off_t start, size_t len) );
/* Add formatted text to the end of the buffer. */
_PROTOTYPE( void buf_printf, (char *fmt, ...) );
/* Add arbitrary data to the end of the buffer. */
_PROTOTYPE( void buf_append, (char *data, size_t len) );
/* Get the start address and length of the collected output. */
_PROTOTYPE( size_t buf_get, (char **ptr) );
/* tree.c */
_PROTOTYPE( int init_tree, (void) );
/* The following are the hook functions that are passed to VTreeFS. */
_PROTOTYPE( int lookup_hook, (struct inode *parent, char *name,
cbdata_t cbdata) );
_PROTOTYPE( int getdents_hook, (struct inode *inode, cbdata_t cbdata) );
_PROTOTYPE( int read_hook, (struct inode *inode, off_t offset,
char **ptr, size_t *len, cbdata_t cbdata) );
_PROTOTYPE( int rdlink_hook, (struct inode *inode, char *ptr,
size_t max, cbdata_t cbdata) );
/* util.c */
_PROTOTYPE( int procfs_getloadavg, (double *loadavg, int nelem) );
#endif /* _PROCFS_PROTO_H */

131
servers/procfs/root.c Normal file
View file

@ -0,0 +1,131 @@
/* ProcFS - root.c - by Alen Stojanov and David van Moolenbroek */
#include "inc.h"
#include <machine/pci.h>
FORWARD _PROTOTYPE( void root_hz, (void) );
FORWARD _PROTOTYPE( void root_uptime, (void) );
FORWARD _PROTOTYPE( void root_loadavg, (void) );
FORWARD _PROTOTYPE( void root_kinfo, (void) );
FORWARD _PROTOTYPE( void root_meminfo, (void) );
FORWARD _PROTOTYPE( void root_pci, (void) );
/* The static files in the root directory of the file system. As with the
 * per-PID files, the data field is set to the function that produces the
 * contents of the file.
 */
struct file root_files[] = {
{ "hz", REG_ALL_MODE, (data_t) root_hz },
{ "uptime", REG_ALL_MODE, (data_t) root_uptime },
{ "loadavg", REG_ALL_MODE, (data_t) root_loadavg },
{ "kinfo", REG_ALL_MODE, (data_t) root_kinfo },
{ "meminfo", REG_ALL_MODE, (data_t) root_meminfo },
{ "pci", REG_ALL_MODE, (data_t) root_pci },
{ NULL, 0, NULL } /* end of list: NULL name */
};
/*===========================================================================*
* root_hz *
*===========================================================================*/
PRIVATE void root_hz(void)
{
	/* Print the system clock frequency, as a single number followed by a
	 * newline.
	 */

	/* The argument type must match the unsigned "%lu" conversion. */
	buf_printf("%lu\n", (unsigned long) sys_hz());
}
/*===========================================================================*
* root_loadavg *
*===========================================================================*/
PRIVATE void root_loadavg(void)
{
	/* Print three load average values on one line. Nothing is printed
	 * unless all three values could be obtained.
	 */
	double load[3];
	int count;

	count = procfs_getloadavg(load, 3);

	if (count == 3)
		buf_printf("%.2lf %.2lf %.2lf\n", load[0], load[1], load[2]);
}
/*===========================================================================*
* root_uptime *
*===========================================================================*/
PRIVATE void root_uptime(void)
{
	/* Print the uptime: the number of clock ticks so far, divided by the
	 * system clock frequency. Nothing is printed if the number of ticks
	 * cannot be obtained.
	 */
	clock_t ticks;
	double seconds;

	if (getuptime(&ticks) != OK)
		return;

	seconds = (double) ticks / (double) sys_hz();

	buf_printf("%.2lf\n", seconds);
}
/*===========================================================================*
* root_kinfo *
*===========================================================================*/
PRIVATE void root_kinfo(void)
{
/* Print general kernel information.
*/
struct kinfo kinfo;
if (sys_getkinfo(&kinfo) != OK)
return;
buf_printf("%u %u\n", kinfo.nr_procs, kinfo.nr_tasks);
}
/*===========================================================================*
* root_meminfo *
*===========================================================================*/
PRIVATE void root_meminfo(void)
{
/* Print general memory information.
*/
struct vm_stats_info vsi;
if (vm_info_stats(&vsi) != OK)
return;
buf_printf("%u %lu %lu %lu %lu\n", vsi.vsi_pagesize,
vsi.vsi_total, vsi.vsi_free, vsi.vsi_largest, vsi.vsi_cached);
}
/*===========================================================================*
* root_pci *
*===========================================================================*/
PRIVATE void root_pci(void)
{
	/* Print one line for each PCI device in the system: its slot name,
	 * three attribute bytes, vendor and device ID, and device name.
	 */
	static int pci_ready = FALSE;
	u16_t vid, did;
	u8_t bcr, scr, pifr;
	char *slot_name, *dev_name;
	int r, devind;

	/* This should be taken care of behind the scenes by the PCI lib. */
	if (!pci_ready) {
		pci_init();

		pci_ready = TRUE;
	}

	/* Walk through the list of devices until there are no more. */
	for (r = pci_first_dev(&devind, &vid, &did); r == 1;
		r = pci_next_dev(&devind, &vid, &did)) {
		slot_name = pci_slot_name(devind);
		dev_name = pci_dev_name(vid, did);

		bcr = pci_attr_r8(devind, PCI_BCR);
		scr = pci_attr_r8(devind, PCI_SCR);
		pifr = pci_attr_r8(devind, PCI_PIFR);

		/* Substitute placeholders for names that are not known. */
		buf_printf("%s %x/%x/%x %04X:%04X %s\n",
			slot_name ? slot_name : "-",
			bcr, scr, pifr, vid, did,
			dev_name ? dev_name : "");
	}
}

456
servers/procfs/tree.c Normal file
View file

@ -0,0 +1,456 @@
/* ProcFS - tree.c - by Alen Stojanov and David van Moolenbroek */
#include "inc.h"
PUBLIC struct proc proc[NR_PROCS + NR_TASKS];
PUBLIC struct mproc mproc[NR_PROCS];
PUBLIC struct fproc fproc[NR_PROCS];
PRIVATE int nr_pid_entries;
/*===========================================================================*
* slot_in_use *
*===========================================================================*/
PRIVATE int slot_in_use(int slot)
{
	/* Return nonzero if the given slot is in use by a process, according
	 * to either the kernel's process table or, for slots that have a PM
	 * entry, PM's process table.
	 */
	if (proc[slot].p_rts_flags != RTS_SLOT_FREE)
		return 1;

	if (slot < NR_TASKS)
		return 0;

	return (mproc[slot - NR_TASKS].mp_flags & IN_USE) != 0;
}
/*===========================================================================*
* check_owner *
*===========================================================================*/
PRIVATE int check_owner(struct inode *node, int slot)
{
/* Check if the owner user and group ID of the inode are still in sync
* the current effective user and group ID of the given process.
*/
struct inode_stat stat;
if (slot < NR_TASKS) return TRUE;
get_inode_stat(node, &stat);
return (stat.uid == mproc[slot - NR_TASKS].mp_effuid &&
stat.gid == mproc[slot - NR_TASKS].mp_effgid);
}
/*===========================================================================*
* make_stat *
*===========================================================================*/
PRIVATE void make_stat(struct inode_stat *stat, int slot, int index)
{
/* Fill in an inode_stat structure for the given process slot and
* per-pid file index (or NO_INDEX for the process subdirectory root).
*/
if (index == NO_INDEX)
stat->mode = DIR_ALL_MODE;
else
stat->mode = pid_files[index].mode;
if (slot < NR_TASKS) {
stat->uid = SUPER_USER;
stat->gid = SUPER_USER;
} else {
stat->uid = mproc[slot - NR_TASKS].mp_effuid;
stat->gid = mproc[slot - NR_TASKS].mp_effgid;
}
stat->size = 0;
stat->dev = NO_DEV;
}
/*===========================================================================*
* dir_is_pid *
*===========================================================================*/
PRIVATE int dir_is_pid(struct inode *node)
{
/* Return whether the given node is a PID directory.
*/
return (get_parent_inode(node) == get_root_inode() &&
get_inode_index(node) != NO_INDEX);
}
/*===========================================================================*
* update_tables *
*===========================================================================*/
PRIVATE int update_tables(void)
{
/* Refresh the local copies of the kernel, PM and VFS process tables, in
 * that order. Every kernel table entry must carry the PMAGIC magic number;
 * if one does not, a warning is printed and EINVAL is returned. Otherwise
 * the result is OK, or the error code of the first call that failed.
 */
  int status, i;

  status = sys_getproctab(proc);
  if (status != OK)
    return status;

  /* Verify the magic number of each kernel table entry. */
  i = 0;
  while (i < NR_PROCS + NR_TASKS) {
    if (proc[i].p_magic != PMAGIC) {
      printf("PROCFS: system version mismatch!\n");
      return EINVAL;
    }
    i++;
  }

  status = getsysinfo(PM_PROC_NR, SI_PROC_TAB, mproc);
  if (status != OK)
    return status;

  return getsysinfo(VFS_PROC_NR, SI_PROC_TAB, fproc);
}
/*===========================================================================*
* init_tree *
*===========================================================================*/
PUBLIC int init_tree(void)
{
/* Set up this module before VTreeFS is started. The initial table fetch also
 * serves as a check that we were not compiled against a kernel other than
 * the one that is running. Returns OK, or the error from update_tables().
 */
  int count, r;

  r = update_tables();
  if (r != OK)
    return r;

  /* Count the entries of pid_files[] up to its NULL-name terminator; this
   * is the maximum number of entries we may add to each PID directory.
   */
  count = 0;
  while (pid_files[count].name != NULL)
    count++;

  nr_pid_entries = count;

  return OK;
}
/*===========================================================================*
* out_of_inodes *
*===========================================================================*/
PRIVATE void out_of_inodes(void)
{
/* Report that no more inodes are available, i.e., NR_INODES is set too low.
 * We do not panic, since we may be able to carry on with degraded
 * functionality: as long as NR_INODES is not below the crucial minimum, the
 * symptom is an incomplete listing of the main proc directory. The message
 * is printed only on the first call.
 */
  static int warned = FALSE;

  if (warned != FALSE)
    return;

  printf("PROCFS: out of inodes!\n");
  warned = TRUE;
}
/*===========================================================================*
* construct_pid_dirs *
*===========================================================================*/
PRIVATE void construct_pid_dirs(void)
{
/* Regenerate the set of PID directories in the root directory of the
* file system. Add new directories and delete old directories as
* appropriate; leave unchanged those that should remain the same.
*/
struct inode *root, *node;
struct inode_stat stat;
char name[PNAME_MAX+1];
pid_t pid;
int i;
root = get_root_inode();
for (i = 0; i < NR_PROCS + NR_TASKS; i++) {
/* Do we already have an inode associated with this slot? */
node = get_inode_by_index(root, i);
/* If the process slot is not in use, delete the associated
* inode if there was one, and skip this slot entirely.
*/
if (!slot_in_use(i)) {
if (node != NULL)
delete_inode(node);
continue;
}
/* Get the process ID. */
if (i < NR_TASKS)
pid = (pid_t) (i - NR_TASKS);
else
pid = mproc[i - NR_TASKS].mp_pid;
/* If there is an old entry, see if the pid matches the current
* entry, and the owner is still the same. Otherwise, delete
* the old entry first. We reconstruct the entire subtree even
* if only the owner changed, for security reasons: if a
* process could keep open a file or directory across the owner
* change, it might be able to access information it shouldn't.
*/
if (node != NULL) {
if (pid == (pid_t) get_inode_cbdata(node) &&
check_owner(node, i))
continue;
delete_inode(node);
}
/* Add the entry for the process slot. */
sprintf(name, "%d", pid);
make_stat(&stat, i, NO_INDEX);
node = add_inode(root, name, i, &stat, nr_pid_entries,
(cbdata_t) pid);
if (node == NULL)
out_of_inodes();
}
}
/*===========================================================================*
* make_one_pid_entry *
*===========================================================================*/
PRIVATE void make_one_pid_entry(struct inode *parent, char *name, int slot)
{
/* Construct one file in a PID directory, if a file with the given name
* should exist at all.
*/
struct inode *node;
struct inode_stat stat;
int i;
/* Don't readd if it is already there. */
node = get_inode_by_name(parent, name);
if (node != NULL)
return;
/* Only add the file if it is a known, registered name. */
for (i = 0; pid_files[i].name != NULL; i++) {
if (!strcmp(name, pid_files[i].name)) {
make_stat(&stat, slot, i);
node = add_inode(parent, name, i, &stat,
(index_t) 0, (cbdata_t) 0);
if (node == NULL)
out_of_inodes();
break;
}
}
}
/*===========================================================================*
* make_all_pid_entries *
*===========================================================================*/
PRIVATE void make_all_pid_entries(struct inode *parent, int slot)
{
/* Construct all files in a PID directory.
*/
struct inode *node;
struct inode_stat stat;
int i;
for (i = 0; pid_files[i].name != NULL; i++) {
node = get_inode_by_index(parent, i);
if (node != NULL)
continue;
make_stat(&stat, slot, i);
node = add_inode(parent, pid_files[i].name, i, &stat,
(index_t) 0, (cbdata_t) 0);
if (node == NULL)
out_of_inodes();
}
}
/*===========================================================================*
* construct_pid_entries *
*===========================================================================*/
PRIVATE void construct_pid_entries(struct inode *parent, char *name)
{
/* Populate the PID directory 'parent'. With a non-NULL 'name', only the file
 * of that name is added if needed (a lookup); with a NULL 'name', all files
 * that are not present yet are added (a directory listing).
 */
  int slot = get_inode_index(parent);

  assert(slot >= 0 && slot < NR_TASKS + NR_PROCS);

  /* The process may be gone by now; if so, remove its directory instead. */
  if (!slot_in_use(slot)) {
    delete_inode(parent);
    return;
  }

  if (name == NULL)
    make_all_pid_entries(parent, slot);
  else
    make_one_pid_entry(parent, name, slot);
}
/*===========================================================================*
* pid_read *
*===========================================================================*/
PRIVATE void pid_read(struct inode *node)
{
/* Data is requested from one of the files in a PID directory. Call the
* function that is responsible for generating the data for that file.
*/
struct inode *parent;
int slot, index;
/* Get the slot number of the process. Note that this currently will
* not work for files not in the top-level pid subdirectory.
*/
parent = get_parent_inode(node);
slot = get_inode_index(parent);
/* Get this file's index number. */
index = get_inode_index(node);
/* Call the handler procedure for the file. */
((_PROTOTYPE(void (*), (int))) pid_files[index].data)(slot);
}
/*===========================================================================*
* pid_link *
*===========================================================================*/
PRIVATE int pid_link(struct inode *node, char *ptr, int max)
{
/* The contents of a symbolic link in a PID directory are requested.
 * This function is a placeholder for future use: there are no symbolic
 * links yet, so the result is always the empty string.
 *
 * node: the symbolic link inode (currently unused)
 * ptr:  buffer that receives the null-terminated link target
 * max:  size of the buffer in bytes
 *
 * Always returns OK.
 */
  /* Respect the buffer size: only store the terminator if there is room
   * for it, rather than writing into a zero-sized buffer.
   */
  if (max > 0)
    ptr[0] = '\0';

  return OK;
}
/*===========================================================================*
* lookup_hook *
*===========================================================================*/
PUBLIC int lookup_hook(struct inode *parent, char *name, cbdata_t cbdata)
{
/* Path name resolution hook, called for a (parent, name) pair. Refresh our
 * view of the system if needed, and then (re)generate the entries of the
 * directory in which the lookup takes place. Always returns OK.
 */
  static clock_t prev_uptime = 0;
  clock_t uptime;
  int status;

  /* Refreshing the tables on every lookup would be too expensive, so do it
   * only when the uptime value has changed since the previous refresh.
   * Alternative: pull in only PM's table?
   */
  status = getuptime(&uptime);
  if (status != OK)
    panic(__FILE__, "unable to get uptime", status);

  if (prev_uptime != uptime) {
    update_tables();
    prev_uptime = uptime;
  }

  /* A lookup in the root directory requires rebuilding all PID directories,
   * as some may have been garbage collected. They are all updated at once,
   * because updating individual entries would risk name collisions.
   */
  if (parent == get_root_inode()) {
    construct_pid_dirs();
    return OK;
  }

  /* A lookup in a PID directory may require (re)creating the requested
   * file. If the directory's process is gone, construct_pid_entries()
   * deletes the directory instead.
   */
  if (dir_is_pid(parent))
    construct_pid_entries(parent, name);

  return OK;
}
/*===========================================================================*
* getdents_hook *
*===========================================================================*/
PUBLIC int getdents_hook(struct inode *node, cbdata_t cbdata)
{
/* Directory entry retrieval hook. Before the entries of 'node' are returned,
 * make sure that every file that belongs in that directory is actually part
 * of the virtual tree. Always returns OK.
 */
  /* Root directory: refresh the tables and rebuild the PID directories. */
  if (node == get_root_inode()) {
    update_tables();
    construct_pid_dirs();
    return OK;
  }

  /* PID directory: add all of its files that are not present yet. */
  if (dir_is_pid(node))
    construct_pid_entries(node, NULL /*name*/);

  return OK;
}
/*===========================================================================*
* read_hook *
*===========================================================================*/
PUBLIC int read_hook(struct inode *node, off_t off, char **ptr,
  size_t *len, cbdata_t cbdata)
{
/* Regular file read hook. Set up the output buffer for the requested offset
 * and length, let the file's callback function generate the data, and hand
 * the resulting buffer pointer and length back through 'ptr' and 'len'.
 * Always returns OK.
 */
  _PROTOTYPE(void (*generator), (void));

  buf_init(off, *len);

  if (get_inode_index(node) == NO_INDEX) {
    /* A file without index: its cbdata is the callback function. */
    generator = (_PROTOTYPE(void (*), (void))) cbdata;
    generator();
  } else {
    /* An indexed file is a file in a PID directory. */
    pid_read(node);
  }

  *len = buf_get(ptr);

  return OK;
}
/*===========================================================================*
* rdlink_hook *
*===========================================================================*/
PUBLIC int rdlink_hook(struct inode *node, char *ptr, size_t max,
  cbdata_t cbdata)
{
/* Symbolic link resolution hook. Not used yet.
 *
 * node:   the symbolic link inode
 * ptr:    buffer that receives the null-terminated link target
 * max:    size of the buffer in bytes
 * cbdata: callback data of the inode (unused)
 *
 * For links in a PID directory, the result of pid_link() is returned. For
 * any other link, an empty target is stored and OK is returned.
 */
  struct inode *parent;

  /* Get the parent inode. */
  parent = get_parent_inode(node);

  /* If the parent inode is a pid directory, let the pid handler produce the
   * target, and pass on its result rather than discarding it.
   */
  if (parent != NULL && dir_is_pid(parent))
    return pid_link(node, ptr, (int) max);

  /* There are no other symbolic links. Never report success with the
   * buffer left unwritten; store an empty target if there is room.
   */
  if (max > 0)
    ptr[0] = '\0';

  return OK;
}

70
servers/procfs/type.h Normal file
View file

@ -0,0 +1,70 @@
#ifndef _PROCFS_TYPE_H
#define _PROCFS_TYPE_H
/* Abstract data type for the 'data' field of 'struct file'. It can hold a
 * pointer: either a callback function cast to this type, or a pointer to
 * another 'struct file' array.
 */
typedef void *data_t; /* abstract data type; can hold pointer */
/* ProcFS supports two groups of files: dynamic files, which are created within
* process-specific (PID) directories, and static files, which are global. For
* both, the following structure is used to construct the files.
*
* For dynamic files, the rules are simple: only regular files are supported
* (although partial support for symbolic links is already present), and the
* 'data' field must be filled with a pointer to a function of the type:
*
* void (*)(int slot)
*
* The function will be called whenever a read request for the file is made;
* 'slot' contains the kernel slot number of the process being queried (so for
* the PM and VFS process tables, NR_TASKS has to be subtracted from the slot
* number to find the right slot). The function is expected to produce
* appropriate output using the buf_printf() function.
*
* For static files, regular files and directories are supported. For
* directories, the 'data' field must be a pointer to another 'struct file'
* array that specifies the contents of the directory - this directory will
* then be created recursively. For regular files, the 'data' field must point
* to a function of the type:
*
* void (*)(void)
*
* Here too, the function will be called upon a read request, and it is
* supposed to "fill" the file using buf_printf(). Obviously, for static files,
* there is no slot number.
*
* For both static and dynamic files, 'mode' must specify the file type as well
* as the access mode, and in both cases, each array is terminated with an
* entry that has its name set to NULL.
*/
/* The internal link between static/dynamic files/directories and VTreeFS'
* indexes and cbdata values is as follows:
* - Dynamic directories are always PID directories in the root directory.
* They are generated automatically, and are not specified using a "struct
* file" structure. Their index is their slot number, so that getdents()
* calls always return any PID at most once. Their cbdata value is the PID of
* the process associated with that dynamic directory, for the purpose of
* comparing old and new PIDs after updating process tables (without having
* to atoi() the directory's name).
* - Dynamic files are always in such a dynamic directory. Their index is the
* array index into the "struct file" array of pid files (pid_files[]). They
* need an index, because they may be deleted at any time due to inode
* shortages, independently of other dynamic files in the same directory, and
* recreating them without index would again risk possibly inconsistent
* getdents() results, where for example the same file shows up twice.
* VTreeFS currently does not distinguish between indexed and deletable files
* and hence, all dynamic files must be indexed so as to be deletable anyway.
* - Static directories have no index (they are not and must not be deletable),
* and although their cbdata is their associated 'data' field from their
* "struct file" entries, their cbdata value is currently not relied on
* anywhere. Then again, as of writing, there are no static directories at
* all.
* - Static files have no index either (for the same reason). Their cbdata is
* also their 'data' field from the "struct file" entry creating the file,
* and this is used to actually call the callback function directly.
*/
/* Description of one ProcFS file or directory. Files are specified as arrays
 * of this structure; each array ends with an entry whose 'name' is NULL.
 */
struct file {
char *name; /* file name, maximum length PNAME_MAX; NULL ends the array */
mode_t mode; /* file mode: both the file type and the access mode */
data_t data; /* callback function, or (for directories) a nested array */
};
#endif /* _PROCFS_TYPE_H */

63
servers/procfs/util.c Normal file
View file

@ -0,0 +1,63 @@
/* ProcFS - util.c - by Alen Stojanov and David van Moolenbroek */
#include "inc.h"
/*===========================================================================*
* procfs_getloadavg *
*===========================================================================*/
PUBLIC int procfs_getloadavg(double *loadavg, int nelem)
{
/* Retrieve system load average information.
*/
struct loadinfo loadinfo;
u32_t system_hz, ticks_per_slot;
int p, unfilled_ticks;
int minutes[3] = { 1, 5, 15 };
ssize_t l;
if(nelem < 1) {
errno = ENOSPC;
return -1;
}
system_hz = sys_hz();
if((l=sys_getloadinfo(&loadinfo)) != OK)
return -1;
if(nelem > 3)
nelem = 3;
/* How many ticks are missing from the newest-filled slot? */
ticks_per_slot = _LOAD_UNIT_SECS * system_hz;
unfilled_ticks =
ticks_per_slot - (loadinfo.last_clock % ticks_per_slot);
for(p = 0; p < nelem; p++) {
int h, slots;
double l = 0.0;
int latest = loadinfo.proc_last_slot;
slots = minutes[p] * 60 / _LOAD_UNIT_SECS;
/* Add up the total number of process ticks for this number
* of minutes (minutes[p]). Start with the newest slot, which
* is latest, and count back for the number of slots that
* correspond to the right number of minutes. Take wraparound
* into account by calculating the index modulo _LOAD_HISTORY,
* which is the number of slots of history kept.
*/
for(h = 0; h < slots; h++) {
int slot;
slot = (latest - h + _LOAD_HISTORY) % _LOAD_HISTORY;
l += (double) loadinfo.proc_load_history[slot];
}
/* The load average over this number of minutes is the number
* of process-ticks divided by the number of ticks, not
* counting the number of ticks the last slot hasn't been
* around yet.
*/
loadavg[p] = l / (slots * ticks_per_slot - unfilled_ticks);
}
return nelem;
}