minix/servers/vfs/main.c
Ben Gras 50e2064049 No more intel/minix segments.
This commit removes all traces of Minix segments (the text/data/stack
memory map abstraction in the kernel) and significance of Intel segments
(hardware segments like CS, DS that add offsets to all addressing before
page table translation). This ultimately simplifies the memory layout
and addressing and makes the same layout possible on non-Intel
architectures.

There are only two types of addresses in the world now: virtual
and physical; even the kernel and processes have the same virtual
address space. Kernel and user processes can be distinguished at a
glance as processes won't use 0xF0000000 and above.

No static pre-allocated memory sizes exist any more.

Changes to booting:
        . The pre_init.c leaves the kernel and modules exactly as
          they were left by the bootloader in physical memory
        . The kernel starts running using physical addressing,
          loaded at a fixed location given in its linker script by the
          bootloader.  All code and data in this phase are linked to
          this fixed low location.
        . It makes a bootstrap pagetable to map itself to a
          fixed high location (also in linker script) and jumps to
          the high address. All code and data then use this high addressing.
        . All code/data symbols linked at the low addresses is prefixed by
          an objcopy step with __k_unpaged_*, so that that code cannot
          reference highly-linked symbols (which aren't valid yet) or vice
          versa (symbols that aren't valid any more).
        . The two addressing modes are separated in the linker script by
          collecting the unpaged_*.o objects and linking them with low
          addresses, and linking the rest high. Some objects are linked
          twice, once low and once high.
        . The bootstrap phase passes a lot of information (e.g. free memory
          list, physical location of the modules, etc.) using the kinfo
          struct.
        . After this bootstrap the low-linked part is freed.
        . The kernel maps in VM into the bootstrap page table so that VM can
          begin executing. Its first job is to make page tables for all other
          boot processes. So VM runs before RS, and RS gets a fully dynamic,
          VM-managed address space. VM gets its privilege info from RS as usual
          but that happens after RS starts running.
        . Both the kernel loading VM and VM organizing boot processes happen
	  using the libexec logic. This removes the last reason for VM to
	  still know much about exec() and vm/exec.c is gone.

Further Implementation:
        . All segments are based at 0 and have a 4 GB limit.
        . The kernel is mapped in at the top of the virtual address
          space so as not to constrain the user processes.
        . Processes do not use segments from the LDT at all; there are
          no segments in the LDT any more, so no LLDT is needed.
        . The Minix segments T/D/S are gone and so none of the
          user-space or in-kernel copy functions use them. The copy
          functions use a process endpoint of NONE to realize it's
          a physical address, virtual otherwise.
        . The umap call only makes sense to translate a virtual address
          to a physical address now.
        . Segments-related calls like newmap and alloc_segments are gone.
        . All segments-related translation in VM is gone (vir2map etc).
        . Initialization in VM is simpler as no moving around is necessary.
        . VM and all other boot processes can be linked wherever they wish
          and will be mapped in at the right location by the kernel and VM
          respectively.

Other changes:
        . The multiboot code is less special: it does not use mb_print
          for its diagnostics any more but uses printf() as normal, saving
          the output into the diagnostics buffer, only printing to the
          screen using the direct print functions if a panic() occurs.
        . The multiboot code uses the flexible 'free memory map list'
          style to receive the list of free memory if available.
        . The kernel determines the memory layout of the processes to
          a degree: it tells VM where the kernel starts and ends and
          where the kernel wants the top of the process to be. VM then
          uses this entire range, i.e. the stack is right at the top,
          and mmap()ped bits of memory are placed below that downwards,
          and the break grows upwards.

Other Consequences:
        . Every process gets its own page table as address spaces
          can't be separated any more by segments.
        . As all segments are 0-based, there is no distinction between
          virtual and linear addresses, nor between userspace and
          kernel addresses.
        . Less work is done when context switching, leading to a net
          performance increase. (8% faster on my machine for 'make servers'.)
	. The layout and configuration of the GDT makes sysenter and syscall
	  possible.
2012-07-15 22:30:15 +02:00

1076 lines
28 KiB
C

/*
* a loop that gets messages requesting work, carries out the work, and sends
* replies.
*
* The entry points into this file are:
* main: main program of the Virtual File System
* reply: send a reply to a process after the requested work is done
*
*/
#include "fs.h"
#include <fcntl.h>
#include <string.h>
#include <stdio.h>
#include <signal.h>
#include <assert.h>
#include <stdlib.h>
#include <sys/ioc_memory.h>
#include <sys/svrctl.h>
#include <sys/select.h>
#include <minix/callnr.h>
#include <minix/com.h>
#include <minix/keymap.h>
#include <minix/const.h>
#include <minix/endpoint.h>
#include <minix/safecopies.h>
#include <minix/debug.h>
#include <minix/vfsif.h>
#include "file.h"
#include "dmap.h"
#include "fproc.h"
#include "scratchpad.h"
#include "vmnt.h"
#include "vnode.h"
#include "job.h"
#include "param.h"
#if ENABLE_SYSCALL_STATS
EXTERN unsigned long calls_stats[NCALLS];
#endif
/* Thread related prototypes */
static void thread_cleanup(struct fproc *rfp);
static void *do_async_dev_result(void *arg);
static void *do_control_msgs(void *arg);
static void *do_fs_reply(struct job *job);
static void *do_work(void *arg);
static void *do_pm(void *arg);
static void *do_init_root(void *arg);
static void handle_work(void *(*func)(void *arg));
static void get_work(void);
static void lock_pm(void);
static void unlock_pm(void);
static void service_pm(void);
static void service_pm_postponed(void);
static int unblock(struct fproc *rfp);
/* SEF functions and variables. */
static void sef_local_startup(void);
static int sef_cb_init_fresh(int type, sef_init_info_t *info);
static mutex_t pm_lock;
static endpoint_t receive_from;
/*===========================================================================*
* main *
*===========================================================================*/
int main(void)
{
/* This is the main program of the file system. The main loop consists of
* three major activities: getting new work, processing the work, and sending
* the reply. This loop never terminates as long as the file system runs.
*/
int transid;
struct job *job;
/* SEF local startup. */
sef_local_startup();
printf("Started VFS: %d worker thread(s)\n", NR_WTHREADS);
if (OK != (sys_getkinfo(&kinfo)))
panic("couldn't get kernel kinfo");
/* This is the main loop that gets work, processes it, and sends replies. */
while (TRUE) {
yield_all(); /* let other threads run */
self = NULL;
job = NULL;
send_work();
get_work();
transid = TRNS_GET_ID(m_in.m_type);
if (IS_VFS_FS_TRANSID(transid)) {
job = worker_getjob( (thread_t) transid - VFS_TRANSID);
if (job == NULL) {
printf("VFS: spurious message %d from endpoint %d\n",
m_in.m_type, m_in.m_source);
continue;
}
m_in.m_type = TRNS_DEL_ID(m_in.m_type);
}
if (job != NULL) {
do_fs_reply(job);
continue;
} else if (who_e == PM_PROC_NR) { /* Calls from PM */
/* Special control messages from PM */
sys_worker_start(do_pm);
continue;
} else if (is_notify(call_nr)) {
/* A task notify()ed us */
sys_worker_start(do_control_msgs);
continue;
} else if (who_p < 0) { /* i.e., message comes from a task */
/* We're going to ignore this message. Tasks should
* send notify()s only.
*/
printf("VFS: ignoring message from %d (%d)\n", who_e, call_nr);
continue;
}
/* At this point we either have results from an asynchronous device
* or a new system call. In both cases a new worker thread has to be
* started and there might not be one available from the pool. This is
* not a problem (requests/replies are simply queued), except when
* they're from an FS endpoint, because these can cause a deadlock.
* handle_work() takes care of the details. */
if (IS_DEV_RS(call_nr)) {
/* We've got results for a device request */
handle_work(do_async_dev_result);
continue;
} else {
/* Normal syscall. */
handle_work(do_work);
}
}
return(OK); /* shouldn't come here */
}
/*===========================================================================*
* handle_work *
*===========================================================================*/
static void handle_work(void *(*func)(void *arg))
{
/* Handle asynchronous device replies and new system calls. If the originating
* endpoint is an FS endpoint, take extra care not to get in deadlock. */
struct vmnt *vmp = NULL;
endpoint_t proc_e;
proc_e = m_in.m_source;
if (fp->fp_flags & FP_SYS_PROC) {
if (worker_available() == 0) {
if (!deadlock_resolving) {
if ((vmp = find_vmnt(proc_e)) != NULL) {
/* A call back or dev result from an FS
* endpoint. Set call back flag. Can do only
* one call back at a time.
*/
if (vmp->m_flags & VMNT_CALLBACK) {
reply(proc_e, EAGAIN);
return;
}
vmp->m_flags |= VMNT_CALLBACK;
/* When an FS endpoint has to make a call back
* in order to mount, force its device to a
* "none device" so block reads/writes will be
* handled by ROOT_FS_E.
*/
if (vmp->m_flags & VMNT_MOUNTING)
vmp->m_flags |= VMNT_FORCEROOTBSF;
}
deadlock_resolving = 1;
dl_worker_start(func);
return;
}
/* Already trying to resolve a deadlock, can't
* handle more, sorry */
reply(proc_e, EAGAIN);
return;
}
}
worker_start(func);
}
/*===========================================================================*
* do_async_dev_result *
*===========================================================================*/
static void *do_async_dev_result(void *arg)
{
endpoint_t endpt;
struct job my_job;
my_job = *((struct job *) arg);
fp = my_job.j_fp;
/* An asynchronous character driver has results for us */
if (job_call_nr == DEV_REVIVE) {
endpt = job_m_in.REP_ENDPT;
if (endpt == VFS_PROC_NR)
endpt = find_suspended_ep(job_m_in.m_source,
job_m_in.REP_IO_GRANT);
if (endpt == NONE) {
printf("VFS: proc with grant %d from %d not found\n",
job_m_in.REP_IO_GRANT, job_m_in.m_source);
} else if (job_m_in.REP_STATUS == SUSPEND) {
printf("VFS: got SUSPEND on DEV_REVIVE: not reviving proc\n");
} else
revive(endpt, job_m_in.REP_STATUS);
}
else if (job_call_nr == DEV_OPEN_REPL) open_reply();
else if (job_call_nr == DEV_REOPEN_REPL) reopen_reply();
else if (job_call_nr == DEV_CLOSE_REPL) close_reply();
else if (job_call_nr == DEV_SEL_REPL1)
select_reply1(job_m_in.m_source, job_m_in.DEV_MINOR,
job_m_in.DEV_SEL_OPS);
else if (job_call_nr == DEV_SEL_REPL2)
select_reply2(job_m_in.m_source, job_m_in.DEV_MINOR,
job_m_in.DEV_SEL_OPS);
if (deadlock_resolving) {
if (fp != NULL && fp->fp_wtid == dl_worker.w_tid)
deadlock_resolving = 0;
}
if (fp != NULL && (fp->fp_flags & FP_SYS_PROC)) {
struct vmnt *vmp;
if ((vmp = find_vmnt(fp->fp_endpoint)) != NULL)
vmp->m_flags &= ~VMNT_CALLBACK;
}
thread_cleanup(NULL);
return(NULL);
}
/*===========================================================================*
* do_control_msgs *
*===========================================================================*/
static void *do_control_msgs(void *arg)
{
struct job my_job;
my_job = *((struct job *) arg);
fp = my_job.j_fp;
/* Check for special control messages. */
if (job_m_in.m_source == CLOCK) {
/* Alarm timer expired. Used only for select(). Check it. */
expire_timers(job_m_in.NOTIFY_TIMESTAMP);
} else if (job_m_in.m_source == DS_PROC_NR) {
/* DS notifies us of an event. */
ds_event();
} else {
/* Device notifies us of an event. */
dev_status(&job_m_in);
}
thread_cleanup(NULL);
return(NULL);
}
/*===========================================================================*
* do_fs_reply *
*===========================================================================*/
static void *do_fs_reply(struct job *job)
{
struct vmnt *vmp;
struct fproc *rfp;
if ((vmp = find_vmnt(who_e)) == NULL)
panic("Couldn't find vmnt for endpoint %d", who_e);
rfp = job->j_fp;
if (rfp == NULL || rfp->fp_endpoint == NONE) {
printf("VFS: spurious reply from %d\n", who_e);
return(NULL);
}
if (rfp->fp_task != who_e)
printf("VFS: expected %d to reply, not %d\n", rfp->fp_task, who_e);
*rfp->fp_sendrec = m_in;
rfp->fp_task = NONE;
vmp->m_comm.c_cur_reqs--; /* We've got our reply, make room for others */
if (rfp->fp_wtid != invalid_thread_id)
worker_signal(worker_get(rfp->fp_wtid)); /* Continue this thread */
else
printf("VFS: consistency error: reply for finished job\n");
return(NULL);
}
/*===========================================================================*
* lock_pm *
*===========================================================================*/
static void lock_pm(void)
{
struct fproc *org_fp;
struct worker_thread *org_self;
/* First try to get it right off the bat */
if (mutex_trylock(&pm_lock) == 0)
return;
org_fp = fp;
org_self = self;
if (mutex_lock(&pm_lock) != 0)
panic("Could not obtain lock on pm\n");
fp = org_fp;
self = org_self;
}
/*===========================================================================*
* unlock_pm *
*===========================================================================*/
static void unlock_pm(void)
{
if (mutex_unlock(&pm_lock) != 0)
panic("Could not release lock on pm");
}
/*===========================================================================*
* do_pm *
*===========================================================================*/
static void *do_pm(void *arg)
{
struct job my_job;
struct fproc *rfp;
my_job = *((struct job *) arg);
rfp = fp = my_job.j_fp;
lock_pm();
service_pm();
unlock_pm();
thread_cleanup(NULL);
return(NULL);
}
/*===========================================================================*
* do_pending_pipe *
*===========================================================================*/
static void *do_pending_pipe(void *arg)
{
int r, op;
struct job my_job;
struct filp *f;
tll_access_t locktype;
my_job = *((struct job *) arg);
fp = my_job.j_fp;
lock_proc(fp, 1 /* force lock */);
f = scratch(fp).file.filp;
assert(f != NULL);
scratch(fp).file.filp = NULL;
locktype = (job_call_nr == READ) ? VNODE_READ : VNODE_WRITE;
op = (job_call_nr == READ) ? READING : WRITING;
lock_filp(f, locktype);
r = rw_pipe(op, who_e, f, scratch(fp).io.io_buffer, scratch(fp).io.io_nbytes);
if (r != SUSPEND) /* Do we have results to report? */
reply(fp->fp_endpoint, r);
unlock_filp(f);
thread_cleanup(fp);
return(NULL);
}
/*===========================================================================*
* do_dummy *
*===========================================================================*/
void *do_dummy(void *arg)
{
struct job my_job;
int r;
my_job = *((struct job *) arg);
fp = my_job.j_fp;
if ((r = mutex_trylock(&fp->fp_lock)) == 0) {
thread_cleanup(fp);
} else {
/* Proc is busy, let that worker thread carry out the work */
thread_cleanup(NULL);
}
return(NULL);
}
/*===========================================================================*
* do_work *
*===========================================================================*/
static void *do_work(void *arg)
{
int error;
struct job my_job;
my_job = *((struct job *) arg);
fp = my_job.j_fp;
lock_proc(fp, 0); /* This proc is busy */
if (job_call_nr == MAPDRIVER) {
error = do_mapdriver();
} else if (job_call_nr == COMMON_GETSYSINFO) {
error = do_getsysinfo();
} else if (IS_PFS_VFS_RQ(job_call_nr)) {
if (who_e != PFS_PROC_NR) {
printf("VFS: only PFS is allowed to make nested VFS calls\n");
error = ENOSYS;
} else if (job_call_nr <= PFS_BASE ||
job_call_nr >= PFS_BASE + PFS_NREQS) {
error = ENOSYS;
} else {
job_call_nr -= PFS_BASE;
error = (*pfs_call_vec[job_call_nr])();
}
} else {
/* We're dealing with a POSIX system call from a normal
* process. Call the internal function that does the work.
*/
if (job_call_nr < 0 || job_call_nr >= NCALLS) {
error = ENOSYS;
} else if (fp->fp_pid == PID_FREE) {
/* Process vanished before we were able to handle request.
* Replying has no use. Just drop it. */
error = SUSPEND;
} else {
#if ENABLE_SYSCALL_STATS
calls_stats[job_call_nr]++;
#endif
error = (*call_vec[job_call_nr])();
}
}
/* Copy the results back to the user and send reply. */
if (error != SUSPEND) {
if ((fp->fp_flags & FP_SYS_PROC)) {
struct vmnt *vmp;
if ((vmp = find_vmnt(fp->fp_endpoint)) != NULL)
vmp->m_flags &= ~VMNT_CALLBACK;
}
if (deadlock_resolving) {
if (fp->fp_wtid == dl_worker.w_tid)
deadlock_resolving = 0;
}
reply(fp->fp_endpoint, error);
}
thread_cleanup(fp);
return(NULL);
}
/*===========================================================================*
* sef_local_startup *
*===========================================================================*/
static void sef_local_startup()
{
/* Register init callbacks. */
sef_setcb_init_fresh(sef_cb_init_fresh);
sef_setcb_init_restart(sef_cb_init_fail);
/* No live update support for now. */
/* Let SEF perform startup. */
sef_startup();
}
/*===========================================================================*
* sef_cb_init_fresh *
*===========================================================================*/
static int sef_cb_init_fresh(int UNUSED(type), sef_init_info_t *info)
{
/* Initialize the virtual file server. */
int s, i;
struct fproc *rfp;
message mess;
struct rprocpub rprocpub[NR_BOOT_PROCS];
force_sync = 0;
receive_from = ANY;
self = NULL;
verbose = 0;
/* Initialize proc endpoints to NONE */
for (rfp = &fproc[0]; rfp < &fproc[NR_PROCS]; rfp++) {
rfp->fp_endpoint = NONE;
rfp->fp_pid = PID_FREE;
}
/* Initialize the process table with help of the process manager messages.
* Expect one message for each system process with its slot number and pid.
* When no more processes follow, the magic process number NONE is sent.
* Then, stop and synchronize with the PM.
*/
do {
if ((s = sef_receive(PM_PROC_NR, &mess)) != OK)
panic("VFS: couldn't receive from PM: %d", s);
if (mess.m_type != PM_INIT)
panic("unexpected message from PM: %d", mess.m_type);
if (NONE == mess.PM_PROC) break;
rfp = &fproc[mess.PM_SLOT];
rfp->fp_flags = FP_NOFLAGS;
rfp->fp_pid = mess.PM_PID;
rfp->fp_endpoint = mess.PM_PROC;
rfp->fp_grant = GRANT_INVALID;
rfp->fp_blocked_on = FP_BLOCKED_ON_NONE;
rfp->fp_realuid = (uid_t) SYS_UID;
rfp->fp_effuid = (uid_t) SYS_UID;
rfp->fp_realgid = (gid_t) SYS_GID;
rfp->fp_effgid = (gid_t) SYS_GID;
rfp->fp_umask = ~0;
} while (TRUE); /* continue until process NONE */
mess.m_type = OK; /* tell PM that we succeeded */
s = send(PM_PROC_NR, &mess); /* send synchronization message */
/* All process table entries have been set. Continue with initialization. */
fp = &fproc[_ENDPOINT_P(VFS_PROC_NR)];/* During init all communication with
* FSes is on behalf of myself */
init_dmap(); /* Initialize device table. */
system_hz = sys_hz();
/* Map all the services in the boot image. */
if ((s = sys_safecopyfrom(RS_PROC_NR, info->rproctab_gid, 0,
(vir_bytes) rprocpub, sizeof(rprocpub))) != OK){
panic("sys_safecopyfrom failed: %d", s);
}
for (i = 0; i < NR_BOOT_PROCS; i++) {
if (rprocpub[i].in_use) {
if ((s = map_service(&rprocpub[i])) != OK) {
panic("VFS: unable to map service: %d", s);
}
}
}
/* Subscribe to block and character driver events. */
s = ds_subscribe("drv\\.[bc]..\\..*", DSF_INITIAL | DSF_OVERWRITE);
if (s != OK) panic("VFS: can't subscribe to driver events (%d)", s);
/* Initialize worker threads */
for (i = 0; i < NR_WTHREADS; i++) {
worker_init(&workers[i]);
}
worker_init(&sys_worker); /* exclusive system worker thread */
worker_init(&dl_worker); /* exclusive worker thread to resolve deadlocks */
/* Initialize global locks */
if (mthread_mutex_init(&pm_lock, NULL) != 0)
panic("VFS: couldn't initialize pm lock mutex");
if (mthread_mutex_init(&exec_lock, NULL) != 0)
panic("VFS: couldn't initialize exec lock");
if (mthread_mutex_init(&bsf_lock, NULL) != 0)
panic("VFS: couldn't initialize block special file lock");
/* Initialize event resources for boot procs and locks for all procs */
for (rfp = &fproc[0]; rfp < &fproc[NR_PROCS]; rfp++) {
if (mutex_init(&rfp->fp_lock, NULL) != 0)
panic("unable to initialize fproc lock");
#if LOCK_DEBUG
rfp->fp_vp_rdlocks = 0;
rfp->fp_vmnt_rdlocks = 0;
#endif
}
init_vnodes(); /* init vnodes */
init_vmnts(); /* init vmnt structures */
init_select(); /* init select() structures */
init_filps(); /* Init filp structures */
mount_pfs(); /* mount Pipe File Server */
worker_start(do_init_root); /* mount initial ramdisk as file system root */
yield(); /* force do_init_root to start */
self = NULL;
return(OK);
}
/*===========================================================================*
* do_init_root *
*===========================================================================*/
static void *do_init_root(void *arg)
{
struct fproc *rfp;
struct job my_job;
int r;
char *mount_label = "fs_imgrd"; /* FIXME: obtain this from RS */
my_job = *((struct job *) arg);
fp = my_job.j_fp;
lock_proc(fp, 1 /* force lock */); /* This proc is busy */
lock_pm();
/* Initialize process directories. mount_fs will set them to the correct
* values */
for (rfp = &fproc[0]; rfp < &fproc[NR_PROCS]; rfp++) {
FD_ZERO(&(rfp->fp_filp_inuse));
rfp->fp_rd = NULL;
rfp->fp_wd = NULL;
}
receive_from = MFS_PROC_NR;
if ((r = mount_fs(DEV_IMGRD, "/", MFS_PROC_NR, 0, mount_label)) != OK)
panic("Failed to initialize root");
receive_from = ANY;
unlock_pm();
thread_cleanup(fp);
return(NULL);
}
/*===========================================================================*
* lock_proc *
*===========================================================================*/
void lock_proc(struct fproc *rfp, int force_lock)
{
int r;
struct fproc *org_fp;
struct worker_thread *org_self;
r = mutex_trylock(&rfp->fp_lock);
/* Were we supposed to obtain this lock immediately? */
if (force_lock) {
assert(r == 0);
return;
}
if (r == 0) return;
org_fp = fp;
org_self = self;
if ((r = mutex_lock(&rfp->fp_lock)) != 0)
panic("unable to lock fproc lock: %d", r);
fp = org_fp;
self = org_self;
}
/*===========================================================================*
* unlock_proc *
*===========================================================================*/
void unlock_proc(struct fproc *rfp)
{
int r;
if ((r = mutex_unlock(&rfp->fp_lock)) != 0)
panic("Failed to unlock: %d", r);
}
/*===========================================================================*
* thread_cleanup *
*===========================================================================*/
static void thread_cleanup(struct fproc *rfp)
{
/* Clean up worker thread. Skip parts if this thread is not associated
* with a particular process (i.e., rfp is NULL) */
#if LOCK_DEBUG
if (rfp != NULL) {
check_filp_locks_by_me();
check_vnode_locks_by_me(rfp);
check_vmnt_locks_by_me(rfp);
}
#endif
if (rfp != NULL && rfp->fp_flags & FP_PM_PENDING) { /* Postponed PM call */
job_m_in = rfp->fp_job.j_m_in;
rfp->fp_flags &= ~FP_PM_PENDING;
service_pm_postponed();
}
#if LOCK_DEBUG
if (rfp != NULL) {
check_filp_locks_by_me();
check_vnode_locks_by_me(rfp);
check_vmnt_locks_by_me(rfp);
}
#endif
if (rfp != NULL) {
rfp->fp_flags &= ~FP_DROP_WORK;
unlock_proc(rfp);
}
}
/*===========================================================================*
* get_work *
*===========================================================================*/
static void get_work()
{
/* Normally wait for new input. However, if 'reviving' is
* nonzero, a suspended process must be awakened.
*/
int r, found_one, proc_p;
register struct fproc *rp;
while (reviving != 0) {
found_one = FALSE;
/* Find a suspended process. */
for (rp = &fproc[0]; rp < &fproc[NR_PROCS]; rp++)
if (rp->fp_pid != PID_FREE && (rp->fp_flags & FP_REVIVED)) {
found_one = TRUE; /* Found a suspended process */
if (unblock(rp))
return; /* So main loop can process job */
send_work();
}
if (!found_one) /* Consistency error */
panic("VFS: get_work couldn't revive anyone");
}
for(;;) {
/* Normal case. No one to revive. Get a useful request. */
if ((r = sef_receive(receive_from, &m_in)) != OK) {
panic("VFS: sef_receive error: %d", r);
}
proc_p = _ENDPOINT_P(m_in.m_source);
if (proc_p < 0) fp = NULL;
else fp = &fproc[proc_p];
if (m_in.m_type == EDEADSRCDST) return; /* Failed 'sendrec' */
/* Negative who_p is never used to access the fproc array. Negative
* numbers (kernel tasks) are treated in a special way.
*/
if (who_p >= (int)(sizeof(fproc) / sizeof(struct fproc)))
panic("receive process out of range: %d", who_p);
if (who_p >= 0 && fproc[who_p].fp_endpoint == NONE) {
printf("VFS: ignoring request from %d: NONE endpoint %d (%d)\n",
m_in.m_source, who_p, m_in.m_type);
continue;
}
/* Internal consistency check; our mental image of process numbers and
* endpoints must match with how the rest of the system thinks of them.
*/
if (who_p >= 0 && fproc[who_p].fp_endpoint != who_e) {
if (fproc[who_p].fp_endpoint == NONE)
printf("slot unknown even\n");
printf("VFS: receive endpoint inconsistent (source %d, who_p "
"%d, stored ep %d, who_e %d).\n", m_in.m_source, who_p,
fproc[who_p].fp_endpoint, who_e);
panic("VFS: inconsistent endpoint ");
}
return;
}
}
/*===========================================================================*
* reply *
*===========================================================================*/
void reply(endpoint_t whom, int result)
{
/* Send a reply to a user process. If the send fails, just ignore it. */
int r;
m_out.reply_type = result;
r = sendnb(whom, &m_out);
if (r != OK) {
printf("VFS: %d couldn't send reply %d to %d: %d\n", mthread_self(),
result, whom, r);
util_stacktrace();
}
}
/*===========================================================================*
* service_pm_postponed *
*===========================================================================*/
static void service_pm_postponed(void)
{
int r;
vir_bytes pc, newsp;
switch(job_call_nr) {
case PM_EXEC:
{
endpoint_t proc_e;
vir_bytes exec_path, stack_frame;
size_t exec_path_len, stack_frame_len;
proc_e = job_m_in.PM_PROC;
exec_path = (vir_bytes) job_m_in.PM_PATH;
exec_path_len = (size_t) job_m_in.PM_PATH_LEN;
stack_frame = (vir_bytes) job_m_in.PM_FRAME;
stack_frame_len = (size_t) job_m_in.PM_FRAME_LEN;
r = pm_exec(proc_e, exec_path, exec_path_len, stack_frame,
stack_frame_len, &pc, &newsp, job_m_in.PM_EXECFLAGS);
/* Reply status to PM */
m_out.m_type = PM_EXEC_REPLY;
m_out.PM_PROC = proc_e;
m_out.PM_PC = (void*) pc;
m_out.PM_STATUS = r;
m_out.PM_NEWSP = (void *) newsp;
}
break;
case PM_EXIT:
{
endpoint_t proc_e;
proc_e = job_m_in.PM_PROC;
pm_exit(proc_e);
/* Reply dummy status to PM for synchronization */
m_out.m_type = PM_EXIT_REPLY;
m_out.PM_PROC = proc_e;
}
break;
case PM_DUMPCORE:
{
endpoint_t proc_e, traced_proc_e;
int term_signal;
vir_bytes core_path;
proc_e = job_m_in.PM_PROC;
traced_proc_e = job_m_in.PM_TRACED_PROC;
if(job_m_in.PM_PROC != job_m_in.PM_TRACED_PROC) {
/* dumpcore request */
term_signal = 0;
} else {
/* dumpcore on exit */
term_signal = job_m_in.PM_TERM_SIG;
}
core_path = (vir_bytes) job_m_in.PM_PATH;
r = pm_dumpcore(proc_e, term_signal, core_path);
/* Reply status to PM */
m_out.m_type = PM_CORE_REPLY;
m_out.PM_PROC = proc_e;
m_out.PM_TRACED_PROC = traced_proc_e;
m_out.PM_STATUS = r;
}
break;
default:
panic("Unhandled postponed PM call %d", job_m_in.m_type);
}
r = send(PM_PROC_NR, &m_out);
if (r != OK)
panic("service_pm_postponed: send failed: %d", r);
}
/*===========================================================================*
* service_pm *
*===========================================================================*/
static void service_pm()
{
int r, slot;
switch (job_call_nr) {
case PM_SETUID:
{
endpoint_t proc_e;
uid_t euid, ruid;
proc_e = job_m_in.PM_PROC;
euid = job_m_in.PM_EID;
ruid = job_m_in.PM_RID;
pm_setuid(proc_e, euid, ruid);
m_out.m_type = PM_SETUID_REPLY;
m_out.PM_PROC = proc_e;
}
break;
case PM_SETGID:
{
endpoint_t proc_e;
gid_t egid, rgid;
proc_e = job_m_in.PM_PROC;
egid = job_m_in.PM_EID;
rgid = job_m_in.PM_RID;
pm_setgid(proc_e, egid, rgid);
m_out.m_type = PM_SETGID_REPLY;
m_out.PM_PROC = proc_e;
}
break;
case PM_SETSID:
{
endpoint_t proc_e;
proc_e = job_m_in.PM_PROC;
pm_setsid(proc_e);
m_out.m_type = PM_SETSID_REPLY;
m_out.PM_PROC = proc_e;
}
break;
case PM_EXEC:
case PM_EXIT:
case PM_DUMPCORE:
{
endpoint_t proc_e = job_m_in.PM_PROC;
if(isokendpt(proc_e, &slot) != OK) {
printf("VFS: proc ep %d not ok\n", proc_e);
return;
}
fp = &fproc[slot];
if (fp->fp_flags & FP_PENDING) {
/* This process has a request pending, but PM wants it
* gone. Forget about the pending request and satisfy
* PM's request instead. Note that a pending request
* AND an EXEC request are mutually exclusive. Also, PM
* should send only one request/process at a time.
*/
assert(fp->fp_job.j_m_in.m_source != PM_PROC_NR);
}
/* PM requests on behalf of a proc are handled after the
* system call that might be in progress for that proc has
* finished. If the proc is not busy, we start a dummy call.
*/
if (!(fp->fp_flags & FP_PENDING) &&
mutex_trylock(&fp->fp_lock) == 0) {
mutex_unlock(&fp->fp_lock);
worker_start(do_dummy);
fp->fp_flags |= FP_DROP_WORK;
}
fp->fp_job.j_m_in = job_m_in;
fp->fp_flags |= FP_PM_PENDING;
return;
}
case PM_FORK:
case PM_SRV_FORK:
{
endpoint_t pproc_e, proc_e;
pid_t child_pid;
uid_t reuid;
gid_t regid;
pproc_e = job_m_in.PM_PPROC;
proc_e = job_m_in.PM_PROC;
child_pid = job_m_in.PM_CPID;
reuid = job_m_in.PM_REUID;
regid = job_m_in.PM_REGID;
pm_fork(pproc_e, proc_e, child_pid);
m_out.m_type = PM_FORK_REPLY;
if (job_call_nr == PM_SRV_FORK) {
m_out.m_type = PM_SRV_FORK_REPLY;
pm_setuid(proc_e, reuid, reuid);
pm_setgid(proc_e, regid, regid);
}
m_out.PM_PROC = proc_e;
}
break;
case PM_SETGROUPS:
{
endpoint_t proc_e;
int group_no;
gid_t *group_addr;
proc_e = job_m_in.PM_PROC;
group_no = job_m_in.PM_GROUP_NO;
group_addr = (gid_t *) job_m_in.PM_GROUP_ADDR;
pm_setgroups(proc_e, group_no, group_addr);
m_out.m_type = PM_SETGROUPS_REPLY;
m_out.PM_PROC = proc_e;
}
break;
case PM_UNPAUSE:
{
endpoint_t proc_e;
proc_e = job_m_in.PM_PROC;
unpause(proc_e);
m_out.m_type = PM_UNPAUSE_REPLY;
m_out.PM_PROC = proc_e;
}
break;
case PM_REBOOT:
pm_reboot();
/* Reply dummy status to PM for synchronization */
m_out.m_type = PM_REBOOT_REPLY;
break;
default:
printf("VFS: don't know how to handle PM request %d\n", job_call_nr);
return;
}
r = send(PM_PROC_NR, &m_out);
if (r != OK)
panic("service_pm: send failed: %d", r);
}
/*===========================================================================*
* unblock *
*===========================================================================*/
static int unblock(rfp)
struct fproc *rfp;
{
int blocked_on;
fp = rfp;
blocked_on = rfp->fp_blocked_on;
m_in.m_source = rfp->fp_endpoint;
m_in.m_type = rfp->fp_block_callnr;
m_in.fd = scratch(fp).file.fd_nr;
m_in.buffer = scratch(fp).io.io_buffer;
m_in.nbytes = scratch(fp).io.io_nbytes;
rfp->fp_blocked_on = FP_BLOCKED_ON_NONE; /* no longer blocked */
rfp->fp_flags &= ~FP_REVIVED;
reviving--;
assert(reviving >= 0);
/* This should be a pipe I/O, not a device I/O. If it is, it'll 'leak'
* grants.
*/
assert(!GRANT_VALID(rfp->fp_grant));
/* Pending pipe reads/writes can be handled directly */
if (blocked_on == FP_BLOCKED_ON_PIPE) {
worker_start(do_pending_pipe);
yield(); /* Give thread a chance to run */
self = NULL;
return(0); /* Retrieve more work */
}
return(1); /* We've unblocked a process */
}