2008-11-19 13:26:10 +01:00
|
|
|
/*
|
2005-04-21 16:53:53 +02:00
|
|
|
* a loop that gets messages requesting work, carries out the work, and sends
|
|
|
|
* replies.
|
|
|
|
*
|
|
|
|
* The entry points into this file are:
|
2006-10-25 15:40:36 +02:00
|
|
|
* main: main program of the Virtual File System
|
2005-04-21 16:53:53 +02:00
|
|
|
* reply: send a reply to a process after the requested work is done
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include "fs.h"
|
|
|
|
#include <fcntl.h>
|
|
|
|
#include <string.h>
|
2005-07-22 20:28:32 +02:00
|
|
|
#include <stdio.h>
|
2005-07-20 17:27:42 +02:00
|
|
|
#include <signal.h>
|
2006-06-20 12:12:09 +02:00
|
|
|
#include <assert.h>
|
2005-04-21 16:53:53 +02:00
|
|
|
#include <stdlib.h>
|
|
|
|
#include <sys/ioc_memory.h>
|
|
|
|
#include <sys/svrctl.h>
|
2006-03-09 17:00:33 +01:00
|
|
|
#include <sys/select.h>
|
2005-04-21 16:53:53 +02:00
|
|
|
#include <minix/callnr.h>
|
|
|
|
#include <minix/com.h>
|
|
|
|
#include <minix/keymap.h>
|
2005-05-13 10:57:08 +02:00
|
|
|
#include <minix/const.h>
|
endpoint-aware conversion of servers.
'who', indicating caller number in pm and fs and some other servers, has
been removed in favour of 'who_e' (endpoint) and 'who_p' (proc nr.).
In both PM and FS, isokendpt() convert endpoints to process slot
numbers, returning OK if it was a valid and consistent endpoint number.
okendpt() does the same but panic()s if it doesn't succeed. (In PM,
this is pm_isok..)
pm and fs keep their own records of process endpoints in their proc tables,
which are needed to make kernel calls about those processes.
message field names have changed.
fs drivers are endpoints.
fs now doesn't try to get out of driver deadlock, as the protocol isn't
supposed to let that happen any more. (A warning is printed if ELOCKED
is detected though.)
fproc[].fp_task (indicating which driver the process is suspended on)
became an int.
PM and FS now get endpoint numbers of initial boot processes from the
kernel. These happen to be the same as the old proc numbers, to let
user processes reach them with the old numbers, but FS and PM don't know
that. All new processes after INIT, even after the generation number
wraps around, get endpoint numbers with generation 1 and higher, so
the first instances of the boot processes are the only processes ever
to have endpoint numbers in the old proc number range.
More return code checks of sys_* functions have been added.
IS has become endpoint-aware. Ditched the 'text' and 'data' fields
in the kernel dump (which show locations, not sizes, so aren't terribly
useful) in favour of the endpoint number. Proc number is still visible.
Some other dumps (e.g. dmap, rs) show endpoint numbers now too which got
the formatting changed.
PM reading segments using rw_seg() has changed - it uses other fields
in the message now instead of encoding the segment and process number and
fd in the fd field. For that it uses _read_pm() and _write_pm() which to
_taskcall()s directly in pm/misc.c.
PM now sys_exit()s itself on panic(), instead of sys_abort().
RS also talks in endpoints instead of process numbers.
2006-03-03 11:20:58 +01:00
|
|
|
#include <minix/endpoint.h>
|
2006-06-20 12:12:09 +02:00
|
|
|
#include <minix/safecopies.h>
|
2009-09-21 16:49:26 +02:00
|
|
|
#include <minix/debug.h>
|
2012-02-13 16:28:04 +01:00
|
|
|
#include <minix/vfsif.h>
|
2005-04-21 16:53:53 +02:00
|
|
|
#include "file.h"
|
2012-02-13 16:28:04 +01:00
|
|
|
#include "dmap.h"
|
2005-04-21 16:53:53 +02:00
|
|
|
#include "fproc.h"
|
2012-02-13 16:28:04 +01:00
|
|
|
#include "scratchpad.h"
|
2006-10-25 15:40:36 +02:00
|
|
|
#include "vmnt.h"
|
|
|
|
#include "vnode.h"
|
2012-02-13 16:28:04 +01:00
|
|
|
#include "job.h"
|
|
|
|
#include "param.h"
|
2005-04-21 16:53:53 +02:00
|
|
|
|
2006-07-10 14:44:43 +02:00
|
|
|
#if ENABLE_SYSCALL_STATS
|
|
|
|
EXTERN unsigned long calls_stats[NCALLS];
|
|
|
|
#endif
|
|
|
|
|
2012-02-13 16:28:04 +01:00
|
|
|
/* Thread related prototypes */
|
2012-03-25 20:25:53 +02:00
|
|
|
static void *do_async_dev_result(void *arg);
|
|
|
|
static void *do_control_msgs(void *arg);
|
2012-11-14 14:12:37 +01:00
|
|
|
static void *do_dev_event(void *arg);
|
2012-03-25 20:25:53 +02:00
|
|
|
static void *do_fs_reply(struct job *job);
|
|
|
|
static void *do_work(void *arg);
|
|
|
|
static void *do_pm(void *arg);
|
|
|
|
static void *do_init_root(void *arg);
|
|
|
|
static void handle_work(void *(*func)(void *arg));
|
|
|
|
|
|
|
|
static void get_work(void);
|
|
|
|
static void lock_pm(void);
|
|
|
|
static void unlock_pm(void);
|
|
|
|
static void service_pm(void);
|
|
|
|
static void service_pm_postponed(void);
|
|
|
|
static int unblock(struct fproc *rfp);
|
2005-04-21 16:53:53 +02:00
|
|
|
|
Basic System Event Framework (SEF) with ping and live update.
SYSLIB CHANGES:
- SEF must be used by every system process and is thereby part of the system
library.
- The framework provides a receive() interface (sef_receive) for system
processes to automatically catch known system even messages and process them.
- SEF provides a default behavior for each type of system event, but allows
system processes to register callbacks to override the default behavior.
- Custom (local to the process) or predefined (provided by SEF) callback
implementations can be registered to SEF.
- SEF currently includes support for 2 types of system events:
1. SEF Ping. The event occurs every time RS sends a ping to figure out
whether a system process is still alive. The default callback implementation
provided by SEF is to notify RS back to let it know the process is alive
and kicking.
2. SEF Live update. The event occurs every time RS sends a prepare to update
message to let a system process know an update is available and to prepare
for it. The live update support is very basic for now. SEF only deals with
verifying if the prepare state can be supported by the process, dumping the
state for debugging purposes, and providing an event-driven programming
model to the process to react to state changes check-in when ready to update.
- SEF should be extended in the future to integrate support for more types of
system events. Ideally, all the cross-cutting concerns should be integrated into
SEF to avoid duplicating code and ease extensibility. Examples include:
* PM notify messages primarily used at shutdown.
* SYSTEM notify messages primarily used for signals.
* CLOCK notify messages used for system alarms.
* Debug messages. IS could still be in charge of fkey handling but would
forward the debug message to the target process (e.g. PM, if the user
requested debug information about PM). SEF would then catch the message and
do nothing unless the process has registered an appropriate callback to
deal with the event. This simplifies the programming model to print debug
information, avoids duplicating code, and reduces the effort to print
debug information.
SYSTEM PROCESSES CHANGES:
- Every system process registers SEF callbacks it needs to override the default
system behavior and calls sef_startup() right after being started.
- sef_startup() does almost nothing now, but will be extended in the future to
support callbacks of its own to let RS control and synchronize with every
system process at initialization time.
- Every system process calls sef_receive() now rather than receive() directly,
to let SEF handle predefined system events.
RS CHANGES:
- RS supports a basic single-component live update protocol now, as follows:
* When an update command is issued (via "service update *"), RS notifies the
target system process to prepare for a specific update state.
* If the process doesn't respond back in time, the update is aborted.
* When the process responds back, RS kills it and marks it for refreshing.
* The process is then automatically restarted as for a buggy process and can
start running again.
* Live update is currently prototyped as a controlled failure.
2009-12-21 15:12:21 +01:00
|
|
|
/* SEF functions and variables. */
|
2012-03-25 20:25:53 +02:00
|
|
|
static void sef_local_startup(void);
|
|
|
|
static int sef_cb_init_fresh(int type, sef_init_info_t *info);
|
|
|
|
static mutex_t pm_lock;
|
|
|
|
static endpoint_t receive_from;
|
2006-10-25 15:40:36 +02:00
|
|
|
|
2005-04-21 16:53:53 +02:00
|
|
|
/*===========================================================================*
|
|
|
|
* main *
|
|
|
|
*===========================================================================*/
|
2012-03-25 20:25:53 +02:00
|
|
|
int main(void)
|
2005-04-21 16:53:53 +02:00
|
|
|
{
|
|
|
|
/* This is the main program of the file system. The main loop consists of
|
|
|
|
* three major activities: getting new work, processing the work, and sending
|
|
|
|
* the reply. This loop never terminates as long as the file system runs.
|
|
|
|
*/
|
2012-03-30 11:05:28 +02:00
|
|
|
int transid;
|
2012-02-13 16:28:04 +01:00
|
|
|
struct job *job;
|
2005-04-21 16:53:53 +02:00
|
|
|
|
Basic System Event Framework (SEF) with ping and live update.
SYSLIB CHANGES:
- SEF must be used by every system process and is thereby part of the system
library.
- The framework provides a receive() interface (sef_receive) for system
processes to automatically catch known system even messages and process them.
- SEF provides a default behavior for each type of system event, but allows
system processes to register callbacks to override the default behavior.
- Custom (local to the process) or predefined (provided by SEF) callback
implementations can be registered to SEF.
- SEF currently includes support for 2 types of system events:
1. SEF Ping. The event occurs every time RS sends a ping to figure out
whether a system process is still alive. The default callback implementation
provided by SEF is to notify RS back to let it know the process is alive
and kicking.
2. SEF Live update. The event occurs every time RS sends a prepare to update
message to let a system process know an update is available and to prepare
for it. The live update support is very basic for now. SEF only deals with
verifying if the prepare state can be supported by the process, dumping the
state for debugging purposes, and providing an event-driven programming
model to the process to react to state changes check-in when ready to update.
- SEF should be extended in the future to integrate support for more types of
system events. Ideally, all the cross-cutting concerns should be integrated into
SEF to avoid duplicating code and ease extensibility. Examples include:
* PM notify messages primarily used at shutdown.
* SYSTEM notify messages primarily used for signals.
* CLOCK notify messages used for system alarms.
* Debug messages. IS could still be in charge of fkey handling but would
forward the debug message to the target process (e.g. PM, if the user
requested debug information about PM). SEF would then catch the message and
do nothing unless the process has registered an appropriate callback to
deal with the event. This simplifies the programming model to print debug
information, avoids duplicating code, and reduces the effort to print
debug information.
SYSTEM PROCESSES CHANGES:
- Every system process registers SEF callbacks it needs to override the default
system behavior and calls sef_startup() right after being started.
- sef_startup() does almost nothing now, but will be extended in the future to
support callbacks of its own to let RS control and synchronize with every
system process at initialization time.
- Every system process calls sef_receive() now rather than receive() directly,
to let SEF handle predefined system events.
RS CHANGES:
- RS supports a basic single-component live update protocol now, as follows:
* When an update command is issued (via "service update *"), RS notifies the
target system process to prepare for a specific update state.
* If the process doesn't respond back in time, the update is aborted.
* When the process responds back, RS kills it and marks it for refreshing.
* The process is then automatically restarted as for a buggy process and can
start running again.
* Live update is currently prototyped as a controlled failure.
2009-12-21 15:12:21 +01:00
|
|
|
/* SEF local startup. */
|
|
|
|
sef_local_startup();
|
|
|
|
|
2012-02-13 16:28:04 +01:00
|
|
|
printf("Started VFS: %d worker thread(s)\n", NR_WTHREADS);
|
|
|
|
|
No more intel/minix segments.
This commit removes all traces of Minix segments (the text/data/stack
memory map abstraction in the kernel) and significance of Intel segments
(hardware segments like CS, DS that add offsets to all addressing before
page table translation). This ultimately simplifies the memory layout
and addressing and makes the same layout possible on non-Intel
architectures.
There are only two types of addresses in the world now: virtual
and physical; even the kernel and processes have the same virtual
address space. Kernel and user processes can be distinguished at a
glance as processes won't use 0xF0000000 and above.
No static pre-allocated memory sizes exist any more.
Changes to booting:
. The pre_init.c leaves the kernel and modules exactly as
they were left by the bootloader in physical memory
. The kernel starts running using physical addressing,
loaded at a fixed location given in its linker script by the
bootloader. All code and data in this phase are linked to
this fixed low location.
. It makes a bootstrap pagetable to map itself to a
fixed high location (also in linker script) and jumps to
the high address. All code and data then use this high addressing.
. All code/data symbols linked at the low addresses is prefixed by
an objcopy step with __k_unpaged_*, so that that code cannot
reference highly-linked symbols (which aren't valid yet) or vice
versa (symbols that aren't valid any more).
. The two addressing modes are separated in the linker script by
collecting the unpaged_*.o objects and linking them with low
addresses, and linking the rest high. Some objects are linked
twice, once low and once high.
. The bootstrap phase passes a lot of information (e.g. free memory
list, physical location of the modules, etc.) using the kinfo
struct.
. After this bootstrap the low-linked part is freed.
. The kernel maps in VM into the bootstrap page table so that VM can
begin executing. Its first job is to make page tables for all other
boot processes. So VM runs before RS, and RS gets a fully dynamic,
VM-managed address space. VM gets its privilege info from RS as usual
but that happens after RS starts running.
. Both the kernel loading VM and VM organizing boot processes happen
using the libexec logic. This removes the last reason for VM to
still know much about exec() and vm/exec.c is gone.
Further Implementation:
. All segments are based at 0 and have a 4 GB limit.
. The kernel is mapped in at the top of the virtual address
space so as not to constrain the user processes.
. Processes do not use segments from the LDT at all; there are
no segments in the LDT any more, so no LLDT is needed.
. The Minix segments T/D/S are gone and so none of the
user-space or in-kernel copy functions use them. The copy
functions use a process endpoint of NONE to realize it's
a physical address, virtual otherwise.
. The umap call only makes sense to translate a virtual address
to a physical address now.
. Segments-related calls like newmap and alloc_segments are gone.
. All segments-related translation in VM is gone (vir2map etc).
. Initialization in VM is simpler as no moving around is necessary.
. VM and all other boot processes can be linked wherever they wish
and will be mapped in at the right location by the kernel and VM
respectively.
Other changes:
. The multiboot code is less special: it does not use mb_print
for its diagnostics any more but uses printf() as normal, saving
the output into the diagnostics buffer, only printing to the
screen using the direct print functions if a panic() occurs.
. The multiboot code uses the flexible 'free memory map list'
style to receive the list of free memory if available.
. The kernel determines the memory layout of the processes to
a degree: it tells VM where the kernel starts and ends and
where the kernel wants the top of the process to be. VM then
uses this entire range, i.e. the stack is right at the top,
and mmap()ped bits of memory are placed below that downwards,
and the break grows upwards.
Other Consequences:
. Every process gets its own page table as address spaces
can't be separated any more by segments.
. As all segments are 0-based, there is no distinction between
virtual and linear addresses, nor between userspace and
kernel addresses.
. Less work is done when context switching, leading to a net
performance increase. (8% faster on my machine for 'make servers'.)
. The layout and configuration of the GDT makes sysenter and syscall
possible.
2012-05-07 16:03:35 +02:00
|
|
|
if (OK != (sys_getkinfo(&kinfo)))
|
|
|
|
panic("couldn't get kernel kinfo");
|
|
|
|
|
2005-04-21 16:53:53 +02:00
|
|
|
/* This is the main loop that gets work, processes it, and sends replies. */
|
|
|
|
while (TRUE) {
|
2012-02-13 16:28:04 +01:00
|
|
|
yield_all(); /* let other threads run */
|
2012-04-13 14:50:38 +02:00
|
|
|
self = NULL;
|
2012-03-30 11:05:28 +02:00
|
|
|
job = NULL;
|
2012-02-13 16:28:04 +01:00
|
|
|
send_work();
|
|
|
|
get_work();
|
|
|
|
|
|
|
|
transid = TRNS_GET_ID(m_in.m_type);
|
2012-03-30 11:05:28 +02:00
|
|
|
if (IS_VFS_FS_TRANSID(transid)) {
|
|
|
|
job = worker_getjob( (thread_t) transid - VFS_TRANSID);
|
|
|
|
if (job == NULL) {
|
|
|
|
printf("VFS: spurious message %d from endpoint %d\n",
|
|
|
|
m_in.m_type, m_in.m_source);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
m_in.m_type = TRNS_DEL_ID(m_in.m_type);
|
|
|
|
}
|
2012-02-13 16:28:04 +01:00
|
|
|
|
|
|
|
if (job != NULL) {
|
|
|
|
do_fs_reply(job);
|
2008-02-22 15:53:02 +01:00
|
|
|
continue;
|
2012-02-13 16:28:04 +01:00
|
|
|
} else if (who_e == PM_PROC_NR) { /* Calls from PM */
|
|
|
|
/* Special control messages from PM */
|
|
|
|
sys_worker_start(do_pm);
|
2008-02-22 15:53:02 +01:00
|
|
|
continue;
|
2012-02-13 16:28:04 +01:00
|
|
|
} else if (is_notify(call_nr)) {
|
|
|
|
/* A task notify()ed us */
|
VFS: make all IPC asynchronous
By decoupling synchronous drivers from VFS, we are a big step closer to
supporting driver crashes under all circumstances. That is, VFS can't
become stuck on IPC with a synchronous driver (e.g., INET) and can
recover from crashing block drivers during open/close/ioctl or during
communication with an FS.
In order to maintain serialized communication with a synchronous driver,
the communication is wrapped by a mutex on a per driver basis (not major
numbers as there can be multiple majors with identical endpoints). Majors
that share a driver endpoint point to a single mutex object.
In order to support crashes from block drivers, the file reopen tactic
had to be changed; first reopen files associated with the crashed
driver, then send the new driver endpoint to FSes. This solves a
deadlock between the FS and the block driver;
- VFS would send REQ_NEW_DRIVER to an FS, but he FS only receives it
after retrying the current request to the newly started driver.
- The block driver would refuse the retried request until all files
had been reopened.
- VFS would reopen files only after getting a reply from the initial
REQ_NEW_DRIVER.
When a character special driver crashes, all associated files have to
be marked invalid and closed (or reopened if flagged as such). However,
they can only be closed if a thread holds exclusive access to it. To
obtain exclusive access, the worker thread (which handles the new driver
endpoint event from DS) schedules a new job to garbage collect invalid
files. This way, we can signal the worker thread that was talking to the
crashed driver and will release exclusive access to a file associated
with the crashed driver and prevent the garbage collecting worker thread
from dead locking on that file.
Also, when a character special driver crashes, RS will unmap the driver
and remap it upon restart. During unmapping, associated files are marked
invalid instead of waiting for an endpoint up event from DS, as that
event might come later than new read/write/select requests and thus
cause confusion in the freshly started driver.
When locking a filp, the usage counters are no longer checked. The usage
counter can legally go down to zero during filp invalidation while there
are locks pending.
DS events are handled by a separate worker thread instead of the main
thread as reopening files could lead to another crash and a stuck thread.
An additional worker thread is then necessary to unlock it.
Finally, with everything asynchronous a race condition in do_select
surfaced. A select entry was only marked in use after succesfully sending
initial select requests to drivers and having to wait. When multiple
select() calls were handled there was opportunity that these entries
were overwritten. This had as effect that some select results were
ignored (and select() remained blocking instead if returning) or do_select
tried to access filps that were not present (because thrown away by
secondary select()). This bug manifested itself with sendrecs, but was
very hard to reproduce. However, it became awfully easy to trigger with
asynsends only.
2012-08-28 16:06:51 +02:00
|
|
|
if (who_e == DS_PROC_NR)
|
2012-11-14 14:12:37 +01:00
|
|
|
handle_work(ds_event);
|
2013-01-02 14:41:36 +01:00
|
|
|
else if (who_e == KERNEL)
|
|
|
|
mthread_stacktraces();
|
2012-11-14 14:12:37 +01:00
|
|
|
else if (fp != NULL && (fp->fp_flags & FP_SRV_PROC))
|
|
|
|
handle_work(do_dev_event);
|
VFS: make all IPC asynchronous
By decoupling synchronous drivers from VFS, we are a big step closer to
supporting driver crashes under all circumstances. That is, VFS can't
become stuck on IPC with a synchronous driver (e.g., INET) and can
recover from crashing block drivers during open/close/ioctl or during
communication with an FS.
In order to maintain serialized communication with a synchronous driver,
the communication is wrapped by a mutex on a per driver basis (not major
numbers as there can be multiple majors with identical endpoints). Majors
that share a driver endpoint point to a single mutex object.
In order to support crashes from block drivers, the file reopen tactic
had to be changed; first reopen files associated with the crashed
driver, then send the new driver endpoint to FSes. This solves a
deadlock between the FS and the block driver;
- VFS would send REQ_NEW_DRIVER to an FS, but he FS only receives it
after retrying the current request to the newly started driver.
- The block driver would refuse the retried request until all files
had been reopened.
- VFS would reopen files only after getting a reply from the initial
REQ_NEW_DRIVER.
When a character special driver crashes, all associated files have to
be marked invalid and closed (or reopened if flagged as such). However,
they can only be closed if a thread holds exclusive access to it. To
obtain exclusive access, the worker thread (which handles the new driver
endpoint event from DS) schedules a new job to garbage collect invalid
files. This way, we can signal the worker thread that was talking to the
crashed driver and will release exclusive access to a file associated
with the crashed driver and prevent the garbage collecting worker thread
from dead locking on that file.
Also, when a character special driver crashes, RS will unmap the driver
and remap it upon restart. During unmapping, associated files are marked
invalid instead of waiting for an endpoint up event from DS, as that
event might come later than new read/write/select requests and thus
cause confusion in the freshly started driver.
When locking a filp, the usage counters are no longer checked. The usage
counter can legally go down to zero during filp invalidation while there
are locks pending.
DS events are handled by a separate worker thread instead of the main
thread as reopening files could lead to another crash and a stuck thread.
An additional worker thread is then necessary to unlock it.
Finally, with everything asynchronous a race condition in do_select
surfaced. A select entry was only marked in use after succesfully sending
initial select requests to drivers and having to wait. When multiple
select() calls were handled there was opportunity that these entries
were overwritten. This had as effect that some select results were
ignored (and select() remained blocking instead if returning) or do_select
tried to access filps that were not present (because thrown away by
secondary select()). This bug manifested itself with sendrecs, but was
very hard to reproduce. However, it became awfully easy to trigger with
asynsends only.
2012-08-28 16:06:51 +02:00
|
|
|
else
|
|
|
|
sys_worker_start(do_control_msgs);
|
2008-02-22 15:53:02 +01:00
|
|
|
continue;
|
2012-02-13 16:28:04 +01:00
|
|
|
} else if (who_p < 0) { /* i.e., message comes from a task */
|
|
|
|
/* We're going to ignore this message. Tasks should
|
|
|
|
* send notify()s only.
|
|
|
|
*/
|
|
|
|
printf("VFS: ignoring message from %d (%d)\n", who_e, call_nr);
|
|
|
|
continue;
|
2008-02-22 15:53:02 +01:00
|
|
|
}
|
2012-02-13 16:28:04 +01:00
|
|
|
|
|
|
|
/* At this point we either have results from an asynchronous device
|
|
|
|
* or a new system call. In both cases a new worker thread has to be
|
|
|
|
* started and there might not be one available from the pool. This is
|
|
|
|
* not a problem (requests/replies are simply queued), except when
|
|
|
|
* they're from an FS endpoint, because these can cause a deadlock.
|
|
|
|
* handle_work() takes care of the details. */
|
VFS: make all IPC asynchronous
By decoupling synchronous drivers from VFS, we are a big step closer to
supporting driver crashes under all circumstances. That is, VFS can't
become stuck on IPC with a synchronous driver (e.g., INET) and can
recover from crashing block drivers during open/close/ioctl or during
communication with an FS.
In order to maintain serialized communication with a synchronous driver,
the communication is wrapped by a mutex on a per driver basis (not major
numbers as there can be multiple majors with identical endpoints). Majors
that share a driver endpoint point to a single mutex object.
In order to support crashes from block drivers, the file reopen tactic
had to be changed; first reopen files associated with the crashed
driver, then send the new driver endpoint to FSes. This solves a
deadlock between the FS and the block driver;
- VFS would send REQ_NEW_DRIVER to an FS, but he FS only receives it
after retrying the current request to the newly started driver.
- The block driver would refuse the retried request until all files
had been reopened.
- VFS would reopen files only after getting a reply from the initial
REQ_NEW_DRIVER.
When a character special driver crashes, all associated files have to
be marked invalid and closed (or reopened if flagged as such). However,
they can only be closed if a thread holds exclusive access to it. To
obtain exclusive access, the worker thread (which handles the new driver
endpoint event from DS) schedules a new job to garbage collect invalid
files. This way, we can signal the worker thread that was talking to the
crashed driver and will release exclusive access to a file associated
with the crashed driver and prevent the garbage collecting worker thread
from dead locking on that file.
Also, when a character special driver crashes, RS will unmap the driver
and remap it upon restart. During unmapping, associated files are marked
invalid instead of waiting for an endpoint up event from DS, as that
event might come later than new read/write/select requests and thus
cause confusion in the freshly started driver.
When locking a filp, the usage counters are no longer checked. The usage
counter can legally go down to zero during filp invalidation while there
are locks pending.
DS events are handled by a separate worker thread instead of the main
thread as reopening files could lead to another crash and a stuck thread.
An additional worker thread is then necessary to unlock it.
Finally, with everything asynchronous a race condition in do_select
surfaced. A select entry was only marked in use after succesfully sending
initial select requests to drivers and having to wait. When multiple
select() calls were handled there was opportunity that these entries
were overwritten. This had as effect that some select results were
ignored (and select() remained blocking instead if returning) or do_select
tried to access filps that were not present (because thrown away by
secondary select()). This bug manifested itself with sendrecs, but was
very hard to reproduce. However, it became awfully easy to trigger with
asynsends only.
2012-08-28 16:06:51 +02:00
|
|
|
if (IS_DRV_REPLY(call_nr)) {
|
2012-02-13 16:28:04 +01:00
|
|
|
/* We've got results for a device request */
|
VFS: make all IPC asynchronous
By decoupling synchronous drivers from VFS, we are a big step closer to
supporting driver crashes under all circumstances. That is, VFS can't
become stuck on IPC with a synchronous driver (e.g., INET) and can
recover from crashing block drivers during open/close/ioctl or during
communication with an FS.
In order to maintain serialized communication with a synchronous driver,
the communication is wrapped by a mutex on a per driver basis (not major
numbers as there can be multiple majors with identical endpoints). Majors
that share a driver endpoint point to a single mutex object.
In order to support crashes from block drivers, the file reopen tactic
had to be changed; first reopen files associated with the crashed
driver, then send the new driver endpoint to FSes. This solves a
deadlock between the FS and the block driver;
- VFS would send REQ_NEW_DRIVER to an FS, but he FS only receives it
after retrying the current request to the newly started driver.
- The block driver would refuse the retried request until all files
had been reopened.
- VFS would reopen files only after getting a reply from the initial
REQ_NEW_DRIVER.
When a character special driver crashes, all associated files have to
be marked invalid and closed (or reopened if flagged as such). However,
they can only be closed if a thread holds exclusive access to it. To
obtain exclusive access, the worker thread (which handles the new driver
endpoint event from DS) schedules a new job to garbage collect invalid
files. This way, we can signal the worker thread that was talking to the
crashed driver and will release exclusive access to a file associated
with the crashed driver and prevent the garbage collecting worker thread
from dead locking on that file.
Also, when a character special driver crashes, RS will unmap the driver
and remap it upon restart. During unmapping, associated files are marked
invalid instead of waiting for an endpoint up event from DS, as that
event might come later than new read/write/select requests and thus
cause confusion in the freshly started driver.
When locking a filp, the usage counters are no longer checked. The usage
counter can legally go down to zero during filp invalidation while there
are locks pending.
DS events are handled by a separate worker thread instead of the main
thread as reopening files could lead to another crash and a stuck thread.
An additional worker thread is then necessary to unlock it.
Finally, with everything asynchronous a race condition in do_select
surfaced. A select entry was only marked in use after succesfully sending
initial select requests to drivers and having to wait. When multiple
select() calls were handled there was opportunity that these entries
were overwritten. This had as effect that some select results were
ignored (and select() remained blocking instead if returning) or do_select
tried to access filps that were not present (because thrown away by
secondary select()). This bug manifested itself with sendrecs, but was
very hard to reproduce. However, it became awfully easy to trigger with
asynsends only.
2012-08-28 16:06:51 +02:00
|
|
|
|
|
|
|
struct dmap *dp;
|
|
|
|
|
|
|
|
dp = get_dmap(who_e);
|
|
|
|
if (dp != NULL) {
|
|
|
|
if (dev_style_asyn(dp->dmap_style)) {
|
|
|
|
handle_work(do_async_dev_result);
|
|
|
|
|
|
|
|
} else {
|
|
|
|
if (dp->dmap_servicing == NONE) {
|
|
|
|
printf("Got spurious dev reply from %d",
|
|
|
|
who_e);
|
|
|
|
} else {
|
|
|
|
dev_reply(dp);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
printf("VFS: ignoring dev reply from unknown driver %d\n",
|
|
|
|
who_e);
|
2012-02-13 16:28:04 +01:00
|
|
|
} else {
|
|
|
|
/* Normal syscall. */
|
|
|
|
handle_work(do_work);
|
2008-02-22 15:53:02 +01:00
|
|
|
}
|
2012-02-13 16:28:04 +01:00
|
|
|
}
|
|
|
|
return(OK); /* shouldn't come here */
|
|
|
|
}
|
2008-02-22 15:53:02 +01:00
|
|
|
|
2012-02-13 16:28:04 +01:00
|
|
|
/*===========================================================================*
|
|
|
|
* handle_work *
|
|
|
|
*===========================================================================*/
|
2012-03-25 20:25:53 +02:00
|
|
|
static void handle_work(void *(*func)(void *arg))
|
2012-02-13 16:28:04 +01:00
|
|
|
{
|
|
|
|
/* Handle asynchronous device replies and new system calls. If the originating
|
|
|
|
* endpoint is an FS endpoint, take extra care not to get in deadlock. */
|
|
|
|
struct vmnt *vmp = NULL;
|
2012-04-13 14:50:38 +02:00
|
|
|
endpoint_t proc_e;
|
2012-02-13 16:28:04 +01:00
|
|
|
|
2012-04-13 14:50:38 +02:00
|
|
|
proc_e = m_in.m_source;
|
|
|
|
|
2012-11-14 14:12:37 +01:00
|
|
|
if (fp->fp_flags & FP_SRV_PROC) {
|
|
|
|
vmp = find_vmnt(proc_e);
|
|
|
|
if (vmp != NULL) {
|
|
|
|
/* A call back or dev result from an FS
|
|
|
|
* endpoint. Set call back flag. Can do only
|
|
|
|
* one call back at a time.
|
|
|
|
*/
|
|
|
|
if (vmp->m_flags & VMNT_CALLBACK) {
|
2013-04-12 18:41:23 +02:00
|
|
|
replycode(proc_e, EAGAIN);
|
2012-11-14 14:12:37 +01:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
vmp->m_flags |= VMNT_CALLBACK;
|
|
|
|
if (vmp->m_flags & VMNT_MOUNTING) {
|
|
|
|
vmp->m_flags |= VMNT_FORCEROOTBSF;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2012-02-13 16:28:04 +01:00
|
|
|
if (worker_available() == 0) {
|
2012-04-02 17:20:05 +02:00
|
|
|
if (!deadlock_resolving) {
|
|
|
|
deadlock_resolving = 1;
|
|
|
|
dl_worker_start(func);
|
2012-02-13 16:28:04 +01:00
|
|
|
return;
|
2006-05-19 14:19:37 +02:00
|
|
|
}
|
2012-04-02 17:20:05 +02:00
|
|
|
|
2012-11-14 14:12:37 +01:00
|
|
|
if (vmp != NULL) {
|
|
|
|
/* Already trying to resolve a deadlock, can't
|
|
|
|
* handle more, sorry */
|
|
|
|
|
2013-04-12 18:41:23 +02:00
|
|
|
replycode(proc_e, EAGAIN);
|
2012-11-14 14:12:37 +01:00
|
|
|
return;
|
|
|
|
}
|
2006-05-19 14:19:37 +02:00
|
|
|
}
|
2012-02-13 16:28:04 +01:00
|
|
|
}
|
2006-05-19 14:19:37 +02:00
|
|
|
|
2012-02-13 16:28:04 +01:00
|
|
|
worker_start(func);
|
|
|
|
}
|
2008-11-19 13:26:10 +01:00
|
|
|
|
2012-02-13 16:28:04 +01:00
|
|
|
/*===========================================================================*
|
2012-04-13 14:50:38 +02:00
|
|
|
* do_async_dev_result *
|
2012-02-13 16:28:04 +01:00
|
|
|
*===========================================================================*/
|
2012-03-25 20:25:53 +02:00
|
|
|
static void *do_async_dev_result(void *arg)
|
2012-02-13 16:28:04 +01:00
|
|
|
{
|
|
|
|
endpoint_t endpt;
|
|
|
|
struct job my_job;
|
|
|
|
|
|
|
|
my_job = *((struct job *) arg);
|
|
|
|
fp = my_job.j_fp;
|
|
|
|
|
|
|
|
/* An asynchronous character driver has results for us */
|
2012-04-13 14:50:38 +02:00
|
|
|
if (job_call_nr == DEV_REVIVE) {
|
|
|
|
endpt = job_m_in.REP_ENDPT;
|
2012-02-13 16:28:04 +01:00
|
|
|
if (endpt == VFS_PROC_NR)
|
2012-04-13 14:50:38 +02:00
|
|
|
endpt = find_suspended_ep(job_m_in.m_source,
|
|
|
|
job_m_in.REP_IO_GRANT);
|
2012-02-13 16:28:04 +01:00
|
|
|
|
|
|
|
if (endpt == NONE) {
|
|
|
|
printf("VFS: proc with grant %d from %d not found\n",
|
2012-04-13 14:50:38 +02:00
|
|
|
job_m_in.REP_IO_GRANT, job_m_in.m_source);
|
|
|
|
} else if (job_m_in.REP_STATUS == SUSPEND) {
|
2012-02-13 16:28:04 +01:00
|
|
|
printf("VFS: got SUSPEND on DEV_REVIVE: not reviving proc\n");
|
|
|
|
} else
|
2012-04-13 14:50:38 +02:00
|
|
|
revive(endpt, job_m_in.REP_STATUS);
|
2012-02-13 16:28:04 +01:00
|
|
|
}
|
2012-04-13 14:50:38 +02:00
|
|
|
else if (job_call_nr == DEV_OPEN_REPL) open_reply();
|
|
|
|
else if (job_call_nr == DEV_REOPEN_REPL) reopen_reply();
|
|
|
|
else if (job_call_nr == DEV_CLOSE_REPL) close_reply();
|
|
|
|
else if (job_call_nr == DEV_SEL_REPL1)
|
|
|
|
select_reply1(job_m_in.m_source, job_m_in.DEV_MINOR,
|
|
|
|
job_m_in.DEV_SEL_OPS);
|
|
|
|
else if (job_call_nr == DEV_SEL_REPL2)
|
|
|
|
select_reply2(job_m_in.m_source, job_m_in.DEV_MINOR,
|
|
|
|
job_m_in.DEV_SEL_OPS);
|
2012-02-13 16:28:04 +01:00
|
|
|
|
2012-11-14 14:12:37 +01:00
|
|
|
thread_cleanup(fp);
|
2012-02-13 16:28:04 +01:00
|
|
|
return(NULL);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*===========================================================================*
|
|
|
|
* do_control_msgs *
|
|
|
|
*===========================================================================*/
|
2012-03-25 20:25:53 +02:00
|
|
|
static void *do_control_msgs(void *arg)
|
2012-02-13 16:28:04 +01:00
|
|
|
{
|
|
|
|
struct job my_job;
|
|
|
|
|
|
|
|
my_job = *((struct job *) arg);
|
|
|
|
fp = my_job.j_fp;
|
|
|
|
|
|
|
|
/* Check for special control messages. */
|
2012-04-13 14:50:38 +02:00
|
|
|
if (job_m_in.m_source == CLOCK) {
|
2012-02-13 16:28:04 +01:00
|
|
|
/* Alarm timer expired. Used only for select(). Check it. */
|
2012-04-13 14:50:38 +02:00
|
|
|
expire_timers(job_m_in.NOTIFY_TIMESTAMP);
|
2012-02-13 16:28:04 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
thread_cleanup(NULL);
|
|
|
|
return(NULL);
|
|
|
|
}
|
|
|
|
|
2012-11-14 14:12:37 +01:00
|
|
|
/*===========================================================================*
|
|
|
|
* do_dev_event *
|
|
|
|
*===========================================================================*/
|
|
|
|
static void *do_dev_event(void *arg)
|
|
|
|
{
|
|
|
|
/* Device notifies us of an event. */
|
|
|
|
struct job my_job;
|
|
|
|
|
|
|
|
my_job = *((struct job *) arg);
|
|
|
|
fp = my_job.j_fp;
|
|
|
|
|
|
|
|
dev_status(job_m_in.m_source);
|
|
|
|
|
|
|
|
thread_cleanup(fp);
|
|
|
|
return(NULL);
|
|
|
|
}
|
|
|
|
|
2012-02-13 16:28:04 +01:00
|
|
|
/*===========================================================================*
|
|
|
|
* do_fs_reply *
|
|
|
|
*===========================================================================*/
|
2012-03-25 20:25:53 +02:00
|
|
|
static void *do_fs_reply(struct job *job)
|
2012-02-13 16:28:04 +01:00
|
|
|
{
|
|
|
|
struct vmnt *vmp;
|
VFS: make all IPC asynchronous
By decoupling synchronous drivers from VFS, we are a big step closer to
supporting driver crashes under all circumstances. That is, VFS can't
become stuck on IPC with a synchronous driver (e.g., INET) and can
recover from crashing block drivers during open/close/ioctl or during
communication with an FS.
In order to maintain serialized communication with a synchronous driver,
the communication is wrapped by a mutex on a per driver basis (not major
numbers as there can be multiple majors with identical endpoints). Majors
that share a driver endpoint point to a single mutex object.
In order to support crashes from block drivers, the file reopen tactic
had to be changed; first reopen files associated with the crashed
driver, then send the new driver endpoint to FSes. This solves a
deadlock between the FS and the block driver;
- VFS would send REQ_NEW_DRIVER to an FS, but he FS only receives it
after retrying the current request to the newly started driver.
- The block driver would refuse the retried request until all files
had been reopened.
- VFS would reopen files only after getting a reply from the initial
REQ_NEW_DRIVER.
When a character special driver crashes, all associated files have to
be marked invalid and closed (or reopened if flagged as such). However,
they can only be closed if a thread holds exclusive access to it. To
obtain exclusive access, the worker thread (which handles the new driver
endpoint event from DS) schedules a new job to garbage collect invalid
files. This way, we can signal the worker thread that was talking to the
crashed driver and will release exclusive access to a file associated
with the crashed driver and prevent the garbage collecting worker thread
from dead locking on that file.
Also, when a character special driver crashes, RS will unmap the driver
and remap it upon restart. During unmapping, associated files are marked
invalid instead of waiting for an endpoint up event from DS, as that
event might come later than new read/write/select requests and thus
cause confusion in the freshly started driver.
When locking a filp, the usage counters are no longer checked. The usage
counter can legally go down to zero during filp invalidation while there
are locks pending.
DS events are handled by a separate worker thread instead of the main
thread as reopening files could lead to another crash and a stuck thread.
An additional worker thread is then necessary to unlock it.
Finally, with everything asynchronous a race condition in do_select
surfaced. A select entry was only marked in use after succesfully sending
initial select requests to drivers and having to wait. When multiple
select() calls were handled there was opportunity that these entries
were overwritten. This had as effect that some select results were
ignored (and select() remained blocking instead if returning) or do_select
tried to access filps that were not present (because thrown away by
secondary select()). This bug manifested itself with sendrecs, but was
very hard to reproduce. However, it became awfully easy to trigger with
asynsends only.
2012-08-28 16:06:51 +02:00
|
|
|
struct worker_thread *wp;
|
Merge of David's ptrace branch. Summary:
o Support for ptrace T_ATTACH/T_DETACH and T_SYSCALL
o PM signal handling logic should now work properly, even with debuggers
being present
o Asynchronous PM/VFS protocol, full IPC support for senda(), and
AMF_NOREPLY senda() flag
DETAILS
Process stop and delay call handling of PM:
o Added sys_runctl() kernel call with sys_stop() and sys_resume()
aliases, for PM to stop and resume a process
o Added exception for sending/syscall-traced processes to sys_runctl(),
and matching SIGKREADY pseudo-signal to PM
o Fixed PM signal logic to deal with requests from a process after
stopping it (so-called "delay calls"), using the SIGKREADY facility
o Fixed various PM panics due to race conditions with delay calls versus
VFS calls
o Removed special PRIO_STOP priority value
o Added SYS_LOCK RTS kernel flag, to stop an individual process from
running while modifying its process structure
Signal and debugger handling in PM:
o Fixed debugger signals being dropped if a second signal arrives when
the debugger has not retrieved the first one
o Fixed debugger signals being sent to the debugger more than once
o Fixed debugger signals unpausing process in VFS; removed PM_UNPAUSE_TR
protocol message
o Detached debugger signals from general signal logic and from being
blocked on VFS calls, meaning that even VFS can now be traced
o Fixed debugger being unable to receive more than one pending signal in
one process stop
o Fixed signal delivery being delayed needlessly when multiple signals
are pending
o Fixed wait test for tracer, which was returning for children that were
not waited for
o Removed second parallel pending call from PM to VFS for any process
o Fixed process becoming runnable between exec() and debugger trap
o Added support for notifying the debugger before the parent when a
debugged child exits
o Fixed debugger death causing child to remain stopped forever
o Fixed consistently incorrect use of _NSIG
Extensions to ptrace():
o Added T_ATTACH and T_DETACH ptrace request, to attach and detach a
debugger to and from a process
o Added T_SYSCALL ptrace request, to trace system calls
o Added T_SETOPT ptrace request, to set trace options
o Added TO_TRACEFORK trace option, to attach automatically to children
of a traced process
o Added TO_ALTEXEC trace option, to send SIGSTOP instead of SIGTRAP upon
a successful exec() of the tracee
o Extended T_GETUSER ptrace support to allow retrieving a process's priv
structure
o Removed T_STOP ptrace request again, as it does not help implementing
debuggers properly
o Added MINIX3-specific ptrace test (test42)
o Added proper manual page for ptrace(2)
Asynchronous PM/VFS interface:
o Fixed asynchronous messages not being checked when receive() is called
with an endpoint other than ANY
o Added AMF_NOREPLY senda() flag, preventing such messages from
satisfying the receive part of a sendrec()
o Added asynsend3() that takes optional flags; asynsend() is now a
#define passing in 0 as third parameter
o Made PM/VFS protocol asynchronous; reintroduced tell_fs()
o Made PM_BASE request/reply number range unique
o Hacked in a horrible temporary workaround into RS to deal with newly
revealed RS-PM-VFS race condition triangle until VFS is asynchronous
System signal handling:
o Fixed shutdown logic of device drivers; removed old SIGKSTOP signal
o Removed is-superuser check from PM's do_procstat() (aka getsigset())
o Added sigset macros to allow system processes to deal with the full
signal set, rather than just the POSIX subset
Miscellaneous PM fixes:
o Split do_getset into do_get and do_set, merging common code and making
structure clearer
o Fixed setpriority() being able to put to sleep processes using an
invalid parameter, or revive zombie processes
o Made find_proc() global; removed obsolete proc_from_pid()
o Cleanup here and there
Also included:
o Fixed false-positive boot order kernel warning
o Removed last traces of old NOTIFY_FROM code
THINGS OF POSSIBLE INTEREST
o It should now be possible to run PM at any priority, even lower than
user processes
o No assumptions are made about communication speed between PM and VFS,
although communication must be FIFO
o A debugger will now receive incoming debuggee signals at kill time
only; the process may not yet be fully stopped
o A first step has been made towards making the SYSTEM task preemptible
2009-09-30 11:57:22 +02:00
|
|
|
|
2012-02-13 16:28:04 +01:00
|
|
|
if ((vmp = find_vmnt(who_e)) == NULL)
|
|
|
|
panic("Couldn't find vmnt for endpoint %d", who_e);
|
|
|
|
|
VFS: make all IPC asynchronous
By decoupling synchronous drivers from VFS, we are a big step closer to
supporting driver crashes under all circumstances. That is, VFS can't
become stuck on IPC with a synchronous driver (e.g., INET) and can
recover from crashing block drivers during open/close/ioctl or during
communication with an FS.
In order to maintain serialized communication with a synchronous driver,
the communication is wrapped by a mutex on a per driver basis (not major
numbers as there can be multiple majors with identical endpoints). Majors
that share a driver endpoint point to a single mutex object.
In order to support crashes from block drivers, the file reopen tactic
had to be changed; first reopen files associated with the crashed
driver, then send the new driver endpoint to FSes. This solves a
deadlock between the FS and the block driver;
- VFS would send REQ_NEW_DRIVER to an FS, but he FS only receives it
after retrying the current request to the newly started driver.
- The block driver would refuse the retried request until all files
had been reopened.
- VFS would reopen files only after getting a reply from the initial
REQ_NEW_DRIVER.
When a character special driver crashes, all associated files have to
be marked invalid and closed (or reopened if flagged as such). However,
they can only be closed if a thread holds exclusive access to it. To
obtain exclusive access, the worker thread (which handles the new driver
endpoint event from DS) schedules a new job to garbage collect invalid
files. This way, we can signal the worker thread that was talking to the
crashed driver and will release exclusive access to a file associated
with the crashed driver and prevent the garbage collecting worker thread
from dead locking on that file.
Also, when a character special driver crashes, RS will unmap the driver
and remap it upon restart. During unmapping, associated files are marked
invalid instead of waiting for an endpoint up event from DS, as that
event might come later than new read/write/select requests and thus
cause confusion in the freshly started driver.
When locking a filp, the usage counters are no longer checked. The usage
counter can legally go down to zero during filp invalidation while there
are locks pending.
DS events are handled by a separate worker thread instead of the main
thread as reopening files could lead to another crash and a stuck thread.
An additional worker thread is then necessary to unlock it.
Finally, with everything asynchronous a race condition in do_select
surfaced. A select entry was only marked in use after succesfully sending
initial select requests to drivers and having to wait. When multiple
select() calls were handled there was opportunity that these entries
were overwritten. This had as effect that some select results were
ignored (and select() remained blocking instead if returning) or do_select
tried to access filps that were not present (because thrown away by
secondary select()). This bug manifested itself with sendrecs, but was
very hard to reproduce. However, it became awfully easy to trigger with
asynsends only.
2012-08-28 16:06:51 +02:00
|
|
|
wp = worker_get(job->j_fp->fp_wtid);
|
2012-02-13 16:28:04 +01:00
|
|
|
|
VFS: make all IPC asynchronous
By decoupling synchronous drivers from VFS, we are a big step closer to
supporting driver crashes under all circumstances. That is, VFS can't
become stuck on IPC with a synchronous driver (e.g., INET) and can
recover from crashing block drivers during open/close/ioctl or during
communication with an FS.
In order to maintain serialized communication with a synchronous driver,
the communication is wrapped by a mutex on a per driver basis (not major
numbers as there can be multiple majors with identical endpoints). Majors
that share a driver endpoint point to a single mutex object.
In order to support crashes from block drivers, the file reopen tactic
had to be changed; first reopen files associated with the crashed
driver, then send the new driver endpoint to FSes. This solves a
deadlock between the FS and the block driver;
- VFS would send REQ_NEW_DRIVER to an FS, but he FS only receives it
after retrying the current request to the newly started driver.
- The block driver would refuse the retried request until all files
had been reopened.
- VFS would reopen files only after getting a reply from the initial
REQ_NEW_DRIVER.
When a character special driver crashes, all associated files have to
be marked invalid and closed (or reopened if flagged as such). However,
they can only be closed if a thread holds exclusive access to it. To
obtain exclusive access, the worker thread (which handles the new driver
endpoint event from DS) schedules a new job to garbage collect invalid
files. This way, we can signal the worker thread that was talking to the
crashed driver and will release exclusive access to a file associated
with the crashed driver and prevent the garbage collecting worker thread
from dead locking on that file.
Also, when a character special driver crashes, RS will unmap the driver
and remap it upon restart. During unmapping, associated files are marked
invalid instead of waiting for an endpoint up event from DS, as that
event might come later than new read/write/select requests and thus
cause confusion in the freshly started driver.
When locking a filp, the usage counters are no longer checked. The usage
counter can legally go down to zero during filp invalidation while there
are locks pending.
DS events are handled by a separate worker thread instead of the main
thread as reopening files could lead to another crash and a stuck thread.
An additional worker thread is then necessary to unlock it.
Finally, with everything asynchronous a race condition in do_select
surfaced. A select entry was only marked in use after succesfully sending
initial select requests to drivers and having to wait. When multiple
select() calls were handled there was opportunity that these entries
were overwritten. This had as effect that some select results were
ignored (and select() remained blocking instead if returning) or do_select
tried to access filps that were not present (because thrown away by
secondary select()). This bug manifested itself with sendrecs, but was
very hard to reproduce. However, it became awfully easy to trigger with
asynsends only.
2012-08-28 16:06:51 +02:00
|
|
|
if (wp == NULL) {
|
2012-02-13 16:28:04 +01:00
|
|
|
printf("VFS: spurious reply from %d\n", who_e);
|
|
|
|
return(NULL);
|
|
|
|
}
|
|
|
|
|
VFS: make all IPC asynchronous
By decoupling synchronous drivers from VFS, we are a big step closer to
supporting driver crashes under all circumstances. That is, VFS can't
become stuck on IPC with a synchronous driver (e.g., INET) and can
recover from crashing block drivers during open/close/ioctl or during
communication with an FS.
In order to maintain serialized communication with a synchronous driver,
the communication is wrapped by a mutex on a per driver basis (not major
numbers as there can be multiple majors with identical endpoints). Majors
that share a driver endpoint point to a single mutex object.
In order to support crashes from block drivers, the file reopen tactic
had to be changed; first reopen files associated with the crashed
driver, then send the new driver endpoint to FSes. This solves a
deadlock between the FS and the block driver;
- VFS would send REQ_NEW_DRIVER to an FS, but he FS only receives it
after retrying the current request to the newly started driver.
- The block driver would refuse the retried request until all files
had been reopened.
- VFS would reopen files only after getting a reply from the initial
REQ_NEW_DRIVER.
When a character special driver crashes, all associated files have to
be marked invalid and closed (or reopened if flagged as such). However,
they can only be closed if a thread holds exclusive access to it. To
obtain exclusive access, the worker thread (which handles the new driver
endpoint event from DS) schedules a new job to garbage collect invalid
files. This way, we can signal the worker thread that was talking to the
crashed driver and will release exclusive access to a file associated
with the crashed driver and prevent the garbage collecting worker thread
from dead locking on that file.
Also, when a character special driver crashes, RS will unmap the driver
and remap it upon restart. During unmapping, associated files are marked
invalid instead of waiting for an endpoint up event from DS, as that
event might come later than new read/write/select requests and thus
cause confusion in the freshly started driver.
When locking a filp, the usage counters are no longer checked. The usage
counter can legally go down to zero during filp invalidation while there
are locks pending.
DS events are handled by a separate worker thread instead of the main
thread as reopening files could lead to another crash and a stuck thread.
An additional worker thread is then necessary to unlock it.
Finally, with everything asynchronous a race condition in do_select
surfaced. A select entry was only marked in use after succesfully sending
initial select requests to drivers and having to wait. When multiple
select() calls were handled there was opportunity that these entries
were overwritten. This had as effect that some select results were
ignored (and select() remained blocking instead if returning) or do_select
tried to access filps that were not present (because thrown away by
secondary select()). This bug manifested itself with sendrecs, but was
very hard to reproduce. However, it became awfully easy to trigger with
asynsends only.
2012-08-28 16:06:51 +02:00
|
|
|
if (wp->w_task != who_e) {
|
|
|
|
printf("VFS: expected %d to reply, not %d\n", wp->w_task, who_e);
|
|
|
|
return(NULL);
|
|
|
|
}
|
|
|
|
*wp->w_fs_sendrec = m_in;
|
|
|
|
wp->w_task = NONE;
|
2012-04-13 14:50:38 +02:00
|
|
|
vmp->m_comm.c_cur_reqs--; /* We've got our reply, make room for others */
|
VFS: make all IPC asynchronous
By decoupling synchronous drivers from VFS, we are a big step closer to
supporting driver crashes under all circumstances. That is, VFS can't
become stuck on IPC with a synchronous driver (e.g., INET) and can
recover from crashing block drivers during open/close/ioctl or during
communication with an FS.
In order to maintain serialized communication with a synchronous driver,
the communication is wrapped by a mutex on a per driver basis (not major
numbers as there can be multiple majors with identical endpoints). Majors
that share a driver endpoint point to a single mutex object.
In order to support crashes from block drivers, the file reopen tactic
had to be changed; first reopen files associated with the crashed
driver, then send the new driver endpoint to FSes. This solves a
deadlock between the FS and the block driver;
- VFS would send REQ_NEW_DRIVER to an FS, but he FS only receives it
after retrying the current request to the newly started driver.
- The block driver would refuse the retried request until all files
had been reopened.
- VFS would reopen files only after getting a reply from the initial
REQ_NEW_DRIVER.
When a character special driver crashes, all associated files have to
be marked invalid and closed (or reopened if flagged as such). However,
they can only be closed if a thread holds exclusive access to it. To
obtain exclusive access, the worker thread (which handles the new driver
endpoint event from DS) schedules a new job to garbage collect invalid
files. This way, we can signal the worker thread that was talking to the
crashed driver and will release exclusive access to a file associated
with the crashed driver and prevent the garbage collecting worker thread
from dead locking on that file.
Also, when a character special driver crashes, RS will unmap the driver
and remap it upon restart. During unmapping, associated files are marked
invalid instead of waiting for an endpoint up event from DS, as that
event might come later than new read/write/select requests and thus
cause confusion in the freshly started driver.
When locking a filp, the usage counters are no longer checked. The usage
counter can legally go down to zero during filp invalidation while there
are locks pending.
DS events are handled by a separate worker thread instead of the main
thread as reopening files could lead to another crash and a stuck thread.
An additional worker thread is then necessary to unlock it.
Finally, with everything asynchronous a race condition in do_select
surfaced. A select entry was only marked in use after succesfully sending
initial select requests to drivers and having to wait. When multiple
select() calls were handled there was opportunity that these entries
were overwritten. This had as effect that some select results were
ignored (and select() remained blocking instead if returning) or do_select
tried to access filps that were not present (because thrown away by
secondary select()). This bug manifested itself with sendrecs, but was
very hard to reproduce. However, it became awfully easy to trigger with
asynsends only.
2012-08-28 16:06:51 +02:00
|
|
|
worker_signal(wp); /* Continue this thread */
|
2012-02-13 16:28:04 +01:00
|
|
|
return(NULL);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*===========================================================================*
|
|
|
|
* lock_pm *
|
|
|
|
*===========================================================================*/
|
2012-03-25 20:25:53 +02:00
|
|
|
static void lock_pm(void)
|
2012-02-13 16:28:04 +01:00
|
|
|
{
|
|
|
|
struct fproc *org_fp;
|
|
|
|
struct worker_thread *org_self;
|
|
|
|
|
|
|
|
/* First try to get it right off the bat */
|
|
|
|
if (mutex_trylock(&pm_lock) == 0)
|
|
|
|
return;
|
|
|
|
|
|
|
|
org_fp = fp;
|
|
|
|
org_self = self;
|
|
|
|
|
|
|
|
if (mutex_lock(&pm_lock) != 0)
|
|
|
|
panic("Could not obtain lock on pm\n");
|
|
|
|
|
|
|
|
fp = org_fp;
|
|
|
|
self = org_self;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*===========================================================================*
|
|
|
|
* unlock_pm *
|
|
|
|
*===========================================================================*/
|
2012-03-25 20:25:53 +02:00
|
|
|
static void unlock_pm(void)
|
2012-02-13 16:28:04 +01:00
|
|
|
{
|
|
|
|
if (mutex_unlock(&pm_lock) != 0)
|
|
|
|
panic("Could not release lock on pm");
|
|
|
|
}
|
|
|
|
|
|
|
|
/*===========================================================================*
|
|
|
|
* do_pm *
|
|
|
|
*===========================================================================*/
|
2012-08-25 19:42:05 +02:00
|
|
|
static void *do_pm(void *arg __unused)
|
2012-02-13 16:28:04 +01:00
|
|
|
{
|
|
|
|
lock_pm();
|
|
|
|
service_pm();
|
|
|
|
unlock_pm();
|
|
|
|
|
|
|
|
thread_cleanup(NULL);
|
|
|
|
return(NULL);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*===========================================================================*
|
2012-04-13 14:50:38 +02:00
|
|
|
* do_pending_pipe *
|
2012-02-13 16:28:04 +01:00
|
|
|
*===========================================================================*/
|
2012-03-25 20:25:53 +02:00
|
|
|
static void *do_pending_pipe(void *arg)
|
2012-02-13 16:28:04 +01:00
|
|
|
{
|
|
|
|
int r, op;
|
|
|
|
struct job my_job;
|
|
|
|
struct filp *f;
|
|
|
|
tll_access_t locktype;
|
|
|
|
|
|
|
|
my_job = *((struct job *) arg);
|
|
|
|
fp = my_job.j_fp;
|
|
|
|
|
|
|
|
lock_proc(fp, 1 /* force lock */);
|
|
|
|
|
|
|
|
f = scratch(fp).file.filp;
|
|
|
|
assert(f != NULL);
|
|
|
|
scratch(fp).file.filp = NULL;
|
|
|
|
|
2012-04-13 14:50:38 +02:00
|
|
|
locktype = (job_call_nr == READ) ? VNODE_READ : VNODE_WRITE;
|
|
|
|
op = (job_call_nr == READ) ? READING : WRITING;
|
2012-02-13 16:28:04 +01:00
|
|
|
lock_filp(f, locktype);
|
|
|
|
|
|
|
|
r = rw_pipe(op, who_e, f, scratch(fp).io.io_buffer, scratch(fp).io.io_nbytes);
|
|
|
|
|
|
|
|
if (r != SUSPEND) /* Do we have results to report? */
|
2013-04-12 18:41:23 +02:00
|
|
|
replycode(fp->fp_endpoint, r);
|
2012-02-13 16:28:04 +01:00
|
|
|
|
|
|
|
unlock_filp(f);
|
|
|
|
thread_cleanup(fp);
|
2012-11-14 14:12:37 +01:00
|
|
|
unlock_proc(fp);
|
2012-02-13 16:28:04 +01:00
|
|
|
return(NULL);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*===========================================================================*
|
|
|
|
* do_dummy *
|
|
|
|
*===========================================================================*/
|
2012-03-25 20:25:53 +02:00
|
|
|
void *do_dummy(void *arg)
|
2012-02-13 16:28:04 +01:00
|
|
|
{
|
|
|
|
struct job my_job;
|
|
|
|
int r;
|
|
|
|
|
|
|
|
my_job = *((struct job *) arg);
|
|
|
|
fp = my_job.j_fp;
|
|
|
|
|
|
|
|
if ((r = mutex_trylock(&fp->fp_lock)) == 0) {
|
|
|
|
thread_cleanup(fp);
|
2012-11-14 14:12:37 +01:00
|
|
|
unlock_proc(fp);
|
2012-02-13 16:28:04 +01:00
|
|
|
} else {
|
|
|
|
/* Proc is busy, let that worker thread carry out the work */
|
|
|
|
thread_cleanup(NULL);
|
|
|
|
}
|
|
|
|
return(NULL);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*===========================================================================*
|
|
|
|
* do_work *
|
|
|
|
*===========================================================================*/
|
2012-03-25 20:25:53 +02:00
|
|
|
static void *do_work(void *arg)
|
2012-02-13 16:28:04 +01:00
|
|
|
{
|
|
|
|
int error;
|
|
|
|
struct job my_job;
|
2013-04-12 18:41:23 +02:00
|
|
|
message m_out;
|
|
|
|
|
|
|
|
memset(&m_out, 0, sizeof(m_out));
|
2012-02-13 16:28:04 +01:00
|
|
|
|
|
|
|
my_job = *((struct job *) arg);
|
|
|
|
fp = my_job.j_fp;
|
|
|
|
|
|
|
|
lock_proc(fp, 0); /* This proc is busy */
|
|
|
|
|
2012-04-13 14:50:38 +02:00
|
|
|
if (job_call_nr == MAPDRIVER) {
|
2012-02-13 16:28:04 +01:00
|
|
|
error = do_mapdriver();
|
2012-04-13 14:50:38 +02:00
|
|
|
} else if (job_call_nr == COMMON_GETSYSINFO) {
|
2012-02-13 16:28:04 +01:00
|
|
|
error = do_getsysinfo();
|
2012-04-13 14:50:38 +02:00
|
|
|
} else if (IS_PFS_VFS_RQ(job_call_nr)) {
|
2012-02-13 16:28:04 +01:00
|
|
|
if (who_e != PFS_PROC_NR) {
|
|
|
|
printf("VFS: only PFS is allowed to make nested VFS calls\n");
|
|
|
|
error = ENOSYS;
|
2012-04-13 14:50:38 +02:00
|
|
|
} else if (job_call_nr <= PFS_BASE ||
|
|
|
|
job_call_nr >= PFS_BASE + PFS_NREQS) {
|
2012-02-13 16:28:04 +01:00
|
|
|
error = ENOSYS;
|
|
|
|
} else {
|
2012-04-13 14:50:38 +02:00
|
|
|
job_call_nr -= PFS_BASE;
|
2013-04-12 18:41:23 +02:00
|
|
|
error = (*pfs_call_vec[job_call_nr])(&m_out);
|
2012-02-13 16:28:04 +01:00
|
|
|
}
|
|
|
|
} else {
|
|
|
|
/* We're dealing with a POSIX system call from a normal
|
|
|
|
* process. Call the internal function that does the work.
|
|
|
|
*/
|
2012-04-13 14:50:38 +02:00
|
|
|
if (job_call_nr < 0 || job_call_nr >= NCALLS) {
|
2012-02-13 16:28:04 +01:00
|
|
|
error = ENOSYS;
|
|
|
|
} else if (fp->fp_pid == PID_FREE) {
|
|
|
|
/* Process vanished before we were able to handle request.
|
|
|
|
* Replying has no use. Just drop it. */
|
|
|
|
error = SUSPEND;
|
|
|
|
} else {
|
2006-07-10 14:44:43 +02:00
|
|
|
#if ENABLE_SYSCALL_STATS
|
2012-04-13 14:50:38 +02:00
|
|
|
calls_stats[job_call_nr]++;
|
2006-07-10 14:44:43 +02:00
|
|
|
#endif
|
2013-04-12 18:41:23 +02:00
|
|
|
error = (*call_vec[job_call_nr])(&m_out);
|
2012-02-13 16:28:04 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Copy the results back to the user and send reply. */
|
2013-04-12 18:41:23 +02:00
|
|
|
if (error != SUSPEND) reply(&m_out, fp->fp_endpoint, error);
|
2012-02-13 16:28:04 +01:00
|
|
|
|
|
|
|
thread_cleanup(fp);
|
2012-11-14 14:12:37 +01:00
|
|
|
unlock_proc(fp);
|
2012-02-13 16:28:04 +01:00
|
|
|
return(NULL);
|
2005-04-21 16:53:53 +02:00
|
|
|
}
|
|
|
|
|
Basic System Event Framework (SEF) with ping and live update.
SYSLIB CHANGES:
- SEF must be used by every system process and is thereby part of the system
library.
- The framework provides a receive() interface (sef_receive) for system
processes to automatically catch known system even messages and process them.
- SEF provides a default behavior for each type of system event, but allows
system processes to register callbacks to override the default behavior.
- Custom (local to the process) or predefined (provided by SEF) callback
implementations can be registered to SEF.
- SEF currently includes support for 2 types of system events:
1. SEF Ping. The event occurs every time RS sends a ping to figure out
whether a system process is still alive. The default callback implementation
provided by SEF is to notify RS back to let it know the process is alive
and kicking.
2. SEF Live update. The event occurs every time RS sends a prepare to update
message to let a system process know an update is available and to prepare
for it. The live update support is very basic for now. SEF only deals with
verifying if the prepare state can be supported by the process, dumping the
state for debugging purposes, and providing an event-driven programming
model to the process to react to state changes check-in when ready to update.
- SEF should be extended in the future to integrate support for more types of
system events. Ideally, all the cross-cutting concerns should be integrated into
SEF to avoid duplicating code and ease extensibility. Examples include:
* PM notify messages primarily used at shutdown.
* SYSTEM notify messages primarily used for signals.
* CLOCK notify messages used for system alarms.
* Debug messages. IS could still be in charge of fkey handling but would
forward the debug message to the target process (e.g. PM, if the user
requested debug information about PM). SEF would then catch the message and
do nothing unless the process has registered an appropriate callback to
deal with the event. This simplifies the programming model to print debug
information, avoids duplicating code, and reduces the effort to print
debug information.
SYSTEM PROCESSES CHANGES:
- Every system process registers SEF callbacks it needs to override the default
system behavior and calls sef_startup() right after being started.
- sef_startup() does almost nothing now, but will be extended in the future to
support callbacks of its own to let RS control and synchronize with every
system process at initialization time.
- Every system process calls sef_receive() now rather than receive() directly,
to let SEF handle predefined system events.
RS CHANGES:
- RS supports a basic single-component live update protocol now, as follows:
* When an update command is issued (via "service update *"), RS notifies the
target system process to prepare for a specific update state.
* If the process doesn't respond back in time, the update is aborted.
* When the process responds back, RS kills it and marks it for refreshing.
* The process is then automatically restarted as for a buggy process and can
start running again.
* Live update is currently prototyped as a controlled failure.
2009-12-21 15:12:21 +01:00
|
|
|
/*===========================================================================*
|
|
|
|
* sef_local_startup *
|
|
|
|
*===========================================================================*/
|
2012-03-25 20:25:53 +02:00
|
|
|
static void sef_local_startup()
|
Basic System Event Framework (SEF) with ping and live update.
SYSLIB CHANGES:
- SEF must be used by every system process and is thereby part of the system
library.
- The framework provides a receive() interface (sef_receive) for system
processes to automatically catch known system even messages and process them.
- SEF provides a default behavior for each type of system event, but allows
system processes to register callbacks to override the default behavior.
- Custom (local to the process) or predefined (provided by SEF) callback
implementations can be registered to SEF.
- SEF currently includes support for 2 types of system events:
1. SEF Ping. The event occurs every time RS sends a ping to figure out
whether a system process is still alive. The default callback implementation
provided by SEF is to notify RS back to let it know the process is alive
and kicking.
2. SEF Live update. The event occurs every time RS sends a prepare to update
message to let a system process know an update is available and to prepare
for it. The live update support is very basic for now. SEF only deals with
verifying if the prepare state can be supported by the process, dumping the
state for debugging purposes, and providing an event-driven programming
model to the process to react to state changes check-in when ready to update.
- SEF should be extended in the future to integrate support for more types of
system events. Ideally, all the cross-cutting concerns should be integrated into
SEF to avoid duplicating code and ease extensibility. Examples include:
* PM notify messages primarily used at shutdown.
* SYSTEM notify messages primarily used for signals.
* CLOCK notify messages used for system alarms.
* Debug messages. IS could still be in charge of fkey handling but would
forward the debug message to the target process (e.g. PM, if the user
requested debug information about PM). SEF would then catch the message and
do nothing unless the process has registered an appropriate callback to
deal with the event. This simplifies the programming model to print debug
information, avoids duplicating code, and reduces the effort to print
debug information.
SYSTEM PROCESSES CHANGES:
- Every system process registers SEF callbacks it needs to override the default
system behavior and calls sef_startup() right after being started.
- sef_startup() does almost nothing now, but will be extended in the future to
support callbacks of its own to let RS control and synchronize with every
system process at initialization time.
- Every system process calls sef_receive() now rather than receive() directly,
to let SEF handle predefined system events.
RS CHANGES:
- RS supports a basic single-component live update protocol now, as follows:
* When an update command is issued (via "service update *"), RS notifies the
target system process to prepare for a specific update state.
* If the process doesn't respond back in time, the update is aborted.
* When the process responds back, RS kills it and marks it for refreshing.
* The process is then automatically restarted as for a buggy process and can
start running again.
* Live update is currently prototyped as a controlled failure.
2009-12-21 15:12:21 +01:00
|
|
|
{
|
Initialization protocol for system services.
SYSLIB CHANGES:
- SEF framework now supports a new SEF Init request type from RS. 3 different
callbacks are available (init_fresh, init_lu, init_restart) to specify
initialization code when a service starts fresh, starts after a live update,
or restarts.
SYSTEM SERVICE CHANGES:
- Initialization code for system services is now enclosed in a callback SEF will
automatically call at init time. The return code of the callback will
tell RS whether the initialization completed successfully.
- Each init callback can access information passed by RS to initialize. As of
now, each system service has access to the public entries of RS's system process
table to gather all the information required to initialize. This design
eliminates many existing or potential races at boot time and provides a uniform
initialization interface to system services. The same interface will be reused
for the upcoming publish/subscribe model to handle dynamic
registration / deregistration of system services.
VM CHANGES:
- Uniform privilege management for all system services. Every service uses the
same call mask format. For boot services, VM copies the call mask from init
data. For dynamic services, VM still receives the call mask via rs_set_priv
call that will be soon replaced by the upcoming publish/subscribe model.
RS CHANGES:
- The system process table has been reorganized and split into private entries
and public entries. Only the latter ones are exposed to system services.
- VM call masks are now entirely configured in rs/table.c
- RS has now its own slot in the system process table. Only kernel tasks and
user processes not included in the boot image are now left out from the system
process table.
- RS implements the initialization protocol for system services.
- For services in the boot image, RS blocks till initialization is complete and
panics when failure is reported back. Services are initialized in their order of
appearance in the boot image priv table and RS blocks to implements synchronous
initialization for every system service having the flag SF_SYNCH_BOOT set.
- For services started dynamically, the initialization protocol is implemented
as though it were the first ping for the service. In this case, if the
system service fails to report back (or reports failure), RS brings the service
down rather than trying to restart it.
2010-01-08 02:20:42 +01:00
|
|
|
/* Register init callbacks. */
|
|
|
|
sef_setcb_init_fresh(sef_cb_init_fresh);
|
New RS and new signal handling for system processes.
UPDATING INFO:
20100317:
/usr/src/etc/system.conf updated to ignore default kernel calls: copy
it (or merge it) to /etc/system.conf.
The hello driver (/dev/hello) added to the distribution:
# cd /usr/src/commands/scripts && make clean install
# cd /dev && MAKEDEV hello
KERNEL CHANGES:
- Generic signal handling support. The kernel no longer assumes PM as a signal
manager for every process. The signal manager of a given process can now be
specified in its privilege slot. When a signal has to be delivered, the kernel
performs the lookup and forwards the signal to the appropriate signal manager.
PM is the default signal manager for user processes, RS is the default signal
manager for system processes. To enable ptrace()ing for system processes, it
is sufficient to change the default signal manager to PM. This will temporarily
disable crash recovery, though.
- sys_exit() is now split into sys_exit() (i.e. exit() for system processes,
which generates a self-termination signal), and sys_clear() (i.e. used by PM
to ask the kernel to clear a process slot when a process exits).
- Added a new kernel call (i.e. sys_update()) to swap two process slots and
implement live update.
PM CHANGES:
- Posix signal handling is no longer allowed for system processes. System
signals are split into two fixed categories: termination and non-termination
signals. When a non-termination signaled is processed, PM transforms the signal
into an IPC message and delivers the message to the system process. When a
termination signal is processed, PM terminates the process.
- PM no longer assumes itself as the signal manager for system processes. It now
makes sure that every system signal goes through the kernel before being
actually processes. The kernel will then dispatch the signal to the appropriate
signal manager which may or may not be PM.
SYSLIB CHANGES:
- Simplified SEF init and LU callbacks.
- Added additional predefined SEF callbacks to debug crash recovery and
live update.
- Fixed a temporary ack in the SEF init protocol. SEF init reply is now
completely synchronous.
- Added SEF signal event type to provide a uniform interface for system
processes to deal with signals. A sef_cb_signal_handler() callback is
available for system processes to handle every received signal. A
sef_cb_signal_manager() callback is used by signal managers to process
system signals on behalf of the kernel.
- Fixed a few bugs with memory mapping and DS.
VM CHANGES:
- Page faults and memory requests coming from the kernel are now implemented
using signals.
- Added a new VM call to swap two process slots and implement live update.
- The call is used by RS at update time and in turn invokes the kernel call
sys_update().
RS CHANGES:
- RS has been reworked with a better functional decomposition.
- Better kernel call masks. com.h now defines the set of very basic kernel calls
every system service is allowed to use. This makes system.conf simpler and
easier to maintain. In addition, this guarantees a higher level of isolation
for system libraries that use one or more kernel calls internally (e.g. printf).
- RS is the default signal manager for system processes. By default, RS
intercepts every signal delivered to every system process. This makes crash
recovery possible before bringing PM and friends in the loop.
- RS now supports fast rollback when something goes wrong while initializing
the new version during a live update.
- Live update is now implemented by keeping the two versions side-by-side and
swapping the process slots when the old version is ready to update.
- Crash recovery is now implemented by keeping the two versions side-by-side
and cleaning up the old version only when the recovery process is complete.
DS CHANGES:
- Fixed a bug when the process doing ds_publish() or ds_delete() is not known
by DS.
- Fixed the completely broken support for strings. String publishing is now
implemented in the system library and simply wraps publishing of memory ranges.
Ideally, we should adopt a similar approach for other data types as well.
- Test suite fixed.
DRIVER CHANGES:
- The hello driver has been added to the Minix distribution to demonstrate basic
live update and crash recovery functionalities.
- Other drivers have been adapted to conform the new SEF interface.
2010-03-17 02:15:29 +01:00
|
|
|
sef_setcb_init_restart(sef_cb_init_fail);
|
Initialization protocol for system services.
SYSLIB CHANGES:
- SEF framework now supports a new SEF Init request type from RS. 3 different
callbacks are available (init_fresh, init_lu, init_restart) to specify
initialization code when a service starts fresh, starts after a live update,
or restarts.
SYSTEM SERVICE CHANGES:
- Initialization code for system services is now enclosed in a callback SEF will
automatically call at init time. The return code of the callback will
tell RS whether the initialization completed successfully.
- Each init callback can access information passed by RS to initialize. As of
now, each system service has access to the public entries of RS's system process
table to gather all the information required to initialize. This design
eliminates many existing or potential races at boot time and provides a uniform
initialization interface to system services. The same interface will be reused
for the upcoming publish/subscribe model to handle dynamic
registration / deregistration of system services.
VM CHANGES:
- Uniform privilege management for all system services. Every service uses the
same call mask format. For boot services, VM copies the call mask from init
data. For dynamic services, VM still receives the call mask via rs_set_priv
call that will be soon replaced by the upcoming publish/subscribe model.
RS CHANGES:
- The system process table has been reorganized and split into private entries
and public entries. Only the latter ones are exposed to system services.
- VM call masks are now entirely configured in rs/table.c
- RS has now its own slot in the system process table. Only kernel tasks and
user processes not included in the boot image are now left out from the system
process table.
- RS implements the initialization protocol for system services.
- For services in the boot image, RS blocks till initialization is complete and
panics when failure is reported back. Services are initialized in their order of
appearance in the boot image priv table and RS blocks to implements synchronous
initialization for every system service having the flag SF_SYNCH_BOOT set.
- For services started dynamically, the initialization protocol is implemented
as though it were the first ping for the service. In this case, if the
system service fails to report back (or reports failure), RS brings the service
down rather than trying to restart it.
2010-01-08 02:20:42 +01:00
|
|
|
|
Basic System Event Framework (SEF) with ping and live update.
SYSLIB CHANGES:
- SEF must be used by every system process and is thereby part of the system
library.
- The framework provides a receive() interface (sef_receive) for system
processes to automatically catch known system even messages and process them.
- SEF provides a default behavior for each type of system event, but allows
system processes to register callbacks to override the default behavior.
- Custom (local to the process) or predefined (provided by SEF) callback
implementations can be registered to SEF.
- SEF currently includes support for 2 types of system events:
1. SEF Ping. The event occurs every time RS sends a ping to figure out
whether a system process is still alive. The default callback implementation
provided by SEF is to notify RS back to let it know the process is alive
and kicking.
2. SEF Live update. The event occurs every time RS sends a prepare to update
message to let a system process know an update is available and to prepare
for it. The live update support is very basic for now. SEF only deals with
verifying if the prepare state can be supported by the process, dumping the
state for debugging purposes, and providing an event-driven programming
model to the process to react to state changes check-in when ready to update.
- SEF should be extended in the future to integrate support for more types of
system events. Ideally, all the cross-cutting concerns should be integrated into
SEF to avoid duplicating code and ease extensibility. Examples include:
* PM notify messages primarily used at shutdown.
* SYSTEM notify messages primarily used for signals.
* CLOCK notify messages used for system alarms.
* Debug messages. IS could still be in charge of fkey handling but would
forward the debug message to the target process (e.g. PM, if the user
requested debug information about PM). SEF would then catch the message and
do nothing unless the process has registered an appropriate callback to
deal with the event. This simplifies the programming model to print debug
information, avoids duplicating code, and reduces the effort to print
debug information.
SYSTEM PROCESSES CHANGES:
- Every system process registers SEF callbacks it needs to override the default
system behavior and calls sef_startup() right after being started.
- sef_startup() does almost nothing now, but will be extended in the future to
support callbacks of its own to let RS control and synchronize with every
system process at initialization time.
- Every system process calls sef_receive() now rather than receive() directly,
to let SEF handle predefined system events.
RS CHANGES:
- RS supports a basic single-component live update protocol now, as follows:
* When an update command is issued (via "service update *"), RS notifies the
target system process to prepare for a specific update state.
* If the process doesn't respond back in time, the update is aborted.
* When the process responds back, RS kills it and marks it for refreshing.
* The process is then automatically restarted as for a buggy process and can
start running again.
* Live update is currently prototyped as a controlled failure.
2009-12-21 15:12:21 +01:00
|
|
|
/* No live update support for now. */
|
|
|
|
|
|
|
|
/* Let SEF perform startup. */
|
|
|
|
sef_startup();
|
|
|
|
}
|
|
|
|
|
Initialization protocol for system services.
SYSLIB CHANGES:
- SEF framework now supports a new SEF Init request type from RS. 3 different
callbacks are available (init_fresh, init_lu, init_restart) to specify
initialization code when a service starts fresh, starts after a live update,
or restarts.
SYSTEM SERVICE CHANGES:
- Initialization code for system services is now enclosed in a callback SEF will
automatically call at init time. The return code of the callback will
tell RS whether the initialization completed successfully.
- Each init callback can access information passed by RS to initialize. As of
now, each system service has access to the public entries of RS's system process
table to gather all the information required to initialize. This design
eliminates many existing or potential races at boot time and provides a uniform
initialization interface to system services. The same interface will be reused
for the upcoming publish/subscribe model to handle dynamic
registration / deregistration of system services.
VM CHANGES:
- Uniform privilege management for all system services. Every service uses the
same call mask format. For boot services, VM copies the call mask from init
data. For dynamic services, VM still receives the call mask via rs_set_priv
call that will be soon replaced by the upcoming publish/subscribe model.
RS CHANGES:
- The system process table has been reorganized and split into private entries
and public entries. Only the latter ones are exposed to system services.
- VM call masks are now entirely configured in rs/table.c
- RS has now its own slot in the system process table. Only kernel tasks and
user processes not included in the boot image are now left out from the system
process table.
- RS implements the initialization protocol for system services.
- For services in the boot image, RS blocks till initialization is complete and
panics when failure is reported back. Services are initialized in their order of
appearance in the boot image priv table and RS blocks to implements synchronous
initialization for every system service having the flag SF_SYNCH_BOOT set.
- For services started dynamically, the initialization protocol is implemented
as though it were the first ping for the service. In this case, if the
system service fails to report back (or reports failure), RS brings the service
down rather than trying to restart it.
2010-01-08 02:20:42 +01:00
|
|
|
/*===========================================================================*
|
|
|
|
* sef_cb_init_fresh *
|
|
|
|
*===========================================================================*/
|
2012-03-25 20:25:53 +02:00
|
|
|
static int sef_cb_init_fresh(int UNUSED(type), sef_init_info_t *info)
|
Initialization protocol for system services.
SYSLIB CHANGES:
- SEF framework now supports a new SEF Init request type from RS. 3 different
callbacks are available (init_fresh, init_lu, init_restart) to specify
initialization code when a service starts fresh, starts after a live update,
or restarts.
SYSTEM SERVICE CHANGES:
- Initialization code for system services is now enclosed in a callback SEF will
automatically call at init time. The return code of the callback will
tell RS whether the initialization completed successfully.
- Each init callback can access information passed by RS to initialize. As of
now, each system service has access to the public entries of RS's system process
table to gather all the information required to initialize. This design
eliminates many existing or potential races at boot time and provides a uniform
initialization interface to system services. The same interface will be reused
for the upcoming publish/subscribe model to handle dynamic
registration / deregistration of system services.
VM CHANGES:
- Uniform privilege management for all system services. Every service uses the
same call mask format. For boot services, VM copies the call mask from init
data. For dynamic services, VM still receives the call mask via rs_set_priv
call that will be soon replaced by the upcoming publish/subscribe model.
RS CHANGES:
- The system process table has been reorganized and split into private entries
and public entries. Only the latter ones are exposed to system services.
- VM call masks are now entirely configured in rs/table.c
- RS has now its own slot in the system process table. Only kernel tasks and
user processes not included in the boot image are now left out from the system
process table.
- RS implements the initialization protocol for system services.
- For services in the boot image, RS blocks till initialization is complete and
panics when failure is reported back. Services are initialized in their order of
appearance in the boot image priv table and RS blocks to implements synchronous
initialization for every system service having the flag SF_SYNCH_BOOT set.
- For services started dynamically, the initialization protocol is implemented
as though it were the first ping for the service. In this case, if the
system service fails to report back (or reports failure), RS brings the service
down rather than trying to restart it.
2010-01-08 02:20:42 +01:00
|
|
|
{
|
|
|
|
/* Initialize the virtual file server. */
|
2010-04-09 23:56:44 +02:00
|
|
|
int s, i;
|
2012-02-13 16:28:04 +01:00
|
|
|
struct fproc *rfp;
|
Initialization protocol for system services.
SYSLIB CHANGES:
- SEF framework now supports a new SEF Init request type from RS. 3 different
callbacks are available (init_fresh, init_lu, init_restart) to specify
initialization code when a service starts fresh, starts after a live update,
or restarts.
SYSTEM SERVICE CHANGES:
- Initialization code for system services is now enclosed in a callback SEF will
automatically call at init time. The return code of the callback will
tell RS whether the initialization completed successfully.
- Each init callback can access information passed by RS to initialize. As of
now, each system service has access to the public entries of RS's system process
table to gather all the information required to initialize. This design
eliminates many existing or potential races at boot time and provides a uniform
initialization interface to system services. The same interface will be reused
for the upcoming publish/subscribe model to handle dynamic
registration / deregistration of system services.
VM CHANGES:
- Uniform privilege management for all system services. Every service uses the
same call mask format. For boot services, VM copies the call mask from init
data. For dynamic services, VM still receives the call mask via rs_set_priv
call that will be soon replaced by the upcoming publish/subscribe model.
RS CHANGES:
- The system process table has been reorganized and split into private entries
and public entries. Only the latter ones are exposed to system services.
- VM call masks are now entirely configured in rs/table.c
- RS has now its own slot in the system process table. Only kernel tasks and
user processes not included in the boot image are now left out from the system
process table.
- RS implements the initialization protocol for system services.
- For services in the boot image, RS blocks till initialization is complete and
panics when failure is reported back. Services are initialized in their order of
appearance in the boot image priv table and RS blocks to implements synchronous
initialization for every system service having the flag SF_SYNCH_BOOT set.
- For services started dynamically, the initialization protocol is implemented
as though it were the first ping for the service. In this case, if the
system service fails to report back (or reports failure), RS brings the service
down rather than trying to restart it.
2010-01-08 02:20:42 +01:00
|
|
|
message mess;
|
2010-04-09 23:56:44 +02:00
|
|
|
struct rprocpub rprocpub[NR_BOOT_PROCS];
|
Initialization protocol for system services.
SYSLIB CHANGES:
- SEF framework now supports a new SEF Init request type from RS. 3 different
callbacks are available (init_fresh, init_lu, init_restart) to specify
initialization code when a service starts fresh, starts after a live update,
or restarts.
SYSTEM SERVICE CHANGES:
- Initialization code for system services is now enclosed in a callback SEF will
automatically call at init time. The return code of the callback will
tell RS whether the initialization completed successfully.
- Each init callback can access information passed by RS to initialize. As of
now, each system service has access to the public entries of RS's system process
table to gather all the information required to initialize. This design
eliminates many existing or potential races at boot time and provides a uniform
initialization interface to system services. The same interface will be reused
for the upcoming publish/subscribe model to handle dynamic
registration / deregistration of system services.
VM CHANGES:
- Uniform privilege management for all system services. Every service uses the
same call mask format. For boot services, VM copies the call mask from init
data. For dynamic services, VM still receives the call mask via rs_set_priv
call that will be soon replaced by the upcoming publish/subscribe model.
RS CHANGES:
- The system process table has been reorganized and split into private entries
and public entries. Only the latter ones are exposed to system services.
- VM call masks are now entirely configured in rs/table.c
- RS has now its own slot in the system process table. Only kernel tasks and
user processes not included in the boot image are now left out from the system
process table.
- RS implements the initialization protocol for system services.
- For services in the boot image, RS blocks till initialization is complete and
panics when failure is reported back. Services are initialized in their order of
appearance in the boot image priv table and RS blocks to implements synchronous
initialization for every system service having the flag SF_SYNCH_BOOT set.
- For services started dynamically, the initialization protocol is implemented
as though it were the first ping for the service. In this case, if the
system service fails to report back (or reports failure), RS brings the service
down rather than trying to restart it.
2010-01-08 02:20:42 +01:00
|
|
|
|
2012-02-13 16:28:04 +01:00
|
|
|
force_sync = 0;
|
|
|
|
receive_from = ANY;
|
2012-04-13 14:50:38 +02:00
|
|
|
self = NULL;
|
2012-03-30 11:24:44 +02:00
|
|
|
verbose = 0;
|
2012-02-13 16:28:04 +01:00
|
|
|
|
|
|
|
/* Initialize proc endpoints to NONE */
|
|
|
|
for (rfp = &fproc[0]; rfp < &fproc[NR_PROCS]; rfp++) {
|
|
|
|
rfp->fp_endpoint = NONE;
|
|
|
|
rfp->fp_pid = PID_FREE;
|
|
|
|
}
|
Initialization protocol for system services.
SYSLIB CHANGES:
- SEF framework now supports a new SEF Init request type from RS. 3 different
callbacks are available (init_fresh, init_lu, init_restart) to specify
initialization code when a service starts fresh, starts after a live update,
or restarts.
SYSTEM SERVICE CHANGES:
- Initialization code for system services is now enclosed in a callback SEF will
automatically call at init time. The return code of the callback will
tell RS whether the initialization completed successfully.
- Each init callback can access information passed by RS to initialize. As of
now, each system service has access to the public entries of RS's system process
table to gather all the information required to initialize. This design
eliminates many existing or potential races at boot time and provides a uniform
initialization interface to system services. The same interface will be reused
for the upcoming publish/subscribe model to handle dynamic
registration / deregistration of system services.
VM CHANGES:
- Uniform privilege management for all system services. Every service uses the
same call mask format. For boot services, VM copies the call mask from init
data. For dynamic services, VM still receives the call mask via rs_set_priv
call that will be soon replaced by the upcoming publish/subscribe model.
RS CHANGES:
- The system process table has been reorganized and split into private entries
and public entries. Only the latter ones are exposed to system services.
- VM call masks are now entirely configured in rs/table.c
- RS has now its own slot in the system process table. Only kernel tasks and
user processes not included in the boot image are now left out from the system
process table.
- RS implements the initialization protocol for system services.
- For services in the boot image, RS blocks till initialization is complete and
panics when failure is reported back. Services are initialized in their order of
appearance in the boot image priv table and RS blocks to implements synchronous
initialization for every system service having the flag SF_SYNCH_BOOT set.
- For services started dynamically, the initialization protocol is implemented
as though it were the first ping for the service. In this case, if the
system service fails to report back (or reports failure), RS brings the service
down rather than trying to restart it.
2010-01-08 02:20:42 +01:00
|
|
|
|
2012-02-13 16:28:04 +01:00
|
|
|
/* Initialize the process table with help of the process manager messages.
|
|
|
|
* Expect one message for each system process with its slot number and pid.
|
|
|
|
* When no more processes follow, the magic process number NONE is sent.
|
Initialization protocol for system services.
SYSLIB CHANGES:
- SEF framework now supports a new SEF Init request type from RS. 3 different
callbacks are available (init_fresh, init_lu, init_restart) to specify
initialization code when a service starts fresh, starts after a live update,
or restarts.
SYSTEM SERVICE CHANGES:
- Initialization code for system services is now enclosed in a callback SEF will
automatically call at init time. The return code of the callback will
tell RS whether the initialization completed successfully.
- Each init callback can access information passed by RS to initialize. As of
now, each system service has access to the public entries of RS's system process
table to gather all the information required to initialize. This design
eliminates many existing or potential races at boot time and provides a uniform
initialization interface to system services. The same interface will be reused
for the upcoming publish/subscribe model to handle dynamic
registration / deregistration of system services.
VM CHANGES:
- Uniform privilege management for all system services. Every service uses the
same call mask format. For boot services, VM copies the call mask from init
data. For dynamic services, VM still receives the call mask via rs_set_priv
call that will be soon replaced by the upcoming publish/subscribe model.
RS CHANGES:
- The system process table has been reorganized and split into private entries
and public entries. Only the latter ones are exposed to system services.
- VM call masks are now entirely configured in rs/table.c
- RS has now its own slot in the system process table. Only kernel tasks and
user processes not included in the boot image are now left out from the system
process table.
- RS implements the initialization protocol for system services.
- For services in the boot image, RS blocks till initialization is complete and
panics when failure is reported back. Services are initialized in their order of
appearance in the boot image priv table and RS blocks to implements synchronous
initialization for every system service having the flag SF_SYNCH_BOOT set.
- For services started dynamically, the initialization protocol is implemented
as though it were the first ping for the service. In this case, if the
system service fails to report back (or reports failure), RS brings the service
down rather than trying to restart it.
2010-01-08 02:20:42 +01:00
|
|
|
* Then, stop and synchronize with the PM.
|
|
|
|
*/
|
|
|
|
do {
|
2012-02-13 16:28:04 +01:00
|
|
|
if ((s = sef_receive(PM_PROC_NR, &mess)) != OK)
|
|
|
|
panic("VFS: couldn't receive from PM: %d", s);
|
Initialization protocol for system services.
SYSLIB CHANGES:
- SEF framework now supports a new SEF Init request type from RS. 3 different
callbacks are available (init_fresh, init_lu, init_restart) to specify
initialization code when a service starts fresh, starts after a live update,
or restarts.
SYSTEM SERVICE CHANGES:
- Initialization code for system services is now enclosed in a callback SEF will
automatically call at init time. The return code of the callback will
tell RS whether the initialization completed successfully.
- Each init callback can access information passed by RS to initialize. As of
now, each system service has access to the public entries of RS's system process
table to gather all the information required to initialize. This design
eliminates many existing or potential races at boot time and provides a uniform
initialization interface to system services. The same interface will be reused
for the upcoming publish/subscribe model to handle dynamic
registration / deregistration of system services.
VM CHANGES:
- Uniform privilege management for all system services. Every service uses the
same call mask format. For boot services, VM copies the call mask from init
data. For dynamic services, VM still receives the call mask via rs_set_priv
call that will be soon replaced by the upcoming publish/subscribe model.
RS CHANGES:
- The system process table has been reorganized and split into private entries
and public entries. Only the latter ones are exposed to system services.
- VM call masks are now entirely configured in rs/table.c
- RS has now its own slot in the system process table. Only kernel tasks and
user processes not included in the boot image are now left out from the system
process table.
- RS implements the initialization protocol for system services.
- For services in the boot image, RS blocks till initialization is complete and
panics when failure is reported back. Services are initialized in their order of
appearance in the boot image priv table and RS blocks to implements synchronous
initialization for every system service having the flag SF_SYNCH_BOOT set.
- For services started dynamically, the initialization protocol is implemented
as though it were the first ping for the service. In this case, if the
system service fails to report back (or reports failure), RS brings the service
down rather than trying to restart it.
2010-01-08 02:20:42 +01:00
|
|
|
|
|
|
|
if (mess.m_type != PM_INIT)
|
2010-03-05 16:05:11 +01:00
|
|
|
panic("unexpected message from PM: %d", mess.m_type);
|
Initialization protocol for system services.
SYSLIB CHANGES:
- SEF framework now supports a new SEF Init request type from RS. 3 different
callbacks are available (init_fresh, init_lu, init_restart) to specify
initialization code when a service starts fresh, starts after a live update,
or restarts.
SYSTEM SERVICE CHANGES:
- Initialization code for system services is now enclosed in a callback SEF will
automatically call at init time. The return code of the callback will
tell RS whether the initialization completed successfully.
- Each init callback can access information passed by RS to initialize. As of
now, each system service has access to the public entries of RS's system process
table to gather all the information required to initialize. This design
eliminates many existing or potential races at boot time and provides a uniform
initialization interface to system services. The same interface will be reused
for the upcoming publish/subscribe model to handle dynamic
registration / deregistration of system services.
VM CHANGES:
- Uniform privilege management for all system services. Every service uses the
same call mask format. For boot services, VM copies the call mask from init
data. For dynamic services, VM still receives the call mask via rs_set_priv
call that will be soon replaced by the upcoming publish/subscribe model.
RS CHANGES:
- The system process table has been reorganized and split into private entries
and public entries. Only the latter ones are exposed to system services.
- VM call masks are now entirely configured in rs/table.c
- RS has now its own slot in the system process table. Only kernel tasks and
user processes not included in the boot image are now left out from the system
process table.
- RS implements the initialization protocol for system services.
- For services in the boot image, RS blocks till initialization is complete and
panics when failure is reported back. Services are initialized in their order of
appearance in the boot image priv table and RS blocks to implements synchronous
initialization for every system service having the flag SF_SYNCH_BOOT set.
- For services started dynamically, the initialization protocol is implemented
as though it were the first ping for the service. In this case, if the
system service fails to report back (or reports failure), RS brings the service
down rather than trying to restart it.
2010-01-08 02:20:42 +01:00
|
|
|
|
2012-02-13 16:28:04 +01:00
|
|
|
if (NONE == mess.PM_PROC) break;
|
Initialization protocol for system services.
SYSLIB CHANGES:
- SEF framework now supports a new SEF Init request type from RS. 3 different
callbacks are available (init_fresh, init_lu, init_restart) to specify
initialization code when a service starts fresh, starts after a live update,
or restarts.
SYSTEM SERVICE CHANGES:
- Initialization code for system services is now enclosed in a callback SEF will
automatically call at init time. The return code of the callback will
tell RS whether the initialization completed successfully.
- Each init callback can access information passed by RS to initialize. As of
now, each system service has access to the public entries of RS's system process
table to gather all the information required to initialize. This design
eliminates many existing or potential races at boot time and provides a uniform
initialization interface to system services. The same interface will be reused
for the upcoming publish/subscribe model to handle dynamic
registration / deregistration of system services.
VM CHANGES:
- Uniform privilege management for all system services. Every service uses the
same call mask format. For boot services, VM copies the call mask from init
data. For dynamic services, VM still receives the call mask via rs_set_priv
call that will be soon replaced by the upcoming publish/subscribe model.
RS CHANGES:
- The system process table has been reorganized and split into private entries
and public entries. Only the latter ones are exposed to system services.
- VM call masks are now entirely configured in rs/table.c
- RS has now its own slot in the system process table. Only kernel tasks and
user processes not included in the boot image are now left out from the system
process table.
- RS implements the initialization protocol for system services.
- For services in the boot image, RS blocks till initialization is complete and
panics when failure is reported back. Services are initialized in their order of
appearance in the boot image priv table and RS blocks to implements synchronous
initialization for every system service having the flag SF_SYNCH_BOOT set.
- For services started dynamically, the initialization protocol is implemented
as though it were the first ping for the service. In this case, if the
system service fails to report back (or reports failure), RS brings the service
down rather than trying to restart it.
2010-01-08 02:20:42 +01:00
|
|
|
|
|
|
|
rfp = &fproc[mess.PM_SLOT];
|
2012-02-13 16:28:04 +01:00
|
|
|
rfp->fp_flags = FP_NOFLAGS;
|
Initialization protocol for system services.
SYSLIB CHANGES:
- SEF framework now supports a new SEF Init request type from RS. 3 different
callbacks are available (init_fresh, init_lu, init_restart) to specify
initialization code when a service starts fresh, starts after a live update,
or restarts.
SYSTEM SERVICE CHANGES:
- Initialization code for system services is now enclosed in a callback SEF will
automatically call at init time. The return code of the callback will
tell RS whether the initialization completed successfully.
- Each init callback can access information passed by RS to initialize. As of
now, each system service has access to the public entries of RS's system process
table to gather all the information required to initialize. This design
eliminates many existing or potential races at boot time and provides a uniform
initialization interface to system services. The same interface will be reused
for the upcoming publish/subscribe model to handle dynamic
registration / deregistration of system services.
VM CHANGES:
- Uniform privilege management for all system services. Every service uses the
same call mask format. For boot services, VM copies the call mask from init
data. For dynamic services, VM still receives the call mask via rs_set_priv
call that will be soon replaced by the upcoming publish/subscribe model.
RS CHANGES:
- The system process table has been reorganized and split into private entries
and public entries. Only the latter ones are exposed to system services.
- VM call masks are now entirely configured in rs/table.c
- RS has now its own slot in the system process table. Only kernel tasks and
user processes not included in the boot image are now left out from the system
process table.
- RS implements the initialization protocol for system services.
- For services in the boot image, RS blocks till initialization is complete and
panics when failure is reported back. Services are initialized in their order of
appearance in the boot image priv table and RS blocks to implements synchronous
initialization for every system service having the flag SF_SYNCH_BOOT set.
- For services started dynamically, the initialization protocol is implemented
as though it were the first ping for the service. In this case, if the
system service fails to report back (or reports failure), RS brings the service
down rather than trying to restart it.
2010-01-08 02:20:42 +01:00
|
|
|
rfp->fp_pid = mess.PM_PID;
|
|
|
|
rfp->fp_endpoint = mess.PM_PROC;
|
2012-02-13 16:28:04 +01:00
|
|
|
rfp->fp_grant = GRANT_INVALID;
|
|
|
|
rfp->fp_blocked_on = FP_BLOCKED_ON_NONE;
|
Initialization protocol for system services.
SYSLIB CHANGES:
- SEF framework now supports a new SEF Init request type from RS. 3 different
callbacks are available (init_fresh, init_lu, init_restart) to specify
initialization code when a service starts fresh, starts after a live update,
or restarts.
SYSTEM SERVICE CHANGES:
- Initialization code for system services is now enclosed in a callback SEF will
automatically call at init time. The return code of the callback will
tell RS whether the initialization completed successfully.
- Each init callback can access information passed by RS to initialize. As of
now, each system service has access to the public entries of RS's system process
table to gather all the information required to initialize. This design
eliminates many existing or potential races at boot time and provides a uniform
initialization interface to system services. The same interface will be reused
for the upcoming publish/subscribe model to handle dynamic
registration / deregistration of system services.
VM CHANGES:
- Uniform privilege management for all system services. Every service uses the
same call mask format. For boot services, VM copies the call mask from init
data. For dynamic services, VM still receives the call mask via rs_set_priv
call that will be soon replaced by the upcoming publish/subscribe model.
RS CHANGES:
- The system process table has been reorganized and split into private entries
and public entries. Only the latter ones are exposed to system services.
- VM call masks are now entirely configured in rs/table.c
- RS has now its own slot in the system process table. Only kernel tasks and
user processes not included in the boot image are now left out from the system
process table.
- RS implements the initialization protocol for system services.
- For services in the boot image, RS blocks till initialization is complete and
panics when failure is reported back. Services are initialized in their order of
appearance in the boot image priv table and RS blocks to implements synchronous
initialization for every system service having the flag SF_SYNCH_BOOT set.
- For services started dynamically, the initialization protocol is implemented
as though it were the first ping for the service. In this case, if the
system service fails to report back (or reports failure), RS brings the service
down rather than trying to restart it.
2010-01-08 02:20:42 +01:00
|
|
|
rfp->fp_realuid = (uid_t) SYS_UID;
|
|
|
|
rfp->fp_effuid = (uid_t) SYS_UID;
|
|
|
|
rfp->fp_realgid = (gid_t) SYS_GID;
|
|
|
|
rfp->fp_effgid = (gid_t) SYS_GID;
|
|
|
|
rfp->fp_umask = ~0;
|
|
|
|
} while (TRUE); /* continue until process NONE */
|
|
|
|
mess.m_type = OK; /* tell PM that we succeeded */
|
|
|
|
s = send(PM_PROC_NR, &mess); /* send synchronization message */
|
|
|
|
|
2010-07-22 16:55:28 +02:00
|
|
|
/* All process table entries have been set. Continue with initialization. */
|
2012-02-13 16:28:04 +01:00
|
|
|
fp = &fproc[_ENDPOINT_P(VFS_PROC_NR)];/* During init all communication with
|
|
|
|
* FSes is on behalf of myself */
|
|
|
|
init_dmap(); /* Initialize device table. */
|
|
|
|
system_hz = sys_hz();
|
2010-04-09 23:56:44 +02:00
|
|
|
|
|
|
|
/* Map all the services in the boot image. */
|
2012-02-13 16:28:04 +01:00
|
|
|
if ((s = sys_safecopyfrom(RS_PROC_NR, info->rproctab_gid, 0,
|
2012-06-16 03:46:15 +02:00
|
|
|
(vir_bytes) rprocpub, sizeof(rprocpub))) != OK){
|
2010-04-09 23:56:44 +02:00
|
|
|
panic("sys_safecopyfrom failed: %d", s);
|
|
|
|
}
|
2012-02-13 16:28:04 +01:00
|
|
|
for (i = 0; i < NR_BOOT_PROCS; i++) {
|
|
|
|
if (rprocpub[i].in_use) {
|
|
|
|
if ((s = map_service(&rprocpub[i])) != OK) {
|
|
|
|
panic("VFS: unable to map service: %d", s);
|
2010-04-09 23:56:44 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2012-02-13 16:28:04 +01:00
|
|
|
/* Subscribe to block and character driver events. */
|
|
|
|
s = ds_subscribe("drv\\.[bc]..\\..*", DSF_INITIAL | DSF_OVERWRITE);
|
|
|
|
if (s != OK) panic("VFS: can't subscribe to driver events (%d)", s);
|
|
|
|
|
|
|
|
/* Initialize worker threads */
|
|
|
|
for (i = 0; i < NR_WTHREADS; i++) {
|
|
|
|
worker_init(&workers[i]);
|
|
|
|
}
|
|
|
|
worker_init(&sys_worker); /* exclusive system worker thread */
|
|
|
|
worker_init(&dl_worker); /* exclusive worker thread to resolve deadlocks */
|
|
|
|
|
|
|
|
/* Initialize global locks */
|
|
|
|
if (mthread_mutex_init(&pm_lock, NULL) != 0)
|
|
|
|
panic("VFS: couldn't initialize pm lock mutex");
|
|
|
|
if (mthread_mutex_init(&exec_lock, NULL) != 0)
|
|
|
|
panic("VFS: couldn't initialize exec lock");
|
|
|
|
if (mthread_mutex_init(&bsf_lock, NULL) != 0)
|
|
|
|
panic("VFS: couldn't initialize block special file lock");
|
|
|
|
|
|
|
|
/* Initialize event resources for boot procs and locks for all procs */
|
|
|
|
for (rfp = &fproc[0]; rfp < &fproc[NR_PROCS]; rfp++) {
|
|
|
|
if (mutex_init(&rfp->fp_lock, NULL) != 0)
|
|
|
|
panic("unable to initialize fproc lock");
|
|
|
|
#if LOCK_DEBUG
|
|
|
|
rfp->fp_vp_rdlocks = 0;
|
|
|
|
rfp->fp_vmnt_rdlocks = 0;
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
|
VFS: make all IPC asynchronous
By decoupling synchronous drivers from VFS, we are a big step closer to
supporting driver crashes under all circumstances. That is, VFS can't
become stuck on IPC with a synchronous driver (e.g., INET) and can
recover from crashing block drivers during open/close/ioctl or during
communication with an FS.
In order to maintain serialized communication with a synchronous driver,
the communication is wrapped by a mutex on a per driver basis (not major
numbers as there can be multiple majors with identical endpoints). Majors
that share a driver endpoint point to a single mutex object.
In order to support crashes from block drivers, the file reopen tactic
had to be changed; first reopen files associated with the crashed
driver, then send the new driver endpoint to FSes. This solves a
deadlock between the FS and the block driver;
- VFS would send REQ_NEW_DRIVER to an FS, but he FS only receives it
after retrying the current request to the newly started driver.
- The block driver would refuse the retried request until all files
had been reopened.
- VFS would reopen files only after getting a reply from the initial
REQ_NEW_DRIVER.
When a character special driver crashes, all associated files have to
be marked invalid and closed (or reopened if flagged as such). However,
they can only be closed if a thread holds exclusive access to it. To
obtain exclusive access, the worker thread (which handles the new driver
endpoint event from DS) schedules a new job to garbage collect invalid
files. This way, we can signal the worker thread that was talking to the
crashed driver and will release exclusive access to a file associated
with the crashed driver and prevent the garbage collecting worker thread
from dead locking on that file.
Also, when a character special driver crashes, RS will unmap the driver
and remap it upon restart. During unmapping, associated files are marked
invalid instead of waiting for an endpoint up event from DS, as that
event might come later than new read/write/select requests and thus
cause confusion in the freshly started driver.
When locking a filp, the usage counters are no longer checked. The usage
counter can legally go down to zero during filp invalidation while there
are locks pending.
DS events are handled by a separate worker thread instead of the main
thread as reopening files could lead to another crash and a stuck thread.
An additional worker thread is then necessary to unlock it.
Finally, with everything asynchronous a race condition in do_select
surfaced. A select entry was only marked in use after succesfully sending
initial select requests to drivers and having to wait. When multiple
select() calls were handled there was opportunity that these entries
were overwritten. This had as effect that some select results were
ignored (and select() remained blocking instead if returning) or do_select
tried to access filps that were not present (because thrown away by
secondary select()). This bug manifested itself with sendrecs, but was
very hard to reproduce. However, it became awfully easy to trigger with
asynsends only.
2012-08-28 16:06:51 +02:00
|
|
|
init_dmap_locks(); /* init dmap locks */
|
2012-02-13 16:28:04 +01:00
|
|
|
init_vnodes(); /* init vnodes */
|
|
|
|
init_vmnts(); /* init vmnt structures */
|
Initialization protocol for system services.
SYSLIB CHANGES:
- SEF framework now supports a new SEF Init request type from RS. 3 different
callbacks are available (init_fresh, init_lu, init_restart) to specify
initialization code when a service starts fresh, starts after a live update,
or restarts.
SYSTEM SERVICE CHANGES:
- Initialization code for system services is now enclosed in a callback SEF will
automatically call at init time. The return code of the callback will
tell RS whether the initialization completed successfully.
- Each init callback can access information passed by RS to initialize. As of
now, each system service has access to the public entries of RS's system process
table to gather all the information required to initialize. This design
eliminates many existing or potential races at boot time and provides a uniform
initialization interface to system services. The same interface will be reused
for the upcoming publish/subscribe model to handle dynamic
registration / deregistration of system services.
VM CHANGES:
- Uniform privilege management for all system services. Every service uses the
same call mask format. For boot services, VM copies the call mask from init
data. For dynamic services, VM still receives the call mask via rs_set_priv
call that will be soon replaced by the upcoming publish/subscribe model.
RS CHANGES:
- The system process table has been reorganized and split into private entries
and public entries. Only the latter ones are exposed to system services.
- VM call masks are now entirely configured in rs/table.c
- RS has now its own slot in the system process table. Only kernel tasks and
user processes not included in the boot image are now left out from the system
process table.
- RS implements the initialization protocol for system services.
- For services in the boot image, RS blocks till initialization is complete and
panics when failure is reported back. Services are initialized in their order of
appearance in the boot image priv table and RS blocks to implements synchronous
initialization for every system service having the flag SF_SYNCH_BOOT set.
- For services started dynamically, the initialization protocol is implemented
as though it were the first ping for the service. In this case, if the
system service fails to report back (or reports failure), RS brings the service
down rather than trying to restart it.
2010-01-08 02:20:42 +01:00
|
|
|
init_select(); /* init select() structures */
|
2012-02-13 16:28:04 +01:00
|
|
|
init_filps(); /* Init filp structures */
|
|
|
|
mount_pfs(); /* mount Pipe File Server */
|
|
|
|
worker_start(do_init_root); /* mount initial ramdisk as file system root */
|
|
|
|
yield(); /* force do_init_root to start */
|
2012-04-13 14:50:38 +02:00
|
|
|
self = NULL;
|
2012-02-13 16:28:04 +01:00
|
|
|
|
|
|
|
return(OK);
|
|
|
|
}
|
Initialization protocol for system services.
SYSLIB CHANGES:
- SEF framework now supports a new SEF Init request type from RS. 3 different
callbacks are available (init_fresh, init_lu, init_restart) to specify
initialization code when a service starts fresh, starts after a live update,
or restarts.
SYSTEM SERVICE CHANGES:
- Initialization code for system services is now enclosed in a callback SEF will
automatically call at init time. The return code of the callback will
tell RS whether the initialization completed successfully.
- Each init callback can access information passed by RS to initialize. As of
now, each system service has access to the public entries of RS's system process
table to gather all the information required to initialize. This design
eliminates many existing or potential races at boot time and provides a uniform
initialization interface to system services. The same interface will be reused
for the upcoming publish/subscribe model to handle dynamic
registration / deregistration of system services.
VM CHANGES:
- Uniform privilege management for all system services. Every service uses the
same call mask format. For boot services, VM copies the call mask from init
data. For dynamic services, VM still receives the call mask via rs_set_priv
call that will be soon replaced by the upcoming publish/subscribe model.
RS CHANGES:
- The system process table has been reorganized and split into private entries
and public entries. Only the latter ones are exposed to system services.
- VM call masks are now entirely configured in rs/table.c
- RS has now its own slot in the system process table. Only kernel tasks and
user processes not included in the boot image are now left out from the system
process table.
- RS implements the initialization protocol for system services.
- For services in the boot image, RS blocks till initialization is complete and
panics when failure is reported back. Services are initialized in their order of
appearance in the boot image priv table and RS blocks to implements synchronous
initialization for every system service having the flag SF_SYNCH_BOOT set.
- For services started dynamically, the initialization protocol is implemented
as though it were the first ping for the service. In this case, if the
system service fails to report back (or reports failure), RS brings the service
down rather than trying to restart it.
2010-01-08 02:20:42 +01:00
|
|
|
|
2012-02-13 16:28:04 +01:00
|
|
|
/*===========================================================================*
|
|
|
|
* do_init_root *
|
|
|
|
*===========================================================================*/
|
2012-03-25 20:25:53 +02:00
|
|
|
static void *do_init_root(void *arg)
|
2012-02-13 16:28:04 +01:00
|
|
|
{
|
|
|
|
struct fproc *rfp;
|
|
|
|
struct job my_job;
|
|
|
|
int r;
|
|
|
|
char *mount_label = "fs_imgrd"; /* FIXME: obtain this from RS */
|
|
|
|
|
|
|
|
my_job = *((struct job *) arg);
|
|
|
|
fp = my_job.j_fp;
|
Initialization protocol for system services.
SYSLIB CHANGES:
- SEF framework now supports a new SEF Init request type from RS. 3 different
callbacks are available (init_fresh, init_lu, init_restart) to specify
initialization code when a service starts fresh, starts after a live update,
or restarts.
SYSTEM SERVICE CHANGES:
- Initialization code for system services is now enclosed in a callback SEF will
automatically call at init time. The return code of the callback will
tell RS whether the initialization completed successfully.
- Each init callback can access information passed by RS to initialize. As of
now, each system service has access to the public entries of RS's system process
table to gather all the information required to initialize. This design
eliminates many existing or potential races at boot time and provides a uniform
initialization interface to system services. The same interface will be reused
for the upcoming publish/subscribe model to handle dynamic
registration / deregistration of system services.
VM CHANGES:
- Uniform privilege management for all system services. Every service uses the
same call mask format. For boot services, VM copies the call mask from init
data. For dynamic services, VM still receives the call mask via rs_set_priv
call that will be soon replaced by the upcoming publish/subscribe model.
RS CHANGES:
- The system process table has been reorganized and split into private entries
and public entries. Only the latter ones are exposed to system services.
- VM call masks are now entirely configured in rs/table.c
- RS has now its own slot in the system process table. Only kernel tasks and
user processes not included in the boot image are now left out from the system
process table.
- RS implements the initialization protocol for system services.
- For services in the boot image, RS blocks till initialization is complete and
panics when failure is reported back. Services are initialized in their order of
appearance in the boot image priv table and RS blocks to implements synchronous
initialization for every system service having the flag SF_SYNCH_BOOT set.
- For services started dynamically, the initialization protocol is implemented
as though it were the first ping for the service. In this case, if the
system service fails to report back (or reports failure), RS brings the service
down rather than trying to restart it.
2010-01-08 02:20:42 +01:00
|
|
|
|
2012-02-13 16:28:04 +01:00
|
|
|
lock_proc(fp, 1 /* force lock */); /* This proc is busy */
|
|
|
|
lock_pm();
|
Initialization protocol for system services.
SYSLIB CHANGES:
- SEF framework now supports a new SEF Init request type from RS. 3 different
callbacks are available (init_fresh, init_lu, init_restart) to specify
initialization code when a service starts fresh, starts after a live update,
or restarts.
SYSTEM SERVICE CHANGES:
- Initialization code for system services is now enclosed in a callback SEF will
automatically call at init time. The return code of the callback will
tell RS whether the initialization completed successfully.
- Each init callback can access information passed by RS to initialize. As of
now, each system service has access to the public entries of RS's system process
table to gather all the information required to initialize. This design
eliminates many existing or potential races at boot time and provides a uniform
initialization interface to system services. The same interface will be reused
for the upcoming publish/subscribe model to handle dynamic
registration / deregistration of system services.
VM CHANGES:
- Uniform privilege management for all system services. Every service uses the
same call mask format. For boot services, VM copies the call mask from init
data. For dynamic services, VM still receives the call mask via rs_set_priv
call that will be soon replaced by the upcoming publish/subscribe model.
RS CHANGES:
- The system process table has been reorganized and split into private entries
and public entries. Only the latter ones are exposed to system services.
- VM call masks are now entirely configured in rs/table.c
- RS has now its own slot in the system process table. Only kernel tasks and
user processes not included in the boot image are now left out from the system
process table.
- RS implements the initialization protocol for system services.
- For services in the boot image, RS blocks till initialization is complete and
panics when failure is reported back. Services are initialized in their order of
appearance in the boot image priv table and RS blocks to implements synchronous
initialization for every system service having the flag SF_SYNCH_BOOT set.
- For services started dynamically, the initialization protocol is implemented
as though it were the first ping for the service. In this case, if the
system service fails to report back (or reports failure), RS brings the service
down rather than trying to restart it.
2010-01-08 02:20:42 +01:00
|
|
|
|
2012-02-13 16:28:04 +01:00
|
|
|
/* Initialize process directories. mount_fs will set them to the correct
|
|
|
|
* values */
|
|
|
|
for (rfp = &fproc[0]; rfp < &fproc[NR_PROCS]; rfp++) {
|
Initialization protocol for system services.
SYSLIB CHANGES:
- SEF framework now supports a new SEF Init request type from RS. 3 different
callbacks are available (init_fresh, init_lu, init_restart) to specify
initialization code when a service starts fresh, starts after a live update,
or restarts.
SYSTEM SERVICE CHANGES:
- Initialization code for system services is now enclosed in a callback SEF will
automatically call at init time. The return code of the callback will
tell RS whether the initialization completed successfully.
- Each init callback can access information passed by RS to initialize. As of
now, each system service has access to the public entries of RS's system process
table to gather all the information required to initialize. This design
eliminates many existing or potential races at boot time and provides a uniform
initialization interface to system services. The same interface will be reused
for the upcoming publish/subscribe model to handle dynamic
registration / deregistration of system services.
VM CHANGES:
- Uniform privilege management for all system services. Every service uses the
same call mask format. For boot services, VM copies the call mask from init
data. For dynamic services, VM still receives the call mask via rs_set_priv
call that will be soon replaced by the upcoming publish/subscribe model.
RS CHANGES:
- The system process table has been reorganized and split into private entries
and public entries. Only the latter ones are exposed to system services.
- VM call masks are now entirely configured in rs/table.c
- RS has now its own slot in the system process table. Only kernel tasks and
user processes not included in the boot image are now left out from the system
process table.
- RS implements the initialization protocol for system services.
- For services in the boot image, RS blocks till initialization is complete and
panics when failure is reported back. Services are initialized in their order of
appearance in the boot image priv table and RS blocks to implements synchronous
initialization for every system service having the flag SF_SYNCH_BOOT set.
- For services started dynamically, the initialization protocol is implemented
as though it were the first ping for the service. In this case, if the
system service fails to report back (or reports failure), RS brings the service
down rather than trying to restart it.
2010-01-08 02:20:42 +01:00
|
|
|
FD_ZERO(&(rfp->fp_filp_inuse));
|
2012-02-13 16:28:04 +01:00
|
|
|
rfp->fp_rd = NULL;
|
|
|
|
rfp->fp_wd = NULL;
|
Initialization protocol for system services.
SYSLIB CHANGES:
- SEF framework now supports a new SEF Init request type from RS. 3 different
callbacks are available (init_fresh, init_lu, init_restart) to specify
initialization code when a service starts fresh, starts after a live update,
or restarts.
SYSTEM SERVICE CHANGES:
- Initialization code for system services is now enclosed in a callback SEF will
automatically call at init time. The return code of the callback will
tell RS whether the initialization completed successfully.
- Each init callback can access information passed by RS to initialize. As of
now, each system service has access to the public entries of RS's system process
table to gather all the information required to initialize. This design
eliminates many existing or potential races at boot time and provides a uniform
initialization interface to system services. The same interface will be reused
for the upcoming publish/subscribe model to handle dynamic
registration / deregistration of system services.
VM CHANGES:
- Uniform privilege management for all system services. Every service uses the
same call mask format. For boot services, VM copies the call mask from init
data. For dynamic services, VM still receives the call mask via rs_set_priv
call that will be soon replaced by the upcoming publish/subscribe model.
RS CHANGES:
- The system process table has been reorganized and split into private entries
and public entries. Only the latter ones are exposed to system services.
- VM call masks are now entirely configured in rs/table.c
- RS has now its own slot in the system process table. Only kernel tasks and
user processes not included in the boot image are now left out from the system
process table.
- RS implements the initialization protocol for system services.
- For services in the boot image, RS blocks till initialization is complete and
panics when failure is reported back. Services are initialized in their order of
appearance in the boot image priv table and RS blocks to implements synchronous
initialization for every system service having the flag SF_SYNCH_BOOT set.
- For services started dynamically, the initialization protocol is implemented
as though it were the first ping for the service. In this case, if the
system service fails to report back (or reports failure), RS brings the service
down rather than trying to restart it.
2010-01-08 02:20:42 +01:00
|
|
|
}
|
|
|
|
|
2012-02-13 16:28:04 +01:00
|
|
|
receive_from = MFS_PROC_NR;
|
2012-11-22 23:00:00 +01:00
|
|
|
r = mount_fs(DEV_IMGRD, "bootramdisk", "/", MFS_PROC_NR, 0, mount_label);
|
|
|
|
if (r != OK)
|
2012-02-13 16:28:04 +01:00
|
|
|
panic("Failed to initialize root");
|
|
|
|
receive_from = ANY;
|
Initialization protocol for system services.
SYSLIB CHANGES:
- SEF framework now supports a new SEF Init request type from RS. 3 different
callbacks are available (init_fresh, init_lu, init_restart) to specify
initialization code when a service starts fresh, starts after a live update,
or restarts.
SYSTEM SERVICE CHANGES:
- Initialization code for system services is now enclosed in a callback SEF will
automatically call at init time. The return code of the callback will
tell RS whether the initialization completed successfully.
- Each init callback can access information passed by RS to initialize. As of
now, each system service has access to the public entries of RS's system process
table to gather all the information required to initialize. This design
eliminates many existing or potential races at boot time and provides a uniform
initialization interface to system services. The same interface will be reused
for the upcoming publish/subscribe model to handle dynamic
registration / deregistration of system services.
VM CHANGES:
- Uniform privilege management for all system services. Every service uses the
same call mask format. For boot services, VM copies the call mask from init
data. For dynamic services, VM still receives the call mask via rs_set_priv
call that will be soon replaced by the upcoming publish/subscribe model.
RS CHANGES:
- The system process table has been reorganized and split into private entries
and public entries. Only the latter ones are exposed to system services.
- VM call masks are now entirely configured in rs/table.c
- RS has now its own slot in the system process table. Only kernel tasks and
user processes not included in the boot image are now left out from the system
process table.
- RS implements the initialization protocol for system services.
- For services in the boot image, RS blocks till initialization is complete and
panics when failure is reported back. Services are initialized in their order of
appearance in the boot image priv table and RS blocks to implements synchronous
initialization for every system service having the flag SF_SYNCH_BOOT set.
- For services started dynamically, the initialization protocol is implemented
as though it were the first ping for the service. In this case, if the
system service fails to report back (or reports failure), RS brings the service
down rather than trying to restart it.
2010-01-08 02:20:42 +01:00
|
|
|
|
2012-02-13 16:28:04 +01:00
|
|
|
unlock_pm();
|
|
|
|
thread_cleanup(fp);
|
2012-11-14 14:12:37 +01:00
|
|
|
unlock_proc(fp);
|
2012-02-13 16:28:04 +01:00
|
|
|
return(NULL);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*===========================================================================*
|
|
|
|
* lock_proc *
|
|
|
|
*===========================================================================*/
|
2012-03-25 20:25:53 +02:00
|
|
|
void lock_proc(struct fproc *rfp, int force_lock)
|
2012-02-13 16:28:04 +01:00
|
|
|
{
|
|
|
|
int r;
|
|
|
|
struct fproc *org_fp;
|
|
|
|
struct worker_thread *org_self;
|
|
|
|
|
|
|
|
r = mutex_trylock(&rfp->fp_lock);
|
|
|
|
|
|
|
|
/* Were we supposed to obtain this lock immediately? */
|
|
|
|
if (force_lock) {
|
|
|
|
assert(r == 0);
|
|
|
|
return;
|
2010-04-08 15:41:35 +02:00
|
|
|
}
|
|
|
|
|
2012-02-13 16:28:04 +01:00
|
|
|
if (r == 0) return;
|
Initialization protocol for system services.
SYSLIB CHANGES:
- SEF framework now supports a new SEF Init request type from RS. 3 different
callbacks are available (init_fresh, init_lu, init_restart) to specify
initialization code when a service starts fresh, starts after a live update,
or restarts.
SYSTEM SERVICE CHANGES:
- Initialization code for system services is now enclosed in a callback SEF will
automatically call at init time. The return code of the callback will
tell RS whether the initialization completed successfully.
- Each init callback can access information passed by RS to initialize. As of
now, each system service has access to the public entries of RS's system process
table to gather all the information required to initialize. This design
eliminates many existing or potential races at boot time and provides a uniform
initialization interface to system services. The same interface will be reused
for the upcoming publish/subscribe model to handle dynamic
registration / deregistration of system services.
VM CHANGES:
- Uniform privilege management for all system services. Every service uses the
same call mask format. For boot services, VM copies the call mask from init
data. For dynamic services, VM still receives the call mask via rs_set_priv
call that will be soon replaced by the upcoming publish/subscribe model.
RS CHANGES:
- The system process table has been reorganized and split into private entries
and public entries. Only the latter ones are exposed to system services.
- VM call masks are now entirely configured in rs/table.c
- RS has now its own slot in the system process table. Only kernel tasks and
user processes not included in the boot image are now left out from the system
process table.
- RS implements the initialization protocol for system services.
- For services in the boot image, RS blocks till initialization is complete and
panics when failure is reported back. Services are initialized in their order of
appearance in the boot image priv table and RS blocks to implements synchronous
initialization for every system service having the flag SF_SYNCH_BOOT set.
- For services started dynamically, the initialization protocol is implemented
as though it were the first ping for the service. In this case, if the
system service fails to report back (or reports failure), RS brings the service
down rather than trying to restart it.
2010-01-08 02:20:42 +01:00
|
|
|
|
2012-02-13 16:28:04 +01:00
|
|
|
org_fp = fp;
|
|
|
|
org_self = self;
|
2012-04-13 14:50:38 +02:00
|
|
|
|
2012-02-13 16:28:04 +01:00
|
|
|
if ((r = mutex_lock(&rfp->fp_lock)) != 0)
|
|
|
|
panic("unable to lock fproc lock: %d", r);
|
2012-04-13 14:50:38 +02:00
|
|
|
|
2012-02-13 16:28:04 +01:00
|
|
|
fp = org_fp;
|
|
|
|
self = org_self;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*===========================================================================*
|
|
|
|
* unlock_proc *
|
|
|
|
*===========================================================================*/
|
2012-03-25 20:25:53 +02:00
|
|
|
void unlock_proc(struct fproc *rfp)
|
2012-02-13 16:28:04 +01:00
|
|
|
{
|
|
|
|
int r;
|
|
|
|
|
|
|
|
if ((r = mutex_unlock(&rfp->fp_lock)) != 0)
|
|
|
|
panic("Failed to unlock: %d", r);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*===========================================================================*
|
|
|
|
* thread_cleanup *
|
|
|
|
*===========================================================================*/
|
VFS: make all IPC asynchronous
By decoupling synchronous drivers from VFS, we are a big step closer to
supporting driver crashes under all circumstances. That is, VFS can't
become stuck on IPC with a synchronous driver (e.g., INET) and can
recover from crashing block drivers during open/close/ioctl or during
communication with an FS.
In order to maintain serialized communication with a synchronous driver,
the communication is wrapped by a mutex on a per driver basis (not major
numbers as there can be multiple majors with identical endpoints). Majors
that share a driver endpoint point to a single mutex object.
In order to support crashes from block drivers, the file reopen tactic
had to be changed; first reopen files associated with the crashed
driver, then send the new driver endpoint to FSes. This solves a
deadlock between the FS and the block driver;
- VFS would send REQ_NEW_DRIVER to an FS, but he FS only receives it
after retrying the current request to the newly started driver.
- The block driver would refuse the retried request until all files
had been reopened.
- VFS would reopen files only after getting a reply from the initial
REQ_NEW_DRIVER.
When a character special driver crashes, all associated files have to
be marked invalid and closed (or reopened if flagged as such). However,
they can only be closed if a thread holds exclusive access to it. To
obtain exclusive access, the worker thread (which handles the new driver
endpoint event from DS) schedules a new job to garbage collect invalid
files. This way, we can signal the worker thread that was talking to the
crashed driver and will release exclusive access to a file associated
with the crashed driver and prevent the garbage collecting worker thread
from dead locking on that file.
Also, when a character special driver crashes, RS will unmap the driver
and remap it upon restart. During unmapping, associated files are marked
invalid instead of waiting for an endpoint up event from DS, as that
event might come later than new read/write/select requests and thus
cause confusion in the freshly started driver.
When locking a filp, the usage counters are no longer checked. The usage
counter can legally go down to zero during filp invalidation while there
are locks pending.
DS events are handled by a separate worker thread instead of the main
thread as reopening files could lead to another crash and a stuck thread.
An additional worker thread is then necessary to unlock it.
Finally, with everything asynchronous a race condition in do_select
surfaced. A select entry was only marked in use after succesfully sending
initial select requests to drivers and having to wait. When multiple
select() calls were handled there was opportunity that these entries
were overwritten. This had as effect that some select results were
ignored (and select() remained blocking instead if returning) or do_select
tried to access filps that were not present (because thrown away by
secondary select()). This bug manifested itself with sendrecs, but was
very hard to reproduce. However, it became awfully easy to trigger with
asynsends only.
2012-08-28 16:06:51 +02:00
|
|
|
void thread_cleanup(struct fproc *rfp)
|
2012-02-13 16:28:04 +01:00
|
|
|
{
|
|
|
|
/* Clean up worker thread. Skip parts if this thread is not associated
|
|
|
|
* with a particular process (i.e., rfp is NULL) */
|
|
|
|
|
|
|
|
#if LOCK_DEBUG
|
|
|
|
if (rfp != NULL) {
|
|
|
|
check_filp_locks_by_me();
|
|
|
|
check_vnode_locks_by_me(rfp);
|
|
|
|
check_vmnt_locks_by_me(rfp);
|
|
|
|
}
|
Initialization protocol for system services.
SYSLIB CHANGES:
- SEF framework now supports a new SEF Init request type from RS. 3 different
callbacks are available (init_fresh, init_lu, init_restart) to specify
initialization code when a service starts fresh, starts after a live update,
or restarts.
SYSTEM SERVICE CHANGES:
- Initialization code for system services is now enclosed in a callback SEF will
automatically call at init time. The return code of the callback will
tell RS whether the initialization completed successfully.
- Each init callback can access information passed by RS to initialize. As of
now, each system service has access to the public entries of RS's system process
table to gather all the information required to initialize. This design
eliminates many existing or potential races at boot time and provides a uniform
initialization interface to system services. The same interface will be reused
for the upcoming publish/subscribe model to handle dynamic
registration / deregistration of system services.
VM CHANGES:
- Uniform privilege management for all system services. Every service uses the
same call mask format. For boot services, VM copies the call mask from init
data. For dynamic services, VM still receives the call mask via rs_set_priv
call that will be soon replaced by the upcoming publish/subscribe model.
RS CHANGES:
- The system process table has been reorganized and split into private entries
and public entries. Only the latter ones are exposed to system services.
- VM call masks are now entirely configured in rs/table.c
- RS has now its own slot in the system process table. Only kernel tasks and
user processes not included in the boot image are now left out from the system
process table.
- RS implements the initialization protocol for system services.
- For services in the boot image, RS blocks till initialization is complete and
panics when failure is reported back. Services are initialized in their order of
appearance in the boot image priv table and RS blocks to implements synchronous
initialization for every system service having the flag SF_SYNCH_BOOT set.
- For services started dynamically, the initialization protocol is implemented
as though it were the first ping for the service. In this case, if the
system service fails to report back (or reports failure), RS brings the service
down rather than trying to restart it.
2010-01-08 02:20:42 +01:00
|
|
|
#endif
|
|
|
|
|
2012-02-13 16:28:04 +01:00
|
|
|
if (rfp != NULL && rfp->fp_flags & FP_PM_PENDING) { /* Postponed PM call */
|
2012-04-13 14:50:38 +02:00
|
|
|
job_m_in = rfp->fp_job.j_m_in;
|
2012-02-13 16:28:04 +01:00
|
|
|
rfp->fp_flags &= ~FP_PM_PENDING;
|
|
|
|
service_pm_postponed();
|
|
|
|
}
|
|
|
|
|
|
|
|
#if LOCK_DEBUG
|
|
|
|
if (rfp != NULL) {
|
|
|
|
check_filp_locks_by_me();
|
|
|
|
check_vnode_locks_by_me(rfp);
|
|
|
|
check_vmnt_locks_by_me(rfp);
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
if (rfp != NULL) {
|
|
|
|
rfp->fp_flags &= ~FP_DROP_WORK;
|
2012-11-14 14:12:37 +01:00
|
|
|
if (rfp->fp_flags & FP_SRV_PROC) {
|
|
|
|
struct vmnt *vmp;
|
|
|
|
|
|
|
|
if ((vmp = find_vmnt(rfp->fp_endpoint)) != NULL) {
|
|
|
|
vmp->m_flags &= ~VMNT_CALLBACK;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (deadlock_resolving) {
|
|
|
|
if (self->w_tid == dl_worker.w_tid)
|
|
|
|
deadlock_resolving = 0;
|
2012-02-13 16:28:04 +01:00
|
|
|
}
|
Initialization protocol for system services.
SYSLIB CHANGES:
- SEF framework now supports a new SEF Init request type from RS. 3 different
callbacks are available (init_fresh, init_lu, init_restart) to specify
initialization code when a service starts fresh, starts after a live update,
or restarts.
SYSTEM SERVICE CHANGES:
- Initialization code for system services is now enclosed in a callback SEF will
automatically call at init time. The return code of the callback will
tell RS whether the initialization completed successfully.
- Each init callback can access information passed by RS to initialize. As of
now, each system service has access to the public entries of RS's system process
table to gather all the information required to initialize. This design
eliminates many existing or potential races at boot time and provides a uniform
initialization interface to system services. The same interface will be reused
for the upcoming publish/subscribe model to handle dynamic
registration / deregistration of system services.
VM CHANGES:
- Uniform privilege management for all system services. Every service uses the
same call mask format. For boot services, VM copies the call mask from init
data. For dynamic services, VM still receives the call mask via rs_set_priv
call that will be soon replaced by the upcoming publish/subscribe model.
RS CHANGES:
- The system process table has been reorganized and split into private entries
and public entries. Only the latter ones are exposed to system services.
- VM call masks are now entirely configured in rs/table.c
- RS has now its own slot in the system process table. Only kernel tasks and
user processes not included in the boot image are now left out from the system
process table.
- RS implements the initialization protocol for system services.
- For services in the boot image, RS blocks till initialization is complete and
panics when failure is reported back. Services are initialized in their order of
appearance in the boot image priv table and RS blocks to implements synchronous
initialization for every system service having the flag SF_SYNCH_BOOT set.
- For services started dynamically, the initialization protocol is implemented
as though it were the first ping for the service. In this case, if the
system service fails to report back (or reports failure), RS brings the service
down rather than trying to restart it.
2010-01-08 02:20:42 +01:00
|
|
|
}
|
|
|
|
|
2005-04-21 16:53:53 +02:00
|
|
|
/*===========================================================================*
|
|
|
|
* get_work *
|
|
|
|
*===========================================================================*/
|
2012-03-25 20:25:53 +02:00
|
|
|
static void get_work()
|
2012-02-13 16:28:04 +01:00
|
|
|
{
|
2005-04-21 16:53:53 +02:00
|
|
|
/* Normally wait for new input. However, if 'reviving' is
|
|
|
|
* nonzero, a suspended process must be awakened.
|
|
|
|
*/
|
2012-02-13 16:28:04 +01:00
|
|
|
int r, found_one, proc_p;
|
2005-04-21 16:53:53 +02:00
|
|
|
register struct fproc *rp;
|
|
|
|
|
2007-08-07 14:52:47 +02:00
|
|
|
while (reviving != 0) {
|
2012-02-13 16:28:04 +01:00
|
|
|
found_one = FALSE;
|
|
|
|
|
|
|
|
/* Find a suspended process. */
|
|
|
|
for (rp = &fproc[0]; rp < &fproc[NR_PROCS]; rp++)
|
|
|
|
if (rp->fp_pid != PID_FREE && (rp->fp_flags & FP_REVIVED)) {
|
|
|
|
found_one = TRUE; /* Found a suspended process */
|
|
|
|
if (unblock(rp))
|
|
|
|
return; /* So main loop can process job */
|
|
|
|
send_work();
|
2005-04-21 16:53:53 +02:00
|
|
|
}
|
2012-02-13 16:28:04 +01:00
|
|
|
|
|
|
|
if (!found_one) /* Consistency error */
|
|
|
|
panic("VFS: get_work couldn't revive anyone");
|
2005-04-21 16:53:53 +02:00
|
|
|
}
|
|
|
|
|
endpoint-aware conversion of servers.
'who', indicating caller number in pm and fs and some other servers, has
been removed in favour of 'who_e' (endpoint) and 'who_p' (proc nr.).
In both PM and FS, isokendpt() convert endpoints to process slot
numbers, returning OK if it was a valid and consistent endpoint number.
okendpt() does the same but panic()s if it doesn't succeed. (In PM,
this is pm_isok..)
pm and fs keep their own records of process endpoints in their proc tables,
which are needed to make kernel calls about those processes.
message field names have changed.
fs drivers are endpoints.
fs now doesn't try to get out of driver deadlock, as the protocol isn't
supposed to let that happen any more. (A warning is printed if ELOCKED
is detected though.)
fproc[].fp_task (indicating which driver the process is suspended on)
became an int.
PM and FS now get endpoint numbers of initial boot processes from the
kernel. These happen to be the same as the old proc numbers, to let
user processes reach them with the old numbers, but FS and PM don't know
that. All new processes after INIT, even after the generation number
wraps around, get endpoint numbers with generation 1 and higher, so
the first instances of the boot processes are the only processes ever
to have endpoint numbers in the old proc number range.
More return code checks of sys_* functions have been added.
IS has become endpoint-aware. Ditched the 'text' and 'data' fields
in the kernel dump (which show locations, not sizes, so aren't terribly
useful) in favour of the endpoint number. Proc number is still visible.
Some other dumps (e.g. dmap, rs) show endpoint numbers now too which got
the formatting changed.
PM reading segments using rw_seg() has changed - it uses other fields
in the message now instead of encoding the segment and process number and
fd in the fd field. For that it uses _read_pm() and _write_pm() which to
_taskcall()s directly in pm/misc.c.
PM now sys_exit()s itself on panic(), instead of sys_abort().
RS also talks in endpoints instead of process numbers.
2006-03-03 11:20:58 +01:00
|
|
|
for(;;) {
|
2012-02-13 16:28:04 +01:00
|
|
|
/* Normal case. No one to revive. Get a useful request. */
|
|
|
|
if ((r = sef_receive(receive_from, &m_in)) != OK) {
|
|
|
|
panic("VFS: sef_receive error: %d", r);
|
2009-09-21 16:49:26 +02:00
|
|
|
}
|
2012-02-13 16:28:04 +01:00
|
|
|
|
|
|
|
proc_p = _ENDPOINT_P(m_in.m_source);
|
2012-07-17 12:29:22 +02:00
|
|
|
if (proc_p < 0 || proc_p >= NR_PROCS) fp = NULL;
|
2012-02-13 16:28:04 +01:00
|
|
|
else fp = &fproc[proc_p];
|
|
|
|
|
|
|
|
if (m_in.m_type == EDEADSRCDST) return; /* Failed 'sendrec' */
|
|
|
|
|
|
|
|
/* Negative who_p is never used to access the fproc array. Negative
|
|
|
|
* numbers (kernel tasks) are treated in a special way.
|
|
|
|
*/
|
|
|
|
if (who_p >= (int)(sizeof(fproc) / sizeof(struct fproc)))
|
|
|
|
panic("receive process out of range: %d", who_p);
|
|
|
|
if (who_p >= 0 && fproc[who_p].fp_endpoint == NONE) {
|
2012-04-13 14:50:38 +02:00
|
|
|
printf("VFS: ignoring request from %d: NONE endpoint %d (%d)\n",
|
2012-02-13 16:28:04 +01:00
|
|
|
m_in.m_source, who_p, m_in.m_type);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Internal consistency check; our mental image of process numbers and
|
|
|
|
* endpoints must match with how the rest of the system thinks of them.
|
|
|
|
*/
|
|
|
|
if (who_p >= 0 && fproc[who_p].fp_endpoint != who_e) {
|
|
|
|
if (fproc[who_p].fp_endpoint == NONE)
|
|
|
|
printf("slot unknown even\n");
|
|
|
|
|
|
|
|
printf("VFS: receive endpoint inconsistent (source %d, who_p "
|
|
|
|
"%d, stored ep %d, who_e %d).\n", m_in.m_source, who_p,
|
|
|
|
fproc[who_p].fp_endpoint, who_e);
|
|
|
|
panic("VFS: inconsistent endpoint ");
|
|
|
|
}
|
|
|
|
|
|
|
|
return;
|
endpoint-aware conversion of servers.
'who', indicating caller number in pm and fs and some other servers, has
been removed in favour of 'who_e' (endpoint) and 'who_p' (proc nr.).
In both PM and FS, isokendpt() convert endpoints to process slot
numbers, returning OK if it was a valid and consistent endpoint number.
okendpt() does the same but panic()s if it doesn't succeed. (In PM,
this is pm_isok..)
pm and fs keep their own records of process endpoints in their proc tables,
which are needed to make kernel calls about those processes.
message field names have changed.
fs drivers are endpoints.
fs now doesn't try to get out of driver deadlock, as the protocol isn't
supposed to let that happen any more. (A warning is printed if ELOCKED
is detected though.)
fproc[].fp_task (indicating which driver the process is suspended on)
became an int.
PM and FS now get endpoint numbers of initial boot processes from the
kernel. These happen to be the same as the old proc numbers, to let
user processes reach them with the old numbers, but FS and PM don't know
that. All new processes after INIT, even after the generation number
wraps around, get endpoint numbers with generation 1 and higher, so
the first instances of the boot processes are the only processes ever
to have endpoint numbers in the old proc number range.
More return code checks of sys_* functions have been added.
IS has become endpoint-aware. Ditched the 'text' and 'data' fields
in the kernel dump (which show locations, not sizes, so aren't terribly
useful) in favour of the endpoint number. Proc number is still visible.
Some other dumps (e.g. dmap, rs) show endpoint numbers now too which got
the formatting changed.
PM reading segments using rw_seg() has changed - it uses other fields
in the message now instead of encoding the segment and process number and
fd in the fd field. For that it uses _read_pm() and _write_pm() which to
_taskcall()s directly in pm/misc.c.
PM now sys_exit()s itself on panic(), instead of sys_abort().
RS also talks in endpoints instead of process numbers.
2006-03-03 11:20:58 +01:00
|
|
|
}
|
2005-04-21 16:53:53 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/*===========================================================================*
|
|
|
|
* reply *
|
|
|
|
*===========================================================================*/
|
2013-04-12 18:41:23 +02:00
|
|
|
void reply(message *m_out, endpoint_t whom, int result)
|
2005-04-21 16:53:53 +02:00
|
|
|
{
|
2006-03-10 17:10:05 +01:00
|
|
|
/* Send a reply to a user process. If the send fails, just ignore it. */
|
2012-02-13 16:28:04 +01:00
|
|
|
int r;
|
2007-08-07 14:52:47 +02:00
|
|
|
|
2013-04-12 18:41:23 +02:00
|
|
|
m_out->reply_type = result;
|
|
|
|
r = sendnb(whom, m_out);
|
|
|
|
if (r != OK) {
|
|
|
|
printf("VFS: %d couldn't send reply %d to %d: %d\n", mthread_self(),
|
|
|
|
result, whom, r);
|
|
|
|
util_stacktrace();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*===========================================================================*
|
|
|
|
* replycode *
|
|
|
|
*===========================================================================*/
|
|
|
|
void replycode(endpoint_t whom, int result)
|
|
|
|
{
|
|
|
|
/* Send a reply to a user process. If the send fails, just ignore it. */
|
|
|
|
int r;
|
|
|
|
message m_out;
|
|
|
|
|
|
|
|
memset(&m_out, 0, sizeof(m_out));
|
|
|
|
|
2005-04-21 16:53:53 +02:00
|
|
|
m_out.reply_type = result;
|
2012-02-13 16:28:04 +01:00
|
|
|
r = sendnb(whom, &m_out);
|
|
|
|
if (r != OK) {
|
2012-04-13 14:50:38 +02:00
|
|
|
printf("VFS: %d couldn't send reply %d to %d: %d\n", mthread_self(),
|
|
|
|
result, whom, r);
|
|
|
|
util_stacktrace();
|
2012-02-13 16:28:04 +01:00
|
|
|
}
|
2005-04-21 16:53:53 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/*===========================================================================*
|
2012-02-13 16:28:04 +01:00
|
|
|
* service_pm_postponed *
|
2005-04-21 16:53:53 +02:00
|
|
|
*===========================================================================*/
|
2012-03-25 20:25:53 +02:00
|
|
|
static void service_pm_postponed(void)
|
2005-04-21 16:53:53 +02:00
|
|
|
{
|
2012-02-13 16:28:04 +01:00
|
|
|
int r;
|
2012-04-03 15:52:25 +02:00
|
|
|
vir_bytes pc, newsp;
|
2013-04-12 18:41:23 +02:00
|
|
|
message m_out;
|
|
|
|
|
|
|
|
memset(&m_out, 0, sizeof(m_out));
|
2005-04-21 16:53:53 +02:00
|
|
|
|
2012-04-13 14:50:38 +02:00
|
|
|
switch(job_call_nr) {
|
2012-02-13 16:28:04 +01:00
|
|
|
case PM_EXEC:
|
2012-04-13 14:50:38 +02:00
|
|
|
{
|
|
|
|
endpoint_t proc_e;
|
|
|
|
vir_bytes exec_path, stack_frame;
|
|
|
|
size_t exec_path_len, stack_frame_len;
|
|
|
|
|
|
|
|
proc_e = job_m_in.PM_PROC;
|
|
|
|
exec_path = (vir_bytes) job_m_in.PM_PATH;
|
|
|
|
exec_path_len = (size_t) job_m_in.PM_PATH_LEN;
|
|
|
|
stack_frame = (vir_bytes) job_m_in.PM_FRAME;
|
|
|
|
stack_frame_len = (size_t) job_m_in.PM_FRAME_LEN;
|
|
|
|
|
|
|
|
r = pm_exec(proc_e, exec_path, exec_path_len, stack_frame,
|
2012-04-18 16:32:38 +02:00
|
|
|
stack_frame_len, &pc, &newsp, job_m_in.PM_EXECFLAGS);
|
2012-04-13 14:50:38 +02:00
|
|
|
|
|
|
|
/* Reply status to PM */
|
|
|
|
m_out.m_type = PM_EXEC_REPLY;
|
|
|
|
m_out.PM_PROC = proc_e;
|
|
|
|
m_out.PM_PC = (void*) pc;
|
|
|
|
m_out.PM_STATUS = r;
|
2012-04-03 15:52:25 +02:00
|
|
|
m_out.PM_NEWSP = (void *) newsp;
|
2012-04-13 14:50:38 +02:00
|
|
|
}
|
2012-02-13 16:28:04 +01:00
|
|
|
break;
|
|
|
|
|
|
|
|
case PM_EXIT:
|
2012-04-13 14:50:38 +02:00
|
|
|
{
|
|
|
|
endpoint_t proc_e;
|
|
|
|
proc_e = job_m_in.PM_PROC;
|
2012-02-13 16:28:04 +01:00
|
|
|
|
2012-04-13 14:50:38 +02:00
|
|
|
pm_exit(proc_e);
|
2012-02-13 16:28:04 +01:00
|
|
|
|
2012-04-13 14:50:38 +02:00
|
|
|
/* Reply dummy status to PM for synchronization */
|
|
|
|
m_out.m_type = PM_EXIT_REPLY;
|
|
|
|
m_out.PM_PROC = proc_e;
|
|
|
|
}
|
2012-02-13 16:28:04 +01:00
|
|
|
break;
|
|
|
|
|
|
|
|
case PM_DUMPCORE:
|
2012-04-13 14:50:38 +02:00
|
|
|
{
|
|
|
|
endpoint_t proc_e, traced_proc_e;
|
|
|
|
int term_signal;
|
|
|
|
vir_bytes core_path;
|
|
|
|
|
|
|
|
proc_e = job_m_in.PM_PROC;
|
|
|
|
traced_proc_e = job_m_in.PM_TRACED_PROC;
|
2012-06-15 02:38:00 +02:00
|
|
|
if(job_m_in.PM_PROC != job_m_in.PM_TRACED_PROC) {
|
|
|
|
/* dumpcore request */
|
|
|
|
term_signal = 0;
|
|
|
|
} else {
|
|
|
|
/* dumpcore on exit */
|
|
|
|
term_signal = job_m_in.PM_TERM_SIG;
|
|
|
|
}
|
2012-04-13 14:50:38 +02:00
|
|
|
core_path = (vir_bytes) job_m_in.PM_PATH;
|
|
|
|
|
|
|
|
r = pm_dumpcore(proc_e, term_signal, core_path);
|
|
|
|
|
|
|
|
/* Reply status to PM */
|
|
|
|
m_out.m_type = PM_CORE_REPLY;
|
|
|
|
m_out.PM_PROC = proc_e;
|
|
|
|
m_out.PM_TRACED_PROC = traced_proc_e;
|
|
|
|
m_out.PM_STATUS = r;
|
|
|
|
}
|
2012-02-13 16:28:04 +01:00
|
|
|
break;
|
|
|
|
|
|
|
|
default:
|
2012-04-13 14:50:38 +02:00
|
|
|
panic("Unhandled postponed PM call %d", job_m_in.m_type);
|
2006-10-25 15:40:36 +02:00
|
|
|
}
|
2012-02-13 16:28:04 +01:00
|
|
|
|
|
|
|
r = send(PM_PROC_NR, &m_out);
|
|
|
|
if (r != OK)
|
|
|
|
panic("service_pm_postponed: send failed: %d", r);
|
2005-04-21 16:53:53 +02:00
|
|
|
}
|
2006-05-11 16:57:23 +02:00
|
|
|
|
|
|
|
/*===========================================================================*
|
|
|
|
* service_pm *
|
|
|
|
*===========================================================================*/
|
2012-03-25 20:25:53 +02:00
|
|
|
static void service_pm()
|
2006-05-11 16:57:23 +02:00
|
|
|
{
|
2012-02-13 16:28:04 +01:00
|
|
|
int r, slot;
|
2013-04-12 18:41:23 +02:00
|
|
|
message m_out;
|
|
|
|
|
|
|
|
memset(&m_out, 0, sizeof(m_out));
|
2006-05-11 16:57:23 +02:00
|
|
|
|
2012-04-13 14:50:38 +02:00
|
|
|
switch (job_call_nr) {
|
2012-02-13 16:28:04 +01:00
|
|
|
case PM_SETUID:
|
2012-04-13 14:50:38 +02:00
|
|
|
{
|
|
|
|
endpoint_t proc_e;
|
|
|
|
uid_t euid, ruid;
|
|
|
|
|
|
|
|
proc_e = job_m_in.PM_PROC;
|
|
|
|
euid = job_m_in.PM_EID;
|
|
|
|
ruid = job_m_in.PM_RID;
|
2006-05-11 16:57:23 +02:00
|
|
|
|
2012-04-13 14:50:38 +02:00
|
|
|
pm_setuid(proc_e, euid, ruid);
|
2006-05-11 16:57:23 +02:00
|
|
|
|
2012-04-13 14:50:38 +02:00
|
|
|
m_out.m_type = PM_SETUID_REPLY;
|
|
|
|
m_out.PM_PROC = proc_e;
|
|
|
|
}
|
Merge of David's ptrace branch. Summary:
o Support for ptrace T_ATTACH/T_DETACH and T_SYSCALL
o PM signal handling logic should now work properly, even with debuggers
being present
o Asynchronous PM/VFS protocol, full IPC support for senda(), and
AMF_NOREPLY senda() flag
DETAILS
Process stop and delay call handling of PM:
o Added sys_runctl() kernel call with sys_stop() and sys_resume()
aliases, for PM to stop and resume a process
o Added exception for sending/syscall-traced processes to sys_runctl(),
and matching SIGKREADY pseudo-signal to PM
o Fixed PM signal logic to deal with requests from a process after
stopping it (so-called "delay calls"), using the SIGKREADY facility
o Fixed various PM panics due to race conditions with delay calls versus
VFS calls
o Removed special PRIO_STOP priority value
o Added SYS_LOCK RTS kernel flag, to stop an individual process from
running while modifying its process structure
Signal and debugger handling in PM:
o Fixed debugger signals being dropped if a second signal arrives when
the debugger has not retrieved the first one
o Fixed debugger signals being sent to the debugger more than once
o Fixed debugger signals unpausing process in VFS; removed PM_UNPAUSE_TR
protocol message
o Detached debugger signals from general signal logic and from being
blocked on VFS calls, meaning that even VFS can now be traced
o Fixed debugger being unable to receive more than one pending signal in
one process stop
o Fixed signal delivery being delayed needlessly when multiple signals
are pending
o Fixed wait test for tracer, which was returning for children that were
not waited for
o Removed second parallel pending call from PM to VFS for any process
o Fixed process becoming runnable between exec() and debugger trap
o Added support for notifying the debugger before the parent when a
debugged child exits
o Fixed debugger death causing child to remain stopped forever
o Fixed consistently incorrect use of _NSIG
Extensions to ptrace():
o Added T_ATTACH and T_DETACH ptrace request, to attach and detach a
debugger to and from a process
o Added T_SYSCALL ptrace request, to trace system calls
o Added T_SETOPT ptrace request, to set trace options
o Added TO_TRACEFORK trace option, to attach automatically to children
of a traced process
o Added TO_ALTEXEC trace option, to send SIGSTOP instead of SIGTRAP upon
a successful exec() of the tracee
o Extended T_GETUSER ptrace support to allow retrieving a process's priv
structure
o Removed T_STOP ptrace request again, as it does not help implementing
debuggers properly
o Added MINIX3-specific ptrace test (test42)
o Added proper manual page for ptrace(2)
Asynchronous PM/VFS interface:
o Fixed asynchronous messages not being checked when receive() is called
with an endpoint other than ANY
o Added AMF_NOREPLY senda() flag, preventing such messages from
satisfying the receive part of a sendrec()
o Added asynsend3() that takes optional flags; asynsend() is now a
#define passing in 0 as third parameter
o Made PM/VFS protocol asynchronous; reintroduced tell_fs()
o Made PM_BASE request/reply number range unique
o Hacked in a horrible temporary workaround into RS to deal with newly
revealed RS-PM-VFS race condition triangle until VFS is asynchronous
System signal handling:
o Fixed shutdown logic of device drivers; removed old SIGKSTOP signal
o Removed is-superuser check from PM's do_procstat() (aka getsigset())
o Added sigset macros to allow system processes to deal with the full
signal set, rather than just the POSIX subset
Miscellaneous PM fixes:
o Split do_getset into do_get and do_set, merging common code and making
structure clearer
o Fixed setpriority() being able to put to sleep processes using an
invalid parameter, or revive zombie processes
o Made find_proc() global; removed obsolete proc_from_pid()
o Cleanup here and there
Also included:
o Fixed false-positive boot order kernel warning
o Removed last traces of old NOTIFY_FROM code
THINGS OF POSSIBLE INTEREST
o It should now be possible to run PM at any priority, even lower than
user processes
o No assumptions are made about communication speed between PM and VFS,
although communication must be FIFO
o A debugger will now receive incoming debuggee signals at kill time
only; the process may not yet be fully stopped
o A first step has been made towards making the SYSTEM task preemptible
2009-09-30 11:57:22 +02:00
|
|
|
break;
|
2006-05-11 16:57:23 +02:00
|
|
|
|
2012-02-13 16:28:04 +01:00
|
|
|
case PM_SETGID:
|
2012-04-13 14:50:38 +02:00
|
|
|
{
|
|
|
|
endpoint_t proc_e;
|
|
|
|
gid_t egid, rgid;
|
|
|
|
|
|
|
|
proc_e = job_m_in.PM_PROC;
|
|
|
|
egid = job_m_in.PM_EID;
|
|
|
|
rgid = job_m_in.PM_RID;
|
2006-05-11 16:57:23 +02:00
|
|
|
|
2012-04-13 14:50:38 +02:00
|
|
|
pm_setgid(proc_e, egid, rgid);
|
2006-05-11 16:57:23 +02:00
|
|
|
|
2012-04-13 14:50:38 +02:00
|
|
|
m_out.m_type = PM_SETGID_REPLY;
|
|
|
|
m_out.PM_PROC = proc_e;
|
|
|
|
}
|
Merge of David's ptrace branch. Summary:
o Support for ptrace T_ATTACH/T_DETACH and T_SYSCALL
o PM signal handling logic should now work properly, even with debuggers
being present
o Asynchronous PM/VFS protocol, full IPC support for senda(), and
AMF_NOREPLY senda() flag
DETAILS
Process stop and delay call handling of PM:
o Added sys_runctl() kernel call with sys_stop() and sys_resume()
aliases, for PM to stop and resume a process
o Added exception for sending/syscall-traced processes to sys_runctl(),
and matching SIGKREADY pseudo-signal to PM
o Fixed PM signal logic to deal with requests from a process after
stopping it (so-called "delay calls"), using the SIGKREADY facility
o Fixed various PM panics due to race conditions with delay calls versus
VFS calls
o Removed special PRIO_STOP priority value
o Added SYS_LOCK RTS kernel flag, to stop an individual process from
running while modifying its process structure
Signal and debugger handling in PM:
o Fixed debugger signals being dropped if a second signal arrives when
the debugger has not retrieved the first one
o Fixed debugger signals being sent to the debugger more than once
o Fixed debugger signals unpausing process in VFS; removed PM_UNPAUSE_TR
protocol message
o Detached debugger signals from general signal logic and from being
blocked on VFS calls, meaning that even VFS can now be traced
o Fixed debugger being unable to receive more than one pending signal in
one process stop
o Fixed signal delivery being delayed needlessly when multiple signals
are pending
o Fixed wait test for tracer, which was returning for children that were
not waited for
o Removed second parallel pending call from PM to VFS for any process
o Fixed process becoming runnable between exec() and debugger trap
o Added support for notifying the debugger before the parent when a
debugged child exits
o Fixed debugger death causing child to remain stopped forever
o Fixed consistently incorrect use of _NSIG
Extensions to ptrace():
o Added T_ATTACH and T_DETACH ptrace request, to attach and detach a
debugger to and from a process
o Added T_SYSCALL ptrace request, to trace system calls
o Added T_SETOPT ptrace request, to set trace options
o Added TO_TRACEFORK trace option, to attach automatically to children
of a traced process
o Added TO_ALTEXEC trace option, to send SIGSTOP instead of SIGTRAP upon
a successful exec() of the tracee
o Extended T_GETUSER ptrace support to allow retrieving a process's priv
structure
o Removed T_STOP ptrace request again, as it does not help implementing
debuggers properly
o Added MINIX3-specific ptrace test (test42)
o Added proper manual page for ptrace(2)
Asynchronous PM/VFS interface:
o Fixed asynchronous messages not being checked when receive() is called
with an endpoint other than ANY
o Added AMF_NOREPLY senda() flag, preventing such messages from
satisfying the receive part of a sendrec()
o Added asynsend3() that takes optional flags; asynsend() is now a
#define passing in 0 as third parameter
o Made PM/VFS protocol asynchronous; reintroduced tell_fs()
o Made PM_BASE request/reply number range unique
o Hacked in a horrible temporary workaround into RS to deal with newly
revealed RS-PM-VFS race condition triangle until VFS is asynchronous
System signal handling:
o Fixed shutdown logic of device drivers; removed old SIGKSTOP signal
o Removed is-superuser check from PM's do_procstat() (aka getsigset())
o Added sigset macros to allow system processes to deal with the full
signal set, rather than just the POSIX subset
Miscellaneous PM fixes:
o Split do_getset into do_get and do_set, merging common code and making
structure clearer
o Fixed setpriority() being able to put to sleep processes using an
invalid parameter, or revive zombie processes
o Made find_proc() global; removed obsolete proc_from_pid()
o Cleanup here and there
Also included:
o Fixed false-positive boot order kernel warning
o Removed last traces of old NOTIFY_FROM code
THINGS OF POSSIBLE INTEREST
o It should now be possible to run PM at any priority, even lower than
user processes
o No assumptions are made about communication speed between PM and VFS,
although communication must be FIFO
o A debugger will now receive incoming debuggee signals at kill time
only; the process may not yet be fully stopped
o A first step has been made towards making the SYSTEM task preemptible
2009-09-30 11:57:22 +02:00
|
|
|
break;
|
2006-05-11 16:57:23 +02:00
|
|
|
|
2012-02-13 16:28:04 +01:00
|
|
|
case PM_SETSID:
|
2012-04-13 14:50:38 +02:00
|
|
|
{
|
|
|
|
endpoint_t proc_e;
|
2006-05-11 16:57:23 +02:00
|
|
|
|
2012-04-13 14:50:38 +02:00
|
|
|
proc_e = job_m_in.PM_PROC;
|
|
|
|
pm_setsid(proc_e);
|
2006-05-11 16:57:23 +02:00
|
|
|
|
2012-04-13 14:50:38 +02:00
|
|
|
m_out.m_type = PM_SETSID_REPLY;
|
|
|
|
m_out.PM_PROC = proc_e;
|
|
|
|
}
|
Merge of David's ptrace branch. Summary:
o Support for ptrace T_ATTACH/T_DETACH and T_SYSCALL
o PM signal handling logic should now work properly, even with debuggers
being present
o Asynchronous PM/VFS protocol, full IPC support for senda(), and
AMF_NOREPLY senda() flag
DETAILS
Process stop and delay call handling of PM:
o Added sys_runctl() kernel call with sys_stop() and sys_resume()
aliases, for PM to stop and resume a process
o Added exception for sending/syscall-traced processes to sys_runctl(),
and matching SIGKREADY pseudo-signal to PM
o Fixed PM signal logic to deal with requests from a process after
stopping it (so-called "delay calls"), using the SIGKREADY facility
o Fixed various PM panics due to race conditions with delay calls versus
VFS calls
o Removed special PRIO_STOP priority value
o Added SYS_LOCK RTS kernel flag, to stop an individual process from
running while modifying its process structure
Signal and debugger handling in PM:
o Fixed debugger signals being dropped if a second signal arrives when
the debugger has not retrieved the first one
o Fixed debugger signals being sent to the debugger more than once
o Fixed debugger signals unpausing process in VFS; removed PM_UNPAUSE_TR
protocol message
o Detached debugger signals from general signal logic and from being
blocked on VFS calls, meaning that even VFS can now be traced
o Fixed debugger being unable to receive more than one pending signal in
one process stop
o Fixed signal delivery being delayed needlessly when multiple signals
are pending
o Fixed wait test for tracer, which was returning for children that were
not waited for
o Removed second parallel pending call from PM to VFS for any process
o Fixed process becoming runnable between exec() and debugger trap
o Added support for notifying the debugger before the parent when a
debugged child exits
o Fixed debugger death causing child to remain stopped forever
o Fixed consistently incorrect use of _NSIG
Extensions to ptrace():
o Added T_ATTACH and T_DETACH ptrace request, to attach and detach a
debugger to and from a process
o Added T_SYSCALL ptrace request, to trace system calls
o Added T_SETOPT ptrace request, to set trace options
o Added TO_TRACEFORK trace option, to attach automatically to children
of a traced process
o Added TO_ALTEXEC trace option, to send SIGSTOP instead of SIGTRAP upon
a successful exec() of the tracee
o Extended T_GETUSER ptrace support to allow retrieving a process's priv
structure
o Removed T_STOP ptrace request again, as it does not help implementing
debuggers properly
o Added MINIX3-specific ptrace test (test42)
o Added proper manual page for ptrace(2)
Asynchronous PM/VFS interface:
o Fixed asynchronous messages not being checked when receive() is called
with an endpoint other than ANY
o Added AMF_NOREPLY senda() flag, preventing such messages from
satisfying the receive part of a sendrec()
o Added asynsend3() that takes optional flags; asynsend() is now a
#define passing in 0 as third parameter
o Made PM/VFS protocol asynchronous; reintroduced tell_fs()
o Made PM_BASE request/reply number range unique
o Hacked in a horrible temporary workaround into RS to deal with newly
revealed RS-PM-VFS race condition triangle until VFS is asynchronous
System signal handling:
o Fixed shutdown logic of device drivers; removed old SIGKSTOP signal
o Removed is-superuser check from PM's do_procstat() (aka getsigset())
o Added sigset macros to allow system processes to deal with the full
signal set, rather than just the POSIX subset
Miscellaneous PM fixes:
o Split do_getset into do_get and do_set, merging common code and making
structure clearer
o Fixed setpriority() being able to put to sleep processes using an
invalid parameter, or revive zombie processes
o Made find_proc() global; removed obsolete proc_from_pid()
o Cleanup here and there
Also included:
o Fixed false-positive boot order kernel warning
o Removed last traces of old NOTIFY_FROM code
THINGS OF POSSIBLE INTEREST
o It should now be possible to run PM at any priority, even lower than
user processes
o No assumptions are made about communication speed between PM and VFS,
although communication must be FIFO
o A debugger will now receive incoming debuggee signals at kill time
only; the process may not yet be fully stopped
o A first step has been made towards making the SYSTEM task preemptible
2009-09-30 11:57:22 +02:00
|
|
|
break;
|
2006-05-11 16:57:23 +02:00
|
|
|
|
2012-02-13 16:28:04 +01:00
|
|
|
case PM_EXEC:
|
|
|
|
case PM_EXIT:
|
|
|
|
case PM_DUMPCORE:
|
2012-04-13 14:50:38 +02:00
|
|
|
{
|
|
|
|
endpoint_t proc_e = job_m_in.PM_PROC;
|
2012-06-15 02:38:00 +02:00
|
|
|
|
|
|
|
if(isokendpt(proc_e, &slot) != OK) {
|
|
|
|
printf("VFS: proc ep %d not ok\n", proc_e);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2012-04-13 14:50:38 +02:00
|
|
|
fp = &fproc[slot];
|
|
|
|
|
|
|
|
if (fp->fp_flags & FP_PENDING) {
|
|
|
|
/* This process has a request pending, but PM wants it
|
|
|
|
* gone. Forget about the pending request and satisfy
|
|
|
|
* PM's request instead. Note that a pending request
|
|
|
|
* AND an EXEC request are mutually exclusive. Also, PM
|
|
|
|
* should send only one request/process at a time.
|
|
|
|
*/
|
|
|
|
assert(fp->fp_job.j_m_in.m_source != PM_PROC_NR);
|
|
|
|
}
|
2006-05-11 16:57:23 +02:00
|
|
|
|
2012-04-13 14:50:38 +02:00
|
|
|
/* PM requests on behalf of a proc are handled after the
|
|
|
|
* system call that might be in progress for that proc has
|
|
|
|
* finished. If the proc is not busy, we start a dummy call.
|
|
|
|
*/
|
|
|
|
if (!(fp->fp_flags & FP_PENDING) &&
|
|
|
|
mutex_trylock(&fp->fp_lock) == 0) {
|
|
|
|
mutex_unlock(&fp->fp_lock);
|
|
|
|
worker_start(do_dummy);
|
|
|
|
fp->fp_flags |= FP_DROP_WORK;
|
|
|
|
}
|
2006-05-11 16:57:23 +02:00
|
|
|
|
2012-04-13 14:50:38 +02:00
|
|
|
fp->fp_job.j_m_in = job_m_in;
|
|
|
|
fp->fp_flags |= FP_PM_PENDING;
|
2006-05-11 16:57:23 +02:00
|
|
|
|
2012-04-13 14:50:38 +02:00
|
|
|
return;
|
|
|
|
}
|
2012-02-13 16:28:04 +01:00
|
|
|
case PM_FORK:
|
|
|
|
case PM_SRV_FORK:
|
2012-04-13 14:50:38 +02:00
|
|
|
{
|
|
|
|
endpoint_t pproc_e, proc_e;
|
|
|
|
pid_t child_pid;
|
|
|
|
uid_t reuid;
|
|
|
|
gid_t regid;
|
|
|
|
|
|
|
|
pproc_e = job_m_in.PM_PPROC;
|
|
|
|
proc_e = job_m_in.PM_PROC;
|
|
|
|
child_pid = job_m_in.PM_CPID;
|
|
|
|
reuid = job_m_in.PM_REUID;
|
|
|
|
regid = job_m_in.PM_REGID;
|
|
|
|
|
|
|
|
pm_fork(pproc_e, proc_e, child_pid);
|
|
|
|
m_out.m_type = PM_FORK_REPLY;
|
|
|
|
|
|
|
|
if (job_call_nr == PM_SRV_FORK) {
|
|
|
|
m_out.m_type = PM_SRV_FORK_REPLY;
|
|
|
|
pm_setuid(proc_e, reuid, reuid);
|
|
|
|
pm_setgid(proc_e, regid, regid);
|
|
|
|
}
|
2012-01-27 12:50:11 +01:00
|
|
|
|
2012-04-13 14:50:38 +02:00
|
|
|
m_out.PM_PROC = proc_e;
|
2012-01-27 12:50:11 +01:00
|
|
|
}
|
- Introduce support for sticky bit.
- Revise VFS-FS protocol and update VFS/MFS/ISOFS accordingly.
- Clean up MFS by removing old, dead code (backwards compatibility is broken by
the new VFS-FS protocol, anyway) and rewrite other parts. Also, make sure all
functions have proper banners and prototypes.
- VFS should always provide a (syntactically) valid path to the FS; no need for
the FS to do sanity checks when leaving/entering mount points.
- Fix several bugs in MFS:
- Several path lookup bugs in MFS.
- A link can be too big for the path buffer.
- A mountpoint can become inaccessible when the creation of a new inode
fails, because the inode already exists and is a mountpoint.
- Introduce support for supplemental groups.
- Add test 46 to test supplemental group functionality (and removed obsolete
suppl. tests from test 2).
- Clean up VFS (not everything is done yet).
- ISOFS now opens device read-only. This makes the -r flag in the mount command
unnecessary (but will still report to be mounted read-write).
- Introduce PipeFS. PipeFS is a new FS that handles all anonymous and
named pipes. However, named pipes still reside on the (M)FS, as they are part
of the file system on disk. To make this work VFS now has a concept of
'mapped' inodes, which causes read, write, truncate and stat requests to be
redirected to the mapped FS, and all other requests to the original FS.
2009-12-20 21:27:14 +01:00
|
|
|
break;
|
2012-02-13 16:28:04 +01:00
|
|
|
case PM_SETGROUPS:
|
2012-04-13 14:50:38 +02:00
|
|
|
{
|
|
|
|
endpoint_t proc_e;
|
|
|
|
int group_no;
|
|
|
|
gid_t *group_addr;
|
- Introduce support for sticky bit.
- Revise VFS-FS protocol and update VFS/MFS/ISOFS accordingly.
- Clean up MFS by removing old, dead code (backwards compatibility is broken by
the new VFS-FS protocol, anyway) and rewrite other parts. Also, make sure all
functions have proper banners and prototypes.
- VFS should always provide a (syntactically) valid path to the FS; no need for
the FS to do sanity checks when leaving/entering mount points.
- Fix several bugs in MFS:
- Several path lookup bugs in MFS.
- A link can be too big for the path buffer.
- A mountpoint can become inaccessible when the creation of a new inode
fails, because the inode already exists and is a mountpoint.
- Introduce support for supplemental groups.
- Add test 46 to test supplemental group functionality (and removed obsolete
suppl. tests from test 2).
- Clean up VFS (not everything is done yet).
- ISOFS now opens device read-only. This makes the -r flag in the mount command
unnecessary (but will still report to be mounted read-write).
- Introduce PipeFS. PipeFS is a new FS that handles all anonymous and
named pipes. However, named pipes still reside on the (M)FS, as they are part
of the file system on disk. To make this work VFS now has a concept of
'mapped' inodes, which causes read, write, truncate and stat requests to be
redirected to the mapped FS, and all other requests to the original FS.
2009-12-20 21:27:14 +01:00
|
|
|
|
2012-04-13 14:50:38 +02:00
|
|
|
proc_e = job_m_in.PM_PROC;
|
|
|
|
group_no = job_m_in.PM_GROUP_NO;
|
|
|
|
group_addr = (gid_t *) job_m_in.PM_GROUP_ADDR;
|
- Introduce support for sticky bit.
- Revise VFS-FS protocol and update VFS/MFS/ISOFS accordingly.
- Clean up MFS by removing old, dead code (backwards compatibility is broken by
the new VFS-FS protocol, anyway) and rewrite other parts. Also, make sure all
functions have proper banners and prototypes.
- VFS should always provide a (syntactically) valid path to the FS; no need for
the FS to do sanity checks when leaving/entering mount points.
- Fix several bugs in MFS:
- Several path lookup bugs in MFS.
- A link can be too big for the path buffer.
- A mountpoint can become inaccessible when the creation of a new inode
fails, because the inode already exists and is a mountpoint.
- Introduce support for supplemental groups.
- Add test 46 to test supplemental group functionality (and removed obsolete
suppl. tests from test 2).
- Clean up VFS (not everything is done yet).
- ISOFS now opens device read-only. This makes the -r flag in the mount command
unnecessary (but will still report to be mounted read-write).
- Introduce PipeFS. PipeFS is a new FS that handles all anonymous and
named pipes. However, named pipes still reside on the (M)FS, as they are part
of the file system on disk. To make this work VFS now has a concept of
'mapped' inodes, which causes read, write, truncate and stat requests to be
redirected to the mapped FS, and all other requests to the original FS.
2009-12-20 21:27:14 +01:00
|
|
|
|
2012-04-13 14:50:38 +02:00
|
|
|
pm_setgroups(proc_e, group_no, group_addr);
|
|
|
|
|
|
|
|
m_out.m_type = PM_SETGROUPS_REPLY;
|
|
|
|
m_out.PM_PROC = proc_e;
|
|
|
|
}
|
Merge of David's ptrace branch. Summary:
o Support for ptrace T_ATTACH/T_DETACH and T_SYSCALL
o PM signal handling logic should now work properly, even with debuggers
being present
o Asynchronous PM/VFS protocol, full IPC support for senda(), and
AMF_NOREPLY senda() flag
DETAILS
Process stop and delay call handling of PM:
o Added sys_runctl() kernel call with sys_stop() and sys_resume()
aliases, for PM to stop and resume a process
o Added exception for sending/syscall-traced processes to sys_runctl(),
and matching SIGKREADY pseudo-signal to PM
o Fixed PM signal logic to deal with requests from a process after
stopping it (so-called "delay calls"), using the SIGKREADY facility
o Fixed various PM panics due to race conditions with delay calls versus
VFS calls
o Removed special PRIO_STOP priority value
o Added SYS_LOCK RTS kernel flag, to stop an individual process from
running while modifying its process structure
Signal and debugger handling in PM:
o Fixed debugger signals being dropped if a second signal arrives when
the debugger has not retrieved the first one
o Fixed debugger signals being sent to the debugger more than once
o Fixed debugger signals unpausing process in VFS; removed PM_UNPAUSE_TR
protocol message
o Detached debugger signals from general signal logic and from being
blocked on VFS calls, meaning that even VFS can now be traced
o Fixed debugger being unable to receive more than one pending signal in
one process stop
o Fixed signal delivery being delayed needlessly when multiple signals
are pending
o Fixed wait test for tracer, which was returning for children that were
not waited for
o Removed second parallel pending call from PM to VFS for any process
o Fixed process becoming runnable between exec() and debugger trap
o Added support for notifying the debugger before the parent when a
debugged child exits
o Fixed debugger death causing child to remain stopped forever
o Fixed consistently incorrect use of _NSIG
Extensions to ptrace():
o Added T_ATTACH and T_DETACH ptrace request, to attach and detach a
debugger to and from a process
o Added T_SYSCALL ptrace request, to trace system calls
o Added T_SETOPT ptrace request, to set trace options
o Added TO_TRACEFORK trace option, to attach automatically to children
of a traced process
o Added TO_ALTEXEC trace option, to send SIGSTOP instead of SIGTRAP upon
a successful exec() of the tracee
o Extended T_GETUSER ptrace support to allow retrieving a process's priv
structure
o Removed T_STOP ptrace request again, as it does not help implementing
debuggers properly
o Added MINIX3-specific ptrace test (test42)
o Added proper manual page for ptrace(2)
Asynchronous PM/VFS interface:
o Fixed asynchronous messages not being checked when receive() is called
with an endpoint other than ANY
o Added AMF_NOREPLY senda() flag, preventing such messages from
satisfying the receive part of a sendrec()
o Added asynsend3() that takes optional flags; asynsend() is now a
#define passing in 0 as third parameter
o Made PM/VFS protocol asynchronous; reintroduced tell_fs()
o Made PM_BASE request/reply number range unique
o Hacked in a horrible temporary workaround into RS to deal with newly
revealed RS-PM-VFS race condition triangle until VFS is asynchronous
System signal handling:
o Fixed shutdown logic of device drivers; removed old SIGKSTOP signal
o Removed is-superuser check from PM's do_procstat() (aka getsigset())
o Added sigset macros to allow system processes to deal with the full
signal set, rather than just the POSIX subset
Miscellaneous PM fixes:
o Split do_getset into do_get and do_set, merging common code and making
structure clearer
o Fixed setpriority() being able to put to sleep processes using an
invalid parameter, or revive zombie processes
o Made find_proc() global; removed obsolete proc_from_pid()
o Cleanup here and there
Also included:
o Fixed false-positive boot order kernel warning
o Removed last traces of old NOTIFY_FROM code
THINGS OF POSSIBLE INTEREST
o It should now be possible to run PM at any priority, even lower than
user processes
o No assumptions are made about communication speed between PM and VFS,
although communication must be FIFO
o A debugger will now receive incoming debuggee signals at kill time
only; the process may not yet be fully stopped
o A first step has been made towards making the SYSTEM task preemptible
2009-09-30 11:57:22 +02:00
|
|
|
break;
|
|
|
|
|
2012-02-13 16:28:04 +01:00
|
|
|
case PM_UNPAUSE:
|
2012-04-13 14:50:38 +02:00
|
|
|
{
|
|
|
|
endpoint_t proc_e;
|
|
|
|
|
|
|
|
proc_e = job_m_in.PM_PROC;
|
2006-10-25 15:40:36 +02:00
|
|
|
|
2012-04-13 14:50:38 +02:00
|
|
|
unpause(proc_e);
|
Merge of David's ptrace branch. Summary:
o Support for ptrace T_ATTACH/T_DETACH and T_SYSCALL
o PM signal handling logic should now work properly, even with debuggers
being present
o Asynchronous PM/VFS protocol, full IPC support for senda(), and
AMF_NOREPLY senda() flag
DETAILS
Process stop and delay call handling of PM:
o Added sys_runctl() kernel call with sys_stop() and sys_resume()
aliases, for PM to stop and resume a process
o Added exception for sending/syscall-traced processes to sys_runctl(),
and matching SIGKREADY pseudo-signal to PM
o Fixed PM signal logic to deal with requests from a process after
stopping it (so-called "delay calls"), using the SIGKREADY facility
o Fixed various PM panics due to race conditions with delay calls versus
VFS calls
o Removed special PRIO_STOP priority value
o Added SYS_LOCK RTS kernel flag, to stop an individual process from
running while modifying its process structure
Signal and debugger handling in PM:
o Fixed debugger signals being dropped if a second signal arrives when
the debugger has not retrieved the first one
o Fixed debugger signals being sent to the debugger more than once
o Fixed debugger signals unpausing process in VFS; removed PM_UNPAUSE_TR
protocol message
o Detached debugger signals from general signal logic and from being
blocked on VFS calls, meaning that even VFS can now be traced
o Fixed debugger being unable to receive more than one pending signal in
one process stop
o Fixed signal delivery being delayed needlessly when multiple signals
are pending
o Fixed wait test for tracer, which was returning for children that were
not waited for
o Removed second parallel pending call from PM to VFS for any process
o Fixed process becoming runnable between exec() and debugger trap
o Added support for notifying the debugger before the parent when a
debugged child exits
o Fixed debugger death causing child to remain stopped forever
o Fixed consistently incorrect use of _NSIG
Extensions to ptrace():
o Added T_ATTACH and T_DETACH ptrace request, to attach and detach a
debugger to and from a process
o Added T_SYSCALL ptrace request, to trace system calls
o Added T_SETOPT ptrace request, to set trace options
o Added TO_TRACEFORK trace option, to attach automatically to children
of a traced process
o Added TO_ALTEXEC trace option, to send SIGSTOP instead of SIGTRAP upon
a successful exec() of the tracee
o Extended T_GETUSER ptrace support to allow retrieving a process's priv
structure
o Removed T_STOP ptrace request again, as it does not help implementing
debuggers properly
o Added MINIX3-specific ptrace test (test42)
o Added proper manual page for ptrace(2)
Asynchronous PM/VFS interface:
o Fixed asynchronous messages not being checked when receive() is called
with an endpoint other than ANY
o Added AMF_NOREPLY senda() flag, preventing such messages from
satisfying the receive part of a sendrec()
o Added asynsend3() that takes optional flags; asynsend() is now a
#define passing in 0 as third parameter
o Made PM/VFS protocol asynchronous; reintroduced tell_fs()
o Made PM_BASE request/reply number range unique
o Hacked in a horrible temporary workaround into RS to deal with newly
revealed RS-PM-VFS race condition triangle until VFS is asynchronous
System signal handling:
o Fixed shutdown logic of device drivers; removed old SIGKSTOP signal
o Removed is-superuser check from PM's do_procstat() (aka getsigset())
o Added sigset macros to allow system processes to deal with the full
signal set, rather than just the POSIX subset
Miscellaneous PM fixes:
o Split do_getset into do_get and do_set, merging common code and making
structure clearer
o Fixed setpriority() being able to put to sleep processes using an
invalid parameter, or revive zombie processes
o Made find_proc() global; removed obsolete proc_from_pid()
o Cleanup here and there
Also included:
o Fixed false-positive boot order kernel warning
o Removed last traces of old NOTIFY_FROM code
THINGS OF POSSIBLE INTEREST
o It should now be possible to run PM at any priority, even lower than
user processes
o No assumptions are made about communication speed between PM and VFS,
although communication must be FIFO
o A debugger will now receive incoming debuggee signals at kill time
only; the process may not yet be fully stopped
o A first step has been made towards making the SYSTEM task preemptible
2009-09-30 11:57:22 +02:00
|
|
|
|
2012-04-13 14:50:38 +02:00
|
|
|
m_out.m_type = PM_UNPAUSE_REPLY;
|
|
|
|
m_out.PM_PROC = proc_e;
|
|
|
|
}
|
Merge of David's ptrace branch. Summary:
o Support for ptrace T_ATTACH/T_DETACH and T_SYSCALL
o PM signal handling logic should now work properly, even with debuggers
being present
o Asynchronous PM/VFS protocol, full IPC support for senda(), and
AMF_NOREPLY senda() flag
DETAILS
Process stop and delay call handling of PM:
o Added sys_runctl() kernel call with sys_stop() and sys_resume()
aliases, for PM to stop and resume a process
o Added exception for sending/syscall-traced processes to sys_runctl(),
and matching SIGKREADY pseudo-signal to PM
o Fixed PM signal logic to deal with requests from a process after
stopping it (so-called "delay calls"), using the SIGKREADY facility
o Fixed various PM panics due to race conditions with delay calls versus
VFS calls
o Removed special PRIO_STOP priority value
o Added SYS_LOCK RTS kernel flag, to stop an individual process from
running while modifying its process structure
Signal and debugger handling in PM:
o Fixed debugger signals being dropped if a second signal arrives when
the debugger has not retrieved the first one
o Fixed debugger signals being sent to the debugger more than once
o Fixed debugger signals unpausing process in VFS; removed PM_UNPAUSE_TR
protocol message
o Detached debugger signals from general signal logic and from being
blocked on VFS calls, meaning that even VFS can now be traced
o Fixed debugger being unable to receive more than one pending signal in
one process stop
o Fixed signal delivery being delayed needlessly when multiple signals
are pending
o Fixed wait test for tracer, which was returning for children that were
not waited for
o Removed second parallel pending call from PM to VFS for any process
o Fixed process becoming runnable between exec() and debugger trap
o Added support for notifying the debugger before the parent when a
debugged child exits
o Fixed debugger death causing child to remain stopped forever
o Fixed consistently incorrect use of _NSIG
Extensions to ptrace():
o Added T_ATTACH and T_DETACH ptrace request, to attach and detach a
debugger to and from a process
o Added T_SYSCALL ptrace request, to trace system calls
o Added T_SETOPT ptrace request, to set trace options
o Added TO_TRACEFORK trace option, to attach automatically to children
of a traced process
o Added TO_ALTEXEC trace option, to send SIGSTOP instead of SIGTRAP upon
a successful exec() of the tracee
o Extended T_GETUSER ptrace support to allow retrieving a process's priv
structure
o Removed T_STOP ptrace request again, as it does not help implementing
debuggers properly
o Added MINIX3-specific ptrace test (test42)
o Added proper manual page for ptrace(2)
Asynchronous PM/VFS interface:
o Fixed asynchronous messages not being checked when receive() is called
with an endpoint other than ANY
o Added AMF_NOREPLY senda() flag, preventing such messages from
satisfying the receive part of a sendrec()
o Added asynsend3() that takes optional flags; asynsend() is now a
#define passing in 0 as third parameter
o Made PM/VFS protocol asynchronous; reintroduced tell_fs()
o Made PM_BASE request/reply number range unique
o Hacked in a horrible temporary workaround into RS to deal with newly
revealed RS-PM-VFS race condition triangle until VFS is asynchronous
System signal handling:
o Fixed shutdown logic of device drivers; removed old SIGKSTOP signal
o Removed is-superuser check from PM's do_procstat() (aka getsigset())
o Added sigset macros to allow system processes to deal with the full
signal set, rather than just the POSIX subset
Miscellaneous PM fixes:
o Split do_getset into do_get and do_set, merging common code and making
structure clearer
o Fixed setpriority() being able to put to sleep processes using an
invalid parameter, or revive zombie processes
o Made find_proc() global; removed obsolete proc_from_pid()
o Cleanup here and there
Also included:
o Fixed false-positive boot order kernel warning
o Removed last traces of old NOTIFY_FROM code
THINGS OF POSSIBLE INTEREST
o It should now be possible to run PM at any priority, even lower than
user processes
o No assumptions are made about communication speed between PM and VFS,
although communication must be FIFO
o A debugger will now receive incoming debuggee signals at kill time
only; the process may not yet be fully stopped
o A first step has been made towards making the SYSTEM task preemptible
2009-09-30 11:57:22 +02:00
|
|
|
break;
|
|
|
|
|
2012-02-13 16:28:04 +01:00
|
|
|
case PM_REBOOT:
|
Merge of David's ptrace branch. Summary:
o Support for ptrace T_ATTACH/T_DETACH and T_SYSCALL
o PM signal handling logic should now work properly, even with debuggers
being present
o Asynchronous PM/VFS protocol, full IPC support for senda(), and
AMF_NOREPLY senda() flag
DETAILS
Process stop and delay call handling of PM:
o Added sys_runctl() kernel call with sys_stop() and sys_resume()
aliases, for PM to stop and resume a process
o Added exception for sending/syscall-traced processes to sys_runctl(),
and matching SIGKREADY pseudo-signal to PM
o Fixed PM signal logic to deal with requests from a process after
stopping it (so-called "delay calls"), using the SIGKREADY facility
o Fixed various PM panics due to race conditions with delay calls versus
VFS calls
o Removed special PRIO_STOP priority value
o Added SYS_LOCK RTS kernel flag, to stop an individual process from
running while modifying its process structure
Signal and debugger handling in PM:
o Fixed debugger signals being dropped if a second signal arrives when
the debugger has not retrieved the first one
o Fixed debugger signals being sent to the debugger more than once
o Fixed debugger signals unpausing process in VFS; removed PM_UNPAUSE_TR
protocol message
o Detached debugger signals from general signal logic and from being
blocked on VFS calls, meaning that even VFS can now be traced
o Fixed debugger being unable to receive more than one pending signal in
one process stop
o Fixed signal delivery being delayed needlessly when multiple signals
are pending
o Fixed wait test for tracer, which was returning for children that were
not waited for
o Removed second parallel pending call from PM to VFS for any process
o Fixed process becoming runnable between exec() and debugger trap
o Added support for notifying the debugger before the parent when a
debugged child exits
o Fixed debugger death causing child to remain stopped forever
o Fixed consistently incorrect use of _NSIG
Extensions to ptrace():
o Added T_ATTACH and T_DETACH ptrace request, to attach and detach a
debugger to and from a process
o Added T_SYSCALL ptrace request, to trace system calls
o Added T_SETOPT ptrace request, to set trace options
o Added TO_TRACEFORK trace option, to attach automatically to children
of a traced process
o Added TO_ALTEXEC trace option, to send SIGSTOP instead of SIGTRAP upon
a successful exec() of the tracee
o Extended T_GETUSER ptrace support to allow retrieving a process's priv
structure
o Removed T_STOP ptrace request again, as it does not help implementing
debuggers properly
o Added MINIX3-specific ptrace test (test42)
o Added proper manual page for ptrace(2)
Asynchronous PM/VFS interface:
o Fixed asynchronous messages not being checked when receive() is called
with an endpoint other than ANY
o Added AMF_NOREPLY senda() flag, preventing such messages from
satisfying the receive part of a sendrec()
o Added asynsend3() that takes optional flags; asynsend() is now a
#define passing in 0 as third parameter
o Made PM/VFS protocol asynchronous; reintroduced tell_fs()
o Made PM_BASE request/reply number range unique
o Hacked in a horrible temporary workaround into RS to deal with newly
revealed RS-PM-VFS race condition triangle until VFS is asynchronous
System signal handling:
o Fixed shutdown logic of device drivers; removed old SIGKSTOP signal
o Removed is-superuser check from PM's do_procstat() (aka getsigset())
o Added sigset macros to allow system processes to deal with the full
signal set, rather than just the POSIX subset
Miscellaneous PM fixes:
o Split do_getset into do_get and do_set, merging common code and making
structure clearer
o Fixed setpriority() being able to put to sleep processes using an
invalid parameter, or revive zombie processes
o Made find_proc() global; removed obsolete proc_from_pid()
o Cleanup here and there
Also included:
o Fixed false-positive boot order kernel warning
o Removed last traces of old NOTIFY_FROM code
THINGS OF POSSIBLE INTEREST
o It should now be possible to run PM at any priority, even lower than
user processes
o No assumptions are made about communication speed between PM and VFS,
although communication must be FIFO
o A debugger will now receive incoming debuggee signals at kill time
only; the process may not yet be fully stopped
o A first step has been made towards making the SYSTEM task preemptible
2009-09-30 11:57:22 +02:00
|
|
|
pm_reboot();
|
|
|
|
|
|
|
|
/* Reply dummy status to PM for synchronization */
|
|
|
|
m_out.m_type = PM_REBOOT_REPLY;
|
|
|
|
|
|
|
|
break;
|
|
|
|
|
2012-02-13 16:28:04 +01:00
|
|
|
default:
|
2012-04-13 14:50:38 +02:00
|
|
|
printf("VFS: don't know how to handle PM request %d\n", job_call_nr);
|
Merge of David's ptrace branch. Summary:
o Support for ptrace T_ATTACH/T_DETACH and T_SYSCALL
o PM signal handling logic should now work properly, even with debuggers
being present
o Asynchronous PM/VFS protocol, full IPC support for senda(), and
AMF_NOREPLY senda() flag
DETAILS
Process stop and delay call handling of PM:
o Added sys_runctl() kernel call with sys_stop() and sys_resume()
aliases, for PM to stop and resume a process
o Added exception for sending/syscall-traced processes to sys_runctl(),
and matching SIGKREADY pseudo-signal to PM
o Fixed PM signal logic to deal with requests from a process after
stopping it (so-called "delay calls"), using the SIGKREADY facility
o Fixed various PM panics due to race conditions with delay calls versus
VFS calls
o Removed special PRIO_STOP priority value
o Added SYS_LOCK RTS kernel flag, to stop an individual process from
running while modifying its process structure
Signal and debugger handling in PM:
o Fixed debugger signals being dropped if a second signal arrives when
the debugger has not retrieved the first one
o Fixed debugger signals being sent to the debugger more than once
o Fixed debugger signals unpausing process in VFS; removed PM_UNPAUSE_TR
protocol message
o Detached debugger signals from general signal logic and from being
blocked on VFS calls, meaning that even VFS can now be traced
o Fixed debugger being unable to receive more than one pending signal in
one process stop
o Fixed signal delivery being delayed needlessly when multiple signals
are pending
o Fixed wait test for tracer, which was returning for children that were
not waited for
o Removed second parallel pending call from PM to VFS for any process
o Fixed process becoming runnable between exec() and debugger trap
o Added support for notifying the debugger before the parent when a
debugged child exits
o Fixed debugger death causing child to remain stopped forever
o Fixed consistently incorrect use of _NSIG
Extensions to ptrace():
o Added T_ATTACH and T_DETACH ptrace request, to attach and detach a
debugger to and from a process
o Added T_SYSCALL ptrace request, to trace system calls
o Added T_SETOPT ptrace request, to set trace options
o Added TO_TRACEFORK trace option, to attach automatically to children
of a traced process
o Added TO_ALTEXEC trace option, to send SIGSTOP instead of SIGTRAP upon
a successful exec() of the tracee
o Extended T_GETUSER ptrace support to allow retrieving a process's priv
structure
o Removed T_STOP ptrace request again, as it does not help implementing
debuggers properly
o Added MINIX3-specific ptrace test (test42)
o Added proper manual page for ptrace(2)
Asynchronous PM/VFS interface:
o Fixed asynchronous messages not being checked when receive() is called
with an endpoint other than ANY
o Added AMF_NOREPLY senda() flag, preventing such messages from
satisfying the receive part of a sendrec()
o Added asynsend3() that takes optional flags; asynsend() is now a
#define passing in 0 as third parameter
o Made PM/VFS protocol asynchronous; reintroduced tell_fs()
o Made PM_BASE request/reply number range unique
o Hacked in a horrible temporary workaround into RS to deal with newly
revealed RS-PM-VFS race condition triangle until VFS is asynchronous
System signal handling:
o Fixed shutdown logic of device drivers; removed old SIGKSTOP signal
o Removed is-superuser check from PM's do_procstat() (aka getsigset())
o Added sigset macros to allow system processes to deal with the full
signal set, rather than just the POSIX subset
Miscellaneous PM fixes:
o Split do_getset into do_get and do_set, merging common code and making
structure clearer
o Fixed setpriority() being able to put to sleep processes using an
invalid parameter, or revive zombie processes
o Made find_proc() global; removed obsolete proc_from_pid()
o Cleanup here and there
Also included:
o Fixed false-positive boot order kernel warning
o Removed last traces of old NOTIFY_FROM code
THINGS OF POSSIBLE INTEREST
o It should now be possible to run PM at any priority, even lower than
user processes
o No assumptions are made about communication speed between PM and VFS,
although communication must be FIFO
o A debugger will now receive incoming debuggee signals at kill time
only; the process may not yet be fully stopped
o A first step has been made towards making the SYSTEM task preemptible
2009-09-30 11:57:22 +02:00
|
|
|
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
r = send(PM_PROC_NR, &m_out);
|
|
|
|
if (r != OK)
|
2010-03-05 16:05:11 +01:00
|
|
|
panic("service_pm: send failed: %d", r);
|
- Introduce support for sticky bit.
- Revise VFS-FS protocol and update VFS/MFS/ISOFS accordingly.
- Clean up MFS by removing old, dead code (backwards compatibility is broken by
the new VFS-FS protocol, anyway) and rewrite other parts. Also, make sure all
functions have proper banners and prototypes.
- VFS should always provide a (syntactically) valid path to the FS; no need for
the FS to do sanity checks when leaving/entering mount points.
- Fix several bugs in MFS:
- Several path lookup bugs in MFS.
- A link can be too big for the path buffer.
- A mountpoint can become inaccessible when the creation of a new inode
fails, because the inode already exists and is a mountpoint.
- Introduce support for supplemental groups.
- Add test 46 to test supplemental group functionality (and removed obsolete
suppl. tests from test 2).
- Clean up VFS (not everything is done yet).
- ISOFS now opens device read-only. This makes the -r flag in the mount command
unnecessary (but will still report to be mounted read-write).
- Introduce PipeFS. PipeFS is a new FS that handles all anonymous and
named pipes. However, named pipes still reside on the (M)FS, as they are part
of the file system on disk. To make this work VFS now has a concept of
'mapped' inodes, which causes read, write, truncate and stat requests to be
redirected to the mapped FS, and all other requests to the original FS.
2009-12-20 21:27:14 +01:00
|
|
|
|
Merge of David's ptrace branch. Summary:
o Support for ptrace T_ATTACH/T_DETACH and T_SYSCALL
o PM signal handling logic should now work properly, even with debuggers
being present
o Asynchronous PM/VFS protocol, full IPC support for senda(), and
AMF_NOREPLY senda() flag
DETAILS
Process stop and delay call handling of PM:
o Added sys_runctl() kernel call with sys_stop() and sys_resume()
aliases, for PM to stop and resume a process
o Added exception for sending/syscall-traced processes to sys_runctl(),
and matching SIGKREADY pseudo-signal to PM
o Fixed PM signal logic to deal with requests from a process after
stopping it (so-called "delay calls"), using the SIGKREADY facility
o Fixed various PM panics due to race conditions with delay calls versus
VFS calls
o Removed special PRIO_STOP priority value
o Added SYS_LOCK RTS kernel flag, to stop an individual process from
running while modifying its process structure
Signal and debugger handling in PM:
o Fixed debugger signals being dropped if a second signal arrives when
the debugger has not retrieved the first one
o Fixed debugger signals being sent to the debugger more than once
o Fixed debugger signals unpausing process in VFS; removed PM_UNPAUSE_TR
protocol message
o Detached debugger signals from general signal logic and from being
blocked on VFS calls, meaning that even VFS can now be traced
o Fixed debugger being unable to receive more than one pending signal in
one process stop
o Fixed signal delivery being delayed needlessly when multiple signals
are pending
o Fixed wait test for tracer, which was returning for children that were
not waited for
o Removed second parallel pending call from PM to VFS for any process
o Fixed process becoming runnable between exec() and debugger trap
o Added support for notifying the debugger before the parent when a
debugged child exits
o Fixed debugger death causing child to remain stopped forever
o Fixed consistently incorrect use of _NSIG
Extensions to ptrace():
o Added T_ATTACH and T_DETACH ptrace request, to attach and detach a
debugger to and from a process
o Added T_SYSCALL ptrace request, to trace system calls
o Added T_SETOPT ptrace request, to set trace options
o Added TO_TRACEFORK trace option, to attach automatically to children
of a traced process
o Added TO_ALTEXEC trace option, to send SIGSTOP instead of SIGTRAP upon
a successful exec() of the tracee
o Extended T_GETUSER ptrace support to allow retrieving a process's priv
structure
o Removed T_STOP ptrace request again, as it does not help implementing
debuggers properly
o Added MINIX3-specific ptrace test (test42)
o Added proper manual page for ptrace(2)
Asynchronous PM/VFS interface:
o Fixed asynchronous messages not being checked when receive() is called
with an endpoint other than ANY
o Added AMF_NOREPLY senda() flag, preventing such messages from
satisfying the receive part of a sendrec()
o Added asynsend3() that takes optional flags; asynsend() is now a
#define passing in 0 as third parameter
o Made PM/VFS protocol asynchronous; reintroduced tell_fs()
o Made PM_BASE request/reply number range unique
o Hacked in a horrible temporary workaround into RS to deal with newly
revealed RS-PM-VFS race condition triangle until VFS is asynchronous
System signal handling:
o Fixed shutdown logic of device drivers; removed old SIGKSTOP signal
o Removed is-superuser check from PM's do_procstat() (aka getsigset())
o Added sigset macros to allow system processes to deal with the full
signal set, rather than just the POSIX subset
Miscellaneous PM fixes:
o Split do_getset into do_get and do_set, merging common code and making
structure clearer
o Fixed setpriority() being able to put to sleep processes using an
invalid parameter, or revive zombie processes
o Made find_proc() global; removed obsolete proc_from_pid()
o Cleanup here and there
Also included:
o Fixed false-positive boot order kernel warning
o Removed last traces of old NOTIFY_FROM code
THINGS OF POSSIBLE INTEREST
o It should now be possible to run PM at any priority, even lower than
user processes
o No assumptions are made about communication speed between PM and VFS,
although communication must be FIFO
o A debugger will now receive incoming debuggee signals at kill time
only; the process may not yet be fully stopped
o A first step has been made towards making the SYSTEM task preemptible
2009-09-30 11:57:22 +02:00
|
|
|
}
|
- Introduce support for sticky bit.
- Revise VFS-FS protocol and update VFS/MFS/ISOFS accordingly.
- Clean up MFS by removing old, dead code (backwards compatibility is broken by
the new VFS-FS protocol, anyway) and rewrite other parts. Also, make sure all
functions have proper banners and prototypes.
- VFS should always provide a (syntactically) valid path to the FS; no need for
the FS to do sanity checks when leaving/entering mount points.
- Fix several bugs in MFS:
- Several path lookup bugs in MFS.
- A link can be too big for the path buffer.
- A mountpoint can become inaccessible when the creation of a new inode
fails, because the inode already exists and is a mountpoint.
- Introduce support for supplemental groups.
- Add test 46 to test supplemental group functionality (and removed obsolete
suppl. tests from test 2).
- Clean up VFS (not everything is done yet).
- ISOFS now opens device read-only. This makes the -r flag in the mount command
unnecessary (but will still report to be mounted read-write).
- Introduce PipeFS. PipeFS is a new FS that handles all anonymous and
named pipes. However, named pipes still reside on the (M)FS, as they are part
of the file system on disk. To make this work VFS now has a concept of
'mapped' inodes, which causes read, write, truncate and stat requests to be
redirected to the mapped FS, and all other requests to the original FS.
2009-12-20 21:27:14 +01:00
|
|
|
|
2012-02-13 16:28:04 +01:00
|
|
|
|
|
|
|
/*===========================================================================*
|
|
|
|
* unblock *
|
|
|
|
*===========================================================================*/
|
2012-03-25 20:25:53 +02:00
|
|
|
static int unblock(rfp)
|
2012-02-13 16:28:04 +01:00
|
|
|
struct fproc *rfp;
|
|
|
|
{
|
|
|
|
int blocked_on;
|
|
|
|
|
|
|
|
fp = rfp;
|
|
|
|
blocked_on = rfp->fp_blocked_on;
|
2012-04-13 14:50:38 +02:00
|
|
|
m_in.m_source = rfp->fp_endpoint;
|
2012-02-13 16:28:04 +01:00
|
|
|
m_in.m_type = rfp->fp_block_callnr;
|
|
|
|
m_in.fd = scratch(fp).file.fd_nr;
|
|
|
|
m_in.buffer = scratch(fp).io.io_buffer;
|
|
|
|
m_in.nbytes = scratch(fp).io.io_nbytes;
|
|
|
|
|
|
|
|
rfp->fp_blocked_on = FP_BLOCKED_ON_NONE; /* no longer blocked */
|
|
|
|
rfp->fp_flags &= ~FP_REVIVED;
|
|
|
|
reviving--;
|
|
|
|
assert(reviving >= 0);
|
|
|
|
|
|
|
|
/* This should be a pipe I/O, not a device I/O. If it is, it'll 'leak'
|
|
|
|
* grants.
|
|
|
|
*/
|
|
|
|
assert(!GRANT_VALID(rfp->fp_grant));
|
|
|
|
|
|
|
|
/* Pending pipe reads/writes can be handled directly */
|
|
|
|
if (blocked_on == FP_BLOCKED_ON_PIPE) {
|
|
|
|
worker_start(do_pending_pipe);
|
|
|
|
yield(); /* Give thread a chance to run */
|
2012-04-13 14:50:38 +02:00
|
|
|
self = NULL;
|
2012-02-13 16:28:04 +01:00
|
|
|
return(0); /* Retrieve more work */
|
|
|
|
}
|
|
|
|
|
|
|
|
return(1); /* We've unblocked a process */
|
|
|
|
}
|