minix/servers/rs/exec.c

450 lines
12 KiB
C
Raw Normal View History

#include "inc.h"
#include <a.out.h>
2010-12-10 10:27:56 +01:00
#include <assert.h>
#include <libexec.h>
#include "exec.h"
#define BLOCK_SIZE 1024	/* NOTE(review): not referenced in this file as shown */

/* Forward declarations of the file-local helpers defined further down. */
static int do_exec(int proc_e, char *exec, size_t exec_len, char *progname,
  char *frame, int frame_len);
static int exec_newmem(int proc_e, vir_bytes text_addr,
  vir_bytes text_bytes, vir_bytes data_addr,
  vir_bytes data_bytes, vir_bytes tot_bytes,
  vir_bytes frame_len, int sep_id, int is_elf,
  dev_t st_dev, ino_t st_ino, time_t ctime, char *progname,
  int new_uid, int new_gid, vir_bytes *stack_topp,
  int *load_textp, int *allow_setuidp);
static void patch_ptr(char stack[ARG_MAX], vir_bytes base);
static int exec_restart(int proc_e, int result, vir_bytes pc);
static int read_seg(struct exec_info *execi, off_t off,
  int proc_e, int seg, vir_bytes seg_addr, phys_bytes seg_bytes);

/* Object-format loaders; each one matches the exec_loaders[] entry type. */
static int load_aout(struct exec_info *execi);
static int load_elf(struct exec_info *execi);
/* Array of loaders for different object formats */
static struct exec_loaders {
2010-12-10 10:27:56 +01:00
int (*load_object)(struct exec_info *);
} const exec_loaders[] = {
2010-12-10 10:27:56 +01:00
{ load_aout },
{ load_elf },
{ NULL }
};
New RS and new signal handling for system processes. UPDATING INFO: 20100317: /usr/src/etc/system.conf updated to ignore default kernel calls: copy it (or merge it) to /etc/system.conf. The hello driver (/dev/hello) added to the distribution: # cd /usr/src/commands/scripts && make clean install # cd /dev && MAKEDEV hello KERNEL CHANGES: - Generic signal handling support. The kernel no longer assumes PM as a signal manager for every process. The signal manager of a given process can now be specified in its privilege slot. When a signal has to be delivered, the kernel performs the lookup and forwards the signal to the appropriate signal manager. PM is the default signal manager for user processes, RS is the default signal manager for system processes. To enable ptrace()ing for system processes, it is sufficient to change the default signal manager to PM. This will temporarily disable crash recovery, though. - sys_exit() is now split into sys_exit() (i.e. exit() for system processes, which generates a self-termination signal), and sys_clear() (i.e. used by PM to ask the kernel to clear a process slot when a process exits). - Added a new kernel call (i.e. sys_update()) to swap two process slots and implement live update. PM CHANGES: - Posix signal handling is no longer allowed for system processes. System signals are split into two fixed categories: termination and non-termination signals. When a non-termination signal is processed, PM transforms the signal into an IPC message and delivers the message to the system process. When a termination signal is processed, PM terminates the process. - PM no longer assumes itself as the signal manager for system processes. It now makes sure that every system signal goes through the kernel before being actually processed. The kernel will then dispatch the signal to the appropriate signal manager which may or may not be PM. SYSLIB CHANGES: - Simplified SEF init and LU callbacks. 
- Added additional predefined SEF callbacks to debug crash recovery and live update. - Fixed a temporary ack in the SEF init protocol. SEF init reply is now completely synchronous. - Added SEF signal event type to provide a uniform interface for system processes to deal with signals. A sef_cb_signal_handler() callback is available for system processes to handle every received signal. A sef_cb_signal_manager() callback is used by signal managers to process system signals on behalf of the kernel. - Fixed a few bugs with memory mapping and DS. VM CHANGES: - Page faults and memory requests coming from the kernel are now implemented using signals. - Added a new VM call to swap two process slots and implement live update. - The call is used by RS at update time and in turn invokes the kernel call sys_update(). RS CHANGES: - RS has been reworked with a better functional decomposition. - Better kernel call masks. com.h now defines the set of very basic kernel calls every system service is allowed to use. This makes system.conf simpler and easier to maintain. In addition, this guarantees a higher level of isolation for system libraries that use one or more kernel calls internally (e.g. printf). - RS is the default signal manager for system processes. By default, RS intercepts every signal delivered to every system process. This makes crash recovery possible before bringing PM and friends in the loop. - RS now supports fast rollback when something goes wrong while initializing the new version during a live update. - Live update is now implemented by keeping the two versions side-by-side and swapping the process slots when the old version is ready to update. - Crash recovery is now implemented by keeping the two versions side-by-side and cleaning up the old version only when the recovery process is complete. DS CHANGES: - Fixed a bug when the process doing ds_publish() or ds_delete() is not known by DS. - Fixed the completely broken support for strings. 
String publishing is now implemented in the system library and simply wraps publishing of memory ranges. Ideally, we should adopt a similar approach for other data types as well. - Test suite fixed. DRIVER CHANGES: - The hello driver has been added to the Minix distribution to demonstrate basic live update and crash recovery functionalities. - Other drivers have been adapted to conform to the new SEF interface.
2010-03-17 02:15:29 +01:00
/*
 * Build the initial stack frame for a program and execute it in 'proc_e'.
 *
 * The frame is laid out as: argument count, argv offsets, a NULL, a second
 * NULL standing in for the (empty) environment vector, and then the argument
 * strings, padded to pointer alignment.  The vector entries are stored as
 * offsets from the start of the frame; do_exec() relocates them before the
 * frame is copied out.
 *
 *   proc_e    endpoint of the process that receives the new image
 *   exec      in-memory copy of the executable
 *   exec_len  size of 'exec' in bytes
 *   argv      NULL-terminated argument vector; argv[0] must be present
 *   Xenvp     ignored: no environment is passed on
 *
 * Returns -1 with errno set (EINVAL for a missing argv[0], E2BIG when the
 * frame is too large or cannot be allocated) if the frame cannot be built,
 * and otherwise whatever do_exec() returns.
 */
int srv_execve(int proc_e, char *exec, size_t exec_len, char **argv,
  char **UNUSED(Xenvp))
{
  char * const *ap;
  char *frame;
  char **vp;
  char *sp, *progname;
  size_t argc;
  size_t frame_size;
  size_t aligned_size;
  size_t string_off;
  size_t n;
  int ov;
  int r;

  /* The program name is derived from argv[0] below, so it has to exist.
   * Without this check an empty vector would hand NULL to strrchr().
   */
  if (argv == NULL || argv[0] == NULL) {
    errno = EINVAL;
    return -1;
  }

  /* Assumption: size_t and char * have the same size. */

  /* Create a stack image that only needs to be patched up slightly
   * by the kernel to be used for the process to be executed.
   */
  ov = 0;			/* No overflow yet. */
  frame_size = 0;		/* Size of the new initial stack. */
  string_off = 0;		/* Offset to start of the strings. */
  argc = 0;			/* Argument count. */

  for (ap = argv; *ap != NULL; ap++) {
    n = sizeof(*ap) + strlen(*ap) + 1;
    frame_size += n;
    if (frame_size < n) ov = 1;
    string_off += sizeof(*ap);
    argc++;
  }

  /* Add an argument count and two terminating nulls. */
  n = sizeof(argc) + sizeof(char *) + sizeof(char *);
  frame_size += n;
  if (frame_size < n) ov = 1;
  string_off += n;

  /* Align; a wrapped round-up counts as overflow too. */
  aligned_size = (frame_size + sizeof(char *) - 1) & ~(sizeof(char *) - 1);
  if (aligned_size < frame_size) ov = 1;
  frame_size = aligned_size;

  /* The party is off if there is an overflow. */
  if (ov || frame_size < 3 * sizeof(char *)) {
    errno = E2BIG;
    return -1;
  }

  /* Allocate space for the stack frame.
   * NOTE(review): allocation failure is reported as E2BIG, as before;
   * ENOMEM would be more accurate but callers may depend on this value.
   */
  frame = malloc(frame_size);
  if (frame == NULL) {
    errno = E2BIG;
    return -1;
  }

  /* Set arg count, init pointers to vector and string tables. */
  *(size_t *) frame = argc;
  vp = (char **) (frame + sizeof(argc));
  sp = frame + string_off;

  /* Load the argument vector and strings. */
  for (ap = argv; *ap != NULL; ap++) {
    *vp++ = (char *) (sp - frame);	/* offset, relocated later */
    n = strlen(*ap) + 1;
    memcpy(sp, *ap, n);
    sp += n;
  }
  *vp++ = NULL;			/* end of argv */

  /* No environment strings are copied, so the environment vector
   * consists of its terminator only.
   */
  *vp++ = NULL;

  /* Padding. */
  memset(sp, 0, (size_t) (frame + frame_size - sp));

  /* The program name is the last path component of argv[0]. */
  progname = strrchr(argv[0], '/');
  if (progname != NULL)
    progname++;
  else
    progname = argv[0];

  /* NOTE(review): do_exec() takes the length as int; frames larger than
   * INT_MAX are not rejected here.
   */
  r = do_exec(proc_e, exec, exec_len, progname, frame, (int) frame_size);

  /* Return the memory used for the frame and exit. */
  free(frame);
  return r;
}
2010-12-10 10:27:56 +01:00
static int do_exec(int proc_e, char *exec, size_t exec_len, char *progname,
char *frame, int frame_len)
2010-12-10 10:27:56 +01:00
{
int r;
vir_bytes vsp;
struct exec_info execi;
int i;
execi.proc_e = proc_e;
execi.image = exec;
execi.image_len = exec_len;
strncpy(execi.progname, progname, PROC_NAME_LEN-1);
execi.progname[PROC_NAME_LEN-1] = '\0';
execi.frame_len = frame_len;
for(i = 0; exec_loaders[i].load_object != NULL; i++) {
r = (*exec_loaders[i].load_object)(&execi);
/* Loaded successfully, so no need to try other loaders */
if (r == OK) break;
}
/* No exec loader could load the object */
if (r != OK) {
printf("RS: do_exec: loading error %d\n", r);
return r;
}
/* Patch up stack and copy it from RS to new core image. */
vsp = execi.stack_top;
vsp -= frame_len;
patch_ptr(frame, vsp);
r = sys_datacopy(SELF, (vir_bytes) frame,
proc_e, (vir_bytes) vsp, (phys_bytes)frame_len);
if (r != OK) {
printf("RS: stack_top is 0x%lx; tried to copy to 0x%lx in %d\n",
execi.stack_top, vsp, proc_e);
printf("do_exec: copying out new stack failed: %d\n", r);
exec_restart(proc_e, r, execi.pc);
return r;
}
return exec_restart(proc_e, OK, execi.pc);
}
static int load_aout(struct exec_info *execi)
{
int r;
int hdrlen, sep_id, load_text, allow_setuid;
2010-12-10 10:27:56 +01:00
vir_bytes text_bytes, data_bytes, bss_bytes;
phys_bytes tot_bytes;
off_t off;
uid_t new_uid;
gid_t new_gid;
2010-12-10 10:27:56 +01:00
int proc_e;
assert(execi != NULL);
assert(execi->image != NULL);
2010-12-10 10:27:56 +01:00
proc_e = execi->proc_e;
/* Read the file header and extract the segment sizes. */
2010-12-10 10:27:56 +01:00
r = read_header_aout(execi->image, execi->image_len, &sep_id,
&text_bytes, &data_bytes, &bss_bytes,
&tot_bytes, &execi->pc, &hdrlen);
if (r != OK)
{
2010-12-10 10:27:56 +01:00
return r;
}
new_uid= getuid();
new_gid= getgid();
2010-12-10 10:27:56 +01:00
/* XXX what should we use to identify the executable? */
2010-12-10 10:27:56 +01:00
r= exec_newmem(proc_e, 0 /*text_addr*/, text_bytes,
0 /*data_addr*/, data_bytes + bss_bytes, tot_bytes,
execi->frame_len, sep_id, 0 /*is_elf*/, 0 /*dev*/, proc_e /*inum*/, 0 /*ctime*/,
execi->progname, new_uid, new_gid, &execi->stack_top, &load_text,
&allow_setuid);
if (r != OK)
{
2010-12-10 10:27:56 +01:00
printf("RS: load_aout: exec_newmem failed: %d\n", r);
exec_restart(proc_e, r, execi->pc);
return r;
}
off = hdrlen;
/* Read in text and data segments. */
if (load_text) {
2010-12-10 10:27:56 +01:00
r= read_seg(execi, off, proc_e, T, 0, text_bytes);
if (r != OK)
{
2010-12-10 10:27:56 +01:00
printf("RS: load_aout: read_seg failed: %d\n", r);
exec_restart(proc_e, r, execi->pc);
return r;
}
}
else
2010-12-10 10:27:56 +01:00
printf("RS: load_aout: not loading text segment\n");
off += text_bytes;
2010-12-10 10:27:56 +01:00
r= read_seg(execi, off, proc_e, D, 0, data_bytes);
if (r != OK)
{
2010-12-10 10:27:56 +01:00
printf("RS: load_aout: read_seg failed: %d\n", r);
exec_restart(proc_e, r, execi->pc);
return r;
}
2010-12-10 10:27:56 +01:00
return OK;
}
static int load_elf(struct exec_info *execi)
{
int r;
int proc_e;
phys_bytes tot_bytes; /* total space for program, including gap */
vir_bytes text_vaddr, text_paddr, text_filebytes, text_membytes;
vir_bytes data_vaddr, data_paddr, data_filebytes, data_membytes;
2010-12-10 10:27:56 +01:00
off_t text_offset, data_offset;
int sep_id, is_elf, load_text, allow_setuid;
uid_t new_uid;
gid_t new_gid;
assert(execi != NULL);
assert(execi->image != NULL);
proc_e = execi->proc_e;
/* Read the file header and extract the segment sizes. */
r = read_header_elf(execi->image, &text_vaddr, &text_paddr,
&text_filebytes, &text_membytes,
&data_vaddr, &data_paddr,
&data_filebytes, &data_membytes,
&execi->pc, &text_offset, &data_offset);
2010-12-10 10:27:56 +01:00
if (r != OK) {
return(r);
}
new_uid= getuid();
new_gid= getgid();
2011-07-26 15:21:07 +02:00
sep_id = 0;
2010-12-10 10:27:56 +01:00
is_elf = 1;
tot_bytes = 0; /* Use default stack size */
2010-12-10 10:27:56 +01:00
r = exec_newmem(proc_e,
trunc_page(text_vaddr), text_membytes,
trunc_page(data_vaddr), data_membytes,
2010-12-10 10:27:56 +01:00
tot_bytes, execi->frame_len, sep_id, is_elf,
0 /*dev*/, proc_e /*inum*/, 0 /*ctime*/,
execi->progname, new_uid, new_gid,
&execi->stack_top, &load_text, &allow_setuid);
if (r != OK)
{
printf("RS: load_elf: exec_newmem failed: %d\n", r);
exec_restart(proc_e, r, execi->pc);
return r;
}
2010-12-10 10:27:56 +01:00
/* Read in text and data segments. */
if (load_text) {
r = read_seg(execi, text_offset, proc_e, T, text_vaddr, text_filebytes);
2010-12-10 10:27:56 +01:00
if (r != OK)
{
printf("RS: load_elf: read_seg failed: %d\n", r);
exec_restart(proc_e, r, execi->pc);
return r;
}
}
else
printf("RS: load_elf: not loading text segment\n");
r = read_seg(execi, data_offset, proc_e, D, data_vaddr, data_filebytes);
2010-12-10 10:27:56 +01:00
if (r != OK)
{
printf("RS: load_elf: read_seg failed: %d\n", r);
exec_restart(proc_e, r, execi->pc);
return r;
}
2010-12-10 10:27:56 +01:00
return(OK);
}
/*===========================================================================*
* exec_newmem *
*===========================================================================*/
2010-12-10 10:27:56 +01:00
static int exec_newmem(
int proc_e,
2010-12-10 10:27:56 +01:00
vir_bytes text_addr,
vir_bytes text_bytes,
2010-12-10 10:27:56 +01:00
vir_bytes data_addr,
vir_bytes data_bytes,
vir_bytes tot_bytes,
vir_bytes frame_len,
int sep_id,
2010-12-10 10:27:56 +01:00
int is_elf,
dev_t st_dev,
ino_t st_ino,
time_t ctime,
char *progname,
int new_uid,
int new_gid,
vir_bytes *stack_topp,
int *load_textp,
int *allow_setuidp
)
{
int r;
struct exec_newmem e;
message m;
2010-12-10 10:27:56 +01:00
e.text_addr = text_addr;
e.text_bytes= text_bytes;
2010-12-10 10:27:56 +01:00
e.data_addr = data_addr;
e.data_bytes= data_bytes;
e.tot_bytes= tot_bytes;
e.args_bytes= frame_len;
e.sep_id= sep_id;
2010-12-10 10:27:56 +01:00
e.is_elf= is_elf;
e.st_dev= st_dev;
e.st_ino= st_ino;
e.enst_ctime= ctime;
e.new_uid= new_uid;
e.new_gid= new_gid;
strncpy(e.progname, progname, sizeof(e.progname)-1);
e.progname[sizeof(e.progname)-1]= '\0';
m.m_type= EXEC_NEWMEM;
m.EXC_NM_PROC= proc_e;
m.EXC_NM_PTR= (char *)&e;
r= sendrec(PM_PROC_NR, &m);
if (r != OK)
return r;
#if 0
printf("exec_newmem: r = %d, m_type = %d\n", r, m.m_type);
#endif
*stack_topp= m.m1_i1;
*load_textp= !!(m.m1_i2 & EXC_NM_RF_LOAD_TEXT);
*allow_setuidp= !!(m.m1_i2 & EXC_NM_RF_ALLOW_SETUID);
#if 0
printf("RS: exec_newmem: stack_top = 0x%x\n", *stack_topp);
printf("RS: exec_newmem: load_text = %d\n", *load_textp);
#endif
return m.m_type;
}
/*===========================================================================*
* exec_restart *
*===========================================================================*/
2010-12-10 10:27:56 +01:00
static int exec_restart(int proc_e, int result, vir_bytes pc)
{
int r;
message m;
m.m_type= EXEC_RESTART;
m.EXC_RS_PROC= proc_e;
m.EXC_RS_RESULT= result;
m.EXC_RS_PC= (void*)pc;
r= sendrec(PM_PROC_NR, &m);
if (r != OK)
return r;
return m.m_type;
}
/*===========================================================================*
* patch_ptr *
*===========================================================================*/
2010-12-10 10:27:56 +01:00
static void patch_ptr(
char stack[ARG_MAX], /* pointer to stack image within PM */
vir_bytes base /* virtual address of stack base inside user */
)
{
/* When doing an exec(name, argv, envp) call, the user builds up a stack
* image with arg and env pointers relative to the start of the stack. Now
* these pointers must be relocated, since the stack is not positioned at
* address 0 in the user's address space.
*/
char **ap, flag;
vir_bytes v;
flag = 0; /* counts number of 0-pointers seen */
ap = (char **) stack; /* points initially to 'nargs' */
ap++; /* now points to argv[0] */
while (flag < 2) {
if (ap >= (char **) &stack[ARG_MAX]) return; /* too bad */
if (*ap != NULL) {
v = (vir_bytes) *ap; /* v is relative pointer */
v += base; /* relocate it */
*ap = (char *) v; /* put it back */
} else {
flag++;
}
ap++;
}
}
/*===========================================================================*
* read_seg *
*===========================================================================*/
2010-12-10 10:27:56 +01:00
static int read_seg(
struct exec_info *execi, /* various data needed for exec */
off_t off, /* offset in file */
int proc_e, /* process number (endpoint) */
int seg, /* T, D, or S */
vir_bytes seg_addr, /* address to load segment */
phys_bytes seg_bytes /* how much is to be transferred? */
)
{
/*
* The byte count on read is usually smaller than the segment count, because
* a segment is padded out to a click multiple, and the data segment is only
* partially initialized.
*/
int r;
2010-12-10 10:27:56 +01:00
assert((seg == T)||(seg == D));
if (off+seg_bytes > execi->image_len) return ENOEXEC;
r= sys_vircopy(SELF, D, ((vir_bytes)execi->image)+off, proc_e, seg, seg_addr, seg_bytes);
return r;
}