minix/kernel/arch/i386/exception.c

387 lines
10 KiB
C
Raw Normal View History

2005-04-21 16:53:53 +02:00
/* This file contains a simple exception handler. Exceptions in user
* processes are converted to signals. Exceptions in a kernel task cause
* a panic.
2005-04-21 16:53:53 +02:00
*/
2010-04-02 00:22:33 +02:00
#include "kernel/kernel.h"
#include "arch_proto.h"
2005-04-21 16:53:53 +02:00
#include <signal.h>
#include <string.h>
#include <assert.h>
#include <machine/vm.h>
2005-04-21 16:53:53 +02:00
struct ex_s {
char *msg;
int signum;
int minprocessor;
};
static struct ex_s ex_data[] = {
{ "Divide error", SIGFPE, 86 },
{ "Debug exception", SIGTRAP, 86 },
{ "Nonmaskable interrupt", SIGBUS, 86 },
{ "Breakpoint", SIGEMT, 86 },
{ "Overflow", SIGFPE, 86 },
{ "Bounds check", SIGFPE, 186 },
{ "Invalid opcode", SIGILL, 186 },
{ "Coprocessor not available", SIGFPE, 186 },
{ "Double fault", SIGBUS, 286 },
{ "Coprocessor segment overrun", SIGSEGV, 286 },
{ "Invalid TSS", SIGSEGV, 286 },
{ "Segment not present", SIGSEGV, 286 },
{ "Stack exception", SIGSEGV, 286 }, /* STACK_FAULT already used */
{ "General protection", SIGSEGV, 286 },
{ "Page fault", SIGSEGV, 386 }, /* not close */
{ NULL, SIGILL, 0 }, /* probably software trap */
{ "Coprocessor error", SIGFPE, 386 },
{ "Alignment check", SIGBUS, 386 },
{ "Machine check", SIGBUS, 386 },
{ "SIMD exception", SIGFPE, 386 },
};
2012-03-25 20:25:53 +02:00
static void inkernel_disaster(struct proc *saved_proc,
struct exception_frame *frame, struct ex_s *ep, int is_nested);
extern int catch_pagefaults;
2012-03-25 20:25:53 +02:00
static void proc_stacktrace_execute(struct proc *whichproc, reg_t v_bp, reg_t pc);
2012-03-25 20:25:53 +02:00
static void pagefault( struct proc *pr,
2010-04-27 22:30:33 +02:00
struct exception_frame * frame,
int is_nested)
{
int in_physcopy = 0, in_memset = 0;
Complete ovehaul of mode switching code - after a trap to kernel, the code automatically switches to kernel stack, in the future local to the CPU - k_reenter variable replaced by a test whether the CS is kernel cs or not. The information is passed further if needed. Removes a global variable which would need to be cpu local - no need for global variables describing the exception or trap context. This information is kept on stack and a pointer to this structure is passed to the C code as a single structure - removed loadedcr3 variable and its use replaced by reading the %cr3 register - no need to redisable interrupts in restart() as they are already disabled. - unified handling of traps that push and don't push errorcode - removed save() function as the process context is not saved directly to process table but saved as required by the trap code. Essentially it means that save() code is inlined everywhere not only in the exception handling routine - returning from syscall is more arch independent - it sets the retger in C - top of the x86 stack contains the current CPU id and pointer to the currently scheduled process (the one right interrupted) so the mode switch code can find where to save the context without need to use proc_ptr which will be cpu local in the future and therefore difficult to access in assembler and expensive to access in general - some more clean up of level0 code. No need to read-back the argument passed in %eax from the proc structure. The mode switch code does not clobber %the general registers and hence we can just call what is in %eax - many assebly macros in sconst.h as they will be reused by the apic assembly
2009-11-06 10:08:26 +01:00
reg_t pagefaultcr2;
message m_pagefault;
int err;
Complete ovehaul of mode switching code - after a trap to kernel, the code automatically switches to kernel stack, in the future local to the CPU - k_reenter variable replaced by a test whether the CS is kernel cs or not. The information is passed further if needed. Removes a global variable which would need to be cpu local - no need for global variables describing the exception or trap context. This information is kept on stack and a pointer to this structure is passed to the C code as a single structure - removed loadedcr3 variable and its use replaced by reading the %cr3 register - no need to redisable interrupts in restart() as they are already disabled. - unified handling of traps that push and don't push errorcode - removed save() function as the process context is not saved directly to process table but saved as required by the trap code. Essentially it means that save() code is inlined everywhere not only in the exception handling routine - returning from syscall is more arch independent - it sets the retger in C - top of the x86 stack contains the current CPU id and pointer to the currently scheduled process (the one right interrupted) so the mode switch code can find where to save the context without need to use proc_ptr which will be cpu local in the future and therefore difficult to access in assembler and expensive to access in general - some more clean up of level0 code. No need to read-back the argument passed in %eax from the proc structure. The mode switch code does not clobber %the general registers and hence we can just call what is in %eax - many assebly macros in sconst.h as they will be reused by the apic assembly
2009-11-06 10:08:26 +01:00
pagefaultcr2 = read_cr2();
Primary goal for these changes is: - no longer have kernel have its own page table that is loaded on every kernel entry (trap, interrupt, exception). the primary purpose is to reduce the number of required reloads. Result: - kernel can only access memory of process that was running when kernel was entered - kernel must be mapped into every process page table, so traps to kernel keep working Problem: - kernel must often access memory of arbitrary processes (e.g. send arbitrary processes messages); this can't happen directly any more; usually because that process' page table isn't loaded at all, sometimes because that memory isn't mapped in at all, sometimes because it isn't mapped in read-write. So: - kernel must be able to map in memory of any process, in its own address space. Implementation: - VM and kernel share a range of memory in which addresses of all page tables of all processes are available. This has two purposes: . Kernel has to know what data to copy in order to map in a range . Kernel has to know where to write the data in order to map it in That last point is because kernel has to write in the currently loaded page table. - Processes and kernel are separated through segments; kernel segments haven't changed. - The kernel keeps the process whose page table is currently loaded in 'ptproc.' - If it wants to map in a range of memory, it writes the value of the page directory entry for that range into the page directory entry in the currently loaded map. There is a slot reserved for such purposes. The kernel can then access this memory directly. - In order to do this, its segment has been increased (and the segments of processes start where it ends). - In the pagefault handler, detect if the kernel is doing 'trappable' memory access (i.e. a pagefault isn't a fatal error) and if so, - set the saved instruction pointer to phys_copy_fault, breaking out of phys_copy - set the saved eax register to the address of the page fault, both for sanity checking and for checking in which of the two ranges that phys_copy was called with the fault occured - Some boot-time processes do not have their own page table, and are mapped in with the kernel, and separated with segments. The kernel detects this using HASPT. If such a process has to be scheduled, any page table will work and no page table switch is done. Major changes in kernel are - When accessing user processes memory, kernel no longer explicitly checks before it does so if that memory is OK. It simply makes the mapping (if necessary), tries to do the operation, and traps the pagefault if that memory isn't present; if that happens, the copy function returns EFAULT. So all of the CHECKRANGE_OR_SUSPEND macros are gone. - Kernel no longer has to copy/read and parse page tables. - A message copying optimisation: when messages are copied, and the recipient isn't mapped in, they are copied into a buffer in the kernel. This is done in QueueMess. The next time the recipient is scheduled, this message is copied into its memory. This happens in schedcheck(). This eliminates the mapping/copying step for messages, and makes it easier to deliver messages. This eliminates soft_notify. - Kernel no longer creates a page table at all, so the vm_setbuf and pagetable writing in memory.c is gone. Minor changes in kernel are - ipc_stats thrown out, wasn't used - misc flags all renamed to MF_* - NOREC_* macros to enter and leave functions that should not be called recursively; just sanity checks really - code to fully decode segment selectors and descriptors to print on exceptions - lots of vmassert()s added, only executed if DEBUG_VMASSERT is 1
2009-09-21 16:31:52 +02:00
#if 0
printf("kernel: pagefault in pr %d, addr 0x%lx, his cr3 0x%lx, actual cr3 0x%lx\n",
pr->p_endpoint, pagefaultcr2, pr->p_seg.p_cr3, read_cr3());
#endif
Complete ovehaul of mode switching code - after a trap to kernel, the code automatically switches to kernel stack, in the future local to the CPU - k_reenter variable replaced by a test whether the CS is kernel cs or not. The information is passed further if needed. Removes a global variable which would need to be cpu local - no need for global variables describing the exception or trap context. This information is kept on stack and a pointer to this structure is passed to the C code as a single structure - removed loadedcr3 variable and its use replaced by reading the %cr3 register - no need to redisable interrupts in restart() as they are already disabled. - unified handling of traps that push and don't push errorcode - removed save() function as the process context is not saved directly to process table but saved as required by the trap code. Essentially it means that save() code is inlined everywhere not only in the exception handling routine - returning from syscall is more arch independent - it sets the retger in C - top of the x86 stack contains the current CPU id and pointer to the currently scheduled process (the one right interrupted) so the mode switch code can find where to save the context without need to use proc_ptr which will be cpu local in the future and therefore difficult to access in assembler and expensive to access in general - some more clean up of level0 code. No need to read-back the argument passed in %eax from the proc structure. The mode switch code does not clobber %the general registers and hence we can just call what is in %eax - many assebly macros in sconst.h as they will be reused by the apic assembly
2009-11-06 10:08:26 +01:00
in_physcopy = (frame->eip > (vir_bytes) phys_copy) &&
(frame->eip < (vir_bytes) phys_copy_fault);
Primary goal for these changes is: - no longer have kernel have its own page table that is loaded on every kernel entry (trap, interrupt, exception). the primary purpose is to reduce the number of required reloads. Result: - kernel can only access memory of process that was running when kernel was entered - kernel must be mapped into every process page table, so traps to kernel keep working Problem: - kernel must often access memory of arbitrary processes (e.g. send arbitrary processes messages); this can't happen directly any more; usually because that process' page table isn't loaded at all, sometimes because that memory isn't mapped in at all, sometimes because it isn't mapped in read-write. So: - kernel must be able to map in memory of any process, in its own address space. Implementation: - VM and kernel share a range of memory in which addresses of all page tables of all processes are available. This has two purposes: . Kernel has to know what data to copy in order to map in a range . Kernel has to know where to write the data in order to map it in That last point is because kernel has to write in the currently loaded page table. - Processes and kernel are separated through segments; kernel segments haven't changed. - The kernel keeps the process whose page table is currently loaded in 'ptproc.' - If it wants to map in a range of memory, it writes the value of the page directory entry for that range into the page directory entry in the currently loaded map. There is a slot reserved for such purposes. The kernel can then access this memory directly. - In order to do this, its segment has been increased (and the segments of processes start where it ends). - In the pagefault handler, detect if the kernel is doing 'trappable' memory access (i.e. a pagefault isn't a fatal error) and if so, - set the saved instruction pointer to phys_copy_fault, breaking out of phys_copy - set the saved eax register to the address of the page fault, both for sanity checking and for checking in which of the two ranges that phys_copy was called with the fault occured - Some boot-time processes do not have their own page table, and are mapped in with the kernel, and separated with segments. The kernel detects this using HASPT. If such a process has to be scheduled, any page table will work and no page table switch is done. Major changes in kernel are - When accessing user processes memory, kernel no longer explicitly checks before it does so if that memory is OK. It simply makes the mapping (if necessary), tries to do the operation, and traps the pagefault if that memory isn't present; if that happens, the copy function returns EFAULT. So all of the CHECKRANGE_OR_SUSPEND macros are gone. - Kernel no longer has to copy/read and parse page tables. - A message copying optimisation: when messages are copied, and the recipient isn't mapped in, they are copied into a buffer in the kernel. This is done in QueueMess. The next time the recipient is scheduled, this message is copied into its memory. This happens in schedcheck(). This eliminates the mapping/copying step for messages, and makes it easier to deliver messages. This eliminates soft_notify. - Kernel no longer creates a page table at all, so the vm_setbuf and pagetable writing in memory.c is gone. Minor changes in kernel are - ipc_stats thrown out, wasn't used - misc flags all renamed to MF_* - NOREC_* macros to enter and leave functions that should not be called recursively; just sanity checks really - code to fully decode segment selectors and descriptors to print on exceptions - lots of vmassert()s added, only executed if DEBUG_VMASSERT is 1
2009-09-21 16:31:52 +02:00
in_memset = (frame->eip > (vir_bytes) phys_memset) &&
(frame->eip < (vir_bytes) memset_fault);
Complete ovehaul of mode switching code - after a trap to kernel, the code automatically switches to kernel stack, in the future local to the CPU - k_reenter variable replaced by a test whether the CS is kernel cs or not. The information is passed further if needed. Removes a global variable which would need to be cpu local - no need for global variables describing the exception or trap context. This information is kept on stack and a pointer to this structure is passed to the C code as a single structure - removed loadedcr3 variable and its use replaced by reading the %cr3 register - no need to redisable interrupts in restart() as they are already disabled. - unified handling of traps that push and don't push errorcode - removed save() function as the process context is not saved directly to process table but saved as required by the trap code. Essentially it means that save() code is inlined everywhere not only in the exception handling routine - returning from syscall is more arch independent - it sets the retger in C - top of the x86 stack contains the current CPU id and pointer to the currently scheduled process (the one right interrupted) so the mode switch code can find where to save the context without need to use proc_ptr which will be cpu local in the future and therefore difficult to access in assembler and expensive to access in general - some more clean up of level0 code. No need to read-back the argument passed in %eax from the proc structure. The mode switch code does not clobber %the general registers and hence we can just call what is in %eax - many assebly macros in sconst.h as they will be reused by the apic assembly
2009-11-06 10:08:26 +01:00
if((is_nested || iskernelp(pr)) &&
catch_pagefaults && (in_physcopy || in_memset)) {
Primary goal for these changes is: - no longer have kernel have its own page table that is loaded on every kernel entry (trap, interrupt, exception). the primary purpose is to reduce the number of required reloads. Result: - kernel can only access memory of process that was running when kernel was entered - kernel must be mapped into every process page table, so traps to kernel keep working Problem: - kernel must often access memory of arbitrary processes (e.g. send arbitrary processes messages); this can't happen directly any more; usually because that process' page table isn't loaded at all, sometimes because that memory isn't mapped in at all, sometimes because it isn't mapped in read-write. So: - kernel must be able to map in memory of any process, in its own address space. Implementation: - VM and kernel share a range of memory in which addresses of all page tables of all processes are available. This has two purposes: . Kernel has to know what data to copy in order to map in a range . Kernel has to know where to write the data in order to map it in That last point is because kernel has to write in the currently loaded page table. - Processes and kernel are separated through segments; kernel segments haven't changed. - The kernel keeps the process whose page table is currently loaded in 'ptproc.' - If it wants to map in a range of memory, it writes the value of the page directory entry for that range into the page directory entry in the currently loaded map. There is a slot reserved for such purposes. The kernel can then access this memory directly. - In order to do this, its segment has been increased (and the segments of processes start where it ends). - In the pagefault handler, detect if the kernel is doing 'trappable' memory access (i.e. a pagefault isn't a fatal error) and if so, - set the saved instruction pointer to phys_copy_fault, breaking out of phys_copy - set the saved eax register to the address of the page fault, both for sanity checking and for checking in which of the two ranges that phys_copy was called with the fault occured - Some boot-time processes do not have their own page table, and are mapped in with the kernel, and separated with segments. The kernel detects this using HASPT. If such a process has to be scheduled, any page table will work and no page table switch is done. Major changes in kernel are - When accessing user processes memory, kernel no longer explicitly checks before it does so if that memory is OK. It simply makes the mapping (if necessary), tries to do the operation, and traps the pagefault if that memory isn't present; if that happens, the copy function returns EFAULT. So all of the CHECKRANGE_OR_SUSPEND macros are gone. - Kernel no longer has to copy/read and parse page tables. - A message copying optimisation: when messages are copied, and the recipient isn't mapped in, they are copied into a buffer in the kernel. This is done in QueueMess. The next time the recipient is scheduled, this message is copied into its memory. This happens in schedcheck(). This eliminates the mapping/copying step for messages, and makes it easier to deliver messages. This eliminates soft_notify. - Kernel no longer creates a page table at all, so the vm_setbuf and pagetable writing in memory.c is gone. Minor changes in kernel are - ipc_stats thrown out, wasn't used - misc flags all renamed to MF_* - NOREC_* macros to enter and leave functions that should not be called recursively; just sanity checks really - code to fully decode segment selectors and descriptors to print on exceptions - lots of vmassert()s added, only executed if DEBUG_VMASSERT is 1
2009-09-21 16:31:52 +02:00
#if 0
printf("pf caught! addr 0x%lx\n", pagefaultcr2);
#endif
Complete ovehaul of mode switching code - after a trap to kernel, the code automatically switches to kernel stack, in the future local to the CPU - k_reenter variable replaced by a test whether the CS is kernel cs or not. The information is passed further if needed. Removes a global variable which would need to be cpu local - no need for global variables describing the exception or trap context. This information is kept on stack and a pointer to this structure is passed to the C code as a single structure - removed loadedcr3 variable and its use replaced by reading the %cr3 register - no need to redisable interrupts in restart() as they are already disabled. - unified handling of traps that push and don't push errorcode - removed save() function as the process context is not saved directly to process table but saved as required by the trap code. Essentially it means that save() code is inlined everywhere not only in the exception handling routine - returning from syscall is more arch independent - it sets the retger in C - top of the x86 stack contains the current CPU id and pointer to the currently scheduled process (the one right interrupted) so the mode switch code can find where to save the context without need to use proc_ptr which will be cpu local in the future and therefore difficult to access in assembler and expensive to access in general - some more clean up of level0 code. No need to read-back the argument passed in %eax from the proc structure. The mode switch code does not clobber %the general registers and hence we can just call what is in %eax - many assebly macros in sconst.h as they will be reused by the apic assembly
2009-11-06 10:08:26 +01:00
if (is_nested) {
if(in_physcopy) {
assert(!in_memset);
frame->eip = (reg_t) phys_copy_fault_in_kernel;
} else {
frame->eip = (reg_t) memset_fault_in_kernel;
}
Complete ovehaul of mode switching code - after a trap to kernel, the code automatically switches to kernel stack, in the future local to the CPU - k_reenter variable replaced by a test whether the CS is kernel cs or not. The information is passed further if needed. Removes a global variable which would need to be cpu local - no need for global variables describing the exception or trap context. This information is kept on stack and a pointer to this structure is passed to the C code as a single structure - removed loadedcr3 variable and its use replaced by reading the %cr3 register - no need to redisable interrupts in restart() as they are already disabled. - unified handling of traps that push and don't push errorcode - removed save() function as the process context is not saved directly to process table but saved as required by the trap code. Essentially it means that save() code is inlined everywhere not only in the exception handling routine - returning from syscall is more arch independent - it sets the retger in C - top of the x86 stack contains the current CPU id and pointer to the currently scheduled process (the one right interrupted) so the mode switch code can find where to save the context without need to use proc_ptr which will be cpu local in the future and therefore difficult to access in assembler and expensive to access in general - some more clean up of level0 code. No need to read-back the argument passed in %eax from the proc structure. The mode switch code does not clobber %the general registers and hence we can just call what is in %eax - many assebly macros in sconst.h as they will be reused by the apic assembly
2009-11-06 10:08:26 +01:00
}
else {
pr->p_reg.pc = (reg_t) phys_copy_fault;
pr->p_reg.retreg = pagefaultcr2;
}
Primary goal for these changes is: - no longer have kernel have its own page table that is loaded on every kernel entry (trap, interrupt, exception). the primary purpose is to reduce the number of required reloads. Result: - kernel can only access memory of process that was running when kernel was entered - kernel must be mapped into every process page table, so traps to kernel keep working Problem: - kernel must often access memory of arbitrary processes (e.g. send arbitrary processes messages); this can't happen directly any more; usually because that process' page table isn't loaded at all, sometimes because that memory isn't mapped in at all, sometimes because it isn't mapped in read-write. So: - kernel must be able to map in memory of any process, in its own address space. Implementation: - VM and kernel share a range of memory in which addresses of all page tables of all processes are available. This has two purposes: . Kernel has to know what data to copy in order to map in a range . Kernel has to know where to write the data in order to map it in That last point is because kernel has to write in the currently loaded page table. - Processes and kernel are separated through segments; kernel segments haven't changed. - The kernel keeps the process whose page table is currently loaded in 'ptproc.' - If it wants to map in a range of memory, it writes the value of the page directory entry for that range into the page directory entry in the currently loaded map. There is a slot reserved for such purposes. The kernel can then access this memory directly. - In order to do this, its segment has been increased (and the segments of processes start where it ends). - In the pagefault handler, detect if the kernel is doing 'trappable' memory access (i.e. a pagefault isn't a fatal error) and if so, - set the saved instruction pointer to phys_copy_fault, breaking out of phys_copy - set the saved eax register to the address of the page fault, both for sanity checking and for checking in which of the two ranges that phys_copy was called with the fault occured - Some boot-time processes do not have their own page table, and are mapped in with the kernel, and separated with segments. The kernel detects this using HASPT. If such a process has to be scheduled, any page table will work and no page table switch is done. Major changes in kernel are - When accessing user processes memory, kernel no longer explicitly checks before it does so if that memory is OK. It simply makes the mapping (if necessary), tries to do the operation, and traps the pagefault if that memory isn't present; if that happens, the copy function returns EFAULT. So all of the CHECKRANGE_OR_SUSPEND macros are gone. - Kernel no longer has to copy/read and parse page tables. - A message copying optimisation: when messages are copied, and the recipient isn't mapped in, they are copied into a buffer in the kernel. This is done in QueueMess. The next time the recipient is scheduled, this message is copied into its memory. This happens in schedcheck(). This eliminates the mapping/copying step for messages, and makes it easier to deliver messages. This eliminates soft_notify. - Kernel no longer creates a page table at all, so the vm_setbuf and pagetable writing in memory.c is gone. Minor changes in kernel are - ipc_stats thrown out, wasn't used - misc flags all renamed to MF_* - NOREC_* macros to enter and leave functions that should not be called recursively; just sanity checks really - code to fully decode segment selectors and descriptors to print on exceptions - lots of vmassert()s added, only executed if DEBUG_VMASSERT is 1
2009-09-21 16:31:52 +02:00
return;
}
if(is_nested) {
printf("pagefault in kernel at pc 0x%lx address 0x%lx\n",
frame->eip, pagefaultcr2);
inkernel_disaster(pr, frame, NULL, is_nested);
}
No more intel/minix segments. This commit removes all traces of Minix segments (the text/data/stack memory map abstraction in the kernel) and significance of Intel segments (hardware segments like CS, DS that add offsets to all addressing before page table translation). This ultimately simplifies the memory layout and addressing and makes the same layout possible on non-Intel architectures. There are only two types of addresses in the world now: virtual and physical; even the kernel and processes have the same virtual address space. Kernel and user processes can be distinguished at a glance as processes won't use 0xF0000000 and above. No static pre-allocated memory sizes exist any more. Changes to booting: . The pre_init.c leaves the kernel and modules exactly as they were left by the bootloader in physical memory . The kernel starts running using physical addressing, loaded at a fixed location given in its linker script by the bootloader. All code and data in this phase are linked to this fixed low location. . It makes a bootstrap pagetable to map itself to a fixed high location (also in linker script) and jumps to the high address. All code and data then use this high addressing. . All code/data symbols linked at the low addresses is prefixed by an objcopy step with __k_unpaged_*, so that that code cannot reference highly-linked symbols (which aren't valid yet) or vice versa (symbols that aren't valid any more). . The two addressing modes are separated in the linker script by collecting the unpaged_*.o objects and linking them with low addresses, and linking the rest high. Some objects are linked twice, once low and once high. . The bootstrap phase passes a lot of information (e.g. free memory list, physical location of the modules, etc.) using the kinfo struct. . After this bootstrap the low-linked part is freed. . The kernel maps in VM into the bootstrap page table so that VM can begin executing. Its first job is to make page tables for all other boot processes. So VM runs before RS, and RS gets a fully dynamic, VM-managed address space. VM gets its privilege info from RS as usual but that happens after RS starts running. . Both the kernel loading VM and VM organizing boot processes happen using the libexec logic. This removes the last reason for VM to still know much about exec() and vm/exec.c is gone. Further Implementation: . All segments are based at 0 and have a 4 GB limit. . The kernel is mapped in at the top of the virtual address space so as not to constrain the user processes. . Processes do not use segments from the LDT at all; there are no segments in the LDT any more, so no LLDT is needed. . The Minix segments T/D/S are gone and so none of the user-space or in-kernel copy functions use them. The copy functions use a process endpoint of NONE to realize it's a physical address, virtual otherwise. . The umap call only makes sense to translate a virtual address to a physical address now. . Segments-related calls like newmap and alloc_segments are gone. . All segments-related translation in VM is gone (vir2map etc). . Initialization in VM is simpler as no moving around is necessary. . VM and all other boot processes can be linked wherever they wish and will be mapped in at the right location by the kernel and VM respectively. Other changes: . The multiboot code is less special: it does not use mb_print for its diagnostics any more but uses printf() as normal, saving the output into the diagnostics buffer, only printing to the screen using the direct print functions if a panic() occurs. . The multiboot code uses the flexible 'free memory map list' style to receive the list of free memory if available. . The kernel determines the memory layout of the processes to a degree: it tells VM where the kernel starts and ends and where the kernel wants the top of the process to be. VM then uses this entire range, i.e. the stack is right at the top, and mmap()ped bits of memory are placed below that downwards, and the break grows upwards. Other Consequences: . Every process gets its own page table as address spaces can't be separated any more by segments. . As all segments are 0-based, there is no distinction between virtual and linear addresses, nor between userspace and kernel addresses. . Less work is done when context switching, leading to a net performance increase. (8% faster on my machine for 'make servers'.) . The layout and configuration of the GDT makes sysenter and syscall possible.
2012-05-07 16:03:35 +02:00
/* VM can't handle page faults. */
if(pr->p_endpoint == VM_PROC_NR) {
/* Page fault we can't / don't want to
* handle.
*/
No more intel/minix segments. This commit removes all traces of Minix segments (the text/data/stack memory map abstraction in the kernel) and significance of Intel segments (hardware segments like CS, DS that add offsets to all addressing before page table translation). This ultimately simplifies the memory layout and addressing and makes the same layout possible on non-Intel architectures. There are only two types of addresses in the world now: virtual and physical; even the kernel and processes have the same virtual address space. Kernel and user processes can be distinguished at a glance as processes won't use 0xF0000000 and above. No static pre-allocated memory sizes exist any more. Changes to booting: . The pre_init.c leaves the kernel and modules exactly as they were left by the bootloader in physical memory . The kernel starts running using physical addressing, loaded at a fixed location given in its linker script by the bootloader. All code and data in this phase are linked to this fixed low location. . It makes a bootstrap pagetable to map itself to a fixed high location (also in linker script) and jumps to the high address. All code and data then use this high addressing. . All code/data symbols linked at the low addresses is prefixed by an objcopy step with __k_unpaged_*, so that that code cannot reference highly-linked symbols (which aren't valid yet) or vice versa (symbols that aren't valid any more). . The two addressing modes are separated in the linker script by collecting the unpaged_*.o objects and linking them with low addresses, and linking the rest high. Some objects are linked twice, once low and once high. . The bootstrap phase passes a lot of information (e.g. free memory list, physical location of the modules, etc.) using the kinfo struct. . After this bootstrap the low-linked part is freed. . The kernel maps in VM into the bootstrap page table so that VM can begin executing. Its first job is to make page tables for all other boot processes. So VM runs before RS, and RS gets a fully dynamic, VM-managed address space. VM gets its privilege info from RS as usual but that happens after RS starts running. . Both the kernel loading VM and VM organizing boot processes happen using the libexec logic. This removes the last reason for VM to still know much about exec() and vm/exec.c is gone. Further Implementation: . All segments are based at 0 and have a 4 GB limit. . The kernel is mapped in at the top of the virtual address space so as not to constrain the user processes. . Processes do not use segments from the LDT at all; there are no segments in the LDT any more, so no LLDT is needed. . The Minix segments T/D/S are gone and so none of the user-space or in-kernel copy functions use them. The copy functions use a process endpoint of NONE to realize it's a physical address, virtual otherwise. . The umap call only makes sense to translate a virtual address to a physical address now. . Segments-related calls like newmap and alloc_segments are gone. . All segments-related translation in VM is gone (vir2map etc). . Initialization in VM is simpler as no moving around is necessary. . VM and all other boot processes can be linked wherever they wish and will be mapped in at the right location by the kernel and VM respectively. Other changes: . The multiboot code is less special: it does not use mb_print for its diagnostics any more but uses printf() as normal, saving the output into the diagnostics buffer, only printing to the screen using the direct print functions if a panic() occurs. . The multiboot code uses the flexible 'free memory map list' style to receive the list of free memory if available. . The kernel determines the memory layout of the processes to a degree: it tells VM where the kernel starts and ends and where the kernel wants the top of the process to be. VM then uses this entire range, i.e. the stack is right at the top, and mmap()ped bits of memory are placed below that downwards, and the break grows upwards. Other Consequences: . Every process gets its own page table as address spaces can't be separated any more by segments. . As all segments are 0-based, there is no distinction between virtual and linear addresses, nor between userspace and kernel addresses. . Less work is done when context switching, leading to a net performance increase. (8% faster on my machine for 'make servers'.) . The layout and configuration of the GDT makes sysenter and syscall possible.
2012-05-07 16:03:35 +02:00
printf("pagefault for VM on CPU %d, "
"pc = 0x%x, addr = 0x%x, flags = 0x%x, is_nested %d\n",
No more intel/minix segments. This commit removes all traces of Minix segments (the text/data/stack memory map abstraction in the kernel) and significance of Intel segments (hardware segments like CS, DS that add offsets to all addressing before page table translation). This ultimately simplifies the memory layout and addressing and makes the same layout possible on non-Intel architectures. There are only two types of addresses in the world now: virtual and physical; even the kernel and processes have the same virtual address space. Kernel and user processes can be distinguished at a glance as processes won't use 0xF0000000 and above. No static pre-allocated memory sizes exist any more. Changes to booting: . The pre_init.c leaves the kernel and modules exactly as they were left by the bootloader in physical memory . The kernel starts running using physical addressing, loaded at a fixed location given in its linker script by the bootloader. All code and data in this phase are linked to this fixed low location. . It makes a bootstrap pagetable to map itself to a fixed high location (also in linker script) and jumps to the high address. All code and data then use this high addressing. . All code/data symbols linked at the low addresses is prefixed by an objcopy step with __k_unpaged_*, so that that code cannot reference highly-linked symbols (which aren't valid yet) or vice versa (symbols that aren't valid any more). . The two addressing modes are separated in the linker script by collecting the unpaged_*.o objects and linking them with low addresses, and linking the rest high. Some objects are linked twice, once low and once high. . The bootstrap phase passes a lot of information (e.g. free memory list, physical location of the modules, etc.) using the kinfo struct. . After this bootstrap the low-linked part is freed. . The kernel maps in VM into the bootstrap page table so that VM can begin executing. Its first job is to make page tables for all other boot processes. So VM runs before RS, and RS gets a fully dynamic, VM-managed address space. VM gets its privilege info from RS as usual but that happens after RS starts running. . Both the kernel loading VM and VM organizing boot processes happen using the libexec logic. This removes the last reason for VM to still know much about exec() and vm/exec.c is gone. Further Implementation: . All segments are based at 0 and have a 4 GB limit. . The kernel is mapped in at the top of the virtual address space so as not to constrain the user processes. . Processes do not use segments from the LDT at all; there are no segments in the LDT any more, so no LLDT is needed. . The Minix segments T/D/S are gone and so none of the user-space or in-kernel copy functions use them. The copy functions use a process endpoint of NONE to realize it's a physical address, virtual otherwise. . The umap call only makes sense to translate a virtual address to a physical address now. . Segments-related calls like newmap and alloc_segments are gone. . All segments-related translation in VM is gone (vir2map etc). . Initialization in VM is simpler as no moving around is necessary. . VM and all other boot processes can be linked wherever they wish and will be mapped in at the right location by the kernel and VM respectively. Other changes: . The multiboot code is less special: it does not use mb_print for its diagnostics any more but uses printf() as normal, saving the output into the diagnostics buffer, only printing to the screen using the direct print functions if a panic() occurs. . The multiboot code uses the flexible 'free memory map list' style to receive the list of free memory if available. . The kernel determines the memory layout of the processes to a degree: it tells VM where the kernel starts and ends and where the kernel wants the top of the process to be. VM then uses this entire range, i.e. the stack is right at the top, and mmap()ped bits of memory are placed below that downwards, and the break grows upwards. Other Consequences: . Every process gets its own page table as address spaces can't be separated any more by segments. . As all segments are 0-based, there is no distinction between virtual and linear addresses, nor between userspace and kernel addresses. . Less work is done when context switching, leading to a net performance increase. (8% faster on my machine for 'make servers'.) . The layout and configuration of the GDT makes sysenter and syscall possible.
2012-05-07 16:03:35 +02:00
cpuid, pr->p_reg.pc, pagefaultcr2, frame->errcode,
is_nested);
proc_stacktrace(pr);
printf("pc of pagefault: 0x%lx\n", frame->eip);
No more intel/minix segments. This commit removes all traces of Minix segments (the text/data/stack memory map abstraction in the kernel) and significance of Intel segments (hardware segments like CS, DS that add offsets to all addressing before page table translation). This ultimately simplifies the memory layout and addressing and makes the same layout possible on non-Intel architectures. There are only two types of addresses in the world now: virtual and physical; even the kernel and processes have the same virtual address space. Kernel and user processes can be distinguished at a glance as processes won't use 0xF0000000 and above. No static pre-allocated memory sizes exist any more. Changes to booting: . The pre_init.c leaves the kernel and modules exactly as they were left by the bootloader in physical memory . The kernel starts running using physical addressing, loaded at a fixed location given in its linker script by the bootloader. All code and data in this phase are linked to this fixed low location. . It makes a bootstrap pagetable to map itself to a fixed high location (also in linker script) and jumps to the high address. All code and data then use this high addressing. . All code/data symbols linked at the low addresses is prefixed by an objcopy step with __k_unpaged_*, so that that code cannot reference highly-linked symbols (which aren't valid yet) or vice versa (symbols that aren't valid any more). . The two addressing modes are separated in the linker script by collecting the unpaged_*.o objects and linking them with low addresses, and linking the rest high. Some objects are linked twice, once low and once high. . The bootstrap phase passes a lot of information (e.g. free memory list, physical location of the modules, etc.) using the kinfo struct. . After this bootstrap the low-linked part is freed. . The kernel maps in VM into the bootstrap page table so that VM can begin executing. Its first job is to make page tables for all other boot processes. So VM runs before RS, and RS gets a fully dynamic, VM-managed address space. VM gets its privilege info from RS as usual but that happens after RS starts running. . Both the kernel loading VM and VM organizing boot processes happen using the libexec logic. This removes the last reason for VM to still know much about exec() and vm/exec.c is gone. Further Implementation: . All segments are based at 0 and have a 4 GB limit. . The kernel is mapped in at the top of the virtual address space so as not to constrain the user processes. . Processes do not use segments from the LDT at all; there are no segments in the LDT any more, so no LLDT is needed. . The Minix segments T/D/S are gone and so none of the user-space or in-kernel copy functions use them. The copy functions use a process endpoint of NONE to realize it's a physical address, virtual otherwise. . The umap call only makes sense to translate a virtual address to a physical address now. . Segments-related calls like newmap and alloc_segments are gone. . All segments-related translation in VM is gone (vir2map etc). . Initialization in VM is simpler as no moving around is necessary. . VM and all other boot processes can be linked wherever they wish and will be mapped in at the right location by the kernel and VM respectively. Other changes: . The multiboot code is less special: it does not use mb_print for its diagnostics any more but uses printf() as normal, saving the output into the diagnostics buffer, only printing to the screen using the direct print functions if a panic() occurs. . The multiboot code uses the flexible 'free memory map list' style to receive the list of free memory if available. . The kernel determines the memory layout of the processes to a degree: it tells VM where the kernel starts and ends and where the kernel wants the top of the process to be. VM then uses this entire range, i.e. the stack is right at the top, and mmap()ped bits of memory are placed below that downwards, and the break grows upwards. Other Consequences: . Every process gets its own page table as address spaces can't be separated any more by segments. . As all segments are 0-based, there is no distinction between virtual and linear addresses, nor between userspace and kernel addresses. . Less work is done when context switching, leading to a net performance increase. (8% faster on my machine for 'make servers'.) . The layout and configuration of the GDT makes sysenter and syscall possible.
2012-05-07 16:03:35 +02:00
panic("pagefault in VM");
return;
}
Primary goal for these changes is: - no longer have kernel have its own page table that is loaded on every kernel entry (trap, interrupt, exception). the primary purpose is to reduce the number of required reloads. Result: - kernel can only access memory of process that was running when kernel was entered - kernel must be mapped into every process page table, so traps to kernel keep working Problem: - kernel must often access memory of arbitrary processes (e.g. send arbitrary processes messages); this can't happen directly any more; usually because that process' page table isn't loaded at all, sometimes because that memory isn't mapped in at all, sometimes because it isn't mapped in read-write. So: - kernel must be able to map in memory of any process, in its own address space. Implementation: - VM and kernel share a range of memory in which addresses of all page tables of all processes are available. This has two purposes: . Kernel has to know what data to copy in order to map in a range . Kernel has to know where to write the data in order to map it in That last point is because kernel has to write in the currently loaded page table. - Processes and kernel are separated through segments; kernel segments haven't changed. - The kernel keeps the process whose page table is currently loaded in 'ptproc.' - If it wants to map in a range of memory, it writes the value of the page directory entry for that range into the page directory entry in the currently loaded map. There is a slot reserved for such purposes. The kernel can then access this memory directly. - In order to do this, its segment has been increased (and the segments of processes start where it ends). - In the pagefault handler, detect if the kernel is doing 'trappable' memory access (i.e. a pagefault isn't a fatal error) and if so, - set the saved instruction pointer to phys_copy_fault, breaking out of phys_copy - set the saved eax register to the address of the page fault, both for sanity checking and for checking in which of the two ranges that phys_copy was called with the fault occured - Some boot-time processes do not have their own page table, and are mapped in with the kernel, and separated with segments. The kernel detects this using HASPT. If such a process has to be scheduled, any page table will work and no page table switch is done. Major changes in kernel are - When accessing user processes memory, kernel no longer explicitly checks before it does so if that memory is OK. It simply makes the mapping (if necessary), tries to do the operation, and traps the pagefault if that memory isn't present; if that happens, the copy function returns EFAULT. So all of the CHECKRANGE_OR_SUSPEND macros are gone. - Kernel no longer has to copy/read and parse page tables. - A message copying optimisation: when messages are copied, and the recipient isn't mapped in, they are copied into a buffer in the kernel. This is done in QueueMess. The next time the recipient is scheduled, this message is copied into its memory. This happens in schedcheck(). This eliminates the mapping/copying step for messages, and makes it easier to deliver messages. This eliminates soft_notify. - Kernel no longer creates a page table at all, so the vm_setbuf and pagetable writing in memory.c is gone. Minor changes in kernel are - ipc_stats thrown out, wasn't used - misc flags all renamed to MF_* - NOREC_* macros to enter and leave functions that should not be called recursively; just sanity checks really - code to fully decode segment selectors and descriptors to print on exceptions - lots of vmassert()s added, only executed if DEBUG_VMASSERT is 1
2009-09-21 16:31:52 +02:00
/* Don't schedule this process until pagefault is handled. */
RTS_SET(pr, RTS_PAGEFAULT);
Primary goal for these changes is: - no longer have kernel have its own page table that is loaded on every kernel entry (trap, interrupt, exception). the primary purpose is to reduce the number of required reloads. Result: - kernel can only access memory of process that was running when kernel was entered - kernel must be mapped into every process page table, so traps to kernel keep working Problem: - kernel must often access memory of arbitrary processes (e.g. send arbitrary processes messages); this can't happen directly any more; usually because that process' page table isn't loaded at all, sometimes because that memory isn't mapped in at all, sometimes because it isn't mapped in read-write. So: - kernel must be able to map in memory of any process, in its own address space. Implementation: - VM and kernel share a range of memory in which addresses of all page tables of all processes are available. This has two purposes: . Kernel has to know what data to copy in order to map in a range . Kernel has to know where to write the data in order to map it in That last point is because kernel has to write in the currently loaded page table. - Processes and kernel are separated through segments; kernel segments haven't changed. - The kernel keeps the process whose page table is currently loaded in 'ptproc.' - If it wants to map in a range of memory, it writes the value of the page directory entry for that range into the page directory entry in the currently loaded map. There is a slot reserved for such purposes. The kernel can then access this memory directly. - In order to do this, its segment has been increased (and the segments of processes start where it ends). - In the pagefault handler, detect if the kernel is doing 'trappable' memory access (i.e. a pagefault isn't a fatal error) and if so, - set the saved instruction pointer to phys_copy_fault, breaking out of phys_copy - set the saved eax register to the address of the page fault, both for sanity checking and for checking in which of the two ranges that phys_copy was called with the fault occured - Some boot-time processes do not have their own page table, and are mapped in with the kernel, and separated with segments. The kernel detects this using HASPT. If such a process has to be scheduled, any page table will work and no page table switch is done. Major changes in kernel are - When accessing user processes memory, kernel no longer explicitly checks before it does so if that memory is OK. It simply makes the mapping (if necessary), tries to do the operation, and traps the pagefault if that memory isn't present; if that happens, the copy function returns EFAULT. So all of the CHECKRANGE_OR_SUSPEND macros are gone. - Kernel no longer has to copy/read and parse page tables. - A message copying optimisation: when messages are copied, and the recipient isn't mapped in, they are copied into a buffer in the kernel. This is done in QueueMess. The next time the recipient is scheduled, this message is copied into its memory. This happens in schedcheck(). This eliminates the mapping/copying step for messages, and makes it easier to deliver messages. This eliminates soft_notify. - Kernel no longer creates a page table at all, so the vm_setbuf and pagetable writing in memory.c is gone. Minor changes in kernel are - ipc_stats thrown out, wasn't used - misc flags all renamed to MF_* - NOREC_* macros to enter and leave functions that should not be called recursively; just sanity checks really - code to fully decode segment selectors and descriptors to print on exceptions - lots of vmassert()s added, only executed if DEBUG_VMASSERT is 1
2009-09-21 16:31:52 +02:00
/* tell Vm about the pagefault */
m_pagefault.m_source = pr->p_endpoint;
m_pagefault.m_type = VM_PAGEFAULT;
m_pagefault.VPF_ADDR = pagefaultcr2;
m_pagefault.VPF_FLAGS = frame->errcode;
if ((err = mini_send(pr, VM_PROC_NR,
&m_pagefault, FROM_KERNEL))) {
panic("WARNING: pagefault: mini_send returned %d\n", err);
}
return;
}
2012-03-25 20:25:53 +02:00
static void inkernel_disaster(struct proc *saved_proc,
struct exception_frame * frame, struct ex_s *ep,
int is_nested)
{
#if USE_SYSDEBUG
if(ep) {
2011-09-16 20:03:15 +02:00
if (ep->msg == NULL)
printf("\nIntel-reserved exception %d\n", frame->vector);
else
printf("\n%s\n", ep->msg);
}
printf("cpu %d is_nested = %d ", cpuid, is_nested);
printf("vec_nr= %d, trap_errno= 0x%x, eip= 0x%x, "
"cs= 0x%x, eflags= 0x%x trap_esp 0x%08x\n",
frame->vector, frame->errcode, frame->eip,
frame->cs, frame->eflags, frame);
printf("KERNEL registers :\n");
#define REG(n) (((u32_t *)frame)[-n])
printf(
"\t%%eax 0x%08x %%ebx 0x%08x %%ecx 0x%08x %%edx 0x%08x\n"
"\t%%esp 0x%08x %%ebp 0x%08x %%esi 0x%08x %%edi 0x%08x\n",
REG(1), REG(2), REG(3), REG(4),
REG(5), REG(6), REG(7), REG(8));
{
reg_t k_ebp = REG(6);
printf("KERNEL stacktrace, starting with ebp = 0x%lx:\n", k_ebp);
proc_stacktrace_execute(proc_addr(SYSTEM), k_ebp, frame->eip);
}
if (saved_proc) {
printf("scheduled was: process %d (%s), ", saved_proc->p_endpoint, saved_proc->p_name);
No more intel/minix segments. This commit removes all traces of Minix segments (the text/data/stack memory map abstraction in the kernel) and significance of Intel segments (hardware segments like CS, DS that add offsets to all addressing before page table translation). This ultimately simplifies the memory layout and addressing and makes the same layout possible on non-Intel architectures. There are only two types of addresses in the world now: virtual and physical; even the kernel and processes have the same virtual address space. Kernel and user processes can be distinguished at a glance as processes won't use 0xF0000000 and above. No static pre-allocated memory sizes exist any more. Changes to booting: . The pre_init.c leaves the kernel and modules exactly as they were left by the bootloader in physical memory . The kernel starts running using physical addressing, loaded at a fixed location given in its linker script by the bootloader. All code and data in this phase are linked to this fixed low location. . It makes a bootstrap pagetable to map itself to a fixed high location (also in linker script) and jumps to the high address. All code and data then use this high addressing. . All code/data symbols linked at the low addresses is prefixed by an objcopy step with __k_unpaged_*, so that that code cannot reference highly-linked symbols (which aren't valid yet) or vice versa (symbols that aren't valid any more). . The two addressing modes are separated in the linker script by collecting the unpaged_*.o objects and linking them with low addresses, and linking the rest high. Some objects are linked twice, once low and once high. . The bootstrap phase passes a lot of information (e.g. free memory list, physical location of the modules, etc.) using the kinfo struct. . After this bootstrap the low-linked part is freed. . The kernel maps in VM into the bootstrap page table so that VM can begin executing. Its first job is to make page tables for all other boot processes. So VM runs before RS, and RS gets a fully dynamic, VM-managed address space. VM gets its privilege info from RS as usual but that happens after RS starts running. . Both the kernel loading VM and VM organizing boot processes happen using the libexec logic. This removes the last reason for VM to still know much about exec() and vm/exec.c is gone. Further Implementation: . All segments are based at 0 and have a 4 GB limit. . The kernel is mapped in at the top of the virtual address space so as not to constrain the user processes. . Processes do not use segments from the LDT at all; there are no segments in the LDT any more, so no LLDT is needed. . The Minix segments T/D/S are gone and so none of the user-space or in-kernel copy functions use them. The copy functions use a process endpoint of NONE to realize it's a physical address, virtual otherwise. . The umap call only makes sense to translate a virtual address to a physical address now. . Segments-related calls like newmap and alloc_segments are gone. . All segments-related translation in VM is gone (vir2map etc). . Initialization in VM is simpler as no moving around is necessary. . VM and all other boot processes can be linked wherever they wish and will be mapped in at the right location by the kernel and VM respectively. Other changes: . The multiboot code is less special: it does not use mb_print for its diagnostics any more but uses printf() as normal, saving the output into the diagnostics buffer, only printing to the screen using the direct print functions if a panic() occurs. . The multiboot code uses the flexible 'free memory map list' style to receive the list of free memory if available. . The kernel determines the memory layout of the processes to a degree: it tells VM where the kernel starts and ends and where the kernel wants the top of the process to be. VM then uses this entire range, i.e. the stack is right at the top, and mmap()ped bits of memory are placed below that downwards, and the break grows upwards. Other Consequences: . Every process gets its own page table as address spaces can't be separated any more by segments. . As all segments are 0-based, there is no distinction between virtual and linear addresses, nor between userspace and kernel addresses. . Less work is done when context switching, leading to a net performance increase. (8% faster on my machine for 'make servers'.) . The layout and configuration of the GDT makes sysenter and syscall possible.
2012-05-07 16:03:35 +02:00
printf("pc = 0x%x\n", (unsigned) saved_proc->p_reg.pc);
proc_stacktrace(saved_proc);
panic("Unhandled kernel exception");
}
/* in an early stage of boot process we don't have processes yet */
panic("exception in kernel while booting, no saved_proc yet");
#endif /* USE_SYSDEBUG */
}
2005-09-11 18:44:06 +02:00
/*===========================================================================*
* exception *
*===========================================================================*/
2012-03-25 20:25:53 +02:00
void exception_handler(int is_nested, struct exception_frame * frame)
2005-04-21 16:53:53 +02:00
{
/* An exception or unexpected interrupt has occurred. */
register struct ex_s *ep;
struct proc *saved_proc;
/* Save proc_ptr, because it may be changed by debug statements. */
saved_proc = get_cpulocal_var(proc_ptr);
Primary goal for these changes is: - no longer have kernel have its own page table that is loaded on every kernel entry (trap, interrupt, exception). the primary purpose is to reduce the number of required reloads. Result: - kernel can only access memory of process that was running when kernel was entered - kernel must be mapped into every process page table, so traps to kernel keep working Problem: - kernel must often access memory of arbitrary processes (e.g. send arbitrary processes messages); this can't happen directly any more; usually because that process' page table isn't loaded at all, sometimes because that memory isn't mapped in at all, sometimes because it isn't mapped in read-write. So: - kernel must be able to map in memory of any process, in its own address space. Implementation: - VM and kernel share a range of memory in which addresses of all page tables of all processes are available. This has two purposes: . Kernel has to know what data to copy in order to map in a range . Kernel has to know where to write the data in order to map it in That last point is because kernel has to write in the currently loaded page table. - Processes and kernel are separated through segments; kernel segments haven't changed. - The kernel keeps the process whose page table is currently loaded in 'ptproc.' - If it wants to map in a range of memory, it writes the value of the page directory entry for that range into the page directory entry in the currently loaded map. There is a slot reserved for such purposes. The kernel can then access this memory directly. - In order to do this, its segment has been increased (and the segments of processes start where it ends). - In the pagefault handler, detect if the kernel is doing 'trappable' memory access (i.e. a pagefault isn't a fatal error) and if so, - set the saved instruction pointer to phys_copy_fault, breaking out of phys_copy - set the saved eax register to the address of the page fault, both for sanity checking and for checking in which of the two ranges that phys_copy was called with the fault occured - Some boot-time processes do not have their own page table, and are mapped in with the kernel, and separated with segments. The kernel detects this using HASPT. If such a process has to be scheduled, any page table will work and no page table switch is done. Major changes in kernel are - When accessing user processes memory, kernel no longer explicitly checks before it does so if that memory is OK. It simply makes the mapping (if necessary), tries to do the operation, and traps the pagefault if that memory isn't present; if that happens, the copy function returns EFAULT. So all of the CHECKRANGE_OR_SUSPEND macros are gone. - Kernel no longer has to copy/read and parse page tables. - A message copying optimisation: when messages are copied, and the recipient isn't mapped in, they are copied into a buffer in the kernel. This is done in QueueMess. The next time the recipient is scheduled, this message is copied into its memory. This happens in schedcheck(). This eliminates the mapping/copying step for messages, and makes it easier to deliver messages. This eliminates soft_notify. - Kernel no longer creates a page table at all, so the vm_setbuf and pagetable writing in memory.c is gone. Minor changes in kernel are - ipc_stats thrown out, wasn't used - misc flags all renamed to MF_* - NOREC_* macros to enter and leave functions that should not be called recursively; just sanity checks really - code to fully decode segment selectors and descriptors to print on exceptions - lots of vmassert()s added, only executed if DEBUG_VMASSERT is 1
2009-09-21 16:31:52 +02:00
Complete ovehaul of mode switching code - after a trap to kernel, the code automatically switches to kernel stack, in the future local to the CPU - k_reenter variable replaced by a test whether the CS is kernel cs or not. The information is passed further if needed. Removes a global variable which would need to be cpu local - no need for global variables describing the exception or trap context. This information is kept on stack and a pointer to this structure is passed to the C code as a single structure - removed loadedcr3 variable and its use replaced by reading the %cr3 register - no need to redisable interrupts in restart() as they are already disabled. - unified handling of traps that push and don't push errorcode - removed save() function as the process context is not saved directly to process table but saved as required by the trap code. Essentially it means that save() code is inlined everywhere not only in the exception handling routine - returning from syscall is more arch independent - it sets the retger in C - top of the x86 stack contains the current CPU id and pointer to the currently scheduled process (the one right interrupted) so the mode switch code can find where to save the context without need to use proc_ptr which will be cpu local in the future and therefore difficult to access in assembler and expensive to access in general - some more clean up of level0 code. No need to read-back the argument passed in %eax from the proc structure. The mode switch code does not clobber %the general registers and hence we can just call what is in %eax - many assebly macros in sconst.h as they will be reused by the apic assembly
2009-11-06 10:08:26 +01:00
ep = &ex_data[frame->vector];
2005-04-21 16:53:53 +02:00
Complete ovehaul of mode switching code - after a trap to kernel, the code automatically switches to kernel stack, in the future local to the CPU - k_reenter variable replaced by a test whether the CS is kernel cs or not. The information is passed further if needed. Removes a global variable which would need to be cpu local - no need for global variables describing the exception or trap context. This information is kept on stack and a pointer to this structure is passed to the C code as a single structure - removed loadedcr3 variable and its use replaced by reading the %cr3 register - no need to redisable interrupts in restart() as they are already disabled. - unified handling of traps that push and don't push errorcode - removed save() function as the process context is not saved directly to process table but saved as required by the trap code. Essentially it means that save() code is inlined everywhere not only in the exception handling routine - returning from syscall is more arch independent - it sets the retger in C - top of the x86 stack contains the current CPU id and pointer to the currently scheduled process (the one right interrupted) so the mode switch code can find where to save the context without need to use proc_ptr which will be cpu local in the future and therefore difficult to access in assembler and expensive to access in general - some more clean up of level0 code. No need to read-back the argument passed in %eax from the proc structure. The mode switch code does not clobber %the general registers and hence we can just call what is in %eax - many assebly macros in sconst.h as they will be reused by the apic assembly
2009-11-06 10:08:26 +01:00
if (frame->vector == 2) { /* spurious NMI on some machines */
printf("got spurious NMI\n");
2005-04-21 16:53:53 +02:00
return;
}
/*
* handle special cases for nested problems as they might be tricky or filter
* them out quickly if the traps are not nested
*/
if (is_nested) {
/*
* if a problem occured while copying a message from userspace because
* of a wrong pointer supplied by userland, handle it the only way we
* can handle it ...
*/
if (((void*)frame->eip >= (void*)copy_msg_to_user &&
(void*)frame->eip <= (void*)__copy_msg_to_user_end) ||
((void*)frame->eip >= (void*)copy_msg_from_user &&
(void*)frame->eip <= (void*)__copy_msg_from_user_end)) {
switch(frame->vector) {
/* these error are expected */
case PAGE_FAULT_VECTOR:
case PROTECTION_VECTOR:
frame->eip = (reg_t) __user_copy_msg_pointer_failure;
return;
default:
panic("Copy involving a user pointer failed unexpectedly!");
}
}
/* Pass any error resulting from restoring FPU state, as a FPU
* exception to the process.
*/
if (((void*)frame->eip >= (void*)fxrstor &&
(void *)frame->eip <= (void*)__fxrstor_end) ||
((void*)frame->eip >= (void*)frstor &&
(void *)frame->eip <= (void*)__frstor_end)) {
frame->eip = (reg_t) __frstor_failure;
return;
}
if(frame->vector == DEBUG_VECTOR
&& (saved_proc->p_reg.psw & TRACEBIT)
&& (saved_proc->p_seg.p_kern_trap_style == KTS_NONE)) {
/* Getting a debug trap in the kernel is legitimate
* if a traced process entered the kernel using sysenter
* or syscall; the trap flag is not cleared then.
*
* It triggers on the first kernel entry so the trap
* style is still KTS_NONE.
*/
frame->eflags &= ~TRACEBIT;
return;
/* If control passes, this case is not recognized as legitimate
* and we panic later on after all.
*/
}
}
Complete ovehaul of mode switching code - after a trap to kernel, the code automatically switches to kernel stack, in the future local to the CPU - k_reenter variable replaced by a test whether the CS is kernel cs or not. The information is passed further if needed. Removes a global variable which would need to be cpu local - no need for global variables describing the exception or trap context. This information is kept on stack and a pointer to this structure is passed to the C code as a single structure - removed loadedcr3 variable and its use replaced by reading the %cr3 register - no need to redisable interrupts in restart() as they are already disabled. - unified handling of traps that push and don't push errorcode - removed save() function as the process context is not saved directly to process table but saved as required by the trap code. Essentially it means that save() code is inlined everywhere not only in the exception handling routine - returning from syscall is more arch independent - it sets the retger in C - top of the x86 stack contains the current CPU id and pointer to the currently scheduled process (the one right interrupted) so the mode switch code can find where to save the context without need to use proc_ptr which will be cpu local in the future and therefore difficult to access in assembler and expensive to access in general - some more clean up of level0 code. No need to read-back the argument passed in %eax from the proc structure. The mode switch code does not clobber %the general registers and hence we can just call what is in %eax - many assebly macros in sconst.h as they will be reused by the apic assembly
2009-11-06 10:08:26 +01:00
if(frame->vector == PAGE_FAULT_VECTOR) {
pagefault(saved_proc, frame, is_nested);
Primary goal for these changes is: - no longer have kernel have its own page table that is loaded on every kernel entry (trap, interrupt, exception). the primary purpose is to reduce the number of required reloads. Result: - kernel can only access memory of process that was running when kernel was entered - kernel must be mapped into every process page table, so traps to kernel keep working Problem: - kernel must often access memory of arbitrary processes (e.g. send arbitrary processes messages); this can't happen directly any more; usually because that process' page table isn't loaded at all, sometimes because that memory isn't mapped in at all, sometimes because it isn't mapped in read-write. So: - kernel must be able to map in memory of any process, in its own address space. Implementation: - VM and kernel share a range of memory in which addresses of all page tables of all processes are available. This has two purposes: . Kernel has to know what data to copy in order to map in a range . Kernel has to know where to write the data in order to map it in That last point is because kernel has to write in the currently loaded page table. - Processes and kernel are separated through segments; kernel segments haven't changed. - The kernel keeps the process whose page table is currently loaded in 'ptproc.' - If it wants to map in a range of memory, it writes the value of the page directory entry for that range into the page directory entry in the currently loaded map. There is a slot reserved for such purposes. The kernel can then access this memory directly. - In order to do this, its segment has been increased (and the segments of processes start where it ends). - In the pagefault handler, detect if the kernel is doing 'trappable' memory access (i.e. a pagefault isn't a fatal error) and if so, - set the saved instruction pointer to phys_copy_fault, breaking out of phys_copy - set the saved eax register to the address of the page fault, both for sanity checking and for checking in which of the two ranges that phys_copy was called with the fault occured - Some boot-time processes do not have their own page table, and are mapped in with the kernel, and separated with segments. The kernel detects this using HASPT. If such a process has to be scheduled, any page table will work and no page table switch is done. Major changes in kernel are - When accessing user processes memory, kernel no longer explicitly checks before it does so if that memory is OK. It simply makes the mapping (if necessary), tries to do the operation, and traps the pagefault if that memory isn't present; if that happens, the copy function returns EFAULT. So all of the CHECKRANGE_OR_SUSPEND macros are gone. - Kernel no longer has to copy/read and parse page tables. - A message copying optimisation: when messages are copied, and the recipient isn't mapped in, they are copied into a buffer in the kernel. This is done in QueueMess. The next time the recipient is scheduled, this message is copied into its memory. This happens in schedcheck(). This eliminates the mapping/copying step for messages, and makes it easier to deliver messages. This eliminates soft_notify. - Kernel no longer creates a page table at all, so the vm_setbuf and pagetable writing in memory.c is gone. Minor changes in kernel are - ipc_stats thrown out, wasn't used - misc flags all renamed to MF_* - NOREC_* macros to enter and leave functions that should not be called recursively; just sanity checks really - code to fully decode segment selectors and descriptors to print on exceptions - lots of vmassert()s added, only executed if DEBUG_VMASSERT is 1
2009-09-21 16:31:52 +02:00
return;
}
Complete ovehaul of mode switching code - after a trap to kernel, the code automatically switches to kernel stack, in the future local to the CPU - k_reenter variable replaced by a test whether the CS is kernel cs or not. The information is passed further if needed. Removes a global variable which would need to be cpu local - no need for global variables describing the exception or trap context. This information is kept on stack and a pointer to this structure is passed to the C code as a single structure - removed loadedcr3 variable and its use replaced by reading the %cr3 register - no need to redisable interrupts in restart() as they are already disabled. - unified handling of traps that push and don't push errorcode - removed save() function as the process context is not saved directly to process table but saved as required by the trap code. Essentially it means that save() code is inlined everywhere not only in the exception handling routine - returning from syscall is more arch independent - it sets the retger in C - top of the x86 stack contains the current CPU id and pointer to the currently scheduled process (the one right interrupted) so the mode switch code can find where to save the context without need to use proc_ptr which will be cpu local in the future and therefore difficult to access in assembler and expensive to access in general - some more clean up of level0 code. No need to read-back the argument passed in %eax from the proc structure. The mode switch code does not clobber %the general registers and hence we can just call what is in %eax - many assebly macros in sconst.h as they will be reused by the apic assembly
2009-11-06 10:08:26 +01:00
/* If an exception occurs while running a process, the is_nested variable
* will be zero. Exceptions in interrupt handlers or system traps will make
* is_nested non-zero.
2005-07-19 17:01:47 +02:00
*/
Complete ovehaul of mode switching code - after a trap to kernel, the code automatically switches to kernel stack, in the future local to the CPU - k_reenter variable replaced by a test whether the CS is kernel cs or not. The information is passed further if needed. Removes a global variable which would need to be cpu local - no need for global variables describing the exception or trap context. This information is kept on stack and a pointer to this structure is passed to the C code as a single structure - removed loadedcr3 variable and its use replaced by reading the %cr3 register - no need to redisable interrupts in restart() as they are already disabled. - unified handling of traps that push and don't push errorcode - removed save() function as the process context is not saved directly to process table but saved as required by the trap code. Essentially it means that save() code is inlined everywhere not only in the exception handling routine - returning from syscall is more arch independent - it sets the retger in C - top of the x86 stack contains the current CPU id and pointer to the currently scheduled process (the one right interrupted) so the mode switch code can find where to save the context without need to use proc_ptr which will be cpu local in the future and therefore difficult to access in assembler and expensive to access in general - some more clean up of level0 code. No need to read-back the argument passed in %eax from the proc structure. The mode switch code does not clobber %the general registers and hence we can just call what is in %eax - many assebly macros in sconst.h as they will be reused by the apic assembly
2009-11-06 10:08:26 +01:00
if (is_nested == 0 && ! iskernelp(saved_proc)) {
Merge of David's ptrace branch. Summary: o Support for ptrace T_ATTACH/T_DETACH and T_SYSCALL o PM signal handling logic should now work properly, even with debuggers being present o Asynchronous PM/VFS protocol, full IPC support for senda(), and AMF_NOREPLY senda() flag DETAILS Process stop and delay call handling of PM: o Added sys_runctl() kernel call with sys_stop() and sys_resume() aliases, for PM to stop and resume a process o Added exception for sending/syscall-traced processes to sys_runctl(), and matching SIGKREADY pseudo-signal to PM o Fixed PM signal logic to deal with requests from a process after stopping it (so-called "delay calls"), using the SIGKREADY facility o Fixed various PM panics due to race conditions with delay calls versus VFS calls o Removed special PRIO_STOP priority value o Added SYS_LOCK RTS kernel flag, to stop an individual process from running while modifying its process structure Signal and debugger handling in PM: o Fixed debugger signals being dropped if a second signal arrives when the debugger has not retrieved the first one o Fixed debugger signals being sent to the debugger more than once o Fixed debugger signals unpausing process in VFS; removed PM_UNPAUSE_TR protocol message o Detached debugger signals from general signal logic and from being blocked on VFS calls, meaning that even VFS can now be traced o Fixed debugger being unable to receive more than one pending signal in one process stop o Fixed signal delivery being delayed needlessly when multiple signals are pending o Fixed wait test for tracer, which was returning for children that were not waited for o Removed second parallel pending call from PM to VFS for any process o Fixed process becoming runnable between exec() and debugger trap o Added support for notifying the debugger before the parent when a debugged child exits o Fixed debugger death causing child to remain stopped forever o Fixed consistently incorrect use of _NSIG Extensions to ptrace(): o Added T_ATTACH and T_DETACH ptrace request, to attach and detach a debugger to and from a process o Added T_SYSCALL ptrace request, to trace system calls o Added T_SETOPT ptrace request, to set trace options o Added TO_TRACEFORK trace option, to attach automatically to children of a traced process o Added TO_ALTEXEC trace option, to send SIGSTOP instead of SIGTRAP upon a successful exec() of the tracee o Extended T_GETUSER ptrace support to allow retrieving a process's priv structure o Removed T_STOP ptrace request again, as it does not help implementing debuggers properly o Added MINIX3-specific ptrace test (test42) o Added proper manual page for ptrace(2) Asynchronous PM/VFS interface: o Fixed asynchronous messages not being checked when receive() is called with an endpoint other than ANY o Added AMF_NOREPLY senda() flag, preventing such messages from satisfying the receive part of a sendrec() o Added asynsend3() that takes optional flags; asynsend() is now a #define passing in 0 as third parameter o Made PM/VFS protocol asynchronous; reintroduced tell_fs() o Made PM_BASE request/reply number range unique o Hacked in a horrible temporary workaround into RS to deal with newly revealed RS-PM-VFS race condition triangle until VFS is asynchronous System signal handling: o Fixed shutdown logic of device drivers; removed old SIGKSTOP signal o Removed is-superuser check from PM's do_procstat() (aka getsigset()) o Added sigset macros to allow system processes to deal with the full signal set, rather than just the POSIX subset Miscellaneous PM fixes: o Split do_getset into do_get and do_set, merging common code and making structure clearer o Fixed setpriority() being able to put to sleep processes using an invalid parameter, or revive zombie processes o Made find_proc() global; removed obsolete proc_from_pid() o Cleanup here and there Also included: o Fixed false-positive boot order kernel warning o Removed last traces of old NOTIFY_FROM code THINGS OF POSSIBLE INTEREST o It should now be possible to run PM at any priority, even lower than user processes o No assumptions are made about communication speed between PM and VFS, although communication must be FIFO o A debugger will now receive incoming debuggee signals at kill time only; the process may not yet be fully stopped o A first step has been made towards making the SYSTEM task preemptible
2009-09-30 11:57:22 +02:00
#if 0
{
printf(
"vec_nr= %d, trap_errno= 0x%lx, eip= 0x%lx, cs= 0x%x, eflags= 0x%lx\n",
Complete ovehaul of mode switching code - after a trap to kernel, the code automatically switches to kernel stack, in the future local to the CPU - k_reenter variable replaced by a test whether the CS is kernel cs or not. The information is passed further if needed. Removes a global variable which would need to be cpu local - no need for global variables describing the exception or trap context. This information is kept on stack and a pointer to this structure is passed to the C code as a single structure - removed loadedcr3 variable and its use replaced by reading the %cr3 register - no need to redisable interrupts in restart() as they are already disabled. - unified handling of traps that push and don't push errorcode - removed save() function as the process context is not saved directly to process table but saved as required by the trap code. Essentially it means that save() code is inlined everywhere not only in the exception handling routine - returning from syscall is more arch independent - it sets the retger in C - top of the x86 stack contains the current CPU id and pointer to the currently scheduled process (the one right interrupted) so the mode switch code can find where to save the context without need to use proc_ptr which will be cpu local in the future and therefore difficult to access in assembler and expensive to access in general - some more clean up of level0 code. No need to read-back the argument passed in %eax from the proc structure. The mode switch code does not clobber %the general registers and hence we can just call what is in %eax - many assebly macros in sconst.h as they will be reused by the apic assembly
2009-11-06 10:08:26 +01:00
frame->vector, (unsigned long)frame->errcode,
(unsigned long)frame->eip, frame->cs,
(unsigned long)frame->eflags);
proc_stacktrace(saved_proc);
}
Merge of David's ptrace branch. Summary: o Support for ptrace T_ATTACH/T_DETACH and T_SYSCALL o PM signal handling logic should now work properly, even with debuggers being present o Asynchronous PM/VFS protocol, full IPC support for senda(), and AMF_NOREPLY senda() flag DETAILS Process stop and delay call handling of PM: o Added sys_runctl() kernel call with sys_stop() and sys_resume() aliases, for PM to stop and resume a process o Added exception for sending/syscall-traced processes to sys_runctl(), and matching SIGKREADY pseudo-signal to PM o Fixed PM signal logic to deal with requests from a process after stopping it (so-called "delay calls"), using the SIGKREADY facility o Fixed various PM panics due to race conditions with delay calls versus VFS calls o Removed special PRIO_STOP priority value o Added SYS_LOCK RTS kernel flag, to stop an individual process from running while modifying its process structure Signal and debugger handling in PM: o Fixed debugger signals being dropped if a second signal arrives when the debugger has not retrieved the first one o Fixed debugger signals being sent to the debugger more than once o Fixed debugger signals unpausing process in VFS; removed PM_UNPAUSE_TR protocol message o Detached debugger signals from general signal logic and from being blocked on VFS calls, meaning that even VFS can now be traced o Fixed debugger being unable to receive more than one pending signal in one process stop o Fixed signal delivery being delayed needlessly when multiple signals are pending o Fixed wait test for tracer, which was returning for children that were not waited for o Removed second parallel pending call from PM to VFS for any process o Fixed process becoming runnable between exec() and debugger trap o Added support for notifying the debugger before the parent when a debugged child exits o Fixed debugger death causing child to remain stopped forever o Fixed consistently incorrect use of _NSIG Extensions to ptrace(): o Added T_ATTACH and T_DETACH ptrace request, to attach and detach a debugger to and from a process o Added T_SYSCALL ptrace request, to trace system calls o Added T_SETOPT ptrace request, to set trace options o Added TO_TRACEFORK trace option, to attach automatically to children of a traced process o Added TO_ALTEXEC trace option, to send SIGSTOP instead of SIGTRAP upon a successful exec() of the tracee o Extended T_GETUSER ptrace support to allow retrieving a process's priv structure o Removed T_STOP ptrace request again, as it does not help implementing debuggers properly o Added MINIX3-specific ptrace test (test42) o Added proper manual page for ptrace(2) Asynchronous PM/VFS interface: o Fixed asynchronous messages not being checked when receive() is called with an endpoint other than ANY o Added AMF_NOREPLY senda() flag, preventing such messages from satisfying the receive part of a sendrec() o Added asynsend3() that takes optional flags; asynsend() is now a #define passing in 0 as third parameter o Made PM/VFS protocol asynchronous; reintroduced tell_fs() o Made PM_BASE request/reply number range unique o Hacked in a horrible temporary workaround into RS to deal with newly revealed RS-PM-VFS race condition triangle until VFS is asynchronous System signal handling: o Fixed shutdown logic of device drivers; removed old SIGKSTOP signal o Removed is-superuser check from PM's do_procstat() (aka getsigset()) o Added sigset macros to allow system processes to deal with the full signal set, rather than just the POSIX subset Miscellaneous PM fixes: o Split do_getset into do_get and do_set, merging common code and making structure clearer o Fixed setpriority() being able to put to sleep processes using an invalid parameter, or revive zombie processes o Made find_proc() global; removed obsolete proc_from_pid() o Cleanup here and there Also included: o Fixed false-positive boot order kernel warning o Removed last traces of old NOTIFY_FROM code THINGS OF POSSIBLE INTEREST o It should now be possible to run PM at any priority, even lower than user processes o No assumptions are made about communication speed between PM and VFS, although communication must be FIFO o A debugger will now receive incoming debuggee signals at kill time only; the process may not yet be fully stopped o A first step has been made towards making the SYSTEM task preemptible
2009-09-30 11:57:22 +02:00
#endif
cause_sig(proc_nr(saved_proc), ep->signum);
2005-04-21 16:53:53 +02:00
return;
}
/* Exception in system code. This is not supposed to happen. */
inkernel_disaster(saved_proc, frame, ep, is_nested);
2005-04-21 16:53:53 +02:00
panic("return from inkernel_disaster");
2005-04-21 16:53:53 +02:00
}
#if USE_SYSDEBUG
/*===========================================================================*
* proc_stacktrace_execute *
*===========================================================================*/
2012-03-25 20:25:53 +02:00
static void proc_stacktrace_execute(struct proc *whichproc, reg_t v_bp, reg_t pc)
{
reg_t v_hbp;
Primary goal for these changes is: - no longer have kernel have its own page table that is loaded on every kernel entry (trap, interrupt, exception). the primary purpose is to reduce the number of required reloads. Result: - kernel can only access memory of process that was running when kernel was entered - kernel must be mapped into every process page table, so traps to kernel keep working Problem: - kernel must often access memory of arbitrary processes (e.g. send arbitrary processes messages); this can't happen directly any more; usually because that process' page table isn't loaded at all, sometimes because that memory isn't mapped in at all, sometimes because it isn't mapped in read-write. So: - kernel must be able to map in memory of any process, in its own address space. Implementation: - VM and kernel share a range of memory in which addresses of all page tables of all processes are available. This has two purposes: . Kernel has to know what data to copy in order to map in a range . Kernel has to know where to write the data in order to map it in That last point is because kernel has to write in the currently loaded page table. - Processes and kernel are separated through segments; kernel segments haven't changed. - The kernel keeps the process whose page table is currently loaded in 'ptproc.' - If it wants to map in a range of memory, it writes the value of the page directory entry for that range into the page directory entry in the currently loaded map. There is a slot reserved for such purposes. The kernel can then access this memory directly. - In order to do this, its segment has been increased (and the segments of processes start where it ends). - In the pagefault handler, detect if the kernel is doing 'trappable' memory access (i.e. a pagefault isn't a fatal error) and if so, - set the saved instruction pointer to phys_copy_fault, breaking out of phys_copy - set the saved eax register to the address of the page fault, both for sanity checking and for checking in which of the two ranges that phys_copy was called with the fault occured - Some boot-time processes do not have their own page table, and are mapped in with the kernel, and separated with segments. The kernel detects this using HASPT. If such a process has to be scheduled, any page table will work and no page table switch is done. Major changes in kernel are - When accessing user processes memory, kernel no longer explicitly checks before it does so if that memory is OK. It simply makes the mapping (if necessary), tries to do the operation, and traps the pagefault if that memory isn't present; if that happens, the copy function returns EFAULT. So all of the CHECKRANGE_OR_SUSPEND macros are gone. - Kernel no longer has to copy/read and parse page tables. - A message copying optimisation: when messages are copied, and the recipient isn't mapped in, they are copied into a buffer in the kernel. This is done in QueueMess. The next time the recipient is scheduled, this message is copied into its memory. This happens in schedcheck(). This eliminates the mapping/copying step for messages, and makes it easier to deliver messages. This eliminates soft_notify. - Kernel no longer creates a page table at all, so the vm_setbuf and pagetable writing in memory.c is gone. Minor changes in kernel are - ipc_stats thrown out, wasn't used - misc flags all renamed to MF_* - NOREC_* macros to enter and leave functions that should not be called recursively; just sanity checks really - code to fully decode segment selectors and descriptors to print on exceptions - lots of vmassert()s added, only executed if DEBUG_VMASSERT is 1
2009-09-21 16:31:52 +02:00
int iskernel;
int n = 0;
Primary goal for these changes is: - no longer have kernel have its own page table that is loaded on every kernel entry (trap, interrupt, exception). the primary purpose is to reduce the number of required reloads. Result: - kernel can only access memory of process that was running when kernel was entered - kernel must be mapped into every process page table, so traps to kernel keep working Problem: - kernel must often access memory of arbitrary processes (e.g. send arbitrary processes messages); this can't happen directly any more; usually because that process' page table isn't loaded at all, sometimes because that memory isn't mapped in at all, sometimes because it isn't mapped in read-write. So: - kernel must be able to map in memory of any process, in its own address space. Implementation: - VM and kernel share a range of memory in which addresses of all page tables of all processes are available. This has two purposes: . Kernel has to know what data to copy in order to map in a range . Kernel has to know where to write the data in order to map it in That last point is because kernel has to write in the currently loaded page table. - Processes and kernel are separated through segments; kernel segments haven't changed. - The kernel keeps the process whose page table is currently loaded in 'ptproc.' - If it wants to map in a range of memory, it writes the value of the page directory entry for that range into the page directory entry in the currently loaded map. There is a slot reserved for such purposes. The kernel can then access this memory directly. - In order to do this, its segment has been increased (and the segments of processes start where it ends). - In the pagefault handler, detect if the kernel is doing 'trappable' memory access (i.e. a pagefault isn't a fatal error) and if so, - set the saved instruction pointer to phys_copy_fault, breaking out of phys_copy - set the saved eax register to the address of the page fault, both for sanity checking and for checking in which of the two ranges that phys_copy was called with the fault occured - Some boot-time processes do not have their own page table, and are mapped in with the kernel, and separated with segments. The kernel detects this using HASPT. If such a process has to be scheduled, any page table will work and no page table switch is done. Major changes in kernel are - When accessing user processes memory, kernel no longer explicitly checks before it does so if that memory is OK. It simply makes the mapping (if necessary), tries to do the operation, and traps the pagefault if that memory isn't present; if that happens, the copy function returns EFAULT. So all of the CHECKRANGE_OR_SUSPEND macros are gone. - Kernel no longer has to copy/read and parse page tables. - A message copying optimisation: when messages are copied, and the recipient isn't mapped in, they are copied into a buffer in the kernel. This is done in QueueMess. The next time the recipient is scheduled, this message is copied into its memory. This happens in schedcheck(). This eliminates the mapping/copying step for messages, and makes it easier to deliver messages. This eliminates soft_notify. - Kernel no longer creates a page table at all, so the vm_setbuf and pagetable writing in memory.c is gone. Minor changes in kernel are - ipc_stats thrown out, wasn't used - misc flags all renamed to MF_* - NOREC_* macros to enter and leave functions that should not be called recursively; just sanity checks really - code to fully decode segment selectors and descriptors to print on exceptions - lots of vmassert()s added, only executed if DEBUG_VMASSERT is 1
2009-09-21 16:31:52 +02:00
iskernel = iskernelp(whichproc);
printf("%-8.8s %6d 0x%lx ",
whichproc->p_name, whichproc->p_endpoint, pc);
while(v_bp) {
reg_t v_pc;
Primary goal for these changes is: - no longer have kernel have its own page table that is loaded on every kernel entry (trap, interrupt, exception). the primary purpose is to reduce the number of required reloads. Result: - kernel can only access memory of process that was running when kernel was entered - kernel must be mapped into every process page table, so traps to kernel keep working Problem: - kernel must often access memory of arbitrary processes (e.g. send arbitrary processes messages); this can't happen directly any more; usually because that process' page table isn't loaded at all, sometimes because that memory isn't mapped in at all, sometimes because it isn't mapped in read-write. So: - kernel must be able to map in memory of any process, in its own address space. Implementation: - VM and kernel share a range of memory in which addresses of all page tables of all processes are available. This has two purposes: . Kernel has to know what data to copy in order to map in a range . Kernel has to know where to write the data in order to map it in That last point is because kernel has to write in the currently loaded page table. - Processes and kernel are separated through segments; kernel segments haven't changed. - The kernel keeps the process whose page table is currently loaded in 'ptproc.' - If it wants to map in a range of memory, it writes the value of the page directory entry for that range into the page directory entry in the currently loaded map. There is a slot reserved for such purposes. The kernel can then access this memory directly. - In order to do this, its segment has been increased (and the segments of processes start where it ends). - In the pagefault handler, detect if the kernel is doing 'trappable' memory access (i.e. a pagefault isn't a fatal error) and if so, - set the saved instruction pointer to phys_copy_fault, breaking out of phys_copy - set the saved eax register to the address of the page fault, both for sanity checking and for checking in which of the two ranges that phys_copy was called with the fault occured - Some boot-time processes do not have their own page table, and are mapped in with the kernel, and separated with segments. The kernel detects this using HASPT. If such a process has to be scheduled, any page table will work and no page table switch is done. Major changes in kernel are - When accessing user processes memory, kernel no longer explicitly checks before it does so if that memory is OK. It simply makes the mapping (if necessary), tries to do the operation, and traps the pagefault if that memory isn't present; if that happens, the copy function returns EFAULT. So all of the CHECKRANGE_OR_SUSPEND macros are gone. - Kernel no longer has to copy/read and parse page tables. - A message copying optimisation: when messages are copied, and the recipient isn't mapped in, they are copied into a buffer in the kernel. This is done in QueueMess. The next time the recipient is scheduled, this message is copied into its memory. This happens in schedcheck(). This eliminates the mapping/copying step for messages, and makes it easier to deliver messages. This eliminates soft_notify. - Kernel no longer creates a page table at all, so the vm_setbuf and pagetable writing in memory.c is gone. Minor changes in kernel are - ipc_stats thrown out, wasn't used - misc flags all renamed to MF_* - NOREC_* macros to enter and leave functions that should not be called recursively; just sanity checks really - code to fully decode segment selectors and descriptors to print on exceptions - lots of vmassert()s added, only executed if DEBUG_VMASSERT is 1
2009-09-21 16:31:52 +02:00
#define PRCOPY(pr, pv, v, n) \
(iskernel ? (memcpy((char *) v, (char *) pv, n), OK) : \
data_copy(pr->p_endpoint, pv, KERNEL, (vir_bytes) (v), n))
Primary goal for these changes is: - no longer have kernel have its own page table that is loaded on every kernel entry (trap, interrupt, exception). the primary purpose is to reduce the number of required reloads. Result: - kernel can only access memory of process that was running when kernel was entered - kernel must be mapped into every process page table, so traps to kernel keep working Problem: - kernel must often access memory of arbitrary processes (e.g. send arbitrary processes messages); this can't happen directly any more; usually because that process' page table isn't loaded at all, sometimes because that memory isn't mapped in at all, sometimes because it isn't mapped in read-write. So: - kernel must be able to map in memory of any process, in its own address space. Implementation: - VM and kernel share a range of memory in which addresses of all page tables of all processes are available. This has two purposes: . Kernel has to know what data to copy in order to map in a range . Kernel has to know where to write the data in order to map it in That last point is because kernel has to write in the currently loaded page table. - Processes and kernel are separated through segments; kernel segments haven't changed. - The kernel keeps the process whose page table is currently loaded in 'ptproc.' - If it wants to map in a range of memory, it writes the value of the page directory entry for that range into the page directory entry in the currently loaded map. There is a slot reserved for such purposes. The kernel can then access this memory directly. - In order to do this, its segment has been increased (and the segments of processes start where it ends). - In the pagefault handler, detect if the kernel is doing 'trappable' memory access (i.e. a pagefault isn't a fatal error) and if so, - set the saved instruction pointer to phys_copy_fault, breaking out of phys_copy - set the saved eax register to the address of the page fault, both for sanity checking and for checking in which of the two ranges that phys_copy was called with the fault occured - Some boot-time processes do not have their own page table, and are mapped in with the kernel, and separated with segments. The kernel detects this using HASPT. If such a process has to be scheduled, any page table will work and no page table switch is done. Major changes in kernel are - When accessing user processes memory, kernel no longer explicitly checks before it does so if that memory is OK. It simply makes the mapping (if necessary), tries to do the operation, and traps the pagefault if that memory isn't present; if that happens, the copy function returns EFAULT. So all of the CHECKRANGE_OR_SUSPEND macros are gone. - Kernel no longer has to copy/read and parse page tables. - A message copying optimisation: when messages are copied, and the recipient isn't mapped in, they are copied into a buffer in the kernel. This is done in QueueMess. The next time the recipient is scheduled, this message is copied into its memory. This happens in schedcheck(). This eliminates the mapping/copying step for messages, and makes it easier to deliver messages. This eliminates soft_notify. - Kernel no longer creates a page table at all, so the vm_setbuf and pagetable writing in memory.c is gone. Minor changes in kernel are - ipc_stats thrown out, wasn't used - misc flags all renamed to MF_* - NOREC_* macros to enter and leave functions that should not be called recursively; just sanity checks really - code to fully decode segment selectors and descriptors to print on exceptions - lots of vmassert()s added, only executed if DEBUG_VMASSERT is 1
2009-09-21 16:31:52 +02:00
if(PRCOPY(whichproc, v_bp, &v_hbp, sizeof(v_hbp)) != OK) {
printf("(v_bp 0x%lx ?)", v_bp);
break;
}
Primary goal for these changes is: - no longer have kernel have its own page table that is loaded on every kernel entry (trap, interrupt, exception). the primary purpose is to reduce the number of required reloads. Result: - kernel can only access memory of process that was running when kernel was entered - kernel must be mapped into every process page table, so traps to kernel keep working Problem: - kernel must often access memory of arbitrary processes (e.g. send arbitrary processes messages); this can't happen directly any more; usually because that process' page table isn't loaded at all, sometimes because that memory isn't mapped in at all, sometimes because it isn't mapped in read-write. So: - kernel must be able to map in memory of any process, in its own address space. Implementation: - VM and kernel share a range of memory in which addresses of all page tables of all processes are available. This has two purposes: . Kernel has to know what data to copy in order to map in a range . Kernel has to know where to write the data in order to map it in That last point is because kernel has to write in the currently loaded page table. - Processes and kernel are separated through segments; kernel segments haven't changed. - The kernel keeps the process whose page table is currently loaded in 'ptproc.' - If it wants to map in a range of memory, it writes the value of the page directory entry for that range into the page directory entry in the currently loaded map. There is a slot reserved for such purposes. The kernel can then access this memory directly. - In order to do this, its segment has been increased (and the segments of processes start where it ends). - In the pagefault handler, detect if the kernel is doing 'trappable' memory access (i.e. a pagefault isn't a fatal error) and if so, - set the saved instruction pointer to phys_copy_fault, breaking out of phys_copy - set the saved eax register to the address of the page fault, both for sanity checking and for checking in which of the two ranges that phys_copy was called with the fault occured - Some boot-time processes do not have their own page table, and are mapped in with the kernel, and separated with segments. The kernel detects this using HASPT. If such a process has to be scheduled, any page table will work and no page table switch is done. Major changes in kernel are - When accessing user processes memory, kernel no longer explicitly checks before it does so if that memory is OK. It simply makes the mapping (if necessary), tries to do the operation, and traps the pagefault if that memory isn't present; if that happens, the copy function returns EFAULT. So all of the CHECKRANGE_OR_SUSPEND macros are gone. - Kernel no longer has to copy/read and parse page tables. - A message copying optimisation: when messages are copied, and the recipient isn't mapped in, they are copied into a buffer in the kernel. This is done in QueueMess. The next time the recipient is scheduled, this message is copied into its memory. This happens in schedcheck(). This eliminates the mapping/copying step for messages, and makes it easier to deliver messages. This eliminates soft_notify. - Kernel no longer creates a page table at all, so the vm_setbuf and pagetable writing in memory.c is gone. Minor changes in kernel are - ipc_stats thrown out, wasn't used - misc flags all renamed to MF_* - NOREC_* macros to enter and leave functions that should not be called recursively; just sanity checks really - code to fully decode segment selectors and descriptors to print on exceptions - lots of vmassert()s added, only executed if DEBUG_VMASSERT is 1
2009-09-21 16:31:52 +02:00
if(PRCOPY(whichproc, v_bp + sizeof(v_pc), &v_pc, sizeof(v_pc)) != OK) {
printf("(v_pc 0x%lx ?)", v_bp + sizeof(v_pc));
break;
}
printf("0x%lx ", (unsigned long) v_pc);
if(v_hbp != 0 && v_hbp <= v_bp) {
printf("(hbp %lx ?)", v_hbp);
break;
}
v_bp = v_hbp;
if(n++ > 50) {
printf("(truncated after %d steps) ", n);
break;
}
}
printf("\n");
}
#endif /* USE_SYSDEBUG */
/*===========================================================================*
* proc_stacktrace *
*===========================================================================*/
2012-03-25 20:25:53 +02:00
void proc_stacktrace(struct proc *whichproc)
{
u32_t use_bp;
if(whichproc->p_seg.p_kern_trap_style == KTS_NONE) {
printf("WARNING: stacktrace of running proecss\n");
}
switch(whichproc->p_seg.p_kern_trap_style) {
case KTS_SYSENTER:
case KTS_SYSCALL:
{
u32_t sp = whichproc->p_reg.sp;
/* Full context is not available in the p_reg
* struct. Obtain it from the user's stack.
* The use stack pointer is always available.
* The fact that it's there, and the 16 byte offset,
* is a dependency on the trap code in
* kernel/arch/i386/usermapped_glo_ipc.S.
*/
if(data_copy(whichproc->p_endpoint, sp+16,
KERNEL, (vir_bytes) &use_bp,
sizeof(use_bp)) != OK) {
printf("stacktrace: aborting, copy failed\n");
return;
}
break;
}
default:
/* Full context is available; use the stored ebp */
use_bp = whichproc->p_reg.fp;
break;
}
#if USE_SYSDEBUG
proc_stacktrace_execute(whichproc, use_bp, whichproc->p_reg.pc);
#endif /* USE_SYSDEBUG */
}
2012-03-25 20:25:53 +02:00
void enable_fpu_exception(void)
{
u32_t cr0 = read_cr0();
if(!(cr0 & I386_CR0_TS))
write_cr0(cr0 | I386_CR0_TS);
}
2012-03-25 20:25:53 +02:00
void disable_fpu_exception(void)
{
clts();
}