2005-04-21 16:53:53 +02:00
|
|
|
/* This file contains a simple exception handler. Exceptions in user
|
2005-07-14 17:12:12 +02:00
|
|
|
* processes are converted to signals. Exceptions in a kernel task cause
|
|
|
|
* a panic.
|
2005-04-21 16:53:53 +02:00
|
|
|
*/
|
|
|
|
|
2010-04-02 00:22:33 +02:00
|
|
|
#include "kernel/kernel.h"
|
2010-09-15 16:09:36 +02:00
|
|
|
#include "arch_proto.h"
|
2005-04-21 16:53:53 +02:00
|
|
|
#include <signal.h>
|
2008-11-19 13:26:10 +01:00
|
|
|
#include <string.h>
|
2010-03-10 14:00:05 +01:00
|
|
|
#include <assert.h>
|
2010-06-07 09:43:17 +02:00
|
|
|
#include <machine/vm.h>
|
2005-04-21 16:53:53 +02:00
|
|
|
|
2011-07-22 12:55:04 +02:00
|
|
|
struct ex_s {
|
|
|
|
char *msg;
|
|
|
|
int signum;
|
|
|
|
int minprocessor;
|
|
|
|
};
|
|
|
|
|
|
|
|
static struct ex_s ex_data[] = {
|
|
|
|
{ "Divide error", SIGFPE, 86 },
|
|
|
|
{ "Debug exception", SIGTRAP, 86 },
|
|
|
|
{ "Nonmaskable interrupt", SIGBUS, 86 },
|
|
|
|
{ "Breakpoint", SIGEMT, 86 },
|
|
|
|
{ "Overflow", SIGFPE, 86 },
|
|
|
|
{ "Bounds check", SIGFPE, 186 },
|
|
|
|
{ "Invalid opcode", SIGILL, 186 },
|
|
|
|
{ "Coprocessor not available", SIGFPE, 186 },
|
|
|
|
{ "Double fault", SIGBUS, 286 },
|
|
|
|
{ "Coprocessor segment overrun", SIGSEGV, 286 },
|
|
|
|
{ "Invalid TSS", SIGSEGV, 286 },
|
|
|
|
{ "Segment not present", SIGSEGV, 286 },
|
|
|
|
{ "Stack exception", SIGSEGV, 286 }, /* STACK_FAULT already used */
|
|
|
|
{ "General protection", SIGSEGV, 286 },
|
|
|
|
{ "Page fault", SIGSEGV, 386 }, /* not close */
|
|
|
|
{ NULL, SIGILL, 0 }, /* probably software trap */
|
|
|
|
{ "Coprocessor error", SIGFPE, 386 },
|
|
|
|
{ "Alignment check", SIGBUS, 386 },
|
|
|
|
{ "Machine check", SIGBUS, 386 },
|
|
|
|
{ "SIMD exception", SIGFPE, 386 },
|
|
|
|
};
|
|
|
|
|
2012-03-25 20:25:53 +02:00
|
|
|
static void inkernel_disaster(struct proc *saved_proc,
|
2011-07-22 15:19:40 +02:00
|
|
|
struct exception_frame *frame, struct ex_s *ep, int is_nested);
|
|
|
|
|
2010-06-24 14:23:23 +02:00
|
|
|
extern int catch_pagefaults;
|
2008-11-19 13:26:10 +01:00
|
|
|
|
2012-03-25 20:25:53 +02:00
|
|
|
static void proc_stacktrace_execute(struct proc *whichproc, reg_t v_bp, reg_t pc);
|
2011-07-22 12:55:04 +02:00
|
|
|
|
2012-03-25 20:25:53 +02:00
|
|
|
static void pagefault( struct proc *pr,
|
2010-04-27 22:30:33 +02:00
|
|
|
struct exception_frame * frame,
|
|
|
|
int is_nested)
|
2008-11-19 13:26:10 +01:00
|
|
|
{
|
2012-06-06 19:05:28 +02:00
|
|
|
int in_physcopy = 0, in_memset = 0;
|
2008-11-19 13:26:10 +01:00
|
|
|
|
2009-11-06 10:08:26 +01:00
|
|
|
reg_t pagefaultcr2;
|
2010-04-27 01:21:26 +02:00
|
|
|
message m_pagefault;
|
|
|
|
int err;
|
2008-11-19 13:26:10 +01:00
|
|
|
|
2009-11-06 10:08:26 +01:00
|
|
|
pagefaultcr2 = read_cr2();
|
Primary goal for these changes is:
- no longer have kernel have its own page table that is loaded
on every kernel entry (trap, interrupt, exception). the primary
purpose is to reduce the number of required reloads.
Result:
- kernel can only access memory of process that was running when
kernel was entered
- kernel must be mapped into every process page table, so traps to
kernel keep working
Problem:
- kernel must often access memory of arbitrary processes (e.g. send
arbitrary processes messages); this can't happen directly any more;
usually because that process' page table isn't loaded at all, sometimes
because that memory isn't mapped in at all, sometimes because it isn't
mapped in read-write.
So:
- kernel must be able to map in memory of any process, in its own
address space.
Implementation:
- VM and kernel share a range of memory in which addresses of
all page tables of all processes are available. This has two purposes:
. Kernel has to know what data to copy in order to map in a range
. Kernel has to know where to write the data in order to map it in
That last point is because kernel has to write in the currently loaded
page table.
- Processes and kernel are separated through segments; kernel segments
haven't changed.
- The kernel keeps the process whose page table is currently loaded
in 'ptproc.'
- If it wants to map in a range of memory, it writes the value of the
page directory entry for that range into the page directory entry
in the currently loaded map. There is a slot reserved for such
purposes. The kernel can then access this memory directly.
- In order to do this, its segment has been increased (and the
segments of processes start where it ends).
- In the pagefault handler, detect if the kernel is doing
'trappable' memory access (i.e. a pagefault isn't a fatal
error) and if so,
- set the saved instruction pointer to phys_copy_fault,
breaking out of phys_copy
- set the saved eax register to the address of the page
fault, both for sanity checking and for checking in
which of the two ranges that phys_copy was called
with the fault occured
- Some boot-time processes do not have their own page table,
and are mapped in with the kernel, and separated with
segments. The kernel detects this using HASPT. If such a
process has to be scheduled, any page table will work and
no page table switch is done.
Major changes in kernel are
- When accessing user processes memory, kernel no longer
explicitly checks before it does so if that memory is OK.
It simply makes the mapping (if necessary), tries to do the
operation, and traps the pagefault if that memory isn't present;
if that happens, the copy function returns EFAULT.
So all of the CHECKRANGE_OR_SUSPEND macros are gone.
- Kernel no longer has to copy/read and parse page tables.
- A message copying optimisation: when messages are copied, and
the recipient isn't mapped in, they are copied into a buffer
in the kernel. This is done in QueueMess. The next time
the recipient is scheduled, this message is copied into
its memory. This happens in schedcheck().
This eliminates the mapping/copying step for messages, and makes
it easier to deliver messages. This eliminates soft_notify.
- Kernel no longer creates a page table at all, so the vm_setbuf
and pagetable writing in memory.c is gone.
Minor changes in kernel are
- ipc_stats thrown out, wasn't used
- misc flags all renamed to MF_*
- NOREC_* macros to enter and leave functions that should not
be called recursively; just sanity checks really
- code to fully decode segment selectors and descriptors
to print on exceptions
- lots of vmassert()s added, only executed if DEBUG_VMASSERT is 1
2009-09-21 16:31:52 +02:00
|
|
|
|
|
|
|
#if 0
|
|
|
|
printf("kernel: pagefault in pr %d, addr 0x%lx, his cr3 0x%lx, actual cr3 0x%lx\n",
|
|
|
|
pr->p_endpoint, pagefaultcr2, pr->p_seg.p_cr3, read_cr3());
|
|
|
|
#endif
|
|
|
|
|
2009-11-06 10:08:26 +01:00
|
|
|
in_physcopy = (frame->eip > (vir_bytes) phys_copy) &&
|
|
|
|
(frame->eip < (vir_bytes) phys_copy_fault);
|
Primary goal for these changes is:
- no longer have kernel have its own page table that is loaded
on every kernel entry (trap, interrupt, exception). the primary
purpose is to reduce the number of required reloads.
Result:
- kernel can only access memory of process that was running when
kernel was entered
- kernel must be mapped into every process page table, so traps to
kernel keep working
Problem:
- kernel must often access memory of arbitrary processes (e.g. send
arbitrary processes messages); this can't happen directly any more;
usually because that process' page table isn't loaded at all, sometimes
because that memory isn't mapped in at all, sometimes because it isn't
mapped in read-write.
So:
- kernel must be able to map in memory of any process, in its own
address space.
Implementation:
- VM and kernel share a range of memory in which addresses of
all page tables of all processes are available. This has two purposes:
. Kernel has to know what data to copy in order to map in a range
. Kernel has to know where to write the data in order to map it in
That last point is because kernel has to write in the currently loaded
page table.
- Processes and kernel are separated through segments; kernel segments
haven't changed.
- The kernel keeps the process whose page table is currently loaded
in 'ptproc.'
- If it wants to map in a range of memory, it writes the value of the
page directory entry for that range into the page directory entry
in the currently loaded map. There is a slot reserved for such
purposes. The kernel can then access this memory directly.
- In order to do this, its segment has been increased (and the
segments of processes start where it ends).
- In the pagefault handler, detect if the kernel is doing
'trappable' memory access (i.e. a pagefault isn't a fatal
error) and if so,
- set the saved instruction pointer to phys_copy_fault,
breaking out of phys_copy
- set the saved eax register to the address of the page
fault, both for sanity checking and for checking in
which of the two ranges that phys_copy was called
with the fault occured
- Some boot-time processes do not have their own page table,
and are mapped in with the kernel, and separated with
segments. The kernel detects this using HASPT. If such a
process has to be scheduled, any page table will work and
no page table switch is done.
Major changes in kernel are
- When accessing user processes memory, kernel no longer
explicitly checks before it does so if that memory is OK.
It simply makes the mapping (if necessary), tries to do the
operation, and traps the pagefault if that memory isn't present;
if that happens, the copy function returns EFAULT.
So all of the CHECKRANGE_OR_SUSPEND macros are gone.
- Kernel no longer has to copy/read and parse page tables.
- A message copying optimisation: when messages are copied, and
the recipient isn't mapped in, they are copied into a buffer
in the kernel. This is done in QueueMess. The next time
the recipient is scheduled, this message is copied into
its memory. This happens in schedcheck().
This eliminates the mapping/copying step for messages, and makes
it easier to deliver messages. This eliminates soft_notify.
- Kernel no longer creates a page table at all, so the vm_setbuf
and pagetable writing in memory.c is gone.
Minor changes in kernel are
- ipc_stats thrown out, wasn't used
- misc flags all renamed to MF_*
- NOREC_* macros to enter and leave functions that should not
be called recursively; just sanity checks really
- code to fully decode segment selectors and descriptors
to print on exceptions
- lots of vmassert()s added, only executed if DEBUG_VMASSERT is 1
2009-09-21 16:31:52 +02:00
|
|
|
|
2012-06-06 19:05:28 +02:00
|
|
|
in_memset = (frame->eip > (vir_bytes) phys_memset) &&
|
|
|
|
(frame->eip < (vir_bytes) memset_fault);
|
|
|
|
|
2009-11-06 10:08:26 +01:00
|
|
|
if((is_nested || iskernelp(pr)) &&
|
2012-06-06 19:05:28 +02:00
|
|
|
catch_pagefaults && (in_physcopy || in_memset)) {
|
Primary goal for these changes is:
- no longer have kernel have its own page table that is loaded
on every kernel entry (trap, interrupt, exception). the primary
purpose is to reduce the number of required reloads.
Result:
- kernel can only access memory of process that was running when
kernel was entered
- kernel must be mapped into every process page table, so traps to
kernel keep working
Problem:
- kernel must often access memory of arbitrary processes (e.g. send
arbitrary processes messages); this can't happen directly any more;
usually because that process' page table isn't loaded at all, sometimes
because that memory isn't mapped in at all, sometimes because it isn't
mapped in read-write.
So:
- kernel must be able to map in memory of any process, in its own
address space.
Implementation:
- VM and kernel share a range of memory in which addresses of
all page tables of all processes are available. This has two purposes:
. Kernel has to know what data to copy in order to map in a range
. Kernel has to know where to write the data in order to map it in
That last point is because kernel has to write in the currently loaded
page table.
- Processes and kernel are separated through segments; kernel segments
haven't changed.
- The kernel keeps the process whose page table is currently loaded
in 'ptproc.'
- If it wants to map in a range of memory, it writes the value of the
page directory entry for that range into the page directory entry
in the currently loaded map. There is a slot reserved for such
purposes. The kernel can then access this memory directly.
- In order to do this, its segment has been increased (and the
segments of processes start where it ends).
- In the pagefault handler, detect if the kernel is doing
'trappable' memory access (i.e. a pagefault isn't a fatal
error) and if so,
- set the saved instruction pointer to phys_copy_fault,
breaking out of phys_copy
- set the saved eax register to the address of the page
fault, both for sanity checking and for checking in
which of the two ranges that phys_copy was called
with the fault occured
- Some boot-time processes do not have their own page table,
and are mapped in with the kernel, and separated with
segments. The kernel detects this using HASPT. If such a
process has to be scheduled, any page table will work and
no page table switch is done.
Major changes in kernel are
- When accessing user processes memory, kernel no longer
explicitly checks before it does so if that memory is OK.
It simply makes the mapping (if necessary), tries to do the
operation, and traps the pagefault if that memory isn't present;
if that happens, the copy function returns EFAULT.
So all of the CHECKRANGE_OR_SUSPEND macros are gone.
- Kernel no longer has to copy/read and parse page tables.
- A message copying optimisation: when messages are copied, and
the recipient isn't mapped in, they are copied into a buffer
in the kernel. This is done in QueueMess. The next time
the recipient is scheduled, this message is copied into
its memory. This happens in schedcheck().
This eliminates the mapping/copying step for messages, and makes
it easier to deliver messages. This eliminates soft_notify.
- Kernel no longer creates a page table at all, so the vm_setbuf
and pagetable writing in memory.c is gone.
Minor changes in kernel are
- ipc_stats thrown out, wasn't used
- misc flags all renamed to MF_*
- NOREC_* macros to enter and leave functions that should not
be called recursively; just sanity checks really
- code to fully decode segment selectors and descriptors
to print on exceptions
- lots of vmassert()s added, only executed if DEBUG_VMASSERT is 1
2009-09-21 16:31:52 +02:00
|
|
|
#if 0
|
|
|
|
printf("pf caught! addr 0x%lx\n", pagefaultcr2);
|
|
|
|
#endif
|
2009-11-06 10:08:26 +01:00
|
|
|
if (is_nested) {
|
2012-06-06 19:05:28 +02:00
|
|
|
if(in_physcopy) {
|
|
|
|
assert(!in_memset);
|
|
|
|
frame->eip = (reg_t) phys_copy_fault_in_kernel;
|
|
|
|
} else {
|
|
|
|
frame->eip = (reg_t) memset_fault_in_kernel;
|
|
|
|
}
|
2009-11-06 10:08:26 +01:00
|
|
|
}
|
|
|
|
else {
|
|
|
|
pr->p_reg.pc = (reg_t) phys_copy_fault;
|
|
|
|
pr->p_reg.retreg = pagefaultcr2;
|
|
|
|
}
|
2010-05-11 17:14:10 +02:00
|
|
|
|
Primary goal for these changes is:
- no longer have kernel have its own page table that is loaded
on every kernel entry (trap, interrupt, exception). the primary
purpose is to reduce the number of required reloads.
Result:
- kernel can only access memory of process that was running when
kernel was entered
- kernel must be mapped into every process page table, so traps to
kernel keep working
Problem:
- kernel must often access memory of arbitrary processes (e.g. send
arbitrary processes messages); this can't happen directly any more;
usually because that process' page table isn't loaded at all, sometimes
because that memory isn't mapped in at all, sometimes because it isn't
mapped in read-write.
So:
- kernel must be able to map in memory of any process, in its own
address space.
Implementation:
- VM and kernel share a range of memory in which addresses of
all page tables of all processes are available. This has two purposes:
. Kernel has to know what data to copy in order to map in a range
. Kernel has to know where to write the data in order to map it in
That last point is because kernel has to write in the currently loaded
page table.
- Processes and kernel are separated through segments; kernel segments
haven't changed.
- The kernel keeps the process whose page table is currently loaded
in 'ptproc.'
- If it wants to map in a range of memory, it writes the value of the
page directory entry for that range into the page directory entry
in the currently loaded map. There is a slot reserved for such
purposes. The kernel can then access this memory directly.
- In order to do this, its segment has been increased (and the
segments of processes start where it ends).
- In the pagefault handler, detect if the kernel is doing
'trappable' memory access (i.e. a pagefault isn't a fatal
error) and if so,
- set the saved instruction pointer to phys_copy_fault,
breaking out of phys_copy
- set the saved eax register to the address of the page
fault, both for sanity checking and for checking in
which of the two ranges that phys_copy was called
with the fault occured
- Some boot-time processes do not have their own page table,
and are mapped in with the kernel, and separated with
segments. The kernel detects this using HASPT. If such a
process has to be scheduled, any page table will work and
no page table switch is done.
Major changes in kernel are
- When accessing user processes memory, kernel no longer
explicitly checks before it does so if that memory is OK.
It simply makes the mapping (if necessary), tries to do the
operation, and traps the pagefault if that memory isn't present;
if that happens, the copy function returns EFAULT.
So all of the CHECKRANGE_OR_SUSPEND macros are gone.
- Kernel no longer has to copy/read and parse page tables.
- A message copying optimisation: when messages are copied, and
the recipient isn't mapped in, they are copied into a buffer
in the kernel. This is done in QueueMess. The next time
the recipient is scheduled, this message is copied into
its memory. This happens in schedcheck().
This eliminates the mapping/copying step for messages, and makes
it easier to deliver messages. This eliminates soft_notify.
- Kernel no longer creates a page table at all, so the vm_setbuf
and pagetable writing in memory.c is gone.
Minor changes in kernel are
- ipc_stats thrown out, wasn't used
- misc flags all renamed to MF_*
- NOREC_* macros to enter and leave functions that should not
be called recursively; just sanity checks really
- code to fully decode segment selectors and descriptors
to print on exceptions
- lots of vmassert()s added, only executed if DEBUG_VMASSERT is 1
2009-09-21 16:31:52 +02:00
|
|
|
return;
|
|
|
|
}
|
2008-11-19 13:26:10 +01:00
|
|
|
|
2010-05-11 17:14:10 +02:00
|
|
|
if(is_nested) {
|
2011-07-22 12:55:04 +02:00
|
|
|
printf("pagefault in kernel at pc 0x%lx address 0x%lx\n",
|
|
|
|
frame->eip, pagefaultcr2);
|
|
|
|
inkernel_disaster(pr, frame, NULL, is_nested);
|
2010-05-11 17:14:10 +02:00
|
|
|
}
|
|
|
|
|
No more intel/minix segments.
This commit removes all traces of Minix segments (the text/data/stack
memory map abstraction in the kernel) and significance of Intel segments
(hardware segments like CS, DS that add offsets to all addressing before
page table translation). This ultimately simplifies the memory layout
and addressing and makes the same layout possible on non-Intel
architectures.
There are only two types of addresses in the world now: virtual
and physical; even the kernel and processes have the same virtual
address space. Kernel and user processes can be distinguished at a
glance as processes won't use 0xF0000000 and above.
No static pre-allocated memory sizes exist any more.
Changes to booting:
. The pre_init.c leaves the kernel and modules exactly as
they were left by the bootloader in physical memory
. The kernel starts running using physical addressing,
loaded at a fixed location given in its linker script by the
bootloader. All code and data in this phase are linked to
this fixed low location.
. It makes a bootstrap pagetable to map itself to a
fixed high location (also in linker script) and jumps to
the high address. All code and data then use this high addressing.
. All code/data symbols linked at the low addresses is prefixed by
an objcopy step with __k_unpaged_*, so that that code cannot
reference highly-linked symbols (which aren't valid yet) or vice
versa (symbols that aren't valid any more).
. The two addressing modes are separated in the linker script by
collecting the unpaged_*.o objects and linking them with low
addresses, and linking the rest high. Some objects are linked
twice, once low and once high.
. The bootstrap phase passes a lot of information (e.g. free memory
list, physical location of the modules, etc.) using the kinfo
struct.
. After this bootstrap the low-linked part is freed.
. The kernel maps in VM into the bootstrap page table so that VM can
begin executing. Its first job is to make page tables for all other
boot processes. So VM runs before RS, and RS gets a fully dynamic,
VM-managed address space. VM gets its privilege info from RS as usual
but that happens after RS starts running.
. Both the kernel loading VM and VM organizing boot processes happen
using the libexec logic. This removes the last reason for VM to
still know much about exec() and vm/exec.c is gone.
Further Implementation:
. All segments are based at 0 and have a 4 GB limit.
. The kernel is mapped in at the top of the virtual address
space so as not to constrain the user processes.
. Processes do not use segments from the LDT at all; there are
no segments in the LDT any more, so no LLDT is needed.
. The Minix segments T/D/S are gone and so none of the
user-space or in-kernel copy functions use them. The copy
functions use a process endpoint of NONE to realize it's
a physical address, virtual otherwise.
. The umap call only makes sense to translate a virtual address
to a physical address now.
. Segments-related calls like newmap and alloc_segments are gone.
. All segments-related translation in VM is gone (vir2map etc).
. Initialization in VM is simpler as no moving around is necessary.
. VM and all other boot processes can be linked wherever they wish
and will be mapped in at the right location by the kernel and VM
respectively.
Other changes:
. The multiboot code is less special: it does not use mb_print
for its diagnostics any more but uses printf() as normal, saving
the output into the diagnostics buffer, only printing to the
screen using the direct print functions if a panic() occurs.
. The multiboot code uses the flexible 'free memory map list'
style to receive the list of free memory if available.
. The kernel determines the memory layout of the processes to
a degree: it tells VM where the kernel starts and ends and
where the kernel wants the top of the process to be. VM then
uses this entire range, i.e. the stack is right at the top,
and mmap()ped bits of memory are placed below that downwards,
and the break grows upwards.
Other Consequences:
. Every process gets its own page table as address spaces
can't be separated any more by segments.
. As all segments are 0-based, there is no distinction between
virtual and linear addresses, nor between userspace and
kernel addresses.
. Less work is done when context switching, leading to a net
performance increase. (8% faster on my machine for 'make servers'.)
. The layout and configuration of the GDT makes sysenter and syscall
possible.
2012-05-07 16:03:35 +02:00
|
|
|
/* VM can't handle page faults. */
|
|
|
|
if(pr->p_endpoint == VM_PROC_NR) {
|
2008-11-19 13:26:10 +01:00
|
|
|
/* Page fault we can't / don't want to
|
|
|
|
* handle.
|
|
|
|
*/
|
No more intel/minix segments.
This commit removes all traces of Minix segments (the text/data/stack
memory map abstraction in the kernel) and significance of Intel segments
(hardware segments like CS, DS that add offsets to all addressing before
page table translation). This ultimately simplifies the memory layout
and addressing and makes the same layout possible on non-Intel
architectures.
There are only two types of addresses in the world now: virtual
and physical; even the kernel and processes have the same virtual
address space. Kernel and user processes can be distinguished at a
glance as processes won't use 0xF0000000 and above.
No static pre-allocated memory sizes exist any more.
Changes to booting:
. The pre_init.c leaves the kernel and modules exactly as
they were left by the bootloader in physical memory
. The kernel starts running using physical addressing,
loaded at a fixed location given in its linker script by the
bootloader. All code and data in this phase are linked to
this fixed low location.
. It makes a bootstrap pagetable to map itself to a
fixed high location (also in linker script) and jumps to
the high address. All code and data then use this high addressing.
. All code/data symbols linked at the low addresses is prefixed by
an objcopy step with __k_unpaged_*, so that that code cannot
reference highly-linked symbols (which aren't valid yet) or vice
versa (symbols that aren't valid any more).
. The two addressing modes are separated in the linker script by
collecting the unpaged_*.o objects and linking them with low
addresses, and linking the rest high. Some objects are linked
twice, once low and once high.
. The bootstrap phase passes a lot of information (e.g. free memory
list, physical location of the modules, etc.) using the kinfo
struct.
. After this bootstrap the low-linked part is freed.
. The kernel maps in VM into the bootstrap page table so that VM can
begin executing. Its first job is to make page tables for all other
boot processes. So VM runs before RS, and RS gets a fully dynamic,
VM-managed address space. VM gets its privilege info from RS as usual
but that happens after RS starts running.
. Both the kernel loading VM and VM organizing boot processes happen
using the libexec logic. This removes the last reason for VM to
still know much about exec() and vm/exec.c is gone.
Further Implementation:
. All segments are based at 0 and have a 4 GB limit.
. The kernel is mapped in at the top of the virtual address
space so as not to constrain the user processes.
. Processes do not use segments from the LDT at all; there are
no segments in the LDT any more, so no LLDT is needed.
. The Minix segments T/D/S are gone and so none of the
user-space or in-kernel copy functions use them. The copy
functions use a process endpoint of NONE to realize it's
a physical address, virtual otherwise.
. The umap call only makes sense to translate a virtual address
to a physical address now.
. Segments-related calls like newmap and alloc_segments are gone.
. All segments-related translation in VM is gone (vir2map etc).
. Initialization in VM is simpler as no moving around is necessary.
. VM and all other boot processes can be linked wherever they wish
and will be mapped in at the right location by the kernel and VM
respectively.
Other changes:
. The multiboot code is less special: it does not use mb_print
for its diagnostics any more but uses printf() as normal, saving
the output into the diagnostics buffer, only printing to the
screen using the direct print functions if a panic() occurs.
. The multiboot code uses the flexible 'free memory map list'
style to receive the list of free memory if available.
. The kernel determines the memory layout of the processes to
a degree: it tells VM where the kernel starts and ends and
where the kernel wants the top of the process to be. VM then
uses this entire range, i.e. the stack is right at the top,
and mmap()ped bits of memory are placed below that downwards,
and the break grows upwards.
Other Consequences:
. Every process gets its own page table as address spaces
can't be separated any more by segments.
. As all segments are 0-based, there is no distinction between
virtual and linear addresses, nor between userspace and
kernel addresses.
. Less work is done when context switching, leading to a net
performance increase. (8% faster on my machine for 'make servers'.)
. The layout and configuration of the GDT makes sysenter and syscall
possible.
2012-05-07 16:03:35 +02:00
|
|
|
printf("pagefault for VM on CPU %d, "
|
2010-09-15 16:10:30 +02:00
|
|
|
"pc = 0x%x, addr = 0x%x, flags = 0x%x, is_nested %d\n",
|
No more intel/minix segments.
This commit removes all traces of Minix segments (the text/data/stack
memory map abstraction in the kernel) and significance of Intel segments
(hardware segments like CS, DS that add offsets to all addressing before
page table translation). This ultimately simplifies the memory layout
and addressing and makes the same layout possible on non-Intel
architectures.
There are only two types of addresses in the world now: virtual
and physical; even the kernel and processes have the same virtual
address space. Kernel and user processes can be distinguished at a
glance as processes won't use 0xF0000000 and above.
No static pre-allocated memory sizes exist any more.
Changes to booting:
. The pre_init.c leaves the kernel and modules exactly as
they were left by the bootloader in physical memory
. The kernel starts running using physical addressing,
loaded at a fixed location given in its linker script by the
bootloader. All code and data in this phase are linked to
this fixed low location.
. It makes a bootstrap pagetable to map itself to a
fixed high location (also in linker script) and jumps to
the high address. All code and data then use this high addressing.
. All code/data symbols linked at the low addresses is prefixed by
an objcopy step with __k_unpaged_*, so that that code cannot
reference highly-linked symbols (which aren't valid yet) or vice
versa (symbols that aren't valid any more).
. The two addressing modes are separated in the linker script by
collecting the unpaged_*.o objects and linking them with low
addresses, and linking the rest high. Some objects are linked
twice, once low and once high.
. The bootstrap phase passes a lot of information (e.g. free memory
list, physical location of the modules, etc.) using the kinfo
struct.
. After this bootstrap the low-linked part is freed.
. The kernel maps in VM into the bootstrap page table so that VM can
begin executing. Its first job is to make page tables for all other
boot processes. So VM runs before RS, and RS gets a fully dynamic,
VM-managed address space. VM gets its privilege info from RS as usual
but that happens after RS starts running.
. Both the kernel loading VM and VM organizing boot processes happen
using the libexec logic. This removes the last reason for VM to
still know much about exec() and vm/exec.c is gone.
Further Implementation:
. All segments are based at 0 and have a 4 GB limit.
. The kernel is mapped in at the top of the virtual address
space so as not to constrain the user processes.
. Processes do not use segments from the LDT at all; there are
no segments in the LDT any more, so no LLDT is needed.
. The Minix segments T/D/S are gone and so none of the
user-space or in-kernel copy functions use them. The copy
functions use a process endpoint of NONE to realize it's
a physical address, virtual otherwise.
. The umap call only makes sense to translate a virtual address
to a physical address now.
. Segments-related calls like newmap and alloc_segments are gone.
. All segments-related translation in VM is gone (vir2map etc).
. Initialization in VM is simpler as no moving around is necessary.
. VM and all other boot processes can be linked wherever they wish
and will be mapped in at the right location by the kernel and VM
respectively.
Other changes:
. The multiboot code is less special: it does not use mb_print
for its diagnostics any more but uses printf() as normal, saving
the output into the diagnostics buffer, only printing to the
screen using the direct print functions if a panic() occurs.
. The multiboot code uses the flexible 'free memory map list'
style to receive the list of free memory if available.
. The kernel determines the memory layout of the processes to
a degree: it tells VM where the kernel starts and ends and
where the kernel wants the top of the process to be. VM then
uses this entire range, i.e. the stack is right at the top,
and mmap()ped bits of memory are placed below that downwards,
and the break grows upwards.
Other Consequences:
. Every process gets its own page table as address spaces
can't be separated any more by segments.
. As all segments are 0-based, there is no distinction between
virtual and linear addresses, nor between userspace and
kernel addresses.
. Less work is done when context switching, leading to a net
performance increase. (8% faster on my machine for 'make servers'.)
. The layout and configuration of the GDT makes sysenter and syscall
possible.
2012-05-07 16:03:35 +02:00
|
|
|
cpuid, pr->p_reg.pc, pagefaultcr2, frame->errcode,
|
|
|
|
is_nested);
|
2008-11-19 13:26:10 +01:00
|
|
|
proc_stacktrace(pr);
|
2010-03-03 16:45:01 +01:00
|
|
|
printf("pc of pagefault: 0x%lx\n", frame->eip);
|
No more intel/minix segments.
This commit removes all traces of Minix segments (the text/data/stack
memory map abstraction in the kernel) and significance of Intel segments
(hardware segments like CS, DS that add offsets to all addressing before
page table translation). This ultimately simplifies the memory layout
and addressing and makes the same layout possible on non-Intel
architectures.
There are only two types of addresses in the world now: virtual
and physical; even the kernel and processes have the same virtual
address space. Kernel and user processes can be distinguished at a
glance as processes won't use 0xF0000000 and above.
No static pre-allocated memory sizes exist any more.
Changes to booting:
. The pre_init.c leaves the kernel and modules exactly as
they were left by the bootloader in physical memory
. The kernel starts running using physical addressing,
loaded at a fixed location given in its linker script by the
bootloader. All code and data in this phase are linked to
this fixed low location.
. It makes a bootstrap pagetable to map itself to a
fixed high location (also in linker script) and jumps to
the high address. All code and data then use this high addressing.
. All code/data symbols linked at the low addresses is prefixed by
an objcopy step with __k_unpaged_*, so that that code cannot
reference highly-linked symbols (which aren't valid yet) or vice
versa (symbols that aren't valid any more).
. The two addressing modes are separated in the linker script by
collecting the unpaged_*.o objects and linking them with low
addresses, and linking the rest high. Some objects are linked
twice, once low and once high.
. The bootstrap phase passes a lot of information (e.g. free memory
list, physical location of the modules, etc.) using the kinfo
struct.
. After this bootstrap the low-linked part is freed.
. The kernel maps in VM into the bootstrap page table so that VM can
begin executing. Its first job is to make page tables for all other
boot processes. So VM runs before RS, and RS gets a fully dynamic,
VM-managed address space. VM gets its privilege info from RS as usual
but that happens after RS starts running.
. Both the kernel loading VM and VM organizing boot processes happen
using the libexec logic. This removes the last reason for VM to
still know much about exec() and vm/exec.c is gone.
Further Implementation:
. All segments are based at 0 and have a 4 GB limit.
. The kernel is mapped in at the top of the virtual address
space so as not to constrain the user processes.
. Processes do not use segments from the LDT at all; there are
no segments in the LDT any more, so no LLDT is needed.
. The Minix segments T/D/S are gone and so none of the
user-space or in-kernel copy functions use them. The copy
functions use a process endpoint of NONE to realize it's
a physical address, virtual otherwise.
. The umap call only makes sense to translate a virtual address
to a physical address now.
. Segments-related calls like newmap and alloc_segments are gone.
. All segments-related translation in VM is gone (vir2map etc).
. Initialization in VM is simpler as no moving around is necessary.
. VM and all other boot processes can be linked wherever they wish
and will be mapped in at the right location by the kernel and VM
respectively.
Other changes:
. The multiboot code is less special: it does not use mb_print
for its diagnostics any more but uses printf() as normal, saving
the output into the diagnostics buffer, only printing to the
screen using the direct print functions if a panic() occurs.
. The multiboot code uses the flexible 'free memory map list'
style to receive the list of free memory if available.
. The kernel determines the memory layout of the processes to
a degree: it tells VM where the kernel starts and ends and
where the kernel wants the top of the process to be. VM then
uses this entire range, i.e. the stack is right at the top,
and mmap()ped bits of memory are placed below that downwards,
and the break grows upwards.
Other Consequences:
. Every process gets its own page table as address spaces
can't be separated any more by segments.
. As all segments are 0-based, there is no distinction between
virtual and linear addresses, nor between userspace and
kernel addresses.
. Less work is done when context switching, leading to a net
performance increase. (8% faster on my machine for 'make servers'.)
. The layout and configuration of the GDT makes sysenter and syscall
possible.
2012-05-07 16:03:35 +02:00
|
|
|
panic("pagefault in VM");
|
2010-07-21 01:51:34 +02:00
|
|
|
|
2008-11-19 13:26:10 +01:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
Primary goal for these changes is:
- no longer have kernel have its own page table that is loaded
on every kernel entry (trap, interrupt, exception). the primary
purpose is to reduce the number of required reloads.
Result:
- kernel can only access memory of process that was running when
kernel was entered
- kernel must be mapped into every process page table, so traps to
kernel keep working
Problem:
- kernel must often access memory of arbitrary processes (e.g. send
arbitrary processes messages); this can't happen directly any more;
usually because that process' page table isn't loaded at all, sometimes
because that memory isn't mapped in at all, sometimes because it isn't
mapped in read-write.
So:
- kernel must be able to map in memory of any process, in its own
address space.
Implementation:
- VM and kernel share a range of memory in which addresses of
all page tables of all processes are available. This has two purposes:
. Kernel has to know what data to copy in order to map in a range
. Kernel has to know where to write the data in order to map it in
That last point is because kernel has to write in the currently loaded
page table.
- Processes and kernel are separated through segments; kernel segments
haven't changed.
- The kernel keeps the process whose page table is currently loaded
in 'ptproc.'
- If it wants to map in a range of memory, it writes the value of the
page directory entry for that range into the page directory entry
in the currently loaded map. There is a slot reserved for such
purposes. The kernel can then access this memory directly.
- In order to do this, its segment has been increased (and the
segments of processes start where it ends).
- In the pagefault handler, detect if the kernel is doing
'trappable' memory access (i.e. a pagefault isn't a fatal
error) and if so,
- set the saved instruction pointer to phys_copy_fault,
breaking out of phys_copy
- set the saved eax register to the address of the page
fault, both for sanity checking and for checking in
which of the two ranges that phys_copy was called
with the fault occured
- Some boot-time processes do not have their own page table,
and are mapped in with the kernel, and separated with
segments. The kernel detects this using HASPT. If such a
process has to be scheduled, any page table will work and
no page table switch is done.
Major changes in kernel are
- When accessing user processes memory, kernel no longer
explicitly checks before it does so if that memory is OK.
It simply makes the mapping (if necessary), tries to do the
operation, and traps the pagefault if that memory isn't present;
if that happens, the copy function returns EFAULT.
So all of the CHECKRANGE_OR_SUSPEND macros are gone.
- Kernel no longer has to copy/read and parse page tables.
- A message copying optimisation: when messages are copied, and
the recipient isn't mapped in, they are copied into a buffer
in the kernel. This is done in QueueMess. The next time
the recipient is scheduled, this message is copied into
its memory. This happens in schedcheck().
This eliminates the mapping/copying step for messages, and makes
it easier to deliver messages. This eliminates soft_notify.
- Kernel no longer creates a page table at all, so the vm_setbuf
and pagetable writing in memory.c is gone.
Minor changes in kernel are
- ipc_stats thrown out, wasn't used
- misc flags all renamed to MF_*
- NOREC_* macros to enter and leave functions that should not
be called recursively; just sanity checks really
- code to fully decode segment selectors and descriptors
to print on exceptions
- lots of vmassert()s added, only executed if DEBUG_VMASSERT is 1
2009-09-21 16:31:52 +02:00
|
|
|
/* Don't schedule this process until pagefault is handled. */
|
2010-02-09 16:26:58 +01:00
|
|
|
RTS_SET(pr, RTS_PAGEFAULT);
|
Primary goal for these changes is:
- no longer have kernel have its own page table that is loaded
on every kernel entry (trap, interrupt, exception). the primary
purpose is to reduce the number of required reloads.
Result:
- kernel can only access memory of process that was running when
kernel was entered
- kernel must be mapped into every process page table, so traps to
kernel keep working
Problem:
- kernel must often access memory of arbitrary processes (e.g. send
arbitrary processes messages); this can't happen directly any more;
usually because that process' page table isn't loaded at all, sometimes
because that memory isn't mapped in at all, sometimes because it isn't
mapped in read-write.
So:
- kernel must be able to map in memory of any process, in its own
address space.
Implementation:
- VM and kernel share a range of memory in which addresses of
all page tables of all processes are available. This has two purposes:
. Kernel has to know what data to copy in order to map in a range
. Kernel has to know where to write the data in order to map it in
That last point is because kernel has to write in the currently loaded
page table.
- Processes and kernel are separated through segments; kernel segments
haven't changed.
- The kernel keeps the process whose page table is currently loaded
in 'ptproc.'
- If it wants to map in a range of memory, it writes the value of the
page directory entry for that range into the page directory entry
in the currently loaded map. There is a slot reserved for such
purposes. The kernel can then access this memory directly.
- In order to do this, its segment has been increased (and the
segments of processes start where it ends).
- In the pagefault handler, detect if the kernel is doing
'trappable' memory access (i.e. a pagefault isn't a fatal
error) and if so,
- set the saved instruction pointer to phys_copy_fault,
breaking out of phys_copy
- set the saved eax register to the address of the page
fault, both for sanity checking and for checking in
which of the two ranges that phys_copy was called
with the fault occured
- Some boot-time processes do not have their own page table,
and are mapped in with the kernel, and separated with
segments. The kernel detects this using HASPT. If such a
process has to be scheduled, any page table will work and
no page table switch is done.
Major changes in kernel are
- When accessing user processes memory, kernel no longer
explicitly checks before it does so if that memory is OK.
It simply makes the mapping (if necessary), tries to do the
operation, and traps the pagefault if that memory isn't present;
if that happens, the copy function returns EFAULT.
So all of the CHECKRANGE_OR_SUSPEND macros are gone.
- Kernel no longer has to copy/read and parse page tables.
- A message copying optimisation: when messages are copied, and
the recipient isn't mapped in, they are copied into a buffer
in the kernel. This is done in QueueMess. The next time
the recipient is scheduled, this message is copied into
its memory. This happens in schedcheck().
This eliminates the mapping/copying step for messages, and makes
it easier to deliver messages. This eliminates soft_notify.
- Kernel no longer creates a page table at all, so the vm_setbuf
and pagetable writing in memory.c is gone.
Minor changes in kernel are
- ipc_stats thrown out, wasn't used
- misc flags all renamed to MF_*
- NOREC_* macros to enter and leave functions that should not
be called recursively; just sanity checks really
- code to fully decode segment selectors and descriptors
to print on exceptions
- lots of vmassert()s added, only executed if DEBUG_VMASSERT is 1
2009-09-21 16:31:52 +02:00
|
|
|
|
2010-04-27 01:21:26 +02:00
|
|
|
/* tell Vm about the pagefault */
|
|
|
|
m_pagefault.m_source = pr->p_endpoint;
|
|
|
|
m_pagefault.m_type = VM_PAGEFAULT;
|
|
|
|
m_pagefault.VPF_ADDR = pagefaultcr2;
|
|
|
|
m_pagefault.VPF_FLAGS = frame->errcode;
|
|
|
|
|
|
|
|
if ((err = mini_send(pr, VM_PROC_NR,
|
|
|
|
&m_pagefault, FROM_KERNEL))) {
|
|
|
|
panic("WARNING: pagefault: mini_send returned %d\n", err);
|
|
|
|
}
|
2008-11-19 13:26:10 +01:00
|
|
|
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2012-03-25 20:25:53 +02:00
|
|
|
static void inkernel_disaster(struct proc *saved_proc,
|
2011-07-22 12:55:04 +02:00
|
|
|
struct exception_frame * frame, struct ex_s *ep,
|
|
|
|
int is_nested)
|
|
|
|
{
|
2011-09-16 14:57:49 +02:00
|
|
|
#if USE_SYSDEBUG
|
2011-07-22 12:55:04 +02:00
|
|
|
if(ep) {
|
2011-09-16 20:03:15 +02:00
|
|
|
if (ep->msg == NULL)
|
2011-07-22 12:55:04 +02:00
|
|
|
printf("\nIntel-reserved exception %d\n", frame->vector);
|
|
|
|
else
|
|
|
|
printf("\n%s\n", ep->msg);
|
|
|
|
}
|
|
|
|
|
|
|
|
printf("cpu %d is_nested = %d ", cpuid, is_nested);
|
|
|
|
|
|
|
|
printf("vec_nr= %d, trap_errno= 0x%x, eip= 0x%x, "
|
|
|
|
"cs= 0x%x, eflags= 0x%x trap_esp 0x%08x\n",
|
|
|
|
frame->vector, frame->errcode, frame->eip,
|
|
|
|
frame->cs, frame->eflags, frame);
|
|
|
|
printf("KERNEL registers :\n");
|
|
|
|
#define REG(n) (((u32_t *)frame)[-n])
|
|
|
|
printf(
|
|
|
|
"\t%%eax 0x%08x %%ebx 0x%08x %%ecx 0x%08x %%edx 0x%08x\n"
|
|
|
|
"\t%%esp 0x%08x %%ebp 0x%08x %%esi 0x%08x %%edi 0x%08x\n",
|
|
|
|
REG(1), REG(2), REG(3), REG(4),
|
|
|
|
REG(5), REG(6), REG(7), REG(8));
|
|
|
|
|
|
|
|
{
|
|
|
|
reg_t k_ebp = REG(6);
|
|
|
|
printf("KERNEL stacktrace, starting with ebp = 0x%lx:\n", k_ebp);
|
|
|
|
proc_stacktrace_execute(proc_addr(SYSTEM), k_ebp, frame->eip);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (saved_proc) {
|
|
|
|
printf("scheduled was: process %d (%s), ", saved_proc->p_endpoint, saved_proc->p_name);
|
No more intel/minix segments.
This commit removes all traces of Minix segments (the text/data/stack
memory map abstraction in the kernel) and significance of Intel segments
(hardware segments like CS, DS that add offsets to all addressing before
page table translation). This ultimately simplifies the memory layout
and addressing and makes the same layout possible on non-Intel
architectures.
There are only two types of addresses in the world now: virtual
and physical; even the kernel and processes have the same virtual
address space. Kernel and user processes can be distinguished at a
glance as processes won't use 0xF0000000 and above.
No static pre-allocated memory sizes exist any more.
Changes to booting:
. The pre_init.c leaves the kernel and modules exactly as
they were left by the bootloader in physical memory
. The kernel starts running using physical addressing,
loaded at a fixed location given in its linker script by the
bootloader. All code and data in this phase are linked to
this fixed low location.
. It makes a bootstrap pagetable to map itself to a
fixed high location (also in linker script) and jumps to
the high address. All code and data then use this high addressing.
. All code/data symbols linked at the low addresses is prefixed by
an objcopy step with __k_unpaged_*, so that that code cannot
reference highly-linked symbols (which aren't valid yet) or vice
versa (symbols that aren't valid any more).
. The two addressing modes are separated in the linker script by
collecting the unpaged_*.o objects and linking them with low
addresses, and linking the rest high. Some objects are linked
twice, once low and once high.
. The bootstrap phase passes a lot of information (e.g. free memory
list, physical location of the modules, etc.) using the kinfo
struct.
. After this bootstrap the low-linked part is freed.
. The kernel maps in VM into the bootstrap page table so that VM can
begin executing. Its first job is to make page tables for all other
boot processes. So VM runs before RS, and RS gets a fully dynamic,
VM-managed address space. VM gets its privilege info from RS as usual
but that happens after RS starts running.
. Both the kernel loading VM and VM organizing boot processes happen
using the libexec logic. This removes the last reason for VM to
still know much about exec() and vm/exec.c is gone.
Further Implementation:
. All segments are based at 0 and have a 4 GB limit.
. The kernel is mapped in at the top of the virtual address
space so as not to constrain the user processes.
. Processes do not use segments from the LDT at all; there are
no segments in the LDT any more, so no LLDT is needed.
. The Minix segments T/D/S are gone and so none of the
user-space or in-kernel copy functions use them. The copy
functions use a process endpoint of NONE to realize it's
a physical address, virtual otherwise.
. The umap call only makes sense to translate a virtual address
to a physical address now.
. Segments-related calls like newmap and alloc_segments are gone.
. All segments-related translation in VM is gone (vir2map etc).
. Initialization in VM is simpler as no moving around is necessary.
. VM and all other boot processes can be linked wherever they wish
and will be mapped in at the right location by the kernel and VM
respectively.
Other changes:
. The multiboot code is less special: it does not use mb_print
for its diagnostics any more but uses printf() as normal, saving
the output into the diagnostics buffer, only printing to the
screen using the direct print functions if a panic() occurs.
. The multiboot code uses the flexible 'free memory map list'
style to receive the list of free memory if available.
. The kernel determines the memory layout of the processes to
a degree: it tells VM where the kernel starts and ends and
where the kernel wants the top of the process to be. VM then
uses this entire range, i.e. the stack is right at the top,
and mmap()ped bits of memory are placed below that downwards,
and the break grows upwards.
Other Consequences:
. Every process gets its own page table as address spaces
can't be separated any more by segments.
. As all segments are 0-based, there is no distinction between
virtual and linear addresses, nor between userspace and
kernel addresses.
. Less work is done when context switching, leading to a net
performance increase. (8% faster on my machine for 'make servers'.)
. The layout and configuration of the GDT makes sysenter and syscall
possible.
2012-05-07 16:03:35 +02:00
|
|
|
printf("pc = 0x%x\n", (unsigned) saved_proc->p_reg.pc);
|
2011-07-22 12:55:04 +02:00
|
|
|
proc_stacktrace(saved_proc);
|
|
|
|
|
|
|
|
panic("Unhandled kernel exception");
|
|
|
|
}
|
|
|
|
|
|
|
|
/* in an early stage of boot process we don't have processes yet */
|
|
|
|
panic("exception in kernel while booting, no saved_proc yet");
|
2011-09-16 14:57:49 +02:00
|
|
|
#endif /* USE_SYSDEBUG */
|
2011-07-22 12:55:04 +02:00
|
|
|
}
|
|
|
|
|
2005-09-11 18:44:06 +02:00
|
|
|
/*===========================================================================*
|
|
|
|
* exception *
|
|
|
|
*===========================================================================*/
|
2012-03-25 20:25:53 +02:00
|
|
|
void exception_handler(int is_nested, struct exception_frame * frame)
|
2005-04-21 16:53:53 +02:00
|
|
|
{
|
|
|
|
/* An exception or unexpected interrupt has occurred. */
|
|
|
|
register struct ex_s *ep;
|
|
|
|
struct proc *saved_proc;
|
|
|
|
|
|
|
|
/* Save proc_ptr, because it may be changed by debug statements. */
|
2010-09-15 16:09:46 +02:00
|
|
|
saved_proc = get_cpulocal_var(proc_ptr);
|
Primary goal for these changes is:
- no longer have kernel have its own page table that is loaded
on every kernel entry (trap, interrupt, exception). the primary
purpose is to reduce the number of required reloads.
Result:
- kernel can only access memory of process that was running when
kernel was entered
- kernel must be mapped into every process page table, so traps to
kernel keep working
Problem:
- kernel must often access memory of arbitrary processes (e.g. send
arbitrary processes messages); this can't happen directly any more;
usually because that process' page table isn't loaded at all, sometimes
because that memory isn't mapped in at all, sometimes because it isn't
mapped in read-write.
So:
- kernel must be able to map in memory of any process, in its own
address space.
Implementation:
- VM and kernel share a range of memory in which addresses of
all page tables of all processes are available. This has two purposes:
. Kernel has to know what data to copy in order to map in a range
. Kernel has to know where to write the data in order to map it in
That last point is because kernel has to write in the currently loaded
page table.
- Processes and kernel are separated through segments; kernel segments
haven't changed.
- The kernel keeps the process whose page table is currently loaded
in 'ptproc.'
- If it wants to map in a range of memory, it writes the value of the
page directory entry for that range into the page directory entry
in the currently loaded map. There is a slot reserved for such
purposes. The kernel can then access this memory directly.
- In order to do this, its segment has been increased (and the
segments of processes start where it ends).
- In the pagefault handler, detect if the kernel is doing
'trappable' memory access (i.e. a pagefault isn't a fatal
error) and if so,
- set the saved instruction pointer to phys_copy_fault,
breaking out of phys_copy
- set the saved eax register to the address of the page
fault, both for sanity checking and for checking in
which of the two ranges that phys_copy was called
with the fault occured
- Some boot-time processes do not have their own page table,
and are mapped in with the kernel, and separated with
segments. The kernel detects this using HASPT. If such a
process has to be scheduled, any page table will work and
no page table switch is done.
Major changes in kernel are
- When accessing user processes memory, kernel no longer
explicitly checks before it does so if that memory is OK.
It simply makes the mapping (if necessary), tries to do the
operation, and traps the pagefault if that memory isn't present;
if that happens, the copy function returns EFAULT.
So all of the CHECKRANGE_OR_SUSPEND macros are gone.
- Kernel no longer has to copy/read and parse page tables.
- A message copying optimisation: when messages are copied, and
the recipient isn't mapped in, they are copied into a buffer
in the kernel. This is done in QueueMess. The next time
the recipient is scheduled, this message is copied into
its memory. This happens in schedcheck().
This eliminates the mapping/copying step for messages, and makes
it easier to deliver messages. This eliminates soft_notify.
- Kernel no longer creates a page table at all, so the vm_setbuf
and pagetable writing in memory.c is gone.
Minor changes in kernel are
- ipc_stats thrown out, wasn't used
- misc flags all renamed to MF_*
- NOREC_* macros to enter and leave functions that should not
be called recursively; just sanity checks really
- code to fully decode segment selectors and descriptors
to print on exceptions
- lots of vmassert()s added, only executed if DEBUG_VMASSERT is 1
2009-09-21 16:31:52 +02:00
|
|
|
|
2009-11-06 10:08:26 +01:00
|
|
|
ep = &ex_data[frame->vector];
|
2005-04-21 16:53:53 +02:00
|
|
|
|
2009-11-06 10:08:26 +01:00
|
|
|
if (frame->vector == 2) { /* spurious NMI on some machines */
|
2010-03-03 16:45:01 +01:00
|
|
|
printf("got spurious NMI\n");
|
2005-04-21 16:53:53 +02:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2010-02-09 16:15:45 +01:00
|
|
|
/*
|
|
|
|
* handle special cases for nested problems as they might be tricky or filter
|
|
|
|
* them out quickly if the traps are not nested
|
|
|
|
*/
|
|
|
|
if (is_nested) {
|
2010-02-14 19:39:47 +01:00
|
|
|
/*
|
|
|
|
* if a problem occured while copying a message from userspace because
|
|
|
|
* of a wrong pointer supplied by userland, handle it the only way we
|
|
|
|
* can handle it ...
|
|
|
|
*/
|
|
|
|
if (((void*)frame->eip >= (void*)copy_msg_to_user &&
|
|
|
|
(void*)frame->eip <= (void*)__copy_msg_to_user_end) ||
|
|
|
|
((void*)frame->eip >= (void*)copy_msg_from_user &&
|
|
|
|
(void*)frame->eip <= (void*)__copy_msg_from_user_end)) {
|
|
|
|
switch(frame->vector) {
|
|
|
|
/* these error are expected */
|
|
|
|
case PAGE_FAULT_VECTOR:
|
|
|
|
case PROTECTION_VECTOR:
|
|
|
|
frame->eip = (reg_t) __user_copy_msg_pointer_failure;
|
|
|
|
return;
|
|
|
|
default:
|
2010-03-05 16:05:11 +01:00
|
|
|
panic("Copy involving a user pointer failed unexpectedly!");
|
2010-02-14 19:39:47 +01:00
|
|
|
}
|
|
|
|
}
|
2012-03-03 19:25:57 +01:00
|
|
|
|
|
|
|
/* Pass any error resulting from restoring FPU state, as a FPU
|
|
|
|
* exception to the process.
|
|
|
|
*/
|
|
|
|
if (((void*)frame->eip >= (void*)fxrstor &&
|
|
|
|
(void *)frame->eip <= (void*)__fxrstor_end) ||
|
|
|
|
((void*)frame->eip >= (void*)frstor &&
|
|
|
|
(void *)frame->eip <= (void*)__frstor_end)) {
|
|
|
|
frame->eip = (reg_t) __frstor_failure;
|
|
|
|
return;
|
|
|
|
}
|
2013-01-06 19:18:41 +01:00
|
|
|
|
|
|
|
if(frame->vector == DEBUG_VECTOR
|
|
|
|
&& (saved_proc->p_reg.psw & TRACEBIT)
|
|
|
|
&& (saved_proc->p_seg.p_kern_trap_style == KTS_NONE)) {
|
|
|
|
/* Getting a debug trap in the kernel is legitimate
|
|
|
|
* if a traced process entered the kernel using sysenter
|
|
|
|
* or syscall; the trap flag is not cleared then.
|
|
|
|
*
|
|
|
|
* It triggers on the first kernel entry so the trap
|
|
|
|
* style is still KTS_NONE.
|
|
|
|
*/
|
|
|
|
|
|
|
|
frame->eflags &= ~TRACEBIT;
|
|
|
|
|
|
|
|
return;
|
|
|
|
|
|
|
|
/* If control passes, this case is not recognized as legitimate
|
|
|
|
* and we panic later on after all.
|
|
|
|
*/
|
|
|
|
}
|
2010-02-09 16:15:45 +01:00
|
|
|
}
|
|
|
|
|
2009-11-06 10:08:26 +01:00
|
|
|
if(frame->vector == PAGE_FAULT_VECTOR) {
|
|
|
|
pagefault(saved_proc, frame, is_nested);
|
Primary goal for these changes is:
- no longer have kernel have its own page table that is loaded
on every kernel entry (trap, interrupt, exception). the primary
purpose is to reduce the number of required reloads.
Result:
- kernel can only access memory of process that was running when
kernel was entered
- kernel must be mapped into every process page table, so traps to
kernel keep working
Problem:
- kernel must often access memory of arbitrary processes (e.g. send
arbitrary processes messages); this can't happen directly any more;
usually because that process' page table isn't loaded at all, sometimes
because that memory isn't mapped in at all, sometimes because it isn't
mapped in read-write.
So:
- kernel must be able to map in memory of any process, in its own
address space.
Implementation:
- VM and kernel share a range of memory in which addresses of
all page tables of all processes are available. This has two purposes:
. Kernel has to know what data to copy in order to map in a range
. Kernel has to know where to write the data in order to map it in
That last point is because kernel has to write in the currently loaded
page table.
- Processes and kernel are separated through segments; kernel segments
haven't changed.
- The kernel keeps the process whose page table is currently loaded
in 'ptproc.'
- If it wants to map in a range of memory, it writes the value of the
page directory entry for that range into the page directory entry
in the currently loaded map. There is a slot reserved for such
purposes. The kernel can then access this memory directly.
- In order to do this, its segment has been increased (and the
segments of processes start where it ends).
- In the pagefault handler, detect if the kernel is doing
'trappable' memory access (i.e. a pagefault isn't a fatal
error) and if so,
- set the saved instruction pointer to phys_copy_fault,
breaking out of phys_copy
- set the saved eax register to the address of the page
fault, both for sanity checking and for checking in
which of the two ranges that phys_copy was called
with the fault occured
- Some boot-time processes do not have their own page table,
and are mapped in with the kernel, and separated with
segments. The kernel detects this using HASPT. If such a
process has to be scheduled, any page table will work and
no page table switch is done.
Major changes in kernel are
- When accessing user processes memory, kernel no longer
explicitly checks before it does so if that memory is OK.
It simply makes the mapping (if necessary), tries to do the
operation, and traps the pagefault if that memory isn't present;
if that happens, the copy function returns EFAULT.
So all of the CHECKRANGE_OR_SUSPEND macros are gone.
- Kernel no longer has to copy/read and parse page tables.
- A message copying optimisation: when messages are copied, and
the recipient isn't mapped in, they are copied into a buffer
in the kernel. This is done in QueueMess. The next time
the recipient is scheduled, this message is copied into
its memory. This happens in schedcheck().
This eliminates the mapping/copying step for messages, and makes
it easier to deliver messages. This eliminates soft_notify.
- Kernel no longer creates a page table at all, so the vm_setbuf
and pagetable writing in memory.c is gone.
Minor changes in kernel are
- ipc_stats thrown out, wasn't used
- misc flags all renamed to MF_*
- NOREC_* macros to enter and leave functions that should not
be called recursively; just sanity checks really
- code to fully decode segment selectors and descriptors
to print on exceptions
- lots of vmassert()s added, only executed if DEBUG_VMASSERT is 1
2009-09-21 16:31:52 +02:00
|
|
|
return;
|
2009-05-12 13:35:01 +02:00
|
|
|
}
|
|
|
|
|
2009-11-06 10:08:26 +01:00
|
|
|
/* If an exception occurs while running a process, the is_nested variable
|
|
|
|
* will be zero. Exceptions in interrupt handlers or system traps will make
|
|
|
|
* is_nested non-zero.
|
2005-07-19 17:01:47 +02:00
|
|
|
*/
|
2009-11-06 10:08:26 +01:00
|
|
|
if (is_nested == 0 && ! iskernelp(saved_proc)) {
|
Merge of David's ptrace branch. Summary:
o Support for ptrace T_ATTACH/T_DETACH and T_SYSCALL
o PM signal handling logic should now work properly, even with debuggers
being present
o Asynchronous PM/VFS protocol, full IPC support for senda(), and
AMF_NOREPLY senda() flag
DETAILS
Process stop and delay call handling of PM:
o Added sys_runctl() kernel call with sys_stop() and sys_resume()
aliases, for PM to stop and resume a process
o Added exception for sending/syscall-traced processes to sys_runctl(),
and matching SIGKREADY pseudo-signal to PM
o Fixed PM signal logic to deal with requests from a process after
stopping it (so-called "delay calls"), using the SIGKREADY facility
o Fixed various PM panics due to race conditions with delay calls versus
VFS calls
o Removed special PRIO_STOP priority value
o Added SYS_LOCK RTS kernel flag, to stop an individual process from
running while modifying its process structure
Signal and debugger handling in PM:
o Fixed debugger signals being dropped if a second signal arrives when
the debugger has not retrieved the first one
o Fixed debugger signals being sent to the debugger more than once
o Fixed debugger signals unpausing process in VFS; removed PM_UNPAUSE_TR
protocol message
o Detached debugger signals from general signal logic and from being
blocked on VFS calls, meaning that even VFS can now be traced
o Fixed debugger being unable to receive more than one pending signal in
one process stop
o Fixed signal delivery being delayed needlessly when multiple signals
are pending
o Fixed wait test for tracer, which was returning for children that were
not waited for
o Removed second parallel pending call from PM to VFS for any process
o Fixed process becoming runnable between exec() and debugger trap
o Added support for notifying the debugger before the parent when a
debugged child exits
o Fixed debugger death causing child to remain stopped forever
o Fixed consistently incorrect use of _NSIG
Extensions to ptrace():
o Added T_ATTACH and T_DETACH ptrace request, to attach and detach a
debugger to and from a process
o Added T_SYSCALL ptrace request, to trace system calls
o Added T_SETOPT ptrace request, to set trace options
o Added TO_TRACEFORK trace option, to attach automatically to children
of a traced process
o Added TO_ALTEXEC trace option, to send SIGSTOP instead of SIGTRAP upon
a successful exec() of the tracee
o Extended T_GETUSER ptrace support to allow retrieving a process's priv
structure
o Removed T_STOP ptrace request again, as it does not help implementing
debuggers properly
o Added MINIX3-specific ptrace test (test42)
o Added proper manual page for ptrace(2)
Asynchronous PM/VFS interface:
o Fixed asynchronous messages not being checked when receive() is called
with an endpoint other than ANY
o Added AMF_NOREPLY senda() flag, preventing such messages from
satisfying the receive part of a sendrec()
o Added asynsend3() that takes optional flags; asynsend() is now a
#define passing in 0 as third parameter
o Made PM/VFS protocol asynchronous; reintroduced tell_fs()
o Made PM_BASE request/reply number range unique
o Hacked in a horrible temporary workaround into RS to deal with newly
revealed RS-PM-VFS race condition triangle until VFS is asynchronous
System signal handling:
o Fixed shutdown logic of device drivers; removed old SIGKSTOP signal
o Removed is-superuser check from PM's do_procstat() (aka getsigset())
o Added sigset macros to allow system processes to deal with the full
signal set, rather than just the POSIX subset
Miscellaneous PM fixes:
o Split do_getset into do_get and do_set, merging common code and making
structure clearer
o Fixed setpriority() being able to put to sleep processes using an
invalid parameter, or revive zombie processes
o Made find_proc() global; removed obsolete proc_from_pid()
o Cleanup here and there
Also included:
o Fixed false-positive boot order kernel warning
o Removed last traces of old NOTIFY_FROM code
THINGS OF POSSIBLE INTEREST
o It should now be possible to run PM at any priority, even lower than
user processes
o No assumptions are made about communication speed between PM and VFS,
although communication must be FIFO
o A debugger will now receive incoming debuggee signals at kill time
only; the process may not yet be fully stopped
o A first step has been made towards making the SYSTEM task preemptible
2009-09-30 11:57:22 +02:00
|
|
|
#if 0
|
2006-05-11 16:49:46 +02:00
|
|
|
{
|
2008-11-19 13:26:10 +01:00
|
|
|
|
2010-03-03 16:45:01 +01:00
|
|
|
printf(
|
2008-02-22 11:43:18 +01:00
|
|
|
"vec_nr= %d, trap_errno= 0x%lx, eip= 0x%lx, cs= 0x%x, eflags= 0x%lx\n",
|
2009-11-06 10:08:26 +01:00
|
|
|
frame->vector, (unsigned long)frame->errcode,
|
|
|
|
(unsigned long)frame->eip, frame->cs,
|
|
|
|
(unsigned long)frame->eflags);
|
2008-11-19 13:26:10 +01:00
|
|
|
proc_stacktrace(saved_proc);
|
2006-05-11 16:49:46 +02:00
|
|
|
}
|
|
|
|
|
Merge of David's ptrace branch. Summary:
o Support for ptrace T_ATTACH/T_DETACH and T_SYSCALL
o PM signal handling logic should now work properly, even with debuggers
being present
o Asynchronous PM/VFS protocol, full IPC support for senda(), and
AMF_NOREPLY senda() flag
DETAILS
Process stop and delay call handling of PM:
o Added sys_runctl() kernel call with sys_stop() and sys_resume()
aliases, for PM to stop and resume a process
o Added exception for sending/syscall-traced processes to sys_runctl(),
and matching SIGKREADY pseudo-signal to PM
o Fixed PM signal logic to deal with requests from a process after
stopping it (so-called "delay calls"), using the SIGKREADY facility
o Fixed various PM panics due to race conditions with delay calls versus
VFS calls
o Removed special PRIO_STOP priority value
o Added SYS_LOCK RTS kernel flag, to stop an individual process from
running while modifying its process structure
Signal and debugger handling in PM:
o Fixed debugger signals being dropped if a second signal arrives when
the debugger has not retrieved the first one
o Fixed debugger signals being sent to the debugger more than once
o Fixed debugger signals unpausing process in VFS; removed PM_UNPAUSE_TR
protocol message
o Detached debugger signals from general signal logic and from being
blocked on VFS calls, meaning that even VFS can now be traced
o Fixed debugger being unable to receive more than one pending signal in
one process stop
o Fixed signal delivery being delayed needlessly when multiple signals
are pending
o Fixed wait test for tracer, which was returning for children that were
not waited for
o Removed second parallel pending call from PM to VFS for any process
o Fixed process becoming runnable between exec() and debugger trap
o Added support for notifying the debugger before the parent when a
debugged child exits
o Fixed debugger death causing child to remain stopped forever
o Fixed consistently incorrect use of _NSIG
Extensions to ptrace():
o Added T_ATTACH and T_DETACH ptrace request, to attach and detach a
debugger to and from a process
o Added T_SYSCALL ptrace request, to trace system calls
o Added T_SETOPT ptrace request, to set trace options
o Added TO_TRACEFORK trace option, to attach automatically to children
of a traced process
o Added TO_ALTEXEC trace option, to send SIGSTOP instead of SIGTRAP upon
a successful exec() of the tracee
o Extended T_GETUSER ptrace support to allow retrieving a process's priv
structure
o Removed T_STOP ptrace request again, as it does not help implementing
debuggers properly
o Added MINIX3-specific ptrace test (test42)
o Added proper manual page for ptrace(2)
Asynchronous PM/VFS interface:
o Fixed asynchronous messages not being checked when receive() is called
with an endpoint other than ANY
o Added AMF_NOREPLY senda() flag, preventing such messages from
satisfying the receive part of a sendrec()
o Added asynsend3() that takes optional flags; asynsend() is now a
#define passing in 0 as third parameter
o Made PM/VFS protocol asynchronous; reintroduced tell_fs()
o Made PM_BASE request/reply number range unique
o Hacked in a horrible temporary workaround into RS to deal with newly
revealed RS-PM-VFS race condition triangle until VFS is asynchronous
System signal handling:
o Fixed shutdown logic of device drivers; removed old SIGKSTOP signal
o Removed is-superuser check from PM's do_procstat() (aka getsigset())
o Added sigset macros to allow system processes to deal with the full
signal set, rather than just the POSIX subset
Miscellaneous PM fixes:
o Split do_getset into do_get and do_set, merging common code and making
structure clearer
o Fixed setpriority() being able to put to sleep processes using an
invalid parameter, or revive zombie processes
o Made find_proc() global; removed obsolete proc_from_pid()
o Cleanup here and there
Also included:
o Fixed false-positive boot order kernel warning
o Removed last traces of old NOTIFY_FROM code
THINGS OF POSSIBLE INTEREST
o It should now be possible to run PM at any priority, even lower than
user processes
o No assumptions are made about communication speed between PM and VFS,
although communication must be FIFO
o A debugger will now receive incoming debuggee signals at kill time
only; the process may not yet be fully stopped
o A first step has been made towards making the SYSTEM task preemptible
2009-09-30 11:57:22 +02:00
|
|
|
#endif
|
2005-05-26 15:17:57 +02:00
|
|
|
cause_sig(proc_nr(saved_proc), ep->signum);
|
2005-04-21 16:53:53 +02:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Exception in system code. This is not supposed to happen. */
|
2011-07-22 12:55:04 +02:00
|
|
|
inkernel_disaster(saved_proc, frame, ep, is_nested);
|
2005-04-21 16:53:53 +02:00
|
|
|
|
2011-07-22 12:55:04 +02:00
|
|
|
panic("return from inkernel_disaster");
|
2005-04-21 16:53:53 +02:00
|
|
|
}
|
|
|
|
|
2011-09-16 14:57:49 +02:00
|
|
|
#if USE_SYSDEBUG
|
2006-06-29 15:35:27 +02:00
|
|
|
/*===========================================================================*
|
2011-07-22 12:55:04 +02:00
|
|
|
* proc_stacktrace_execute *
|
2006-06-29 15:35:27 +02:00
|
|
|
*===========================================================================*/
|
2012-03-25 20:25:53 +02:00
|
|
|
static void proc_stacktrace_execute(struct proc *whichproc, reg_t v_bp, reg_t pc)
|
2006-06-29 15:35:27 +02:00
|
|
|
{
|
2011-07-22 12:55:04 +02:00
|
|
|
reg_t v_hbp;
|
Primary goal for these changes is:
- no longer have kernel have its own page table that is loaded
on every kernel entry (trap, interrupt, exception). the primary
purpose is to reduce the number of required reloads.
Result:
- kernel can only access memory of process that was running when
kernel was entered
- kernel must be mapped into every process page table, so traps to
kernel keep working
Problem:
- kernel must often access memory of arbitrary processes (e.g. send
arbitrary processes messages); this can't happen directly any more;
usually because that process' page table isn't loaded at all, sometimes
because that memory isn't mapped in at all, sometimes because it isn't
mapped in read-write.
So:
- kernel must be able to map in memory of any process, in its own
address space.
Implementation:
- VM and kernel share a range of memory in which addresses of
all page tables of all processes are available. This has two purposes:
. Kernel has to know what data to copy in order to map in a range
. Kernel has to know where to write the data in order to map it in
That last point is because kernel has to write in the currently loaded
page table.
- Processes and kernel are separated through segments; kernel segments
haven't changed.
- The kernel keeps the process whose page table is currently loaded
in 'ptproc.'
- If it wants to map in a range of memory, it writes the value of the
page directory entry for that range into the page directory entry
in the currently loaded map. There is a slot reserved for such
purposes. The kernel can then access this memory directly.
- In order to do this, its segment has been increased (and the
segments of processes start where it ends).
- In the pagefault handler, detect if the kernel is doing
'trappable' memory access (i.e. a pagefault isn't a fatal
error) and if so,
- set the saved instruction pointer to phys_copy_fault,
breaking out of phys_copy
- set the saved eax register to the address of the page
fault, both for sanity checking and for checking in
which of the two ranges that phys_copy was called
with the fault occured
- Some boot-time processes do not have their own page table,
and are mapped in with the kernel, and separated with
segments. The kernel detects this using HASPT. If such a
process has to be scheduled, any page table will work and
no page table switch is done.
Major changes in kernel are
- When accessing user processes memory, kernel no longer
explicitly checks before it does so if that memory is OK.
It simply makes the mapping (if necessary), tries to do the
operation, and traps the pagefault if that memory isn't present;
if that happens, the copy function returns EFAULT.
So all of the CHECKRANGE_OR_SUSPEND macros are gone.
- Kernel no longer has to copy/read and parse page tables.
- A message copying optimisation: when messages are copied, and
the recipient isn't mapped in, they are copied into a buffer
in the kernel. This is done in QueueMess. The next time
the recipient is scheduled, this message is copied into
its memory. This happens in schedcheck().
This eliminates the mapping/copying step for messages, and makes
it easier to deliver messages. This eliminates soft_notify.
- Kernel no longer creates a page table at all, so the vm_setbuf
and pagetable writing in memory.c is gone.
Minor changes in kernel are
- ipc_stats thrown out, wasn't used
- misc flags all renamed to MF_*
- NOREC_* macros to enter and leave functions that should not
be called recursively; just sanity checks really
- code to fully decode segment selectors and descriptors
to print on exceptions
- lots of vmassert()s added, only executed if DEBUG_VMASSERT is 1
2009-09-21 16:31:52 +02:00
|
|
|
int iskernel;
|
2010-11-02 22:27:04 +01:00
|
|
|
int n = 0;
|
Primary goal for these changes is:
- no longer have kernel have its own page table that is loaded
on every kernel entry (trap, interrupt, exception). the primary
purpose is to reduce the number of required reloads.
Result:
- kernel can only access memory of process that was running when
kernel was entered
- kernel must be mapped into every process page table, so traps to
kernel keep working
Problem:
- kernel must often access memory of arbitrary processes (e.g. send
arbitrary processes messages); this can't happen directly any more;
usually because that process' page table isn't loaded at all, sometimes
because that memory isn't mapped in at all, sometimes because it isn't
mapped in read-write.
So:
- kernel must be able to map in memory of any process, in its own
address space.
Implementation:
- VM and kernel share a range of memory in which addresses of
all page tables of all processes are available. This has two purposes:
. Kernel has to know what data to copy in order to map in a range
. Kernel has to know where to write the data in order to map it in
That last point is because kernel has to write in the currently loaded
page table.
- Processes and kernel are separated through segments; kernel segments
haven't changed.
- The kernel keeps the process whose page table is currently loaded
in 'ptproc.'
- If it wants to map in a range of memory, it writes the value of the
page directory entry for that range into the page directory entry
in the currently loaded map. There is a slot reserved for such
purposes. The kernel can then access this memory directly.
- In order to do this, its segment has been increased (and the
segments of processes start where it ends).
- In the pagefault handler, detect if the kernel is doing
'trappable' memory access (i.e. a pagefault isn't a fatal
error) and if so,
- set the saved instruction pointer to phys_copy_fault,
breaking out of phys_copy
- set the saved eax register to the address of the page
fault, both for sanity checking and for checking in
which of the two ranges that phys_copy was called
with the fault occured
- Some boot-time processes do not have their own page table,
and are mapped in with the kernel, and separated with
segments. The kernel detects this using HASPT. If such a
process has to be scheduled, any page table will work and
no page table switch is done.
Major changes in kernel are
- When accessing user processes memory, kernel no longer
explicitly checks before it does so if that memory is OK.
It simply makes the mapping (if necessary), tries to do the
operation, and traps the pagefault if that memory isn't present;
if that happens, the copy function returns EFAULT.
So all of the CHECKRANGE_OR_SUSPEND macros are gone.
- Kernel no longer has to copy/read and parse page tables.
- A message copying optimisation: when messages are copied, and
the recipient isn't mapped in, they are copied into a buffer
in the kernel. This is done in QueueMess. The next time
the recipient is scheduled, this message is copied into
its memory. This happens in schedcheck().
This eliminates the mapping/copying step for messages, and makes
it easier to deliver messages. This eliminates soft_notify.
- Kernel no longer creates a page table at all, so the vm_setbuf
and pagetable writing in memory.c is gone.
Minor changes in kernel are
- ipc_stats thrown out, wasn't used
- misc flags all renamed to MF_*
- NOREC_* macros to enter and leave functions that should not
be called recursively; just sanity checks really
- code to fully decode segment selectors and descriptors
to print on exceptions
- lots of vmassert()s added, only executed if DEBUG_VMASSERT is 1
2009-09-21 16:31:52 +02:00
|
|
|
|
|
|
|
iskernel = iskernelp(whichproc);
|
2006-06-29 15:35:27 +02:00
|
|
|
|
2010-03-03 16:45:01 +01:00
|
|
|
printf("%-8.8s %6d 0x%lx ",
|
2011-07-22 12:55:04 +02:00
|
|
|
whichproc->p_name, whichproc->p_endpoint, pc);
|
2008-11-19 13:26:10 +01:00
|
|
|
|
2006-06-29 15:35:27 +02:00
|
|
|
while(v_bp) {
|
2011-07-22 12:55:04 +02:00
|
|
|
reg_t v_pc;
|
Primary goal for these changes is:
- no longer have kernel have its own page table that is loaded
on every kernel entry (trap, interrupt, exception). the primary
purpose is to reduce the number of required reloads.
Result:
- kernel can only access memory of process that was running when
kernel was entered
- kernel must be mapped into every process page table, so traps to
kernel keep working
Problem:
- kernel must often access memory of arbitrary processes (e.g. send
arbitrary processes messages); this can't happen directly any more;
usually because that process' page table isn't loaded at all, sometimes
because that memory isn't mapped in at all, sometimes because it isn't
mapped in read-write.
So:
- kernel must be able to map in memory of any process, in its own
address space.
Implementation:
- VM and kernel share a range of memory in which addresses of
all page tables of all processes are available. This has two purposes:
. Kernel has to know what data to copy in order to map in a range
. Kernel has to know where to write the data in order to map it in
That last point is because kernel has to write in the currently loaded
page table.
- Processes and kernel are separated through segments; kernel segments
haven't changed.
- The kernel keeps the process whose page table is currently loaded
in 'ptproc.'
- If it wants to map in a range of memory, it writes the value of the
page directory entry for that range into the page directory entry
in the currently loaded map. There is a slot reserved for such
purposes. The kernel can then access this memory directly.
- In order to do this, its segment has been increased (and the
segments of processes start where it ends).
- In the pagefault handler, detect if the kernel is doing
'trappable' memory access (i.e. a pagefault isn't a fatal
error) and if so,
- set the saved instruction pointer to phys_copy_fault,
breaking out of phys_copy
- set the saved eax register to the address of the page
fault, both for sanity checking and for checking in
which of the two ranges that phys_copy was called
with the fault occured
- Some boot-time processes do not have their own page table,
and are mapped in with the kernel, and separated with
segments. The kernel detects this using HASPT. If such a
process has to be scheduled, any page table will work and
no page table switch is done.
Major changes in kernel are
- When accessing user processes memory, kernel no longer
explicitly checks before it does so if that memory is OK.
It simply makes the mapping (if necessary), tries to do the
operation, and traps the pagefault if that memory isn't present;
if that happens, the copy function returns EFAULT.
So all of the CHECKRANGE_OR_SUSPEND macros are gone.
- Kernel no longer has to copy/read and parse page tables.
- A message copying optimisation: when messages are copied, and
the recipient isn't mapped in, they are copied into a buffer
in the kernel. This is done in QueueMess. The next time
the recipient is scheduled, this message is copied into
its memory. This happens in schedcheck().
This eliminates the mapping/copying step for messages, and makes
it easier to deliver messages. This eliminates soft_notify.
- Kernel no longer creates a page table at all, so the vm_setbuf
and pagetable writing in memory.c is gone.
Minor changes in kernel are
- ipc_stats thrown out, wasn't used
- misc flags all renamed to MF_*
- NOREC_* macros to enter and leave functions that should not
be called recursively; just sanity checks really
- code to fully decode segment selectors and descriptors
to print on exceptions
- lots of vmassert()s added, only executed if DEBUG_VMASSERT is 1
2009-09-21 16:31:52 +02:00
|
|
|
|
|
|
|
#define PRCOPY(pr, pv, v, n) \
|
|
|
|
(iskernel ? (memcpy((char *) v, (char *) pv, n), OK) : \
|
2010-02-09 16:20:09 +01:00
|
|
|
data_copy(pr->p_endpoint, pv, KERNEL, (vir_bytes) (v), n))
|
Primary goal for these changes is:
- no longer have kernel have its own page table that is loaded
on every kernel entry (trap, interrupt, exception). the primary
purpose is to reduce the number of required reloads.
Result:
- kernel can only access memory of process that was running when
kernel was entered
- kernel must be mapped into every process page table, so traps to
kernel keep working
Problem:
- kernel must often access memory of arbitrary processes (e.g. send
arbitrary processes messages); this can't happen directly any more;
usually because that process' page table isn't loaded at all, sometimes
because that memory isn't mapped in at all, sometimes because it isn't
mapped in read-write.
So:
- kernel must be able to map in memory of any process, in its own
address space.
Implementation:
- VM and kernel share a range of memory in which addresses of
all page tables of all processes are available. This has two purposes:
. Kernel has to know what data to copy in order to map in a range
. Kernel has to know where to write the data in order to map it in
That last point is because kernel has to write in the currently loaded
page table.
- Processes and kernel are separated through segments; kernel segments
haven't changed.
- The kernel keeps the process whose page table is currently loaded
in 'ptproc.'
- If it wants to map in a range of memory, it writes the value of the
page directory entry for that range into the page directory entry
in the currently loaded map. There is a slot reserved for such
purposes. The kernel can then access this memory directly.
- In order to do this, its segment has been increased (and the
segments of processes start where it ends).
- In the pagefault handler, detect if the kernel is doing
'trappable' memory access (i.e. a pagefault isn't a fatal
error) and if so,
- set the saved instruction pointer to phys_copy_fault,
breaking out of phys_copy
- set the saved eax register to the address of the page
fault, both for sanity checking and for checking in
which of the two ranges that phys_copy was called
with the fault occured
- Some boot-time processes do not have their own page table,
and are mapped in with the kernel, and separated with
segments. The kernel detects this using HASPT. If such a
process has to be scheduled, any page table will work and
no page table switch is done.
Major changes in kernel are
- When accessing user processes memory, kernel no longer
explicitly checks before it does so if that memory is OK.
It simply makes the mapping (if necessary), tries to do the
operation, and traps the pagefault if that memory isn't present;
if that happens, the copy function returns EFAULT.
So all of the CHECKRANGE_OR_SUSPEND macros are gone.
- Kernel no longer has to copy/read and parse page tables.
- A message copying optimisation: when messages are copied, and
the recipient isn't mapped in, they are copied into a buffer
in the kernel. This is done in QueueMess. The next time
the recipient is scheduled, this message is copied into
its memory. This happens in schedcheck().
This eliminates the mapping/copying step for messages, and makes
it easier to deliver messages. This eliminates soft_notify.
- Kernel no longer creates a page table at all, so the vm_setbuf
and pagetable writing in memory.c is gone.
Minor changes in kernel are
- ipc_stats thrown out, wasn't used
- misc flags all renamed to MF_*
- NOREC_* macros to enter and leave functions that should not
be called recursively; just sanity checks really
- code to fully decode segment selectors and descriptors
to print on exceptions
- lots of vmassert()s added, only executed if DEBUG_VMASSERT is 1
2009-09-21 16:31:52 +02:00
|
|
|
|
|
|
|
if(PRCOPY(whichproc, v_bp, &v_hbp, sizeof(v_hbp)) != OK) {
|
2010-03-03 16:45:01 +01:00
|
|
|
printf("(v_bp 0x%lx ?)", v_bp);
|
2008-11-19 13:26:10 +01:00
|
|
|
break;
|
|
|
|
}
|
Primary goal for these changes is:
- no longer have kernel have its own page table that is loaded
on every kernel entry (trap, interrupt, exception). the primary
purpose is to reduce the number of required reloads.
Result:
- kernel can only access memory of process that was running when
kernel was entered
- kernel must be mapped into every process page table, so traps to
kernel keep working
Problem:
- kernel must often access memory of arbitrary processes (e.g. send
arbitrary processes messages); this can't happen directly any more;
usually because that process' page table isn't loaded at all, sometimes
because that memory isn't mapped in at all, sometimes because it isn't
mapped in read-write.
So:
- kernel must be able to map in memory of any process, in its own
address space.
Implementation:
- VM and kernel share a range of memory in which addresses of
all page tables of all processes are available. This has two purposes:
. Kernel has to know what data to copy in order to map in a range
. Kernel has to know where to write the data in order to map it in
That last point is because kernel has to write in the currently loaded
page table.
- Processes and kernel are separated through segments; kernel segments
haven't changed.
- The kernel keeps the process whose page table is currently loaded
in 'ptproc.'
- If it wants to map in a range of memory, it writes the value of the
page directory entry for that range into the page directory entry
in the currently loaded map. There is a slot reserved for such
purposes. The kernel can then access this memory directly.
- In order to do this, its segment has been increased (and the
segments of processes start where it ends).
- In the pagefault handler, detect if the kernel is doing
'trappable' memory access (i.e. a pagefault isn't a fatal
error) and if so,
- set the saved instruction pointer to phys_copy_fault,
breaking out of phys_copy
- set the saved eax register to the address of the page
fault, both for sanity checking and for checking in
which of the two ranges that phys_copy was called
with the fault occured
- Some boot-time processes do not have their own page table,
and are mapped in with the kernel, and separated with
segments. The kernel detects this using HASPT. If such a
process has to be scheduled, any page table will work and
no page table switch is done.
Major changes in kernel are
- When accessing user processes memory, kernel no longer
explicitly checks before it does so if that memory is OK.
It simply makes the mapping (if necessary), tries to do the
operation, and traps the pagefault if that memory isn't present;
if that happens, the copy function returns EFAULT.
So all of the CHECKRANGE_OR_SUSPEND macros are gone.
- Kernel no longer has to copy/read and parse page tables.
- A message copying optimisation: when messages are copied, and
the recipient isn't mapped in, they are copied into a buffer
in the kernel. This is done in QueueMess. The next time
the recipient is scheduled, this message is copied into
its memory. This happens in schedcheck().
This eliminates the mapping/copying step for messages, and makes
it easier to deliver messages. This eliminates soft_notify.
- Kernel no longer creates a page table at all, so the vm_setbuf
and pagetable writing in memory.c is gone.
Minor changes in kernel are
- ipc_stats thrown out, wasn't used
- misc flags all renamed to MF_*
- NOREC_* macros to enter and leave functions that should not
be called recursively; just sanity checks really
- code to fully decode segment selectors and descriptors
to print on exceptions
- lots of vmassert()s added, only executed if DEBUG_VMASSERT is 1
2009-09-21 16:31:52 +02:00
|
|
|
if(PRCOPY(whichproc, v_bp + sizeof(v_pc), &v_pc, sizeof(v_pc)) != OK) {
|
2010-03-03 16:45:01 +01:00
|
|
|
printf("(v_pc 0x%lx ?)", v_bp + sizeof(v_pc));
|
2006-06-29 15:35:27 +02:00
|
|
|
break;
|
|
|
|
}
|
2010-03-03 16:45:01 +01:00
|
|
|
printf("0x%lx ", (unsigned long) v_pc);
|
2006-06-29 15:35:27 +02:00
|
|
|
if(v_hbp != 0 && v_hbp <= v_bp) {
|
2010-03-03 16:45:01 +01:00
|
|
|
printf("(hbp %lx ?)", v_hbp);
|
2006-06-29 15:35:27 +02:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
v_bp = v_hbp;
|
2011-07-22 12:55:04 +02:00
|
|
|
if(n++ > 50) {
|
|
|
|
printf("(truncated after %d steps) ", n);
|
2010-11-02 22:27:04 +01:00
|
|
|
break;
|
2011-07-22 12:55:04 +02:00
|
|
|
}
|
2006-06-29 15:35:27 +02:00
|
|
|
}
|
2010-03-03 16:45:01 +01:00
|
|
|
printf("\n");
|
2006-06-29 15:35:27 +02:00
|
|
|
}
|
2011-09-16 14:57:49 +02:00
|
|
|
#endif /* USE_SYSDEBUG */
|
2010-06-07 09:43:17 +02:00
|
|
|
|
2011-07-22 12:55:04 +02:00
|
|
|
/*===========================================================================*
|
|
|
|
* proc_stacktrace *
|
|
|
|
*===========================================================================*/
|
2012-03-25 20:25:53 +02:00
|
|
|
void proc_stacktrace(struct proc *whichproc)
|
2011-07-22 12:55:04 +02:00
|
|
|
{
|
2013-01-04 19:26:10 +01:00
|
|
|
u32_t use_bp;
|
|
|
|
|
|
|
|
if(whichproc->p_seg.p_kern_trap_style == KTS_NONE) {
|
|
|
|
printf("WARNING: stacktrace of running proecss\n");
|
|
|
|
}
|
|
|
|
|
|
|
|
switch(whichproc->p_seg.p_kern_trap_style) {
|
|
|
|
case KTS_SYSENTER:
|
|
|
|
case KTS_SYSCALL:
|
|
|
|
{
|
|
|
|
u32_t sp = whichproc->p_reg.sp;
|
|
|
|
|
|
|
|
/* Full context is not available in the p_reg
|
|
|
|
* struct. Obtain it from the user's stack.
|
|
|
|
* The use stack pointer is always available.
|
|
|
|
* The fact that it's there, and the 16 byte offset,
|
|
|
|
* is a dependency on the trap code in
|
|
|
|
* kernel/arch/i386/usermapped_glo_ipc.S.
|
|
|
|
*/
|
|
|
|
|
|
|
|
if(data_copy(whichproc->p_endpoint, sp+16,
|
|
|
|
KERNEL, (vir_bytes) &use_bp,
|
|
|
|
sizeof(use_bp)) != OK) {
|
|
|
|
printf("stacktrace: aborting, copy failed\n");
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
default:
|
|
|
|
/* Full context is available; use the stored ebp */
|
|
|
|
use_bp = whichproc->p_reg.fp;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2011-09-16 14:57:49 +02:00
|
|
|
#if USE_SYSDEBUG
|
2013-01-04 19:26:10 +01:00
|
|
|
proc_stacktrace_execute(whichproc, use_bp, whichproc->p_reg.pc);
|
2011-09-16 14:57:49 +02:00
|
|
|
#endif /* USE_SYSDEBUG */
|
2011-07-22 12:55:04 +02:00
|
|
|
}
|
|
|
|
|
2012-03-25 20:25:53 +02:00
|
|
|
void enable_fpu_exception(void)
|
2010-06-07 09:43:17 +02:00
|
|
|
{
|
2010-09-22 16:31:06 +02:00
|
|
|
u32_t cr0 = read_cr0();
|
|
|
|
if(!(cr0 & I386_CR0_TS))
|
|
|
|
write_cr0(cr0 | I386_CR0_TS);
|
2010-06-07 09:43:17 +02:00
|
|
|
}
|
|
|
|
|
2012-03-25 20:25:53 +02:00
|
|
|
void disable_fpu_exception(void)
|
2010-06-07 09:43:17 +02:00
|
|
|
{
|
|
|
|
clts();
|
|
|
|
}
|
|
|
|
|