2008-11-19 13:26:10 +01:00
|
|
|
|
|
|
|
#define _SYSTEM 1
|
|
|
|
|
|
|
|
#include <minix/callnr.h>
|
|
|
|
#include <minix/com.h>
|
|
|
|
#include <minix/config.h>
|
|
|
|
#include <minix/const.h>
|
|
|
|
#include <minix/ds.h>
|
|
|
|
#include <minix/endpoint.h>
|
|
|
|
#include <minix/minlib.h>
|
|
|
|
#include <minix/type.h>
|
|
|
|
#include <minix/ipc.h>
|
|
|
|
#include <minix/sysutil.h>
|
|
|
|
#include <minix/syslib.h>
|
|
|
|
#include <minix/safecopies.h>
|
2009-05-15 19:07:36 +02:00
|
|
|
#include <minix/cpufeature.h>
|
2009-09-21 16:49:49 +02:00
|
|
|
#include <minix/bitmap.h>
|
2010-04-12 13:25:24 +02:00
|
|
|
#include <minix/debug.h>
|
2008-11-19 13:26:10 +01:00
|
|
|
|
|
|
|
#include <errno.h>
|
2009-09-23 15:33:01 +02:00
|
|
|
#include <stdlib.h>
|
2008-11-19 13:26:10 +01:00
|
|
|
#include <assert.h>
|
|
|
|
#include <string.h>
|
|
|
|
#include <env.h>
|
|
|
|
#include <stdio.h>
|
|
|
|
#include <fcntl.h>
|
|
|
|
#include <stdlib.h>
|
|
|
|
|
2010-03-22 22:25:22 +01:00
|
|
|
#include "proto.h"
|
|
|
|
#include "glo.h"
|
|
|
|
#include "util.h"
|
|
|
|
#include "vm.h"
|
|
|
|
#include "sanitycheck.h"
|
2008-11-19 13:26:10 +01:00
|
|
|
|
2012-09-18 22:19:22 +02:00
|
|
|
static int vm_self_pages;
|
|
|
|
|
2009-09-21 16:49:49 +02:00
|
|
|
/* PDE used to map in kernel, kernel physical address. */
|
2013-02-10 19:37:12 +01:00
|
|
|
#define MAX_PAGEDIR_PDES 5
|
|
|
|
/* Table of MAX_PAGEDIR_PDES records, one per page-directory mapping slot.
 * The code that fills and reads these records is not in this part of the
 * file, so the per-field notes below are inferred from the names and types
 * and should be confirmed against the users of pagedir_mappings.
 */
static struct pdm {
|
|
|
|
int pdeno;	/* presumably the page directory entry (PDE) index of this slot - TODO confirm */
|
|
|
|
u32_t val;	/* presumably the raw PDE value installed for this slot - TODO confirm */
|
|
|
|
phys_bytes phys;	/* a physical address; NOTE(review): looks like the physical location backing page_directories - confirm */
|
|
|
|
u32_t *page_directories;	/* pointer to u32_t entries; presumably VM's own view of the mapped page directories - verify */
|
|
|
|
} pagedir_mappings[MAX_PAGEDIR_PDES];
|
|
|
|
|
No more intel/minix segments.
This commit removes all traces of Minix segments (the text/data/stack
memory map abstraction in the kernel) and significance of Intel segments
(hardware segments like CS, DS that add offsets to all addressing before
page table translation). This ultimately simplifies the memory layout
and addressing and makes the same layout possible on non-Intel
architectures.
There are only two types of addresses in the world now: virtual
and physical; even the kernel and processes have the same virtual
address space. Kernel and user processes can be distinguished at a
glance as processes won't use 0xF0000000 and above.
No static pre-allocated memory sizes exist any more.
Changes to booting:
. The pre_init.c leaves the kernel and modules exactly as
they were left by the bootloader in physical memory
. The kernel starts running using physical addressing,
loaded at a fixed location given in its linker script by the
bootloader. All code and data in this phase are linked to
this fixed low location.
. It makes a bootstrap pagetable to map itself to a
fixed high location (also in linker script) and jumps to
the high address. All code and data then use this high addressing.
. All code/data symbols linked at the low addresses are prefixed by
an objcopy step with __k_unpaged_*, so that that code cannot
reference highly-linked symbols (which aren't valid yet) or vice
versa (symbols that aren't valid any more).
. The two addressing modes are separated in the linker script by
collecting the unpaged_*.o objects and linking them with low
addresses, and linking the rest high. Some objects are linked
twice, once low and once high.
. The bootstrap phase passes a lot of information (e.g. free memory
list, physical location of the modules, etc.) using the kinfo
struct.
. After this bootstrap the low-linked part is freed.
. The kernel maps in VM into the bootstrap page table so that VM can
begin executing. Its first job is to make page tables for all other
boot processes. So VM runs before RS, and RS gets a fully dynamic,
VM-managed address space. VM gets its privilege info from RS as usual
but that happens after RS starts running.
. Both the kernel loading VM and VM organizing boot processes happen
using the libexec logic. This removes the last reason for VM to
still know much about exec() and vm/exec.c is gone.
Further Implementation:
. All segments are based at 0 and have a 4 GB limit.
. The kernel is mapped in at the top of the virtual address
space so as not to constrain the user processes.
. Processes do not use segments from the LDT at all; there are
no segments in the LDT any more, so no LLDT is needed.
. The Minix segments T/D/S are gone and so none of the
user-space or in-kernel copy functions use them. The copy
functions use a process endpoint of NONE to realize it's
a physical address, virtual otherwise.
. The umap call only makes sense to translate a virtual address
to a physical address now.
. Segments-related calls like newmap and alloc_segments are gone.
. All segments-related translation in VM is gone (vir2map etc).
. Initialization in VM is simpler as no moving around is necessary.
. VM and all other boot processes can be linked wherever they wish
and will be mapped in at the right location by the kernel and VM
respectively.
Other changes:
. The multiboot code is less special: it does not use mb_print
for its diagnostics any more but uses printf() as normal, saving
the output into the diagnostics buffer, only printing to the
screen using the direct print functions if a panic() occurs.
. The multiboot code uses the flexible 'free memory map list'
style to receive the list of free memory if available.
. The kernel determines the memory layout of the processes to
a degree: it tells VM where the kernel starts and ends and
where the kernel wants the top of the process to be. VM then
uses this entire range, i.e. the stack is right at the top,
and mmap()ped bits of memory are placed below that downwards,
and the break grows upwards.
Other Consequences:
. Every process gets its own page table as address spaces
can't be separated any more by segments.
. As all segments are 0-based, there is no distinction between
virtual and linear addresses, nor between userspace and
kernel addresses.
. Less work is done when context switching, leading to a net
performance increase. (8% faster on my machine for 'make servers'.)
. The layout and configuration of the GDT makes sysenter and syscall
possible.
2012-05-07 16:03:35 +02:00
|
|
|
static multiboot_module_t *kern_mb_mod = NULL;
|
|
|
|
static size_t kern_size = 0;
|
|
|
|
static int kern_start_pde = -1;
|
2009-09-21 16:49:49 +02:00
|
|
|
|
2012-11-09 19:00:46 +01:00
|
|
|
/* big page size available in hardware? */
|
2012-10-31 19:24:14 +01:00
|
|
|
static int bigpage_ok = 1;
|
2008-11-19 13:26:10 +01:00
|
|
|
|
|
|
|
/* Our process table entry. */
|
2010-04-12 13:25:24 +02:00
|
|
|
struct vmproc *vmprocess = &vmproc[VM_PROC_NR];
|
2008-11-19 13:26:10 +01:00
|
|
|
|
|
|
|
/* Spare memory, ready to go after initialization, to avoid a
|
|
|
|
* circular dependency on allocating memory and writing it into VM's
|
|
|
|
* page table.
|
|
|
|
*/
|
2012-09-18 13:17:45 +02:00
|
|
|
#if SANITYCHECKS
|
2013-02-24 22:00:52 +01:00
|
|
|
#define SPAREPAGES 200
|
|
|
|
#define STATIC_SPAREPAGES 190
|
2012-09-18 13:17:45 +02:00
|
|
|
#else
|
2013-01-23 14:54:41 +01:00
|
|
|
#ifdef __arm__
|
2013-02-10 19:37:12 +01:00
|
|
|
# define SPAREPAGES 150
|
|
|
|
# define STATIC_SPAREPAGES 140
|
2013-01-23 14:54:41 +01:00
|
|
|
#else
|
|
|
|
# define SPAREPAGES 20
|
|
|
|
# define STATIC_SPAREPAGES 15
|
|
|
|
#endif /* __arm__ */
|
2012-10-31 19:24:14 +01:00
|
|
|
#endif
|
2012-12-17 19:26:52 +01:00
|
|
|
|
make vfs & filesystems use failable copying
Change the kernel to add features to vircopy and safecopies so that
transparent copy fixing won't happen to avoid deadlocks, and such copies
fail with EFAULT.
Transparently making copying work from filesystems (as normally done by
the kernel & VM when copying fails because of missing/readonly memory)
is problematic as it can happen, for file-mapped ranges, that the
same filesystem that is blocked on the copy request is needed to satisfy
the memory range, leading to deadlock. Ditto for VFS itself, if done with
a blocking call.
This change makes the copying done from a filesystem fail in such cases
with EFAULT by VFS adding the CPF_TRY flag to the grants. If a FS call
fails with EFAULT, VFS will then request the range to be made available
to VM after the FS is unblocked, allowing it to be used to satisfy the
range if need be in another VFS thread.
Similarly, for datacopies that VFS itself does, it uses the failable
vircopy variant and callers use a wrapper that talks to VM if necessary
to get the copy to work.
. kernel: add CPF_TRY flag to safecopies
. kernel: only request writable ranges to VM for the
target buffer when copying fails
. do copying in VFS TRY-first
. some fixes in VM to build SANITYCHECK mode
. add regression test for the cases where
- a FS system call needs memory mapped in a process that the
FS itself must map.
- such a range covers more than one file-mapped region.
. add 'try' mode to vircopy, physcopy
. add flags field to copy kernel call messages
. if CP_FLAG_TRY is set, do not transparently try
to fix memory ranges
. for use by VFS when accessing user buffers to avoid
deadlock
. remove some obsolete backwards compatibility assignments
. VFS: let thread scheduling work for VM requests too
Allows VFS to make calls to VM while suspending and resuming
the currently running thread. Does currently not work for the
main thread.
. VM: add fix memory range call for use by VFS
Change-Id: I295794269cea51a3163519a9cfe5901301d90b32
2014-01-16 14:22:13 +01:00
|
|
|
#ifdef __i386__
|
|
|
|
static u32_t global_bit = 0;
|
|
|
|
#endif
|
|
|
|
|
2013-02-10 19:37:12 +01:00
|
|
|
#define SPAREPAGEDIRS 1
|
|
|
|
#define STATIC_SPAREPAGEDIRS 1
|
2012-12-17 19:26:52 +01:00
|
|
|
|
2012-10-31 19:24:14 +01:00
|
|
|
int missing_sparedirs = SPAREPAGEDIRS;
|
|
|
|
/* Pool of SPAREPAGEDIRS spare page-directory slots. Each slot pairs a
 * pointer (pagedir) with a physical address (phys).
 * NOTE(review): presumably these are the virtual and physical addresses of
 * the same page directory - confirm against the code that fills the pool.
 */
static struct {
|
|
|
|
void *pagedir;	/* pointer half of the slot; presumably VM's virtual address of the directory - TODO confirm */
|
|
|
|
phys_bytes phys;	/* physical-address half of the slot */
|
|
|
|
} sparepagedirs[SPAREPAGEDIRS];
|
2012-11-09 19:00:46 +01:00
|
|
|
|
2012-09-18 22:19:22 +02:00
|
|
|
extern char _end;
|
|
|
|
/* is_staticaddr(v): nonzero when v, converted to vir_bytes, is numerically
 * below the address of the external symbol _end.
 * NOTE(review): _end is presumably the linker's end-of-image marker, which
 * would make this a test for objects inside VM's static image - confirm.
 */
#define is_staticaddr(v) ((vir_bytes) (v) < (vir_bytes) &_end)
|
|
|
|
|
2009-11-11 18:02:45 +01:00
|
|
|
#define MAX_KERNMAPPINGS 10
|
2012-03-25 20:25:53 +02:00
|
|
|
/* Fixed-size table of up to MAX_KERNMAPPINGS records. Each record describes
 * one range by physical address and length, together with a virtual address
 * and a flags word. The code that fills the table is not in this part of the
 * file; presumably the entries come from the kernel - TODO confirm.
 */
static struct {
|
2009-11-11 18:02:45 +01:00
|
|
|
phys_bytes phys_addr; /* Physical addr. */
|
|
|
|
phys_bytes len; /* Length in bytes. */
|
No more intel/minix segments.
This commit removes all traces of Minix segments (the text/data/stack
memory map abstraction in the kernel) and significance of Intel segments
(hardware segments like CS, DS that add offsets to all addressing before
page table translation). This ultimately simplifies the memory layout
and addressing and makes the same layout possible on non-Intel
architectures.
There are only two types of addresses in the world now: virtual
and physical; even the kernel and processes have the same virtual
address space. Kernel and user processes can be distinguished at a
glance as processes won't use 0xF0000000 and above.
No static pre-allocated memory sizes exist any more.
Changes to booting:
. The pre_init.c leaves the kernel and modules exactly as
they were left by the bootloader in physical memory
. The kernel starts running using physical addressing,
loaded at a fixed location given in its linker script by the
bootloader. All code and data in this phase are linked to
this fixed low location.
. It makes a bootstrap pagetable to map itself to a
fixed high location (also in linker script) and jumps to
the high address. All code and data then use this high addressing.
. All code/data symbols linked at the low addresses is prefixed by
an objcopy step with __k_unpaged_*, so that that code cannot
reference highly-linked symbols (which aren't valid yet) or vice
versa (symbols that aren't valid any more).
. The two addressing modes are separated in the linker script by
collecting the unpaged_*.o objects and linking them with low
addresses, and linking the rest high. Some objects are linked
twice, once low and once high.
. The bootstrap phase passes a lot of information (e.g. free memory
list, physical location of the modules, etc.) using the kinfo
struct.
. After this bootstrap the low-linked part is freed.
. The kernel maps in VM into the bootstrap page table so that VM can
begin executing. Its first job is to make page tables for all other
boot processes. So VM runs before RS, and RS gets a fully dynamic,
VM-managed address space. VM gets its privilege info from RS as usual
but that happens after RS starts running.
. Both the kernel loading VM and VM organizing boot processes happen
using the libexec logic. This removes the last reason for VM to
still know much about exec() and vm/exec.c is gone.
Further Implementation:
. All segments are based at 0 and have a 4 GB limit.
. The kernel is mapped in at the top of the virtual address
space so as not to constrain the user processes.
. Processes do not use segments from the LDT at all; there are
no segments in the LDT any more, so no LLDT is needed.
. The Minix segments T/D/S are gone and so none of the
user-space or in-kernel copy functions use them. The copy
functions use a process endpoint of NONE to realize it's
a physical address, virtual otherwise.
. The umap call only makes sense to translate a virtual address
to a physical address now.
. Segments-related calls like newmap and alloc_segments are gone.
. All segments-related translation in VM is gone (vir2map etc).
. Initialization in VM is simpler as no moving around is necessary.
. VM and all other boot processes can be linked wherever they wish
and will be mapped in at the right location by the kernel and VM
respectively.
Other changes:
. The multiboot code is less special: it does not use mb_print
for its diagnostics any more but uses printf() as normal, saving
the output into the diagnostics buffer, only printing to the
screen using the direct print functions if a panic() occurs.
. The multiboot code uses the flexible 'free memory map list'
style to receive the list of free memory if available.
. The kernel determines the memory layout of the processes to
a degree: it tells VM where the kernel starts and ends and
where the kernel wants the top of the process to be. VM then
uses this entire range, i.e. the stack is right at the top,
and mmap()ped bits of memory are placed below that downwards,
and the break grows upwards.
Other Consequences:
. Every process gets its own page table as address spaces
can't be separated any more by segments.
. As all segments are 0-based, there is no distinction between
virtual and linear addresses, nor between userspace and
kernel addresses.
. Less work is done when context switching, leading to a net
performance increase. (8% faster on my machine for 'make servers'.)
. The layout and configuration of the GDT makes sysenter and syscall
possible.
2012-05-07 16:03:35 +02:00
|
|
|
vir_bytes vir_addr; /* Offset in page table. */
|
2009-11-11 18:02:45 +01:00
|
|
|
int flags;
|
|
|
|
} kern_mappings[MAX_KERNMAPPINGS];
|
|
|
|
int kernmappings = 0;
|
|
|
|
|
2008-11-19 13:26:10 +01:00
|
|
|
/* Clicks must be pages, as
|
|
|
|
* - they must be page aligned to map them
|
|
|
|
* - they must be a multiple of the page size
|
|
|
|
* - it's inconvenient to have them bigger than pages, because we often want
|
|
|
|
* just one page
|
|
|
|
* May as well require them to be equal then.
|
|
|
|
*/
|
2012-10-31 19:24:14 +01:00
|
|
|
#if CLICK_SIZE != VM_PAGE_SIZE
|
2008-11-19 13:26:10 +01:00
|
|
|
#error CLICK_SIZE must be page size.
|
|
|
|
#endif
|
|
|
|
|
2012-12-29 01:16:33 +01:00
|
|
|
static void *spare_pagequeue;
|
2012-10-31 19:24:14 +01:00
|
|
|
static char static_sparepages[VM_PAGE_SIZE*STATIC_SPAREPAGES]
|
|
|
|
__aligned(VM_PAGE_SIZE);
|
|
|
|
|
|
|
|
#if defined(__arm__)
|
2012-11-09 19:00:46 +01:00
|
|
|
static char static_sparepagedirs[ARCH_PAGEDIR_SIZE*STATIC_SPAREPAGEDIRS + ARCH_PAGEDIR_SIZE] __aligned(ARCH_PAGEDIR_SIZE);
|
2012-10-31 19:24:14 +01:00
|
|
|
#endif
|
2012-11-09 19:00:46 +01:00
|
|
|
|
2008-11-19 13:26:10 +01:00
|
|
|
#if SANITYCHECKS
|
|
|
|
/*===========================================================================*
|
|
|
|
* pt_sanitycheck *
|
|
|
|
*===========================================================================*/
|
make vfs & filesystems use failable copying
Change the kernel to add features to vircopy and safecopies so that
transparent copy fixing won't happen to avoid deadlocks, and such copies
fail with EFAULT.
Transparently making copying work from filesystems (as normally done by
the kernel & VM when copying fails because of missing/readonly memory)
is problematic as it can happen, for file-mapped ranges, that the
same filesystem that is blocked on the copy request is needed to satisfy
the memory range, leading to deadlock. Ditto for VFS itself, if done with
a blocking call.
This change makes the copying done from a filesystem fail in such cases
with EFAULT by VFS adding the CPF_TRY flag to the grants. If a FS call
fails with EFAULT, VFS will then request the range to be made available
to VM after the FS is unblocked, allowing it to be used to satisfy the
range if need be in another VFS thread.
Similarly, for datacopies that VFS itself does, it uses the failable
vircopy variant and callers use a wrapper that talks to VM if necessary
to get the copy to work.
. kernel: add CPF_TRY flag to safecopies
. kernel: only request writable ranges to VM for the
target buffer when copying fails
. do copying in VFS TRY-first
. some fixes in VM to build SANITYCHECK mode
. add regression test for the cases where
- a FS system call needs memory mapped in a process that the
FS itself must map.
- such a range covers more than one file-mapped region.
. add 'try' mode to vircopy, physcopy
. add flags field to copy kernel call messages
. if CP_FLAG_TRY is set, do not transparently try
to fix memory ranges
. for use by VFS when accessing user buffers to avoid
deadlock
. remove some obsolete backwards compatibility assignments
. VFS: let thread scheduling work for VM requests too
Allows VFS to make calls to VM while suspending and resuming
the currently running thread. Does currently not work for the
main thread.
. VM: add fix memory range call for use by VFS
Change-Id: I295794269cea51a3163519a9cfe5901301d90b32
2014-01-16 14:22:13 +01:00
|
|
|
void pt_sanitycheck(pt_t *pt, const char *file, int line)
{
	/* Consistency check of a single page table.
	 *
	 * pt         page table to check; must be non-NULL, have a non-zero
	 *            pt_dir and pt_dir_phys, and be the vm_pt member of some
	 *            entry of vmproc[].
	 * file, line not referenced directly in this body; presumably picked
	 *            up by the MYASSERT macro for its report - TODO confirm.
	 *
	 * Panics when pt does not belong to any vmproc[] entry. Finally
	 * asserts that usedpages_add() returns OK for the VM_PAGE_SIZE bytes
	 * at pt->pt_dir_phys.
	 */
	int idx = 0;
	int owned = 0;

	MYASSERT(pt);
	MYASSERT(pt->pt_dir);
	MYASSERT(pt->pt_dir_phys);

	/* Walk the process table until an entry embedding this pt turns up. */
	while(!owned && idx < ELEMENTS(vmproc)) {
		owned = (&vmproc[idx].vm_pt == pt);
		idx++;
	}

	if(!owned)
		panic("pt_sanitycheck: passed pt not in any proc");

	MYASSERT(usedpages_add(pt->pt_dir_phys, VM_PAGE_SIZE) == OK);
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/*===========================================================================*
|
|
|
|
* findhole *
|
|
|
|
*===========================================================================*/
|
2012-10-31 19:24:14 +01:00
|
|
|
static u32_t findhole(int pages)
|
2008-11-19 13:26:10 +01:00
|
|
|
{
|
No more intel/minix segments.
This commit removes all traces of Minix segments (the text/data/stack
memory map abstraction in the kernel) and significance of Intel segments
(hardware segments like CS, DS that add offsets to all addressing before
page table translation). This ultimately simplifies the memory layout
and addressing and makes the same layout possible on non-Intel
architectures.
There are only two types of addresses in the world now: virtual
and physical; even the kernel and processes have the same virtual
address space. Kernel and user processes can be distinguished at a
glance as processes won't use 0xF0000000 and above.
No static pre-allocated memory sizes exist any more.
Changes to booting:
. The pre_init.c leaves the kernel and modules exactly as
they were left by the bootloader in physical memory
. The kernel starts running using physical addressing,
loaded at a fixed location given in its linker script by the
bootloader. All code and data in this phase are linked to
this fixed low location.
. It makes a bootstrap pagetable to map itself to a
fixed high location (also in linker script) and jumps to
the high address. All code and data then use this high addressing.
. All code/data symbols linked at the low addresses are prefixed by
an objcopy step with __k_unpaged_*, so that that code cannot
reference highly-linked symbols (which aren't valid yet) or vice
versa (symbols that aren't valid any more).
. The two addressing modes are separated in the linker script by
collecting the unpaged_*.o objects and linking them with low
addresses, and linking the rest high. Some objects are linked
twice, once low and once high.
. The bootstrap phase passes a lot of information (e.g. free memory
list, physical location of the modules, etc.) using the kinfo
struct.
. After this bootstrap the low-linked part is freed.
. The kernel maps in VM into the bootstrap page table so that VM can
begin executing. Its first job is to make page tables for all other
boot processes. So VM runs before RS, and RS gets a fully dynamic,
VM-managed address space. VM gets its privilege info from RS as usual
but that happens after RS starts running.
. Both the kernel loading VM and VM organizing boot processes happen
using the libexec logic. This removes the last reason for VM to
still know much about exec() and vm/exec.c is gone.
Further Implementation:
. All segments are based at 0 and have a 4 GB limit.
. The kernel is mapped in at the top of the virtual address
space so as not to constrain the user processes.
. Processes do not use segments from the LDT at all; there are
no segments in the LDT any more, so no LLDT is needed.
. The Minix segments T/D/S are gone and so none of the
user-space or in-kernel copy functions use them. The copy
functions use a process endpoint of NONE to realize it's
a physical address, virtual otherwise.
. The umap call only makes sense to translate a virtual address
to a physical address now.
. Segments-related calls like newmap and alloc_segments are gone.
. All segments-related translation in VM is gone (vir2map etc).
. Initialization in VM is simpler as no moving around is necessary.
. VM and all other boot processes can be linked wherever they wish
and will be mapped in at the right location by the kernel and VM
respectively.
Other changes:
. The multiboot code is less special: it does not use mb_print
for its diagnostics any more but uses printf() as normal, saving
the output into the diagnostics buffer, only printing to the
screen using the direct print functions if a panic() occurs.
. The multiboot code uses the flexible 'free memory map list'
style to receive the list of free memory if available.
. The kernel determines the memory layout of the processes to
a degree: it tells VM where the kernel starts and ends and
where the kernel wants the top of the process to be. VM then
uses this entire range, i.e. the stack is right at the top,
and mmap()ped bits of memory are placed below that downwards,
and the break grows upwards.
Other Consequences:
. Every process gets its own page table as address spaces
can't be separated any more by segments.
. As all segments are 0-based, there is no distinction between
virtual and linear addresses, nor between userspace and
kernel addresses.
. Less work is done when context switching, leading to a net
performance increase. (8% faster on my machine for 'make servers'.)
. The layout and configuration of the GDT makes sysenter and syscall
possible.
2012-05-07 16:03:35 +02:00
|
|
|
/* Find a space in the virtual address space of VM. */
|
2011-06-01 11:30:58 +02:00
|
|
|
u32_t curv;
|
2008-11-19 13:26:10 +01:00
|
|
|
int pde = 0, try_restart;
|
2009-09-23 18:02:27 +02:00
|
|
|
static u32_t lastv = 0;
|
No more intel/minix segments.
This commit removes all traces of Minix segments (the text/data/stack
memory map abstraction in the kernel) and significance of Intel segments
(hardware segments like CS, DS that add offsets to all addressing before
page table translation). This ultimately simplifies the memory layout
and addressing and makes the same layout possible on non-Intel
architectures.
There are only two types of addresses in the world now: virtual
and physical; even the kernel and processes have the same virtual
address space. Kernel and user processes can be distinguished at a
glance as processes won't use 0xF0000000 and above.
No static pre-allocated memory sizes exist any more.
Changes to booting:
. The pre_init.c leaves the kernel and modules exactly as
they were left by the bootloader in physical memory
. The kernel starts running using physical addressing,
loaded at a fixed location given in its linker script by the
bootloader. All code and data in this phase are linked to
this fixed low location.
. It makes a bootstrap pagetable to map itself to a
fixed high location (also in linker script) and jumps to
the high address. All code and data then use this high addressing.
. All code/data symbols linked at the low addresses are prefixed by
an objcopy step with __k_unpaged_*, so that that code cannot
reference highly-linked symbols (which aren't valid yet) or vice
versa (symbols that aren't valid any more).
. The two addressing modes are separated in the linker script by
collecting the unpaged_*.o objects and linking them with low
addresses, and linking the rest high. Some objects are linked
twice, once low and once high.
. The bootstrap phase passes a lot of information (e.g. free memory
list, physical location of the modules, etc.) using the kinfo
struct.
. After this bootstrap the low-linked part is freed.
. The kernel maps in VM into the bootstrap page table so that VM can
begin executing. Its first job is to make page tables for all other
boot processes. So VM runs before RS, and RS gets a fully dynamic,
VM-managed address space. VM gets its privilege info from RS as usual
but that happens after RS starts running.
. Both the kernel loading VM and VM organizing boot processes happen
using the libexec logic. This removes the last reason for VM to
still know much about exec() and vm/exec.c is gone.
Further Implementation:
. All segments are based at 0 and have a 4 GB limit.
. The kernel is mapped in at the top of the virtual address
space so as not to constrain the user processes.
. Processes do not use segments from the LDT at all; there are
no segments in the LDT any more, so no LLDT is needed.
. The Minix segments T/D/S are gone and so none of the
user-space or in-kernel copy functions use them. The copy
functions use a process endpoint of NONE to realize it's
a physical address, virtual otherwise.
. The umap call only makes sense to translate a virtual address
to a physical address now.
. Segments-related calls like newmap and alloc_segments are gone.
. All segments-related translation in VM is gone (vir2map etc).
. Initialization in VM is simpler as no moving around is necessary.
. VM and all other boot processes can be linked wherever they wish
and will be mapped in at the right location by the kernel and VM
respectively.
Other changes:
. The multiboot code is less special: it does not use mb_print
for its diagnostics any more but uses printf() as normal, saving
the output into the diagnostics buffer, only printing to the
screen using the direct print functions if a panic() occurs.
. The multiboot code uses the flexible 'free memory map list'
style to receive the list of free memory if available.
. The kernel determines the memory layout of the processes to
a degree: it tells VM where the kernel starts and ends and
where the kernel wants the top of the process to be. VM then
uses this entire range, i.e. the stack is right at the top,
and mmap()ped bits of memory are placed below that downwards,
and the break grows upwards.
Other Consequences:
. Every process gets its own page table as address spaces
can't be separated any more by segments.
. As all segments are 0-based, there is no distinction between
virtual and linear addresses, nor between userspace and
kernel addresses.
. Less work is done when context switching, leading to a net
performance increase. (8% faster on my machine for 'make servers'.)
. The layout and configuration of the GDT makes sysenter and syscall
possible.
2012-05-07 16:03:35 +02:00
|
|
|
pt_t *pt = &vmprocess->vm_pt;
|
|
|
|
vir_bytes vmin, vmax;
|
2013-02-24 22:00:52 +01:00
|
|
|
u32_t holev = NO_MEM;
|
|
|
|
int holesize = -1;
|
No more intel/minix segments.
This commit removes all traces of Minix segments (the text/data/stack
memory map abstraction in the kernel) and significance of Intel segments
(hardware segments like CS, DS that add offsets to all addressing before
page table translation). This ultimately simplifies the memory layout
and addressing and makes the same layout possible on non-Intel
architectures.
There are only two types of addresses in the world now: virtual
and physical; even the kernel and processes have the same virtual
address space. Kernel and user processes can be distinguished at a
glance as processes won't use 0xF0000000 and above.
No static pre-allocated memory sizes exist any more.
Changes to booting:
. The pre_init.c leaves the kernel and modules exactly as
they were left by the bootloader in physical memory
. The kernel starts running using physical addressing,
loaded at a fixed location given in its linker script by the
bootloader. All code and data in this phase are linked to
this fixed low location.
. It makes a bootstrap pagetable to map itself to a
fixed high location (also in linker script) and jumps to
the high address. All code and data then use this high addressing.
. All code/data symbols linked at the low addresses are prefixed by
an objcopy step with __k_unpaged_*, so that that code cannot
reference highly-linked symbols (which aren't valid yet) or vice
versa (symbols that aren't valid any more).
. The two addressing modes are separated in the linker script by
collecting the unpaged_*.o objects and linking them with low
addresses, and linking the rest high. Some objects are linked
twice, once low and once high.
. The bootstrap phase passes a lot of information (e.g. free memory
list, physical location of the modules, etc.) using the kinfo
struct.
. After this bootstrap the low-linked part is freed.
. The kernel maps in VM into the bootstrap page table so that VM can
begin executing. Its first job is to make page tables for all other
boot processes. So VM runs before RS, and RS gets a fully dynamic,
VM-managed address space. VM gets its privilege info from RS as usual
but that happens after RS starts running.
. Both the kernel loading VM and VM organizing boot processes happen
using the libexec logic. This removes the last reason for VM to
still know much about exec() and vm/exec.c is gone.
Further Implementation:
. All segments are based at 0 and have a 4 GB limit.
. The kernel is mapped in at the top of the virtual address
space so as not to constrain the user processes.
. Processes do not use segments from the LDT at all; there are
no segments in the LDT any more, so no LLDT is needed.
. The Minix segments T/D/S are gone and so none of the
user-space or in-kernel copy functions use them. The copy
functions use a process endpoint of NONE to realize it's
a physical address, virtual otherwise.
. The umap call only makes sense to translate a virtual address
to a physical address now.
. Segments-related calls like newmap and alloc_segments are gone.
. All segments-related translation in VM is gone (vir2map etc).
. Initialization in VM is simpler as no moving around is necessary.
. VM and all other boot processes can be linked wherever they wish
and will be mapped in at the right location by the kernel and VM
respectively.
Other changes:
. The multiboot code is less special: it does not use mb_print
for its diagnostics any more but uses printf() as normal, saving
the output into the diagnostics buffer, only printing to the
screen using the direct print functions if a panic() occurs.
. The multiboot code uses the flexible 'free memory map list'
style to receive the list of free memory if available.
. The kernel determines the memory layout of the processes to
a degree: it tells VM where the kernel starts and ends and
where the kernel wants the top of the process to be. VM then
uses this entire range, i.e. the stack is right at the top,
and mmap()ped bits of memory are placed below that downwards,
and the break grows upwards.
Other Consequences:
. Every process gets its own page table as address spaces
can't be separated any more by segments.
. As all segments are 0-based, there is no distinction between
virtual and linear addresses, nor between userspace and
kernel addresses.
. Less work is done when context switching, leading to a net
performance increase. (8% faster on my machine for 'make servers'.)
. The layout and configuration of the GDT makes sysenter and syscall
possible.
2012-05-07 16:03:35 +02:00
|
|
|
|
2012-12-17 19:26:52 +01:00
|
|
|
vmin = (vir_bytes) (&_end); /* marks end of VM BSS */
|
|
|
|
vmin += 1024*1024*1024; /* reserve 1GB virtual address space for VM heap */
|
|
|
|
vmin &= ARCH_VM_ADDR_MASK;
|
2013-02-24 22:00:52 +01:00
|
|
|
vmax = vmin + 100 * 1024 * 1024; /* allow 100MB of address space for VM */
|
2008-11-19 13:26:10 +01:00
|
|
|
|
|
|
|
/* Input sanity check. */
|
2012-10-31 19:24:14 +01:00
|
|
|
assert(vmin + VM_PAGE_SIZE >= vmin);
|
|
|
|
assert(vmax >= vmin + VM_PAGE_SIZE);
|
|
|
|
assert((vmin % VM_PAGE_SIZE) == 0);
|
|
|
|
assert((vmax % VM_PAGE_SIZE) == 0);
|
|
|
|
assert(pages > 0);
|
2008-11-19 13:26:10 +01:00
|
|
|
|
2009-09-23 18:02:27 +02:00
|
|
|
curv = lastv;
|
|
|
|
if(curv < vmin || curv >= vmax)
|
|
|
|
curv = vmin;
|
2013-02-01 17:48:40 +01:00
|
|
|
|
2009-09-23 15:33:01 +02:00
|
|
|
try_restart = 1;
|
2008-11-19 13:26:10 +01:00
|
|
|
|
2009-09-23 15:33:01 +02:00
|
|
|
/* Start looking for a free page starting at vmin. */
|
|
|
|
while(curv < vmax) {
|
|
|
|
int pte;
|
2008-11-19 13:26:10 +01:00
|
|
|
|
2010-04-12 14:37:28 +02:00
|
|
|
assert(curv >= vmin);
|
|
|
|
assert(curv < vmax);
|
2008-11-19 13:26:10 +01:00
|
|
|
|
2013-03-06 13:17:07 +01:00
|
|
|
pde = ARCH_VM_PDE(curv);
|
|
|
|
pte = ARCH_VM_PTE(curv);
|
2008-11-19 13:26:10 +01:00
|
|
|
|
2013-02-24 22:00:52 +01:00
|
|
|
if((pt->pt_dir[pde] & ARCH_VM_PDE_PRESENT) &&
|
|
|
|
(pt->pt_pt[pde][pte] & ARCH_VM_PTE_PRESENT)) {
|
|
|
|
/* there is a page here - so keep looking for holes */
|
|
|
|
holev = NO_MEM;
|
|
|
|
holesize = 0;
|
|
|
|
} else {
|
|
|
|
/* there is no page here - so we have a hole, a bigger
|
|
|
|
* one if we already had one
|
|
|
|
*/
|
|
|
|
if(holev == NO_MEM) {
|
|
|
|
holev = curv;
|
|
|
|
holesize = 1;
|
|
|
|
} else holesize++;
|
|
|
|
|
|
|
|
assert(holesize > 0);
|
|
|
|
assert(holesize <= pages);
|
|
|
|
|
|
|
|
/* if it's big enough, return it */
|
|
|
|
if(holesize == pages) {
|
|
|
|
lastv = curv + VM_PAGE_SIZE;
|
|
|
|
return holev;
|
|
|
|
}
|
2008-11-19 13:26:10 +01:00
|
|
|
}
|
|
|
|
|
2012-10-31 19:24:14 +01:00
|
|
|
curv+=VM_PAGE_SIZE;
|
2008-11-19 13:26:10 +01:00
|
|
|
|
2013-02-24 22:00:52 +01:00
|
|
|
/* if we reached the limit, start scanning from the beginning if
|
|
|
|
* we haven't looked there yet
|
|
|
|
*/
|
2008-11-19 13:26:10 +01:00
|
|
|
if(curv >= vmax && try_restart) {
|
|
|
|
try_restart = 0;
|
2013-02-24 22:00:52 +01:00
|
|
|
curv = vmin;
|
2008-11-19 13:26:10 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2009-09-23 15:33:01 +02:00
|
|
|
printf("VM: out of virtual address space in vm\n");
|
2008-11-19 13:26:10 +01:00
|
|
|
|
|
|
|
return NO_MEM;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*===========================================================================*
|
|
|
|
* vm_freepages *
|
|
|
|
*===========================================================================*/
|
2012-09-18 13:17:50 +02:00
|
|
|
void vm_freepages(vir_bytes vir, int pages)
{
/* Release 'pages' pages of VM's own address space starting at the
 * page-aligned virtual address 'vir'.  The range is rewritten to MAP_NONE
 * with WMF_OVERWRITE | WMF_FREE.  Addresses for which is_staticaddr()
 * holds are reported and left untouched.
 */
	int r;

	assert((vir % VM_PAGE_SIZE) == 0);

	if(is_staticaddr(vir)) {
		printf("VM: not freeing static page\n");
		return;
	}

	r = pt_writemap(vmprocess, &vmprocess->vm_pt, vir, MAP_NONE,
		pages*VM_PAGE_SIZE, 0, WMF_OVERWRITE | WMF_FREE);
	if(r != OK)
		panic("vm_freepages: pt_writemap failed");

	/* The counter moves by one per call, regardless of 'pages'. */
	vm_self_pages--;

#if SANITYCHECKS
	/* With SANITYCHECKS enabled, flush the TLB so that any access to
	 * the freed pages traps even if a stale translation was cached.
	 */
	if(sys_vmctl(SELF, VMCTL_FLUSHTLB, 0) != OK) {
		panic("VMCTL_FLUSHTLB failed");
	}
#endif
}
|
|
|
|
|
|
|
|
/*===========================================================================*
|
|
|
|
* vm_getsparepage *
|
|
|
|
*===========================================================================*/
|
2012-03-25 20:25:53 +02:00
|
|
|
static void *vm_getsparepage(phys_bytes *phys)
{
/* Take one page from spare_pagequeue.  On success the page's virtual
 * address is returned and reservedqueue_alloc() is given 'phys' to fill in;
 * if the queue cannot supply a page, a message is printed and NULL returned.
 */
	void *page;

	if(reservedqueue_alloc(spare_pagequeue, phys, &page) == OK) {
		assert(page);
		return page;
	}

	printf("vm_getsparepage: no spare found\n");
	return NULL;
}
|
|
|
|
|
2012-10-31 19:24:14 +01:00
|
|
|
/*===========================================================================*
|
|
|
|
* vm_getsparepagedir *
|
|
|
|
*===========================================================================*/
|
|
|
|
static void *vm_getsparepagedir(phys_bytes *phys)
{
/* Hand out the first available entry of sparepagedirs[].  The entry's
 * physical address is stored in *phys, the slot is emptied and
 * missing_sparedirs is bumped.  Returns NULL when every slot is empty.
 */
	int slot;

	assert(missing_sparedirs >= 0 && missing_sparedirs <= SPAREPAGEDIRS);

	for(slot = 0; slot < SPAREPAGEDIRS; slot++) {
		void *dir = sparepagedirs[slot].pagedir;

		if(!dir)
			continue;	/* slot already handed out */

		*phys = sparepagedirs[slot].phys;
		sparepagedirs[slot].pagedir = NULL;
		missing_sparedirs++;
		assert(missing_sparedirs >= 0 && missing_sparedirs <= SPAREPAGEDIRS);
		return dir;
	}

	return NULL;
}
|
|
|
|
|
2012-12-29 01:16:33 +01:00
|
|
|
void *vm_mappages(phys_bytes p, int pages)
{
/* Map 'pages' pages of physical memory starting at 'p' into VM's own
 * address space and return the virtual address chosen by findhole().
 * Returns NULL if no hole is found or the page table update fails;
 * panics if the TLB flush fails.
 */
	pt_t *own_pt = &vmprocess->vm_pt;
	vir_bytes where;
	u32_t pte_flags;
	int r;

	/* Find a place in our virtual address space to put it. */
	where = findhole(pages);
	if(where == NO_MEM) {
		printf("vm_mappages: findhole failed\n");
		return NULL;
	}

	pte_flags = ARCH_VM_PTE_PRESENT | ARCH_VM_PTE_USER | ARCH_VM_PTE_RW;
#if defined(__arm__)
	pte_flags |= ARM_VM_PTE_CACHED;
#endif

	/* Enter the mapping into our page table. */
	r = pt_writemap(vmprocess, own_pt, where, p, VM_PAGE_SIZE*pages,
		pte_flags, 0);
	if(r != OK) {
		printf("vm_mappages writemap failed\n");
		return NULL;
	}

	r = sys_vmctl(SELF, VMCTL_FLUSHTLB, 0);
	if(r != OK) {
		panic("VMCTL_FLUSHTLB failed: %d", r);
	}

	assert(where);

	return (void *) where;
}
|
|
|
|
|
2012-09-18 13:17:48 +02:00
|
|
|
/* While this is zero, vm_allocpages() serves requests from the spare
 * pages/page directories instead of allocating and mapping new memory.
 */
static int pt_init_done;
|
|
|
|
|
2008-11-19 13:26:10 +01:00
|
|
|
/*===========================================================================*
|
2009-09-23 15:33:01 +02:00
|
|
|
* vm_allocpage *
|
2008-11-19 13:26:10 +01:00
|
|
|
*===========================================================================*/
|
2012-12-17 19:26:52 +01:00
|
|
|
void *vm_allocpages(phys_bytes *phys, int reason, int pages)
{
/* Allocate 'pages' pages for use by VM itself.  The physical address is
 * stored in *phys and the virtual address is returned, or NULL on failure.
 *
 * 'level' tracks nesting of this function.  A nested call (level > 1), or
 * any call made before pt_init_done is set, is served from the spares:
 * one page from vm_getsparepage(), four pages from vm_getsparepagedir();
 * any other count panics.
 */
	static int level = 0;
	phys_bytes clicks;
	void *mapped;
	u32_t mem_flags = 0;

	assert(reason >= 0 && reason < VMP_CATEGORIES);
	assert(pages > 0);

	level++;

	assert(level >= 1);
	assert(level <= 2);

	if(level > 1 || !pt_init_done) {
		void *spare;

		switch(pages) {
		case 1:
			spare = vm_getsparepage(phys);
			break;
		case 4:
			spare = vm_getsparepagedir(phys);
			break;
		default:
			panic("%d pages", pages);
		}

		level--;

		if(!spare) {
			util_stacktrace();
			printf("VM: warning: out of spare pages\n");
		}

		if(!is_staticaddr(spare))
			vm_self_pages++;

		return spare;
	}

#if defined(__arm__)
	if(reason == VMP_PAGEDIR) {
		mem_flags |= PAF_ALIGN16K;
	}
#endif

	/* Get the memory.  VM is trusted, so the pages do not have to be
	 * pre-cleared.
	 */
	clicks = alloc_mem(pages, mem_flags);
	if(clicks == NO_MEM) {
		level--;
		printf("VM: vm_allocpage: alloc_mem failed\n");
		return NULL;
	}

	*phys = CLICK2ABS(clicks);

	/* NOTE(review): if the mapping fails, the memory obtained from
	 * alloc_mem() above does not appear to be released - confirm.
	 */
	mapped = vm_mappages(*phys, pages);
	if(!mapped) {
		level--;
		printf("VM: vm_allocpage: vm_mappages failed\n");
		return NULL;
	}

	level--;
	vm_self_pages++;

	return mapped;
}
|
|
|
|
|
2012-12-17 19:26:52 +01:00
|
|
|
void *vm_allocpage(phys_bytes *phys, int reason)
{
/* Single-page convenience wrapper around vm_allocpages(). */
	void *page = vm_allocpages(phys, reason, 1);

	return page;
}
|
|
|
|
|
2009-09-21 16:49:49 +02:00
|
|
|
/*===========================================================================*
|
|
|
|
* vm_pagelock *
|
|
|
|
*===========================================================================*/
|
2012-03-25 20:25:53 +02:00
|
|
|
void vm_pagelock(void *vir, int lockflag)
|
2009-09-21 16:49:49 +02:00
|
|
|
{
|
2009-09-23 15:33:01 +02:00
|
|
|
/* Mark a page allocated by vm_allocpage() unwritable, i.e. only for VM. */
|
No more intel/minix segments.
This commit removes all traces of Minix segments (the text/data/stack
memory map abstraction in the kernel) and significance of Intel segments
(hardware segments like CS, DS that add offsets to all addressing before
page table translation). This ultimately simplifies the memory layout
and addressing and makes the same layout possible on non-Intel
architectures.
There are only two types of addresses in the world now: virtual
and physical; even the kernel and processes have the same virtual
address space. Kernel and user processes can be distinguished at a
glance as processes won't use 0xF0000000 and above.
No static pre-allocated memory sizes exist any more.
Changes to booting:
. The pre_init.c leaves the kernel and modules exactly as
they were left by the bootloader in physical memory
. The kernel starts running using physical addressing,
loaded at a fixed location given in its linker script by the
bootloader. All code and data in this phase are linked to
this fixed low location.
. It makes a bootstrap pagetable to map itself to a
fixed high location (also in linker script) and jumps to
the high address. All code and data then use this high addressing.
. All code/data symbols linked at the low addresses are prefixed by
an objcopy step with __k_unpaged_*, so that that code cannot
reference highly-linked symbols (which aren't valid yet) or vice
versa (symbols that aren't valid any more).
. The two addressing modes are separated in the linker script by
collecting the unpaged_*.o objects and linking them with low
addresses, and linking the rest high. Some objects are linked
twice, once low and once high.
. The bootstrap phase passes a lot of information (e.g. free memory
list, physical location of the modules, etc.) using the kinfo
struct.
. After this bootstrap the low-linked part is freed.
. The kernel maps in VM into the bootstrap page table so that VM can
begin executing. Its first job is to make page tables for all other
boot processes. So VM runs before RS, and RS gets a fully dynamic,
VM-managed address space. VM gets its privilege info from RS as usual
but that happens after RS starts running.
. Both the kernel loading VM and VM organizing boot processes happen
using the libexec logic. This removes the last reason for VM to
still know much about exec() and vm/exec.c is gone.
Further Implementation:
. All segments are based at 0 and have a 4 GB limit.
. The kernel is mapped in at the top of the virtual address
space so as not to constrain the user processes.
. Processes do not use segments from the LDT at all; there are
no segments in the LDT any more, so no LLDT is needed.
. The Minix segments T/D/S are gone and so none of the
user-space or in-kernel copy functions use them. The copy
functions use a process endpoint of NONE to realize it's
a physical address, virtual otherwise.
. The umap call only makes sense to translate a virtual address
to a physical address now.
. Segments-related calls like newmap and alloc_segments are gone.
. All segments-related translation in VM is gone (vir2map etc).
. Initialization in VM is simpler as no moving around is necessary.
. VM and all other boot processes can be linked wherever they wish
and will be mapped in at the right location by the kernel and VM
respectively.
Other changes:
. The multiboot code is less special: it does not use mb_print
for its diagnostics any more but uses printf() as normal, saving
the output into the diagnostics buffer, only printing to the
screen using the direct print functions if a panic() occurs.
. The multiboot code uses the flexible 'free memory map list'
style to receive the list of free memory if available.
. The kernel determines the memory layout of the processes to
a degree: it tells VM where the kernel starts and ends and
where the kernel wants the top of the process to be. VM then
uses this entire range, i.e. the stack is right at the top,
and mmap()ped bits of memory are placed below that downwards,
and the break grows upwards.
Other Consequences:
. Every process gets its own page table as address spaces
can't be separated any more by segments.
. As all segments are 0-based, there is no distinction between
virtual and linear addresses, nor between userspace and
kernel addresses.
. Less work is done when context switching, leading to a net
performance increase. (8% faster on my machine for 'make servers'.)
. The layout and configuration of the GDT makes sysenter and syscall
possible.
2012-05-07 16:03:35 +02:00
|
|
|
vir_bytes m = (vir_bytes) vir;
|
2009-09-21 16:49:49 +02:00
|
|
|
int r;
|
2012-10-31 19:24:14 +01:00
|
|
|
u32_t flags = ARCH_VM_PTE_PRESENT | ARCH_VM_PTE_USER;
|
2009-09-21 16:49:49 +02:00
|
|
|
pt_t *pt;
|
|
|
|
|
2010-04-12 13:25:24 +02:00
|
|
|
pt = &vmprocess->vm_pt;
|
2009-09-21 16:49:49 +02:00
|
|
|
|
2012-10-31 19:24:14 +01:00
|
|
|
assert(!(m % VM_PAGE_SIZE));
|
2009-09-21 16:49:49 +02:00
|
|
|
|
|
|
|
if(!lockflag)
|
2012-10-31 19:24:14 +01:00
|
|
|
flags |= ARCH_VM_PTE_RW;
|
|
|
|
#if defined(__arm__)
|
|
|
|
else
|
|
|
|
flags |= ARCH_VM_PTE_RO;
|
2013-09-13 09:40:20 +02:00
|
|
|
|
|
|
|
flags |= ARM_VM_PTE_CACHED ;
|
2012-10-31 19:24:14 +01:00
|
|
|
#endif
|
2009-09-21 16:49:49 +02:00
|
|
|
|
|
|
|
/* Update flags. */
|
2012-10-31 19:24:14 +01:00
|
|
|
if((r=pt_writemap(vmprocess, pt, m, 0, VM_PAGE_SIZE,
|
2009-09-21 16:49:49 +02:00
|
|
|
flags, WMF_OVERWRITE | WMF_WRITEFLAGSONLY)) != OK) {
|
2010-03-05 16:05:11 +01:00
|
|
|
panic("vm_lockpage: pt_writemap failed");
|
2009-09-21 16:49:49 +02:00
|
|
|
}
|
|
|
|
|
2011-09-27 17:15:51 +02:00
|
|
|
if((r=sys_vmctl(SELF, VMCTL_FLUSHTLB, 0)) != OK) {
|
|
|
|
panic("VMCTL_FLUSHTLB failed: %d", r);
|
2009-09-21 16:49:49 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2010-04-12 13:25:24 +02:00
|
|
|
/*===========================================================================*
|
|
|
|
* vm_addrok *
|
|
|
|
*===========================================================================*/
|
2012-03-25 20:25:53 +02:00
|
|
|
int vm_addrok(void *vir, int writeflag)
|
2010-04-12 13:25:24 +02:00
|
|
|
{
|
|
|
|
pt_t *pt = &vmprocess->vm_pt;
|
|
|
|
int pde, pte;
|
No more intel/minix segments.
This commit removes all traces of Minix segments (the text/data/stack
memory map abstraction in the kernel) and significance of Intel segments
(hardware segments like CS, DS that add offsets to all addressing before
page table translation). This ultimately simplifies the memory layout
and addressing and makes the same layout possible on non-Intel
architectures.
There are only two types of addresses in the world now: virtual
and physical; even the kernel and processes have the same virtual
address space. Kernel and user processes can be distinguished at a
glance as processes won't use 0xF0000000 and above.
No static pre-allocated memory sizes exist any more.
Changes to booting:
. The pre_init.c leaves the kernel and modules exactly as
they were left by the bootloader in physical memory
. The kernel starts running using physical addressing,
loaded at a fixed location given in its linker script by the
bootloader. All code and data in this phase are linked to
this fixed low location.
. It makes a bootstrap pagetable to map itself to a
fixed high location (also in linker script) and jumps to
the high address. All code and data then use this high addressing.
. All code/data symbols linked at the low addresses is prefixed by
an objcopy step with __k_unpaged_*, so that that code cannot
reference highly-linked symbols (which aren't valid yet) or vice
versa (symbols that aren't valid any more).
. The two addressing modes are separated in the linker script by
collecting the unpaged_*.o objects and linking them with low
addresses, and linking the rest high. Some objects are linked
twice, once low and once high.
. The bootstrap phase passes a lot of information (e.g. free memory
list, physical location of the modules, etc.) using the kinfo
struct.
. After this bootstrap the low-linked part is freed.
. The kernel maps in VM into the bootstrap page table so that VM can
begin executing. Its first job is to make page tables for all other
boot processes. So VM runs before RS, and RS gets a fully dynamic,
VM-managed address space. VM gets its privilege info from RS as usual
but that happens after RS starts running.
. Both the kernel loading VM and VM organizing boot processes happen
using the libexec logic. This removes the last reason for VM to
still know much about exec() and vm/exec.c is gone.
Further Implementation:
. All segments are based at 0 and have a 4 GB limit.
. The kernel is mapped in at the top of the virtual address
space so as not to constrain the user processes.
. Processes do not use segments from the LDT at all; there are
no segments in the LDT any more, so no LLDT is needed.
. The Minix segments T/D/S are gone and so none of the
user-space or in-kernel copy functions use them. The copy
functions use a process endpoint of NONE to realize it's
a physical address, virtual otherwise.
. The umap call only makes sense to translate a virtual address
to a physical address now.
. Segments-related calls like newmap and alloc_segments are gone.
. All segments-related translation in VM is gone (vir2map etc).
. Initialization in VM is simpler as no moving around is necessary.
. VM and all other boot processes can be linked wherever they wish
and will be mapped in at the right location by the kernel and VM
respectively.
Other changes:
. The multiboot code is less special: it does not use mb_print
for its diagnostics any more but uses printf() as normal, saving
the output into the diagnostics buffer, only printing to the
screen using the direct print functions if a panic() occurs.
. The multiboot code uses the flexible 'free memory map list'
style to receive the list of free memory if available.
. The kernel determines the memory layout of the processes to
a degree: it tells VM where the kernel starts and ends and
where the kernel wants the top of the process to be. VM then
uses this entire range, i.e. the stack is right at the top,
and mmap()ped bits of memory are placed below that downwards,
and the break grows upwards.
Other Consequences:
. Every process gets its own page table as address spaces
can't be separated any more by segments.
. As all segments are 0-based, there is no distinction between
virtual and linear addresses, nor between userspace and
kernel addresses.
. Less work is done when context switching, leading to a net
performance increase. (8% faster on my machine for 'make servers'.)
. The layout and configuration of the GDT makes sysenter and syscall
possible.
2012-05-07 16:03:35 +02:00
|
|
|
vir_bytes v = (vir_bytes) vir;
|
2010-04-12 13:25:24 +02:00
|
|
|
|
2013-03-06 13:17:07 +01:00
|
|
|
pde = ARCH_VM_PDE(v);
|
|
|
|
pte = ARCH_VM_PTE(v);
|
2010-04-12 13:25:24 +02:00
|
|
|
|
2012-10-31 19:24:14 +01:00
|
|
|
if(!(pt->pt_dir[pde] & ARCH_VM_PDE_PRESENT)) {
|
2010-04-12 13:25:24 +02:00
|
|
|
printf("addr not ok: missing pde %d\n", pde);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2012-10-31 19:24:14 +01:00
|
|
|
#if defined(__i386__)
|
2010-04-12 13:25:24 +02:00
|
|
|
if(writeflag &&
|
2012-10-31 19:24:14 +01:00
|
|
|
!(pt->pt_dir[pde] & ARCH_VM_PTE_RW)) {
|
2010-04-12 13:25:24 +02:00
|
|
|
printf("addr not ok: pde %d present but pde unwritable\n", pde);
|
|
|
|
return 0;
|
|
|
|
}
|
2013-02-10 20:20:14 +01:00
|
|
|
#elif defined(__arm__)
|
|
|
|
if(writeflag &&
|
|
|
|
(pt->pt_dir[pde] & ARCH_VM_PTE_RO)) {
|
|
|
|
printf("addr not ok: pde %d present but pde unwritable\n", pde);
|
|
|
|
return 0;
|
|
|
|
}
|
2010-04-12 13:25:24 +02:00
|
|
|
|
2012-10-31 19:24:14 +01:00
|
|
|
#endif
|
|
|
|
if(!(pt->pt_pt[pde][pte] & ARCH_VM_PTE_PRESENT)) {
|
2010-04-12 13:25:24 +02:00
|
|
|
printf("addr not ok: missing pde %d / pte %d\n",
|
|
|
|
pde, pte);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2012-10-31 19:24:14 +01:00
|
|
|
#if defined(__i386__)
|
2010-04-12 13:25:24 +02:00
|
|
|
if(writeflag &&
|
2012-10-31 19:24:14 +01:00
|
|
|
!(pt->pt_pt[pde][pte] & ARCH_VM_PTE_RW)) {
|
2010-04-12 13:25:24 +02:00
|
|
|
printf("addr not ok: pde %d / pte %d present but unwritable\n",
|
2013-02-10 20:20:14 +01:00
|
|
|
pde, pte);
|
2012-10-31 19:24:14 +01:00
|
|
|
#elif defined(__arm__)
|
2013-02-10 20:20:14 +01:00
|
|
|
if(writeflag &&
|
|
|
|
(pt->pt_pt[pde][pte] & ARCH_VM_PTE_RO)) {
|
|
|
|
printf("addr not ok: pde %d / pte %d present but unwritable\n",
|
2010-04-12 13:25:24 +02:00
|
|
|
pde, pte);
|
2013-02-10 20:20:14 +01:00
|
|
|
#endif
|
2010-04-12 13:25:24 +02:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
2008-11-19 13:26:10 +01:00
|
|
|
/*===========================================================================*
|
|
|
|
* pt_ptalloc *
|
|
|
|
*===========================================================================*/
|
2012-03-25 20:25:53 +02:00
|
|
|
static int pt_ptalloc(pt_t *pt, int pde, u32_t flags)
{
/* Allocate one page table for directory slot 'pde' of 'pt', clear it, and
 * install it in the page directory.
 *
 * 'flags' may only contain PTF_ALLFLAGS bits; they are OR-ed into the
 * directory entry on i386 and are not used on ARM.
 *
 * Returns OK on success (including the case where the slot was filled as a
 * side effect of the allocation itself), or ENOMEM if no page could be
 * allocated.
 */
	phys_bytes table_phys;
	u32_t *table;
	int entry;

	/* Sanity-check the arguments. */
	assert(pde >= 0 && pde < ARCH_VM_DIR_ENTRIES);
	assert(!(flags & ~(PTF_ALLFLAGS)));

	/* The slot must be unused: no directory entry and no table storage. */
	assert(!(pt->pt_dir[pde] & ARCH_VM_PDE_PRESENT));
	assert(!pt->pt_pt[pde]);

	/* Obtain a page for the table. vm_allocpage() may recursively end up
	 * creating this very directory entry; if so, release the page we just
	 * got and report success, as the work has already been done.
	 */
	table = vm_allocpage(&table_phys, VMP_PAGETABLE);
	if(!table)
		return ENOMEM;

	if(pt->pt_pt[pde]) {
		vm_freepages((vir_bytes) table, 1);
		assert(pt->pt_pt[pde]);
		return OK;
	}

	/* Record the table and start with every entry empty. */
	pt->pt_pt[pde] = table;
	for(entry = 0; entry < ARCH_VM_PT_ENTRIES; entry++)
		table[entry] = 0;

	/* Build the page directory entry. It is always present, writable and
	 * user accessible; protection is left to the individual PTEs.
	 */
#if defined(__i386__)
	pt->pt_dir[pde] = (table_phys & ARCH_VM_ADDR_MASK) | flags
		| ARCH_VM_PDE_PRESENT | ARCH_VM_PTE_USER | ARCH_VM_PTE_RW;
#elif defined(__arm__)
	pt->pt_dir[pde] = (table_phys & ARCH_VM_PDE_MASK)
		| ARCH_VM_PDE_PRESENT | ARM_VM_PDE_DOMAIN; /* LSC FIXME */
#endif

	return OK;
}
|
|
|
|
|
2010-07-20 04:08:28 +02:00
|
|
|
/*===========================================================================*
|
|
|
|
* pt_ptalloc_in_range *
|
|
|
|
*===========================================================================*/
|
2012-03-25 20:25:53 +02:00
|
|
|
int pt_ptalloc_in_range(pt_t *pt, vir_bytes start, vir_bytes end,
|
2010-07-20 04:08:28 +02:00
|
|
|
u32_t flags, int verify)
|
|
|
|
{
|
|
|
|
/* Allocate all the page tables in the range specified. */
|
|
|
|
int pde, first_pde, last_pde;
|
|
|
|
|
2013-03-06 13:17:07 +01:00
|
|
|
first_pde = ARCH_VM_PDE(start);
|
|
|
|
last_pde = ARCH_VM_PDE(end-1);
|
|
|
|
|
2010-07-21 15:46:29 +02:00
|
|
|
assert(first_pde >= 0);
|
2012-10-31 19:24:14 +01:00
|
|
|
assert(last_pde < ARCH_VM_DIR_ENTRIES);
|
2010-07-20 04:08:28 +02:00
|
|
|
|
|
|
|
/* Scan all page-directory entries in the range. */
|
|
|
|
for(pde = first_pde; pde <= last_pde; pde++) {
|
2012-10-31 19:24:14 +01:00
|
|
|
assert(!(pt->pt_dir[pde] & ARCH_VM_BIGPAGE));
|
|
|
|
if(!(pt->pt_dir[pde] & ARCH_VM_PDE_PRESENT)) {
|
2010-07-20 04:08:28 +02:00
|
|
|
int r;
|
|
|
|
if(verify) {
|
|
|
|
printf("pt_ptalloc_in_range: no pde %d\n", pde);
|
|
|
|
return EFAULT;
|
|
|
|
}
|
|
|
|
assert(!pt->pt_dir[pde]);
|
|
|
|
if((r=pt_ptalloc(pt, pde, flags)) != OK) {
|
|
|
|
/* Couldn't do (complete) mapping.
|
|
|
|
* Don't bother freeing any previously
|
|
|
|
* allocated page tables, they're
|
|
|
|
* still writable, don't point to nonsense,
|
|
|
|
* and pt_ptalloc leaves the directory
|
|
|
|
* and other data in a consistent state.
|
|
|
|
*/
|
|
|
|
return r;
|
|
|
|
}
|
2013-01-29 18:52:08 +01:00
|
|
|
assert(pt->pt_pt[pde]);
|
2010-07-20 04:08:28 +02:00
|
|
|
}
|
2013-01-29 18:52:08 +01:00
|
|
|
assert(pt->pt_pt[pde]);
|
No more intel/minix segments.
This commit removes all traces of Minix segments (the text/data/stack
memory map abstraction in the kernel) and significance of Intel segments
(hardware segments like CS, DS that add offsets to all addressing before
page table translation). This ultimately simplifies the memory layout
and addressing and makes the same layout possible on non-Intel
architectures.
There are only two types of addresses in the world now: virtual
and physical; even the kernel and processes have the same virtual
address space. Kernel and user processes can be distinguished at a
glance as processes won't use 0xF0000000 and above.
No static pre-allocated memory sizes exist any more.
Changes to booting:
. The pre_init.c leaves the kernel and modules exactly as
they were left by the bootloader in physical memory
. The kernel starts running using physical addressing,
loaded at a fixed location given in its linker script by the
bootloader. All code and data in this phase are linked to
this fixed low location.
. It makes a bootstrap pagetable to map itself to a
fixed high location (also in linker script) and jumps to
the high address. All code and data then use this high addressing.
. All code/data symbols linked at the low addresses is prefixed by
an objcopy step with __k_unpaged_*, so that that code cannot
reference highly-linked symbols (which aren't valid yet) or vice
versa (symbols that aren't valid any more).
. The two addressing modes are separated in the linker script by
collecting the unpaged_*.o objects and linking them with low
addresses, and linking the rest high. Some objects are linked
twice, once low and once high.
. The bootstrap phase passes a lot of information (e.g. free memory
list, physical location of the modules, etc.) using the kinfo
struct.
. After this bootstrap the low-linked part is freed.
. The kernel maps in VM into the bootstrap page table so that VM can
begin executing. Its first job is to make page tables for all other
boot processes. So VM runs before RS, and RS gets a fully dynamic,
VM-managed address space. VM gets its privilege info from RS as usual
but that happens after RS starts running.
. Both the kernel loading VM and VM organizing boot processes happen
using the libexec logic. This removes the last reason for VM to
still know much about exec() and vm/exec.c is gone.
Further Implementation:
. All segments are based at 0 and have a 4 GB limit.
. The kernel is mapped in at the top of the virtual address
space so as not to constrain the user processes.
. Processes do not use segments from the LDT at all; there are
no segments in the LDT any more, so no LLDT is needed.
. The Minix segments T/D/S are gone and so none of the
user-space or in-kernel copy functions use them. The copy
functions use a process endpoint of NONE to realize it's
a physical address, virtual otherwise.
. The umap call only makes sense to translate a virtual address
to a physical address now.
. Segments-related calls like newmap and alloc_segments are gone.
. All segments-related translation in VM is gone (vir2map etc).
. Initialization in VM is simpler as no moving around is necessary.
. VM and all other boot processes can be linked wherever they wish
and will be mapped in at the right location by the kernel and VM
respectively.
Other changes:
. The multiboot code is less special: it does not use mb_print
for its diagnostics any more but uses printf() as normal, saving
the output into the diagnostics buffer, only printing to the
screen using the direct print functions if a panic() occurs.
. The multiboot code uses the flexible 'free memory map list'
style to receive the list of free memory if available.
. The kernel determines the memory layout of the processes to
a degree: it tells VM where the kernel starts and ends and
where the kernel wants the top of the process to be. VM then
uses this entire range, i.e. the stack is right at the top,
and mmap()ped bits of memory are placed below that downwards,
and the break grows upwards.
Other Consequences:
. Every process gets its own page table as address spaces
can't be separated any more by segments.
. As all segments are 0-based, there is no distinction between
virtual and linear addresses, nor between userspace and
kernel addresses.
. Less work is done when context switching, leading to a net
performance increase. (8% faster on my machine for 'make servers'.)
. The layout and configuration of the GDT makes sysenter and syscall
possible.
2012-05-07 16:03:35 +02:00
|
|
|
assert(pt->pt_dir[pde]);
|
2012-10-31 19:24:14 +01:00
|
|
|
assert(pt->pt_dir[pde] & ARCH_VM_PDE_PRESENT);
|
2010-07-20 04:08:28 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
return OK;
|
|
|
|
}
|
|
|
|
|
2013-08-20 14:02:33 +02:00
|
|
|
static const char *ptestr(u32_t pte)
{
/* Render the flag bits of page table entry 'pte' as a human-readable,
 * space-separated string for diagnostics.
 *
 * Returns the literal "not present" when the present bit is clear.
 * Otherwise returns a pointer to a static buffer, which is overwritten by
 * the next call; callers must not free it or hold on to it across calls.
 */

/* Append 'name' and a separating space if 'constant' is set in pte. Every
 * append is bounded by the space left in str, so the buffer can never be
 * overrun no matter how many flags are set.
 */
#define FLAG(constant, name) {						\
	if(pte & (constant)) {						\
		strncat(str, name, sizeof(str) - strlen(str) - 1);	\
		strncat(str, " ", sizeof(str) - strlen(str) - 1);	\
	}								\
}

	/* The longest possible i386 string (all flags set) is
	 * "W U PWT PCD ACC DIRTY PS G AV1 AV2 AV3 ", i.e. 39 characters plus
	 * the terminating NUL. The previous 30-byte buffer was too small.
	 */
	static char str[64];

	if(!(pte & ARCH_VM_PTE_PRESENT)) {
		return "not present";
	}

	str[0] = '\0';

	/* Write permission: i386 has an explicit RW bit, ARM an RO bit. */
#if defined(__i386__)
	FLAG(ARCH_VM_PTE_RW, "W");
#elif defined(__arm__)
	if(pte & ARCH_VM_PTE_RO) {
		strncat(str, "R ", sizeof(str) - strlen(str) - 1);
	} else {
		strncat(str, "W ", sizeof(str) - strlen(str) - 1);
	}
#endif

	FLAG(ARCH_VM_PTE_USER, "U");

	/* Remaining architecture-specific attribute bits. */
#if defined(__i386__)
	FLAG(I386_VM_PWT, "PWT");
	FLAG(I386_VM_PCD, "PCD");
	FLAG(I386_VM_ACC, "ACC");
	FLAG(I386_VM_DIRTY, "DIRTY");
	FLAG(I386_VM_PS, "PS");
	FLAG(I386_VM_GLOBAL, "G");
	FLAG(I386_VM_PTAVAIL1, "AV1");
	FLAG(I386_VM_PTAVAIL2, "AV2");
	FLAG(I386_VM_PTAVAIL3, "AV3");
#elif defined(__arm__)
	FLAG(ARM_VM_PTE_SUPER, "S");
	FLAG(ARM_VM_PTE_S, "SH");
	FLAG(ARM_VM_PTE_WB, "WB");
	FLAG(ARM_VM_PTE_WT, "WT");
#endif

	return str;
}
|
|
|
|
|
2010-07-21 01:03:52 +02:00
|
|
|
/*===========================================================================*
|
|
|
|
* pt_map_in_range *
|
|
|
|
*===========================================================================*/
|
2012-03-25 20:25:53 +02:00
|
|
|
int pt_map_in_range(struct vmproc *src_vmp, struct vmproc *dst_vmp,
	vir_bytes start, vir_bytes end)
{
/* Copy every present page table entry in the virtual range from 'start' up
 * to and including 'end' from the page table of 'src_vmp' into the page
 * table of 'dst_vmp'. An 'end' of 0 means "up to VM_DATATOP". Both bounds
 * must be page-aligned. Always returns OK.
 */
	pt_t *src_pt = &src_vmp->vm_pt;
	pt_t *dst_pt = &dst_vmp->vm_pt;
	vir_bytes viraddr;
	int pde, pte;

	if(end == 0)
		end = VM_DATATOP;

	assert(start % VM_PAGE_SIZE == 0);
	assert(end % VM_PAGE_SIZE == 0);

	/* ARCH_VM_PDE(start) >= 0 is not checked here. */
	assert(start <= end);
	assert(ARCH_VM_PDE(end) < ARCH_VM_DIR_ENTRIES);

#if LU_DEBUG
	printf("VM: pt_map_in_range: src = %d, dst = %d\n",
		src_vmp->vm_endpoint, dst_vmp->vm_endpoint);
	printf("VM: pt_map_in_range: transferring from 0x%08x (pde %d pte %d) to 0x%08x (pde %d pte %d)\n",
		start, ARCH_VM_PDE(start), ARCH_VM_PTE(start),
		end, ARCH_VM_PDE(end), ARCH_VM_PTE(end));
#endif

	/* Walk the range one page at a time. */
	for(viraddr = start; viraddr <= end; viraddr += VM_PAGE_SIZE) {
		pde = ARCH_VM_PDE(viraddr);

		/* Only pages that are mapped at both levels in the source
		 * are transferred; everything else is skipped.
		 */
		if(src_pt->pt_dir[pde] & ARCH_VM_PDE_PRESENT) {
			pte = ARCH_VM_PTE(viraddr);
			if(src_pt->pt_pt[pde][pte] & ARCH_VM_PTE_PRESENT)
				dst_pt->pt_pt[pde][pte] = src_pt->pt_pt[pde][pte];
		}

		/* Never step past VM_DATATOP, whether or not the page at
		 * this address was transferred.
		 */
		if(viraddr == VM_DATATOP)
			break;
	}

	return OK;
}
|
|
|
|
|
|
|
|
/*===========================================================================*
|
|
|
|
* pt_ptmap *
|
|
|
|
*===========================================================================*/
|
2012-03-25 20:25:53 +02:00
|
|
|
int pt_ptmap(struct vmproc *src_vmp, struct vmproc *dst_vmp)
|
2010-07-21 01:03:52 +02:00
|
|
|
{
|
|
|
|
/* Transfer mappings to page dir and page tables from source process and
|
|
|
|
* destination process. Make sure all the mappings are above the stack, not
|
|
|
|
* to corrupt valid mappings in the data segment of the destination process.
|
|
|
|
*/
|
|
|
|
int pde, r;
|
|
|
|
phys_bytes physaddr;
|
|
|
|
vir_bytes viraddr;
|
|
|
|
pt_t *pt;
|
|
|
|
|
|
|
|
pt = &src_vmp->vm_pt;
|
|
|
|
|
|
|
|
#if LU_DEBUG
|
|
|
|
printf("VM: pt_ptmap: src = %d, dst = %d\n",
|
|
|
|
src_vmp->vm_endpoint, dst_vmp->vm_endpoint);
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/* Transfer mapping to the page directory. */
|
No more intel/minix segments.
This commit removes all traces of Minix segments (the text/data/stack
memory map abstraction in the kernel) and significance of Intel segments
(hardware segments like CS, DS that add offsets to all addressing before
page table translation). This ultimately simplifies the memory layout
and addressing and makes the same layout possible on non-Intel
architectures.
There are only two types of addresses in the world now: virtual
and physical; even the kernel and processes have the same virtual
address space. Kernel and user processes can be distinguished at a
glance as processes won't use 0xF0000000 and above.
No static pre-allocated memory sizes exist any more.
Changes to booting:
. The pre_init.c leaves the kernel and modules exactly as
they were left by the bootloader in physical memory
. The kernel starts running using physical addressing,
loaded at a fixed location given in its linker script by the
bootloader. All code and data in this phase are linked to
this fixed low location.
. It makes a bootstrap pagetable to map itself to a
fixed high location (also in linker script) and jumps to
the high address. All code and data then use this high addressing.
. All code/data symbols linked at the low addresses are prefixed by
an objcopy step with __k_unpaged_*, so that that code cannot
reference highly-linked symbols (which aren't valid yet) or vice
versa (symbols that aren't valid any more).
. The two addressing modes are separated in the linker script by
collecting the unpaged_*.o objects and linking them with low
addresses, and linking the rest high. Some objects are linked
twice, once low and once high.
. The bootstrap phase passes a lot of information (e.g. free memory
list, physical location of the modules, etc.) using the kinfo
struct.
. After this bootstrap the low-linked part is freed.
. The kernel maps in VM into the bootstrap page table so that VM can
begin executing. Its first job is to make page tables for all other
boot processes. So VM runs before RS, and RS gets a fully dynamic,
VM-managed address space. VM gets its privilege info from RS as usual
but that happens after RS starts running.
. Both the kernel loading VM and VM organizing boot processes happen
using the libexec logic. This removes the last reason for VM to
still know much about exec() and vm/exec.c is gone.
Further Implementation:
. All segments are based at 0 and have a 4 GB limit.
. The kernel is mapped in at the top of the virtual address
space so as not to constrain the user processes.
. Processes do not use segments from the LDT at all; there are
no segments in the LDT any more, so no LLDT is needed.
. The Minix segments T/D/S are gone and so none of the
user-space or in-kernel copy functions use them. The copy
functions use a process endpoint of NONE to realize it's
a physical address, virtual otherwise.
. The umap call only makes sense to translate a virtual address
to a physical address now.
. Segments-related calls like newmap and alloc_segments are gone.
. All segments-related translation in VM is gone (vir2map etc).
. Initialization in VM is simpler as no moving around is necessary.
. VM and all other boot processes can be linked wherever they wish
and will be mapped in at the right location by the kernel and VM
respectively.
Other changes:
. The multiboot code is less special: it does not use mb_print
for its diagnostics any more but uses printf() as normal, saving
the output into the diagnostics buffer, only printing to the
screen using the direct print functions if a panic() occurs.
. The multiboot code uses the flexible 'free memory map list'
style to receive the list of free memory if available.
. The kernel determines the memory layout of the processes to
a degree: it tells VM where the kernel starts and ends and
where the kernel wants the top of the process to be. VM then
uses this entire range, i.e. the stack is right at the top,
and mmap()ped bits of memory are placed below that downwards,
and the break grows upwards.
Other Consequences:
. Every process gets its own page table as address spaces
can't be separated any more by segments.
. As all segments are 0-based, there is no distinction between
virtual and linear addresses, nor between userspace and
kernel addresses.
. Less work is done when context switching, leading to a net
performance increase. (8% faster on my machine for 'make servers'.)
. The layout and configuration of the GDT makes sysenter and syscall
possible.
2012-05-07 16:03:35 +02:00
|
|
|
viraddr = (vir_bytes) pt->pt_dir;
|
2012-10-31 19:24:14 +01:00
|
|
|
physaddr = pt->pt_dir_phys & ARCH_VM_ADDR_MASK;
|
|
|
|
#if defined(__i386__)
|
|
|
|
if((r=pt_writemap(dst_vmp, &dst_vmp->vm_pt, viraddr, physaddr, VM_PAGE_SIZE,
|
|
|
|
ARCH_VM_PTE_PRESENT | ARCH_VM_PTE_USER | ARCH_VM_PTE_RW,
|
|
|
|
#elif defined(__arm__)
|
|
|
|
if((r=pt_writemap(dst_vmp, &dst_vmp->vm_pt, viraddr, physaddr, ARCH_PAGEDIR_SIZE,
|
2013-02-10 20:20:14 +01:00
|
|
|
ARCH_VM_PTE_PRESENT | ARCH_VM_PTE_USER |
|
2013-09-13 09:40:20 +02:00
|
|
|
ARM_VM_PTE_CACHED ,
|
2012-10-31 19:24:14 +01:00
|
|
|
#endif
|
2010-07-21 01:03:52 +02:00
|
|
|
WMF_OVERWRITE)) != OK) {
|
|
|
|
return r;
|
|
|
|
}
|
|
|
|
#if LU_DEBUG
|
|
|
|
printf("VM: pt_ptmap: transferred mapping to page dir: 0x%08x (0x%08x)\n",
|
|
|
|
viraddr, physaddr);
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/* Scan all non-reserved page-directory entries. */
|
2012-10-31 19:24:14 +01:00
|
|
|
for(pde=0; pde < ARCH_VM_DIR_ENTRIES; pde++) {
|
|
|
|
if(!(pt->pt_dir[pde] & ARCH_VM_PDE_PRESENT)) {
|
2010-07-21 01:03:52 +02:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Transfer mapping to the page table. */
|
No more intel/minix segments.
This commit removes all traces of Minix segments (the text/data/stack
memory map abstraction in the kernel) and significance of Intel segments
(hardware segments like CS, DS that add offsets to all addressing before
page table translation). This ultimately simplifies the memory layout
and addressing and makes the same layout possible on non-Intel
architectures.
There are only two types of addresses in the world now: virtual
and physical; even the kernel and processes have the same virtual
address space. Kernel and user processes can be distinguished at a
glance as processes won't use 0xF0000000 and above.
No static pre-allocated memory sizes exist any more.
Changes to booting:
. The pre_init.c leaves the kernel and modules exactly as
they were left by the bootloader in physical memory
. The kernel starts running using physical addressing,
loaded at a fixed location given in its linker script by the
bootloader. All code and data in this phase are linked to
this fixed low location.
. It makes a bootstrap pagetable to map itself to a
fixed high location (also in linker script) and jumps to
the high address. All code and data then use this high addressing.
. All code/data symbols linked at the low addresses are prefixed by
an objcopy step with __k_unpaged_*, so that that code cannot
reference highly-linked symbols (which aren't valid yet) or vice
versa (symbols that aren't valid any more).
. The two addressing modes are separated in the linker script by
collecting the unpaged_*.o objects and linking them with low
addresses, and linking the rest high. Some objects are linked
twice, once low and once high.
. The bootstrap phase passes a lot of information (e.g. free memory
list, physical location of the modules, etc.) using the kinfo
struct.
. After this bootstrap the low-linked part is freed.
. The kernel maps in VM into the bootstrap page table so that VM can
begin executing. Its first job is to make page tables for all other
boot processes. So VM runs before RS, and RS gets a fully dynamic,
VM-managed address space. VM gets its privilege info from RS as usual
but that happens after RS starts running.
. Both the kernel loading VM and VM organizing boot processes happen
using the libexec logic. This removes the last reason for VM to
still know much about exec() and vm/exec.c is gone.
Further Implementation:
. All segments are based at 0 and have a 4 GB limit.
. The kernel is mapped in at the top of the virtual address
space so as not to constrain the user processes.
. Processes do not use segments from the LDT at all; there are
no segments in the LDT any more, so no LLDT is needed.
. The Minix segments T/D/S are gone and so none of the
user-space or in-kernel copy functions use them. The copy
functions use a process endpoint of NONE to realize it's
a physical address, virtual otherwise.
. The umap call only makes sense to translate a virtual address
to a physical address now.
. Segments-related calls like newmap and alloc_segments are gone.
. All segments-related translation in VM is gone (vir2map etc).
. Initialization in VM is simpler as no moving around is necessary.
. VM and all other boot processes can be linked wherever they wish
and will be mapped in at the right location by the kernel and VM
respectively.
Other changes:
. The multiboot code is less special: it does not use mb_print
for its diagnostics any more but uses printf() as normal, saving
the output into the diagnostics buffer, only printing to the
screen using the direct print functions if a panic() occurs.
. The multiboot code uses the flexible 'free memory map list'
style to receive the list of free memory if available.
. The kernel determines the memory layout of the processes to
a degree: it tells VM where the kernel starts and ends and
where the kernel wants the top of the process to be. VM then
uses this entire range, i.e. the stack is right at the top,
and mmap()ped bits of memory are placed below that downwards,
and the break grows upwards.
Other Consequences:
. Every process gets its own page table as address spaces
can't be separated any more by segments.
. As all segments are 0-based, there is no distinction between
virtual and linear addresses, nor between userspace and
kernel addresses.
. Less work is done when context switching, leading to a net
performance increase. (8% faster on my machine for 'make servers'.)
. The layout and configuration of the GDT makes sysenter and syscall
possible.
2012-05-07 16:03:35 +02:00
|
|
|
viraddr = (vir_bytes) pt->pt_pt[pde];
|
2012-10-31 19:24:14 +01:00
|
|
|
#if defined(__i386__)
|
|
|
|
physaddr = pt->pt_dir[pde] & ARCH_VM_ADDR_MASK;
|
|
|
|
#elif defined(__arm__)
|
|
|
|
physaddr = pt->pt_dir[pde] & ARCH_VM_PDE_MASK;
|
|
|
|
#endif
|
|
|
|
if((r=pt_writemap(dst_vmp, &dst_vmp->vm_pt, viraddr, physaddr, VM_PAGE_SIZE,
|
2012-11-09 19:00:46 +01:00
|
|
|
ARCH_VM_PTE_PRESENT | ARCH_VM_PTE_USER | ARCH_VM_PTE_RW
|
|
|
|
#ifdef __arm__
|
2013-09-13 09:40:20 +02:00
|
|
|
| ARM_VM_PTE_CACHED
|
2012-10-31 19:24:14 +01:00
|
|
|
#endif
|
2012-11-09 19:00:46 +01:00
|
|
|
,
|
2010-07-21 01:03:52 +02:00
|
|
|
WMF_OVERWRITE)) != OK) {
|
|
|
|
return r;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return OK;
|
|
|
|
}
|
|
|
|
|
2012-03-25 20:25:53 +02:00
|
|
|
void pt_clearmapcache(void)
|
2012-01-02 18:20:02 +01:00
|
|
|
{
|
|
|
|
/* Make sure kernel will invalidate tlb when using current
|
|
|
|
* pagetable (i.e. vm's) to make new mappings before new cr3
|
|
|
|
* is loaded.
|
|
|
|
*/
|
No more intel/minix segments.
This commit removes all traces of Minix segments (the text/data/stack
memory map abstraction in the kernel) and significance of Intel segments
(hardware segments like CS, DS that add offsets to all addressing before
page table translation). This ultimately simplifies the memory layout
and addressing and makes the same layout possible on non-Intel
architectures.
There are only two types of addresses in the world now: virtual
and physical; even the kernel and processes have the same virtual
address space. Kernel and user processes can be distinguished at a
glance as processes won't use 0xF0000000 and above.
No static pre-allocated memory sizes exist any more.
Changes to booting:
. The pre_init.c leaves the kernel and modules exactly as
they were left by the bootloader in physical memory
. The kernel starts running using physical addressing,
loaded at a fixed location given in its linker script by the
bootloader. All code and data in this phase are linked to
this fixed low location.
. It makes a bootstrap pagetable to map itself to a
fixed high location (also in linker script) and jumps to
the high address. All code and data then use this high addressing.
. All code/data symbols linked at the low addresses is prefixed by
an objcopy step with __k_unpaged_*, so that that code cannot
reference highly-linked symbols (which aren't valid yet) or vice
versa (symbols that aren't valid any more).
. The two addressing modes are separated in the linker script by
collecting the unpaged_*.o objects and linking them with low
addresses, and linking the rest high. Some objects are linked
twice, once low and once high.
. The bootstrap phase passes a lot of information (e.g. free memory
list, physical location of the modules, etc.) using the kinfo
struct.
. After this bootstrap the low-linked part is freed.
. The kernel maps in VM into the bootstrap page table so that VM can
begin executing. Its first job is to make page tables for all other
boot processes. So VM runs before RS, and RS gets a fully dynamic,
VM-managed address space. VM gets its privilege info from RS as usual
but that happens after RS starts running.
. Both the kernel loading VM and VM organizing boot processes happen
using the libexec logic. This removes the last reason for VM to
still know much about exec() and vm/exec.c is gone.
Further Implementation:
. All segments are based at 0 and have a 4 GB limit.
. The kernel is mapped in at the top of the virtual address
space so as not to constrain the user processes.
. Processes do not use segments from the LDT at all; there are
no segments in the LDT any more, so no LLDT is needed.
. The Minix segments T/D/S are gone and so none of the
user-space or in-kernel copy functions use them. The copy
functions use a process endpoint of NONE to realize it's
a physical address, virtual otherwise.
. The umap call only makes sense to translate a virtual address
to a physical address now.
. Segments-related calls like newmap and alloc_segments are gone.
. All segments-related translation in VM is gone (vir2map etc).
. Initialization in VM is simpler as no moving around is necessary.
. VM and all other boot processes can be linked wherever they wish
and will be mapped in at the right location by the kernel and VM
respectively.
Other changes:
. The multiboot code is less special: it does not use mb_print
for its diagnostics any more but uses printf() as normal, saving
the output into the diagnostics buffer, only printing to the
screen using the direct print functions if a panic() occurs.
. The multiboot code uses the flexible 'free memory map list'
style to receive the list of free memory if available.
. The kernel determines the memory layout of the processes to
a degree: it tells VM where the kernel starts and ends and
where the kernel wants the top of the process to be. VM then
uses this entire range, i.e. the stack is right at the top,
and mmap()ped bits of memory are placed below that downwards,
and the break grows upwards.
Other Consequences:
. Every process gets its own page table as address spaces
can't be separated any more by segments.
. As all segments are 0-based, there is no distinction between
virtual and linear addresses, nor between userspace and
kernel addresses.
. Less work is done when context switching, leading to a net
performance increase. (8% faster on my machine for 'make servers'.)
. The layout and configuration of the GDT makes sysenter and syscall
possible.
2012-05-07 16:03:35 +02:00
|
|
|
if(sys_vmctl(SELF, VMCTL_CLEARMAPCACHE, 0) != OK)
|
|
|
|
panic("VMCTL_CLEARMAPCACHE failed");
|
2012-01-02 18:20:02 +01:00
|
|
|
}
|
|
|
|
|
2013-02-24 22:00:52 +01:00
|
|
|
int pt_writable(struct vmproc *vmp, vir_bytes v)
{
/* Look up the page table entry for the page-aligned address v in the page
 * table of vmp and report whether it allows writing: returns 1 if it does,
 * 0 if it does not. The page directory entry must be present and its page
 * table must exist (both are asserted).
 */
	pt_t *ptab = &vmp->vm_pt;
	int dir_idx, tab_idx;
	u32_t pte_val;

	assert(!(v % VM_PAGE_SIZE));

	dir_idx = ARCH_VM_PDE(v);
	tab_idx = ARCH_VM_PTE(v);

	assert(ptab->pt_dir[dir_idx] & ARCH_VM_PDE_PRESENT);
	assert(ptab->pt_pt[dir_idx]);

	pte_val = ptab->pt_pt[dir_idx][tab_idx];

#if defined(__i386__)
	/* i386: writability is an explicit flag in the entry. */
	return (pte_val & PTF_WRITE) != 0;
#elif defined(__arm__)
	/* ARM: the entry carries a read-only flag instead. */
	return (pte_val & ARCH_VM_PTE_RO) == 0;
#endif
}
|
|
|
|
|
2008-11-19 13:26:10 +01:00
|
|
|
/*===========================================================================*
|
|
|
|
* pt_writemap *
|
|
|
|
*===========================================================================*/
|
2012-03-25 20:25:53 +02:00
|
|
|
int pt_writemap(struct vmproc * vmp,
|
2010-09-15 16:11:12 +02:00
|
|
|
pt_t *pt,
|
|
|
|
vir_bytes v,
|
|
|
|
phys_bytes physaddr,
|
|
|
|
size_t bytes,
|
|
|
|
u32_t flags,
|
|
|
|
u32_t writemapflags)
|
2008-11-19 13:26:10 +01:00
|
|
|
{
|
|
|
|
/* Write mapping into page table. Allocate a new page table if necessary. */
|
|
|
|
/* Page directory and table entries for this virtual address. */
|
2011-06-01 11:30:58 +02:00
|
|
|
int p, pages;
|
2009-09-23 15:33:01 +02:00
|
|
|
int verify = 0;
|
2010-09-15 16:11:12 +02:00
|
|
|
int ret = OK;
|
|
|
|
|
2011-02-11 15:56:52 +01:00
|
|
|
#ifdef CONFIG_SMP
|
2011-11-06 22:37:34 +01:00
|
|
|
int vminhibit_clear = 0;
|
2010-09-15 16:11:12 +02:00
|
|
|
/* FIXME
|
|
|
|
* don't do it everytime, stop the process only on the first change and
|
|
|
|
* resume the execution on the last change. Do in a wrapper of this
|
|
|
|
* function
|
|
|
|
*/
|
|
|
|
if (vmp && vmp->vm_endpoint != NONE && vmp->vm_endpoint != VM_PROC_NR &&
|
2011-11-06 22:37:34 +01:00
|
|
|
!(vmp->vm_flags & VMF_EXITING)) {
|
2010-09-15 16:11:12 +02:00
|
|
|
sys_vmctl(vmp->vm_endpoint, VMCTL_VMINHIBIT_SET, 0);
|
2011-11-06 22:37:34 +01:00
|
|
|
vminhibit_clear = 1;
|
|
|
|
}
|
2011-02-11 15:56:52 +01:00
|
|
|
#endif
|
2009-09-23 15:33:01 +02:00
|
|
|
|
|
|
|
if(writemapflags & WMF_VERIFY)
|
|
|
|
verify = 1;
|
2008-11-19 13:26:10 +01:00
|
|
|
|
2012-10-31 19:24:14 +01:00
|
|
|
assert(!(bytes % VM_PAGE_SIZE));
|
2010-04-12 14:37:28 +02:00
|
|
|
assert(!(flags & ~(PTF_ALLFLAGS)));
|
2008-11-19 13:26:10 +01:00
|
|
|
|
2012-10-31 19:24:14 +01:00
|
|
|
pages = bytes / VM_PAGE_SIZE;
|
2008-11-19 13:26:10 +01:00
|
|
|
|
2008-12-19 14:29:12 +01:00
|
|
|
/* MAP_NONE means to clear the mapping. It doesn't matter
|
2012-10-31 19:24:14 +01:00
|
|
|
* what's actually written into the PTE if PRESENT
|
2008-12-19 14:29:12 +01:00
|
|
|
* isn't on, so we can just write MAP_NONE into it.
|
|
|
|
*/
|
2012-10-31 19:24:14 +01:00
|
|
|
assert(physaddr == MAP_NONE || (flags & ARCH_VM_PTE_PRESENT));
|
2010-04-12 14:37:28 +02:00
|
|
|
assert(physaddr != MAP_NONE || !flags);
|
2008-11-19 13:26:10 +01:00
|
|
|
|
|
|
|
/* First make sure all the necessary page tables are allocated,
|
|
|
|
* before we start writing in any of them, because it's a pain
|
2010-07-20 04:08:28 +02:00
|
|
|
* to undo our work properly.
|
2008-11-19 13:26:10 +01:00
|
|
|
*/
|
2012-10-31 19:24:14 +01:00
|
|
|
ret = pt_ptalloc_in_range(pt, v, v + VM_PAGE_SIZE*pages, flags, verify);
|
2010-09-15 16:11:12 +02:00
|
|
|
if(ret != OK) {
|
No more intel/minix segments.
This commit removes all traces of Minix segments (the text/data/stack
memory map abstraction in the kernel) and significance of Intel segments
(hardware segments like CS, DS that add offsets to all addressing before
page table translation). This ultimately simplifies the memory layout
and addressing and makes the same layout possible on non-Intel
architectures.
There are only two types of addresses in the world now: virtual
and physical; even the kernel and processes have the same virtual
address space. Kernel and user processes can be distinguished at a
glance as processes won't use 0xF0000000 and above.
No static pre-allocated memory sizes exist any more.
Changes to booting:
. The pre_init.c leaves the kernel and modules exactly as
they were left by the bootloader in physical memory
. The kernel starts running using physical addressing,
loaded at a fixed location given in its linker script by the
bootloader. All code and data in this phase are linked to
this fixed low location.
. It makes a bootstrap pagetable to map itself to a
fixed high location (also in linker script) and jumps to
the high address. All code and data then use this high addressing.
. All code/data symbols linked at the low addresses is prefixed by
an objcopy step with __k_unpaged_*, so that that code cannot
reference highly-linked symbols (which aren't valid yet) or vice
versa (symbols that aren't valid any more).
. The two addressing modes are separated in the linker script by
collecting the unpaged_*.o objects and linking them with low
addresses, and linking the rest high. Some objects are linked
twice, once low and once high.
. The bootstrap phase passes a lot of information (e.g. free memory
list, physical location of the modules, etc.) using the kinfo
struct.
. After this bootstrap the low-linked part is freed.
. The kernel maps in VM into the bootstrap page table so that VM can
begin executing. Its first job is to make page tables for all other
boot processes. So VM runs before RS, and RS gets a fully dynamic,
VM-managed address space. VM gets its privilege info from RS as usual
but that happens after RS starts running.
. Both the kernel loading VM and VM organizing boot processes happen
using the libexec logic. This removes the last reason for VM to
still know much about exec() and vm/exec.c is gone.
Further Implementation:
. All segments are based at 0 and have a 4 GB limit.
. The kernel is mapped in at the top of the virtual address
space so as not to constrain the user processes.
. Processes do not use segments from the LDT at all; there are
no segments in the LDT any more, so no LLDT is needed.
. The Minix segments T/D/S are gone and so none of the
user-space or in-kernel copy functions use them. The copy
functions use a process endpoint of NONE to realize it's
a physical address, virtual otherwise.
. The umap call only makes sense to translate a virtual address
to a physical address now.
. Segments-related calls like newmap and alloc_segments are gone.
. All segments-related translation in VM is gone (vir2map etc).
. Initialization in VM is simpler as no moving around is necessary.
. VM and all other boot processes can be linked wherever they wish
and will be mapped in at the right location by the kernel and VM
respectively.
Other changes:
. The multiboot code is less special: it does not use mb_print
for its diagnostics any more but uses printf() as normal, saving
the output into the diagnostics buffer, only printing to the
screen using the direct print functions if a panic() occurs.
. The multiboot code uses the flexible 'free memory map list'
style to receive the list of free memory if available.
. The kernel determines the memory layout of the processes to
a degree: it tells VM where the kernel starts and ends and
where the kernel wants the top of the process to be. VM then
uses this entire range, i.e. the stack is right at the top,
and mmap()ped bits of memory are placed below that downwards,
and the break grows upwards.
Other Consequences:
. Every process gets its own page table as address spaces
can't be separated any more by segments.
. As all segments are 0-based, there is no distinction between
virtual and linear addresses, nor between userspace and
kernel addresses.
. Less work is done when context switching, leading to a net
performance increase. (8% faster on my machine for 'make servers'.)
. The layout and configuration of the GDT makes sysenter and syscall
possible.
2012-05-07 16:03:35 +02:00
|
|
|
printf("VM: writemap: pt_ptalloc_in_range failed\n");
|
2010-09-15 16:11:12 +02:00
|
|
|
goto resume_exit;
|
2008-11-19 13:26:10 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Now write in them. */
|
|
|
|
for(p = 0; p < pages; p++) {
|
2009-09-23 15:33:01 +02:00
|
|
|
u32_t entry;
|
2013-03-06 13:17:07 +01:00
|
|
|
int pde = ARCH_VM_PDE(v);
|
|
|
|
int pte = ARCH_VM_PTE(v);
|
2008-11-19 13:26:10 +01:00
|
|
|
|
2012-10-31 19:24:14 +01:00
|
|
|
assert(!(v % VM_PAGE_SIZE));
|
|
|
|
assert(pte >= 0 && pte < ARCH_VM_PT_ENTRIES);
|
|
|
|
assert(pde >= 0 && pde < ARCH_VM_DIR_ENTRIES);
|
2008-11-19 13:26:10 +01:00
|
|
|
|
|
|
|
/* Page table has to be there. */
|
2012-10-31 19:24:14 +01:00
|
|
|
assert(pt->pt_dir[pde] & ARCH_VM_PDE_PRESENT);
|
2008-11-19 13:26:10 +01:00
|
|
|
|
No more intel/minix segments.
This commit removes all traces of Minix segments (the text/data/stack
memory map abstraction in the kernel) and significance of Intel segments
(hardware segments like CS, DS that add offsets to all addressing before
page table translation). This ultimately simplifies the memory layout
and addressing and makes the same layout possible on non-Intel
architectures.
There are only two types of addresses in the world now: virtual
and physical; even the kernel and processes have the same virtual
address space. Kernel and user processes can be distinguished at a
glance as processes won't use 0xF0000000 and above.
No static pre-allocated memory sizes exist any more.
Changes to booting:
. The pre_init.c leaves the kernel and modules exactly as
they were left by the bootloader in physical memory
. The kernel starts running using physical addressing,
loaded at a fixed location given in its linker script by the
bootloader. All code and data in this phase are linked to
this fixed low location.
. It makes a bootstrap pagetable to map itself to a
fixed high location (also in linker script) and jumps to
the high address. All code and data then use this high addressing.
. All code/data symbols linked at the low addresses is prefixed by
an objcopy step with __k_unpaged_*, so that that code cannot
reference highly-linked symbols (which aren't valid yet) or vice
versa (symbols that aren't valid any more).
. The two addressing modes are separated in the linker script by
collecting the unpaged_*.o objects and linking them with low
addresses, and linking the rest high. Some objects are linked
twice, once low and once high.
. The bootstrap phase passes a lot of information (e.g. free memory
list, physical location of the modules, etc.) using the kinfo
struct.
. After this bootstrap the low-linked part is freed.
. The kernel maps in VM into the bootstrap page table so that VM can
begin executing. Its first job is to make page tables for all other
boot processes. So VM runs before RS, and RS gets a fully dynamic,
VM-managed address space. VM gets its privilege info from RS as usual
but that happens after RS starts running.
. Both the kernel loading VM and VM organizing boot processes happen
using the libexec logic. This removes the last reason for VM to
still know much about exec() and vm/exec.c is gone.
Further Implementation:
. All segments are based at 0 and have a 4 GB limit.
. The kernel is mapped in at the top of the virtual address
space so as not to constrain the user processes.
. Processes do not use segments from the LDT at all; there are
no segments in the LDT any more, so no LLDT is needed.
. The Minix segments T/D/S are gone and so none of the
user-space or in-kernel copy functions use them. The copy
functions use a process endpoint of NONE to realize it's
a physical address, virtual otherwise.
. The umap call only makes sense to translate a virtual address
to a physical address now.
. Segments-related calls like newmap and alloc_segments are gone.
. All segments-related translation in VM is gone (vir2map etc).
. Initialization in VM is simpler as no moving around is necessary.
. VM and all other boot processes can be linked wherever they wish
and will be mapped in at the right location by the kernel and VM
respectively.
Other changes:
. The multiboot code is less special: it does not use mb_print
for its diagnostics any more but uses printf() as normal, saving
the output into the diagnostics buffer, only printing to the
screen using the direct print functions if a panic() occurs.
. The multiboot code uses the flexible 'free memory map list'
style to receive the list of free memory if available.
. The kernel determines the memory layout of the processes to
a degree: it tells VM where the kernel starts and ends and
where the kernel wants the top of the process to be. VM then
uses this entire range, i.e. the stack is right at the top,
and mmap()ped bits of memory are placed below that downwards,
and the break grows upwards.
Other Consequences:
. Every process gets its own page table as address spaces
can't be separated any more by segments.
. As all segments are 0-based, there is no distinction between
virtual and linear addresses, nor between userspace and
kernel addresses.
. Less work is done when context switching, leading to a net
performance increase. (8% faster on my machine for 'make servers'.)
. The layout and configuration of the GDT makes sysenter and syscall
possible.
2012-05-07 16:03:35 +02:00
|
|
|
/* We do not expect it to be a bigpage. */
|
2012-10-31 19:24:14 +01:00
|
|
|
assert(!(pt->pt_dir[pde] & ARCH_VM_BIGPAGE));
|
No more intel/minix segments.
This commit removes all traces of Minix segments (the text/data/stack
memory map abstraction in the kernel) and significance of Intel segments
(hardware segments like CS, DS that add offsets to all addressing before
page table translation). This ultimately simplifies the memory layout
and addressing and makes the same layout possible on non-Intel
architectures.
There are only two types of addresses in the world now: virtual
and physical; even the kernel and processes have the same virtual
address space. Kernel and user processes can be distinguished at a
glance as processes won't use 0xF0000000 and above.
No static pre-allocated memory sizes exist any more.
Changes to booting:
. The pre_init.c leaves the kernel and modules exactly as
they were left by the bootloader in physical memory
. The kernel starts running using physical addressing,
loaded at a fixed location given in its linker script by the
bootloader. All code and data in this phase are linked to
this fixed low location.
. It makes a bootstrap pagetable to map itself to a
fixed high location (also in linker script) and jumps to
the high address. All code and data then use this high addressing.
. All code/data symbols linked at the low addresses is prefixed by
an objcopy step with __k_unpaged_*, so that that code cannot
reference highly-linked symbols (which aren't valid yet) or vice
versa (symbols that aren't valid any more).
. The two addressing modes are separated in the linker script by
collecting the unpaged_*.o objects and linking them with low
addresses, and linking the rest high. Some objects are linked
twice, once low and once high.
. The bootstrap phase passes a lot of information (e.g. free memory
list, physical location of the modules, etc.) using the kinfo
struct.
. After this bootstrap the low-linked part is freed.
. The kernel maps in VM into the bootstrap page table so that VM can
begin executing. Its first job is to make page tables for all other
boot processes. So VM runs before RS, and RS gets a fully dynamic,
VM-managed address space. VM gets its privilege info from RS as usual
but that happens after RS starts running.
. Both the kernel loading VM and VM organizing boot processes happen
using the libexec logic. This removes the last reason for VM to
still know much about exec() and vm/exec.c is gone.
Further Implementation:
. All segments are based at 0 and have a 4 GB limit.
. The kernel is mapped in at the top of the virtual address
space so as not to constrain the user processes.
. Processes do not use segments from the LDT at all; there are
no segments in the LDT any more, so no LLDT is needed.
. The Minix segments T/D/S are gone and so none of the
user-space or in-kernel copy functions use them. The copy
functions use a process endpoint of NONE to realize it's
a physical address, virtual otherwise.
. The umap call only makes sense to translate a virtual address
to a physical address now.
. Segments-related calls like newmap and alloc_segments are gone.
. All segments-related translation in VM is gone (vir2map etc).
. Initialization in VM is simpler as no moving around is necessary.
. VM and all other boot processes can be linked wherever they wish
and will be mapped in at the right location by the kernel and VM
respectively.
Other changes:
. The multiboot code is less special: it does not use mb_print
for its diagnostics any more but uses printf() as normal, saving
the output into the diagnostics buffer, only printing to the
screen using the direct print functions if a panic() occurs.
. The multiboot code uses the flexible 'free memory map list'
style to receive the list of free memory if available.
. The kernel determines the memory layout of the processes to
a degree: it tells VM where the kernel starts and ends and
where the kernel wants the top of the process to be. VM then
uses this entire range, i.e. the stack is right at the top,
and mmap()ped bits of memory are placed below that downwards,
and the break grows upwards.
Other Consequences:
. Every process gets its own page table as address spaces
can't be separated any more by segments.
. As all segments are 0-based, there is no distinction between
virtual and linear addresses, nor between userspace and
kernel addresses.
. Less work is done when context switching, leading to a net
performance increase. (8% faster on my machine for 'make servers'.)
. The layout and configuration of the GDT makes sysenter and syscall
possible.
2012-05-07 16:03:35 +02:00
|
|
|
|
2008-11-19 13:26:10 +01:00
|
|
|
/* Make sure page directory entry for this page table
|
|
|
|
* is marked present and page table entry is available.
|
|
|
|
*/
|
2010-04-12 14:37:28 +02:00
|
|
|
assert(pt->pt_pt[pde]);
|
2008-11-19 13:26:10 +01:00
|
|
|
|
|
|
|
#if SANITYCHECKS
|
|
|
|
/* We don't expect to overwrite a page. */
|
2009-09-23 15:33:01 +02:00
|
|
|
if(!(writemapflags & (WMF_OVERWRITE|WMF_VERIFY)))
|
2012-10-31 19:24:14 +01:00
|
|
|
assert(!(pt->pt_pt[pde][pte] & ARCH_VM_PTE_PRESENT));
|
2008-11-19 13:26:10 +01:00
|
|
|
#endif
|
2009-09-23 15:33:01 +02:00
|
|
|
if(writemapflags & (WMF_WRITEFLAGSONLY|WMF_FREE)) {
|
2012-10-31 19:24:14 +01:00
|
|
|
#if defined(__i386__)
|
|
|
|
physaddr = pt->pt_pt[pde][pte] & ARCH_VM_ADDR_MASK;
|
|
|
|
#elif defined(__arm__)
|
|
|
|
physaddr = pt->pt_pt[pde][pte] & ARM_VM_PTE_MASK;
|
|
|
|
#endif
|
2009-09-21 16:49:49 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
if(writemapflags & WMF_FREE) {
|
2010-04-12 13:25:24 +02:00
|
|
|
free_mem(ABS2CLICK(physaddr), 1);
|
2009-09-21 16:49:49 +02:00
|
|
|
}
|
2008-11-19 13:26:10 +01:00
|
|
|
|
2009-09-23 15:33:01 +02:00
|
|
|
/* Entry we will write. */
|
2012-10-31 19:24:14 +01:00
|
|
|
#if defined(__i386__)
|
|
|
|
entry = (physaddr & ARCH_VM_ADDR_MASK) | flags;
|
|
|
|
#elif defined(__arm__)
|
|
|
|
entry = (physaddr & ARM_VM_PTE_MASK) | flags;
|
|
|
|
#endif
|
2009-09-23 15:33:01 +02:00
|
|
|
|
|
|
|
if(verify) {
|
|
|
|
u32_t maskedentry;
|
|
|
|
maskedentry = pt->pt_pt[pde][pte];
|
2012-10-31 19:24:14 +01:00
|
|
|
#if defined(__i386__)
|
2009-09-23 15:33:01 +02:00
|
|
|
maskedentry &= ~(I386_VM_ACC|I386_VM_DIRTY);
|
2012-10-31 19:24:14 +01:00
|
|
|
#endif
|
2009-09-23 15:33:01 +02:00
|
|
|
/* Verify pagetable entry. */
|
2013-02-01 17:48:40 +01:00
|
|
|
#if defined(__i386__)
|
2012-10-31 19:24:14 +01:00
|
|
|
if(entry & ARCH_VM_PTE_RW) {
|
2010-07-19 20:19:16 +02:00
|
|
|
/* If we expect a writable page, allow a readonly page. */
|
2012-10-31 19:24:14 +01:00
|
|
|
maskedentry |= ARCH_VM_PTE_RW;
|
2010-07-19 20:19:16 +02:00
|
|
|
}
|
2013-02-01 17:48:40 +01:00
|
|
|
#elif defined(__arm__)
|
|
|
|
if(!(entry & ARCH_VM_PTE_RO)) {
|
|
|
|
/* If we expect a writable page, allow a readonly page. */
|
|
|
|
maskedentry &= ~ARCH_VM_PTE_RO;
|
|
|
|
}
|
2013-05-16 10:11:12 +02:00
|
|
|
maskedentry &= ~(ARM_VM_PTE_WB|ARM_VM_PTE_WT);
|
2013-02-01 17:48:40 +01:00
|
|
|
#endif
|
2009-09-23 15:33:01 +02:00
|
|
|
if(maskedentry != entry) {
|
2010-04-12 13:25:24 +02:00
|
|
|
printf("pt_writemap: mismatch: ");
|
2012-10-31 19:24:14 +01:00
|
|
|
#if defined(__i386__)
|
|
|
|
if((entry & ARCH_VM_ADDR_MASK) !=
|
|
|
|
(maskedentry & ARCH_VM_ADDR_MASK)) {
|
|
|
|
#elif defined(__arm__)
|
|
|
|
if((entry & ARM_VM_PTE_MASK) !=
|
|
|
|
(maskedentry & ARM_VM_PTE_MASK)) {
|
|
|
|
#endif
|
2011-04-27 15:00:52 +02:00
|
|
|
printf("pt_writemap: physaddr mismatch (0x%lx, 0x%lx); ",
|
|
|
|
(long)entry, (long)maskedentry);
|
2010-04-12 13:25:24 +02:00
|
|
|
} else printf("phys ok; ");
|
|
|
|
printf(" flags: found %s; ",
|
|
|
|
ptestr(pt->pt_pt[pde][pte]));
|
|
|
|
printf(" masked %s; ",
|
|
|
|
ptestr(maskedentry));
|
|
|
|
printf(" expected %s\n", ptestr(entry));
|
2013-02-01 17:48:40 +01:00
|
|
|
printf("found 0x%x, wanted 0x%x\n",
|
|
|
|
pt->pt_pt[pde][pte], entry);
|
2010-09-15 16:11:12 +02:00
|
|
|
ret = EFAULT;
|
|
|
|
goto resume_exit;
|
2009-09-23 15:33:01 +02:00
|
|
|
}
|
|
|
|
} else {
|
|
|
|
/* Write pagetable entry. */
|
|
|
|
pt->pt_pt[pde][pte] = entry;
|
|
|
|
}
|
2008-11-19 13:26:10 +01:00
|
|
|
|
2012-10-31 19:24:14 +01:00
|
|
|
physaddr += VM_PAGE_SIZE;
|
|
|
|
v += VM_PAGE_SIZE;
|
2008-11-19 13:26:10 +01:00
|
|
|
}
|
|
|
|
|
2010-09-15 16:11:12 +02:00
|
|
|
resume_exit:
|
|
|
|
|
2011-02-11 15:56:52 +01:00
|
|
|
#ifdef CONFIG_SMP
|
2011-11-06 22:37:34 +01:00
|
|
|
if (vminhibit_clear) {
|
|
|
|
assert(vmp && vmp->vm_endpoint != NONE && vmp->vm_endpoint != VM_PROC_NR &&
|
|
|
|
!(vmp->vm_flags & VMF_EXITING));
|
2010-09-15 16:11:12 +02:00
|
|
|
sys_vmctl(vmp->vm_endpoint, VMCTL_VMINHIBIT_CLEAR, 0);
|
2011-11-06 22:37:34 +01:00
|
|
|
}
|
2011-02-11 15:56:52 +01:00
|
|
|
#endif
|
2010-09-15 16:11:12 +02:00
|
|
|
|
|
|
|
return ret;
|
2008-11-19 13:26:10 +01:00
|
|
|
}
|
|
|
|
|
2009-09-27 14:36:48 +02:00
|
|
|
/*===========================================================================*
|
|
|
|
* pt_checkrange *
|
|
|
|
*===========================================================================*/
|
2012-03-25 20:25:53 +02:00
|
|
|
int pt_checkrange(pt_t *pt, vir_bytes v, size_t bytes,
|
2009-09-27 14:36:48 +02:00
|
|
|
int write)
|
|
|
|
{
|
2011-06-01 11:30:58 +02:00
|
|
|
int p, pages;
|
2009-09-27 14:36:48 +02:00
|
|
|
|
2012-10-31 19:24:14 +01:00
|
|
|
assert(!(bytes % VM_PAGE_SIZE));
|
2009-09-27 14:36:48 +02:00
|
|
|
|
2012-10-31 19:24:14 +01:00
|
|
|
pages = bytes / VM_PAGE_SIZE;
|
2009-09-27 14:36:48 +02:00
|
|
|
|
|
|
|
for(p = 0; p < pages; p++) {
|
2013-03-06 13:17:07 +01:00
|
|
|
int pde = ARCH_VM_PDE(v);
|
|
|
|
int pte = ARCH_VM_PTE(v);
|
2009-09-27 14:36:48 +02:00
|
|
|
|
2012-10-31 19:24:14 +01:00
|
|
|
assert(!(v % VM_PAGE_SIZE));
|
|
|
|
assert(pte >= 0 && pte < ARCH_VM_PT_ENTRIES);
|
|
|
|
assert(pde >= 0 && pde < ARCH_VM_DIR_ENTRIES);
|
2009-09-27 14:36:48 +02:00
|
|
|
|
|
|
|
/* Page table has to be there. */
|
2012-10-31 19:24:14 +01:00
|
|
|
if(!(pt->pt_dir[pde] & ARCH_VM_PDE_PRESENT))
|
2009-09-27 14:36:48 +02:00
|
|
|
return EFAULT;
|
|
|
|
|
|
|
|
/* Make sure page directory entry for this page table
|
|
|
|
* is marked present and page table entry is available.
|
|
|
|
*/
|
2012-10-31 19:24:14 +01:00
|
|
|
assert((pt->pt_dir[pde] & ARCH_VM_PDE_PRESENT) && pt->pt_pt[pde]);
|
2009-09-27 14:36:48 +02:00
|
|
|
|
2012-10-31 19:24:14 +01:00
|
|
|
if(!(pt->pt_pt[pde][pte] & ARCH_VM_PTE_PRESENT)) {
|
2009-09-27 14:36:48 +02:00
|
|
|
return EFAULT;
|
|
|
|
}
|
|
|
|
|
2012-10-31 19:24:14 +01:00
|
|
|
#if defined(__i386__)
|
|
|
|
if(write && !(pt->pt_pt[pde][pte] & ARCH_VM_PTE_RW)) {
|
|
|
|
#elif defined(__arm__)
|
|
|
|
if(write && (pt->pt_pt[pde][pte] & ARCH_VM_PTE_RO)) {
|
|
|
|
#endif
|
2009-09-27 14:36:48 +02:00
|
|
|
return EFAULT;
|
|
|
|
}
|
|
|
|
|
2012-10-31 19:24:14 +01:00
|
|
|
v += VM_PAGE_SIZE;
|
2009-09-27 14:36:48 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
return OK;
|
|
|
|
}
|
|
|
|
|
2008-11-19 13:26:10 +01:00
|
|
|
/*===========================================================================*
|
|
|
|
* pt_new *
|
|
|
|
*===========================================================================*/
|
2012-03-25 20:25:53 +02:00
|
|
|
/* Set up 'pt' as an empty page table root with the kernel mapped in.
 *
 * Returns OK on success, ENOMEM if no page directory could be
 * allocated, or the error code reported by pt_mapkernel().
 */
int pt_new(pt_t *pt)
{
	int slot;

	/* A page directory, once created for a process slot, is kept and
	 * reused rather than re-allocated or moved. That is marginally
	 * faster and also avoids having to invalidate the in-kernel page
	 * table mappings that point at the page directories (the
	 * page_directories data). Only allocate when there is none yet;
	 * its physical address is recorded in pt_dir_phys for later use.
	 */
	if(!pt->pt_dir) {
		pt->pt_dir = vm_allocpages((phys_bytes *)&pt->pt_dir_phys,
			VMP_PAGEDIR, ARCH_PAGEDIR_SIZE/VM_PAGE_SIZE);
		if(!pt->pt_dir)
			return ENOMEM;
	}

	/* The directory must be aligned to its own size. */
	assert(!((u32_t)pt->pt_dir_phys % ARCH_PAGEDIR_SIZE));

	/* Start with no page tables at all: every directory entry is
	 * invalid (PRESENT bit = 0) and has no page table attached.
	 */
	for(slot = 0; slot < ARCH_VM_DIR_ENTRIES; slot++) {
		pt->pt_dir[slot] = 0;
		pt->pt_pt[slot] = NULL;
	}

	/* Searches for free virtual address space begin at the bottom. */
	pt->pt_virtop = 0;

	/* Map in kernel; its result (OK or an error) is ours as well. */
	return pt_mapkernel(pt);
}
|
|
|
|
|
No more intel/minix segments.
This commit removes all traces of Minix segments (the text/data/stack
memory map abstraction in the kernel) and significance of Intel segments
(hardware segments like CS, DS that add offsets to all addressing before
page table translation). This ultimately simplifies the memory layout
and addressing and makes the same layout possible on non-Intel
architectures.
There are only two types of addresses in the world now: virtual
and physical; even the kernel and processes have the same virtual
address space. Kernel and user processes can be distinguished at a
glance as processes won't use 0xF0000000 and above.
No static pre-allocated memory sizes exist any more.
Changes to booting:
. The pre_init.c leaves the kernel and modules exactly as
they were left by the bootloader in physical memory
. The kernel starts running using physical addressing,
loaded at a fixed location given in its linker script by the
bootloader. All code and data in this phase are linked to
this fixed low location.
. It makes a bootstrap pagetable to map itself to a
fixed high location (also in linker script) and jumps to
the high address. All code and data then use this high addressing.
. All code/data symbols linked at the low addresses are prefixed by
an objcopy step with __k_unpaged_*, so that that code cannot
reference highly-linked symbols (which aren't valid yet) or vice
versa (symbols that aren't valid any more).
. The two addressing modes are separated in the linker script by
collecting the unpaged_*.o objects and linking them with low
addresses, and linking the rest high. Some objects are linked
twice, once low and once high.
. The bootstrap phase passes a lot of information (e.g. free memory
list, physical location of the modules, etc.) using the kinfo
struct.
. After this bootstrap the low-linked part is freed.
. The kernel maps in VM into the bootstrap page table so that VM can
begin executing. Its first job is to make page tables for all other
boot processes. So VM runs before RS, and RS gets a fully dynamic,
VM-managed address space. VM gets its privilege info from RS as usual
but that happens after RS starts running.
. Both the kernel loading VM and VM organizing boot processes happen
using the libexec logic. This removes the last reason for VM to
still know much about exec() and vm/exec.c is gone.
Further Implementation:
. All segments are based at 0 and have a 4 GB limit.
. The kernel is mapped in at the top of the virtual address
space so as not to constrain the user processes.
. Processes do not use segments from the LDT at all; there are
no segments in the LDT any more, so no LLDT is needed.
. The Minix segments T/D/S are gone and so none of the
user-space or in-kernel copy functions use them. The copy
functions use a process endpoint of NONE to realize it's
a physical address, virtual otherwise.
. The umap call only makes sense to translate a virtual address
to a physical address now.
. Segments-related calls like newmap and alloc_segments are gone.
. All segments-related translation in VM is gone (vir2map etc).
. Initialization in VM is simpler as no moving around is necessary.
. VM and all other boot processes can be linked wherever they wish
and will be mapped in at the right location by the kernel and VM
respectively.
Other changes:
. The multiboot code is less special: it does not use mb_print
for its diagnostics any more but uses printf() as normal, saving
the output into the diagnostics buffer, only printing to the
screen using the direct print functions if a panic() occurs.
. The multiboot code uses the flexible 'free memory map list'
style to receive the list of free memory if available.
. The kernel determines the memory layout of the processes to
a degree: it tells VM where the kernel starts and ends and
where the kernel wants the top of the process to be. VM then
uses this entire range, i.e. the stack is right at the top,
and mmap()ped bits of memory are placed below that downwards,
and the break grows upwards.
Other Consequences:
. Every process gets its own page table as address spaces
can't be separated any more by segments.
. As all segments are 0-based, there is no distinction between
virtual and linear addresses, nor between userspace and
kernel addresses.
. Less work is done when context switching, leading to a net
performance increase. (8% faster on my machine for 'make servers'.)
. The layout and configuration of the GDT makes sysenter and syscall
possible.
2012-05-07 16:03:35 +02:00
|
|
|
static int freepde(void)
|
|
|
|
{
|
|
|
|
int p = kernel_boot_info.freepde_start++;
|
2012-10-31 19:24:14 +01:00
|
|
|
assert(kernel_boot_info.freepde_start < ARCH_VM_DIR_ENTRIES);
|
No more intel/minix segments.
This commit removes all traces of Minix segments (the text/data/stack
memory map abstraction in the kernel) and significance of Intel segments
(hardware segments like CS, DS that add offsets to all addressing before
page table translation). This ultimately simplifies the memory layout
and addressing and makes the same layout possible on non-Intel
architectures.
There are only two types of addresses in the world now: virtual
and physical; even the kernel and processes have the same virtual
address space. Kernel and user processes can be distinguished at a
glance as processes won't use 0xF0000000 and above.
No static pre-allocated memory sizes exist any more.
Changes to booting:
. The pre_init.c leaves the kernel and modules exactly as
they were left by the bootloader in physical memory
. The kernel starts running using physical addressing,
loaded at a fixed location given in its linker script by the
bootloader. All code and data in this phase are linked to
this fixed low location.
. It makes a bootstrap pagetable to map itself to a
fixed high location (also in linker script) and jumps to
the high address. All code and data then use this high addressing.
. All code/data symbols linked at the low addresses is prefixed by
an objcopy step with __k_unpaged_*, so that that code cannot
reference highly-linked symbols (which aren't valid yet) or vice
versa (symbols that aren't valid any more).
. The two addressing modes are separated in the linker script by
collecting the unpaged_*.o objects and linking them with low
addresses, and linking the rest high. Some objects are linked
twice, once low and once high.
. The bootstrap phase passes a lot of information (e.g. free memory
list, physical location of the modules, etc.) using the kinfo
struct.
. After this bootstrap the low-linked part is freed.
. The kernel maps in VM into the bootstrap page table so that VM can
begin executing. Its first job is to make page tables for all other
boot processes. So VM runs before RS, and RS gets a fully dynamic,
VM-managed address space. VM gets its privilege info from RS as usual
but that happens after RS starts running.
. Both the kernel loading VM and VM organizing boot processes happen
using the libexec logic. This removes the last reason for VM to
still know much about exec() and vm/exec.c is gone.
Further Implementation:
. All segments are based at 0 and have a 4 GB limit.
. The kernel is mapped in at the top of the virtual address
space so as not to constrain the user processes.
. Processes do not use segments from the LDT at all; there are
no segments in the LDT any more, so no LLDT is needed.
. The Minix segments T/D/S are gone and so none of the
user-space or in-kernel copy functions use them. The copy
functions use a process endpoint of NONE to realize it's
a physical address, virtual otherwise.
. The umap call only makes sense to translate a virtual address
to a physical address now.
. Segments-related calls like newmap and alloc_segments are gone.
. All segments-related translation in VM is gone (vir2map etc).
. Initialization in VM is simpler as no moving around is necessary.
. VM and all other boot processes can be linked wherever they wish
and will be mapped in at the right location by the kernel and VM
respectively.
Other changes:
. The multiboot code is less special: it does not use mb_print
for its diagnostics any more but uses printf() as normal, saving
the output into the diagnostics buffer, only printing to the
screen using the direct print functions if a panic() occurs.
. The multiboot code uses the flexible 'free memory map list'
style to receive the list of free memory if available.
. The kernel determines the memory layout of the processes to
a degree: it tells VM where the kernel starts and ends and
where the kernel wants the top of the process to be. VM then
uses this entire range, i.e. the stack is right at the top,
and mmap()ped bits of memory are placed below that downwards,
and the break grows upwards.
Other Consequences:
. Every process gets its own page table as address spaces
can't be separated any more by segments.
. As all segments are 0-based, there is no distinction between
virtual and linear addresses, nor between userspace and
kernel addresses.
. Less work is done when context switching, leading to a net
performance increase. (8% faster on my machine for 'make servers'.)
. The layout and configuration of the GDT makes sysenter and syscall
possible.
2012-05-07 16:03:35 +02:00
|
|
|
return p;
|
|
|
|
}
|
|
|
|
|
2008-11-19 13:26:10 +01:00
|
|
|
/*===========================================================================*
|
2009-05-11 21:11:37 +02:00
|
|
|
* pt_init *
|
2008-11-19 13:26:10 +01:00
|
|
|
*===========================================================================*/
|
No more intel/minix segments.
This commit removes all traces of Minix segments (the text/data/stack
memory map abstraction in the kernel) and significance of Intel segments
(hardware segments like CS, DS that add offsets to all addressing before
page table translation). This ultimately simplifies the memory layout
and addressing and makes the same layout possible on non-Intel
architectures.
There are only two types of addresses in the world now: virtual
and physical; even the kernel and processes have the same virtual
address space. Kernel and user processes can be distinguished at a
glance as processes won't use 0xF0000000 and above.
No static pre-allocated memory sizes exist any more.
Changes to booting:
. The pre_init.c leaves the kernel and modules exactly as
they were left by the bootloader in physical memory
. The kernel starts running using physical addressing,
loaded at a fixed location given in its linker script by the
bootloader. All code and data in this phase are linked to
this fixed low location.
. It makes a bootstrap pagetable to map itself to a
fixed high location (also in linker script) and jumps to
the high address. All code and data then use this high addressing.
. All code/data symbols linked at the low addresses are prefixed by
an objcopy step with __k_unpaged_*, so that that code cannot
reference highly-linked symbols (which aren't valid yet) or vice
versa (symbols that aren't valid any more).
. The two addressing modes are separated in the linker script by
collecting the unpaged_*.o objects and linking them with low
addresses, and linking the rest high. Some objects are linked
twice, once low and once high.
. The bootstrap phase passes a lot of information (e.g. free memory
list, physical location of the modules, etc.) using the kinfo
struct.
. After this bootstrap the low-linked part is freed.
. The kernel maps in VM into the bootstrap page table so that VM can
begin executing. Its first job is to make page tables for all other
boot processes. So VM runs before RS, and RS gets a fully dynamic,
VM-managed address space. VM gets its privilege info from RS as usual
but that happens after RS starts running.
. Both the kernel loading VM and VM organizing boot processes happen
using the libexec logic. This removes the last reason for VM to
still know much about exec() and vm/exec.c is gone.
Further Implementation:
. All segments are based at 0 and have a 4 GB limit.
. The kernel is mapped in at the top of the virtual address
space so as not to constrain the user processes.
. Processes do not use segments from the LDT at all; there are
no segments in the LDT any more, so no LLDT is needed.
. The Minix segments T/D/S are gone and so none of the
user-space or in-kernel copy functions use them. The copy
functions use a process endpoint of NONE to realize it's
a physical address, virtual otherwise.
. The umap call only makes sense to translate a virtual address
to a physical address now.
. Segments-related calls like newmap and alloc_segments are gone.
. All segments-related translation in VM is gone (vir2map etc).
. Initialization in VM is simpler as no moving around is necessary.
. VM and all other boot processes can be linked wherever they wish
and will be mapped in at the right location by the kernel and VM
respectively.
Other changes:
. The multiboot code is less special: it does not use mb_print
for its diagnostics any more but uses printf() as normal, saving
the output into the diagnostics buffer, only printing to the
screen using the direct print functions if a panic() occurs.
. The multiboot code uses the flexible 'free memory map list'
style to receive the list of free memory if available.
. The kernel determines the memory layout of the processes to
a degree: it tells VM where the kernel starts and ends and
where the kernel wants the top of the process to be. VM then
uses this entire range, i.e. the stack is right at the top,
and mmap()ped bits of memory are placed below that downwards,
and the break grows upwards.
Other Consequences:
. Every process gets its own page table as address spaces
can't be separated any more by segments.
. As all segments are 0-based, there is no distinction between
virtual and linear addresses, nor between userspace and
kernel addresses.
. Less work is done when context switching, leading to a net
performance increase. (8% faster on my machine for 'make servers'.)
. The layout and configuration of the GDT makes sysenter and syscall
possible.
2012-05-07 16:03:35 +02:00
|
|
|
void pt_init(void)
|
2008-11-19 13:26:10 +01:00
|
|
|
{
|
2009-05-11 21:11:37 +02:00
|
|
|
pt_t *newpt;
|
No more intel/minix segments.
This commit removes all traces of Minix segments (the text/data/stack
memory map abstraction in the kernel) and significance of Intel segments
(hardware segments like CS, DS that add offsets to all addressing before
page table translation). This ultimately simplifies the memory layout
and addressing and makes the same layout possible on non-Intel
architectures.
There are only two types of addresses in the world now: virtual
and physical; even the kernel and processes have the same virtual
address space. Kernel and user processes can be distinguished at a
glance as processes won't use 0xF0000000 and above.
No static pre-allocated memory sizes exist any more.
Changes to booting:
. The pre_init.c leaves the kernel and modules exactly as
they were left by the bootloader in physical memory
. The kernel starts running using physical addressing,
loaded at a fixed location given in its linker script by the
bootloader. All code and data in this phase are linked to
this fixed low location.
. It makes a bootstrap pagetable to map itself to a
fixed high location (also in linker script) and jumps to
the high address. All code and data then use this high addressing.
. All code/data symbols linked at the low addresses are prefixed by
an objcopy step with __k_unpaged_*, so that that code cannot
reference highly-linked symbols (which aren't valid yet) or vice
versa (symbols that aren't valid any more).
. The two addressing modes are separated in the linker script by
collecting the unpaged_*.o objects and linking them with low
addresses, and linking the rest high. Some objects are linked
twice, once low and once high.
. The bootstrap phase passes a lot of information (e.g. free memory
list, physical location of the modules, etc.) using the kinfo
struct.
. After this bootstrap the low-linked part is freed.
. The kernel maps in VM into the bootstrap page table so that VM can
begin executing. Its first job is to make page tables for all other
boot processes. So VM runs before RS, and RS gets a fully dynamic,
VM-managed address space. VM gets its privilege info from RS as usual
but that happens after RS starts running.
. Both the kernel loading VM and VM organizing boot processes happen
using the libexec logic. This removes the last reason for VM to
still know much about exec() and vm/exec.c is gone.
Further Implementation:
. All segments are based at 0 and have a 4 GB limit.
. The kernel is mapped in at the top of the virtual address
space so as not to constrain the user processes.
. Processes do not use segments from the LDT at all; there are
no segments in the LDT any more, so no LLDT is needed.
. The Minix segments T/D/S are gone and so none of the
user-space or in-kernel copy functions use them. The copy
functions use a process endpoint of NONE to realize it's
a physical address, virtual otherwise.
. The umap call only makes sense to translate a virtual address
to a physical address now.
. Segments-related calls like newmap and alloc_segments are gone.
. All segments-related translation in VM is gone (vir2map etc).
. Initialization in VM is simpler as no moving around is necessary.
. VM and all other boot processes can be linked wherever they wish
and will be mapped in at the right location by the kernel and VM
respectively.
Other changes:
. The multiboot code is less special: it does not use mb_print
for its diagnostics any more but uses printf() as normal, saving
the output into the diagnostics buffer, only printing to the
screen using the direct print functions if a panic() occurs.
. The multiboot code uses the flexible 'free memory map list'
style to receive the list of free memory if available.
. The kernel determines the memory layout of the processes to
a degree: it tells VM where the kernel starts and ends and
where the kernel wants the top of the process to be. VM then
uses this entire range, i.e. the stack is right at the top,
and mmap()ped bits of memory are placed below that downwards,
and the break grows upwards.
Other Consequences:
. Every process gets its own page table as address spaces
can't be separated any more by segments.
. As all segments are 0-based, there is no distinction between
virtual and linear addresses, nor between userspace and
kernel addresses.
. Less work is done when context switching, leading to a net
performance increase. (8% faster on my machine for 'make servers'.)
. The layout and configuration of the GDT makes sysenter and syscall
possible.
2012-05-07 16:03:35 +02:00
|
|
|
int s, r, p;
|
2009-09-23 15:33:01 +02:00
|
|
|
vir_bytes sparepages_mem;
|
2012-10-31 19:24:14 +01:00
|
|
|
#if defined(__arm__)
|
|
|
|
vir_bytes sparepagedirs_mem;
|
|
|
|
#endif
|
|
|
|
static u32_t currentpagedir[ARCH_VM_DIR_ENTRIES];
|
No more intel/minix segments.
This commit removes all traces of Minix segments (the text/data/stack
memory map abstraction in the kernel) and significance of Intel segments
(hardware segments like CS, DS that add offsets to all addressing before
page table translation). This ultimately simplifies the memory layout
and addressing and makes the same layout possible on non-Intel
architectures.
There are only two types of addresses in the world now: virtual
and physical; even the kernel and processes have the same virtual
address space. Kernel and user processes can be distinguished at a
glance as processes won't use 0xF0000000 and above.
No static pre-allocated memory sizes exist any more.
Changes to booting:
. The pre_init.c leaves the kernel and modules exactly as
they were left by the bootloader in physical memory
. The kernel starts running using physical addressing,
loaded at a fixed location given in its linker script by the
bootloader. All code and data in this phase are linked to
this fixed low location.
. It makes a bootstrap pagetable to map itself to a
fixed high location (also in linker script) and jumps to
the high address. All code and data then use this high addressing.
. All code/data symbols linked at the low addresses are prefixed by
an objcopy step with __k_unpaged_*, so that that code cannot
reference highly-linked symbols (which aren't valid yet) or vice
versa (symbols that aren't valid any more).
. The two addressing modes are separated in the linker script by
collecting the unpaged_*.o objects and linking them with low
addresses, and linking the rest high. Some objects are linked
twice, once low and once high.
. The bootstrap phase passes a lot of information (e.g. free memory
list, physical location of the modules, etc.) using the kinfo
struct.
. After this bootstrap the low-linked part is freed.
. The kernel maps in VM into the bootstrap page table so that VM can
begin executing. Its first job is to make page tables for all other
boot processes. So VM runs before RS, and RS gets a fully dynamic,
VM-managed address space. VM gets its privilege info from RS as usual
but that happens after RS starts running.
. Both the kernel loading VM and VM organizing boot processes happen
using the libexec logic. This removes the last reason for VM to
still know much about exec() and vm/exec.c is gone.
Further Implementation:
. All segments are based at 0 and have a 4 GB limit.
. The kernel is mapped in at the top of the virtual address
space so as not to constrain the user processes.
. Processes do not use segments from the LDT at all; there are
no segments in the LDT any more, so no LLDT is needed.
. The Minix segments T/D/S are gone and so none of the
user-space or in-kernel copy functions use them. The copy
functions use a process endpoint of NONE to realize it's
a physical address, virtual otherwise.
. The umap call only makes sense to translate a virtual address
to a physical address now.
. Segments-related calls like newmap and alloc_segments are gone.
. All segments-related translation in VM is gone (vir2map etc).
. Initialization in VM is simpler as no moving around is necessary.
. VM and all other boot processes can be linked wherever they wish
and will be mapped in at the right location by the kernel and VM
respectively.
Other changes:
. The multiboot code is less special: it does not use mb_print
for its diagnostics any more but uses printf() as normal, saving
the output into the diagnostics buffer, only printing to the
screen using the direct print functions if a panic() occurs.
. The multiboot code uses the flexible 'free memory map list'
style to receive the list of free memory if available.
. The kernel determines the memory layout of the processes to
a degree: it tells VM where the kernel starts and ends and
where the kernel wants the top of the process to be. VM then
uses this entire range, i.e. the stack is right at the top,
and mmap()ped bits of memory are placed below that downwards,
and the break grows upwards.
Other Consequences:
. Every process gets its own page table as address spaces
can't be separated any more by segments.
. As all segments are 0-based, there is no distinction between
virtual and linear addresses, nor between userspace and
kernel addresses.
. Less work is done when context switching, leading to a net
performance increase. (8% faster on my machine for 'make servers'.)
. The layout and configuration of the GDT makes sysenter and syscall
possible.
2012-05-07 16:03:35 +02:00
|
|
|
int m = kernel_boot_info.kern_mod;
|
2012-10-31 19:24:14 +01:00
|
|
|
#if defined(__i386__)
|
2013-02-10 19:37:12 +01:00
|
|
|
int global_bit_ok = 0;
|
2012-08-11 17:45:35 +02:00
|
|
|
u32_t mypdbr; /* Page Directory Base Register (cr3) value */
|
2012-10-31 19:24:14 +01:00
|
|
|
#elif defined(__arm__)
|
|
|
|
u32_t myttbr;
|
|
|
|
#endif
|
No more intel/minix segments.
This commit removes all traces of Minix segments (the text/data/stack
memory map abstraction in the kernel) and significance of Intel segments
(hardware segments like CS, DS that add offsets to all addressing before
page table translation). This ultimately simplifies the memory layout
and addressing and makes the same layout possible on non-Intel
architectures.
There are only two types of addresses in the world now: virtual
and physical; even the kernel and processes have the same virtual
address space. Kernel and user processes can be distinguished at a
glance as processes won't use 0xF0000000 and above.
No static pre-allocated memory sizes exist any more.
Changes to booting:
. The pre_init.c leaves the kernel and modules exactly as
they were left by the bootloader in physical memory
. The kernel starts running using physical addressing,
loaded at a fixed location given in its linker script by the
bootloader. All code and data in this phase are linked to
this fixed low location.
. It makes a bootstrap pagetable to map itself to a
fixed high location (also in linker script) and jumps to
the high address. All code and data then use this high addressing.
. All code/data symbols linked at the low addresses are prefixed by
an objcopy step with __k_unpaged_*, so that that code cannot
reference highly-linked symbols (which aren't valid yet) or vice
versa (symbols that aren't valid any more).
. The two addressing modes are separated in the linker script by
collecting the unpaged_*.o objects and linking them with low
addresses, and linking the rest high. Some objects are linked
twice, once low and once high.
. The bootstrap phase passes a lot of information (e.g. free memory
list, physical location of the modules, etc.) using the kinfo
struct.
. After this bootstrap the low-linked part is freed.
. The kernel maps in VM into the bootstrap page table so that VM can
begin executing. Its first job is to make page tables for all other
boot processes. So VM runs before RS, and RS gets a fully dynamic,
VM-managed address space. VM gets its privilege info from RS as usual
but that happens after RS starts running.
. Both the kernel loading VM and VM organizing boot processes happen
using the libexec logic. This removes the last reason for VM to
still know much about exec() and vm/exec.c is gone.
Further Implementation:
. All segments are based at 0 and have a 4 GB limit.
. The kernel is mapped in at the top of the virtual address
space so as not to constrain the user processes.
. Processes do not use segments from the LDT at all; there are
no segments in the LDT any more, so no LLDT is needed.
. The Minix segments T/D/S are gone and so none of the
user-space or in-kernel copy functions use them. The copy
functions use a process endpoint of NONE to realize it's
a physical address, virtual otherwise.
. The umap call only makes sense to translate a virtual address
to a physical address now.
. Segments-related calls like newmap and alloc_segments are gone.
. All segments-related translation in VM is gone (vir2map etc).
. Initialization in VM is simpler as no moving around is necessary.
. VM and all other boot processes can be linked wherever they wish
and will be mapped in at the right location by the kernel and VM
respectively.
Other changes:
. The multiboot code is less special: it does not use mb_print
for its diagnostics any more but uses printf() as normal, saving
the output into the diagnostics buffer, only printing to the
screen using the direct print functions if a panic() occurs.
. The multiboot code uses the flexible 'free memory map list'
style to receive the list of free memory if available.
. The kernel determines the memory layout of the processes to
a degree: it tells VM where the kernel starts and ends and
where the kernel wants the top of the process to be. VM then
uses this entire range, i.e. the stack is right at the top,
and mmap()ped bits of memory are placed below that downwards,
and the break grows upwards.
Other Consequences:
. Every process gets its own page table as address spaces
can't be separated any more by segments.
. As all segments are 0-based, there is no distinction between
virtual and linear addresses, nor between userspace and
kernel addresses.
. Less work is done when context switching, leading to a net
performance increase. (8% faster on my machine for 'make servers'.)
. The layout and configuration of the GDT makes sysenter and syscall
possible.
2012-05-07 16:03:35 +02:00
|
|
|
|
|
|
|
/* Find what the physical location of the kernel is. */
|
|
|
|
assert(m >= 0);
|
|
|
|
assert(m < kernel_boot_info.mods_with_kernel);
|
|
|
|
assert(kernel_boot_info.mods_with_kernel < MULTIBOOT_MAX_MODS);
|
|
|
|
kern_mb_mod = &kernel_boot_info.module_list[m];
|
|
|
|
kern_size = kern_mb_mod->mod_end - kern_mb_mod->mod_start;
|
2012-10-31 19:24:14 +01:00
|
|
|
assert(!(kern_mb_mod->mod_start % ARCH_BIG_PAGE_SIZE));
|
|
|
|
assert(!(kernel_boot_info.vir_kern_start % ARCH_BIG_PAGE_SIZE));
|
|
|
|
kern_start_pde = kernel_boot_info.vir_kern_start / ARCH_BIG_PAGE_SIZE;
|
2009-12-07 13:10:44 +01:00
|
|
|
|
2009-09-23 15:33:01 +02:00
|
|
|
/* Get ourselves spare pages. */
|
No more intel/minix segments.
This commit removes all traces of Minix segments (the text/data/stack
memory map abstraction in the kernel) and significance of Intel segments
(hardware segments like CS, DS that add offsets to all addressing before
page table translation). This ultimately simplifies the memory layout
and addressing and makes the same layout possible on non-Intel
architectures.
There are only two types of addresses in the world now: virtual
and physical; even the kernel and processes have the same virtual
address space. Kernel and user processes can be distinguished at a
glance as processes won't use 0xF0000000 and above.
No static pre-allocated memory sizes exist any more.
Changes to booting:
. The pre_init.c leaves the kernel and modules exactly as
they were left by the bootloader in physical memory
. The kernel starts running using physical addressing,
loaded at a fixed location given in its linker script by the
bootloader. All code and data in this phase are linked to
this fixed low location.
. It makes a bootstrap pagetable to map itself to a
fixed high location (also in linker script) and jumps to
the high address. All code and data then use this high addressing.
. All code/data symbols linked at the low addresses are prefixed by
an objcopy step with __k_unpaged_*, so that that code cannot
reference highly-linked symbols (which aren't valid yet) or vice
versa (symbols that aren't valid any more).
. The two addressing modes are separated in the linker script by
collecting the unpaged_*.o objects and linking them with low
addresses, and linking the rest high. Some objects are linked
twice, once low and once high.
. The bootstrap phase passes a lot of information (e.g. free memory
list, physical location of the modules, etc.) using the kinfo
struct.
. After this bootstrap the low-linked part is freed.
. The kernel maps in VM into the bootstrap page table so that VM can
begin executing. Its first job is to make page tables for all other
boot processes. So VM runs before RS, and RS gets a fully dynamic,
VM-managed address space. VM gets its privilege info from RS as usual
but that happens after RS starts running.
. Both the kernel loading VM and VM organizing boot processes happen
using the libexec logic. This removes the last reason for VM to
still know much about exec() and vm/exec.c is gone.
Further Implementation:
. All segments are based at 0 and have a 4 GB limit.
. The kernel is mapped in at the top of the virtual address
space so as not to constrain the user processes.
. Processes do not use segments from the LDT at all; there are
no segments in the LDT any more, so no LLDT is needed.
. The Minix segments T/D/S are gone and so none of the
user-space or in-kernel copy functions use them. The copy
functions use a process endpoint of NONE to realize it's
a physical address, virtual otherwise.
. The umap call only makes sense to translate a virtual address
to a physical address now.
. Segments-related calls like newmap and alloc_segments are gone.
. All segments-related translation in VM is gone (vir2map etc).
. Initialization in VM is simpler as no moving around is necessary.
. VM and all other boot processes can be linked wherever they wish
and will be mapped in at the right location by the kernel and VM
respectively.
Other changes:
. The multiboot code is less special: it does not use mb_print
for its diagnostics any more but uses printf() as normal, saving
the output into the diagnostics buffer, only printing to the
screen using the direct print functions if a panic() occurs.
. The multiboot code uses the flexible 'free memory map list'
style to receive the list of free memory if available.
. The kernel determines the memory layout of the processes to
a degree: it tells VM where the kernel starts and ends and
where the kernel wants the top of the process to be. VM then
uses this entire range, i.e. the stack is right at the top,
and mmap()ped bits of memory are placed below that downwards,
and the break grows upwards.
Other Consequences:
. Every process gets its own page table as address spaces
can't be separated any more by segments.
. As all segments are 0-based, there is no distinction between
virtual and linear addresses, nor between userspace and
kernel addresses.
. Less work is done when context switching, leading to a net
performance increase. (8% faster on my machine for 'make servers'.)
. The layout and configuration of the GDT makes sysenter and syscall
possible.
2012-05-07 16:03:35 +02:00
|
|
|
sparepages_mem = (vir_bytes) static_sparepages;
|
2012-10-31 19:24:14 +01:00
|
|
|
assert(!(sparepages_mem % VM_PAGE_SIZE));
|
|
|
|
|
|
|
|
#if defined(__arm__)
|
|
|
|
/* Get ourselves spare pagedirs. */
|
|
|
|
sparepagedirs_mem = (vir_bytes) static_sparepagedirs;
|
|
|
|
assert(!(sparepagedirs_mem % ARCH_PAGEDIR_SIZE));
|
|
|
|
#endif
|
2012-11-09 19:00:46 +01:00
|
|
|
|
No more intel/minix segments.
This commit removes all traces of Minix segments (the text/data/stack
memory map abstraction in the kernel) and significance of Intel segments
(hardware segments like CS, DS that add offsets to all addressing before
page table translation). This ultimately simplifies the memory layout
and addressing and makes the same layout possible on non-Intel
architectures.
There are only two types of addresses in the world now: virtual
and physical; even the kernel and processes have the same virtual
address space. Kernel and user processes can be distinguished at a
glance as processes won't use 0xF0000000 and above.
No static pre-allocated memory sizes exist any more.
Changes to booting:
. The pre_init.c leaves the kernel and modules exactly as
they were left by the bootloader in physical memory
. The kernel starts running using physical addressing,
loaded at a fixed location given in its linker script by the
bootloader. All code and data in this phase are linked to
this fixed low location.
. It makes a bootstrap pagetable to map itself to a
fixed high location (also in linker script) and jumps to
the high address. All code and data then use this high addressing.
. All code/data symbols linked at the low addresses are prefixed by
an objcopy step with __k_unpaged_*, so that that code cannot
reference highly-linked symbols (which aren't valid yet) or vice
versa (symbols that aren't valid any more).
. The two addressing modes are separated in the linker script by
collecting the unpaged_*.o objects and linking them with low
addresses, and linking the rest high. Some objects are linked
twice, once low and once high.
. The bootstrap phase passes a lot of information (e.g. free memory
list, physical location of the modules, etc.) using the kinfo
struct.
. After this bootstrap the low-linked part is freed.
. The kernel maps in VM into the bootstrap page table so that VM can
begin executing. Its first job is to make page tables for all other
boot processes. So VM runs before RS, and RS gets a fully dynamic,
VM-managed address space. VM gets its privilege info from RS as usual
but that happens after RS starts running.
. Both the kernel loading VM and VM organizing boot processes happen
using the libexec logic. This removes the last reason for VM to
still know much about exec() and vm/exec.c is gone.
Further Implementation:
. All segments are based at 0 and have a 4 GB limit.
. The kernel is mapped in at the top of the virtual address
space so as not to constrain the user processes.
. Processes do not use segments from the LDT at all; there are
no segments in the LDT any more, so no LLDT is needed.
. The Minix segments T/D/S are gone and so none of the
user-space or in-kernel copy functions use them. The copy
functions use a process endpoint of NONE to realize it's
a physical address, virtual otherwise.
. The umap call only makes sense to translate a virtual address
to a physical address now.
. Segments-related calls like newmap and alloc_segments are gone.
. All segments-related translation in VM is gone (vir2map etc).
. Initialization in VM is simpler as no moving around is necessary.
. VM and all other boot processes can be linked wherever they wish
and will be mapped in at the right location by the kernel and VM
respectively.
Other changes:
. The multiboot code is less special: it does not use mb_print
for its diagnostics any more but uses printf() as normal, saving
the output into the diagnostics buffer, only printing to the
screen using the direct print functions if a panic() occurs.
. The multiboot code uses the flexible 'free memory map list'
style to receive the list of free memory if available.
. The kernel determines the memory layout of the processes to
a degree: it tells VM where the kernel starts and ends and
where the kernel wants the top of the process to be. VM then
uses this entire range, i.e. the stack is right at the top,
and mmap()ped bits of memory are placed below that downwards,
and the break grows upwards.
Other Consequences:
. Every process gets its own page table as address spaces
can't be separated any more by segments.
. As all segments are 0-based, there is no distinction between
virtual and linear addresses, nor between userspace and
kernel addresses.
. Less work is done when context switching, leading to a net
performance increase. (8% faster on my machine for 'make servers'.)
. The layout and configuration of the GDT makes sysenter and syscall
possible.
2012-05-07 16:03:35 +02:00
|
|
|
/* Spare pages are used to allocate memory before VM has its own page
|
|
|
|
* table that things (i.e. arbitrary physical memory) can be mapped into.
|
|
|
|
* We get it by pre-allocating it in our bss (allocated and mapped in by
|
|
|
|
* the kernel) in static_sparepages. We also need the physical addresses
|
|
|
|
* though; we look them up now so they are ready for use.
|
|
|
|
*/
|
2012-10-31 19:24:14 +01:00
|
|
|
#if defined(__arm__)
|
|
|
|
missing_sparedirs = 0;
|
2013-02-10 19:37:12 +01:00
|
|
|
assert(STATIC_SPAREPAGEDIRS <= SPAREPAGEDIRS);
|
2012-10-31 19:24:14 +01:00
|
|
|
for(s = 0; s < SPAREPAGEDIRS; s++) {
|
|
|
|
vir_bytes v = (sparepagedirs_mem + s*ARCH_PAGEDIR_SIZE);;
|
|
|
|
phys_bytes ph;
|
|
|
|
if((r=sys_umap(SELF, VM_D, (vir_bytes) v,
|
|
|
|
ARCH_PAGEDIR_SIZE, &ph)) != OK)
|
|
|
|
panic("pt_init: sys_umap failed: %d", r);
|
|
|
|
if(s >= STATIC_SPAREPAGEDIRS) {
|
|
|
|
sparepagedirs[s].pagedir = NULL;
|
|
|
|
missing_sparedirs++;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
sparepagedirs[s].pagedir = (void *) v;
|
|
|
|
sparepagedirs[s].phys = ph;
|
|
|
|
}
|
|
|
|
#endif
|
2009-09-23 15:33:01 +02:00
|
|
|
|
2012-12-29 01:16:33 +01:00
|
|
|
if(!(spare_pagequeue = reservedqueue_new(SPAREPAGES, 1, 1, 0)))
|
|
|
|
panic("reservedqueue_new for single pages failed");
|
|
|
|
|
2010-07-20 23:59:27 +02:00
|
|
|
assert(STATIC_SPAREPAGES < SPAREPAGES);
|
2012-12-29 01:16:33 +01:00
|
|
|
for(s = 0; s < STATIC_SPAREPAGES; s++) {
|
|
|
|
void *v = (void *) (sparepages_mem + s*VM_PAGE_SIZE);
|
No more intel/minix segments.
This commit removes all traces of Minix segments (the text/data/stack
memory map abstraction in the kernel) and significance of Intel segments
(hardware segments like CS, DS that add offsets to all addressing before
page table translation). This ultimately simplifies the memory layout
and addressing and makes the same layout possible on non-Intel
architectures.
There are only two types of addresses in the world now: virtual
and physical; even the kernel and processes have the same virtual
address space. Kernel and user processes can be distinguished at a
glance as processes won't use 0xF0000000 and above.
No static pre-allocated memory sizes exist any more.
Changes to booting:
. The pre_init.c leaves the kernel and modules exactly as
they were left by the bootloader in physical memory
. The kernel starts running using physical addressing,
loaded at a fixed location given in its linker script by the
bootloader. All code and data in this phase are linked to
this fixed low location.
. It makes a bootstrap pagetable to map itself to a
fixed high location (also in linker script) and jumps to
the high address. All code and data then use this high addressing.
. All code/data symbols linked at the low addresses are prefixed by
an objcopy step with __k_unpaged_*, so that that code cannot
reference highly-linked symbols (which aren't valid yet) or vice
versa (symbols that aren't valid any more).
. The two addressing modes are separated in the linker script by
collecting the unpaged_*.o objects and linking them with low
addresses, and linking the rest high. Some objects are linked
twice, once low and once high.
. The bootstrap phase passes a lot of information (e.g. free memory
list, physical location of the modules, etc.) using the kinfo
struct.
. After this bootstrap the low-linked part is freed.
. The kernel maps in VM into the bootstrap page table so that VM can
begin executing. Its first job is to make page tables for all other
boot processes. So VM runs before RS, and RS gets a fully dynamic,
VM-managed address space. VM gets its privilege info from RS as usual
but that happens after RS starts running.
. Both the kernel loading VM and VM organizing boot processes happen
using the libexec logic. This removes the last reason for VM to
still know much about exec() and vm/exec.c is gone.
Further Implementation:
. All segments are based at 0 and have a 4 GB limit.
. The kernel is mapped in at the top of the virtual address
space so as not to constrain the user processes.
. Processes do not use segments from the LDT at all; there are
no segments in the LDT any more, so no LLDT is needed.
. The Minix segments T/D/S are gone and so none of the
user-space or in-kernel copy functions use them. The copy
functions use a process endpoint of NONE to realize it's
a physical address, virtual otherwise.
. The umap call only makes sense to translate a virtual address
to a physical address now.
. Segments-related calls like newmap and alloc_segments are gone.
. All segments-related translation in VM is gone (vir2map etc).
. Initialization in VM is simpler as no moving around is necessary.
. VM and all other boot processes can be linked wherever they wish
and will be mapped in at the right location by the kernel and VM
respectively.
Other changes:
. The multiboot code is less special: it does not use mb_print
for its diagnostics any more but uses printf() as normal, saving
the output into the diagnostics buffer, only printing to the
screen using the direct print functions if a panic() occurs.
. The multiboot code uses the flexible 'free memory map list'
style to receive the list of free memory if available.
. The kernel determines the memory layout of the processes to
a degree: it tells VM where the kernel starts and ends and
where the kernel wants the top of the process to be. VM then
uses this entire range, i.e. the stack is right at the top,
and mmap()ped bits of memory are placed below that downwards,
and the break grows upwards.
Other Consequences:
. Every process gets its own page table as address spaces
can't be separated any more by segments.
. As all segments are 0-based, there is no distinction between
virtual and linear addresses, nor between userspace and
kernel addresses.
. Less work is done when context switching, leading to a net
performance increase. (8% faster on my machine for 'make servers'.)
. The layout and configuration of the GDT makes sysenter and syscall
possible.
2012-05-07 16:03:35 +02:00
|
|
|
phys_bytes ph;
|
2012-10-31 19:24:14 +01:00
|
|
|
if((r=sys_umap(SELF, VM_D, (vir_bytes) v,
|
|
|
|
VM_PAGE_SIZE*SPAREPAGES, &ph)) != OK)
|
No more intel/minix segments.
This commit removes all traces of Minix segments (the text/data/stack
memory map abstraction in the kernel) and significance of Intel segments
(hardware segments like CS, DS that add offsets to all addressing before
page table translation). This ultimately simplifies the memory layout
and addressing and makes the same layout possible on non-Intel
architectures.
There are only two types of addresses in the world now: virtual
and physical; even the kernel and processes have the same virtual
address space. Kernel and user processes can be distinguished at a
glance as processes won't use 0xF0000000 and above.
No static pre-allocated memory sizes exist any more.
Changes to booting:
. The pre_init.c leaves the kernel and modules exactly as
they were left by the bootloader in physical memory
. The kernel starts running using physical addressing,
loaded at a fixed location given in its linker script by the
bootloader. All code and data in this phase are linked to
this fixed low location.
. It makes a bootstrap pagetable to map itself to a
fixed high location (also in linker script) and jumps to
the high address. All code and data then use this high addressing.
. All code/data symbols linked at the low addresses are prefixed by
an objcopy step with __k_unpaged_*, so that that code cannot
reference highly-linked symbols (which aren't valid yet) or vice
versa (symbols that aren't valid any more).
. The two addressing modes are separated in the linker script by
collecting the unpaged_*.o objects and linking them with low
addresses, and linking the rest high. Some objects are linked
twice, once low and once high.
. The bootstrap phase passes a lot of information (e.g. free memory
list, physical location of the modules, etc.) using the kinfo
struct.
. After this bootstrap the low-linked part is freed.
. The kernel maps in VM into the bootstrap page table so that VM can
begin executing. Its first job is to make page tables for all other
boot processes. So VM runs before RS, and RS gets a fully dynamic,
VM-managed address space. VM gets its privilege info from RS as usual
but that happens after RS starts running.
. Both the kernel loading VM and VM organizing boot processes happen
using the libexec logic. This removes the last reason for VM to
still know much about exec() and vm/exec.c is gone.
Further Implementation:
. All segments are based at 0 and have a 4 GB limit.
. The kernel is mapped in at the top of the virtual address
space so as not to constrain the user processes.
. Processes do not use segments from the LDT at all; there are
no segments in the LDT any more, so no LLDT is needed.
. The Minix segments T/D/S are gone and so none of the
user-space or in-kernel copy functions use them. The copy
functions use a process endpoint of NONE to realize it's
a physical address, virtual otherwise.
. The umap call only makes sense to translate a virtual address
to a physical address now.
. Segments-related calls like newmap and alloc_segments are gone.
. All segments-related translation in VM is gone (vir2map etc).
. Initialization in VM is simpler as no moving around is necessary.
. VM and all other boot processes can be linked wherever they wish
and will be mapped in at the right location by the kernel and VM
respectively.
Other changes:
. The multiboot code is less special: it does not use mb_print
for its diagnostics any more but uses printf() as normal, saving
the output into the diagnostics buffer, only printing to the
screen using the direct print functions if a panic() occurs.
. The multiboot code uses the flexible 'free memory map list'
style to receive the list of free memory if available.
. The kernel determines the memory layout of the processes to
a degree: it tells VM where the kernel starts and ends and
where the kernel wants the top of the process to be. VM then
uses this entire range, i.e. the stack is right at the top,
and mmap()ped bits of memory are placed below that downwards,
and the break grows upwards.
Other Consequences:
. Every process gets its own page table as address spaces
can't be separated any more by segments.
. As all segments are 0-based, there is no distinction between
virtual and linear addresses, nor between userspace and
kernel addresses.
. Less work is done when context switching, leading to a net
performance increase. (8% faster on my machine for 'make servers'.)
. The layout and configuration of the GDT makes sysenter and syscall
possible.
2012-05-07 16:03:35 +02:00
|
|
|
panic("pt_init: sys_umap failed: %d", r);
|
2012-12-29 01:16:33 +01:00
|
|
|
reservedqueue_add(spare_pagequeue, v, ph);
|
2009-05-11 21:11:37 +02:00
|
|
|
}
|
2009-05-12 13:38:29 +02:00
|
|
|
|
2012-10-31 19:24:14 +01:00
|
|
|
#if defined(__i386__)
|
2009-09-21 16:49:49 +02:00
|
|
|
/* global bit and 4MB pages available? */
|
|
|
|
global_bit_ok = _cpufeature(_CPUF_I386_PGE);
|
|
|
|
bigpage_ok = _cpufeature(_CPUF_I386_PSE);
|
|
|
|
|
|
|
|
/* Set bit for PTE's and PDE's if available. */
|
|
|
|
if(global_bit_ok)
|
|
|
|
global_bit = I386_VM_GLOBAL;
|
2012-10-31 19:24:14 +01:00
|
|
|
#endif
|
2012-11-09 19:00:46 +01:00
|
|
|
|
2009-11-11 18:02:45 +01:00
|
|
|
/* Now reserve another pde for kernel's own mappings. */
|
|
|
|
{
|
|
|
|
int kernmap_pde;
|
|
|
|
phys_bytes addr, len;
|
2013-08-20 14:02:33 +02:00
|
|
|
int flags, pindex = 0;
|
2009-11-11 18:02:45 +01:00
|
|
|
u32_t offset = 0;
|
|
|
|
|
No more intel/minix segments.
This commit removes all traces of Minix segments (the text/data/stack
memory map abstraction in the kernel) and significance of Intel segments
(hardware segments like CS, DS that add offsets to all addressing before
page table translation). This ultimately simplifies the memory layout
and addressing and makes the same layout possible on non-Intel
architectures.
There are only two types of addresses in the world now: virtual
and physical; even the kernel and processes have the same virtual
address space. Kernel and user processes can be distinguished at a
glance as processes won't use 0xF0000000 and above.
No static pre-allocated memory sizes exist any more.
Changes to booting:
. The pre_init.c leaves the kernel and modules exactly as
they were left by the bootloader in physical memory
. The kernel starts running using physical addressing,
loaded at a fixed location given in its linker script by the
bootloader. All code and data in this phase are linked to
this fixed low location.
. It makes a bootstrap pagetable to map itself to a
fixed high location (also in linker script) and jumps to
the high address. All code and data then use this high addressing.
. All code/data symbols linked at the low addresses are prefixed by
an objcopy step with __k_unpaged_*, so that that code cannot
reference highly-linked symbols (which aren't valid yet) or vice
versa (symbols that aren't valid any more).
. The two addressing modes are separated in the linker script by
collecting the unpaged_*.o objects and linking them with low
addresses, and linking the rest high. Some objects are linked
twice, once low and once high.
. The bootstrap phase passes a lot of information (e.g. free memory
list, physical location of the modules, etc.) using the kinfo
struct.
. After this bootstrap the low-linked part is freed.
. The kernel maps in VM into the bootstrap page table so that VM can
begin executing. Its first job is to make page tables for all other
boot processes. So VM runs before RS, and RS gets a fully dynamic,
VM-managed address space. VM gets its privilege info from RS as usual
but that happens after RS starts running.
. Both the kernel loading VM and VM organizing boot processes happen
using the libexec logic. This removes the last reason for VM to
still know much about exec() and vm/exec.c is gone.
Further Implementation:
. All segments are based at 0 and have a 4 GB limit.
. The kernel is mapped in at the top of the virtual address
space so as not to constrain the user processes.
. Processes do not use segments from the LDT at all; there are
no segments in the LDT any more, so no LLDT is needed.
. The Minix segments T/D/S are gone and so none of the
user-space or in-kernel copy functions use them. The copy
functions use a process endpoint of NONE to realize it's
a physical address, virtual otherwise.
. The umap call only makes sense to translate a virtual address
to a physical address now.
. Segments-related calls like newmap and alloc_segments are gone.
. All segments-related translation in VM is gone (vir2map etc).
. Initialization in VM is simpler as no moving around is necessary.
. VM and all other boot processes can be linked wherever they wish
and will be mapped in at the right location by the kernel and VM
respectively.
Other changes:
. The multiboot code is less special: it does not use mb_print
for its diagnostics any more but uses printf() as normal, saving
the output into the diagnostics buffer, only printing to the
screen using the direct print functions if a panic() occurs.
. The multiboot code uses the flexible 'free memory map list'
style to receive the list of free memory if available.
. The kernel determines the memory layout of the processes to
a degree: it tells VM where the kernel starts and ends and
where the kernel wants the top of the process to be. VM then
uses this entire range, i.e. the stack is right at the top,
and mmap()ped bits of memory are placed below that downwards,
and the break grows upwards.
Other Consequences:
. Every process gets its own page table as address spaces
can't be separated any more by segments.
. As all segments are 0-based, there is no distinction between
virtual and linear addresses, nor between userspace and
kernel addresses.
. Less work is done when context switching, leading to a net
performance increase. (8% faster on my machine for 'make servers'.)
. The layout and configuration of the GDT makes sysenter and syscall
possible.
2012-05-07 16:03:35 +02:00
|
|
|
kernmap_pde = freepde();
|
2012-10-31 19:24:14 +01:00
|
|
|
offset = kernmap_pde * ARCH_BIG_PAGE_SIZE;
|
2009-11-11 18:02:45 +01:00
|
|
|
|
2013-08-20 14:02:33 +02:00
|
|
|
while(sys_vmctl_get_mapping(pindex, &addr, &len,
|
2009-11-11 18:02:45 +01:00
|
|
|
&flags) == OK) {
|
2013-01-29 18:52:08 +01:00
|
|
|
int usedpde;
|
2009-11-11 18:02:45 +01:00
|
|
|
vir_bytes vir;
|
2013-08-20 14:02:33 +02:00
|
|
|
if(pindex >= MAX_KERNMAPPINGS)
|
|
|
|
panic("VM: too many kernel mappings: %d", pindex);
|
|
|
|
kern_mappings[pindex].phys_addr = addr;
|
|
|
|
kern_mappings[pindex].len = len;
|
|
|
|
kern_mappings[pindex].flags = flags;
|
|
|
|
kern_mappings[pindex].vir_addr = offset;
|
|
|
|
kern_mappings[pindex].flags =
|
2012-10-31 19:24:14 +01:00
|
|
|
ARCH_VM_PTE_PRESENT;
|
2009-11-11 18:02:45 +01:00
|
|
|
if(flags & VMMF_UNCACHED)
|
2012-10-31 19:24:14 +01:00
|
|
|
#if defined(__i386__)
|
2013-08-20 14:02:33 +02:00
|
|
|
kern_mappings[pindex].flags |= PTF_NOCACHE;
|
2012-10-31 19:24:14 +01:00
|
|
|
#elif defined(__arm__)
|
2013-08-20 14:02:33 +02:00
|
|
|
kern_mappings[pindex].flags |= ARM_VM_PTE_DEVICE;
|
2013-09-13 09:44:26 +02:00
|
|
|
else {
|
|
|
|
kern_mappings[pindex].flags |= ARM_VM_PTE_CACHED;
|
|
|
|
}
|
2012-10-31 19:24:14 +01:00
|
|
|
#endif
|
2012-07-18 18:53:20 +02:00
|
|
|
if(flags & VMMF_USER)
|
2013-08-20 14:02:33 +02:00
|
|
|
kern_mappings[pindex].flags |= ARCH_VM_PTE_USER;
|
2012-10-31 19:24:14 +01:00
|
|
|
#if defined(__arm__)
|
|
|
|
else
|
2013-08-20 14:02:33 +02:00
|
|
|
kern_mappings[pindex].flags |= ARM_VM_PTE_SUPER;
|
2012-10-31 19:24:14 +01:00
|
|
|
#endif
|
2012-07-18 18:53:20 +02:00
|
|
|
if(flags & VMMF_WRITE)
|
2013-08-20 14:02:33 +02:00
|
|
|
kern_mappings[pindex].flags |= ARCH_VM_PTE_RW;
|
2013-09-13 09:48:45 +02:00
|
|
|
#if defined(__arm__)
|
|
|
|
else
|
|
|
|
kern_mappings[pindex].flags |= ARCH_VM_PTE_RO;
|
|
|
|
#endif
|
|
|
|
|
2012-10-31 19:24:14 +01:00
|
|
|
#if defined(__i386__)
|
2012-07-18 18:53:20 +02:00
|
|
|
if(flags & VMMF_GLO)
|
2013-08-20 14:02:33 +02:00
|
|
|
kern_mappings[pindex].flags |= I386_VM_GLOBAL;
|
2012-10-31 19:24:14 +01:00
|
|
|
#endif
|
2013-09-13 09:48:45 +02:00
|
|
|
|
2012-10-31 19:24:14 +01:00
|
|
|
if(addr % VM_PAGE_SIZE)
|
2013-02-02 01:55:35 +01:00
|
|
|
panic("VM: addr unaligned: %lu", addr);
|
2012-10-31 19:24:14 +01:00
|
|
|
if(len % VM_PAGE_SIZE)
|
2013-02-02 01:55:35 +01:00
|
|
|
panic("VM: len unaligned: %lu", len);
|
No more intel/minix segments.
This commit removes all traces of Minix segments (the text/data/stack
memory map abstraction in the kernel) and significance of Intel segments
(hardware segments like CS, DS that add offsets to all addressing before
page table translation). This ultimately simplifies the memory layout
and addressing and makes the same layout possible on non-Intel
architectures.
There are only two types of addresses in the world now: virtual
and physical; even the kernel and processes have the same virtual
address space. Kernel and user processes can be distinguished at a
glance as processes won't use 0xF0000000 and above.
No static pre-allocated memory sizes exist any more.
Changes to booting:
. The pre_init.c leaves the kernel and modules exactly as
they were left by the bootloader in physical memory
. The kernel starts running using physical addressing,
loaded at a fixed location given in its linker script by the
bootloader. All code and data in this phase are linked to
this fixed low location.
. It makes a bootstrap pagetable to map itself to a
fixed high location (also in linker script) and jumps to
the high address. All code and data then use this high addressing.
. All code/data symbols linked at the low addresses are prefixed by
an objcopy step with __k_unpaged_*, so that that code cannot
reference highly-linked symbols (which aren't valid yet) or vice
versa (symbols that aren't valid any more).
. The two addressing modes are separated in the linker script by
collecting the unpaged_*.o objects and linking them with low
addresses, and linking the rest high. Some objects are linked
twice, once low and once high.
. The bootstrap phase passes a lot of information (e.g. free memory
list, physical location of the modules, etc.) using the kinfo
struct.
. After this bootstrap the low-linked part is freed.
. The kernel maps in VM into the bootstrap page table so that VM can
begin executing. Its first job is to make page tables for all other
boot processes. So VM runs before RS, and RS gets a fully dynamic,
VM-managed address space. VM gets its privilege info from RS as usual
but that happens after RS starts running.
. Both the kernel loading VM and VM organizing boot processes happen
using the libexec logic. This removes the last reason for VM to
still know much about exec() and vm/exec.c is gone.
Further Implementation:
. All segments are based at 0 and have a 4 GB limit.
. The kernel is mapped in at the top of the virtual address
space so as not to constrain the user processes.
. Processes do not use segments from the LDT at all; there are
no segments in the LDT any more, so no LLDT is needed.
. The Minix segments T/D/S are gone and so none of the
user-space or in-kernel copy functions use them. The copy
functions use a process endpoint of NONE to realize it's
a physical address, virtual otherwise.
. The umap call only makes sense to translate a virtual address
to a physical address now.
. Segments-related calls like newmap and alloc_segments are gone.
. All segments-related translation in VM is gone (vir2map etc).
. Initialization in VM is simpler as no moving around is necessary.
. VM and all other boot processes can be linked wherever they wish
and will be mapped in at the right location by the kernel and VM
respectively.
Other changes:
. The multiboot code is less special: it does not use mb_print
for its diagnostics any more but uses printf() as normal, saving
the output into the diagnostics buffer, only printing to the
screen using the direct print functions if a panic() occurs.
. The multiboot code uses the flexible 'free memory map list'
style to receive the list of free memory if available.
. The kernel determines the memory layout of the processes to
a degree: it tells VM where the kernel starts and ends and
where the kernel wants the top of the process to be. VM then
uses this entire range, i.e. the stack is right at the top,
and mmap()ped bits of memory are placed below that downwards,
and the break grows upwards.
Other Consequences:
. Every process gets its own page table as address spaces
can't be separated any more by segments.
. As all segments are 0-based, there is no distinction between
virtual and linear addresses, nor between userspace and
kernel addresses.
. Less work is done when context switching, leading to a net
performance increase. (8% faster on my machine for 'make servers'.)
. The layout and configuration of the GDT makes sysenter and syscall
possible.
2012-05-07 16:03:35 +02:00
|
|
|
vir = offset;
|
2013-08-20 14:02:33 +02:00
|
|
|
if(sys_vmctl_reply_mapping(pindex, vir) != OK)
|
2010-03-05 16:05:11 +01:00
|
|
|
panic("VM: reply failed");
|
2009-11-11 18:02:45 +01:00
|
|
|
offset += len;
|
2013-08-20 14:02:33 +02:00
|
|
|
pindex++;
|
2009-11-11 18:02:45 +01:00
|
|
|
kernmappings++;
|
2013-01-29 18:52:08 +01:00
|
|
|
|
2013-03-06 13:17:07 +01:00
|
|
|
usedpde = ARCH_VM_PDE(offset);
|
2013-01-29 18:52:08 +01:00
|
|
|
while(usedpde > kernmap_pde) {
|
|
|
|
int newpde = freepde();
|
|
|
|
assert(newpde == kernmap_pde+1);
|
|
|
|
kernmap_pde = newpde;
|
|
|
|
}
|
2009-11-11 18:02:45 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2013-02-10 19:37:12 +01:00
|
|
|
/* Reserve PDEs available for mapping in the page directories. */
|
|
|
|
{
|
|
|
|
int pd;
|
|
|
|
for(pd = 0; pd < MAX_PAGEDIR_PDES; pd++) {
|
|
|
|
struct pdm *pdm = &pagedir_mappings[pd];
|
|
|
|
pdm->pdeno = freepde();
|
|
|
|
phys_bytes ph;
|
|
|
|
|
|
|
|
/* Allocate us a page table in which to
|
|
|
|
* remember page directory pointers.
|
|
|
|
*/
|
|
|
|
if(!(pdm->page_directories =
|
|
|
|
vm_allocpage(&ph, VMP_PAGETABLE))) {
|
|
|
|
panic("no virt addr for vm mappings");
|
|
|
|
}
|
|
|
|
memset(pdm->page_directories, 0, VM_PAGE_SIZE);
|
|
|
|
pdm->phys = ph;
|
|
|
|
|
2012-10-31 19:24:14 +01:00
|
|
|
#if defined(__i386__)
|
2013-02-10 19:37:12 +01:00
|
|
|
pdm->val = (ph & ARCH_VM_ADDR_MASK) |
|
|
|
|
ARCH_VM_PDE_PRESENT | ARCH_VM_PTE_RW;
|
2012-10-31 19:24:14 +01:00
|
|
|
#elif defined(__arm__)
|
2013-02-10 20:20:14 +01:00
|
|
|
pdm->val = (ph & ARCH_VM_PDE_MASK)
|
|
|
|
| ARCH_VM_PDE_PRESENT
|
2013-09-13 09:40:20 +02:00
|
|
|
| ARM_VM_PTE_CACHED
|
2013-02-10 20:20:14 +01:00
|
|
|
| ARM_VM_PDE_DOMAIN; //LSC FIXME
|
2012-10-31 19:24:14 +01:00
|
|
|
#endif
|
2013-02-10 19:37:12 +01:00
|
|
|
}
|
|
|
|
}
|
No more intel/minix segments.
This commit removes all traces of Minix segments (the text/data/stack
memory map abstraction in the kernel) and significance of Intel segments
(hardware segments like CS, DS that add offsets to all addressing before
page table translation). This ultimately simplifies the memory layout
and addressing and makes the same layout possible on non-Intel
architectures.
There are only two types of addresses in the world now: virtual
and physical; even the kernel and processes have the same virtual
address space. Kernel and user processes can be distinguished at a
glance as processes won't use 0xF0000000 and above.
No static pre-allocated memory sizes exist any more.
Changes to booting:
. The pre_init.c leaves the kernel and modules exactly as
they were left by the bootloader in physical memory
. The kernel starts running using physical addressing,
loaded at a fixed location given in its linker script by the
bootloader. All code and data in this phase are linked to
this fixed low location.
. It makes a bootstrap pagetable to map itself to a
fixed high location (also in linker script) and jumps to
the high address. All code and data then use this high addressing.
. All code/data symbols linked at the low addresses are prefixed by
an objcopy step with __k_unpaged_*, so that that code cannot
reference highly-linked symbols (which aren't valid yet) or vice
versa (symbols that aren't valid any more).
. The two addressing modes are separated in the linker script by
collecting the unpaged_*.o objects and linking them with low
addresses, and linking the rest high. Some objects are linked
twice, once low and once high.
. The bootstrap phase passes a lot of information (e.g. free memory
list, physical location of the modules, etc.) using the kinfo
struct.
. After this bootstrap the low-linked part is freed.
. The kernel maps in VM into the bootstrap page table so that VM can
begin executing. Its first job is to make page tables for all other
boot processes. So VM runs before RS, and RS gets a fully dynamic,
VM-managed address space. VM gets its privilege info from RS as usual
but that happens after RS starts running.
. Both the kernel loading VM and VM organizing boot processes happen
using the libexec logic. This removes the last reason for VM to
still know much about exec() and vm/exec.c is gone.
Further Implementation:
. All segments are based at 0 and have a 4 GB limit.
. The kernel is mapped in at the top of the virtual address
space so as not to constrain the user processes.
. Processes do not use segments from the LDT at all; there are
no segments in the LDT any more, so no LLDT is needed.
. The Minix segments T/D/S are gone and so none of the
user-space or in-kernel copy functions use them. The copy
functions use a process endpoint of NONE to realize it's
a physical address, virtual otherwise.
. The umap call only makes sense to translate a virtual address
to a physical address now.
. Segments-related calls like newmap and alloc_segments are gone.
. All segments-related translation in VM is gone (vir2map etc).
. Initialization in VM is simpler as no moving around is necessary.
. VM and all other boot processes can be linked wherever they wish
and will be mapped in at the right location by the kernel and VM
respectively.
Other changes:
. The multiboot code is less special: it does not use mb_print
for its diagnostics any more but uses printf() as normal, saving
the output into the diagnostics buffer, only printing to the
screen using the direct print functions if a panic() occurs.
. The multiboot code uses the flexible 'free memory map list'
style to receive the list of free memory if available.
. The kernel determines the memory layout of the processes to
a degree: it tells VM where the kernel starts and ends and
where the kernel wants the top of the process to be. VM then
uses this entire range, i.e. the stack is right at the top,
and mmap()ped bits of memory are placed below that downwards,
and the break grows upwards.
Other Consequences:
. Every process gets its own page table as address spaces
can't be separated any more by segments.
. As all segments are 0-based, there is no distinction between
virtual and linear addresses, nor between userspace and
kernel addresses.
. Less work is done when context switching, leading to a net
performance increase. (8% faster on my machine for 'make servers'.)
. The layout and configuration of the GDT makes sysenter and syscall
possible.
2012-05-07 16:03:35 +02:00
|
|
|
|
|
|
|
/* All right. Now. We have to make our own page directory and page tables,
|
|
|
|
* that the kernel has already set up, accessible to us. It's easier to
|
|
|
|
* understand if we just copy all the required pages (i.e. page directory
|
|
|
|
* and page tables), and set up the pointers as if VM had done it itself.
|
|
|
|
*
|
|
|
|
* This allocation will happen without using any page table, and just
|
|
|
|
* uses spare pages.
|
|
|
|
*/
|
|
|
|
newpt = &vmprocess->vm_pt;
|
|
|
|
if(pt_new(newpt) != OK)
|
|
|
|
panic("vm pt_new failed");
|
|
|
|
|
|
|
|
/* Get our current pagedir so we can see it. */
|
2012-10-31 19:24:14 +01:00
|
|
|
#if defined(__i386__)
|
2012-08-11 17:45:35 +02:00
|
|
|
if(sys_vmctl_get_pdbr(SELF, &mypdbr) != OK)
|
2012-10-31 19:24:14 +01:00
|
|
|
#elif defined(__arm__)
|
|
|
|
if(sys_vmctl_get_pdbr(SELF, &myttbr) != OK)
|
|
|
|
#endif
|
2013-09-13 09:40:20 +02:00
|
|
|
|
2012-08-11 17:45:35 +02:00
|
|
|
panic("VM: sys_vmctl_get_pdbr failed");
|
2012-10-31 19:24:14 +01:00
|
|
|
#if defined(__i386__)
|
2012-08-11 17:45:35 +02:00
|
|
|
if(sys_vircopy(NONE, mypdbr, SELF,
|
make vfs & filesystems use failable copying
Change the kernel to add features to vircopy and safecopies so that
transparent copy fixing won't happen to avoid deadlocks, and such copies
fail with EFAULT.
Transparently making copying work from filesystems (as normally done by
the kernel & VM when copying fails because of missing/readonly memory)
is problematic as it can happen, for file-mapped ranges, that the
same filesystem that is blocked on the copy request is needed to satisfy
the memory range, leading to deadlock. Ditto for VFS itself, if done with
a blocking call.
This change makes the copying done from a filesystem fail in such cases
with EFAULT by VFS adding the CPF_TRY flag to the grants. If a FS call
fails with EFAULT, VFS will then request the range to be made available
to VM after the FS is unblocked, allowing it to be used to satisfy the
range if need be in another VFS thread.
Similarly, for datacopies that VFS itself does, it uses the failable
vircopy variant and callers use a wrapper that talks to VM if necessary
to get the copy to work.
. kernel: add CPF_TRY flag to safecopies
. kernel: only request writable ranges to VM for the
target buffer when copying fails
. do copying in VFS TRY-first
. some fixes in VM to build SANITYCHECK mode
. add regression test for the cases where
- a FS system call needs memory mapped in a process that the
FS itself must map.
- such a range covers more than one file-mapped region.
. add 'try' mode to vircopy, physcopy
. add flags field to copy kernel call messages
. if CP_FLAG_TRY is set, do not transparently try
to fix memory ranges
. for use by VFS when accessing user buffers to avoid
deadlock
. remove some obsolete backwards compatibility assignments
. VFS: let thread scheduling work for VM requests too
Allows VFS to make calls to VM while suspending and resuming
the currently running thread. Does currently not work for the
main thread.
. VM: add fix memory range call for use by VFS
Change-Id: I295794269cea51a3163519a9cfe5901301d90b32
2014-01-16 14:22:13 +01:00
|
|
|
(vir_bytes) currentpagedir, VM_PAGE_SIZE, 0) != OK)
|
2012-10-31 19:24:14 +01:00
|
|
|
#elif defined(__arm__)
|
|
|
|
if(sys_vircopy(NONE, myttbr, SELF,
|
make vfs & filesystems use failable copying
Change the kernel to add features to vircopy and safecopies so that
transparent copy fixing won't happen to avoid deadlocks, and such copies
fail with EFAULT.
Transparently making copying work from filesystems (as normally done by
the kernel & VM when copying fails because of missing/readonly memory)
is problematic as it can happen, for file-mapped ranges, that the
same filesystem that is blocked on the copy request is needed to satisfy
the memory range, leading to deadlock. Ditto for VFS itself, if done with
a blocking call.
This change makes the copying done from a filesystem fail in such cases
with EFAULT by VFS adding the CPF_TRY flag to the grants. If a FS call
fails with EFAULT, VFS will then request the range to be made available
to VM after the FS is unblocked, allowing it to be used to satisfy the
range if need be in another VFS thread.
Similarly, for datacopies that VFS itself does, it uses the failable
vircopy variant and callers use a wrapper that talks to VM if necessary
to get the copy to work.
. kernel: add CPF_TRY flag to safecopies
. kernel: only request writable ranges to VM for the
target buffer when copying fails
. do copying in VFS TRY-first
. some fixes in VM to build SANITYCHECK mode
. add regression test for the cases where
- a FS system call needs memory mapped in a process that the
FS itself must map.
- such a range covers more than one file-mapped region.
. add 'try' mode to vircopy, physcopy
. add flags field to copy kernel call messages
. if CP_FLAG_TRY is set, do not transparently try
to fix memory ranges
. for use by VFS when accessing user buffers to avoid
deadlock
. remove some obsolete backwards compatibility assignments
. VFS: let thread scheduling work for VM requests too
Allows VFS to make calls to VM while suspending and resuming
the currently running thread. Does currently not work for the
main thread.
. VM: add fix memory range call for use by VFS
Change-Id: I295794269cea51a3163519a9cfe5901301d90b32
2014-01-16 14:22:13 +01:00
|
|
|
(vir_bytes) currentpagedir, ARCH_PAGEDIR_SIZE, 0) != OK)
|
2012-10-31 19:24:14 +01:00
|
|
|
#endif
|
No more intel/minix segments.
This commit removes all traces of Minix segments (the text/data/stack
memory map abstraction in the kernel) and significance of Intel segments
(hardware segments like CS, DS that add offsets to all addressing before
page table translation). This ultimately simplifies the memory layout
and addressing and makes the same layout possible on non-Intel
architectures.
There are only two types of addresses in the world now: virtual
and physical; even the kernel and processes have the same virtual
address space. Kernel and user processes can be distinguished at a
glance as processes won't use 0xF0000000 and above.
No static pre-allocated memory sizes exist any more.
Changes to booting:
. The pre_init.c leaves the kernel and modules exactly as
they were left by the bootloader in physical memory
. The kernel starts running using physical addressing,
loaded at a fixed location given in its linker script by the
bootloader. All code and data in this phase are linked to
this fixed low location.
. It makes a bootstrap pagetable to map itself to a
fixed high location (also in linker script) and jumps to
the high address. All code and data then use this high addressing.
. All code/data symbols linked at the low addresses are prefixed by
an objcopy step with __k_unpaged_*, so that that code cannot
reference highly-linked symbols (which aren't valid yet) or vice
versa (symbols that aren't valid any more).
. The two addressing modes are separated in the linker script by
collecting the unpaged_*.o objects and linking them with low
addresses, and linking the rest high. Some objects are linked
twice, once low and once high.
. The bootstrap phase passes a lot of information (e.g. free memory
list, physical location of the modules, etc.) using the kinfo
struct.
. After this bootstrap the low-linked part is freed.
. The kernel maps in VM into the bootstrap page table so that VM can
begin executing. Its first job is to make page tables for all other
boot processes. So VM runs before RS, and RS gets a fully dynamic,
VM-managed address space. VM gets its privilege info from RS as usual
but that happens after RS starts running.
. Both the kernel loading VM and VM organizing boot processes happen
using the libexec logic. This removes the last reason for VM to
still know much about exec() and vm/exec.c is gone.
Further Implementation:
. All segments are based at 0 and have a 4 GB limit.
. The kernel is mapped in at the top of the virtual address
space so as not to constrain the user processes.
. Processes do not use segments from the LDT at all; there are
no segments in the LDT any more, so no LLDT is needed.
. The Minix segments T/D/S are gone and so none of the
user-space or in-kernel copy functions use them. The copy
functions use a process endpoint of NONE to realize it's
a physical address, virtual otherwise.
. The umap call only makes sense to translate a virtual address
to a physical address now.
. Segments-related calls like newmap and alloc_segments are gone.
. All segments-related translation in VM is gone (vir2map etc).
. Initialization in VM is simpler as no moving around is necessary.
. VM and all other boot processes can be linked wherever they wish
and will be mapped in at the right location by the kernel and VM
respectively.
Other changes:
. The multiboot code is less special: it does not use mb_print
for its diagnostics any more but uses printf() as normal, saving
the output into the diagnostics buffer, only printing to the
screen using the direct print functions if a panic() occurs.
. The multiboot code uses the flexible 'free memory map list'
style to receive the list of free memory if available.
. The kernel determines the memory layout of the processes to
a degree: it tells VM where the kernel starts and ends and
where the kernel wants the top of the process to be. VM then
uses this entire range, i.e. the stack is right at the top,
and mmap()ped bits of memory are placed below that downwards,
and the break grows upwards.
Other Consequences:
. Every process gets its own page table as address spaces
can't be separated any more by segments.
. As all segments are 0-based, there is no distinction between
virtual and linear addresses, nor between userspace and
kernel addresses.
. Less work is done when context switching, leading to a net
performance increase. (8% faster on my machine for 'make servers'.)
. The layout and configuration of the GDT makes sysenter and syscall
possible.
2012-05-07 16:03:35 +02:00
|
|
|
panic("VM: sys_vircopy failed");
|
|
|
|
|
|
|
|
/* We have mapped in kernel ourselves; now copy mappings for VM
|
|
|
|
* that kernel made, including allocations for BSS. Skip identity
|
|
|
|
* mapping bits; just map in VM.
|
|
|
|
*/
|
2012-10-31 19:24:14 +01:00
|
|
|
for(p = 0; p < ARCH_VM_DIR_ENTRIES; p++) {
|
No more intel/minix segments.
This commit removes all traces of Minix segments (the text/data/stack
memory map abstraction in the kernel) and significance of Intel segments
(hardware segments like CS, DS that add offsets to all addressing before
page table translation). This ultimately simplifies the memory layout
and addressing and makes the same layout possible on non-Intel
architectures.
There are only two types of addresses in the world now: virtual
and physical; even the kernel and processes have the same virtual
address space. Kernel and user processes can be distinguished at a
glance as processes won't use 0xF0000000 and above.
No static pre-allocated memory sizes exist any more.
Changes to booting:
. The pre_init.c leaves the kernel and modules exactly as
they were left by the bootloader in physical memory
. The kernel starts running using physical addressing,
loaded at a fixed location given in its linker script by the
bootloader. All code and data in this phase are linked to
this fixed low location.
. It makes a bootstrap pagetable to map itself to a
fixed high location (also in linker script) and jumps to
the high address. All code and data then use this high addressing.
. All code/data symbols linked at the low addresses are prefixed by
an objcopy step with __k_unpaged_*, so that that code cannot
reference highly-linked symbols (which aren't valid yet) or vice
versa (symbols that aren't valid any more).
. The two addressing modes are separated in the linker script by
collecting the unpaged_*.o objects and linking them with low
addresses, and linking the rest high. Some objects are linked
twice, once low and once high.
. The bootstrap phase passes a lot of information (e.g. free memory
list, physical location of the modules, etc.) using the kinfo
struct.
. After this bootstrap the low-linked part is freed.
. The kernel maps in VM into the bootstrap page table so that VM can
begin executing. Its first job is to make page tables for all other
boot processes. So VM runs before RS, and RS gets a fully dynamic,
VM-managed address space. VM gets its privilege info from RS as usual
but that happens after RS starts running.
. Both the kernel loading VM and VM organizing boot processes happen
using the libexec logic. This removes the last reason for VM to
still know much about exec() and vm/exec.c is gone.
Further Implementation:
. All segments are based at 0 and have a 4 GB limit.
. The kernel is mapped in at the top of the virtual address
space so as not to constrain the user processes.
. Processes do not use segments from the LDT at all; there are
no segments in the LDT any more, so no LLDT is needed.
. The Minix segments T/D/S are gone and so none of the
user-space or in-kernel copy functions use them. The copy
functions use a process endpoint of NONE to realize it's
a physical address, virtual otherwise.
. The umap call only makes sense to translate a virtual address
to a physical address now.
. Segments-related calls like newmap and alloc_segments are gone.
. All segments-related translation in VM is gone (vir2map etc).
. Initialization in VM is simpler as no moving around is necessary.
. VM and all other boot processes can be linked wherever they wish
and will be mapped in at the right location by the kernel and VM
respectively.
Other changes:
. The multiboot code is less special: it does not use mb_print
for its diagnostics any more but uses printf() as normal, saving
the output into the diagnostics buffer, only printing to the
screen using the direct print functions if a panic() occurs.
. The multiboot code uses the flexible 'free memory map list'
style to receive the list of free memory if available.
. The kernel determines the memory layout of the processes to
a degree: it tells VM where the kernel starts and ends and
where the kernel wants the top of the process to be. VM then
uses this entire range, i.e. the stack is right at the top,
and mmap()ped bits of memory are placed below that downwards,
and the break grows upwards.
Other Consequences:
. Every process gets its own page table as address spaces
can't be separated any more by segments.
. As all segments are 0-based, there is no distinction between
virtual and linear addresses, nor between userspace and
kernel addresses.
. Less work is done when context switching, leading to a net
performance increase. (8% faster on my machine for 'make servers'.)
. The layout and configuration of the GDT makes sysenter and syscall
possible.
2012-05-07 16:03:35 +02:00
|
|
|
u32_t entry = currentpagedir[p];
|
|
|
|
phys_bytes ptaddr_kern, ptaddr_us;
|
2009-09-21 16:49:49 +02:00
|
|
|
|
No more intel/minix segments.
This commit removes all traces of Minix segments (the text/data/stack
memory map abstraction in the kernel) and significance of Intel segments
(hardware segments like CS, DS that add offsets to all addressing before
page table translation). This ultimately simplifies the memory layout
and addressing and makes the same layout possible on non-Intel
architectures.
There are only two types of addresses in the world now: virtual
and physical; even the kernel and processes have the same virtual
address space. Kernel and user processes can be distinguished at a
glance as processes won't use 0xF0000000 and above.
No static pre-allocated memory sizes exist any more.
Changes to booting:
. The pre_init.c leaves the kernel and modules exactly as
they were left by the bootloader in physical memory
. The kernel starts running using physical addressing,
loaded at a fixed location given in its linker script by the
bootloader. All code and data in this phase are linked to
this fixed low location.
. It makes a bootstrap pagetable to map itself to a
fixed high location (also in linker script) and jumps to
the high address. All code and data then use this high addressing.
. All code/data symbols linked at the low addresses are prefixed by
an objcopy step with __k_unpaged_*, so that that code cannot
reference highly-linked symbols (which aren't valid yet) or vice
versa (symbols that aren't valid any more).
. The two addressing modes are separated in the linker script by
collecting the unpaged_*.o objects and linking them with low
addresses, and linking the rest high. Some objects are linked
twice, once low and once high.
. The bootstrap phase passes a lot of information (e.g. free memory
list, physical location of the modules, etc.) using the kinfo
struct.
. After this bootstrap the low-linked part is freed.
. The kernel maps in VM into the bootstrap page table so that VM can
begin executing. Its first job is to make page tables for all other
boot processes. So VM runs before RS, and RS gets a fully dynamic,
VM-managed address space. VM gets its privilege info from RS as usual
but that happens after RS starts running.
. Both the kernel loading VM and VM organizing boot processes happen
using the libexec logic. This removes the last reason for VM to
still know much about exec() and vm/exec.c is gone.
Further Implementation:
. All segments are based at 0 and have a 4 GB limit.
. The kernel is mapped in at the top of the virtual address
space so as not to constrain the user processes.
. Processes do not use segments from the LDT at all; there are
no segments in the LDT any more, so no LLDT is needed.
. The Minix segments T/D/S are gone and so none of the
user-space or in-kernel copy functions use them. The copy
functions use a process endpoint of NONE to realize it's
a physical address, virtual otherwise.
. The umap call only makes sense to translate a virtual address
to a physical address now.
. Segments-related calls like newmap and alloc_segments are gone.
. All segments-related translation in VM is gone (vir2map etc).
. Initialization in VM is simpler as no moving around is necessary.
. VM and all other boot processes can be linked wherever they wish
and will be mapped in at the right location by the kernel and VM
respectively.
Other changes:
. The multiboot code is less special: it does not use mb_print
for its diagnostics any more but uses printf() as normal, saving
the output into the diagnostics buffer, only printing to the
screen using the direct print functions if a panic() occurs.
. The multiboot code uses the flexible 'free memory map list'
style to receive the list of free memory if available.
. The kernel determines the memory layout of the processes to
a degree: it tells VM where the kernel starts and ends and
where the kernel wants the top of the process to be. VM then
uses this entire range, i.e. the stack is right at the top,
and mmap()ped bits of memory are placed below that downwards,
and the break grows upwards.
Other Consequences:
. Every process gets its own page table as address spaces
can't be separated any more by segments.
. As all segments are 0-based, there is no distinction between
virtual and linear addresses, nor between userspace and
kernel addresses.
. Less work is done when context switching, leading to a net
performance increase. (8% faster on my machine for 'make servers'.)
. The layout and configuration of the GDT makes sysenter and syscall
possible.
2012-05-07 16:03:35 +02:00
|
|
|
/* BIGPAGEs are kernel mapping (do ourselves) or boot
|
|
|
|
* identity mapping (don't want).
|
|
|
|
*/
|
2012-10-31 19:24:14 +01:00
|
|
|
if(!(entry & ARCH_VM_PDE_PRESENT)) continue;
|
|
|
|
if((entry & ARCH_VM_BIGPAGE)) continue;
|
2009-09-21 16:49:49 +02:00
|
|
|
|
No more intel/minix segments.
This commit removes all traces of Minix segments (the text/data/stack
memory map abstraction in the kernel) and significance of Intel segments
(hardware segments like CS, DS that add offsets to all addressing before
page table translation). This ultimately simplifies the memory layout
and addressing and makes the same layout possible on non-Intel
architectures.
There are only two types of addresses in the world now: virtual
and physical; even the kernel and processes have the same virtual
address space. Kernel and user processes can be distinguished at a
glance as processes won't use 0xF0000000 and above.
No static pre-allocated memory sizes exist any more.
Changes to booting:
. The pre_init.c leaves the kernel and modules exactly as
they were left by the bootloader in physical memory
. The kernel starts running using physical addressing,
loaded at a fixed location given in its linker script by the
bootloader. All code and data in this phase are linked to
this fixed low location.
. It makes a bootstrap pagetable to map itself to a
fixed high location (also in linker script) and jumps to
the high address. All code and data then use this high addressing.
. All code/data symbols linked at the low addresses are prefixed by
an objcopy step with __k_unpaged_*, so that that code cannot
reference highly-linked symbols (which aren't valid yet) or vice
versa (symbols that aren't valid any more).
. The two addressing modes are separated in the linker script by
collecting the unpaged_*.o objects and linking them with low
addresses, and linking the rest high. Some objects are linked
twice, once low and once high.
. The bootstrap phase passes a lot of information (e.g. free memory
list, physical location of the modules, etc.) using the kinfo
struct.
. After this bootstrap the low-linked part is freed.
. The kernel maps in VM into the bootstrap page table so that VM can
begin executing. Its first job is to make page tables for all other
boot processes. So VM runs before RS, and RS gets a fully dynamic,
VM-managed address space. VM gets its privilege info from RS as usual
but that happens after RS starts running.
. Both the kernel loading VM and VM organizing boot processes happen
using the libexec logic. This removes the last reason for VM to
still know much about exec() and vm/exec.c is gone.
Further Implementation:
. All segments are based at 0 and have a 4 GB limit.
. The kernel is mapped in at the top of the virtual address
space so as not to constrain the user processes.
. Processes do not use segments from the LDT at all; there are
no segments in the LDT any more, so no LLDT is needed.
. The Minix segments T/D/S are gone and so none of the
user-space or in-kernel copy functions use them. The copy
functions use a process endpoint of NONE to realize it's
a physical address, virtual otherwise.
. The umap call only makes sense to translate a virtual address
to a physical address now.
. Segments-related calls like newmap and alloc_segments are gone.
. All segments-related translation in VM is gone (vir2map etc).
. Initialization in VM is simpler as no moving around is necessary.
. VM and all other boot processes can be linked wherever they wish
and will be mapped in at the right location by the kernel and VM
respectively.
Other changes:
. The multiboot code is less special: it does not use mb_print
for its diagnostics any more but uses printf() as normal, saving
the output into the diagnostics buffer, only printing to the
screen using the direct print functions if a panic() occurs.
. The multiboot code uses the flexible 'free memory map list'
style to receive the list of free memory if available.
. The kernel determines the memory layout of the processes to
a degree: it tells VM where the kernel starts and ends and
where the kernel wants the top of the process to be. VM then
uses this entire range, i.e. the stack is right at the top,
and mmap()ped bits of memory are placed below that downwards,
and the break grows upwards.
Other Consequences:
. Every process gets its own page table as address spaces
can't be separated any more by segments.
. As all segments are 0-based, there is no distinction between
virtual and linear addresses, nor between userspace and
kernel addresses.
. Less work is done when context switching, leading to a net
performance increase. (8% faster on my machine for 'make servers'.)
. The layout and configuration of the GDT makes sysenter and syscall
possible.
2012-05-07 16:03:35 +02:00
|
|
|
if(pt_ptalloc(newpt, p, 0) != OK)
|
|
|
|
panic("pt_ptalloc failed");
|
2012-10-31 19:24:14 +01:00
|
|
|
assert(newpt->pt_dir[p] & ARCH_VM_PDE_PRESENT);
|
|
|
|
|
|
|
|
#if defined(__i386__)
|
|
|
|
ptaddr_kern = entry & ARCH_VM_ADDR_MASK;
|
|
|
|
ptaddr_us = newpt->pt_dir[p] & ARCH_VM_ADDR_MASK;
|
|
|
|
#elif defined(__arm__)
|
|
|
|
ptaddr_kern = entry & ARCH_VM_PDE_MASK;
|
|
|
|
ptaddr_us = newpt->pt_dir[p] & ARCH_VM_PDE_MASK;
|
|
|
|
#endif
|
2009-09-21 16:49:49 +02:00
|
|
|
|
No more intel/minix segments.
This commit removes all traces of Minix segments (the text/data/stack
memory map abstraction in the kernel) and significance of Intel segments
(hardware segments like CS, DS that add offsets to all addressing before
page table translation). This ultimately simplifies the memory layout
and addressing and makes the same layout possible on non-Intel
architectures.
There are only two types of addresses in the world now: virtual
and physical; even the kernel and processes have the same virtual
address space. Kernel and user processes can be distinguished at a
glance as processes won't use 0xF0000000 and above.
No static pre-allocated memory sizes exist any more.
Changes to booting:
. The pre_init.c leaves the kernel and modules exactly as
they were left by the bootloader in physical memory
. The kernel starts running using physical addressing,
loaded at a fixed location given in its linker script by the
bootloader. All code and data in this phase are linked to
this fixed low location.
. It makes a bootstrap pagetable to map itself to a
fixed high location (also in linker script) and jumps to
the high address. All code and data then use this high addressing.
. All code/data symbols linked at the low addresses are prefixed by
an objcopy step with __k_unpaged_*, so that that code cannot
reference highly-linked symbols (which aren't valid yet) or vice
versa (symbols that aren't valid any more).
. The two addressing modes are separated in the linker script by
collecting the unpaged_*.o objects and linking them with low
addresses, and linking the rest high. Some objects are linked
twice, once low and once high.
. The bootstrap phase passes a lot of information (e.g. free memory
list, physical location of the modules, etc.) using the kinfo
struct.
. After this bootstrap the low-linked part is freed.
. The kernel maps in VM into the bootstrap page table so that VM can
begin executing. Its first job is to make page tables for all other
boot processes. So VM runs before RS, and RS gets a fully dynamic,
VM-managed address space. VM gets its privilege info from RS as usual
but that happens after RS starts running.
. Both the kernel loading VM and VM organizing boot processes happen
using the libexec logic. This removes the last reason for VM to
still know much about exec() and vm/exec.c is gone.
Further Implementation:
. All segments are based at 0 and have a 4 GB limit.
. The kernel is mapped in at the top of the virtual address
space so as not to constrain the user processes.
. Processes do not use segments from the LDT at all; there are
no segments in the LDT any more, so no LLDT is needed.
. The Minix segments T/D/S are gone and so none of the
user-space or in-kernel copy functions use them. The copy
functions use a process endpoint of NONE to realize it's
a physical address, virtual otherwise.
. The umap call only makes sense to translate a virtual address
to a physical address now.
. Segments-related calls like newmap and alloc_segments are gone.
. All segments-related translation in VM is gone (vir2map etc).
. Initialization in VM is simpler as no moving around is necessary.
. VM and all other boot processes can be linked wherever they wish
and will be mapped in at the right location by the kernel and VM
respectively.
Other changes:
. The multiboot code is less special: it does not use mb_print
for its diagnostics any more but uses printf() as normal, saving
the output into the diagnostics buffer, only printing to the
screen using the direct print functions if a panic() occurs.
. The multiboot code uses the flexible 'free memory map list'
style to receive the list of free memory if available.
. The kernel determines the memory layout of the processes to
a degree: it tells VM where the kernel starts and ends and
where the kernel wants the top of the process to be. VM then
uses this entire range, i.e. the stack is right at the top,
and mmap()ped bits of memory are placed below that downwards,
and the break grows upwards.
Other Consequences:
. Every process gets its own page table as address spaces
can't be separated any more by segments.
. As all segments are 0-based, there is no distinction between
virtual and linear addresses, nor between userspace and
kernel addresses.
. Less work is done when context switching, leading to a net
performance increase. (8% faster on my machine for 'make servers'.)
. The layout and configuration of the GDT makes sysenter and syscall
possible.
2012-05-07 16:03:35 +02:00
|
|
|
/* Copy kernel-initialized pagetable contents into our
|
|
|
|
* normally accessible pagetable.
|
|
|
|
*/
|
2012-10-31 19:24:14 +01:00
|
|
|
if(sys_abscopy(ptaddr_kern, ptaddr_us, VM_PAGE_SIZE) != OK)
|
No more intel/minix segments.
This commit removes all traces of Minix segments (the text/data/stack
memory map abstraction in the kernel) and significance of Intel segments
(hardware segments like CS, DS that add offsets to all addressing before
page table translation). This ultimately simplifies the memory layout
and addressing and makes the same layout possible on non-Intel
architectures.
There are only two types of addresses in the world now: virtual
and physical; even the kernel and processes have the same virtual
address space. Kernel and user processes can be distinguished at a
glance as processes won't use 0xF0000000 and above.
No static pre-allocated memory sizes exist any more.
Changes to booting:
. The pre_init.c leaves the kernel and modules exactly as
they were left by the bootloader in physical memory
. The kernel starts running using physical addressing,
loaded at a fixed location given in its linker script by the
bootloader. All code and data in this phase are linked to
this fixed low location.
. It makes a bootstrap pagetable to map itself to a
fixed high location (also in linker script) and jumps to
the high address. All code and data then use this high addressing.
. All code/data symbols linked at the low addresses are prefixed by
an objcopy step with __k_unpaged_*, so that that code cannot
reference highly-linked symbols (which aren't valid yet) or vice
versa (symbols that aren't valid any more).
. The two addressing modes are separated in the linker script by
collecting the unpaged_*.o objects and linking them with low
addresses, and linking the rest high. Some objects are linked
twice, once low and once high.
. The bootstrap phase passes a lot of information (e.g. free memory
list, physical location of the modules, etc.) using the kinfo
struct.
. After this bootstrap the low-linked part is freed.
. The kernel maps in VM into the bootstrap page table so that VM can
begin executing. Its first job is to make page tables for all other
boot processes. So VM runs before RS, and RS gets a fully dynamic,
VM-managed address space. VM gets its privilege info from RS as usual
but that happens after RS starts running.
. Both the kernel loading VM and VM organizing boot processes happen
using the libexec logic. This removes the last reason for VM to
still know much about exec() and vm/exec.c is gone.
Further Implementation:
. All segments are based at 0 and have a 4 GB limit.
. The kernel is mapped in at the top of the virtual address
space so as not to constrain the user processes.
. Processes do not use segments from the LDT at all; there are
no segments in the LDT any more, so no LLDT is needed.
. The Minix segments T/D/S are gone and so none of the
user-space or in-kernel copy functions use them. The copy
functions use a process endpoint of NONE to realize it's
a physical address, virtual otherwise.
. The umap call only makes sense to translate a virtual address
to a physical address now.
. Segments-related calls like newmap and alloc_segments are gone.
. All segments-related translation in VM is gone (vir2map etc).
. Initialization in VM is simpler as no moving around is necessary.
. VM and all other boot processes can be linked wherever they wish
and will be mapped in at the right location by the kernel and VM
respectively.
Other changes:
. The multiboot code is less special: it does not use mb_print
for its diagnostics any more but uses printf() as normal, saving
the output into the diagnostics buffer, only printing to the
screen using the direct print functions if a panic() occurs.
. The multiboot code uses the flexible 'free memory map list'
style to receive the list of free memory if available.
. The kernel determines the memory layout of the processes to
a degree: it tells VM where the kernel starts and ends and
where the kernel wants the top of the process to be. VM then
uses this entire range, i.e. the stack is right at the top,
and mmap()ped bits of memory are placed below that downwards,
and the break grows upwards.
Other Consequences:
. Every process gets its own page table as address spaces
can't be separated any more by segments.
. As all segments are 0-based, there is no distinction between
virtual and linear addresses, nor between userspace and
kernel addresses.
. Less work is done when context switching, leading to a net
performance increase. (8% faster on my machine for 'make servers'.)
. The layout and configuration of the GDT makes sysenter and syscall
possible.
2012-05-07 16:03:35 +02:00
|
|
|
panic("pt_init: abscopy failed");
|
|
|
|
}
|
2010-07-21 01:03:52 +02:00
|
|
|
|
No more intel/minix segments.
This commit removes all traces of Minix segments (the text/data/stack
memory map abstraction in the kernel) and significance of Intel segments
(hardware segments like CS, DS that add offsets to all addressing before
page table translation). This ultimately simplifies the memory layout
and addressing and makes the same layout possible on non-Intel
architectures.
There are only two types of addresses in the world now: virtual
and physical; even the kernel and processes have the same virtual
address space. Kernel and user processes can be distinguished at a
glance as processes won't use 0xF0000000 and above.
No static pre-allocated memory sizes exist any more.
Changes to booting:
. The pre_init.c leaves the kernel and modules exactly as
they were left by the bootloader in physical memory
. The kernel starts running using physical addressing,
loaded at a fixed location given in its linker script by the
bootloader. All code and data in this phase are linked to
this fixed low location.
. It makes a bootstrap pagetable to map itself to a
fixed high location (also in linker script) and jumps to
the high address. All code and data then use this high addressing.
. All code/data symbols linked at the low addresses are prefixed by
an objcopy step with __k_unpaged_*, so that that code cannot
reference highly-linked symbols (which aren't valid yet) or vice
versa (symbols that aren't valid any more).
. The two addressing modes are separated in the linker script by
collecting the unpaged_*.o objects and linking them with low
addresses, and linking the rest high. Some objects are linked
twice, once low and once high.
. The bootstrap phase passes a lot of information (e.g. free memory
list, physical location of the modules, etc.) using the kinfo
struct.
. After this bootstrap the low-linked part is freed.
. The kernel maps in VM into the bootstrap page table so that VM can
begin executing. Its first job is to make page tables for all other
boot processes. So VM runs before RS, and RS gets a fully dynamic,
VM-managed address space. VM gets its privilege info from RS as usual
but that happens after RS starts running.
. Both the kernel loading VM and VM organizing boot processes happen
using the libexec logic. This removes the last reason for VM to
still know much about exec() and vm/exec.c is gone.
Further Implementation:
. All segments are based at 0 and have a 4 GB limit.
. The kernel is mapped in at the top of the virtual address
space so as not to constrain the user processes.
. Processes do not use segments from the LDT at all; there are
no segments in the LDT any more, so no LLDT is needed.
. The Minix segments T/D/S are gone and so none of the
user-space or in-kernel copy functions use them. The copy
functions use a process endpoint of NONE to realize it's
a physical address, virtual otherwise.
. The umap call only makes sense to translate a virtual address
to a physical address now.
. Segments-related calls like newmap and alloc_segments are gone.
. All segments-related translation in VM is gone (vir2map etc).
. Initialization in VM is simpler as no moving around is necessary.
. VM and all other boot processes can be linked wherever they wish
and will be mapped in at the right location by the kernel and VM
respectively.
Other changes:
. The multiboot code is less special: it does not use mb_print
for its diagnostics any more but uses printf() as normal, saving
the output into the diagnostics buffer, only printing to the
screen using the direct print functions if a panic() occurs.
. The multiboot code uses the flexible 'free memory map list'
style to receive the list of free memory if available.
. The kernel determines the memory layout of the processes to
a degree: it tells VM where the kernel starts and ends and
where the kernel wants the top of the process to be. VM then
uses this entire range, i.e. the stack is right at the top,
and mmap()ped bits of memory are placed below that downwards,
and the break grows upwards.
Other Consequences:
. Every process gets its own page table as address spaces
can't be separated any more by segments.
. As all segments are 0-based, there is no distinction between
virtual and linear addresses, nor between userspace and
kernel addresses.
. Less work is done when context switching, leading to a net
performance increase. (8% faster on my machine for 'make servers'.)
. The layout and configuration of the GDT makes sysenter and syscall
possible.
2012-05-07 16:03:35 +02:00
|
|
|
/* Inform kernel vm has a newly built page table. */
|
|
|
|
assert(vmproc[VM_PROC_NR].vm_endpoint == VM_PROC_NR);
|
|
|
|
pt_bind(newpt, &vmproc[VM_PROC_NR]);
|
2010-07-21 01:03:52 +02:00
|
|
|
|
2012-09-18 13:17:48 +02:00
|
|
|
pt_init_done = 1;
|
|
|
|
|
2009-05-11 21:11:37 +02:00
|
|
|
/* All OK. */
|
|
|
|
return;
|
2008-11-19 13:26:10 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
/*===========================================================================*
|
|
|
|
* pt_bind *
|
|
|
|
*===========================================================================*/
|
2012-03-25 20:25:53 +02:00
|
|
|
int pt_bind(pt_t *pt, struct vmproc *who)
{
/* Bind page table 'pt' to process 'who'. The physical address of the page
 * directory is stored in the "page directory pagetable" entry (or entries,
 * on ARM) that belongs to the process slot, and the kernel is then told
 * about the new page table root. The return value is whatever
 * sys_vmctl_set_addrspace() returns.
 */
	int procslot, pdeslot;
	u32_t phys;
	void *pdes;
	int pagedir_pde;
	int slots_per_pde;
	int pages_per_pagedir = ARCH_PAGEDIR_SIZE/VM_PAGE_SIZE;
	struct pdm *pdm;

	/* Number of process slots served by one pagedir_mappings[] entry. */
	slots_per_pde = ARCH_VM_PT_ENTRIES / pages_per_pagedir;

	/* Basic sanity checks. */
	assert(who);
	assert(who->vm_flags & VMF_INUSE);
	assert(pt);

	/* Validate the slot number before it is used as an index, so that a
	 * bad slot trips an assert instead of reading outside
	 * pagedir_mappings[].
	 */
	procslot = who->vm_slot;
	assert(procslot >= 0);
	assert(procslot < ELEMENTS(vmproc));
	assert(procslot/slots_per_pde < ELEMENTS(pagedir_mappings));

	pdm = &pagedir_mappings[procslot/slots_per_pde];
	pdeslot = procslot%slots_per_pde;
	pagedir_pde = pdm->pdeno;
	assert(pdeslot >= 0);
	assert(pdeslot < slots_per_pde);
	assert(pagedir_pde >= 0);

	/* The page directory address must survive masking unchanged and be
	 * aligned to the size of a page directory.
	 */
#if defined(__i386__)
	phys = pt->pt_dir_phys & ARCH_VM_ADDR_MASK;
#elif defined(__arm__)
	phys = pt->pt_dir_phys & ARM_VM_PTE_MASK;
#endif
	assert(pt->pt_dir_phys == phys);
	assert(!(pt->pt_dir_phys % ARCH_PAGEDIR_SIZE));

	/* Update "page directory pagetable." */
#if defined(__i386__)
	pdm->page_directories[pdeslot] =
		phys | ARCH_VM_PDE_PRESENT|ARCH_VM_PTE_RW;
#elif defined(__arm__)
	{
		int i;

		/* One entry per page of the page directory. */
		for (i = 0; i < pages_per_pagedir; i++) {
			pdm->page_directories[pdeslot*pages_per_pagedir+i] =
				(phys+i*VM_PAGE_SIZE)
				| ARCH_VM_PTE_PRESENT
				| ARCH_VM_PTE_RW
				| ARM_VM_PTE_CACHED
				| ARCH_VM_PTE_USER; /* LSC FIXME */
		}
	}
#endif

	/* This is where the PDE's will be visible to the kernel
	 * in its address space. The arithmetic is done in u32_t so that the
	 * multiplication cannot overflow a signed int.
	 * NOTE(review): assumes the ARCH_* size constants may be plain ints;
	 * confirm against the arch headers.
	 */
	pdes = (void *) ((u32_t) pagedir_pde*ARCH_BIG_PAGE_SIZE +
#if defined(__i386__)
			(u32_t) pdeslot * VM_PAGE_SIZE);
#elif defined(__arm__)
			(u32_t) pdeslot * ARCH_PAGEDIR_SIZE);
#endif

	/* Tell kernel about new page table root. */
	return sys_vmctl_set_addrspace(who->vm_endpoint, pt->pt_dir_phys , pdes);
}
|
|
|
|
|
|
|
|
/*===========================================================================*
|
|
|
|
* pt_free *
|
|
|
|
*===========================================================================*/
|
2012-03-25 20:25:53 +02:00
|
|
|
void pt_free(pt_t *pt)
{
/* Release the memory held by this pagetable: every non-null pt_pt[] entry
 * is handed to vm_freepages() with a count of 1. Only the pt_pt[] entries
 * are handled here.
 */
	int slot;

	for(slot = 0; slot < ARCH_VM_DIR_ENTRIES; slot++) {
		/* Nothing to release for an empty directory slot. */
		if(!pt->pt_pt[slot])
			continue;

		vm_freepages((vir_bytes) pt->pt_pt[slot], 1);
	}
}
|
|
|
|
|
|
|
|
/*===========================================================================*
|
|
|
|
* pt_mapkernel *
|
|
|
|
*===========================================================================*/
|
2012-03-25 20:25:53 +02:00
|
|
|
int pt_mapkernel(pt_t *pt)
|
2008-11-19 13:26:10 +01:00
|
|
|
{
|
2011-06-01 11:30:58 +02:00
|
|
|
int i;
|
No more intel/minix segments.
This commit removes all traces of Minix segments (the text/data/stack
memory map abstraction in the kernel) and significance of Intel segments
(hardware segments like CS, DS that add offsets to all addressing before
page table translation). This ultimately simplifies the memory layout
and addressing and makes the same layout possible on non-Intel
architectures.
There are only two types of addresses in the world now: virtual
and physical; even the kernel and processes have the same virtual
address space. Kernel and user processes can be distinguished at a
glance as processes won't use 0xF0000000 and above.
No static pre-allocated memory sizes exist any more.
Changes to booting:
. The pre_init.c leaves the kernel and modules exactly as
they were left by the bootloader in physical memory
. The kernel starts running using physical addressing,
loaded at a fixed location given in its linker script by the
bootloader. All code and data in this phase are linked to
this fixed low location.
. It makes a bootstrap pagetable to map itself to a
fixed high location (also in linker script) and jumps to
the high address. All code and data then use this high addressing.
. All code/data symbols linked at the low addresses are prefixed by
an objcopy step with __k_unpaged_*, so that that code cannot
reference highly-linked symbols (which aren't valid yet) or vice
versa (symbols that aren't valid any more).
. The two addressing modes are separated in the linker script by
collecting the unpaged_*.o objects and linking them with low
addresses, and linking the rest high. Some objects are linked
twice, once low and once high.
. The bootstrap phase passes a lot of information (e.g. free memory
list, physical location of the modules, etc.) using the kinfo
struct.
. After this bootstrap the low-linked part is freed.
. The kernel maps in VM into the bootstrap page table so that VM can
begin executing. Its first job is to make page tables for all other
boot processes. So VM runs before RS, and RS gets a fully dynamic,
VM-managed address space. VM gets its privilege info from RS as usual
but that happens after RS starts running.
. Both the kernel loading VM and VM organizing boot processes happen
using the libexec logic. This removes the last reason for VM to
still know much about exec() and vm/exec.c is gone.
Further Implementation:
. All segments are based at 0 and have a 4 GB limit.
. The kernel is mapped in at the top of the virtual address
space so as not to constrain the user processes.
. Processes do not use segments from the LDT at all; there are
no segments in the LDT any more, so no LLDT is needed.
. The Minix segments T/D/S are gone and so none of the
user-space or in-kernel copy functions use them. The copy
functions use a process endpoint of NONE to realize it's
a physical address, virtual otherwise.
. The umap call only makes sense to translate a virtual address
to a physical address now.
. Segments-related calls like newmap and alloc_segments are gone.
. All segments-related translation in VM is gone (vir2map etc).
. Initialization in VM is simpler as no moving around is necessary.
. VM and all other boot processes can be linked wherever they wish
and will be mapped in at the right location by the kernel and VM
respectively.
Other changes:
. The multiboot code is less special: it does not use mb_print
for its diagnostics any more but uses printf() as normal, saving
the output into the diagnostics buffer, only printing to the
screen using the direct print functions if a panic() occurs.
. The multiboot code uses the flexible 'free memory map list'
style to receive the list of free memory if available.
. The kernel determines the memory layout of the processes to
a degree: it tells VM where the kernel starts and ends and
where the kernel wants the top of the process to be. VM then
uses this entire range, i.e. the stack is right at the top,
and mmap()ped bits of memory are placed below that downwards,
and the break grows upwards.
Other Consequences:
. Every process gets its own page table as address spaces
can't be separated any more by segments.
. As all segments are 0-based, there is no distinction between
virtual and linear addresses, nor between userspace and
kernel addresses.
. Less work is done when context switching, leading to a net
performance increase. (8% faster on my machine for 'make servers'.)
. The layout and configuration of the GDT makes sysenter and syscall
possible.
2012-05-07 16:03:35 +02:00
|
|
|
int kern_pde = kern_start_pde;
|
|
|
|
phys_bytes addr, mapped = 0;
|
2009-05-15 19:07:36 +02:00
|
|
|
|
2012-11-09 19:00:46 +01:00
|
|
|
/* Any page table needs to map in the kernel address space. */
|
No more intel/minix segments.
This commit removes all traces of Minix segments (the text/data/stack
memory map abstraction in the kernel) and significance of Intel segments
(hardware segments like CS, DS that add offsets to all addressing before
page table translation). This ultimately simplifies the memory layout
and addressing and makes the same layout possible on non-Intel
architectures.
There are only two types of addresses in the world now: virtual
and physical; even the kernel and processes have the same virtual
address space. Kernel and user processes can be distinguished at a
glance as processes won't use 0xF0000000 and above.
No static pre-allocated memory sizes exist any more.
Changes to booting:
. The pre_init.c leaves the kernel and modules exactly as
they were left by the bootloader in physical memory
. The kernel starts running using physical addressing,
loaded at a fixed location given in its linker script by the
bootloader. All code and data in this phase are linked to
this fixed low location.
. It makes a bootstrap pagetable to map itself to a
fixed high location (also in linker script) and jumps to
the high address. All code and data then use this high addressing.
. All code/data symbols linked at the low addresses are prefixed by
an objcopy step with __k_unpaged_*, so that that code cannot
reference highly-linked symbols (which aren't valid yet) or vice
versa (symbols that aren't valid any more).
. The two addressing modes are separated in the linker script by
collecting the unpaged_*.o objects and linking them with low
addresses, and linking the rest high. Some objects are linked
twice, once low and once high.
. The bootstrap phase passes a lot of information (e.g. free memory
list, physical location of the modules, etc.) using the kinfo
struct.
. After this bootstrap the low-linked part is freed.
. The kernel maps in VM into the bootstrap page table so that VM can
begin executing. Its first job is to make page tables for all other
boot processes. So VM runs before RS, and RS gets a fully dynamic,
VM-managed address space. VM gets its privilege info from RS as usual
but that happens after RS starts running.
. Both the kernel loading VM and VM organizing boot processes happen
using the libexec logic. This removes the last reason for VM to
still know much about exec() and vm/exec.c is gone.
Further Implementation:
. All segments are based at 0 and have a 4 GB limit.
. The kernel is mapped in at the top of the virtual address
space so as not to constrain the user processes.
. Processes do not use segments from the LDT at all; there are
no segments in the LDT any more, so no LLDT is needed.
. The Minix segments T/D/S are gone and so none of the
user-space or in-kernel copy functions use them. The copy
functions use a process endpoint of NONE to realize it's
a physical address, virtual otherwise.
. The umap call only makes sense to translate a virtual address
to a physical address now.
. Segments-related calls like newmap and alloc_segments are gone.
. All segments-related translation in VM is gone (vir2map etc).
. Initialization in VM is simpler as no moving around is necessary.
. VM and all other boot processes can be linked wherever they wish
and will be mapped in at the right location by the kernel and VM
respectively.
Other changes:
. The multiboot code is less special: it does not use mb_print
for its diagnostics any more but uses printf() as normal, saving
the output into the diagnostics buffer, only printing to the
screen using the direct print functions if a panic() occurs.
. The multiboot code uses the flexible 'free memory map list'
style to receive the list of free memory if available.
. The kernel determines the memory layout of the processes to
a degree: it tells VM where the kernel starts and ends and
where the kernel wants the top of the process to be. VM then
uses this entire range, i.e. the stack is right at the top,
and mmap()ped bits of memory are placed below that downwards,
and the break grows upwards.
Other Consequences:
. Every process gets its own page table as address spaces
can't be separated any more by segments.
. As all segments are 0-based, there is no distinction between
virtual and linear addresses, nor between userspace and
kernel addresses.
. Less work is done when context switching, leading to a net
performance increase. (8% faster on my machine for 'make servers'.)
. The layout and configuration of the GDT makes sysenter and syscall
possible.
2012-05-07 16:03:35 +02:00
|
|
|
assert(bigpage_ok);
|
|
|
|
assert(kern_pde >= 0);
|
|
|
|
|
|
|
|
/* pt_init() has made sure this is ok. */
|
|
|
|
addr = kern_mb_mod->mod_start;
|
|
|
|
|
|
|
|
/* Actually mapping in kernel */
|
|
|
|
while(mapped < kern_size) {
|
2012-10-31 19:24:14 +01:00
|
|
|
#if defined(__i386__)
|
|
|
|
pt->pt_dir[kern_pde] = addr | ARCH_VM_PDE_PRESENT |
|
|
|
|
ARCH_VM_BIGPAGE | ARCH_VM_PTE_RW | global_bit;
|
|
|
|
#elif defined(__arm__)
|
2013-08-27 14:32:23 +02:00
|
|
|
pt->pt_dir[kern_pde] = (addr & ARM_VM_SECTION_MASK)
|
2013-02-10 20:20:14 +01:00
|
|
|
| ARM_VM_SECTION
|
|
|
|
| ARM_VM_SECTION_DOMAIN
|
2013-09-13 09:40:20 +02:00
|
|
|
| ARM_VM_SECTION_CACHED
|
2013-02-10 20:20:14 +01:00
|
|
|
| ARM_VM_SECTION_SUPER;
|
2012-10-31 19:24:14 +01:00
|
|
|
#endif
|
No more intel/minix segments.
This commit removes all traces of Minix segments (the text/data/stack
memory map abstraction in the kernel) and significance of Intel segments
(hardware segments like CS, DS that add offsets to all addressing before
page table translation). This ultimately simplifies the memory layout
and addressing and makes the same layout possible on non-Intel
architectures.
There are only two types of addresses in the world now: virtual
and physical; even the kernel and processes have the same virtual
address space. Kernel and user processes can be distinguished at a
glance as processes won't use 0xF0000000 and above.
No static pre-allocated memory sizes exist any more.
Changes to booting:
. The pre_init.c leaves the kernel and modules exactly as
they were left by the bootloader in physical memory
. The kernel starts running using physical addressing,
loaded at a fixed location given in its linker script by the
bootloader. All code and data in this phase are linked to
this fixed low location.
. It makes a bootstrap pagetable to map itself to a
fixed high location (also in linker script) and jumps to
the high address. All code and data then use this high addressing.
. All code/data symbols linked at the low addresses are prefixed by
an objcopy step with __k_unpaged_*, so that that code cannot
reference highly-linked symbols (which aren't valid yet) or vice
versa (symbols that aren't valid any more).
. The two addressing modes are separated in the linker script by
collecting the unpaged_*.o objects and linking them with low
addresses, and linking the rest high. Some objects are linked
twice, once low and once high.
. The bootstrap phase passes a lot of information (e.g. free memory
list, physical location of the modules, etc.) using the kinfo
struct.
. After this bootstrap the low-linked part is freed.
. The kernel maps in VM into the bootstrap page table so that VM can
begin executing. Its first job is to make page tables for all other
boot processes. So VM runs before RS, and RS gets a fully dynamic,
VM-managed address space. VM gets its privilege info from RS as usual
but that happens after RS starts running.
. Both the kernel loading VM and VM organizing boot processes happen
using the libexec logic. This removes the last reason for VM to
still know much about exec() and vm/exec.c is gone.
Further Implementation:
. All segments are based at 0 and have a 4 GB limit.
. The kernel is mapped in at the top of the virtual address
space so as not to constrain the user processes.
. Processes do not use segments from the LDT at all; there are
no segments in the LDT any more, so no LLDT is needed.
. The Minix segments T/D/S are gone and so none of the
user-space or in-kernel copy functions use them. The copy
functions use a process endpoint of NONE to realize it's
a physical address, virtual otherwise.
. The umap call only makes sense to translate a virtual address
to a physical address now.
. Segments-related calls like newmap and alloc_segments are gone.
. All segments-related translation in VM is gone (vir2map etc).
. Initialization in VM is simpler as no moving around is necessary.
. VM and all other boot processes can be linked wherever they wish
and will be mapped in at the right location by the kernel and VM
respectively.
Other changes:
. The multiboot code is less special: it does not use mb_print
for its diagnostics any more but uses printf() as normal, saving
the output into the diagnostics buffer, only printing to the
screen using the direct print functions if a panic() occurs.
. The multiboot code uses the flexible 'free memory map list'
style to receive the list of free memory if available.
. The kernel determines the memory layout of the processes to
a degree: it tells VM where the kernel starts and ends and
where the kernel wants the top of the process to be. VM then
uses this entire range, i.e. the stack is right at the top,
and mmap()ped bits of memory are placed below that downwards,
and the break grows upwards.
Other Consequences:
. Every process gets its own page table as address spaces
can't be separated any more by segments.
. As all segments are 0-based, there is no distinction between
virtual and linear addresses, nor between userspace and
kernel addresses.
. Less work is done when context switching, leading to a net
performance increase. (8% faster on my machine for 'make servers'.)
. The layout and configuration of the GDT makes sysenter and syscall
possible.
2012-05-07 16:03:35 +02:00
|
|
|
kern_pde++;
|
2012-10-31 19:24:14 +01:00
|
|
|
mapped += ARCH_BIG_PAGE_SIZE;
|
|
|
|
addr += ARCH_BIG_PAGE_SIZE;
|
2009-05-18 17:34:42 +02:00
|
|
|
}
|
2008-11-19 13:26:10 +01:00
|
|
|
|
No more intel/minix segments.
This commit removes all traces of Minix segments (the text/data/stack
memory map abstraction in the kernel) and significance of Intel segments
(hardware segments like CS, DS that add offsets to all addressing before
page table translation). This ultimately simplifies the memory layout
and addressing and makes the same layout possible on non-Intel
architectures.
There are only two types of addresses in the world now: virtual
and physical; even the kernel and processes have the same virtual
address space. Kernel and user processes can be distinguished at a
glance as processes won't use 0xF0000000 and above.
No static pre-allocated memory sizes exist any more.
Changes to booting:
. The pre_init.c leaves the kernel and modules exactly as
they were left by the bootloader in physical memory
. The kernel starts running using physical addressing,
loaded at a fixed location given in its linker script by the
bootloader. All code and data in this phase are linked to
this fixed low location.
. It makes a bootstrap pagetable to map itself to a
fixed high location (also in linker script) and jumps to
the high address. All code and data then use this high addressing.
. All code/data symbols linked at the low addresses are prefixed by
an objcopy step with __k_unpaged_*, so that that code cannot
reference highly-linked symbols (which aren't valid yet) or vice
versa (symbols that aren't valid any more).
. The two addressing modes are separated in the linker script by
collecting the unpaged_*.o objects and linking them with low
addresses, and linking the rest high. Some objects are linked
twice, once low and once high.
. The bootstrap phase passes a lot of information (e.g. free memory
list, physical location of the modules, etc.) using the kinfo
struct.
. After this bootstrap the low-linked part is freed.
. The kernel maps in VM into the bootstrap page table so that VM can
begin executing. Its first job is to make page tables for all other
boot processes. So VM runs before RS, and RS gets a fully dynamic,
VM-managed address space. VM gets its privilege info from RS as usual
but that happens after RS starts running.
. Both the kernel loading VM and VM organizing boot processes happen
using the libexec logic. This removes the last reason for VM to
still know much about exec() and vm/exec.c is gone.
Further Implementation:
. All segments are based at 0 and have a 4 GB limit.
. The kernel is mapped in at the top of the virtual address
space so as not to constrain the user processes.
. Processes do not use segments from the LDT at all; there are
no segments in the LDT any more, so no LLDT is needed.
. The Minix segments T/D/S are gone and so none of the
user-space or in-kernel copy functions use them. The copy
functions use a process endpoint of NONE to realize it's
a physical address, virtual otherwise.
. The umap call only makes sense to translate a virtual address
to a physical address now.
. Segments-related calls like newmap and alloc_segments are gone.
. All segments-related translation in VM is gone (vir2map etc).
. Initialization in VM is simpler as no moving around is necessary.
. VM and all other boot processes can be linked wherever they wish
and will be mapped in at the right location by the kernel and VM
respectively.
Other changes:
. The multiboot code is less special: it does not use mb_print
for its diagnostics any more but uses printf() as normal, saving
the output into the diagnostics buffer, only printing to the
screen using the direct print functions if a panic() occurs.
. The multiboot code uses the flexible 'free memory map list'
style to receive the list of free memory if available.
. The kernel determines the memory layout of the processes to
a degree: it tells VM where the kernel starts and ends and
where the kernel wants the top of the process to be. VM then
uses this entire range, i.e. the stack is right at the top,
and mmap()ped bits of memory are placed below that downwards,
and the break grows upwards.
Other Consequences:
. Every process gets its own page table as address spaces
can't be separated any more by segments.
. As all segments are 0-based, there is no distinction between
virtual and linear addresses, nor between userspace and
kernel addresses.
. Less work is done when context switching, leading to a net
performance increase. (8% faster on my machine for 'make servers'.)
. The layout and configuration of the GDT makes sysenter and syscall
possible.
2012-05-07 16:03:35 +02:00
|
|
|
/* Kernel also wants to know about all page directories. */
|
2013-02-10 19:37:12 +01:00
|
|
|
{
|
|
|
|
int pd;
|
|
|
|
for(pd = 0; pd < MAX_PAGEDIR_PDES; pd++) {
|
|
|
|
struct pdm *pdm = &pagedir_mappings[pd];
|
|
|
|
|
|
|
|
assert(pdm->pdeno > 0);
|
|
|
|
assert(pdm->pdeno > kern_pde);
|
|
|
|
pt->pt_dir[pdm->pdeno] = pdm->val;
|
|
|
|
}
|
|
|
|
}
|
2009-09-21 16:49:49 +02:00
|
|
|
|
No more intel/minix segments.
This commit removes all traces of Minix segments (the text/data/stack
memory map abstraction in the kernel) and significance of Intel segments
(hardware segments like CS, DS that add offsets to all addressing before
page table translation). This ultimately simplifies the memory layout
and addressing and makes the same layout possible on non-Intel
architectures.
There are only two types of addresses in the world now: virtual
and physical; even the kernel and processes have the same virtual
address space. Kernel and user processes can be distinguished at a
glance as processes won't use 0xF0000000 and above.
No static pre-allocated memory sizes exist any more.
Changes to booting:
. The pre_init.c leaves the kernel and modules exactly as
they were left by the bootloader in physical memory
. The kernel starts running using physical addressing,
loaded at a fixed location given in its linker script by the
bootloader. All code and data in this phase are linked to
this fixed low location.
. It makes a bootstrap pagetable to map itself to a
fixed high location (also in linker script) and jumps to
the high address. All code and data then use this high addressing.
. All code/data symbols linked at the low addresses are prefixed by
an objcopy step with __k_unpaged_*, so that that code cannot
reference highly-linked symbols (which aren't valid yet) or vice
versa (symbols that aren't valid any more).
. The two addressing modes are separated in the linker script by
collecting the unpaged_*.o objects and linking them with low
addresses, and linking the rest high. Some objects are linked
twice, once low and once high.
. The bootstrap phase passes a lot of information (e.g. free memory
list, physical location of the modules, etc.) using the kinfo
struct.
. After this bootstrap the low-linked part is freed.
. The kernel maps in VM into the bootstrap page table so that VM can
begin executing. Its first job is to make page tables for all other
boot processes. So VM runs before RS, and RS gets a fully dynamic,
VM-managed address space. VM gets its privilege info from RS as usual
but that happens after RS starts running.
. Both the kernel loading VM and VM organizing boot processes happen
using the libexec logic. This removes the last reason for VM to
still know much about exec() and vm/exec.c is gone.
Further Implementation:
. All segments are based at 0 and have a 4 GB limit.
. The kernel is mapped in at the top of the virtual address
space so as not to constrain the user processes.
. Processes do not use segments from the LDT at all; there are
no segments in the LDT any more, so no LLDT is needed.
. The Minix segments T/D/S are gone and so none of the
user-space or in-kernel copy functions use them. The copy
functions use a process endpoint of NONE to realize it's
a physical address, virtual otherwise.
. The umap call only makes sense to translate a virtual address
to a physical address now.
. Segments-related calls like newmap and alloc_segments are gone.
. All segments-related translation in VM is gone (vir2map etc).
. Initialization in VM is simpler as no moving around is necessary.
. VM and all other boot processes can be linked wherever they wish
and will be mapped in at the right location by the kernel and VM
respectively.
Other changes:
. The multiboot code is less special: it does not use mb_print
for its diagnostics any more but uses printf() as normal, saving
the output into the diagnostics buffer, only printing to the
screen using the direct print functions if a panic() occurs.
. The multiboot code uses the flexible 'free memory map list'
style to receive the list of free memory if available.
. The kernel determines the memory layout of the processes to
a degree: it tells VM where the kernel starts and ends and
where the kernel wants the top of the process to be. VM then
uses this entire range, i.e. the stack is right at the top,
and mmap()ped bits of memory are placed below that downwards,
and the break grows upwards.
Other Consequences:
. Every process gets its own page table as address spaces
can't be separated any more by segments.
. As all segments are 0-based, there is no distinction between
virtual and linear addresses, nor between userspace and
kernel addresses.
. Less work is done when context switching, leading to a net
performance increase. (8% faster on my machine for 'make servers'.)
. The layout and configuration of the GDT makes sysenter and syscall
possible.
2012-05-07 16:03:35 +02:00
|
|
|
/* Kernel also wants various mappings of its own. */
|
2009-11-11 18:02:45 +01:00
|
|
|
for(i = 0; i < kernmappings; i++) {
|
2012-11-09 16:50:31 +01:00
|
|
|
int r;
|
|
|
|
if((r=pt_writemap(NULL, pt,
|
No more intel/minix segments.
This commit removes all traces of Minix segments (the text/data/stack
memory map abstraction in the kernel) and significance of Intel segments
(hardware segments like CS, DS that add offsets to all addressing before
page table translation). This ultimately simplifies the memory layout
and addressing and makes the same layout possible on non-Intel
architectures.
There are only two types of addresses in the world now: virtual
and physical; even the kernel and processes have the same virtual
address space. Kernel and user processes can be distinguished at a
glance as processes won't use 0xF0000000 and above.
No static pre-allocated memory sizes exist any more.
Changes to booting:
. The pre_init.c leaves the kernel and modules exactly as
they were left by the bootloader in physical memory
. The kernel starts running using physical addressing,
loaded at a fixed location given in its linker script by the
bootloader. All code and data in this phase are linked to
this fixed low location.
. It makes a bootstrap pagetable to map itself to a
fixed high location (also in linker script) and jumps to
the high address. All code and data then use this high addressing.
. All code/data symbols linked at the low addresses are prefixed by
an objcopy step with __k_unpaged_*, so that that code cannot
reference highly-linked symbols (which aren't valid yet) or vice
versa (symbols that aren't valid any more).
. The two addressing modes are separated in the linker script by
collecting the unpaged_*.o objects and linking them with low
addresses, and linking the rest high. Some objects are linked
twice, once low and once high.
. The bootstrap phase passes a lot of information (e.g. free memory
list, physical location of the modules, etc.) using the kinfo
struct.
. After this bootstrap the low-linked part is freed.
. The kernel maps in VM into the bootstrap page table so that VM can
begin executing. Its first job is to make page tables for all other
boot processes. So VM runs before RS, and RS gets a fully dynamic,
VM-managed address space. VM gets its privilege info from RS as usual
but that happens after RS starts running.
. Both the kernel loading VM and VM organizing boot processes happen
using the libexec logic. This removes the last reason for VM to
still know much about exec() and vm/exec.c is gone.
Further Implementation:
. All segments are based at 0 and have a 4 GB limit.
. The kernel is mapped in at the top of the virtual address
space so as not to constrain the user processes.
. Processes do not use segments from the LDT at all; there are
no segments in the LDT any more, so no LLDT is needed.
. The Minix segments T/D/S are gone and so none of the
user-space or in-kernel copy functions use them. The copy
functions use a process endpoint of NONE to realize it's
a physical address, virtual otherwise.
. The umap call only makes sense to translate a virtual address
to a physical address now.
. Segments-related calls like newmap and alloc_segments are gone.
. All segments-related translation in VM is gone (vir2map etc).
. Initialization in VM is simpler as no moving around is necessary.
. VM and all other boot processes can be linked wherever they wish
and will be mapped in at the right location by the kernel and VM
respectively.
Other changes:
. The multiboot code is less special: it does not use mb_print
for its diagnostics any more but uses printf() as normal, saving
the output into the diagnostics buffer, only printing to the
screen using the direct print functions if a panic() occurs.
. The multiboot code uses the flexible 'free memory map list'
style to receive the list of free memory if available.
. The kernel determines the memory layout of the processes to
a degree: it tells VM where the kernel starts and ends and
where the kernel wants the top of the process to be. VM then
uses this entire range, i.e. the stack is right at the top,
and mmap()ped bits of memory are placed below that downwards,
and the break grows upwards.
Other Consequences:
. Every process gets its own page table as address spaces
can't be separated any more by segments.
. As all segments are 0-based, there is no distinction between
virtual and linear addresses, nor between userspace and
kernel addresses.
. Less work is done when context switching, leading to a net
performance increase. (8% faster on my machine for 'make servers'.)
. The layout and configuration of the GDT makes sysenter and syscall
possible.
2012-05-07 16:03:35 +02:00
|
|
|
kern_mappings[i].vir_addr,
|
2009-11-11 18:02:45 +01:00
|
|
|
kern_mappings[i].phys_addr,
|
|
|
|
kern_mappings[i].len,
|
2012-11-09 16:50:31 +01:00
|
|
|
kern_mappings[i].flags, 0)) != OK) {
|
|
|
|
return r;
|
2009-11-11 18:02:45 +01:00
|
|
|
}
|
2013-01-29 18:52:08 +01:00
|
|
|
|
2009-11-11 18:02:45 +01:00
|
|
|
}
|
|
|
|
|
2008-11-19 13:26:10 +01:00
|
|
|
return OK;
|
|
|
|
}
|
|
|
|
|
2012-09-18 22:19:22 +02:00
|
|
|
int get_vm_self_pages(void) { return vm_self_pages; }
|