minix/kernel/arch/i386/memory.c
Ben Gras 1e399dd8bd various kernel printing fixes
. remove some call cycles by low-level functions invoking printf(); e.g.
	  send_sig() gets a return value that the caller should check
	. reason: very-early-phase printf() would trigger a printf() causing
	  infinite recursion -> GPF
	. move serial initialization a little earlier so DEBUG_EXTRA works for
	  serial earlier (e.g. its first instance, for "cstart")
	. closes tracker item 583:
	  System Fails to Complete Startup with Verbose 2 and 3 Boot Parameters,
	  reported by Stephen Hatton / pikpik.
2012-03-28 18:23:12 +02:00

1161 lines
31 KiB
C

#include "kernel/kernel.h"
#include "kernel/proc.h"
#include "kernel/vm.h"
#include <machine/vm.h>
#include <minix/type.h>
#include <minix/syslib.h>
#include <minix/cpufeature.h>
#include <string.h>
#include <assert.h>
#include <signal.h>
#include <stdlib.h>
#include <machine/vm.h>
#include "oxpcie.h"
#include "arch_proto.h"
#include "kernel/proto.h"
#include "kernel/debug.h"
#ifdef USE_APIC
#include "apic.h"
#ifdef USE_WATCHDOG
#include "kernel/watchdog.h"
#endif
#endif
int i386_paging_enabled = 0;
static int psok = 0;
#define MAX_FREEPDES 2
static int nfreepdes = 0, freepdes[MAX_FREEPDES];
#define HASPT(procptr) ((procptr)->p_seg.p_cr3 != 0)
static u32_t phys_get32(phys_bytes v);
static void vm_enable_paging(void);
void segmentation2paging(struct proc * current)
{
/* switch to the current process page tables before turning paging on */
switch_address_space(current);
vm_enable_paging();
}
/* This function sets up a mapping from within the kernel's address
* space to any other area of memory, either straight physical
* memory (pr == NULL) or a process view of memory, in 4MB windows.
* I.e., it maps in 4MB chunks of virtual (or physical) address space
* to 4MB chunks of kernel virtual address space.
*
* It recognizes pr already being in memory as a special case (no
* mapping required).
*
* The target (i.e. in-kernel) mapping area is one of the freepdes[]
* VM has earlier already told the kernel about that is available. It is
* identified as the 'pde' parameter. This value can be chosen freely
* by the caller, as long as it is in range (i.e. 0 or higher and corresonds
* to a known freepde slot). It is up to the caller to keep track of which
* freepde's are in use, and to determine which ones are free to use.
*
* The logical number supplied by the caller is translated into an actual
* pde number to be used, and a pointer to it (linear address) is returned
* for actual use by phys_copy or phys_memset.
*/
static phys_bytes createpde(
const struct proc *pr, /* Requested process, NULL for physical. */
const phys_bytes linaddr,/* Address after segment translation. */
phys_bytes *bytes, /* Size of chunk, function may truncate it. */
int free_pde_idx, /* index of the free slot to use */
int *changed /* If mapping is made, this is set to 1. */
)
{
u32_t pdeval;
phys_bytes offset;
int pde;
assert(free_pde_idx >= 0 && free_pde_idx < nfreepdes);
pde = freepdes[free_pde_idx];
assert(pde >= 0 && pde < 1024);
if(pr && ((pr == get_cpulocal_var(ptproc)) || !HASPT(pr))) {
/* Process memory is requested, and
* it's a process that is already in current page table, or
* a process that is in every page table.
* Therefore linaddr is valid directly, with the requested
* size.
*/
return linaddr;
}
if(pr) {
/* Requested address is in a process that is not currently
* accessible directly. Grab the PDE entry of that process'
* page table that corresponds to the requested address.
*/
assert(pr->p_seg.p_cr3_v);
pdeval = pr->p_seg.p_cr3_v[I386_VM_PDE(linaddr)];
} else {
/* Requested address is physical. Make up the PDE entry. */
pdeval = (linaddr & I386_VM_ADDR_MASK_4MB) |
I386_VM_BIGPAGE | I386_VM_PRESENT |
I386_VM_WRITE | I386_VM_USER;
}
/* Write the pde value that we need into a pde that the kernel
* can access, into the currently loaded page table so it becomes
* visible.
*/
assert(get_cpulocal_var(ptproc)->p_seg.p_cr3_v);
if(get_cpulocal_var(ptproc)->p_seg.p_cr3_v[pde] != pdeval) {
get_cpulocal_var(ptproc)->p_seg.p_cr3_v[pde] = pdeval;
*changed = 1;
}
/* Memory is now available, but only the 4MB window of virtual
* address space that we have mapped; calculate how much of
* the requested range is visible and return that in *bytes,
* if that is less than the requested range.
*/
offset = linaddr & I386_VM_OFFSET_MASK_4MB; /* Offset in 4MB window. */
*bytes = MIN(*bytes, I386_BIG_PAGE_SIZE - offset);
/* Return the linear address of the start of the new mapping. */
return I386_BIG_PAGE_SIZE*pde + offset;
}
/*===========================================================================*
* lin_lin_copy *
*===========================================================================*/
static int lin_lin_copy(struct proc *srcproc, vir_bytes srclinaddr,
struct proc *dstproc, vir_bytes dstlinaddr, vir_bytes bytes)
{
u32_t addr;
proc_nr_t procslot;
assert(vm_running);
assert(nfreepdes >= MAX_FREEPDES);
assert(get_cpulocal_var(ptproc));
assert(get_cpulocal_var(proc_ptr));
assert(read_cr3() == get_cpulocal_var(ptproc)->p_seg.p_cr3);
procslot = get_cpulocal_var(ptproc)->p_nr;
assert(procslot >= 0 && procslot < I386_VM_DIR_ENTRIES);
if(srcproc) assert(!RTS_ISSET(srcproc, RTS_SLOT_FREE));
if(dstproc) assert(!RTS_ISSET(dstproc, RTS_SLOT_FREE));
assert(!RTS_ISSET(get_cpulocal_var(ptproc), RTS_SLOT_FREE));
assert(get_cpulocal_var(ptproc)->p_seg.p_cr3_v);
if(srcproc) assert(!RTS_ISSET(srcproc, RTS_VMINHIBIT));
if(dstproc) assert(!RTS_ISSET(dstproc, RTS_VMINHIBIT));
while(bytes > 0) {
phys_bytes srcptr, dstptr;
vir_bytes chunk = bytes;
int changed = 0;
#ifdef CONFIG_SMP
unsigned cpu = cpuid;
if (GET_BIT(srcproc->p_stale_tlb, cpu)) {
changed = 1;
UNSET_BIT(srcproc->p_stale_tlb, cpu);
}
if (GET_BIT(dstproc->p_stale_tlb, cpu)) {
changed = 1;
UNSET_BIT(dstproc->p_stale_tlb, cpu);
}
#endif
/* Set up 4MB ranges. */
srcptr = createpde(srcproc, srclinaddr, &chunk, 0, &changed);
dstptr = createpde(dstproc, dstlinaddr, &chunk, 1, &changed);
if(changed)
reload_cr3();
/* Copy pages. */
PHYS_COPY_CATCH(srcptr, dstptr, chunk, addr);
if(addr) {
/* If addr is nonzero, a page fault was caught. */
if(addr >= srcptr && addr < (srcptr + chunk)) {
return EFAULT_SRC;
}
if(addr >= dstptr && addr < (dstptr + chunk)) {
return EFAULT_DST;
}
panic("lin_lin_copy fault out of range");
/* Not reached. */
return EFAULT;
}
/* Update counter and addresses for next iteration, if any. */
bytes -= chunk;
srclinaddr += chunk;
dstlinaddr += chunk;
}
if(srcproc) assert(!RTS_ISSET(srcproc, RTS_SLOT_FREE));
if(dstproc) assert(!RTS_ISSET(dstproc, RTS_SLOT_FREE));
assert(!RTS_ISSET(get_cpulocal_var(ptproc), RTS_SLOT_FREE));
assert(get_cpulocal_var(ptproc)->p_seg.p_cr3_v);
return OK;
}
static u32_t phys_get32(phys_bytes addr)
{
const u32_t v;
int r;
if(!vm_running) {
phys_copy(addr, vir2phys(&v), sizeof(v));
return v;
}
if((r=lin_lin_copy(NULL, addr,
proc_addr(SYSTEM), vir2phys(&v), sizeof(v))) != OK) {
panic("lin_lin_copy for phys_get32 failed: %d", r);
}
return v;
}
#if 0
static char *cr0_str(u32_t e)
{
static char str[80];
strcpy(str, "");
#define FLAG(v) do { if(e & (v)) { strcat(str, #v " "); e &= ~v; } } while(0)
FLAG(I386_CR0_PE);
FLAG(I386_CR0_MP);
FLAG(I386_CR0_EM);
FLAG(I386_CR0_TS);
FLAG(I386_CR0_ET);
FLAG(I386_CR0_PG);
FLAG(I386_CR0_WP);
if(e) { strcat(str, " (++)"); }
return str;
}
static char *cr4_str(u32_t e)
{
static char str[80];
strcpy(str, "");
FLAG(I386_CR4_VME);
FLAG(I386_CR4_PVI);
FLAG(I386_CR4_TSD);
FLAG(I386_CR4_DE);
FLAG(I386_CR4_PSE);
FLAG(I386_CR4_PAE);
FLAG(I386_CR4_MCE);
FLAG(I386_CR4_PGE);
if(e) { strcat(str, " (++)"); }
return str;
}
#endif
void vm_stop(void)
{
write_cr0(read_cr0() & ~I386_CR0_PG);
}
static void vm_enable_paging(void)
{
u32_t cr0, cr4;
int pgeok;
psok = _cpufeature(_CPUF_I386_PSE);
pgeok = _cpufeature(_CPUF_I386_PGE);
cr0= read_cr0();
cr4= read_cr4();
/* First clear PG and PGE flag, as PGE must be enabled after PG. */
write_cr0(cr0 & ~I386_CR0_PG);
write_cr4(cr4 & ~(I386_CR4_PGE | I386_CR4_PSE));
cr0= read_cr0();
cr4= read_cr4();
/* Our first page table contains 4MB entries. */
if(psok)
cr4 |= I386_CR4_PSE;
write_cr4(cr4);
/* First enable paging, then enable global page flag. */
cr0 |= I386_CR0_PG;
write_cr0(cr0 );
cr0 |= I386_CR0_WP;
write_cr0(cr0);
/* May we enable these features? */
if(pgeok)
cr4 |= I386_CR4_PGE;
write_cr4(cr4);
}
/*===========================================================================*
* umap_bios *
*===========================================================================*/
phys_bytes umap_bios(vir_addr, bytes)
vir_bytes vir_addr; /* virtual address in BIOS segment */
vir_bytes bytes; /* # of bytes to be copied */
{
/* Calculate the physical memory address at the BIOS. Note: currently, BIOS
* address zero (the first BIOS interrupt vector) is not considered as an
* error here, but since the physical address will be zero as well, the
* calling function will think an error occurred. This is not a problem,
* since no one uses the first BIOS interrupt vector.
*/
/* Check all acceptable ranges. */
if (vir_addr >= BIOS_MEM_BEGIN && vir_addr + bytes <= BIOS_MEM_END)
return (phys_bytes) vir_addr;
else if (vir_addr >= BASE_MEM_TOP && vir_addr + bytes <= UPPER_MEM_END)
return (phys_bytes) vir_addr;
printf("Warning, error in umap_bios, virtual address 0x%lx\n", vir_addr);
return 0;
}
/*===========================================================================*
* umap_local *
*===========================================================================*/
phys_bytes umap_local(rp, seg, vir_addr, bytes)
register struct proc *rp; /* pointer to proc table entry for process */
int seg; /* T, D, or S segment */
vir_bytes vir_addr; /* virtual address in bytes within the seg */
vir_bytes bytes; /* # of bytes to be copied */
{
/* Calculate the physical memory address for a given virtual address. */
vir_clicks vc; /* the virtual address in clicks */
phys_bytes pa; /* intermediate variables as phys_bytes */
phys_bytes seg_base;
if(seg != T && seg != D && seg != S)
panic("umap_local: wrong seg: %d", seg);
if (bytes <= 0) return( (phys_bytes) 0);
if (vir_addr + bytes <= vir_addr) return 0; /* overflow */
vc = (vir_addr + bytes - 1) >> CLICK_SHIFT; /* last click of data */
if (seg != T)
seg = (vc < rp->p_memmap[D].mem_vir + rp->p_memmap[D].mem_len ? D : S);
else if (rp->p_memmap[T].mem_len == 0) /* common I&D? */
seg = D; /* ptrace needs this */
if ((vir_addr>>CLICK_SHIFT) >= rp->p_memmap[seg].mem_vir +
rp->p_memmap[seg].mem_len) return( (phys_bytes) 0 );
if (vc >= rp->p_memmap[seg].mem_vir +
rp->p_memmap[seg].mem_len) return( (phys_bytes) 0 );
seg_base = (phys_bytes) rp->p_memmap[seg].mem_phys;
seg_base = seg_base << CLICK_SHIFT; /* segment origin in bytes */
pa = (phys_bytes) vir_addr;
pa -= rp->p_memmap[seg].mem_vir << CLICK_SHIFT;
return(seg_base + pa);
}
/*===========================================================================*
* umap_virtual *
*===========================================================================*/
phys_bytes umap_virtual(rp, seg, vir_addr, bytes)
register struct proc *rp; /* pointer to proc table entry for process */
int seg; /* T, D, or S segment */
vir_bytes vir_addr; /* virtual address in bytes within the seg */
vir_bytes bytes; /* # of bytes to be copied */
{
vir_bytes linear;
phys_bytes phys = 0;
if(!(linear = umap_local(rp, seg, vir_addr, bytes))) {
printf("SYSTEM:umap_virtual: umap_local failed\n");
phys = 0;
} else {
if(vm_lookup(rp, linear, &phys, NULL) != OK) {
printf("SYSTEM:umap_virtual: vm_lookup of %s: seg 0x%x: 0x%lx failed\n", rp->p_name, seg, vir_addr);
phys = 0;
} else {
if(phys == 0)
panic("vm_lookup returned phys: %d", phys);
}
}
if(phys == 0) {
printf("SYSTEM:umap_virtual: lookup failed\n");
return 0;
}
/* Now make sure addresses are contiguous in physical memory
* so that the umap makes sense.
*/
if(bytes > 0 && vm_lookup_range(rp, linear, NULL, bytes) != bytes) {
printf("umap_virtual: %s: %lu at 0x%lx (vir 0x%lx) not contiguous\n",
rp->p_name, bytes, linear, vir_addr);
return 0;
}
/* phys must be larger than 0 (or the caller will think the call
* failed), and address must not cross a page boundary.
*/
assert(phys);
return phys;
}
/*===========================================================================*
* vm_lookup *
*===========================================================================*/
int vm_lookup(const struct proc *proc, const vir_bytes virtual,
phys_bytes *physical, u32_t *ptent)
{
u32_t *root, *pt;
int pde, pte;
u32_t pde_v, pte_v;
assert(proc);
assert(physical);
assert(!isemptyp(proc));
if(!HASPT(proc)) {
*physical = virtual;
return OK;
}
/* Retrieve page directory entry. */
root = (u32_t *) proc->p_seg.p_cr3;
assert(!((u32_t) root % I386_PAGE_SIZE));
pde = I386_VM_PDE(virtual);
assert(pde >= 0 && pde < I386_VM_DIR_ENTRIES);
pde_v = phys_get32((u32_t) (root + pde));
if(!(pde_v & I386_VM_PRESENT)) {
return EFAULT;
}
/* We don't expect to ever see this. */
if(pde_v & I386_VM_BIGPAGE) {
*physical = pde_v & I386_VM_ADDR_MASK_4MB;
if(ptent) *ptent = pde_v;
*physical += virtual & I386_VM_OFFSET_MASK_4MB;
} else {
/* Retrieve page table entry. */
pt = (u32_t *) I386_VM_PFA(pde_v);
assert(!((u32_t) pt % I386_PAGE_SIZE));
pte = I386_VM_PTE(virtual);
assert(pte >= 0 && pte < I386_VM_PT_ENTRIES);
pte_v = phys_get32((u32_t) (pt + pte));
if(!(pte_v & I386_VM_PRESENT)) {
return EFAULT;
}
if(ptent) *ptent = pte_v;
/* Actual address now known; retrieve it and add page offset. */
*physical = I386_VM_PFA(pte_v);
*physical += virtual % I386_PAGE_SIZE;
}
return OK;
}
/*===========================================================================*
* vm_lookup_range *
*===========================================================================*/
size_t vm_lookup_range(const struct proc *proc, vir_bytes vir_addr,
phys_bytes *phys_addr, size_t bytes)
{
/* Look up the physical address corresponding to linear virtual address
* 'vir_addr' for process 'proc'. Return the size of the range covered
* by contiguous physical memory starting from that address; this may
* be anywhere between 0 and 'bytes' inclusive. If the return value is
* nonzero, and 'phys_addr' is non-NULL, 'phys_addr' will be set to the
* base physical address of the range. 'vir_addr' and 'bytes' need not
* be page-aligned, but the caller must have verified that the given
* linear range is valid for the given process at all.
*/
phys_bytes phys, next_phys;
size_t len;
assert(proc);
assert(bytes > 0);
if (!HASPT(proc))
return bytes;
/* Look up the first page. */
if (vm_lookup(proc, vir_addr, &phys, NULL) != OK)
return 0;
if (phys_addr != NULL)
*phys_addr = phys;
len = I386_PAGE_SIZE - (vir_addr % I386_PAGE_SIZE);
vir_addr += len;
next_phys = phys + len;
/* Look up any next pages and test physical contiguity. */
while (len < bytes) {
if (vm_lookup(proc, vir_addr, &phys, NULL) != OK)
break;
if (next_phys != phys)
break;
len += I386_PAGE_SIZE;
vir_addr += I386_PAGE_SIZE;
next_phys += I386_PAGE_SIZE;
}
/* We might now have overshot the requested length somewhat. */
return MIN(bytes, len);
}
/*===========================================================================*
* vm_suspend *
*===========================================================================*/
static void vm_suspend(struct proc *caller, const struct proc *target,
const vir_bytes linaddr, const vir_bytes len, const int type)
{
/* This range is not OK for this process. Set parameters
* of the request and notify VM about the pending request.
*/
assert(!RTS_ISSET(caller, RTS_VMREQUEST));
assert(!RTS_ISSET(target, RTS_VMREQUEST));
RTS_SET(caller, RTS_VMREQUEST);
caller->p_vmrequest.req_type = VMPTYPE_CHECK;
caller->p_vmrequest.target = target->p_endpoint;
caller->p_vmrequest.params.check.start = linaddr;
caller->p_vmrequest.params.check.length = len;
caller->p_vmrequest.params.check.writeflag = 1;
caller->p_vmrequest.type = type;
/* Connect caller on vmrequest wait queue. */
if(!(caller->p_vmrequest.nextrequestor = vmrequest))
if(OK != send_sig(VM_PROC_NR, SIGKMEM))
panic("send_sig failed");
vmrequest = caller;
}
/*===========================================================================*
* vm_check_range *
*===========================================================================*/
int vm_check_range(struct proc *caller, struct proc *target,
vir_bytes vir_addr, size_t bytes)
{
/* Public interface to vm_suspend(), for use by kernel calls. On behalf
* of 'caller', call into VM to check linear virtual address range of
* process 'target', starting at 'vir_addr', for 'bytes' bytes. This
* function assumes that it will called twice if VM returned an error
* the first time (since nothing has changed in that case), and will
* then return the error code resulting from the first call. Upon the
* first call, a non-success error code is returned as well.
*/
int r;
if (!vm_running)
return EFAULT;
if ((caller->p_misc_flags & MF_KCALL_RESUME) &&
(r = caller->p_vmrequest.vmresult) != OK)
return r;
vm_suspend(caller, target, vir_addr, bytes, VMSTYPE_KERNELCALL);
return VMSUSPEND;
}
/*===========================================================================*
* delivermsg *
*===========================================================================*/
void delivermsg(struct proc *rp)
{
int r = OK;
assert(rp->p_misc_flags & MF_DELIVERMSG);
assert(rp->p_delivermsg.m_source != NONE);
if (copy_msg_to_user(rp, &rp->p_delivermsg,
(message *) rp->p_delivermsg_vir)) {
printf("WARNING wrong user pointer 0x%08lx from "
"process %s / %d\n",
rp->p_delivermsg_vir,
rp->p_name,
rp->p_endpoint);
r = EFAULT;
}
/* Indicate message has been delivered; address is 'used'. */
rp->p_delivermsg.m_source = NONE;
rp->p_misc_flags &= ~MF_DELIVERMSG;
if(!(rp->p_misc_flags & MF_CONTEXT_SET)) {
rp->p_reg.retreg = r;
}
}
#if 0
static char *flagstr(u32_t e, const int dir)
{
static char str[80];
strcpy(str, "");
FLAG(I386_VM_PRESENT);
FLAG(I386_VM_WRITE);
FLAG(I386_VM_USER);
FLAG(I386_VM_PWT);
FLAG(I386_VM_PCD);
FLAG(I386_VM_GLOBAL);
if(dir)
FLAG(I386_VM_BIGPAGE); /* Page directory entry only */
else
FLAG(I386_VM_DIRTY); /* Page table entry only */
return str;
}
static void vm_pt_print(u32_t *pagetable, const u32_t v)
{
int pte;
int col = 0;
assert(!((u32_t) pagetable % I386_PAGE_SIZE));
for(pte = 0; pte < I386_VM_PT_ENTRIES; pte++) {
u32_t pte_v, pfa;
pte_v = phys_get32((u32_t) (pagetable + pte));
if(!(pte_v & I386_VM_PRESENT))
continue;
pfa = I386_VM_PFA(pte_v);
printf("%4d:%08lx:%08lx %2s ",
pte, v + I386_PAGE_SIZE*pte, pfa,
(pte_v & I386_VM_WRITE) ? "rw":"RO");
col++;
if(col == 3) { printf("\n"); col = 0; }
}
if(col > 0) printf("\n");
return;
}
static void vm_print(u32_t *root)
{
int pde;
assert(!((u32_t) root % I386_PAGE_SIZE));
printf("page table 0x%lx:\n", root);
for(pde = 0; pde < I386_VM_DIR_ENTRIES; pde++) {
u32_t pde_v;
u32_t *pte_a;
pde_v = phys_get32((u32_t) (root + pde));
if(!(pde_v & I386_VM_PRESENT))
continue;
if(pde_v & I386_VM_BIGPAGE) {
printf("%4d: 0x%lx, flags %s\n",
pde, I386_VM_PFA(pde_v), flagstr(pde_v, 1));
} else {
pte_a = (u32_t *) I386_VM_PFA(pde_v);
printf("%4d: pt %08lx %s\n",
pde, pte_a, flagstr(pde_v, 1));
vm_pt_print(pte_a, pde * I386_VM_PT_ENTRIES * I386_PAGE_SIZE);
printf("\n");
}
}
return;
}
#endif
/*===========================================================================*
* lin_memset *
*===========================================================================*/
int vm_phys_memset(phys_bytes ph, const u8_t c, phys_bytes bytes)
{
u32_t p;
p = c | (c << 8) | (c << 16) | (c << 24);
if(!vm_running) {
phys_memset(ph, p, bytes);
return OK;
}
assert(nfreepdes >= MAX_FREEPDES);
assert(get_cpulocal_var(ptproc)->p_seg.p_cr3_v);
/* With VM, we have to map in the physical memory.
* We can do this 4MB at a time.
*/
while(bytes > 0) {
int changed = 0;
phys_bytes chunk = bytes, ptr;
ptr = createpde(NULL, ph, &chunk, 0, &changed);
if(changed)
reload_cr3();
/* We can memset as many bytes as we have remaining,
* or as many as remain in the 4MB chunk we mapped in.
*/
phys_memset(ptr, p, chunk);
bytes -= chunk;
ph += chunk;
}
assert(get_cpulocal_var(ptproc)->p_seg.p_cr3_v);
return OK;
}
/*===========================================================================*
* virtual_copy_f *
*===========================================================================*/
int virtual_copy_f(caller, src_addr, dst_addr, bytes, vmcheck)
struct proc * caller;
struct vir_addr *src_addr; /* source virtual address */
struct vir_addr *dst_addr; /* destination virtual address */
vir_bytes bytes; /* # of bytes to copy */
int vmcheck; /* if nonzero, can return VMSUSPEND */
{
/* Copy bytes from virtual address src_addr to virtual address dst_addr.
* Virtual addresses can be in ABS, LOCAL_SEG, or BIOS_SEG.
*/
struct vir_addr *vir_addr[2]; /* virtual source and destination address */
phys_bytes phys_addr[2]; /* absolute source and destination */
int seg_index;
int i;
struct proc *procs[2];
assert((vmcheck && caller) || (!vmcheck && !caller));
/* Check copy count. */
if (bytes <= 0) return(EDOM);
/* Do some more checks and map virtual addresses to physical addresses. */
vir_addr[_SRC_] = src_addr;
vir_addr[_DST_] = dst_addr;
for (i=_SRC_; i<=_DST_; i++) {
int proc_nr, type;
struct proc *p;
type = vir_addr[i]->segment & SEGMENT_TYPE;
if((type != PHYS_SEG && type != BIOS_SEG) &&
isokendpt(vir_addr[i]->proc_nr_e, &proc_nr))
p = proc_addr(proc_nr);
else
p = NULL;
procs[i] = p;
/* Get physical address. */
switch(type) {
case LOCAL_SEG:
case LOCAL_VM_SEG:
if(!p) {
return EDEADSRCDST;
}
seg_index = vir_addr[i]->segment & SEGMENT_INDEX;
if(type == LOCAL_SEG)
phys_addr[i] = umap_local(p, seg_index, vir_addr[i]->offset,
bytes);
else
phys_addr[i] = umap_virtual(p, seg_index,
vir_addr[i]->offset, bytes);
if(phys_addr[i] == 0) {
printf("virtual_copy: map 0x%x failed for %s seg %d, "
"offset %lx, len %lu, i %d\n",
type, p->p_name, seg_index, vir_addr[i]->offset,
bytes, i);
}
break;
#if _MINIX_CHIP == _CHIP_INTEL
case BIOS_SEG:
phys_addr[i] = umap_bios(vir_addr[i]->offset, bytes );
break;
#endif
case PHYS_SEG:
phys_addr[i] = vir_addr[i]->offset;
break;
default:
printf("virtual_copy: strange type 0x%x\n", type);
return EINVAL;
}
/* Check if mapping succeeded. */
if (phys_addr[i] <= 0 && vir_addr[i]->segment != PHYS_SEG) {
printf("virtual_copy EFAULT\n");
return EFAULT;
}
}
if(vm_running) {
int r;
if(caller && (caller->p_misc_flags & MF_KCALL_RESUME)) {
assert(caller->p_vmrequest.vmresult != VMSUSPEND);
if(caller->p_vmrequest.vmresult != OK) {
return caller->p_vmrequest.vmresult;
}
}
if((r=lin_lin_copy(procs[_SRC_], phys_addr[_SRC_],
procs[_DST_], phys_addr[_DST_], bytes)) != OK) {
struct proc *target = NULL;
phys_bytes lin;
if(r != EFAULT_SRC && r != EFAULT_DST)
panic("lin_lin_copy failed: %d", r);
if(!vmcheck || !caller) {
return r;
}
if(r == EFAULT_SRC) {
lin = phys_addr[_SRC_];
target = procs[_SRC_];
} else if(r == EFAULT_DST) {
lin = phys_addr[_DST_];
target = procs[_DST_];
} else {
panic("r strange: %d", r);
}
assert(caller);
assert(target);
vm_suspend(caller, target, lin, bytes, VMSTYPE_KERNELCALL);
return VMSUSPEND;
}
return OK;
}
assert(!vm_running);
/* can't copy to/from process with PT without VM */
#define NOPT(p) (!(p) || !HASPT(p))
if(!NOPT(procs[_SRC_])) {
printf("ignoring page table src: %s / %d at 0x%x\n",
procs[_SRC_]->p_name, procs[_SRC_]->p_endpoint, procs[_SRC_]->p_seg.p_cr3);
}
if(!NOPT(procs[_DST_])) {
printf("ignoring page table dst: %s / %d at 0x%x\n",
procs[_DST_]->p_name, procs[_DST_]->p_endpoint,
procs[_DST_]->p_seg.p_cr3);
}
/* Now copy bytes between physical addresseses. */
if(phys_copy(phys_addr[_SRC_], phys_addr[_DST_], (phys_bytes) bytes))
return EFAULT;
return OK;
}
/*===========================================================================*
* data_copy *
*===========================================================================*/
int data_copy(const endpoint_t from_proc, const vir_bytes from_addr,
const endpoint_t to_proc, const vir_bytes to_addr,
size_t bytes)
{
struct vir_addr src, dst;
src.segment = dst.segment = D;
src.offset = from_addr;
dst.offset = to_addr;
src.proc_nr_e = from_proc;
dst.proc_nr_e = to_proc;
return virtual_copy(&src, &dst, bytes);
}
/*===========================================================================*
* data_copy_vmcheck *
*===========================================================================*/
int data_copy_vmcheck(struct proc * caller,
const endpoint_t from_proc, const vir_bytes from_addr,
const endpoint_t to_proc, const vir_bytes to_addr,
size_t bytes)
{
struct vir_addr src, dst;
src.segment = dst.segment = D;
src.offset = from_addr;
dst.offset = to_addr;
src.proc_nr_e = from_proc;
dst.proc_nr_e = to_proc;
return virtual_copy_vmcheck(caller, &src, &dst, bytes);
}
/*===========================================================================*
* arch_pre_exec *
*===========================================================================*/
void arch_pre_exec(struct proc *pr, const u32_t ip, const u32_t sp)
{
/* set program counter and stack pointer. */
pr->p_reg.pc = ip;
pr->p_reg.sp = sp;
}
/*===========================================================================*
* arch_umap *
*===========================================================================*/
int arch_umap(const struct proc *pr, vir_bytes offset, vir_bytes count,
int seg, phys_bytes *addr)
{
switch(seg) {
case BIOS_SEG:
*addr = umap_bios(offset, count);
return OK;
}
/* This must be EINVAL; the umap fallback function in
* lib/syslib/alloc_util.c depends on it to detect an
* older kernel (as opposed to mapping error).
*/
return EINVAL;
}
/* VM reports page directory slot we're allowed to use freely. */
void i386_freepde(const int pde)
{
if(nfreepdes >= MAX_FREEPDES)
return;
freepdes[nfreepdes++] = pde;
}
static int oxpcie_mapping_index = -1,
lapic_mapping_index = -1,
ioapic_first_index = -1,
ioapic_last_index = -1;
int arch_phys_map(const int index,
phys_bytes *addr,
phys_bytes *len,
int *flags)
{
static int first = 1;
int freeidx = 0;
static char *ser_var = NULL;
if(first) {
#ifdef USE_APIC
if(lapic_addr)
lapic_mapping_index = freeidx++;
if (ioapic_enabled) {
ioapic_first_index = freeidx;
assert(nioapics > 0);
freeidx += nioapics;
ioapic_last_index = freeidx-1;
}
#endif
#ifdef CONFIG_OXPCIE
if((ser_var = env_get("oxpcie"))) {
if(ser_var[0] != '0' || ser_var[1] != 'x') {
printf("oxpcie address in hex please\n");
} else {
printf("oxpcie address is %s\n", ser_var);
oxpcie_mapping_index = freeidx++;
}
}
#endif
first = 0;
}
#ifdef USE_APIC
/* map the local APIC if enabled */
if (index == lapic_mapping_index) {
if (!lapic_addr)
return EINVAL;
*addr = vir2phys(lapic_addr);
*len = 4 << 10 /* 4kB */;
*flags = VMMF_UNCACHED;
return OK;
}
else if (ioapic_enabled && index <= nioapics) {
*addr = io_apic[index - 1].paddr;
*len = 4 << 10 /* 4kB */;
*flags = VMMF_UNCACHED;
return OK;
}
#endif
#if CONFIG_OXPCIE
if(index == oxpcie_mapping_index) {
*addr = strtoul(ser_var+2, NULL, 16);
*len = 0x4000;
*flags = VMMF_UNCACHED;
return OK;
}
#endif
return EINVAL;
}
int arch_phys_map_reply(const int index, const vir_bytes addr)
{
#ifdef USE_APIC
/* if local APIC is enabled */
if (index == lapic_mapping_index && lapic_addr) {
lapic_addr_vaddr = addr;
return OK;
}
else if (ioapic_enabled && index >= ioapic_first_index &&
index <= ioapic_last_index) {
io_apic[index - ioapic_first_index].vaddr = addr;
return OK;
}
#endif
#if CONFIG_OXPCIE
if (index == oxpcie_mapping_index) {
oxpcie_set_vaddr((unsigned char *) addr);
return OK;
}
#endif
return EINVAL;
}
int arch_enable_paging(struct proc * caller, const message * m_ptr)
{
struct vm_ep_data ep_data;
int r;
/* switch_address_space() checks what is in cr3, and do nothing if it's
* the same as the cr3 of its argument, newptproc. If MINIX was
* previously booted, this could very well be the case.
*
* The first time switch_address_space() is called, we want to
* force it to do something (load cr3 and set newptproc), so we
* zero cr3, and force paging off to make that a safe thing to do.
*
* After that, segmentation2paging() enables paging with the page table
* of caller loaded.
*/
vm_stop();
write_cr3(0);
/* switch from segmentation only to paging */
segmentation2paging(caller);
vm_running = 1;
/*
* copy the extra data associated with the call from userspace
*/
if((r=data_copy(caller->p_endpoint, (vir_bytes)m_ptr->SVMCTL_VALUE,
KERNEL, (vir_bytes) &ep_data, sizeof(ep_data))) != OK) {
printf("vmctl_enable_paging: data_copy failed! (%d)\n", r);
return r;
}
/*
* when turning paging on i386 we also change the segment limits to make
* the special mappings requested by the kernel reachable
*/
if ((r = prot_set_kern_seg_limit(ep_data.data_seg_limit)) != OK)
return r;
/*
* install the new map provided by the call
*/
if (newmap(caller, caller, ep_data.mem_map) != OK)
panic("arch_enable_paging: newmap failed");
#ifdef USE_APIC
/* start using the virtual addresses */
/* if local APIC is enabled */
if (lapic_addr) {
lapic_addr = lapic_addr_vaddr;
lapic_eoi_addr = LAPIC_EOI;
}
/* if IO apics are enabled */
if (ioapic_enabled) {
int i;
for (i = 0; i < nioapics; i++) {
io_apic[i].addr = io_apic[i].vaddr;
}
}
#if CONFIG_SMP
barrier();
i386_paging_enabled = 1;
wait_for_APs_to_finish_booting();
#endif
#endif
#ifdef USE_WATCHDOG
/*
* We make sure that we don't enable the watchdog until paging is turned
* on as we might get an NMI while switching and we might still use wrong
* lapic address. Bad things would happen. It is unfortunate but such is
* life
*/
if (watchdog_enabled)
i386_watchdog_start();
#endif
return OK;
}
void release_address_space(struct proc *pr)
{
pr->p_seg.p_cr3_v = NULL;
}
/* computes a checksum of a buffer of a given length. The byte sum must be zero */
int platform_tbl_checksum_ok(void *ptr, unsigned int length)
{
u8_t total = 0;
unsigned int i;
for (i = 0; i < length; i++)
total += ((unsigned char *)ptr)[i];
return !total;
}
int platform_tbl_ptr(phys_bytes start,
phys_bytes end,
unsigned increment,
void * buff,
unsigned size,
phys_bytes * phys_addr,
int ((* cmp_f)(void *)))
{
phys_bytes addr;
for (addr = start; addr < end; addr += increment) {
phys_copy (addr, vir2phys(buff), size);
if (cmp_f(buff)) {
if (phys_addr)
*phys_addr = addr;
return 1;
}
}
return 0;
}