50e2064049
This commit removes all traces of Minix segments (the text/data/stack memory map abstraction in the kernel) and significance of Intel segments (hardware segments like CS, DS that add offsets to all addressing before page table translation). This ultimately simplifies the memory layout and addressing and makes the same layout possible on non-Intel architectures. There are only two types of addresses in the world now: virtual and physical; even the kernel and processes have the same virtual address space. Kernel and user processes can be distinguished at a glance as processes won't use 0xF0000000 and above. No static pre-allocated memory sizes exist any more. Changes to booting: . The pre_init.c leaves the kernel and modules exactly as they were left by the bootloader in physical memory . The kernel starts running using physical addressing, loaded at a fixed location given in its linker script by the bootloader. All code and data in this phase are linked to this fixed low location. . It makes a bootstrap pagetable to map itself to a fixed high location (also in linker script) and jumps to the high address. All code and data then use this high addressing. . All code/data symbols linked at the low addresses is prefixed by an objcopy step with __k_unpaged_*, so that that code cannot reference highly-linked symbols (which aren't valid yet) or vice versa (symbols that aren't valid any more). . The two addressing modes are separated in the linker script by collecting the unpaged_*.o objects and linking them with low addresses, and linking the rest high. Some objects are linked twice, once low and once high. . The bootstrap phase passes a lot of information (e.g. free memory list, physical location of the modules, etc.) using the kinfo struct. . After this bootstrap the low-linked part is freed. . The kernel maps in VM into the bootstrap page table so that VM can begin executing. Its first job is to make page tables for all other boot processes. 
So VM runs before RS, and RS gets a fully dynamic, VM-managed address space. VM gets its privilege info from RS as usual but that happens after RS starts running. . Both the kernel loading VM and VM organizing boot processes happen using the libexec logic. This removes the last reason for VM to still know much about exec() and vm/exec.c is gone. Further Implementation: . All segments are based at 0 and have a 4 GB limit. . The kernel is mapped in at the top of the virtual address space so as not to constrain the user processes. . Processes do not use segments from the LDT at all; there are no segments in the LDT any more, so no LLDT is needed. . The Minix segments T/D/S are gone and so none of the user-space or in-kernel copy functions use them. The copy functions use a process endpoint of NONE to realize it's a physical address, virtual otherwise. . The umap call only makes sense to translate a virtual address to a physical address now. . Segments-related calls like newmap and alloc_segments are gone. . All segments-related translation in VM is gone (vir2map etc). . Initialization in VM is simpler as no moving around is necessary. . VM and all other boot processes can be linked wherever they wish and will be mapped in at the right location by the kernel and VM respectively. Other changes: . The multiboot code is less special: it does not use mb_print for its diagnostics any more but uses printf() as normal, saving the output into the diagnostics buffer, only printing to the screen using the direct print functions if a panic() occurs. . The multiboot code uses the flexible 'free memory map list' style to receive the list of free memory if available. . The kernel determines the memory layout of the processes to a degree: it tells VM where the kernel starts and ends and where the kernel wants the top of the process to be. VM then uses this entire range, i.e. 
the stack is right at the top, and mmap()ped bits of memory are placed below that downwards, and the break grows upwards. Other Consequences: . Every process gets its own page table as address spaces can't be separated any more by segments. . As all segments are 0-based, there is no distinction between virtual and linear addresses, nor between userspace and kernel addresses. . Less work is done when context switching, leading to a net performance increase. (8% faster on my machine for 'make servers'.) . The layout and configuration of the GDT makes sysenter and syscall possible.
282 lines
10 KiB
C
282 lines
10 KiB
C
#ifndef PROC_H
|
|
#define PROC_H
|
|
|
|
#include <minix/const.h>
|
|
#include <sys/cdefs.h>
|
|
|
|
#ifndef __ASSEMBLY__
|
|
|
|
/* Here is the declaration of the process table.  It contains all process
 * data, including registers, flags, scheduling priority, memory map,
 * accounting, message passing (IPC) information, and so on.
 *
 * Many assembly code routines reference fields in it.  The offsets to these
 * fields are defined in the assembler include file sconst.h.  When changing
 * struct proc, be sure to change sconst.h to match.
 */
|
|
#include <minix/com.h>
|
|
#include <minix/portio.h>
|
|
#include "const.h"
|
|
#include "priv.h"
|
|
|
|
struct proc {
|
|
struct stackframe_s p_reg; /* process' registers saved in stack frame */
|
|
struct segframe p_seg; /* segment descriptors */
|
|
proc_nr_t p_nr; /* number of this process (for fast access) */
|
|
struct priv *p_priv; /* system privileges structure */
|
|
volatile u32_t p_rts_flags; /* process is runnable only if zero */
|
|
volatile u32_t p_misc_flags; /* flags that do not suspend the process */
|
|
|
|
char p_priority; /* current process priority */
|
|
u64_t p_cpu_time_left; /* time left to use the cpu */
|
|
unsigned p_quantum_size_ms; /* assigned time quantum in ms
|
|
FIXME remove this */
|
|
struct proc *p_scheduler; /* who should get out of quantum msg */
|
|
unsigned p_cpu; /* what CPU is the process running on */
|
|
#ifdef CONFIG_SMP
|
|
bitchunk_t p_cpu_mask[BITMAP_CHUNKS(CONFIG_MAX_CPUS)]; /* what CPUs is hte
|
|
process allowed to
|
|
run on */
|
|
bitchunk_t p_stale_tlb[BITMAP_CHUNKS(CONFIG_MAX_CPUS)]; /* On which cpu are
|
|
possibly stale entries from this process and has
|
|
to be fresed the next kernel touches this
|
|
processes memory
|
|
*/
|
|
#endif
|
|
|
|
/* Accounting statistics that get passed to the process' scheduler */
|
|
struct {
|
|
u64_t enter_queue; /* time when enqueued (cycles) */
|
|
u64_t time_in_queue; /* time spent in queue */
|
|
unsigned long dequeues;
|
|
unsigned long ipc_sync;
|
|
unsigned long ipc_async;
|
|
unsigned long preempted;
|
|
} p_accounting;
|
|
|
|
clock_t p_user_time; /* user time in ticks */
|
|
clock_t p_sys_time; /* sys time in ticks */
|
|
|
|
clock_t p_virt_left; /* number of ticks left on virtual timer */
|
|
clock_t p_prof_left; /* number of ticks left on profile timer */
|
|
|
|
u64_t p_cycles; /* how many cycles did the process use */
|
|
u64_t p_kcall_cycles; /* kernel cycles caused by this proc (kcall) */
|
|
u64_t p_kipc_cycles; /* cycles caused by this proc (ipc) */
|
|
|
|
struct proc *p_nextready; /* pointer to next ready process */
|
|
struct proc *p_caller_q; /* head of list of procs wishing to send */
|
|
struct proc *p_q_link; /* link to next proc wishing to send */
|
|
endpoint_t p_getfrom_e; /* from whom does process want to receive? */
|
|
endpoint_t p_sendto_e; /* to whom does process want to send? */
|
|
|
|
sigset_t p_pending; /* bit map for pending kernel signals */
|
|
|
|
char p_name[PROC_NAME_LEN]; /* name of the process, including \0 */
|
|
|
|
endpoint_t p_endpoint; /* endpoint number, generation-aware */
|
|
|
|
message p_sendmsg; /* Message from this process if SENDING */
|
|
message p_delivermsg; /* Message for this process if MF_DELIVERMSG */
|
|
vir_bytes p_delivermsg_vir; /* Virtual addr this proc wants message at */
|
|
|
|
/* If handler functions detect a process wants to do something with
|
|
* memory that isn't present, VM has to fix it. Until it has asked
|
|
* what needs to be done and fixed it, save necessary state here.
|
|
*
|
|
* The requestor gets a copy of its request message in reqmsg and gets
|
|
* VMREQUEST set.
|
|
*/
|
|
struct {
|
|
struct proc *nextrestart; /* next in vmrestart chain */
|
|
struct proc *nextrequestor; /* next in vmrequest chain */
|
|
#define VMSTYPE_SYS_NONE 0
|
|
#define VMSTYPE_KERNELCALL 1
|
|
#define VMSTYPE_DELIVERMSG 2
|
|
#define VMSTYPE_MAP 3
|
|
|
|
int type; /* suspended operation */
|
|
union {
|
|
/* VMSTYPE_SYS_MESSAGE */
|
|
message reqmsg; /* suspended request message */
|
|
} saved;
|
|
|
|
/* Parameters of request to VM */
|
|
int req_type;
|
|
endpoint_t target;
|
|
union {
|
|
struct {
|
|
vir_bytes start, length; /* memory range */
|
|
u8_t writeflag; /* nonzero for write access */
|
|
} check;
|
|
struct {
|
|
char writeflag;
|
|
endpoint_t ep_s;
|
|
vir_bytes vir_s, vir_d;
|
|
vir_bytes length;
|
|
} map;
|
|
} params;
|
|
/* VM result when available */
|
|
int vmresult;
|
|
|
|
/* If the suspended operation is a sys_call, its details are
|
|
* stored here.
|
|
*/
|
|
} p_vmrequest;
|
|
|
|
int p_found; /* consistency checking variables */
|
|
int p_magic; /* check validity of proc pointers */
|
|
|
|
#if DEBUG_TRACE
|
|
int p_schedules;
|
|
#endif
|
|
};
|
|
|
|
#endif /* __ASSEMBLY__ */
|
|
|
|
/* Bits for the runtime flags. A process is runnable iff p_rts_flags == 0.
 * These are plain #defines (not an enum) because they sit outside the
 * __ASSEMBLY__ guard and must stay usable from preprocessed assembly.
 */
#define RTS_SLOT_FREE	0x01	/* process slot is free */
#define RTS_PROC_STOP	0x02	/* process has been stopped */
#define RTS_SENDING	0x04	/* process blocked trying to send */
#define RTS_RECEIVING	0x08	/* process blocked trying to receive */
#define RTS_SIGNALED	0x10	/* set when new kernel signal arrives */
#define RTS_SIG_PENDING	0x20	/* unready while signal being processed */
#define RTS_P_STOP	0x40	/* set when process is being traced */
#define RTS_NO_PRIV	0x80	/* keep forked system process from running */
#define RTS_NO_ENDPOINT	0x100	/* process cannot send or receive messages */
#define RTS_VMINHIBIT	0x200	/* not scheduled until pagetable set by VM */
#define RTS_PAGEFAULT	0x400	/* process has unhandled pagefault */
#define RTS_VMREQUEST	0x800	/* originator of vm memory request */
#define RTS_VMREQTARGET	0x1000	/* target of vm memory request */
#define RTS_PREEMPTED	0x4000	/* this process was preempted by a higher
				 * priority process and we should pick a new
				 * one to run. Processes with this flag
				 * should be returned to the front of their
				 * current priority queue if they are still
				 * runnable before we pick a new one
				 */
#define RTS_NO_QUANTUM	0x8000	/* process ran out of its quantum and we
				 * should pick a new one. Process was
				 * dequeued and should be enqueued at the end
				 * of some run queue again
				 */
|
|
|
|
/* A process is runnable iff p_rts_flags == 0.
 * rts_f_is_runnable() tests a flags value, proc_is_runnable() a process.
 */
#define rts_f_is_runnable(flg)	((flg) == 0)
#define proc_is_runnable(p)	(rts_f_is_runnable((p)->p_rts_flags))

/* Single-flag tests. These yield the masked bit (nonzero when set), not a
 * normalized 0/1 value.
 */
#define proc_is_preempted(p)	((p)->p_rts_flags & RTS_PREEMPTED)
#define proc_no_quantum(p)	((p)->p_rts_flags & RTS_NO_QUANTUM)
#define proc_used_fpu(p)	((p)->p_misc_flags & (MF_FPU_INITIALIZED))

/* Sanity check on a proc pointer: its magic field must equal PMAGIC. */
#define proc_ptr_ok(p)		((p)->p_magic == PMAGIC)

/* test whether the process is scheduled by the kernel's default policy,
 * i.e. it has no scheduler assigned or it is its own scheduler
 */
#define proc_kernel_scheduler(p)	((p)->p_scheduler == NULL || \
					(p)->p_scheduler == (p))
|
|
|
|
/* Macro to return: on which process is a certain process blocked?
 * return endpoint number (can be ANY) or NONE. It's important to
 * check RTS_SENDING first, and then RTS_RECEIVING, as they could
 * both be on (if a sendrec() blocks on sending), and p_getfrom_e
 * could be nonsense even though RTS_RECEIVING is on.
 *
 * Note that 'p' is evaluated more than once; do not pass an expression
 * with side effects.
 */
#define P_BLOCKEDON(p)							\
	(								\
		((p)->p_rts_flags & RTS_SENDING) ?			\
			(p)->p_sendto_e :				\
			(						\
				((p)->p_rts_flags & RTS_RECEIVING) ?	\
					(p)->p_getfrom_e :		\
					NONE				\
			)						\
	)
|
|
|
|
/* These runtime flags can be tested and manipulated by these macros. */

/* Nonzero iff every bit in 'f' is set in the runtime flags of 'rp'
 * (so a multi-bit mask only matches when all of its bits are set, and
 * f == 0 always matches). Both arguments are evaluated more than once.
 */
#define RTS_ISSET(rp, f) (((rp)->p_rts_flags & (f)) == (f))
|
|
|
|
|
|
/* Set flag and dequeue if the process was runnable.
 *
 * The flags are sampled before the update so that dequeue() is called only
 * on the runnable -> not-runnable transition; setting further flags on an
 * already blocked process does not dequeue it again. 'rp' is evaluated
 * more than once.
 */
#define RTS_SET(rp, f)							\
	do {								\
		const int rts = (rp)->p_rts_flags;			\
		(rp)->p_rts_flags |= (f);				\
		if(rts_f_is_runnable(rts) && !proc_is_runnable(rp)) {	\
			dequeue(rp);					\
		}							\
	} while(0)
|
|
|
|
/* Clear flag and enqueue if the process was not runnable but is now.
 *
 * The flags are sampled before the update so that enqueue() is called only
 * on the not-runnable -> runnable transition; clearing a flag on a process
 * that is already runnable, or that stays blocked on other flags, does not
 * enqueue it. 'rp' is evaluated more than once.
 */
#define RTS_UNSET(rp, f)						\
	do {								\
		int rts;						\
		rts = (rp)->p_rts_flags;				\
		(rp)->p_rts_flags &= ~(f);				\
		if(!rts_f_is_runnable(rts) && proc_is_runnable(rp)) {	\
			enqueue(rp);					\
		}							\
	} while(0)
|
|
|
|
/* Set flags to this value.
 *
 * If the process is currently runnable and the new value is nonzero, it is
 * dequeued before the assignment. This macro never calls enqueue(): going
 * from a blocked state to f == 0 only stores the value. 'rp' and 'f' are
 * each evaluated more than once.
 */
#define RTS_SETFLAGS(rp, f)						\
	do {								\
		if(proc_is_runnable(rp) && (f)) { dequeue(rp); }	\
		(rp)->p_rts_flags = (f);				\
	} while(0)
|
|
|
|
/* Misc flags (bits of p_misc_flags; these do not suspend the process) */
#define MF_REPLY_PEND	0x001	/* reply to IPC_REQUEST is pending */
#define MF_VIRT_TIMER	0x002	/* process-virtual timer is running */
#define MF_PROF_TIMER	0x004	/* process-virtual profile timer is running */
#define MF_KCALL_RESUME	0x008	/* processing a kernel call was interrupted,
				 * most likely because we need VM to resolve
				 * a problem or a long running copy was
				 * preempted. We need to resume the kernel
				 * call execution now
				 */
#define MF_DELIVERMSG	0x040	/* copy message to this process before it runs */
#define MF_SIG_DELAY	0x080	/* Send signal when no longer sending */
#define MF_SC_ACTIVE	0x100	/* Syscall tracing: in a system call now */
#define MF_SC_DEFER	0x200	/* Syscall tracing: deferred system call */
#define MF_SC_TRACE	0x400	/* Syscall tracing: trigger syscall events */
#define MF_FPU_INITIALIZED	0x1000	/* process already used math, so fpu
					 * regs are significant (initialized)
					 */
#define MF_SENDING_FROM_KERNEL	0x2000	/* message of this process is from kernel */
#define MF_CONTEXT_SET	0x4000	/* don't touch context */
#define MF_SPROF_SEEN	0x8000	/* profiling has seen this process */
#define MF_FLUSH_TLB	0x10000	/* if set, TLB must be flushed before letting
				 * this process run again. Currently it only
				 * applies to SMP
				 */
#define MF_SENDA_VM_MISS 0x20000 /* set if a process wanted to receive an
				  * async message from this sender but could
				  * not because of VM modifying the sender's
				  * address space
				  */
|
|
|
|
/* Magic process table addresses.
 * The table holds NR_TASKS kernel-task slots followed by NR_PROCS slots;
 * END_PROC_ADDR is one past the last slot and must not be dereferenced.
 */
#define BEG_PROC_ADDR	(&proc[0])
#define BEG_USER_ADDR	(&proc[NR_TASKS])
#define END_PROC_ADDR	(&proc[NR_TASKS + NR_PROCS])

/* Convert between process numbers and table slots. Process numbers are
 * offset by NR_TASKS, so slot 0 corresponds to number -NR_TASKS.
 */
#define proc_addr(n)	(&(proc[NR_TASKS + (n)]))
#define proc_nr(p)	((p)->p_nr)

/* Predicates; the *n forms take a process number, the *p forms a pointer
 * into the process table.
 *
 * isokprocn: the unsigned cast makes numbers below -NR_TASKS wrap to huge
 * values, so one comparison checks both bounds.
 */
#define isokprocn(n)	((unsigned) ((n) + NR_TASKS) < NR_PROCS + NR_TASKS)
#define isemptyn(n)	isemptyp(proc_addr(n))
#define isemptyp(p)	((p)->p_rts_flags == RTS_SLOT_FREE)
#define iskernelp(p)	((p) < BEG_USER_ADDR)
#define iskerneln(n)	((n) < 0)
/* isuserp is the exact complement of iskernelp. It used to be written as
 * isusern((p) >= BEG_USER_ADDR), which fed the 0/1 comparison result into
 * isusern() and was therefore true for every pointer.
 */
#define isuserp(p)	((p) >= BEG_USER_ADDR)
#define isusern(n)	((n) >= 0)
#define isrootsysn(n)	((n) == ROOT_SYS_PROC_NR)
|
|
|
|
#ifndef __ASSEMBLY__
|
|
|
|
EXTERN struct proc proc[NR_TASKS + NR_PROCS]; /* process table */
|
|
|
|
int mini_send(struct proc *caller_ptr, endpoint_t dst_e, message *m_ptr,
|
|
int flags);
|
|
|
|
#endif /* __ASSEMBLY__ */
|
|
|
|
#endif /* PROC_H */
|