minix/include/minix/syslib.h
Ben Gras 50e2064049 No more intel/minix segments.
This commit removes all traces of Minix segments (the text/data/stack
memory map abstraction in the kernel) and significance of Intel segments
(hardware segments like CS, DS that add offsets to all addressing before
page table translation). This ultimately simplifies the memory layout
and addressing and makes the same layout possible on non-Intel
architectures.

There are only two types of addresses in the world now: virtual
and physical; even the kernel and processes have the same virtual
address space. Kernel and user processes can be distinguished at a
glance as processes won't use 0xF0000000 and above.

No static pre-allocated memory sizes exist any more.

Changes to booting:
        . The pre_init.c leaves the kernel and modules exactly as
          they were left by the bootloader in physical memory
        . The kernel starts running using physical addressing,
          loaded at a fixed location given in its linker script by the
          bootloader.  All code and data in this phase are linked to
          this fixed low location.
        . It makes a bootstrap pagetable to map itself to a
          fixed high location (also in linker script) and jumps to
          the high address. All code and data then use this high addressing.
        . All code/data symbols linked at the low addresses are prefixed by
          an objcopy step with __k_unpaged_*, so that that code cannot
          reference highly-linked symbols (which aren't valid yet) or vice
          versa (symbols that aren't valid any more).
        . The two addressing modes are separated in the linker script by
          collecting the unpaged_*.o objects and linking them with low
          addresses, and linking the rest high. Some objects are linked
          twice, once low and once high.
        . The bootstrap phase passes a lot of information (e.g. free memory
          list, physical location of the modules, etc.) using the kinfo
          struct.
        . After this bootstrap the low-linked part is freed.
        . The kernel maps in VM into the bootstrap page table so that VM can
          begin executing. Its first job is to make page tables for all other
          boot processes. So VM runs before RS, and RS gets a fully dynamic,
          VM-managed address space. VM gets its privilege info from RS as usual
          but that happens after RS starts running.
        . Both the kernel loading VM and VM organizing boot processes happen
	  using the libexec logic. This removes the last reason for VM to
	  still know much about exec() and vm/exec.c is gone.

Further Implementation:
        . All segments are based at 0 and have a 4 GB limit.
        . The kernel is mapped in at the top of the virtual address
          space so as not to constrain the user processes.
        . Processes do not use segments from the LDT at all; there are
          no segments in the LDT any more, so no LLDT is needed.
        . The Minix segments T/D/S are gone and so none of the
          user-space or in-kernel copy functions use them. The copy
          functions use a process endpoint of NONE to realize it's
          a physical address, virtual otherwise.
        . The umap call now only makes sense for translating a virtual
          address to a physical address.
        . Segments-related calls like newmap and alloc_segments are gone.
        . All segments-related translation in VM is gone (vir2map etc).
        . Initialization in VM is simpler as no moving around is necessary.
        . VM and all other boot processes can be linked wherever they wish
          and will be mapped in at the right location by the kernel and VM
          respectively.

Other changes:
        . The multiboot code is less special: it does not use mb_print
          for its diagnostics any more but uses printf() as normal, saving
          the output into the diagnostics buffer, only printing to the
          screen using the direct print functions if a panic() occurs.
        . The multiboot code uses the flexible 'free memory map list'
          style to receive the list of free memory if available.
        . The kernel determines the memory layout of the processes to
          a degree: it tells VM where the kernel starts and ends and
          where the kernel wants the top of the process to be. VM then
          uses this entire range, i.e. the stack is right at the top,
          and mmap()ped bits of memory are placed below that downwards,
          and the break grows upwards.

Other Consequences:
        . Every process gets its own page table as address spaces
          can't be separated any more by segments.
        . As all segments are 0-based, there is no distinction between
          virtual and linear addresses, nor between userspace and
          kernel addresses.
        . Less work is done when context switching, leading to a net
          performance increase. (8% faster on my machine for 'make servers'.)
	. The layout and configuration of the GDT makes sysenter and syscall
	  possible.
2012-07-15 22:30:15 +02:00

262 lines
10 KiB
C

/* Prototypes for system library functions. */
#ifndef _SYSLIB_H
#define _SYSLIB_H
#ifndef _TYPES_H
#include <minix/types.h>
#endif
#ifndef _IPC_H
#include <minix/ipc.h>
#endif
#include <minix/u64.h>
#ifndef _DEVIO_H
#include <minix/devio.h>
#endif
#include <minix/safecopies.h>
#include <minix/sef.h>
#include <machine/mcontext.h>
/* Forward declarations: only pointers to these structs appear in the
 * prototypes of this header (sys_int86() and pci_set_acl() respectively),
 * so their full definitions are not needed here.
 */
struct reg86u;
struct rs_pci;
/* SYSTASK is a second name for SYSTEM.
 * NOTE(review): SYSTEM is not defined in this header; it is expected to
 * come from one of the headers included above - confirm.
 */
#define SYSTASK SYSTEM
/*==========================================================================*
* Minix system library. *
*==========================================================================*/
/* Raw call entry points: both take a call number and a message pointer;
 * _taskcall() additionally names the endpoint to address.
 */
int _taskcall(endpoint_t who, int syscallnr, message *msgptr);
int _kernel_call(int syscallnr, message *msgptr);

/* Process management calls. */
int sys_abort(int how, ...);
int sys_enable_iop(endpoint_t proc_ep);
int sys_exec(endpoint_t proc_ep, char *ptr, char *aout,
	vir_bytes initpc);
int sys_fork(endpoint_t parent, endpoint_t child, endpoint_t *,
	u32_t vm, vir_bytes *);
int sys_clear(endpoint_t proc_ep);
int sys_exit(void);
int sys_trace(int req, endpoint_t proc_ep, long addr, long *data_p);

/* Scheduling calls. */
int sys_schedule(endpoint_t proc_ep, int priority, int quantum,
	int cpu);
int sys_schedctl(unsigned flags, endpoint_t proc_ep, int priority,
	int quantum, int cpu);
/* Shorthands for sys_runctl() system call: each one fixes the action and
 * flags arguments and passes the process endpoint through unchanged.
 */
#define sys_stop(proc_ep) \
	sys_runctl((proc_ep), RC_STOP, 0)
#define sys_delay_stop(proc_ep) \
	sys_runctl((proc_ep), RC_STOP, RC_DELAY)
#define sys_resume(proc_ep) \
	sys_runctl((proc_ep), RC_RESUME, 0)
/* Run control, live update and state control. */
int sys_runctl(endpoint_t proc_ep, int action, int flags);
int sys_update(endpoint_t src_ep, endpoint_t dst_ep);
int sys_statectl(int request);

/* Privilege and grant table control. */
int sys_privctl(endpoint_t proc_ep, int req, void *p);
int sys_privquery_mem(endpoint_t proc_ep, phys_bytes physstart,
	phys_bytes physlen);
int sys_setgrant(cp_grant_t *grants, int ngrants);

/* Miscellaneous calls. */
int sys_int86(struct reg86u *reg86p);
int sys_readbios(phys_bytes address, void *buf, size_t size);
int sys_stime(time_t boottime);
int sys_sysctl(int ctl, char *arg1, int arg2);
int sys_sysctl_stacktrace(endpoint_t who);

/* sys_vm_*() and sys_vmctl*() calls. */
int sys_vm_setbuf(phys_bytes base, phys_bytes size, phys_bytes high);
int sys_vm_map(endpoint_t proc_ep, int do_map, phys_bytes base,
	phys_bytes size, phys_bytes offset);
int sys_vmctl(endpoint_t who, int param, u32_t value);
int sys_vmctl_get_cr3_i386(endpoint_t who, u32_t *cr3);
int sys_vmctl_get_memreq(endpoint_t *who, vir_bytes *mem,
	vir_bytes *len, int *wrflag, endpoint_t *who_s, vir_bytes *mem_s,
	endpoint_t *);
int sys_vmctl_enable_paging(void * data);
int sys_vmctl_get_mapping(int index, phys_bytes *addr, phys_bytes *len,
	int *flags);
int sys_vmctl_reply_mapping(int index, vir_bytes addr);
int sys_vmctl_set_addrspace(endpoint_t who, phys_bytes ptroot,
	void *ptroot_v);
/* Shorthands for sys_sdevio() system call: each one selects the request
 * code (byte or word, input or output) and passes 0 as the last argument.
 */
#define sys_insb(port, proc_ep, buffer, count) \
	sys_sdevio(DIO_INPUT_BYTE, (port), (proc_ep), (buffer), (count), 0)
#define sys_insw(port, proc_ep, buffer, count) \
	sys_sdevio(DIO_INPUT_WORD, (port), (proc_ep), (buffer), (count), 0)
#define sys_outsb(port, proc_ep, buffer, count) \
	sys_sdevio(DIO_OUTPUT_BYTE, (port), (proc_ep), (buffer), (count), 0)
#define sys_outsw(port, proc_ep, buffer, count) \
	sys_sdevio(DIO_OUTPUT_WORD, (port), (proc_ep), (buffer), (count), 0)
/* Grant-based shorthands for the sys_sdevio() system call.
 *
 * Each macro selects the DIO_SAFE_* request code and passes the grant,
 * converted to void *, in the buffer position of sys_sdevio(). Note the
 * argument order: the macros take (port, ept, grant, offset, count), while
 * sys_sdevio() receives count before offset.
 *
 * The grant argument is parenthesized before the cast so that an expression
 * argument (e.g. 'a & b') is converted as a whole instead of having the cast
 * bind to its first operand only.
 */
#define sys_safe_insb(port, ept, grant, offset, count) \
	sys_sdevio(DIO_SAFE_INPUT_BYTE, port, ept, (void *)(grant), count, offset)
#define sys_safe_outsb(port, ept, grant, offset, count) \
	sys_sdevio(DIO_SAFE_OUTPUT_BYTE, port, ept, (void *)(grant), count, offset)
#define sys_safe_insw(port, ept, grant, offset, count) \
	sys_sdevio(DIO_SAFE_INPUT_WORD, port, ept, (void *)(grant), count, offset)
#define sys_safe_outsw(port, ept, grant, offset, count) \
	sys_sdevio(DIO_SAFE_OUTPUT_WORD, port, ept, (void *)(grant), count, offset)
/* The call behind the sys_ins*(), sys_outs*() and sys_safe_*() shorthands. */
int sys_sdevio(int req, long port, endpoint_t proc_ep, void *buffer,
	int count, vir_bytes offset);

/* Contiguous memory allocation and release. */
void *alloc_contig(size_t len, int flags, phys_bytes *phys);
int free_contig(void *addr, size_t len);

/* Single-bit flag values.
 * NOTE(review): presumably meant for the 'flags' argument of
 * alloc_contig() - confirm against the implementation.
 */
#define AC_ALIGN4K	0x01
#define AC_LOWER16M	0x02
#define AC_ALIGN64K	0x04
#define AC_LOWER1M	0x08
/* Clock functionality: get system times, (un)schedule an alarm call, or
 * retrieve/set a process-virtual timer.
 */
int sys_times(endpoint_t proc_ep, clock_t *user_time,
	clock_t *sys_time, clock_t *uptime, time_t *boottime);
int sys_setalarm(clock_t exp_time, int abs_time);
int sys_vtimer(endpoint_t proc_nr, int which, clock_t *newval,
	clock_t *oldval);
/* The IRQ control call; the shorthands below fill in the request code and
 * pass 0 for the arguments a given request does not take from the caller.
 */
int sys_irqctl(int request, int irq_vec, int policy, int *irq_hook_id);

/* Shorthands for sys_irqctl() system call. */
#define sys_irqdisable(hook_id) \
	sys_irqctl(IRQ_DISABLE, 0, 0, (hook_id))
#define sys_irqenable(hook_id) \
	sys_irqctl(IRQ_ENABLE, 0, 0, (hook_id))
#define sys_irqsetpolicy(irq_vec, policy, hook_id) \
	sys_irqctl(IRQ_SETPOLICY, (irq_vec), (policy), (hook_id))
#define sys_irqrmpolicy(hook_id) \
	sys_irqctl(IRQ_RMPOLICY, 0, 0, (hook_id))
/* Copy calls. Both take a source and a destination (endpoint, address)
 * pair followed by a byte count.
 */
int sys_vircopy(endpoint_t src_proc, vir_bytes src_v,
	endpoint_t dst_proc, vir_bytes dst_vir, phys_bytes bytes);
int sys_physcopy(endpoint_t src_proc, vir_bytes src_vir,
	endpoint_t dst_proc, vir_bytes dst_vir, phys_bytes bytes);

/* Shorthands for sys_vircopy() and sys_physcopy() system calls:
 * sys_datacopy is another name for sys_vircopy, and sys_abscopy() calls
 * sys_physcopy() with NONE as both endpoints.
 */
#define sys_datacopy sys_vircopy
#define sys_abscopy(src_phys, dst_phys, bytes) \
	sys_physcopy(NONE, (src_phys), NONE, (dst_phys), (bytes))
/* Grant-based copy functions. */
int sys_safecopyfrom(endpoint_t source, cp_grant_id_t grant,
	vir_bytes grant_offset, vir_bytes my_address, size_t bytes);
int sys_safecopyto(endpoint_t dest, cp_grant_id_t grant,
	vir_bytes grant_offset, vir_bytes my_address, size_t bytes);
int sys_vsafecopy(struct vscp_vec *copyvec, int elements);

/* Memory fill; unlike its neighbours this call takes no grant. */
int sys_memset(endpoint_t who, unsigned long pattern, phys_bytes base,
	phys_bytes bytes);

/* Grant-based map functions. */
int sys_safemap(endpoint_t grantor, cp_grant_id_t grant,
	vir_bytes grant_offset, vir_bytes my_address, size_t bytes,
	int writable);
int sys_saferevmap_gid(cp_grant_id_t grant);
int sys_saferevmap_addr(vir_bytes addr);
int sys_safeunmap(vir_bytes my_address);

/* The sys_vumap() and sys_umap*() calls. */
int sys_vumap(endpoint_t endpt, struct vumap_vir *vvec, int vcount,
	size_t offset, int access, struct vumap_phys *pvec, int *pcount);
int sys_umap(endpoint_t proc_ep, int seg, vir_bytes vir_addr,
	vir_bytes bytes, phys_bytes *phys_addr);
int sys_umap_data_fb(endpoint_t proc_ep, vir_bytes vir_addr,
	vir_bytes bytes, phys_bytes *phys_addr);
int sys_umap_remote(endpoint_t proc_ep, endpoint_t grantee, int seg,
	vir_bytes vir_addr, vir_bytes bytes, phys_bytes *phys_addr);
/* The information retrieval call: a request code followed by two
 * (pointer, length) pairs.
 */
int sys_getinfo(int request, void *val_ptr, int val_len,
	void *val_ptr2, int val_len2);

/* Shorthands for sys_getinfo() system call. Each one fixes the request
 * code and passes 0 for every argument it does not take from the caller.
 */
#define sys_getkmessages(dst) \
	sys_getinfo(GET_KMESSAGES, (dst), 0, 0, 0)
#define sys_getkinfo(dst) \
	sys_getinfo(GET_KINFO, (dst), 0, 0, 0)
#define sys_getloadinfo(dst) \
	sys_getinfo(GET_LOADINFO, (dst), 0, 0, 0)
#define sys_getmachine(dst) \
	sys_getinfo(GET_MACHINE, (dst), 0, 0, 0)
#define sys_getcpuinfo(dst) \
	sys_getinfo(GET_CPUINFO, (dst), 0, 0, 0)
#define sys_getproctab(dst) \
	sys_getinfo(GET_PROCTAB, (dst), 0, 0, 0)
#define sys_getprivtab(dst) \
	sys_getinfo(GET_PRIVTAB, (dst), 0, 0, 0)
#define sys_getproc(dst, nr) \
	sys_getinfo(GET_PROC, (dst), 0, 0, (nr))
#define sys_getrandomness(dst) \
	sys_getinfo(GET_RANDOMNESS, (dst), 0, 0, 0)
#define sys_getrandom_bin(d, b) \
	sys_getinfo(GET_RANDOMNESS_BIN, (d), 0, 0, (b))
#define sys_getimage(dst) \
	sys_getinfo(GET_IMAGE, (dst), 0, 0, 0)
#define sys_getirqhooks(dst) \
	sys_getinfo(GET_IRQHOOKS, (dst), 0, 0, 0)
#define sys_getirqactids(dst) \
	sys_getinfo(GET_IRQACTIDS, (dst), 0, 0, 0)
#define sys_getmonparams(v, vl) \
	sys_getinfo(GET_MONPARAMS, (v), (vl), 0, 0)
#define sys_getschedinfo(v1, v2) \
	sys_getinfo(GET_SCHEDINFO, (v1), 0, (v2), 0)
#define sys_getpriv(dst, nr) \
	sys_getinfo(GET_PRIV, (dst), 0, 0, (nr))
#define sys_getidletsc(dst) \
	sys_getinfo(GET_IDLETSC, (dst), 0, 0, 0)
#define sys_getregs(dst, nr) \
	sys_getinfo(GET_REGS, (dst), 0, 0, (nr))
/* Caller identity: results are returned through the pointer arguments. */
int sys_whoami(endpoint_t *ep, char *name, int namelen,
	int *priv_flags);

/* Signal control. */
int sys_kill(endpoint_t proc_ep, int sig);
int sys_sigsend(endpoint_t proc_ep, struct sigmsg *sig_ctxt);
int sys_sigreturn(endpoint_t proc_ep, struct sigmsg *sig_ctxt);
int sys_getksig(endpoint_t *proc_ep, sigset_t *k_sig_map);
int sys_endksig(endpoint_t proc_ep);
/* NOTE: two different approaches were used to distinguish the device I/O
 * types 'byte', 'word', 'long'. The vectored calls here have one prototype
 * per type. The sys_out()/sys_in() shorthands further down use #define
 * instead, which results in a smaller implementation but loses the static
 * type checking.
 */

/* Vectored output: one function per port/value pair type. */
int sys_voutb(pvb_pair_t *pvb_pairs, int nr_ports);
int sys_voutw(pvw_pair_t *pvw_pairs, int nr_ports);
int sys_voutl(pvl_pair_t *pvl_pairs, int nr_ports);

/* Vectored input: one function per port/value pair type. */
int sys_vinb(pvb_pair_t *pvb_pairs, int nr_ports);
int sys_vinw(pvw_pair_t *pvw_pairs, int nr_ports);
int sys_vinl(pvl_pair_t *pvl_pairs, int nr_ports);
/* Single-port I/O calls; the last argument selects the access type. */
int sys_out(int port, u32_t value, int type);
int sys_in(int port, u32_t *value, int type);

/* Shorthands for sys_out() system call; the value is cast to u32_t. */
#define sys_outb(p, v)	sys_out((p), (u32_t)(v), _DIO_BYTE)
#define sys_outw(p, v)	sys_out((p), (u32_t)(v), _DIO_WORD)
#define sys_outl(p, v)	sys_out((p), (u32_t)(v), _DIO_LONG)

/* Shorthands for sys_in() system call; 'v' is passed through uncast. */
#define sys_inb(p, v)	sys_in((p), (v), _DIO_BYTE)
#define sys_inw(p, v)	sys_in((p), (v), _DIO_WORD)
#define sys_inl(p, v)	sys_in((p), (v), _DIO_LONG)
/* pci.c */

/* Initialization and device enumeration. */
void pci_init(void);
int pci_first_dev(int *devindp, u16_t *vidp, u16_t *didp);
int pci_next_dev(int *devindp, u16_t *vidp, u16_t *didp);
int pci_find_dev(u8_t bus, u8_t dev, u8_t func, int *devindp);
void pci_rescan_bus(u8_t busnr);

/* Device reservation and identification. */
void pci_reserve(int devind);
int pci_reserve_ok(int devind);
void pci_ids(int devind, u16_t *vidp, u16_t *didp);
char *pci_dev_name(u16_t vid, u16_t did);
char *pci_slot_name(int devind);

/* 8-, 16- and 32-bit attribute reads and writes. */
u8_t pci_attr_r8(int devind, int port);
u16_t pci_attr_r16(int devind, int port);
u32_t pci_attr_r32(int devind, int port);
void pci_attr_w8(int devind, int port, u8_t value);
void pci_attr_w16(int devind, int port, u16_t value);
void pci_attr_w32(int devind, int port, u32_t value);

/* Access control lists and BAR lookup. */
int pci_set_acl(struct rs_pci *rs_pci);
int pci_del_acl(endpoint_t proc_ep);
int pci_get_bar(int devind, int port, u32_t *base, u32_t *size,
	int *ioflag);
/* Profiling. */
int sys_sprof(int action, int size, int freq, int type,
	endpoint_t endpt, void *ctl_ptr, void *mem_ptr);
int sys_cprof(int action, int size, endpoint_t endpt,
	void *ctl_ptr, void *mem_ptr);
int sys_profbuf(void *ctl_ptr, void *mem_ptr);

/* Machine context: get or set the mcontext_t of a process. */
int sys_getmcontext(endpoint_t proc, mcontext_t *mcp);
int sys_setmcontext(endpoint_t proc, mcontext_t *mcp);

/* Input. */
int tty_input_inject(int type, int code, int val);
#endif /* _SYSLIB_H */