From bd427054338cd97c19933afdcbcb85855037b09c Mon Sep 17 00:00:00 2001 From: Ben Gras Date: Wed, 2 Dec 2009 13:01:48 +0000 Subject: [PATCH] FPU context switching support by Evgeniy Ivanov. --- commands/i386/gas2ack/asm86.h | 7 ++ commands/i386/gas2ack/emit_ack.c | 54 +++++++++++++ commands/i386/gas2ack/parse_gnu.c | 13 ++- commands/scripts/netconf.sh | 20 +++-- include/minix/cpufeature.h | 9 +++ include/signal.h | 11 +++ include/sys/sigcontext.h | 114 ++++++--------------------- include/sys/vm_i386.h | 8 ++ kernel/arch/i386/exception.c | 3 + kernel/arch/i386/include/archconst.h | 6 +- kernel/arch/i386/include/archtypes.h | 45 +++-------- kernel/arch/i386/klib386.S | 23 ++++++ kernel/arch/i386/mpx386.S | 83 ++++++++++++++++++- kernel/arch/i386/protect.c | 3 + kernel/arch/i386/proto.h | 6 ++ kernel/arch/i386/sconst.h | 12 ++- kernel/arch/i386/system.c | 59 ++++++++++++-- kernel/glo.h | 2 + kernel/proc.h | 4 + kernel/system/do_exec.c | 3 + kernel/system/do_exit.c | 3 +- kernel/system/do_fork.c | 7 ++ kernel/system/do_sigreturn.c | 9 ++- kernel/system/do_sigsend.c | 49 +++++++++++- lib/i386/misc/_cpufeature.c | 28 ++++++- 25 files changed, 434 insertions(+), 147 deletions(-) diff --git a/commands/i386/gas2ack/asm86.h b/commands/i386/gas2ack/asm86.h index 52afc6917..a71bc811b 100644 --- a/commands/i386/gas2ack/asm86.h +++ b/commands/i386/gas2ack/asm86.h @@ -78,7 +78,11 @@ typedef enum opcode { /* 80486 opcodes, from the i486 reference manual. FLDCW, FLDENV, FMULD, FMULS, FMULP, FIMULL, FIMULS, + FNINIT, FNOP, + FNSAVE, + FNSTCW, + FNSTSW, FPATAN, FPREM, FPREM1, @@ -86,6 +90,9 @@ typedef enum opcode { /* 80486 opcodes, from the i486 reference manual. 
FRNDINT, FRSTOR, FSAVE, + FWAIT, + FXRSTOR, + FXSAVE, FSCALE, FSIN, FSINCOS, diff --git a/commands/i386/gas2ack/emit_ack.c b/commands/i386/gas2ack/emit_ack.c index 3082cc789..585ad67e6 100644 --- a/commands/i386/gas2ack/emit_ack.c +++ b/commands/i386/gas2ack/emit_ack.c @@ -138,7 +138,11 @@ static mnemonic_t mnemtab[] = { { FMULD, "fmuld" }, { FMULP, "fmulp" }, { FMULS, "fmuls" }, + { FNINIT, "fninit" }, { FNOP, "fnop" }, + { FNSAVE, "fnsave" }, + { FNSTCW, "fnstcw" }, + { FNSTSW, "fnstsw" }, { FPATAN, "fpatan" }, { FPREM, "fprem" }, { FPREM1, "fprem1" }, @@ -146,6 +150,9 @@ static mnemonic_t mnemtab[] = { { FRNDINT, "frndint" }, { FRSTOR, "frstor" }, { FSAVE, "fsave" }, + { FWAIT, "fwait" }, + { FXRSTOR, "fxrstor" }, + { FXSAVE, "fxsave" }, { FSCALE, "fscale" }, { FSIN, "fsin" }, { FSINCOS, "fsincos" }, @@ -611,6 +618,53 @@ void ack_emit_instruction(asm86_t *a) return; } } + if (a->opcode == RDMSR) { + ack_printf(".data1 0x0f, 0x32\n"); + return; + } + if (a->opcode == WRMSR) { + ack_printf(".data1 0x0f, 0x30\n"); + } + /* unsupported fninit */ + if (a->opcode == FNINIT) { + ack_printf(".data1 0xDB, 0xE3\n"); /* FNINIT */ + return; + } + /* unsupported fnsave */ + if (a->opcode == FNSAVE) { + ack_printf(".data1 0xDD, 0x30\n"); /* FNSAVE [eax] */ + return; + } + /* unsupported fnstcw */ + if (a->opcode == FNSTCW) { + ack_printf(".data1 0xD9, 0x38\n"); /* FNSTCW [eax] */ + return; + } + /* unsupported fnstsw */ + if (a->opcode == FNSTSW) { + ack_printf(".data1 0xDF, 0xD0\n"); /* FNSTSW [eax] */ + return; + } + /* unsupported frstor */ + if (a->opcode == FRSTOR) { + ack_printf(".data1 0xDD, 0x20\n"); /* FRSTOR [eax] */ + return; + } + /* unsupported fwait */ + if (a->opcode == FWAIT) { + ack_printf(".data1 0x9B\n"); /* FWAIT */ + return; + } + /* unsupported fxrstor */ + if (a->opcode == FXRSTOR) { + ack_printf(".data1 0x0F, 0xAE, 0x08\n"); /* FXRSTOR [eax] */ + return; + } + /* unsupported fxsave */ + if (a->opcode == FXSAVE) { + ack_printf(".data1 0x0F, 0xAE, 
0x00\n"); /* FXSAVE [eax] */ + return; + } /* we are translating from GNU */ if (a->args && a->args->operator == ',' /* don't swap ljmp prefixed with segment */ diff --git a/commands/i386/gas2ack/parse_gnu.c b/commands/i386/gas2ack/parse_gnu.c index 295cf9cb5..295a28e2d 100644 --- a/commands/i386/gas2ack/parse_gnu.c +++ b/commands/i386/gas2ack/parse_gnu.c @@ -142,6 +142,7 @@ static mnemonic_t mnemtab[] = { /* This array is sorted. */ { "fimull", FIMULL, WORD }, { "fimuls", FIMULS, WORD }, { "fincstp", FINCSTP, WORD }, + { "finit", FINIT, WORD }, { "fistl", FISTL, WORD }, { "fistp", FISTP, WORD }, { "fists", FISTS, WORD }, @@ -165,10 +166,11 @@ static mnemonic_t mnemtab[] = { /* This array is sorted. */ { "fmulp", FMULP, WORD }, { "fmuls", FMULS, WORD }, { "fnclex", FCLEX, WORD }, - { "fninit", FINIT, WORD }, + { "fninit", FNINIT, NONE }, { "fnop", FNOP, WORD }, - { "fnsave", FSAVE, WORD }, - { "fnstcw", FSTCW, WORD }, + { "fnsave", FNSAVE, WORD }, + { "fnstcw", FNSTCW, WORD }, + { "fnstsw", FNSTSW, WORD }, { "fnstenv", FSTENV, WORD }, { "fpatan", FPATAN, WORD }, { "fprem", FPREM, WORD }, @@ -176,6 +178,7 @@ static mnemonic_t mnemtab[] = { /* This array is sorted. */ { "fptan", FPTAN, WORD }, { "frndint", FRNDINT, WORD }, { "frstor", FRSTOR, WORD }, + { "fsave", FSAVE, WORD }, { "fscale", FSCALE, WORD }, { "fsin", FSIN, WORD }, { "fsincos", FSINCOS, WORD }, @@ -185,6 +188,7 @@ static mnemonic_t mnemtab[] = { /* This array is sorted. */ { "fstps", FSTPS, WORD }, { "fstpx", FSTPX, WORD }, { "fsts", FSTS, WORD }, + { "fstcw", FSTCW, WORD }, { "fstsw", FSTSW, WORD }, { "fsubd", FSUBD, WORD }, { "fsubp", FSUBP, WORD }, @@ -196,8 +200,11 @@ static mnemonic_t mnemtab[] = { /* This array is sorted. 
*/ { "fucom", FUCOM, WORD }, { "fucomp", FUCOMP, WORD }, { "fucompp", FUCOMPP, WORD }, + { "fwait", FWAIT, NONE }, { "fxam", FXAM, WORD }, { "fxch", FXCH, WORD }, + { "fxrstor", FXRSTOR, WORD }, + { "fxsave", FXSAVE, WORD }, { "fxtract", FXTRACT, WORD }, { "fyl2x", FYL2X, WORD }, { "fyl2xp1", FYL2XP1, WORD }, diff --git a/commands/scripts/netconf.sh b/commands/scripts/netconf.sh index 29928c206..351fc1c03 100755 --- a/commands/scripts/netconf.sh +++ b/commands/scripts/netconf.sh @@ -103,10 +103,13 @@ cards() "1186:1340" "11DB:1234" "1259:A117" "1259:A11E" "126C:1211" \ "13D1:AB06" "1432:9130" "14EA:AB06" "14EA:AB07" "1500:1360" \ "1743:8139" "4033:1360" - card 4 "Realtek 8029 based card (also emulated by Qemu)" "10EC:8029" - card 5 "NE2000, 3com 503 or WD based card (also emulated by Bochs)" - card 6 "AMD LANCE (also emulated by VMWare and VirtualBox)" "1022:2000" - card 7 "Different Ethernet card (no networking)" + card 4 "Realtek 8169 based card" \ + "10EC:8129" "10EC:8167" "10EC:8169" "1186:4300" "1259:C107" \ + "1385:8169" "16EC:0116" "1737:1032" + card 5 "Realtek 8029 based card (also emulated by Qemu)" "10EC:8029" + card 6 "NE2000, 3com 503 or WD based card (also emulated by Bochs)" + card 7 "AMD LANCE (also emulated by VMWare and VirtualBox)" "1022:2000" + card 8 "Different Ethernet card (no networking)" } warn() @@ -143,15 +146,16 @@ drv_params() test "$v" = 1 && echo "" test "$v" = 1 && echo "Note: After installing, edit $LOCALRC to the right configuration." ;; + 4) driver=rtl8169; ;; 3) driver=rtl8139; ;; - 4) driver=dp8390; driverargs="dp8390_arg='DPETH0=pci'"; ;; - 5) driver=dp8390; driverargs="dp8390_arg='DPETH0=240:9'"; + 5) driver=dp8390; driverargs="dp8390_arg='DPETH0=pci'"; ;; + 6) driver=dp8390; driverargs="dp8390_arg='DPETH0=240:9'"; test "$v" = 1 && echo "" test "$v" = 1 && echo "Note: After installing, edit $LOCALRC to the right configuration." test "$v" = 1 && echo " chose option 4, the defaults for emulation by Bochs have been set." 
;; - 6) driver="lance"; ;; - 7) driver="psip0"; ;; + 7) driver="lance"; ;; + 8) driver="psip0"; ;; *) warn "choose a number" esac } diff --git a/include/minix/cpufeature.h b/include/minix/cpufeature.h index a5fec4ada..1f733f868 100644 --- a/include/minix/cpufeature.h +++ b/include/minix/cpufeature.h @@ -2,10 +2,19 @@ #ifndef _MINIX_CPUFEATURE_H #define _MINIX_CPUFEATURE_H 1 +#define _CPUF_I386_FPU 0 /* FPU-x87 FPU on Chip */ #define _CPUF_I386_PSE 1 /* Page Size Extension */ #define _CPUF_I386_PGE 2 /* Page Global Enable */ #define _CPUF_I386_APIC_ON_CHIP 3 /* APIC is present on the chip */ #define _CPUF_I386_TSC 4 /* Timestamp counter present */ +#define _CPUF_I386_SSEx 5 /* Support for SSE/SSE2/SSE3/SSSE3/SSE4 Extensions and FXSR */ +#define _CPUF_I386_FXSR 6 +#define _CPUF_I386_SSE 7 +#define _CPUF_I386_SSE2 8 +#define _CPUF_I386_SSE3 9 +#define _CPUF_I386_SSSE3 10 +#define _CPUF_I386_SSE4_1 11 +#define _CPUF_I386_SSE4_2 12 _PROTOTYPE(int _cpufeature, (int featureno)); diff --git a/include/signal.h b/include/signal.h index 0be58f01c..00f02de01 100644 --- a/include/signal.h +++ b/include/signal.h @@ -102,6 +102,17 @@ struct sigaction { #define SIG_UNBLOCK 1 /* for unblocking signals */ #define SIG_SETMASK 2 /* for setting the signal mask */ #define SIG_INQUIRE 4 /* for internal use only */ + +/* codes for SIGFPE */ +#define FPE_INTOVF 1 /* integer overflow */ +#define FPE_INTDIV 2 /* integer divide by zero */ +#define FPE_FLTDIV 3 /* floating-point divide by zero */ +#define FPE_FLTOVF 4 /* floating-point overflow */ +#define FPE_FLTUND 5 /* floating-point underflow */ +#define FPE_FLTRES 6 /* floating-point inexact result */ +#define FPE_FLTINV 7 /* floating-point invalid operation */ +#define FPE_FLTSUB 8 /* subscript out of range */ + #endif /* _POSIX_SOURCE */ /* POSIX and ANSI function prototypes. 
*/ diff --git a/include/sys/sigcontext.h b/include/sys/sigcontext.h index a9e0fb9db..42fd1b9f1 100644 --- a/include/sys/sigcontext.h +++ b/include/sys/sigcontext.h @@ -23,30 +23,10 @@ * be added in a different struct. */ #if (_MINIX_CHIP == _CHIP_INTEL) -struct sigregs { -#if _WORD_SIZE == 4 - short sr_gs; - short sr_fs; -#endif /* _WORD_SIZE == 4 */ - short sr_es; - short sr_ds; - int sr_di; - int sr_si; - int sr_bp; - int sr_st; /* stack top -- used in kernel */ - int sr_bx; - int sr_dx; - int sr_cx; - int sr_retreg; - int sr_retadr; /* return address to caller of save -- used - * in kernel */ - int sr_pc; - int sr_cs; - int sr_psw; - int sr_sp; - int sr_ss; -}; +#include +#include +typedef struct stackframe_s sigregs; struct sigframe { /* stack frame created for signalled process */ _PROTOTYPE( void (*sf_retadr), (void) ); int sf_signo; @@ -57,85 +37,43 @@ struct sigframe { /* stack frame created for signalled process */ struct sigcontext *sf_scpcopy; }; -#else -#if (_MINIX_CHIP == _CHIP_M68000) -struct sigregs { - long sr_retreg; /* d0 */ - long sr_d1; - long sr_d2; - long sr_d3; - long sr_d4; - long sr_d5; - long sr_d6; - long sr_d7; - long sr_a0; - long sr_a1; - long sr_a2; - long sr_a3; - long sr_a4; - long sr_a5; - long sr_a6; - long sr_sp; /* also known as a7 */ - long sr_pc; - short sr_psw; - short sr_dummy; /* make size multiple of 4 for system.c */ -}; #else #include "error, _MINIX_CHIP is not supported" -#endif #endif /* _MINIX_CHIP == _CHIP_INTEL */ struct sigcontext { - int sc_flags; /* sigstack state to restore */ + int sc_flags; /* sigstack state to restore (including MF_FPU_INITIALIZED) */ long sc_mask; /* signal mask to restore */ - struct sigregs sc_regs; /* register set to restore */ + sigregs sc_regs; /* register set to restore */ +#if (_MINIX_CHIP == _CHIP_INTEL) + union fpu_state_u fpu_state; +#endif }; #if (_MINIX_CHIP == _CHIP_INTEL) #if _WORD_SIZE == 4 -#define sc_gs sc_regs.sr_gs -#define sc_fs sc_regs.sr_fs +#define sc_gs sc_regs.gs 
+#define sc_fs sc_regs.fs #endif /* _WORD_SIZE == 4 */ -#define sc_es sc_regs.sr_es -#define sc_ds sc_regs.sr_ds -#define sc_di sc_regs.sr_di -#define sc_si sc_regs.sr_si -#define sc_fp sc_regs.sr_bp -#define sc_st sc_regs.sr_st /* stack top -- used in kernel */ -#define sc_bx sc_regs.sr_bx -#define sc_dx sc_regs.sr_dx -#define sc_cx sc_regs.sr_cx -#define sc_retreg sc_regs.sr_retreg -#define sc_retadr sc_regs.sr_retadr /* return address to caller of +#define sc_es sc_regs.es +#define sc_ds sc_regs.ds +#define sc_di sc_regs.di +#define sc_si sc_regs.si +#define sc_fp sc_regs.bp +#define sc_st sc_regs.st /* stack top -- used in kernel */ +#define sc_bx sc_regs.bx +#define sc_dx sc_regs.dx +#define sc_cx sc_regs.cx +#define sc_retreg sc_regs.retreg +#define sc_retadr sc_regs.retadr /* return address to caller of save -- used in kernel */ -#define sc_pc sc_regs.sr_pc -#define sc_cs sc_regs.sr_cs -#define sc_psw sc_regs.sr_psw -#define sc_sp sc_regs.sr_sp -#define sc_ss sc_regs.sr_ss +#define sc_pc sc_regs.pc +#define sc_cs sc_regs.cs +#define sc_psw sc_regs.psw +#define sc_sp sc_regs.sp +#define sc_ss sc_regs.ss #endif /* _MINIX_CHIP == _CHIP_INTEL */ -#if (_MINIX_CHIP == M68000) -#define sc_retreg sc_regs.sr_retreg -#define sc_d1 sc_regs.sr_d1 -#define sc_d2 sc_regs.sr_d2 -#define sc_d3 sc_regs.sr_d3 -#define sc_d4 sc_regs.sr_d4 -#define sc_d5 sc_regs.sr_d5 -#define sc_d6 sc_regs.sr_d6 -#define sc_d7 sc_regs.sr_d7 -#define sc_a0 sc_regs.sr_a0 -#define sc_a1 sc_regs.sr_a1 -#define sc_a2 sc_regs.sr_a2 -#define sc_a3 sc_regs.sr_a3 -#define sc_a4 sc_regs.sr_a4 -#define sc_a5 sc_regs.sr_a5 -#define sc_fp sc_regs.sr_a6 -#define sc_sp sc_regs.sr_sp -#define sc_pc sc_regs.sr_pc -#define sc_psw sc_regs.sr_psw -#endif /* _MINIX_CHIP == M68000 */ - _PROTOTYPE( int sigreturn, (struct sigcontext *_scp) ); #endif /* _SIGCONTEXT_H */ diff --git a/include/sys/vm_i386.h b/include/sys/vm_i386.h index da94c341b..a864a078a 100644 --- a/include/sys/vm_i386.h +++ b/include/sys/vm_i386.h 
@@ -70,9 +70,17 @@ sys/vm_i386.h #define I386_VM_PFE_U 0x04 /* CPU in user mode (otherwise supervisor) */ /* CPUID flags */ +#define CPUID1_EDX_FPU (1L) /* FPU presence */ #define CPUID1_EDX_PSE (1L << 3) /* Page Size Extension */ #define CPUID1_EDX_PGE (1L << 13) /* Page Global (bit) Enable */ #define CPUID1_EDX_APIC_ON_CHIP (1L << 9) /* APIC is present on the chip */ #define CPUID1_EDX_TSC (1L << 4) /* Timestamp counter present */ +#define CPUID1_EDX_FXSR (1L << 24) +#define CPUID1_EDX_SSE (1L << 25) +#define CPUID1_EDX_SSE2 (1L << 26) +#define CPUID1_ECX_SSE3 (1L) +#define CPUID1_ECX_SSSE3 (1L << 9) +#define CPUID1_ECX_SSE4_1 (1L << 19) +#define CPUID1_ECX_SSE4_2 (1L << 20) #endif /* __SYS_VM_386_H__ */ diff --git a/kernel/arch/i386/exception.c b/kernel/arch/i386/exception.c index 98d244005..9c8e5b542 100644 --- a/kernel/arch/i386/exception.c +++ b/kernel/arch/i386/exception.c @@ -133,6 +133,9 @@ struct proc *t; { "Page fault", SIGSEGV, 386 }, /* not close */ { NIL_PTR, SIGILL, 0 }, /* probably software trap */ { "Coprocessor error", SIGFPE, 386 }, + { "Alignment check", SIGBUS, 386 }, + { "Machine check", SIGBUS, 386 }, + { "SIMD exception", SIGFPE, 386 }, }; register struct ex_s *ep; struct proc *saved_proc; diff --git a/kernel/arch/i386/include/archconst.h b/kernel/arch/i386/include/archconst.h index 137d6aae9..b0e7479d5 100644 --- a/kernel/arch/i386/include/archconst.h +++ b/kernel/arch/i386/include/archconst.h @@ -109,6 +109,9 @@ /* Exception vector numbers. */ #define PAGE_FAULT_VECTOR 14 #define COPROC_ERR_VECTOR 16 /* coprocessor error */ +#define ALIGNMENT_CHECK_VECTOR 17 +#define MACHINE_CHECK_VECTOR 18 +#define SIMD_EXCEPTION_VECTOR 19 /* SIMD Floating-Point Exception (#XM) */ /* Descriptor structure offsets. 
*/ #define DESC_GRANULARITY 6 /* to granularity byte */ @@ -152,6 +155,7 @@ #define AMD_CPUID_GEN_EDX 0x69746e65 /* ASCII value of "enti" */ #define AMD_CPUID_GEN_ECX 0x444d4163 /* ASCII value of "cAMD" */ - +/* fpu context should be saved in 16-byte aligned memory */ +#define FPUALIGN 16 #endif /* _I386_ACONST_H */ diff --git a/kernel/arch/i386/include/archtypes.h b/kernel/arch/i386/include/archtypes.h index bdd8f8246..942363bc6 100644 --- a/kernel/arch/i386/include/archtypes.h +++ b/kernel/arch/i386/include/archtypes.h @@ -4,38 +4,8 @@ #include #include "archconst.h" - -typedef unsigned reg_t; /* machine register */ -typedef reg_t segdesc_t; - -/* The stack frame layout is determined by the software, but for efficiency - * it is laid out so the assembly code to use it is as simple as possible. - * 80286 protected mode and all real modes use the same frame, built with - * 16-bit registers. Real mode lacks an automatic stack switch, so little - * is lost by using the 286 frame for it. The 386 frame differs only in - * having 32-bit registers and more segment registers. The same names are - * used for the larger registers to avoid differences in the code. 
- */ -struct stackframe_s { /* proc_ptr points here */ - u16_t gs; /* last item pushed by save */ - u16_t fs; /* ^ */ - u16_t es; /* | */ - u16_t ds; /* | */ - reg_t di; /* di through cx are not accessed in C */ - reg_t si; /* order is to match pusha/popa */ - reg_t fp; /* bp */ - reg_t st; /* hole for another copy of sp */ - reg_t bx; /* | */ - reg_t dx; /* | */ - reg_t cx; /* | */ - reg_t retreg; /* ax and above are all pushed by save */ - reg_t retadr; /* return address for assembly code save() */ - reg_t pc; /* ^ last item pushed by interrupt */ - reg_t cs; /* | */ - reg_t psw; /* | */ - reg_t sp; /* | */ - reg_t ss; /* these are pushed by CPU during interrupt */ -}; +#include +#include struct segdesc_s { /* segment descriptor for protected mode */ u16_t limit_low; @@ -68,6 +38,17 @@ struct pagefault u32_t pf_flags; /* Pagefault flags on stack. */ }; + +/* fpu_state_s is used in kernel proc table. + * Any changes in this structure requires changes in sconst.h, + * since this structure is used in proc structure. */ +struct fpu_state_s { + union fpu_state_u *fpu_save_area_p; /* 16-aligned fpu_save_area */ + /* fpu_image includes 512 bytes of image itself and + * additional 15 bytes required for manual 16-byte alignment. */ + char fpu_image[527]; +}; + #define INMEMORY(p) (!p->p_seg.p_cr3 || ptproc == p) #endif /* #ifndef _I386_TYPES_H */ diff --git a/kernel/arch/i386/klib386.S b/kernel/arch/i386/klib386.S index bb74790bf..affc8e8ae 100644 --- a/kernel/arch/i386/klib386.S +++ b/kernel/arch/i386/klib386.S @@ -47,6 +47,10 @@ .globl read_ss .globl idt_reload /* reload idt when returning to monitor. 
*/ +.globl fninit /* non-waiting FPU initialization */ +.globl fnstsw /* store status word (non-waiting) */ +.globl fnstcw /* store control word (non-waiting) */ + /* * The routines only guarantee to preserve the registers the C compiler * expects to be preserved (ebx, esi, edi, ebp, esp, segment registers, and @@ -536,6 +540,25 @@ read_ss: ret +/*===========================================================================*/ +/* fpu_routines */ +/*===========================================================================*/ +fninit: + fninit + ret + +fnstsw: + xor %eax, %eax + fnstsw %ax + ret + +fnstcw: + push %eax + mov 8(%esp), %eax + fnstcw (%eax) + pop %eax + ret + /*===========================================================================*/ /* read_cr0 */ /*===========================================================================*/ diff --git a/kernel/arch/i386/mpx386.S b/kernel/arch/i386/mpx386.S index 583448fe9..abf7a4929 100644 --- a/kernel/arch/i386/mpx386.S +++ b/kernel/arch/i386/mpx386.S @@ -68,6 +68,7 @@ begbss: #include #include #include "../../const.h" +#include "../../proc.h" #include "sconst.h" /* Selected 386 tss offsets. 
*/ @@ -101,11 +102,15 @@ begbss: .globl general_protection .globl page_fault .globl copr_error +.globl alignment_check +.globl machine_check +.globl simd_exception .globl params_size .globl params_offset .globl mon_ds .globl schedcheck .globl dirtypde +.globl lazy_fpu .globl hwint00 /* handlers for hardware interrupts */ .globl hwint01 @@ -522,7 +527,33 @@ inval_opcode: EXCEPTION_NO_ERR_CODE(INVAL_OP_VECTOR) copr_not_available: - EXCEPTION_NO_ERR_CODE(COPROC_NOT_VECTOR) + TEST_INT_IN_KERNEL(4, copr_not_available_in_kernel) + clts + cld /* set direction flag to a known value */ + SAVE_PROCESS_CTX_NON_LAZY(0) + lea P_MISC_FLAGS(%ebp), %ebx + movw (%ebx), %cx + and $MF_FPU_INITIALIZED, %cx + jnz 0f /* jump if FPU is already initialized */ + orw $MF_FPU_INITIALIZED, (%ebx) + fninit + jmp copr_return +0: /* load FPU context for current process */ + mov %ss:FP_SAVE_AREA_P(%ebp), %eax + cmp $0, osfxsr_feature + jz fp_l_no_fxsr /* FXSR is not available. */ + fxrstor (%eax) + jmp copr_return +fp_l_no_fxsr: + frstor (%eax) +copr_return: + orw $MF_USED_FPU, (%ebx) /* fpu was used during last execution */ + jmp restart + +copr_not_available_in_kernel: + movl $NO_NUM, 4(%esp) + movl $0, (%esp) + call minix_panic double_fault: EXCEPTION_ERR_CODE(DOUBLE_FAULT_VECTOR) @@ -548,6 +579,56 @@ page_fault: copr_error: EXCEPTION_NO_ERR_CODE(COPROC_ERR_VECTOR) +alignment_check: + EXCEPTION_NO_ERR_CODE(ALIGNMENT_CHECK_VECTOR) + +machine_check: + EXCEPTION_NO_ERR_CODE(MACHINE_CHECK_VECTOR) + +simd_exception: + EXCEPTION_NO_ERR_CODE(SIMD_EXCEPTION_VECTOR) + +/*===========================================================================*/ +/* lazy_fpu */ +/*===========================================================================*/ +/* void lazy_fpu(struct proc *pptr) + * It's called, when we are on kernel stack. + * Actually, the lazy code is just a few lines that check MF_USED_FPU, + * another part is save_init_fpu(). 
+ */ +lazy_fpu: + push %ebp + mov %esp, %ebp + push %eax + push %ebx + push %ecx + cmp $0, fpu_presence /* Do we have FPU? */ + jz no_fpu_available + mov 8(%ebp), %eax /* Get pptr */ + lea P_MISC_FLAGS(%eax), %ebx + movw (%ebx), %cx + and $MF_USED_FPU, %cx + jz 0f /* Don't save FPU */ + mov %ss:FP_SAVE_AREA_P(%eax), %eax + cmp $0, osfxsr_feature + jz fp_s_no_fxsr /* FXSR is not available. */ + fxsave (%eax) + fninit + jmp fp_saved +fp_s_no_fxsr: + fnsave (%eax) + fwait /* required for compatibility with processors prior to the Pentium */ +fp_saved: + andw $~MF_USED_FPU, (%ebx) +0: mov %cr0, %eax + or $0x00000008, %eax /* Set TS flag */ + mov %eax, %cr0 +no_fpu_available: + pop %ecx + pop %ebx + pop %eax + pop %ebp + ret /*===========================================================================*/ /* write_cr3 */ diff --git a/kernel/arch/i386/protect.c b/kernel/arch/i386/protect.c index 30f6597f2..203865e34 100644 --- a/kernel/arch/i386/protect.c +++ b/kernel/arch/i386/protect.c @@ -215,6 +215,9 @@ PUBLIC void idt_init(void) { general_protection, PROTECTION_VECTOR, INTR_PRIVILEGE }, { page_fault, PAGE_FAULT_VECTOR, INTR_PRIVILEGE }, { copr_error, COPROC_ERR_VECTOR, INTR_PRIVILEGE }, + { alignment_check, ALIGNMENT_CHECK_VECTOR, INTR_PRIVILEGE }, + { machine_check, MACHINE_CHECK_VECTOR, INTR_PRIVILEGE }, + { simd_exception, SIMD_EXCEPTION_VECTOR, INTR_PRIVILEGE }, { syscall_entry, SYS386_VECTOR, USER_PRIVILEGE },/* 386 system call */ { level0_call, LEVEL0_VECTOR, TASK_PRIVILEGE }, { NULL, 0, 0} diff --git a/kernel/arch/i386/proto.h b/kernel/arch/i386/proto.h index 4a0c42978..217fbcfd0 100644 --- a/kernel/arch/i386/proto.h +++ b/kernel/arch/i386/proto.h @@ -37,6 +37,9 @@ void _PROTOTYPE( stack_exception, (void) ); void _PROTOTYPE( general_protection, (void) ); void _PROTOTYPE( page_fault, (void) ); void _PROTOTYPE( copr_error, (void) ); +void _PROTOTYPE( alignment_check, (void) ); +void _PROTOTYPE( machine_check, (void) ); +void _PROTOTYPE( simd_exception, (void) ); /* 
Software interrupt handlers, in numerical order. */ _PROTOTYPE( void trp, (void) ); @@ -84,6 +87,9 @@ _PROTOTYPE( void phys_memset, (phys_bytes ph, u32_t c, phys_bytes bytes)); _PROTOTYPE( void reload_ds, (void) ); _PROTOTYPE( void ia32_msr_read, (u32_t reg, u32_t * hi, u32_t * lo) ); _PROTOTYPE( void ia32_msr_write, (u32_t reg, u32_t hi, u32_t lo) ); +_PROTOTYPE( void fninit, (void)); +_PROTOTYPE( unsigned short fnstsw, (void)); +_PROTOTYPE( void fnstcw, (unsigned short* cw)); /* protect.c */ struct tss_s { diff --git a/kernel/arch/i386/sconst.h b/kernel/arch/i386/sconst.h index 09ee7d989..d9669c643 100644 --- a/kernel/arch/i386/sconst.h +++ b/kernel/arch/i386/sconst.h @@ -27,9 +27,11 @@ SPREG = PSWREG+W SSREG = SPREG+W P_STACKTOP = SSREG+W - P_LDT_SEL = P_STACKTOP + FP_SAVE_AREA_P = P_STACKTOP + P_LDT_SEL = FP_SAVE_AREA_P + 532 P_CR3 = P_LDT_SEL+W P_LDT = P_CR3+W + P_MISC_FLAGS = P_LDT + 50 Msize = 9 /* size of a message in 32-bit words*/ @@ -115,7 +117,7 @@ * displ is the stack displacement. In case of an exception, there are two extra * value on the stack - error code and the exception number */ -#define SAVE_PROCESS_CTX(displ) \ +#define SAVE_PROCESS_CTX_NON_LAZY(displ) \ push %ebp ;\ ;\ movl (CURR_PROC_PTR + 4 + displ)(%esp), %ebp ;\ @@ -130,4 +132,10 @@ RESTORE_KERNEL_SEGS ;\ SAVE_TRAP_CTX(displ, %ebp, %esi) ; +#define SAVE_PROCESS_CTX(displ) \ + SAVE_PROCESS_CTX_NON_LAZY(displ) ;\ + push %ebp ;\ + call lazy_fpu ;\ + add $4, %esp ; + #endif /* __SCONST_H__ */ diff --git a/kernel/arch/i386/system.c b/kernel/arch/i386/system.c index f929604f2..0ca6a7092 100644 --- a/kernel/arch/i386/system.c +++ b/kernel/arch/i386/system.c @@ -9,7 +9,9 @@ #include #include #include +#include #include +#include #include "proto.h" #include "../../proc.h" @@ -19,7 +21,12 @@ #include "apic.h" #endif -#define CR0_EM 0x0004 /* set to enable trap on any FP instruction */ +/* set MP and NE flags to handle FPU exceptions in native mode. 
*/ +#define CR0_MP_NE 0x0022 +/* set CR4.OSFXSR[bit 9] if FXSR is supported. */ +#define CR4_OSFXSR (1L<<9) +/* set OSXMMEXCPT[bit 10] if we provide #XM handler. */ +#define CR4_OSXMMEXCPT (1L<<10) FORWARD _PROTOTYPE( void ser_debug, (int c)); @@ -130,6 +137,51 @@ PUBLIC void tss_init(struct tss_s * tss, void * kernel_stack, unsigned cpu) PUBLIC void arch_init(void) { + unsigned short cw, sw; + + fninit(); + sw = fnstsw(); + fnstcw(&cw); + + if((sw & 0xff) == 0 && + (cw & 0x103f) == 0x3f) { + /* We have some sort of FPU, but don't check exact model. + * Set CR0_NE and CR0_MP to handle fpu exceptions + * in native mode. */ + write_cr0(read_cr0() | CR0_MP_NE); + fpu_presence = 1; + if(_cpufeature(_CPUF_I386_FXSR)) { + register struct proc *rp; + phys_bytes aligned_fp_area; + + /* Enable FXSR feature usage. */ + write_cr4(read_cr4() | CR4_OSFXSR | CR4_OSXMMEXCPT); + osfxsr_feature = 1; + + for (rp = BEG_PROC_ADDR; rp < END_PROC_ADDR; ++rp) { + /* FXSR requires 16-byte alignment of memory image, + * but unfortunately some old tools (probably linker) + * ignores ".balign 16" applied to our memory image. + * Thus we have to do manual alignment. + */ + aligned_fp_area = (phys_bytes) &rp->fpu_state.fpu_image; + if(aligned_fp_area % FPUALIGN) { + aligned_fp_area += FPUALIGN - + (aligned_fp_area % FPUALIGN); + } + rp->fpu_state.fpu_save_area_p = + (void *) aligned_fp_area; + } + } else { + osfxsr_feature = 0; + } + } else { + /* No FPU presents. */ + fpu_presence = 0; + osfxsr_feature = 0; + return; + } + #ifdef CONFIG_APIC /* * this is setting kernel segments to cover most of the phys memory. 
The @@ -153,11 +205,6 @@ PUBLIC void arch_init(void) } #endif -#if 0 - /* Set CR0_EM until we get FP context switching */ - write_cr0(read_cr0() | CR0_EM); -#endif - } #define COM1_BASE 0x3F8 diff --git a/kernel/glo.h b/kernel/glo.h index 41a2560ab..72bd5722e 100644 --- a/kernel/glo.h +++ b/kernel/glo.h @@ -53,6 +53,8 @@ EXTERN time_t boottime; EXTERN char params_buffer[512]; /* boot monitor parameters */ EXTERN int minix_panicing; EXTERN int locklevel; +EXTERN char fpu_presence; +EXTERN char osfxsr_feature; /* FXSAVE/FXRSTOR instructions support (SSEx) */ #define MAGICTEST 0xC0FFEE23 EXTERN u32_t magictest; /* global magic number */ diff --git a/kernel/proc.h b/kernel/proc.h index 69f1124c9..0aa1ed2b2 100644 --- a/kernel/proc.h +++ b/kernel/proc.h @@ -18,6 +18,7 @@ struct proc { struct stackframe_s p_reg; /* process' registers saved in stack frame */ + struct fpu_state_s fpu_state; /* process' fpu_regs saved lazily */ struct segframe p_seg; /* segment descriptors */ proc_nr_t p_nr; /* number of this process (for fast access) */ struct priv *p_priv; /* system privileges structure */ @@ -214,6 +215,9 @@ struct proc { #define MF_SC_ACTIVE 0x100 /* Syscall tracing: in a system call now */ #define MF_SC_DEFER 0x200 /* Syscall tracing: deferred system call */ #define MF_SC_TRACE 0x400 /* Syscall tracing: trigger syscall events */ +#define MF_USED_FPU 0x800 /* process used fpu during last execution run */ +#define MF_FPU_INITIALIZED 0x1000 /* process already used math, so fpu + * regs are significant (initialized)*/ /* Scheduling priorities for p_priority. Values must start at zero (highest * priority) and increment. 
Priorities of the processes in the boot image diff --git a/kernel/system/do_exec.c b/kernel/system/do_exec.c index 519054263..4da151ee5 100644 --- a/kernel/system/do_exec.c +++ b/kernel/system/do_exec.c @@ -47,6 +47,9 @@ register message *m_ptr; /* pointer to request message */ /* No reply to EXEC call */ RTS_LOCK_UNSET(rp, RTS_RECEIVING); + /* Mark fpu_regs contents as not significant, so fpu + * will be initialized, when it's used next time. */ + rp->p_misc_flags &= ~MF_FPU_INITIALIZED; return(OK); } #endif /* USE_EXEC */ diff --git a/kernel/system/do_exit.c b/kernel/system/do_exit.c index d5f85fbed..394cc0649 100644 --- a/kernel/system/do_exit.c +++ b/kernel/system/do_exit.c @@ -73,9 +73,10 @@ register struct proc *rc; /* slot of process to clean up */ reset_timer(&priv(rc)->s_alarm_timer); /* Make sure that the exiting process is no longer scheduled, - * and mark slot as FREE. + * and mark slot as FREE. Also mark saved fpu contents as not significant. */ RTS_LOCK_SETFLAGS(rc, RTS_SLOT_FREE); + rc->p_misc_flags &= ~MF_FPU_INITIALIZED; /* Release the process table slot. If this is a system process, also * release its privilege structure. Further cleanup is not needed at diff --git a/kernel/system/do_fork.c b/kernel/system/do_fork.c index c3831b8f1..69b3439de 100644 --- a/kernel/system/do_fork.c +++ b/kernel/system/do_fork.c @@ -25,6 +25,7 @@ register message *m_ptr; /* pointer to request message */ /* Handle sys_fork(). PR_ENDPT has forked. The child is PR_SLOT. 
*/ #if (_MINIX_CHIP == _CHIP_INTEL) reg_t old_ldt_sel; + void *old_fpu_save_area_p; #endif register struct proc *rpc; /* child process pointer */ struct proc *rpp; /* parent process pointer */ @@ -59,10 +60,16 @@ register message *m_ptr; /* pointer to request message */ gen = _ENDPOINT_G(rpc->p_endpoint); #if (_MINIX_CHIP == _CHIP_INTEL) old_ldt_sel = rpc->p_seg.p_ldt_sel; /* backup local descriptors */ + old_fpu_save_area_p = rpc->fpu_state.fpu_save_area_p; #endif *rpc = *rpp; /* copy 'proc' struct */ #if (_MINIX_CHIP == _CHIP_INTEL) rpc->p_seg.p_ldt_sel = old_ldt_sel; /* restore descriptors */ + rpc->fpu_state.fpu_save_area_p = old_fpu_save_area_p; + if(rpp->p_misc_flags & MF_FPU_INITIALIZED) + memcpy(rpc->fpu_state.fpu_save_area_p, + rpp->fpu_state.fpu_save_area_p, + FPU_XFP_SIZE); #endif if(++gen >= _ENDPOINT_MAX_GENERATION) /* increase generation */ gen = 1; /* generation number wraparound */ diff --git a/kernel/system/do_sigreturn.c b/kernel/system/do_sigreturn.c index c382e0389..086a9bd32 100644 --- a/kernel/system/do_sigreturn.c +++ b/kernel/system/do_sigreturn.c @@ -54,7 +54,14 @@ message *m_ptr; /* pointer to request message */ #endif /* Restore the registers. */ - memcpy(&rp->p_reg, &sc.sc_regs, sizeof(struct sigregs)); + memcpy(&rp->p_reg, &sc.sc_regs, sizeof(sigregs)); +#if (_MINIX_CHIP == _CHIP_INTEL) + if(sc.sc_flags & MF_FPU_INITIALIZED) + { + memcpy(rp->fpu_state.fpu_save_area_p, &sc.fpu_state, FPU_XFP_SIZE); + rp->p_misc_flags |= MF_FPU_INITIALIZED; /* Restore math usage flag. 
*/ + } +#endif return(OK); } diff --git a/kernel/system/do_sigsend.c b/kernel/system/do_sigsend.c index 1a09dd70b..91706ac3a 100644 --- a/kernel/system/do_sigsend.c +++ b/kernel/system/do_sigsend.c @@ -29,6 +29,9 @@ message *m_ptr; /* pointer to request message */ struct sigcontext sc, *scp; struct sigframe fr, *frp; int proc_nr, r; + #if (_MINIX_CHIP == _CHIP_INTEL) + unsigned short int fp_error; + #endif if (!isokendpt(m_ptr->SIG_ENDPT, &proc_nr)) return(EINVAL); if (iskerneln(proc_nr)) return(EPERM); @@ -43,11 +46,15 @@ message *m_ptr; /* pointer to request message */ scp = (struct sigcontext *) smsg.sm_stkptr - 1; /* Copy the registers to the sigcontext structure. */ - memcpy(&sc.sc_regs, (char *) &rp->p_reg, sizeof(struct sigregs)); + memcpy(&sc.sc_regs, (char *) &rp->p_reg, sizeof(sigregs)); + #if (_MINIX_CHIP == _CHIP_INTEL) + if(rp->p_misc_flags & MF_FPU_INITIALIZED) + memcpy(&sc.fpu_state, rp->fpu_state.fpu_save_area_p, FPU_XFP_SIZE); + #endif /* Finish the sigcontext initialization. */ - sc.sc_flags = 0; /* unused at this time */ sc.sc_mask = smsg.sm_mask; + sc.sc_flags = 0 | rp->p_misc_flags & MF_FPU_INITIALIZED; /* Copy the sigcontext structure to the user's stack. 
*/ if((r=data_copy_vmcheck(SYSTEM, (vir_bytes) &sc, m_ptr->SIG_ENDPT, @@ -61,7 +68,40 @@ message *m_ptr; /* pointer to request message */ fr.sf_fp = rp->p_reg.fp; rp->p_reg.fp = (reg_t) &frp->sf_fp; fr.sf_scp = scp; - fr.sf_code = 0; /* XXX - should be used for type of FP exception */ + + #if (_MINIX_CHIP == _CHIP_INTEL) + if (osfxsr_feature == 1) { + fp_error = sc.fpu_state.xfp_regs.fp_status & + ~sc.fpu_state.xfp_regs.fp_control; + } else { + fp_error = sc.fpu_state.fpu_regs.fp_status & + ~sc.fpu_state.fpu_regs.fp_control; + } + + if (fp_error & 0x001) { /* Invalid op */ + /* + * swd & 0x240 == 0x040: Stack Underflow + * swd & 0x240 == 0x240: Stack Overflow + * User must clear the SF bit (0x40) if set + */ + fr.sf_code = FPE_FLTINV; + } else if (fp_error & 0x004) { + fr.sf_code = FPE_FLTDIV; /* Divide by Zero */ + } else if (fp_error & 0x008) { + fr.sf_code = FPE_FLTOVF; /* Overflow */ + } else if (fp_error & 0x012) { + fr.sf_code = FPE_FLTUND; /* Denormal, Underflow */ + } else if (fp_error & 0x020) { + fr.sf_code = FPE_FLTRES; /* Precision */ + } else { + fr.sf_code = 0; /* XXX - probably should be used for FPE_INTOVF or + * FPE_INTDIV */ + } + +#else + fr.sf_code = 0; +#endif + fr.sf_signo = smsg.sm_signo; fr.sf_retadr = (void (*)()) smsg.sm_sigreturn; @@ -75,6 +115,9 @@ message *m_ptr; /* pointer to request message */ rp->p_reg.sp = (reg_t) frp; rp->p_reg.pc = (reg_t) smsg.sm_sighandler; + /* Signal handler should get clean FPU. 
*/ + rp->p_misc_flags &= ~MF_FPU_INITIALIZED; + if(!RTS_ISSET(rp, RTS_PROC_STOP)) { struct proc *caller; caller = proc_addr(who_p); diff --git a/lib/i386/misc/_cpufeature.c b/lib/i386/misc/_cpufeature.c index 93a93a839..84642bd44 100644 --- a/lib/i386/misc/_cpufeature.c +++ b/lib/i386/misc/_cpufeature.c @@ -7,6 +7,7 @@ int _cpufeature(int cpufeature) { u32_t cpuid_feature_edx = 0; + u32_t cpuid_feature_ecx = 0; int proc; proc = getprocessor(); @@ -18,7 +19,8 @@ int _cpufeature(int cpufeature) u32_t params, a, b, c, d; _cpuid(0, &params, &b, &c, &d); if(params > 0) { - _cpuid(1, &a, &b, &c, &cpuid_feature_edx); + _cpuid(1, &a, &b, &cpuid_feature_ecx, + &cpuid_feature_edx); } } @@ -31,6 +33,30 @@ int _cpufeature(int cpufeature) return cpuid_feature_edx & CPUID1_EDX_APIC_ON_CHIP; case _CPUF_I386_TSC: return cpuid_feature_edx & CPUID1_EDX_TSC; + case _CPUF_I386_FPU: + return cpuid_feature_edx & CPUID1_EDX_FPU; + case _CPUF_I386_SSEx: + return (cpuid_feature_edx & (CPUID1_EDX_FXSR | + CPUID1_EDX_SSE | + CPUID1_EDX_SSE2)) && + (cpuid_feature_ecx & (CPUID1_ECX_SSE3 | + CPUID1_ECX_SSSE3 | + CPUID1_ECX_SSE4_1 | + CPUID1_ECX_SSE4_2)); + case _CPUF_I386_FXSR: + return cpuid_feature_edx & CPUID1_EDX_FXSR; + case _CPUF_I386_SSE: + return cpuid_feature_edx & CPUID1_EDX_SSE; + case _CPUF_I386_SSE2: + return cpuid_feature_edx & CPUID1_EDX_SSE2; + case _CPUF_I386_SSE3: + return cpuid_feature_ecx & CPUID1_ECX_SSE3; + case _CPUF_I386_SSSE3: + return cpuid_feature_ecx & CPUID1_ECX_SSSE3; + case _CPUF_I386_SSE4_1: + return cpuid_feature_ecx & CPUID1_ECX_SSE4_1; + case _CPUF_I386_SSE4_2: + return cpuid_feature_ecx & CPUID1_ECX_SSE4_2; } return 0;