SMP - lazy FPU
- when a process is migrated to a different CPU it may have an active FPU context in the processor registers. We must save it and migrate it together with the process.
This commit is contained in:
parent
1f89845bb2
commit
5b8b623765
17 changed files with 142 additions and 60 deletions
|
@ -60,6 +60,7 @@ PRIVATE phys_bytes copy_trampoline(void)
|
|||
|
||||
tramp_size = (unsigned) &__trampoline_end - (unsigned)&trampoline;
|
||||
s = env_get("memory");
|
||||
s = (char *) get_value(params_buffer, "memory");
|
||||
if (!s)
|
||||
return 0;
|
||||
|
||||
|
@ -238,6 +239,7 @@ PRIVATE void ap_finish_booting(void)
|
|||
printf("CPU %d paging is on\n", cpu);
|
||||
|
||||
lapic_enable(cpu);
|
||||
fpu_init();
|
||||
|
||||
if (app_cpu_init_timer(system_hz)) {
|
||||
panic("FATAL : failed to initialize timer interrupts CPU %d, "
|
||||
|
|
|
@ -205,7 +205,7 @@ PUBLIC void arch_get_aout_headers(const int i, struct exec *h)
|
|||
phys_copy(aout + i * A_MINHDR, vir2phys(h), (phys_bytes) A_MINHDR);
|
||||
}
|
||||
|
||||
PRIVATE void fpu_init(void)
|
||||
PUBLIC void fpu_init(void)
|
||||
{
|
||||
unsigned short cw, sw;
|
||||
|
||||
|
@ -219,10 +219,8 @@ PRIVATE void fpu_init(void)
|
|||
* Set CR0_NE and CR0_MP to handle fpu exceptions
|
||||
* in native mode. */
|
||||
write_cr0(read_cr0() | CR0_MP_NE);
|
||||
fpu_presence = 1;
|
||||
get_cpulocal_var(fpu_presence) = 1;
|
||||
if(_cpufeature(_CPUF_I386_FXSR)) {
|
||||
register struct proc *rp;
|
||||
phys_bytes aligned_fp_area;
|
||||
u32_t cr4 = read_cr4() | CR4_OSFXSR; /* Enable FXSR. */
|
||||
|
||||
/* OSXMMEXCPT if supported
|
||||
|
@ -233,35 +231,18 @@ PRIVATE void fpu_init(void)
|
|||
|
||||
write_cr4(cr4);
|
||||
osfxsr_feature = 1;
|
||||
|
||||
for (rp = BEG_PROC_ADDR; rp < END_PROC_ADDR; ++rp) {
|
||||
/* FXSR requires 16-byte alignment of memory
|
||||
* image, but unfortunately some old tools
|
||||
* (probably linker) ignores ".balign 16"
|
||||
* applied to our memory image.
|
||||
* Thus we have to do manual alignment.
|
||||
*/
|
||||
aligned_fp_area =
|
||||
(phys_bytes) &rp->p_fpu_state.fpu_image;
|
||||
if(aligned_fp_area % FPUALIGN) {
|
||||
aligned_fp_area += FPUALIGN -
|
||||
(aligned_fp_area % FPUALIGN);
|
||||
}
|
||||
rp->p_fpu_state.fpu_save_area_p =
|
||||
(void *) aligned_fp_area;
|
||||
}
|
||||
} else {
|
||||
osfxsr_feature = 0;
|
||||
}
|
||||
} else {
|
||||
/* No FPU present. */
|
||||
fpu_presence = 0;
|
||||
get_cpulocal_var(fpu_presence) = 0;
|
||||
osfxsr_feature = 0;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
PUBLIC void save_fpu(struct proc *pr)
|
||||
PUBLIC void save_local_fpu(struct proc *pr)
|
||||
{
|
||||
if(!fpu_presence)
|
||||
return;
|
||||
|
@ -275,6 +256,34 @@ PUBLIC void save_fpu(struct proc *pr)
|
|||
}
|
||||
}
|
||||
|
||||
/*
 * Save the FPU context of process pr.
 *
 * Without CONFIG_SMP this simply calls save_local_fpu(pr).
 *
 * With CONFIG_SMP, if pr is assigned to this cpu (cpuid == pr->p_cpu) the
 * context is saved locally. Otherwise the process is stopped on its own cpu
 * with smp_schedule_stop_proc_save_ctx(), which forces its context to be
 * saved there; RTS_PROC_STOP is cleared again afterwards only if it was not
 * already set before the call.
 */
PUBLIC void save_fpu(struct proc *pr)
|
||||
{
|
||||
#if CONFIG_SMP
|
||||
if (cpuid == pr->p_cpu) {
|
||||
save_local_fpu(pr);
|
||||
}
|
||||
else {
|
||||
int stopped;
|
||||

||||
/* remember if the process was already stopped */
|
||||
stopped = RTS_ISSET(pr, RTS_PROC_STOP);
|
||||

||||
/* stop the remote process and force its context to be saved */
|
||||
smp_schedule_stop_proc_save_ctx(pr);
|
||||

||||
/*
|
||||
* If the process wasn't stopped let the process run again. The
|
||||
* process is kept blocked by the fact that the kernel cannot run
|
||||
* on its cpu
|
||||
*/
|
||||
if (!stopped)
|
||||
RTS_UNSET(pr, RTS_PROC_STOP);
|
||||
}
|
||||
#else
|
||||
save_local_fpu(pr);
|
||||
#endif
|
||||
}
|
||||
|
||||
PUBLIC void restore_fpu(struct proc *pr)
|
||||
{
|
||||
if(!proc_used_fpu(pr)) {
|
||||
|
@ -328,8 +337,6 @@ PUBLIC void arch_init(void)
|
|||
BOOT_VERBOSE(printf("APIC not present, using legacy PIC\n"));
|
||||
}
|
||||
#endif
|
||||
|
||||
fpu_init();
|
||||
}
|
||||
|
||||
PUBLIC void ser_putc(char c)
|
||||
|
|
|
@ -78,6 +78,9 @@ DECLARE_CPULOCAL(int, cpu_is_idle); /* let the others know that you are idle */
|
|||
|
||||
DECLARE_CPULOCAL(u64_t ,tsc_ctr_switch); /* when did we switch time accounting */
|
||||
|
||||
DECLARE_CPULOCAL(char ,fpu_presence); /* whether the cpu has FPU or not */
|
||||
DECLARE_CPULOCAL(struct proc * ,fpu_owner); /* who owns the FPU of the local cpu */
|
||||
|
||||
DECLARE_CPULOCAL_END
|
||||
|
||||
#endif /* __ASSEMBLY__ */
|
||||
|
|
|
@ -44,7 +44,6 @@ EXTERN time_t boottime;
|
|||
EXTERN char params_buffer[512]; /* boot monitor parameters */
|
||||
EXTERN int minix_panicing;
|
||||
EXTERN char fpu_presence;
|
||||
EXTERN struct proc * fpu_owner;
|
||||
EXTERN int verboseboot; /* verbose boot, init'ed in cstart */
|
||||
#define MAGICTEST 0xC0FFEE23
|
||||
EXTERN u32_t magictest; /* global magic number */
|
||||
|
|
|
@ -32,7 +32,6 @@
|
|||
/* Prototype declarations for PRIVATE functions. */
|
||||
FORWARD _PROTOTYPE( void announce, (void));
|
||||
|
||||
void ser_dump_queues(void);
|
||||
PUBLIC void bsp_finish_booting(void)
|
||||
{
|
||||
int i;
|
||||
|
@ -69,6 +68,19 @@ PUBLIC void bsp_finish_booting(void)
|
|||
"cannot continue without any clock source!");
|
||||
}
|
||||
|
||||
fpu_init();
|
||||
|
||||
#ifdef CONFIG_WATCHDOG
|
||||
if (watchdog_enabled) {
|
||||
if (arch_watchdog_init()) {
|
||||
printf("WARNING watchdog initialization failed! Disabled\n");
|
||||
watchdog_enabled = 0;
|
||||
}
|
||||
else
|
||||
BOOT_VERBOSE(printf("Watchdog enabled\n"););
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Warnings for sanity checks that take time. These warnings are printed
|
||||
* so it's a clear warning no full release should be done with them
|
||||
* enabled.
|
||||
|
|
|
@ -155,6 +155,23 @@ PUBLIC void proc_init(void)
|
|||
ip->p_rts_flags |= RTS_PROC_STOP;
|
||||
set_idle_name(ip->p_name, i);
|
||||
}
|
||||
|
||||
for (rp = BEG_PROC_ADDR; rp < END_PROC_ADDR; ++rp) {
|
||||
/*
|
||||
* FXSR requires 16-byte alignment of memory image, but
|
||||
* unfortunately a.out does not preserve the alignment while
|
||||
* linking. Thus we have to do manual alignment.
|
||||
*/
|
||||
phys_bytes aligned_fp_area;
|
||||
aligned_fp_area =
|
||||
(phys_bytes) &rp->p_fpu_state.fpu_image;
|
||||
if(aligned_fp_area % FPUALIGN) {
|
||||
aligned_fp_area += FPUALIGN -
|
||||
(aligned_fp_area % FPUALIGN);
|
||||
}
|
||||
rp->p_fpu_state.fpu_save_area_p =
|
||||
(void *) aligned_fp_area;
|
||||
}
|
||||
}
|
||||
|
||||
PRIVATE void switch_address_space_idle(void)
|
||||
|
@ -346,7 +363,7 @@ check_misc_flags:
|
|||
context_stop(proc_addr(KERNEL));
|
||||
|
||||
/* If the process isn't the owner of FPU, enable the FPU exception */
|
||||
if(fpu_owner != p)
|
||||
if(get_cpulocal_var(fpu_owner) != p)
|
||||
enable_fpu_exception();
|
||||
else
|
||||
disable_fpu_exception();
|
||||
|
@ -1573,6 +1590,7 @@ PUBLIC void proc_no_time(struct proc * p)
|
|||
PUBLIC void copr_not_available_handler(void)
|
||||
{
|
||||
struct proc * p;
|
||||
struct proc ** local_fpu_owner;
|
||||
/*
|
||||
* Disable the FPU exception (both for the kernel and for the process
|
||||
* once it's scheduled), and initialize or restore the FPU state.
|
||||
|
@ -1583,9 +1601,10 @@ PUBLIC void copr_not_available_handler(void)
|
|||
p = get_cpulocal_var(proc_ptr);
|
||||
|
||||
/* if FPU is not owned by anyone, do not store anything */
|
||||
if (fpu_owner != NULL) {
|
||||
assert(fpu_owner != p);
|
||||
save_fpu(fpu_owner);
|
||||
local_fpu_owner = get_cpulocal_var_ptr(fpu_owner);
|
||||
if (*local_fpu_owner != NULL) {
|
||||
assert(*local_fpu_owner != p);
|
||||
save_local_fpu(*local_fpu_owner);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -1593,12 +1612,17 @@ PUBLIC void copr_not_available_handler(void)
|
|||
* schedule!
|
||||
*/
|
||||
restore_fpu(p);
|
||||
fpu_owner = p;
|
||||
*local_fpu_owner = p;
|
||||
context_stop(proc_addr(KERNEL));
|
||||
restore_user_context(p);
|
||||
NOT_REACHABLE;
|
||||
}
|
||||
|
||||
PUBLIC void release_fpu(void) {
|
||||
fpu_owner = NULL;
|
||||
PUBLIC void release_fpu(struct proc * p) {
|
||||
struct proc ** fpu_owner_ptr;
|
||||
|
||||
fpu_owner_ptr = get_cpu_var_ptr(p->p_cpu, fpu_owner);
|
||||
|
||||
if (*fpu_owner_ptr == p)
|
||||
*fpu_owner_ptr = NULL;
|
||||
}
|
||||
|
|
|
@ -34,6 +34,7 @@ _PROTOTYPE( void context_stop, (struct proc * p) );
|
|||
_PROTOTYPE( void context_stop_idle, (void) );
|
||||
_PROTOTYPE( void restore_fpu, (struct proc *) );
|
||||
_PROTOTYPE( void save_fpu, (struct proc *) );
|
||||
_PROTOTYPE( void save_local_fpu, (struct proc *) );
|
||||
_PROTOTYPE( void fpu_sigcontext, (struct proc *, struct sigframe *fr, struct sigcontext *sc) );
|
||||
|
||||
/* main.c */
|
||||
|
@ -178,6 +179,10 @@ _PROTOTYPE( vir_bytes alloc_remote_segment, (u32_t *, segframe_t *,
|
|||
_PROTOTYPE( int intr_init, (int, int) );
|
||||
_PROTOTYPE( void halt_cpu, (void) );
|
||||
_PROTOTYPE( void arch_init, (void) );
|
||||
/* arch dependent FPU initialization per CPU */
|
||||
_PROTOTYPE( void fpu_init, (void) );
|
||||
/* returns true if fpu is present and initialized */
|
||||
_PROTOTYPE( int is_fpu, (void) );
|
||||
_PROTOTYPE( void ser_putc, (char) );
|
||||
_PROTOTYPE( __dead void arch_shutdown, (int) );
|
||||
_PROTOTYPE( __dead void arch_monitor, (void) );
|
||||
|
@ -213,7 +218,7 @@ _PROTOTYPE(void release_address_space, (struct proc *pr));
|
|||
|
||||
_PROTOTYPE(void enable_fpu_exception, (void));
|
||||
_PROTOTYPE(void disable_fpu_exception, (void));
|
||||
_PROTOTYPE(void release_fpu, (void));
|
||||
_PROTOTYPE(void release_fpu, (struct proc * p));
|
||||
|
||||
/* utility.c */
|
||||
_PROTOTYPE( void cpu_print_freq, (unsigned cpu));
|
||||
|
|
35
kernel/smp.c
35
kernel/smp.c
|
@ -19,6 +19,7 @@ PRIVATE struct sched_ipi_data sched_ipi_data[CONFIG_MAX_CPUS];
|
|||
|
||||
#define SCHED_IPI_STOP_PROC 1
|
||||
#define SCHED_IPI_VM_INHIBIT 2
|
||||
#define SCHED_IPI_SAVE_CTX 4
|
||||
|
||||
static volatile unsigned ap_cpus_booted;
|
||||
|
||||
|
@ -116,6 +117,30 @@ PUBLIC void smp_schedule_vminhibit(struct proc * p)
|
|||
assert(RTS_ISSET(p, RTS_VMINHIBIT));
|
||||
}
|
||||
|
||||
/*
 * Stop process p and request that its complete context be saved, by issuing
 * a synchronous scheduling request carrying both SCHED_IPI_STOP_PROC and
 * SCHED_IPI_SAVE_CTX. On return RTS_PROC_STOP is asserted to be set on p;
 * the caller is responsible for clearing it again if the process should run.
 */
PUBLIC void smp_schedule_stop_proc_save_ctx(struct proc * p)
|
||||
{
|
||||
/*
|
||||
* stop the process and force the complete context of the process to
|
||||
* be saved (i.e. including FPU state and such)
|
||||
*/
|
||||
smp_schedule_sync(p, SCHED_IPI_STOP_PROC | SCHED_IPI_SAVE_CTX);
|
||||
assert(RTS_ISSET(p, RTS_PROC_STOP));
|
||||
}
|
||||
|
||||
/*
 * Migrate process p to cpu dest_cpu.
 *
 * The process is first stopped with a synchronous scheduling request that
 * also asks for its full context to be saved (SCHED_IPI_STOP_PROC |
 * SCHED_IPI_SAVE_CTX). Once it is stopped, p->p_cpu is set to dest_cpu and
 * RTS_PROC_STOP is cleared so the process can run again.
 */
PUBLIC void smp_schedule_migrate_proc(struct proc * p, unsigned dest_cpu)
|
||||
{
|
||||
/*
|
||||
* stop the process and force the complete context of the process to
|
||||
* be saved (i.e. including FPU state and such)
|
||||
*/
|
||||
smp_schedule_sync(p, SCHED_IPI_STOP_PROC | SCHED_IPI_SAVE_CTX);
|
||||
assert(RTS_ISSET(p, RTS_PROC_STOP));
|
||||

||||
/* assign the new cpu and let the process run again */
|
||||
p->p_cpu = dest_cpu;
|
||||
RTS_UNSET(p, RTS_PROC_STOP);
|
||||
}
|
||||
|
||||
PUBLIC void smp_ipi_sched_handler(void)
|
||||
{
|
||||
struct proc * curr;
|
||||
|
@ -134,6 +159,16 @@ PUBLIC void smp_ipi_sched_handler(void)
|
|||
if (flgs & SCHED_IPI_STOP_PROC) {
|
||||
RTS_SET(p, RTS_PROC_STOP);
|
||||
}
|
||||
if (flgs & SCHED_IPI_SAVE_CTX) {
|
||||
/* all other context has been saved already, only the FPU remains */
|
||||
if (proc_used_fpu(p) &&
|
||||
get_cpulocal_var(fpu_owner) == p) {
|
||||
disable_fpu_exception();
|
||||
save_local_fpu(p);
|
||||
/* we're preparing to migrate somewhere else */
|
||||
release_fpu(p);
|
||||
}
|
||||
}
|
||||
if (flgs & SCHED_IPI_VM_INHIBIT) {
|
||||
RTS_SET(p, RTS_VMINHIBIT);
|
||||
}
|
||||
|
|
|
@ -63,6 +63,11 @@ _PROTOTYPE(void smp_schedule, (unsigned cpu));
|
|||
_PROTOTYPE(void smp_schedule_stop_proc, (struct proc * p));
|
||||
/* stop a process on a different cpu because its address space is being changed */
|
||||
_PROTOTYPE(void smp_schedule_vminhibit, (struct proc * p));
|
||||
/* stop the process and force saving of its full context */
|
||||
_PROTOTYPE(void smp_schedule_stop_proc_save_ctx, (struct proc * p));
|
||||
/* migrate the full context of a process to the destination CPU */
|
||||
_PROTOTYPE(void smp_schedule_migrate_proc,
|
||||
(struct proc * p, unsigned dest_cpu));
|
||||
|
||||
_PROTOTYPE(void arch_send_smp_schedule_ipi, (unsigned cpu));
|
||||
_PROTOTYPE(void arch_smp_halt_cpu, (void));
|
||||
|
|
|
@ -669,9 +669,7 @@ PUBLIC int sched_proc(struct proc *p,
|
|||
if (proc_is_runnable(p)) {
|
||||
#ifdef CONFIG_SMP
|
||||
if (p->p_cpu != cpuid && cpu != -1 && cpu != p->p_cpu) {
|
||||
printf("WARNING : changing cpu of a runnable process %d "
|
||||
"on a different cpu!\n", p->p_endpoint);
|
||||
return(EINVAL);
|
||||
smp_schedule_migrate_proc(p, cpu);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
|
|
@ -54,6 +54,9 @@ PUBLIC int do_clear(struct proc * caller, message * m_ptr)
|
|||
* and mark slot as FREE. Also mark saved fpu contents as not significant.
|
||||
*/
|
||||
RTS_SETFLAGS(rc, RTS_SLOT_FREE);
|
||||
|
||||
/* release FPU */
|
||||
release_fpu(rc);
|
||||
rc->p_misc_flags &= ~MF_FPU_INITIALIZED;
|
||||
|
||||
/* Release the process table slot. If this is a system process, also
|
||||
|
@ -70,10 +73,6 @@ PUBLIC int do_clear(struct proc * caller, message * m_ptr)
|
|||
}
|
||||
#endif
|
||||
|
||||
/* release FPU */
|
||||
if (fpu_owner == rc)
|
||||
release_fpu();
|
||||
|
||||
return OK;
|
||||
}
|
||||
|
||||
|
|
|
@ -46,8 +46,7 @@ PUBLIC int do_exec(struct proc * caller, message * m_ptr)
|
|||
* will be initialized, when it's used next time. */
|
||||
rp->p_misc_flags &= ~MF_FPU_INITIALIZED;
|
||||
/* force reloading FPU if the current process is the owner */
|
||||
if (rp == fpu_owner)
|
||||
release_fpu();
|
||||
release_fpu(rp);
|
||||
return(OK);
|
||||
}
|
||||
#endif /* USE_EXEC */
|
||||
|
|
|
@ -53,10 +53,7 @@ PUBLIC int do_fork(struct proc * caller, message * m_ptr)
|
|||
map_ptr= (struct mem_map *) m_ptr->PR_MEM_PTR;
|
||||
|
||||
/* make sure that the FPU context is saved in parent before copy */
|
||||
if (fpu_owner == rpp) {
|
||||
disable_fpu_exception();
|
||||
save_fpu(rpp);
|
||||
}
|
||||
/* Copy parent 'proc' struct to child. And reinitialize some fields. */
|
||||
gen = _ENDPOINT_G(rpc->p_endpoint);
|
||||
#if (_MINIX_CHIP == _CHIP_INTEL)
|
||||
|
|
|
@ -43,10 +43,7 @@ PUBLIC int do_getmcontext(struct proc * caller, message * m_ptr)
|
|||
mc.mc_fpu_flags = 0;
|
||||
if (proc_used_fpu(rp)) {
|
||||
/* make sure that the FPU context is saved into proc structure first */
|
||||
if (fpu_owner == rp) {
|
||||
disable_fpu_exception();
|
||||
save_fpu(rp);
|
||||
}
|
||||
mc.mc_fpu_flags = 0 | rp->p_misc_flags & MF_FPU_INITIALIZED;
|
||||
memcpy(&(mc.mc_fpu_state), rp->p_fpu_state.fpu_save_area_p,
|
||||
FPU_XFP_SIZE);
|
||||
|
@ -92,8 +89,7 @@ PUBLIC int do_setmcontext(struct proc * caller, message * m_ptr)
|
|||
} else
|
||||
rp->p_misc_flags &= ~MF_FPU_INITIALIZED;
|
||||
/* force reloading FPU in either case */
|
||||
if (fpu_owner == rp)
|
||||
release_fpu();
|
||||
release_fpu(rp);
|
||||
#endif
|
||||
|
||||
return(OK);
|
||||
|
|
|
@ -60,8 +60,7 @@ PUBLIC int do_sigreturn(struct proc * caller, message * m_ptr)
|
|||
FPU_XFP_SIZE);
|
||||
rp->p_misc_flags |= MF_FPU_INITIALIZED; /* Restore math usage flag. */
|
||||
/* force reloading FPU */
|
||||
if (fpu_owner == rp)
|
||||
release_fpu();
|
||||
release_fpu(rp);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
|
|
@ -46,10 +46,7 @@ PUBLIC int do_sigsend(struct proc * caller, message * m_ptr)
|
|||
#if (_MINIX_CHIP == _CHIP_INTEL)
|
||||
if(proc_used_fpu(rp)) {
|
||||
/* save the FPU context before saving it to the sig context */
|
||||
if (fpu_owner == rp) {
|
||||
disable_fpu_exception();
|
||||
save_fpu(rp);
|
||||
}
|
||||
memcpy(&sc.sc_fpu_state, rp->p_fpu_state.fpu_save_area_p,
|
||||
FPU_XFP_SIZE);
|
||||
}
|
||||
|
|
|
@ -80,3 +80,8 @@ PUBLIC void cpu_print_freq(unsigned cpu)
|
|||
freq = cpu_get_freq(cpu);
|
||||
printf("CPU %d freq %lu MHz\n", cpu, div64u(freq, 1000000));
|
||||
}
|
||||
|
||||
/*
 * Return the cpu-local fpu_presence variable, i.e. non-zero if fpu_init()
 * recorded an FPU for this cpu and zero otherwise.
 */
PUBLIC int is_fpu(void)
|
||||
{
|
||||
return get_cpulocal_var(fpu_presence);
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue