SMP - lazy FPU

- when a process is migrated to a different CPU it may have an active
  FPU context in the processor registers. We must save it and migrate
  it together with the process.
This commit is contained in:
Tomas Hruby 2010-09-15 14:11:25 +00:00
parent 1f89845bb2
commit 5b8b623765
17 changed files with 142 additions and 60 deletions

View file

@ -60,6 +60,7 @@ PRIVATE phys_bytes copy_trampoline(void)
tramp_size = (unsigned) &__trampoline_end - (unsigned)&trampoline;
s = env_get("memory");
s = (char *) get_value(params_buffer, "memory");
if (!s)
return 0;
@ -238,6 +239,7 @@ PRIVATE void ap_finish_booting(void)
printf("CPU %d paging is on\n", cpu);
lapic_enable(cpu);
fpu_init();
if (app_cpu_init_timer(system_hz)) {
panic("FATAL : failed to initialize timer interrupts CPU %d, "

View file

@ -205,7 +205,7 @@ PUBLIC void arch_get_aout_headers(const int i, struct exec *h)
phys_copy(aout + i * A_MINHDR, vir2phys(h), (phys_bytes) A_MINHDR);
}
PRIVATE void fpu_init(void)
PUBLIC void fpu_init(void)
{
unsigned short cw, sw;
@ -219,10 +219,8 @@ PRIVATE void fpu_init(void)
* Set CR0_NE and CR0_MP to handle fpu exceptions
* in native mode. */
write_cr0(read_cr0() | CR0_MP_NE);
fpu_presence = 1;
get_cpulocal_var(fpu_presence) = 1;
if(_cpufeature(_CPUF_I386_FXSR)) {
register struct proc *rp;
phys_bytes aligned_fp_area;
u32_t cr4 = read_cr4() | CR4_OSFXSR; /* Enable FXSR. */
/* OSXMMEXCPT if supported
@ -233,35 +231,18 @@ PRIVATE void fpu_init(void)
write_cr4(cr4);
osfxsr_feature = 1;
for (rp = BEG_PROC_ADDR; rp < END_PROC_ADDR; ++rp) {
/* FXSR requires 16-byte alignment of memory
* image, but unfortunately some old tools
* (probably linker) ignores ".balign 16"
* applied to our memory image.
* Thus we have to do manual alignment.
*/
aligned_fp_area =
(phys_bytes) &rp->p_fpu_state.fpu_image;
if(aligned_fp_area % FPUALIGN) {
aligned_fp_area += FPUALIGN -
(aligned_fp_area % FPUALIGN);
}
rp->p_fpu_state.fpu_save_area_p =
(void *) aligned_fp_area;
}
} else {
osfxsr_feature = 0;
}
} else {
/* No FPU presents. */
fpu_presence = 0;
get_cpulocal_var(fpu_presence) = 0;
osfxsr_feature = 0;
return;
}
}
PUBLIC void save_fpu(struct proc *pr)
PUBLIC void save_local_fpu(struct proc *pr)
{
if(!fpu_presence)
return;
@ -275,6 +256,34 @@ PUBLIC void save_fpu(struct proc *pr)
}
}
/*
 * Save the FPU context of process 'pr' into its process structure.
 *
 * On SMP the live FPU registers are on whichever CPU last ran the
 * process. If that is the local CPU we save directly; otherwise the
 * remote CPU is told (by IPI) to stop the process and flush its full
 * context, including the FPU state.
 */
PUBLIC void save_fpu(struct proc *pr)
{
#if CONFIG_SMP
	if (cpuid == pr->p_cpu) {
		/* FPU state is in this CPU's registers; save locally */
		save_local_fpu(pr);
	}
	else {
		int stopped;

		/* remember if the process was already stopped */
		stopped = RTS_ISSET(pr, RTS_PROC_STOP);

		/* stop the remote process and force its context to be saved */
		smp_schedule_stop_proc_save_ctx(pr);

		/*
		 * If the process wasn't stopped let the process run again. The
		 * process is kept blocked by the fact that the kernel cannot run
		 * on its cpu
		 */
		if (!stopped)
			RTS_UNSET(pr, RTS_PROC_STOP);
	}
#else
	save_local_fpu(pr);
#endif
}
PUBLIC void restore_fpu(struct proc *pr)
{
if(!proc_used_fpu(pr)) {
@ -328,8 +337,6 @@ PUBLIC void arch_init(void)
BOOT_VERBOSE(printf("APIC not present, using legacy PIC\n"));
}
#endif
fpu_init();
}
PUBLIC void ser_putc(char c)

View file

@ -78,6 +78,9 @@ DECLARE_CPULOCAL(int, cpu_is_idle); /* let the others know that you are idle */
DECLARE_CPULOCAL(u64_t ,tsc_ctr_switch); /* when did we switched time accounting */
DECLARE_CPULOCAL(char ,fpu_presence); /* whether the cpu has FPU or not */
DECLARE_CPULOCAL(struct proc * ,fpu_owner); /* who owns the FPU of the local cpu */
DECLARE_CPULOCAL_END
#endif /* __ASSEMBLY__ */

View file

@ -44,7 +44,6 @@ EXTERN time_t boottime;
EXTERN char params_buffer[512]; /* boot monitor parameters */
EXTERN int minix_panicing;
EXTERN char fpu_presence;
EXTERN struct proc * fpu_owner;
EXTERN int verboseboot; /* verbose boot, init'ed in cstart */
#define MAGICTEST 0xC0FFEE23
EXTERN u32_t magictest; /* global magic number */

View file

@ -32,7 +32,6 @@
/* Prototype declarations for PRIVATE functions. */
FORWARD _PROTOTYPE( void announce, (void));
void ser_dump_queues(void);
PUBLIC void bsp_finish_booting(void)
{
int i;
@ -69,6 +68,19 @@ PUBLIC void bsp_finish_booting(void)
"cannot continue without any clock source!");
}
fpu_init();
#ifdef CONFIG_WATCHDOG
if (watchdog_enabled) {
if (arch_watchdog_init()) {
printf("WARNING watchdog initialization failed! Disabled\n");
watchdog_enabled = 0;
}
else
BOOT_VERBOSE(printf("Watchdog enabled\n"););
}
#endif
/* Warnings for sanity checks that take time. These warnings are printed
* so it's a clear warning no full release should be done with them
* enabled.

View file

@ -155,6 +155,23 @@ PUBLIC void proc_init(void)
ip->p_rts_flags |= RTS_PROC_STOP;
set_idle_name(ip->p_name, i);
}
for (rp = BEG_PROC_ADDR; rp < END_PROC_ADDR; ++rp) {
/*
* FXSR requires 16-byte alignment of memory image, but
* unfortunately a.out does not preserve the alignment while
* linking. Thus we have to do manual alignment.
*/
phys_bytes aligned_fp_area;
aligned_fp_area =
(phys_bytes) &rp->p_fpu_state.fpu_image;
if(aligned_fp_area % FPUALIGN) {
aligned_fp_area += FPUALIGN -
(aligned_fp_area % FPUALIGN);
}
rp->p_fpu_state.fpu_save_area_p =
(void *) aligned_fp_area;
}
}
PRIVATE void switch_address_space_idle(void)
@ -346,7 +363,7 @@ check_misc_flags:
context_stop(proc_addr(KERNEL));
/* If the process isn't the owner of FPU, enable the FPU exception */
if(fpu_owner != p)
if(get_cpulocal_var(fpu_owner) != p)
enable_fpu_exception();
else
disable_fpu_exception();
@ -1573,6 +1590,7 @@ PUBLIC void proc_no_time(struct proc * p)
PUBLIC void copr_not_available_handler(void)
{
struct proc * p;
struct proc ** local_fpu_owner;
/*
* Disable the FPU exception (both for the kernel and for the process
* once it's scheduled), and initialize or restore the FPU state.
@ -1583,9 +1601,10 @@ PUBLIC void copr_not_available_handler(void)
p = get_cpulocal_var(proc_ptr);
/* if FPU is not owned by anyone, do not store anything */
if (fpu_owner != NULL) {
assert(fpu_owner != p);
save_fpu(fpu_owner);
local_fpu_owner = get_cpulocal_var_ptr(fpu_owner);
if (*local_fpu_owner != NULL) {
assert(*local_fpu_owner != p);
save_local_fpu(*local_fpu_owner);
}
/*
@ -1593,12 +1612,17 @@ PUBLIC void copr_not_available_handler(void)
* schedule!
*/
restore_fpu(p);
fpu_owner = p;
*local_fpu_owner = p;
context_stop(proc_addr(KERNEL));
restore_user_context(p);
NOT_REACHABLE;
}
PUBLIC void release_fpu(void) {
fpu_owner = NULL;
/*
 * Disown the FPU of the CPU that process 'p' is assigned to.
 *
 * Ownership is cleared only if 'p' actually owns that CPU's FPU, so
 * callers (process exit, exec, setmcontext, sigreturn) may invoke this
 * unconditionally.
 */
PUBLIC void release_fpu(struct proc * p) {
	struct proc ** fpu_owner_ptr;

	/* p->p_cpu selects the per-CPU slot; p may live on a remote CPU */
	fpu_owner_ptr = get_cpu_var_ptr(p->p_cpu, fpu_owner);

	if (*fpu_owner_ptr == p)
		*fpu_owner_ptr = NULL;
}

View file

@ -34,6 +34,7 @@ _PROTOTYPE( void context_stop, (struct proc * p) );
_PROTOTYPE( void context_stop_idle, (void) );
_PROTOTYPE( void restore_fpu, (struct proc *) );
_PROTOTYPE( void save_fpu, (struct proc *) );
_PROTOTYPE( void save_local_fpu, (struct proc *) );
_PROTOTYPE( void fpu_sigcontext, (struct proc *, struct sigframe *fr, struct sigcontext *sc) );
/* main.c */
@ -178,6 +179,10 @@ _PROTOTYPE( vir_bytes alloc_remote_segment, (u32_t *, segframe_t *,
_PROTOTYPE( int intr_init, (int, int) );
_PROTOTYPE( void halt_cpu, (void) );
_PROTOTYPE( void arch_init, (void) );
/* arch dependent FPU initialization per CPU */
_PROTOTYPE( void fpu_init, (void) );
/* returns true if the fpu is present and initialized */
_PROTOTYPE( int is_fpu, (void) );
_PROTOTYPE( void ser_putc, (char) );
_PROTOTYPE( __dead void arch_shutdown, (int) );
_PROTOTYPE( __dead void arch_monitor, (void) );
@ -213,7 +218,7 @@ _PROTOTYPE(void release_address_space, (struct proc *pr));
_PROTOTYPE(void enable_fpu_exception, (void));
_PROTOTYPE(void disable_fpu_exception, (void));
_PROTOTYPE(void release_fpu, (void));
_PROTOTYPE(void release_fpu, (struct proc * p));
/* utility.c */
_PROTOTYPE( void cpu_print_freq, (unsigned cpu));

View file

@ -19,6 +19,7 @@ PRIVATE struct sched_ipi_data sched_ipi_data[CONFIG_MAX_CPUS];
#define SCHED_IPI_STOP_PROC 1
#define SCHED_IPI_VM_INHIBIT 2
#define SCHED_IPI_SAVE_CTX 4
static volatile unsigned ap_cpus_booted;
@ -116,6 +117,30 @@ PUBLIC void smp_schedule_vminhibit(struct proc * p)
assert(RTS_ISSET(p, RTS_VMINHIBIT));
}
/*
 * Ask the CPU running process 'p' (via scheduling IPI) to stop it and
 * save its complete context. Returns once the remote CPU has done so,
 * i.e. RTS_PROC_STOP is set on 'p'.
 */
PUBLIC void smp_schedule_stop_proc_save_ctx(struct proc * p)
{
	/*
	 * stop the process and force the complete context of the process to
	 * be saved (i.e. including FPU state and such)
	 */
	smp_schedule_sync(p, SCHED_IPI_STOP_PROC | SCHED_IPI_SAVE_CTX);
	assert(RTS_ISSET(p, RTS_PROC_STOP));
}
/*
 * Migrate process 'p' to 'dest_cpu'. The CPU currently holding the
 * process is told to stop it and save its complete context (including
 * any live FPU state); only then is the process reassigned and allowed
 * to run again on the destination CPU.
 */
PUBLIC void smp_schedule_migrate_proc(struct proc * p, unsigned dest_cpu)
{
	/*
	 * stop the process and force the complete context of the process to
	 * be saved (i.e. including FPU state and such)
	 */
	smp_schedule_sync(p, SCHED_IPI_STOP_PROC | SCHED_IPI_SAVE_CTX);
	assert(RTS_ISSET(p, RTS_PROC_STOP));

	/* assign the new cpu and let the process run again */
	p->p_cpu = dest_cpu;
	RTS_UNSET(p, RTS_PROC_STOP);
}
PUBLIC void smp_ipi_sched_handler(void)
{
struct proc * curr;
@ -134,6 +159,16 @@ PUBLIC void smp_ipi_sched_handler(void)
if (flgs & SCHED_IPI_STOP_PROC) {
RTS_SET(p, RTS_PROC_STOP);
}
if (flgs & SCHED_IPI_SAVE_CTX) {
/* all other context has been saved already, FPU remains */
if (proc_used_fpu(p) &&
get_cpulocal_var(fpu_owner) == p) {
disable_fpu_exception();
save_local_fpu(p);
/* we are preparing to migrate somewhere else */
release_fpu(p);
}
}
if (flgs & SCHED_IPI_VM_INHIBIT) {
RTS_SET(p, RTS_VMINHIBIT);
}

View file

@ -63,6 +63,11 @@ _PROTOTYPE(void smp_schedule, (unsigned cpu));
_PROTOTYPE(void smp_schedule_stop_proc, (struct proc * p));
/* stop a process on a different cpu because its address space is being changed */
_PROTOTYPE(void smp_schedule_vminhibit, (struct proc * p));
/* stop the process and force saving of its full context */
_PROTOTYPE(void smp_schedule_stop_proc_save_ctx, (struct proc * p));
/* migrate the full context of a process to the destination CPU */
_PROTOTYPE(void smp_schedule_migrate_proc,
(struct proc * p, unsigned dest_cpu));
_PROTOTYPE(void arch_send_smp_schedule_ipi, (unsigned cpu));
_PROTOTYPE(void arch_smp_halt_cpu, (void));

View file

@ -669,9 +669,7 @@ PUBLIC int sched_proc(struct proc *p,
if (proc_is_runnable(p)) {
#ifdef CONFIG_SMP
if (p->p_cpu != cpuid && cpu != -1 && cpu != p->p_cpu) {
printf("WARNING : changing cpu of a runnable process %d "
"on a different cpu!\n", p->p_endpoint);
return(EINVAL);
smp_schedule_migrate_proc(p, cpu);
}
#endif

View file

@ -54,6 +54,9 @@ PUBLIC int do_clear(struct proc * caller, message * m_ptr)
* and mark slot as FREE. Also mark saved fpu contents as not significant.
*/
RTS_SETFLAGS(rc, RTS_SLOT_FREE);
/* release FPU */
release_fpu(rc);
rc->p_misc_flags &= ~MF_FPU_INITIALIZED;
/* Release the process table slot. If this is a system process, also
@ -70,10 +73,6 @@ PUBLIC int do_clear(struct proc * caller, message * m_ptr)
}
#endif
/* release FPU */
if (fpu_owner == rc)
release_fpu();
return OK;
}

View file

@ -46,8 +46,7 @@ PUBLIC int do_exec(struct proc * caller, message * m_ptr)
* will be initialized, when it's used next time. */
rp->p_misc_flags &= ~MF_FPU_INITIALIZED;
/* force reloading FPU if the current process is the owner */
if (rp == fpu_owner)
release_fpu();
release_fpu(rp);
return(OK);
}
#endif /* USE_EXEC */

View file

@ -53,10 +53,7 @@ PUBLIC int do_fork(struct proc * caller, message * m_ptr)
map_ptr= (struct mem_map *) m_ptr->PR_MEM_PTR;
/* make sure that the FPU context is saved in parent before copy */
if (fpu_owner == rpp) {
disable_fpu_exception();
save_fpu(rpp);
}
/* Copy parent 'proc' struct to child. And reinitialize some fields. */
gen = _ENDPOINT_G(rpc->p_endpoint);
#if (_MINIX_CHIP == _CHIP_INTEL)

View file

@ -43,10 +43,7 @@ PUBLIC int do_getmcontext(struct proc * caller, message * m_ptr)
mc.mc_fpu_flags = 0;
if (proc_used_fpu(rp)) {
/* make sure that the FPU context is saved into proc structure first */
if (fpu_owner == rp) {
disable_fpu_exception();
save_fpu(rp);
}
mc.mc_fpu_flags = 0 | rp->p_misc_flags & MF_FPU_INITIALIZED;
memcpy(&(mc.mc_fpu_state), rp->p_fpu_state.fpu_save_area_p,
FPU_XFP_SIZE);
@ -92,8 +89,7 @@ PUBLIC int do_setmcontext(struct proc * caller, message * m_ptr)
} else
rp->p_misc_flags &= ~MF_FPU_INITIALIZED;
/* force reloading FPU in either case */
if (fpu_owner == rp)
release_fpu();
release_fpu(rp);
#endif
return(OK);

View file

@ -60,8 +60,7 @@ PUBLIC int do_sigreturn(struct proc * caller, message * m_ptr)
FPU_XFP_SIZE);
rp->p_misc_flags |= MF_FPU_INITIALIZED; /* Restore math usage flag. */
/* force reloading FPU */
if (fpu_owner == rp)
release_fpu();
release_fpu(rp);
}
#endif

View file

@ -46,10 +46,7 @@ PUBLIC int do_sigsend(struct proc * caller, message * m_ptr)
#if (_MINIX_CHIP == _CHIP_INTEL)
if(proc_used_fpu(rp)) {
/* save the FPU context before saving it to the sig context */
if (fpu_owner == rp) {
disable_fpu_exception();
save_fpu(rp);
}
memcpy(&sc.sc_fpu_state, rp->p_fpu_state.fpu_save_area_p,
FPU_XFP_SIZE);
}

View file

@ -80,3 +80,8 @@ PUBLIC void cpu_print_freq(unsigned cpu)
freq = cpu_get_freq(cpu);
printf("CPU %d freq %lu MHz\n", cpu, div64u(freq, 1000000));
}
/*
 * Report whether the local CPU has an FPU: returns the per-CPU
 * fpu_presence flag set by fpu_init() (non-zero means present and
 * initialized).
 */
PUBLIC int is_fpu(void)
{
	return get_cpulocal_var(fpu_presence);
}