SMP - can boot even if some cpus fail to boot

- EBADCPU is returned if the scheduler tries to run a process on a CPU
  that either does not exist or isn't booted

- this change was originally meant to deal with the stupid cpuid
  instruction, which provides totally useless information about
  hyper-threading, and with MPS, which does not deal with HT at all. ACPI
  provides correct information. If HT is turned off, it looks like some
  CPUs failed to boot.  Nevertheless, this patch may be handy for
  testing/benchmarking in the future.
This commit is contained in:
Tomas Hruby 2010-09-15 14:11:21 +00:00
parent 421f324baa
commit 1f89845bb2
6 changed files with 40 additions and 5 deletions

View file

@ -130,4 +130,6 @@ extern int errno; /* place where the error numbers go */
#define EBADEPT (_SIGN 301) /* specified endpoint is bad */
#define EDEADEPT (_SIGN 302) /* specified endpoint is not alive */
#define EBADCPU (_SIGN 1000) /* requested CPU does not work */
#endif /* _ERRNO_H */

View file

@ -36,8 +36,8 @@ extern void * __trampoline_end;
extern u32_t busclock[CONFIG_MAX_CPUS];
extern int panicking;
static int ap_cpu_ready;
static int cpu_down;
static int volatile ap_cpu_ready;
static int volatile cpu_down;
/* there can be at most 255 local APIC ids, each fits in 8 bits */
PRIVATE unsigned char apicid2cpuid[255];
@ -186,6 +186,11 @@ PUBLIC void smp_shutdown_aps(void)
for (cpu = 0; cpu < ncpus; cpu++) {
if (cpu == cpuid)
continue;
if (!cpu_test_flag(cpu, CPU_IS_READY)) {
printf("CPU %d didn't boot\n", cpu);
continue;
}
cpu_down = -1;
barrier();
apic_send_ipi(APIC_SMP_CPU_HALT_VECTOR, cpu, APIC_IPI_DEST);

View file

@ -27,9 +27,20 @@ SPINLOCK_DEFINE(boot_lock)
PUBLIC void wait_for_APs_to_finish_booting(void)
{
unsigned n = 0;
int i;
/* check how many cpus are actually alive */
for (i = 0 ; i < ncpus ; i++) {
if (cpu_test_flag(i, CPU_IS_READY))
n++;
}
if (n != ncpus)
printf("WARNING only %d out of %d cpus booted\n", n, ncpus);
/* we must let the other CPUs to run in kernel mode first */
BKL_UNLOCK();
while (ap_cpus_booted != (ncpus - 1))
while (ap_cpus_booted != (n - 1))
arch_pause();
/* now we have to take the lock again as we continue execution */
BKL_LOCK();

View file

@ -653,6 +653,8 @@ PUBLIC int sched_proc(struct proc *p,
#ifdef CONFIG_SMP
if ((cpu < 0 && cpu != -1) || (cpu > 0 && (unsigned) cpu >= ncpus))
return(EINVAL);
if (cpu != -1 && !(cpu_is_ready(cpu)))
return EBADCPU;
#endif
/* In some cases, we might be rescheduling a runnable process. In such

View file

@ -29,5 +29,5 @@ EXTERN char monitor_code[256];
EXTERN struct machine machine; /* machine info */
#ifdef CONFIG_SMP
EXTERN unsigned cpu_proc[CONFIG_MAX_CPUS];
EXTERN int cpu_proc[CONFIG_MAX_CPUS];
#endif

View file

@ -38,6 +38,10 @@ FORWARD _PROTOTYPE( void balance_queues, (struct timer *tp) );
#define schedule_process_migrate(p) \
schedule_process(p, SCHEDULE_CHANGE_CPU)
#define CPU_DEAD -1
#define cpu_is_available(c) (cpu_proc[c] >= 0)
#define DEFAULT_USER_TIME_SLICE 200
/* processes created by RS are system processes */
@ -62,7 +66,12 @@ PRIVATE void pick_cpu(struct schedproc * proc)
return;
}
/* if no other cpu available, try BSP */
cpu = machine.bsp_id;
for (c = 0; c < machine.processors_count; c++) {
/* skip dead cpus */
if (!cpu_is_available(c))
continue;
if (c != machine.bsp_id && cpu_load > cpu_proc[c]) {
cpu_load = cpu_proc[c];
cpu = c;
@ -218,7 +227,13 @@ PUBLIC int do_start_scheduling(message *m_ptr)
/* Schedule the process, giving it some quantum */
pick_cpu(rmp);
if ((rv = schedule_process(rmp, SCHEDULE_CHANGE_ALL)) != OK) {
while ((rv = schedule_process(rmp, SCHEDULE_CHANGE_ALL)) == EBADCPU) {
/* don't try this CPU ever again */
cpu_proc[rmp->cpu] = CPU_DEAD;
pick_cpu(rmp);
}
if (rv != OK) {
printf("Sched: Error while scheduling process, kernel replied %d\n",
rv);
return rv;