From 5efa92f754d5e761fd1db6f9a0570a0672c10415 Mon Sep 17 00:00:00 2001 From: Tomas Hruby Date: Sat, 16 Jan 2010 20:53:55 +0000 Subject: [PATCH] NMI watchdog is an awesome feature for debugging locked up kernels. There is not that much use for it on a single CPU, however, a deadlock between the kernel and a system task can be detected. Or a runaway loop. If a kernel gets locked up the timer interrupts don't occur (as all interrupts are disabled in kernel mode). The only chance is to interrupt the kernel by a non-maskable interrupt. This patch generates NMIs using performance counters. It uses the most widely available performance counters. As the performance counters are highly model-specific this patch is not guaranteed to work on every machine. Unfortunately this is also true for KVM :-/ On the other hand adding this feature for other models is not extremely difficult and the framework makes it hopefully easy enough. Depending on the frequency of the CPU an NMI is generated at most about every 0.5s. If the cpu's speed is less than 2GHz it is generated at most every 1s. In general an NMI is generated much less often as the performance counter counts down only if the cpu is not idle. Therefore the overhead of this feature is fairly minimal even if the load is high. Upon detecting that the kernel is locked up the kernel dumps the state of the kernel registers and panics. Local APIC must be enabled for the watchdog to work. The code is _always_ compiled in, however, it is only enabled if watchdog= is set to a non-zero value in the boot monitor. One corner case is serial console debugging. As dumping a lot of stuff to the serial link may take a lot of time, the watchdog does not detect lockups during this time!!! as it would result in too many false positives. 10 NMIs have to be handled before the lockup is detected. This means something between ~5s and 10s. Another corner case is that the watchdog is enabled only after the paging is enabled as it would be pure madness to try to get it right. 
--- kernel/Makefile | 3 +- kernel/arch/i386/Makefile | 3 +- kernel/arch/i386/apic.c | 104 +++++---------------------- kernel/arch/i386/apic.h | 55 +++++++++++++++ kernel/arch/i386/exception.c | 4 +- kernel/arch/i386/memory.c | 13 ++++ kernel/arch/i386/mpx386.S | 79 ++++++++++++++++++--- kernel/arch/i386/sconst.h | 5 +- kernel/arch/i386/system.c | 4 +- kernel/arch/i386/watchdog.c | 133 +++++++++++++++++++++++++++++++++++ kernel/arch/i386/watchdog.h | 26 +++++++ kernel/clock.c | 13 ++++ kernel/glo.h | 7 ++ kernel/kernel.h | 16 +++++ kernel/start.c | 10 +++ kernel/watchdog.c | 52 ++++++++++++++ kernel/watchdog.h | 36 ++++++++++ 17 files changed, 460 insertions(+), 103 deletions(-) create mode 100644 kernel/arch/i386/watchdog.c create mode 100644 kernel/arch/i386/watchdog.h create mode 100644 kernel/watchdog.c create mode 100644 kernel/watchdog.h diff --git a/kernel/Makefile b/kernel/Makefile index b4e9f81ab..3fc3bc070 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -21,7 +21,8 @@ LDFLAGS=-i HEAD = $a/mpx386.o OBJS = start.o table.o main.o proc.o \ - system.o clock.o utility.o debug.o profile.o interrupt.o + system.o clock.o utility.o debug.o profile.o interrupt.o \ + watchdog.o SYSTEM = system.a ARCHLIB = $a/$(ARCH).a LIBS = -ltimers -lsys diff --git a/kernel/arch/i386/Makefile b/kernel/arch/i386/Makefile index 0779512f7..481f9c51c 100644 --- a/kernel/arch/i386/Makefile +++ b/kernel/arch/i386/Makefile @@ -22,7 +22,8 @@ OBJS= arch_do_vmctl.o \ protect.o \ system.o \ apic.o \ - apic_asm.o + apic_asm.o \ + watchdog.o CPPFLAGS=-Iinclude CFLAGS=$(CPPFLAGS) -Wall $(CPROFILE) diff --git a/kernel/arch/i386/apic.c b/kernel/arch/i386/apic.c index 126010441..453caac34 100644 --- a/kernel/arch/i386/apic.c +++ b/kernel/arch/i386/apic.c @@ -21,73 +21,9 @@ #include "../../clock.h" #include "glo.h" -#define APIC_ENABLE 0x100 -#define APIC_FOCUS (~(1 << 9)) -#define APIC_SIV 0xFF - -#define APIC_TDCR_2 0x00 -#define APIC_TDCR_4 0x01 -#define APIC_TDCR_8 0x02 -#define 
APIC_TDCR_16 0x03 -#define APIC_TDCR_32 0x08 -#define APIC_TDCR_64 0x09 -#define APIC_TDCR_128 0x0a -#define APIC_TDCR_1 0x0b - -#define IS_SET(mask) (mask) -#define IS_CLEAR(mask) 0 - -#define APIC_LVTT_VECTOR_MASK 0x000000FF -#define APIC_LVTT_DS_PENDING (1 << 12) -#define APIC_LVTT_MASK (1 << 16) -#define APIC_LVTT_TM (1 << 17) - -#define APIC_LVT_IIPP_MASK 0x00002000 -#define APIC_LVT_IIPP_AH 0x00002000 -#define APIC_LVT_IIPP_AL 0x00000000 - -#define APIC_LVT_TM_ONESHOT IS_CLEAR(APIC_LVTT_TM) -#define APIC_LVT_TM_PERIODIC IS_SET(APIC_LVTT_TM) - -#define APIC_SVR_SWEN 0x00000100 -#define APIC_SVR_FOCUS 0x00000200 - -#define IOAPIC_REGSEL 0x0 -#define IOAPIC_RW 0x10 - -#define APIC_ICR_DM_MASK 0x00000700 -#define APIC_ICR_VECTOR APIC_LVTT_VECTOR_MASK -#define APIC_ICR_DM_FIXED (0 << 8) -#define APIC_ICR_DM_LOWEST_PRIORITY (1 << 8) -#define APIC_ICR_DM_SMI (2 << 8) -#define APIC_ICR_DM_RESERVED (3 << 8) -#define APIC_ICR_DM_NMI (4 << 8) -#define APIC_ICR_DM_INIT (5 << 8) -#define APIC_ICR_DM_STARTUP (6 << 8) -#define APIC_ICR_DM_EXTINT (7 << 8) - -#define APIC_ICR_DM_PHYSICAL (0 << 11) -#define APIC_ICR_DM_LOGICAL (1 << 11) - -#define APIC_ICR_DELIVERY_PENDING (1 << 12) - -#define APIC_ICR_INT_POLARITY (1 << 13) -#define APIC_ICR_INTPOL_LOW IS_SET(APIC_ICR_INT_POLARITY) -#define APIC_ICR_INTPOL_HIGH IS_CLEAR(APIC_ICR_INT_POLARITY) - -#define APIC_ICR_LEVEL_ASSERT (1 << 14) -#define APIC_ICR_LEVEL_DEASSERT (0 << 14) - -#define APIC_ICR_TRIGGER (1 << 15) -#define APIC_ICR_TM_LEVEL IS_CLEAR(APIC_ICR_TRIGGER) -#define APIC_ICR_TM_EDGE IS_CLEAR(APIC_ICR_TRIGGER) - -#define APIC_ICR_INT_MASK (1 << 16) - -#define APIC_ICR_DEST_FIELD (0 << 18) -#define APIC_ICR_DEST_SELF (1 << 18) -#define APIC_ICR_DEST_ALL (2 << 18) -#define APIC_ICR_DEST_ALL_BUT_SELF (3 << 18) +#ifdef CONFIG_WATCHDOG +#include "../../watchdog.h" +#endif #define IA32_APIC_BASE 0x1b #define IA32_APIC_BASE_ENABLE_BIT 11 @@ -103,11 +39,6 @@ #define CONFIG_MAX_CPUS 1 #define cpu_is_bsp(x) 1 -PRIVATE int 
cpuid(void) -{ - return 0; -} - #define lapic_write_icr1(val) lapic_write(LAPIC_ICR1, val) #define lapic_write_icr2(val) lapic_write(LAPIC_ICR2, val) @@ -226,12 +157,14 @@ PUBLIC void apic_calibrate_clocks(void) lapic_delta = lapic_tctr0 - lapic_tctr1; tsc_delta = sub64(tsc1, tsc0); - lapic_bus_freq[cpuid()] = system_hz * lapic_delta / (PROBE_TICKS - 1); + lapic_bus_freq[cpuid] = system_hz * lapic_delta / (PROBE_TICKS - 1); BOOT_VERBOSE(kprintf("APIC bus freq %lu MHz\n", - lapic_bus_freq[cpuid()] / 1000000)); + lapic_bus_freq[cpuid] / 1000000)); cpu_freq = div64u(tsc_delta, PROBE_TICKS - 1) * system_hz; - BOOT_VERBOSE(kprintf("CPU %d freq %lu MHz\n", cpuid(), + BOOT_VERBOSE(kprintf("CPU %d freq %lu MHz\n", cpuid, cpu_freq / 1000000)); + + cpu_set_freq(cpuid, cpu_freq); } PRIVATE void lapic_set_timer_one_shot(u32_t value) @@ -239,7 +172,7 @@ PRIVATE void lapic_set_timer_one_shot(u32_t value) /* sleep in micro seconds */ u32_t lvtt; u32_t ticks_per_us; - u8_t cpu = cpuid (); + u8_t cpu = cpuid; ticks_per_us = lapic_bus_freq[cpu] / 1000000; @@ -259,7 +192,7 @@ PUBLIC void lapic_set_timer_periodic(unsigned freq) /* sleep in micro seconds */ u32_t lvtt; u32_t lapic_ticks_per_clock_tick; - u8_t cpu = cpuid(); + u8_t cpu = cpuid; lapic_ticks_per_clock_tick = lapic_bus_freq[cpu] / freq; @@ -267,7 +200,7 @@ PUBLIC void lapic_set_timer_periodic(unsigned freq) lapic_write(LAPIC_TIMER_DCR, lvtt); /* configure timer as periodic */ - lvtt = APIC_LVT_TM_PERIODIC | APIC_TIMER_INT_VECTOR; + lvtt = APIC_LVTT_TM | APIC_TIMER_INT_VECTOR; lapic_write(LAPIC_LVTTR, lvtt); lapic_write(LAPIC_TIMER_ICR, lapic_ticks_per_clock_tick); @@ -329,7 +262,7 @@ PRIVATE void lapic_enable_no_lints(void) lapic_extint_assigned = (val & APIC_ICR_DM_MASK) == APIC_ICR_DM_EXTINT; val &= ~(APIC_ICR_DM_MASK|APIC_ICR_INT_MASK); - if (!ioapic_enabled && cpu_is_bsp(cpuid())) + if (!ioapic_enabled && cpu_is_bsp(cpuid)) val |= (APIC_ICR_DM_EXTINT); /* ExtINT at LINT0 */ else val |= 
(APIC_ICR_DM_EXTINT|APIC_ICR_INT_MASK); /* Masked ExtINT at LINT0 */ @@ -339,7 +272,7 @@ PRIVATE void lapic_enable_no_lints(void) val = lapic_read(LAPIC_LINT1); val &= ~(APIC_ICR_DM_MASK|APIC_ICR_INT_MASK); - if (!ioapic_enabled && cpu_is_bsp(cpuid())) + if (!ioapic_enabled && cpu_is_bsp(cpuid)) val |= APIC_ICR_DM_NMI; else val |= (APIC_ICR_DM_NMI | APIC_ICR_INT_MASK); /* NMI at LINT1 */ @@ -378,7 +311,7 @@ PUBLIC int lapic_enable(void) u32_t timeout = 0xFFFF; u32_t errstatus = 0; int i; - unsigned cpu = cpuid (); + unsigned cpu = cpuid; if (!lapic_addr) return 0; @@ -400,13 +333,14 @@ PUBLIC int lapic_enable(void) /* Enable Local APIC and set the spurious vector to 0xff. */ val = lapic_read(LAPIC_SIVR) & 0xFFFFFF00; - val |= (APIC_ENABLE | APIC_FOCUS | APIC_SPURIOUS_INT_VECTOR); + val |= APIC_ENABLE | APIC_SPURIOUS_INT_VECTOR; + val &= ~APIC_FOCUS_DISABLED; lapic_write(LAPIC_SIVR, val); lapic_read(LAPIC_SIVR); *((u32_t *)lapic_eoi_addr) = 0; - cpu = cpuid (); + cpu = cpuid; /* Program Logical Destination Register. 
*/ val = lapic_read(LAPIC_LDR) & ~0xFF000000; @@ -514,7 +448,7 @@ PUBLIC void apic_idt_init(int reset) } #ifdef CONFIG_APIC_DEBUG - if (cpu_is_bsp(cpuid())) + if (cpu_is_bsp(cpuid)) kprintf("APIC debugging is enabled\n"); lapic_set_dummy_handlers(); #endif @@ -528,7 +462,7 @@ PUBLIC void apic_idt_init(int reset) idt_copy_vectors(gate_table_common); /* configure the timer interupt handler */ - if (cpu_is_bsp(cpuid())) { + if (cpu_is_bsp(cpuid)) { local_timer_intr_handler = (vir_bytes) lapic_bsp_timer_int_handler; BOOT_VERBOSE(kprintf("Initiating BSP timer handler\n")); } else { diff --git a/kernel/arch/i386/apic.h b/kernel/arch/i386/apic.h index 88a5f9422..8b7587c0a 100644 --- a/kernel/arch/i386/apic.h +++ b/kernel/arch/i386/apic.h @@ -1,6 +1,61 @@ #ifndef __APIC_H__ #define __APIC_H__ +#define APIC_ENABLE 0x100 +#define APIC_FOCUS_DISABLED (1 << 9) +#define APIC_SIV 0xFF + +#define APIC_TDCR_2 0x00 +#define APIC_TDCR_4 0x01 +#define APIC_TDCR_8 0x02 +#define APIC_TDCR_16 0x03 +#define APIC_TDCR_32 0x08 +#define APIC_TDCR_64 0x09 +#define APIC_TDCR_128 0x0a +#define APIC_TDCR_1 0x0b + +#define APIC_LVTT_VECTOR_MASK 0x000000FF +#define APIC_LVTT_DS_PENDING (1 << 12) +#define APIC_LVTT_MASK (1 << 16) +#define APIC_LVTT_TM (1 << 17) + +#define APIC_LVT_IIPP_MASK 0x00002000 +#define APIC_LVT_IIPP_AH 0x00002000 +#define APIC_LVT_IIPP_AL 0x00000000 + +#define IOAPIC_REGSEL 0x0 +#define IOAPIC_RW 0x10 + +#define APIC_ICR_DM_MASK 0x00000700 +#define APIC_ICR_VECTOR APIC_LVTT_VECTOR_MASK +#define APIC_ICR_DM_FIXED (0 << 8) +#define APIC_ICR_DM_LOWEST_PRIORITY (1 << 8) +#define APIC_ICR_DM_SMI (2 << 8) +#define APIC_ICR_DM_RESERVED (3 << 8) +#define APIC_ICR_DM_NMI (4 << 8) +#define APIC_ICR_DM_INIT (5 << 8) +#define APIC_ICR_DM_STARTUP (6 << 8) +#define APIC_ICR_DM_EXTINT (7 << 8) + +#define APIC_ICR_DM_PHYSICAL (0 << 11) +#define APIC_ICR_DM_LOGICAL (1 << 11) + +#define APIC_ICR_DELIVERY_PENDING (1 << 12) + +#define APIC_ICR_INT_POLARITY (1 << 13) + +#define 
APIC_ICR_LEVEL_ASSERT (1 << 14) +#define APIC_ICR_LEVEL_DEASSERT (0 << 14) + +#define APIC_ICR_TRIGGER (1 << 15) + +#define APIC_ICR_INT_MASK (1 << 16) + +#define APIC_ICR_DEST_FIELD (0 << 18) +#define APIC_ICR_DEST_SELF (1 << 18) +#define APIC_ICR_DEST_ALL (2 << 18) +#define APIC_ICR_DEST_ALL_BUT_SELF (3 << 18) + #define LOCAL_APIC_DEF_ADDR 0xfee00000 /* default local apic address */ #define IO_APIC_DEF_ADDR 0xfec00000 /* default i/o apic address */ diff --git a/kernel/arch/i386/exception.c b/kernel/arch/i386/exception.c index d93a7c3c8..7cc234334 100644 --- a/kernel/arch/i386/exception.c +++ b/kernel/arch/i386/exception.c @@ -188,8 +188,8 @@ struct proc *t; kprintf("\n%s\n", ep->msg); kprintf("is_nested = %d ", is_nested); - kprintf("vec_nr= %d, trap_errno= 0x%x, eip= 0x%x, cs= 0x%x, eflags= 0x%x\n", - frame->vector, frame->errcode, frame->eip, frame->cs, frame->eflags); + kprintf("vec_nr= %d, trap_errno= 0x%x, eip= 0x%x, cs= 0x%x, eflags= 0x%x trap_esp 0x%08x\n", + frame->vector, frame->errcode, frame->eip, frame->cs, frame->eflags, frame); /* TODO should we enable this only when compiled for some debug mode? */ if (saved_proc) { kprintf("scheduled was: process %d (%s), ", proc_nr(saved_proc), saved_proc->p_name); diff --git a/kernel/arch/i386/memory.c b/kernel/arch/i386/memory.c index 4b5793b6b..8ed11e724 100644 --- a/kernel/arch/i386/memory.c +++ b/kernel/arch/i386/memory.c @@ -20,6 +20,9 @@ #ifdef CONFIG_APIC #include "apic.h" +#ifdef CONFIG_WATCHDOG +#include "../../watchdog.h" +#endif #endif PRIVATE int psok = 0; @@ -1073,5 +1076,15 @@ PUBLIC int arch_enable_paging(void) lapic_eoi_addr = LAPIC_EOI; } #endif +#ifdef CONFIG_WATCHDOG + /* + * We make sure that we don't enable the watchdog until paging is turned + * on as we might get a NMI while switching and we might still use wrong + * lapic address. Bad things would happen. 
It is unfortunate but such is + * life + */ + level0(i386_watchdog_start); +#endif + return OK; } diff --git a/kernel/arch/i386/mpx386.S b/kernel/arch/i386/mpx386.S index 58351d0b7..896d84afd 100644 --- a/kernel/arch/i386/mpx386.S +++ b/kernel/arch/i386/mpx386.S @@ -42,6 +42,8 @@ * and modified by the boot monitor before the kernel starts. */ +#include "../../kernel.h" /* configures the kernel */ + /* sections */ #include @@ -466,31 +468,43 @@ restart: call schedcheck /* %eax is set by schedcheck() to the process to run */ - mov %eax, %esp /* will assume P_STACKBASE == 0 */ + mov %eax, %ebp /* will assume P_STACKBASE == 0 */ - lldt P_LDT_SEL(%esp) /* enable process' segment descriptors */ - cmpl $0, P_CR3(%esp) + cmpl $0, P_CR3(%ebp) jz 0f /* * test if the cr3 is loaded with the current value to avoid unnecessary * TLB flushes */ - mov P_CR3(%esp), %eax + mov P_CR3(%ebp), %eax mov %cr3, %ecx cmp %ecx, %eax jz 0f mov %eax, %cr3 - mov %esp, ptproc + mov %ebp, ptproc movl $0, dirtypde 0: - popw %gs - popw %fs - popw %es - popw %ds - popal - add $4, %esp /* skip return adr FIXME unused value */ + /* reconstruct the stack for iret */ + movl SSREG(%ebp), %eax + push %eax + movl SPREG(%ebp), %eax + push %eax + movl PSWREG(%ebp), %eax + push %eax + movl CSREG(%ebp), %eax + push %eax + movl PCREG(%ebp), %eax + push %eax + + RESTORE_GP_REGS(%ebp) + + lldt P_LDT_SEL(%ebp) /* enable process' segment descriptors */ + RESTORE_SEGS(%ebp) + + movl %ss:BPREG(%ebp), %ebp + iret /* continue process */ /*===========================================================================*/ @@ -512,7 +526,50 @@ single_step_exception: EXCEPTION_NO_ERR_CODE(DEBUG_VECTOR) nmi: +#ifndef CONFIG_WATCHDOG EXCEPTION_NO_ERR_CODE(NMI_VECTOR) +#else + /* + * We have to be very careful as this interrupt can occur anytime. On + * the other hand, if it interrupts a user process, we will resume the + * same process which makes things a little simpler. 
We know that we are + * already on kernel stack whenever it happened and we can be + * conservative and save everything as we don't need to be extremely + * efficient as the interrupt is infrequent and some overhead is already + * expected. + */ + + /* + * save the important registers. We don't save %cs and %ss and they are + * saved and restored by CPU + */ + pushw %ds + pushw %es + pushw %fs + pushw %gs + pusha + + /* + * We cannot be sure about the state of the kernel segment register, + * however, we always set %ds and %es to the same as %ss + */ + mov %ss, %si + mov %si, %ds + mov %si, %es + + push %esp + call nmi_watchdog_handler + add $4, %esp + + /* restore all the important registers as they were before the trap */ + popa + popw %gs + popw %fs + popw %es + popw %ds + + iret +#endif breakpoint_exception: EXCEPTION_NO_ERR_CODE(BREAKPOINT_VECTOR) diff --git a/kernel/arch/i386/sconst.h b/kernel/arch/i386/sconst.h index d9669c643..efda211d0 100644 --- a/kernel/arch/i386/sconst.h +++ b/kernel/arch/i386/sconst.h @@ -90,7 +90,10 @@ #define RESTORE_KERNEL_SEGS \ mov %ss, %si ;\ mov %si, %ds ;\ - mov %si, %es ; + mov %si, %es ;\ + movw $0, %si ;\ + mov %si, %gs ;\ + mov %si, %fs ; #define SAVE_GP_REGS(pptr) \ mov %eax, %ss:AXREG(pptr) ;\ diff --git a/kernel/arch/i386/system.c b/kernel/arch/i386/system.c index 4a1a1979c..d9254a477 100644 --- a/kernel/arch/i386/system.c +++ b/kernel/arch/i386/system.c @@ -280,7 +280,7 @@ PRIVATE void ser_debug(int c) { int u = 0; - do_serial_debug++; + serial_debug_active = 1; /* Disable interrupts so that we get a consistent state. 
*/ if(!intr_disabled()) { lock; u = 1; }; @@ -314,7 +314,7 @@ PRIVATE void ser_debug(int c) TOGGLECASE('9', VF_PICKPROC) #endif } - do_serial_debug--; + serial_debug_active = 0; if(u) { unlock; } } diff --git a/kernel/arch/i386/watchdog.c b/kernel/arch/i386/watchdog.c new file mode 100644 index 000000000..a9362d465 --- /dev/null +++ b/kernel/arch/i386/watchdog.c @@ -0,0 +1,133 @@ +#include "../../kernel.h" +#include "../../watchdog.h" + +#include "apic.h" + +#define CPUID_UNHALTED_CORE_CYCLES_AVAILABLE 0 + +#define MSR_PERFMON_CRT0 0xc1 +#define MSR_PERFMON_SEL0 0x186 + +#define MSR_PERFMON_SEL0_ENABLE (1 << 22) + +/* + * Intel architecture performance counters watchdog + */ + +PRIVATE void intel_arch_watchdog_init(int cpu) +{ + u32_t cpuf; + u32_t val; + + ia32_msr_write(MSR_PERFMON_CRT0, 0, 0); + + /* Int (bit 20), OS (bit 17), USR (bit 16), event 0x3c = core cycles */ + val = 1 << 20 | 1 << 17 | 1 << 16 | 0x3c; + ia32_msr_write(MSR_PERFMON_SEL0, 0, val); + + /* + * should give us a tick approx. every 0.5-1s, the perf counter has only + * lowest 31 bits writable :( + */ + cpuf = cpu_get_freq(cpu); + if (cpuf > 0x7fffffffU) + cpuf >>= 2; /* keep the reload value within the 31 writable bits */ + watchdog->resetval = cpuf; /* reused by the reinit method to reload the counter */ + + ia32_msr_write(MSR_PERFMON_CRT0, 0, -cpuf); + + ia32_msr_write(MSR_PERFMON_SEL0, 0, val | MSR_PERFMON_SEL0_ENABLE); + + /* unmask the performance counter interrupt */ + lapic_write(LAPIC_LVTPCR, APIC_ICR_DM_NMI); +} + +PRIVATE void intel_arch_watchdog_reinit(int cpu) +{ /* after a tick: rewrite the unmasked NMI LVT entry and reload the counter with -resetval */ + lapic_write(LAPIC_LVTPCR, APIC_ICR_DM_NMI); + ia32_msr_write(MSR_PERFMON_CRT0, 0, -watchdog->resetval); +} + +PRIVATE struct arch_watchdog intel_arch_watchdog = { + /*.init = */ intel_arch_watchdog_init, + /*.reinit = */ intel_arch_watchdog_reinit +}; + +int arch_watchdog_init(void) +{ + reg_t eax, ebx, ecx, edx; + + eax = 0xA; + + _cpuid(&eax, &ebx, &ecx, &edx); + + /* FIXME currently we support only a watchdog based on the intel + * architectural performance counters. 
Some Intel CPUs don't have this + * feature (a set bit 0 in EBX of CPUID leaf 0xA is treated as "event not available") + */ + if (ebx & (1 << CPUID_UNHALTED_CORE_CYCLES_AVAILABLE)) + return -1; + if (!((((eax >> 8)) & 0xff) > 0)) + return -1; /* EAX[15:8] must be non-zero (number of counters per the Intel SDM) */ + + watchdog = &intel_arch_watchdog; + + /* Setup PC (performance counter LVT entry) as NMI for watchdog, it is masked for now */ + lapic_write(LAPIC_LVTPCR, APIC_ICR_INT_MASK | APIC_ICR_DM_NMI); + lapic_read(LAPIC_LVTPCR); + + /* double check if LAPIC is enabled. NOTE(review): LAPIC_LVTPCR was already written above, before this lapic_addr check */ + if (lapic_addr && watchdog_enabled && watchdog->init) { + watchdog->init(cpuid); + } + + return 0; +} + +void arch_watchdog_lockup(struct nmi_frame * frame) +{ /* print the registers saved in the NMI frame (esp is not printed), then panic */ + kprintf("KERNEL LOCK UP\n" + "eax 0x%08x\n" + "ecx 0x%08x\n" + "edx 0x%08x\n" + "ebx 0x%08x\n" + "ebp 0x%08x\n" + "esi 0x%08x\n" + "edi 0x%08x\n" + "gs 0x%08x\n" + "fs 0x%08x\n" + "es 0x%08x\n" + "ds 0x%08x\n" + "pc 0x%08x\n" + "cs 0x%08x\n" + "eflags 0x%08x\n", + frame->eax, + frame->ecx, + frame->edx, + frame->ebx, + frame->ebp, + frame->esi, + frame->edi, + frame->gs, + frame->fs, + frame->es, + frame->ds, + frame->pc, + frame->cs, + frame->eflags + ); + minix_panic("Kernel lockup\n", NO_NUM); +} + +void i386_watchdog_start(void) +{ + if (watchdog_enabled) { + if (arch_watchdog_init()) { + kprintf("WARNING watchdog initialization " + "failed! 
Disabled\n"); + watchdog_enabled = 0; + } + else + BOOT_VERBOSE(kprintf("Watchdog enabled\n");); + } +} diff --git a/kernel/arch/i386/watchdog.h b/kernel/arch/i386/watchdog.h new file mode 100644 index 000000000..4d30ec537 --- /dev/null +++ b/kernel/arch/i386/watchdog.h @@ -0,0 +1,26 @@ +#ifndef __I386_WATCHDOG_H__ +#define __I386_WATCHDOG_H__ + +#include "../../kernel.h" + +struct nmi_frame { /* register image built by the nmi entry code in mpx386.S, lowest address first */ + reg_t edi; /* pusha stores edi last, so it sits at the lowest address */ + reg_t esi; + reg_t ebp; + reg_t esp; /* value of %esp before the pusha */ + reg_t ebx; + reg_t edx; + reg_t ecx; + reg_t eax; /* pusha stores eax first, so it sits at the highest address of the group */ + u16_t gs; /* segment registers are saved with pushw: ds first, gs last */ + u16_t fs; + u16_t es; + u16_t ds; + reg_t pc; /* arch independent name for program counter */ + reg_t cs; + reg_t eflags; +}; + +void i386_watchdog_start(void); + +#endif /* __I386_WATCHDOG_H__ */ diff --git a/kernel/clock.c b/kernel/clock.c index 488b443b3..7bd28f635 100644 --- a/kernel/clock.c +++ b/kernel/clock.c @@ -38,6 +38,10 @@ #include "clock.h" +#ifdef CONFIG_WATCHDOG +#include "watchdog.h" +#endif + /* Function prototype for PRIVATE functions. */ FORWARD _PROTOTYPE( void init_clock, (void) ); @@ -232,6 +236,15 @@ PUBLIC int ap_timer_int_handler(void) IDLE_STOP; +#ifdef CONFIG_WATCHDOG + /* + * we need to know whether local timer ticks are happening or whether + * the kernel is locked up. We don't care about overflows as we only + * need to know that it's still ticking or not + */ + watchdog_local_timer_ticks++; +#endif + /* Update user and system accounting times. Charge the current process * for user time. 
If the current process is not billable, that is, if a * non-user process is running, charge the billable process for system diff --git a/kernel/glo.h b/kernel/glo.h index 72bd5722e..141f016c6 100644 --- a/kernel/glo.h +++ b/kernel/glo.h @@ -72,6 +72,11 @@ EXTERN u64_t idle_stop; EXTERN int idle_active; #endif +EXTERN unsigned cpu_hz[CONFIG_MAX_CPUS]; + +#define cpu_set_freq(cpu, freq) do {cpu_hz[cpu] = freq;} while (0) +#define cpu_get_freq(cpu) cpu_hz[cpu] + /* VM */ EXTERN int vm_running; EXTERN int catch_pagefaults; @@ -85,4 +90,6 @@ extern struct boot_image image[]; /* system image processes */ extern char *t_stack[]; /* task stack space */ extern struct segdesc_s gdt[]; /* global descriptor table */ +EXTERN volatile int serial_debug_active; + #endif /* GLO_H */ diff --git a/kernel/kernel.h b/kernel/kernel.h index f3971a4b9..97809c1d2 100644 --- a/kernel/kernel.h +++ b/kernel/kernel.h @@ -5,6 +5,14 @@ #define CONFIG_APIC /* boot verbose */ #define CONFIG_BOOT_VERBOSE +/* + * compile in the nmi watchdog by default. It is not enabled until watchdog=1 + * (non-zero) is set in monitor + */ +#define CONFIG_WATCHDOG +/* We only support 1 cpu now */ +#define CONFIG_MAX_CPUS 1 +#define cpuid 0 /* measure cumulative idle timestamp counter ticks */ #undef CONFIG_IDLE_TSC @@ -15,6 +23,12 @@ #define _MINIX 1 /* tell headers to include MINIX stuff */ #define _SYSTEM 1 /* tell headers that this is the kernel */ +/* + * we need the defines above in assembly files to configure the kernel + * correctly. However we don't need the rest + */ +#ifndef __ASSEMBLY__ + /* The following are so basic, all the *.c files get them automatically. 
*/ #include /* global configuration, MUST be first */ #include /* C style: ANSI or K&R, MUST be second */ @@ -36,4 +50,6 @@ #include "profile.h" /* system profiling */ #include "debug.h" /* debugging, MUST be last kernel header */ +#endif /* __ASSEMBLY__ */ + #endif /* KERNEL_H */ diff --git a/kernel/start.c b/kernel/start.c index 17da976db..fff5b0535 100644 --- a/kernel/start.c +++ b/kernel/start.c @@ -8,6 +8,10 @@ #include #include "proto.h" +#ifdef CONFIG_WATCHDOG +#include "watchdog.h" +#endif + FORWARD _PROTOTYPE( char *get_value, (_CONST char *params, _CONST char *key)); /*===========================================================================* * cstart * @@ -84,6 +88,12 @@ U16_t parmoff, parmsize; /* boot parameters offset and length */ config_no_apic = 1; #endif +#ifdef CONFIG_WATCHDOG + value = get_value(params_buffer, "watchdog"); + if (value) + watchdog_enabled = atoi(value); +#endif + /* Return to assembler code to switch to protected mode (if 286), * reload selectors and call main(). */ diff --git a/kernel/watchdog.c b/kernel/watchdog.c new file mode 100644 index 000000000..d1807316d --- /dev/null +++ b/kernel/watchdog.c @@ -0,0 +1,52 @@ +/* + * This is arch independent NMI watchdog implementation part. It is used to + * detect kernel lockups and help debugging. Each architecture must add its own + * low level code that triggers periodic checks + */ + +#include "watchdog.h" + +unsigned watchdog_local_timer_ticks; +struct arch_watchdog *watchdog; +int watchdog_enabled; + +void nmi_watchdog_handler(struct nmi_frame * frame) +{ /* runs on every watchdog NMI; frame is only handed on to arch_watchdog_lockup() */ + /* FIXME this should be CPU local */ + static unsigned no_ticks; + static unsigned last_tick_count = (unsigned) -1; + + /* + * when debugging on serial console, printing takes a lot of time some + * times while the kernel is certainly not locked up. 
We don't want to + * report a lockup in such situation + */ + if (serial_debug_active) + goto reset_and_continue; + + if (last_tick_count != watchdog_local_timer_ticks) { + if (no_ticks > 0) { /* reset any pending stall count, not only 1: a stale count would otherwise add up across unrelated stalls */ + kprintf("watchdog : kernel unlocked\n"); + no_ticks = 0; + } + /* we are still ticking, everything seems good */ + last_tick_count = watchdog_local_timer_ticks; + goto reset_and_continue; + } + + /* + * if watchdog_local_timer_ticks didn't change since last time, give it + * some more time and only if it is still dead, trigger the watchdog alarm + */ + if (++no_ticks < 10) { + if (no_ticks == 1) + kprintf("WARNING watchdog : possible kernel lockup\n"); + goto reset_and_continue; + } + + arch_watchdog_lockup(frame); + +reset_and_continue: + if (watchdog->reinit) + watchdog->reinit(cpuid); +} diff --git a/kernel/watchdog.h b/kernel/watchdog.h new file mode 100644 index 000000000..132fed9ac --- /dev/null +++ b/kernel/watchdog.h @@ -0,0 +1,36 @@ +#ifndef __WATCHDOG_H__ +#define __WATCHDOG_H__ + +#include "kernel.h" +#include "arch/i386/watchdog.h" + +extern int watchdog_enabled; /* if set to non-zero the watch dog is enabled */ +extern unsigned watchdog_local_timer_ticks; /* is timer still ticking? */ + +/* + * as the implementation is not only architecture dependent but like in x86 case + * very much model specific, we need to keep a collection of methods that + * implement it in runtime after the correct arch/model was detected + */ + +typedef void (* arch_watchdog_method_t)(int); + +struct arch_watchdog { + arch_watchdog_method_t init; /* initial setup */ + arch_watchdog_method_t reinit; /* reinitialization after a tick */ + unsigned resetval; +}; + +extern struct arch_watchdog *watchdog; + +/* let the arch code do whatever it needs to setup the watchdog */ +int arch_watchdog_init(void); +/* if the watchdog detects lockup, let the arch code to handle it */ +void arch_watchdog_lockup(struct nmi_frame * frame); + +/* generic NMI handler. 
Takes one argument which points to where the arch + * specific low level handler dumped CPU information and can be inspected by the + * arch specific code of the watchdog implementation */ +void nmi_watchdog_handler(struct nmi_frame * frame); + +#endif /* __WATCHDOG_H__ */