NMI watchdog is an awesome feature for debugging locked up kernels.

There is not that much use for it on a single CPU; however, a deadlock
between the kernel and a system task can be detected. Or a runaway loop.

If a kernel gets locked up, the timer interrupts don't occur (as all
interrupts are disabled in kernel mode). The only chance is to
interrupt the kernel by a non-maskable interrupt.

This patch generates NMIs using performance counters. It uses the most
widely available performance counters. As the performance counters are
highly model-specific, this patch is not guaranteed to work on every
machine.  Unfortunately this is also true for KVM :-/ On the other
hand, adding this feature for other models is not extremely difficult
and the framework hopefully makes it easy enough.

Depending on the frequency of the CPU, an NMI is generated at most
about every 0.5s. If the CPU's speed is less than 2GHz, it is generated
at most every 1s. In general an NMI is generated much less often, as
the performance counter counts down only if the CPU is not idle.
Therefore the overhead of this feature is fairly minimal even if the
load is high.

Upon detecting that the kernel is locked up, the kernel dumps the
state of the kernel registers and panics.

Local APIC must be enabled for the watchdog to work.

The code is _always_ compiled in, however, it is only enabled if  
watchdog=<non-zero> is set in the boot monitor.

One corner case is serial console debugging. As dumping a lot of stuff
to the serial link may take a lot of time, the watchdog does not
detect lockups during this time!!! as it would result in too many
false positives. 10 NMIs have to be handled before the lockup is
detected. This means something between ~5s and 10s.

Another corner case is that the watchdog is enabled only after the
paging is enabled as it would be pure madness to try to get it right.
This commit is contained in:
Tomas Hruby 2010-01-16 20:53:55 +00:00
parent a8b52644c4
commit 5efa92f754
17 changed files with 460 additions and 103 deletions

View file

@ -21,7 +21,8 @@ LDFLAGS=-i
HEAD = $a/mpx386.o HEAD = $a/mpx386.o
OBJS = start.o table.o main.o proc.o \ OBJS = start.o table.o main.o proc.o \
system.o clock.o utility.o debug.o profile.o interrupt.o system.o clock.o utility.o debug.o profile.o interrupt.o \
watchdog.o
SYSTEM = system.a SYSTEM = system.a
ARCHLIB = $a/$(ARCH).a ARCHLIB = $a/$(ARCH).a
LIBS = -ltimers -lsys LIBS = -ltimers -lsys

View file

@ -22,7 +22,8 @@ OBJS= arch_do_vmctl.o \
protect.o \ protect.o \
system.o \ system.o \
apic.o \ apic.o \
apic_asm.o apic_asm.o \
watchdog.o
CPPFLAGS=-Iinclude CPPFLAGS=-Iinclude
CFLAGS=$(CPPFLAGS) -Wall $(CPROFILE) CFLAGS=$(CPPFLAGS) -Wall $(CPROFILE)

View file

@ -21,73 +21,9 @@
#include "../../clock.h" #include "../../clock.h"
#include "glo.h" #include "glo.h"
#define APIC_ENABLE 0x100 #ifdef CONFIG_WATCHDOG
#define APIC_FOCUS (~(1 << 9)) #include "../../watchdog.h"
#define APIC_SIV 0xFF #endif
#define APIC_TDCR_2 0x00
#define APIC_TDCR_4 0x01
#define APIC_TDCR_8 0x02
#define APIC_TDCR_16 0x03
#define APIC_TDCR_32 0x08
#define APIC_TDCR_64 0x09
#define APIC_TDCR_128 0x0a
#define APIC_TDCR_1 0x0b
#define IS_SET(mask) (mask)
#define IS_CLEAR(mask) 0
#define APIC_LVTT_VECTOR_MASK 0x000000FF
#define APIC_LVTT_DS_PENDING (1 << 12)
#define APIC_LVTT_MASK (1 << 16)
#define APIC_LVTT_TM (1 << 17)
#define APIC_LVT_IIPP_MASK 0x00002000
#define APIC_LVT_IIPP_AH 0x00002000
#define APIC_LVT_IIPP_AL 0x00000000
#define APIC_LVT_TM_ONESHOT IS_CLEAR(APIC_LVTT_TM)
#define APIC_LVT_TM_PERIODIC IS_SET(APIC_LVTT_TM)
#define APIC_SVR_SWEN 0x00000100
#define APIC_SVR_FOCUS 0x00000200
#define IOAPIC_REGSEL 0x0
#define IOAPIC_RW 0x10
#define APIC_ICR_DM_MASK 0x00000700
#define APIC_ICR_VECTOR APIC_LVTT_VECTOR_MASK
#define APIC_ICR_DM_FIXED (0 << 8)
#define APIC_ICR_DM_LOWEST_PRIORITY (1 << 8)
#define APIC_ICR_DM_SMI (2 << 8)
#define APIC_ICR_DM_RESERVED (3 << 8)
#define APIC_ICR_DM_NMI (4 << 8)
#define APIC_ICR_DM_INIT (5 << 8)
#define APIC_ICR_DM_STARTUP (6 << 8)
#define APIC_ICR_DM_EXTINT (7 << 8)
#define APIC_ICR_DM_PHYSICAL (0 << 11)
#define APIC_ICR_DM_LOGICAL (1 << 11)
#define APIC_ICR_DELIVERY_PENDING (1 << 12)
#define APIC_ICR_INT_POLARITY (1 << 13)
#define APIC_ICR_INTPOL_LOW IS_SET(APIC_ICR_INT_POLARITY)
#define APIC_ICR_INTPOL_HIGH IS_CLEAR(APIC_ICR_INT_POLARITY)
#define APIC_ICR_LEVEL_ASSERT (1 << 14)
#define APIC_ICR_LEVEL_DEASSERT (0 << 14)
#define APIC_ICR_TRIGGER (1 << 15)
#define APIC_ICR_TM_LEVEL IS_CLEAR(APIC_ICR_TRIGGER)
#define APIC_ICR_TM_EDGE IS_CLEAR(APIC_ICR_TRIGGER)
#define APIC_ICR_INT_MASK (1 << 16)
#define APIC_ICR_DEST_FIELD (0 << 18)
#define APIC_ICR_DEST_SELF (1 << 18)
#define APIC_ICR_DEST_ALL (2 << 18)
#define APIC_ICR_DEST_ALL_BUT_SELF (3 << 18)
#define IA32_APIC_BASE 0x1b #define IA32_APIC_BASE 0x1b
#define IA32_APIC_BASE_ENABLE_BIT 11 #define IA32_APIC_BASE_ENABLE_BIT 11
@ -103,11 +39,6 @@
#define CONFIG_MAX_CPUS 1 #define CONFIG_MAX_CPUS 1
#define cpu_is_bsp(x) 1 #define cpu_is_bsp(x) 1
PRIVATE int cpuid(void)
{
return 0;
}
#define lapic_write_icr1(val) lapic_write(LAPIC_ICR1, val) #define lapic_write_icr1(val) lapic_write(LAPIC_ICR1, val)
#define lapic_write_icr2(val) lapic_write(LAPIC_ICR2, val) #define lapic_write_icr2(val) lapic_write(LAPIC_ICR2, val)
@ -226,12 +157,14 @@ PUBLIC void apic_calibrate_clocks(void)
lapic_delta = lapic_tctr0 - lapic_tctr1; lapic_delta = lapic_tctr0 - lapic_tctr1;
tsc_delta = sub64(tsc1, tsc0); tsc_delta = sub64(tsc1, tsc0);
lapic_bus_freq[cpuid()] = system_hz * lapic_delta / (PROBE_TICKS - 1); lapic_bus_freq[cpuid] = system_hz * lapic_delta / (PROBE_TICKS - 1);
BOOT_VERBOSE(kprintf("APIC bus freq %lu MHz\n", BOOT_VERBOSE(kprintf("APIC bus freq %lu MHz\n",
lapic_bus_freq[cpuid()] / 1000000)); lapic_bus_freq[cpuid] / 1000000));
cpu_freq = div64u(tsc_delta, PROBE_TICKS - 1) * system_hz; cpu_freq = div64u(tsc_delta, PROBE_TICKS - 1) * system_hz;
BOOT_VERBOSE(kprintf("CPU %d freq %lu MHz\n", cpuid(), BOOT_VERBOSE(kprintf("CPU %d freq %lu MHz\n", cpuid,
cpu_freq / 1000000)); cpu_freq / 1000000));
cpu_set_freq(cpuid, cpu_freq);
} }
PRIVATE void lapic_set_timer_one_shot(u32_t value) PRIVATE void lapic_set_timer_one_shot(u32_t value)
@ -239,7 +172,7 @@ PRIVATE void lapic_set_timer_one_shot(u32_t value)
/* sleep in micro seconds */ /* sleep in micro seconds */
u32_t lvtt; u32_t lvtt;
u32_t ticks_per_us; u32_t ticks_per_us;
u8_t cpu = cpuid (); u8_t cpu = cpuid;
ticks_per_us = lapic_bus_freq[cpu] / 1000000; ticks_per_us = lapic_bus_freq[cpu] / 1000000;
@ -259,7 +192,7 @@ PUBLIC void lapic_set_timer_periodic(unsigned freq)
/* sleep in micro seconds */ /* sleep in micro seconds */
u32_t lvtt; u32_t lvtt;
u32_t lapic_ticks_per_clock_tick; u32_t lapic_ticks_per_clock_tick;
u8_t cpu = cpuid(); u8_t cpu = cpuid;
lapic_ticks_per_clock_tick = lapic_bus_freq[cpu] / freq; lapic_ticks_per_clock_tick = lapic_bus_freq[cpu] / freq;
@ -267,7 +200,7 @@ PUBLIC void lapic_set_timer_periodic(unsigned freq)
lapic_write(LAPIC_TIMER_DCR, lvtt); lapic_write(LAPIC_TIMER_DCR, lvtt);
/* configure timer as periodic */ /* configure timer as periodic */
lvtt = APIC_LVT_TM_PERIODIC | APIC_TIMER_INT_VECTOR; lvtt = APIC_LVTT_TM | APIC_TIMER_INT_VECTOR;
lapic_write(LAPIC_LVTTR, lvtt); lapic_write(LAPIC_LVTTR, lvtt);
lapic_write(LAPIC_TIMER_ICR, lapic_ticks_per_clock_tick); lapic_write(LAPIC_TIMER_ICR, lapic_ticks_per_clock_tick);
@ -329,7 +262,7 @@ PRIVATE void lapic_enable_no_lints(void)
lapic_extint_assigned = (val & APIC_ICR_DM_MASK) == APIC_ICR_DM_EXTINT; lapic_extint_assigned = (val & APIC_ICR_DM_MASK) == APIC_ICR_DM_EXTINT;
val &= ~(APIC_ICR_DM_MASK|APIC_ICR_INT_MASK); val &= ~(APIC_ICR_DM_MASK|APIC_ICR_INT_MASK);
if (!ioapic_enabled && cpu_is_bsp(cpuid())) if (!ioapic_enabled && cpu_is_bsp(cpuid))
val |= (APIC_ICR_DM_EXTINT); /* ExtINT at LINT0 */ val |= (APIC_ICR_DM_EXTINT); /* ExtINT at LINT0 */
else else
val |= (APIC_ICR_DM_EXTINT|APIC_ICR_INT_MASK); /* Masked ExtINT at LINT0 */ val |= (APIC_ICR_DM_EXTINT|APIC_ICR_INT_MASK); /* Masked ExtINT at LINT0 */
@ -339,7 +272,7 @@ PRIVATE void lapic_enable_no_lints(void)
val = lapic_read(LAPIC_LINT1); val = lapic_read(LAPIC_LINT1);
val &= ~(APIC_ICR_DM_MASK|APIC_ICR_INT_MASK); val &= ~(APIC_ICR_DM_MASK|APIC_ICR_INT_MASK);
if (!ioapic_enabled && cpu_is_bsp(cpuid())) if (!ioapic_enabled && cpu_is_bsp(cpuid))
val |= APIC_ICR_DM_NMI; val |= APIC_ICR_DM_NMI;
else else
val |= (APIC_ICR_DM_NMI | APIC_ICR_INT_MASK); /* NMI at LINT1 */ val |= (APIC_ICR_DM_NMI | APIC_ICR_INT_MASK); /* NMI at LINT1 */
@ -378,7 +311,7 @@ PUBLIC int lapic_enable(void)
u32_t timeout = 0xFFFF; u32_t timeout = 0xFFFF;
u32_t errstatus = 0; u32_t errstatus = 0;
int i; int i;
unsigned cpu = cpuid (); unsigned cpu = cpuid;
if (!lapic_addr) if (!lapic_addr)
return 0; return 0;
@ -400,13 +333,14 @@ PUBLIC int lapic_enable(void)
/* Enable Local APIC and set the spurious vector to 0xff. */ /* Enable Local APIC and set the spurious vector to 0xff. */
val = lapic_read(LAPIC_SIVR) & 0xFFFFFF00; val = lapic_read(LAPIC_SIVR) & 0xFFFFFF00;
val |= (APIC_ENABLE | APIC_FOCUS | APIC_SPURIOUS_INT_VECTOR); val |= APIC_ENABLE | APIC_SPURIOUS_INT_VECTOR;
val &= ~APIC_FOCUS_DISABLED;
lapic_write(LAPIC_SIVR, val); lapic_write(LAPIC_SIVR, val);
lapic_read(LAPIC_SIVR); lapic_read(LAPIC_SIVR);
*((u32_t *)lapic_eoi_addr) = 0; *((u32_t *)lapic_eoi_addr) = 0;
cpu = cpuid (); cpu = cpuid;
/* Program Logical Destination Register. */ /* Program Logical Destination Register. */
val = lapic_read(LAPIC_LDR) & ~0xFF000000; val = lapic_read(LAPIC_LDR) & ~0xFF000000;
@ -514,7 +448,7 @@ PUBLIC void apic_idt_init(int reset)
} }
#ifdef CONFIG_APIC_DEBUG #ifdef CONFIG_APIC_DEBUG
if (cpu_is_bsp(cpuid())) if (cpu_is_bsp(cpuid))
kprintf("APIC debugging is enabled\n"); kprintf("APIC debugging is enabled\n");
lapic_set_dummy_handlers(); lapic_set_dummy_handlers();
#endif #endif
@ -528,7 +462,7 @@ PUBLIC void apic_idt_init(int reset)
idt_copy_vectors(gate_table_common); idt_copy_vectors(gate_table_common);
/* configure the timer interupt handler */ /* configure the timer interupt handler */
if (cpu_is_bsp(cpuid())) { if (cpu_is_bsp(cpuid)) {
local_timer_intr_handler = (vir_bytes) lapic_bsp_timer_int_handler; local_timer_intr_handler = (vir_bytes) lapic_bsp_timer_int_handler;
BOOT_VERBOSE(kprintf("Initiating BSP timer handler\n")); BOOT_VERBOSE(kprintf("Initiating BSP timer handler\n"));
} else { } else {

View file

@ -1,6 +1,61 @@
#ifndef __APIC_H__ #ifndef __APIC_H__
#define __APIC_H__ #define __APIC_H__
#define APIC_ENABLE 0x100
#define APIC_FOCUS_DISABLED (1 << 9)
#define APIC_SIV 0xFF
#define APIC_TDCR_2 0x00
#define APIC_TDCR_4 0x01
#define APIC_TDCR_8 0x02
#define APIC_TDCR_16 0x03
#define APIC_TDCR_32 0x08
#define APIC_TDCR_64 0x09
#define APIC_TDCR_128 0x0a
#define APIC_TDCR_1 0x0b
#define APIC_LVTT_VECTOR_MASK 0x000000FF
#define APIC_LVTT_DS_PENDING (1 << 12)
#define APIC_LVTT_MASK (1 << 16)
#define APIC_LVTT_TM (1 << 17)
#define APIC_LVT_IIPP_MASK 0x00002000
#define APIC_LVT_IIPP_AH 0x00002000
#define APIC_LVT_IIPP_AL 0x00000000
#define IOAPIC_REGSEL 0x0
#define IOAPIC_RW 0x10
#define APIC_ICR_DM_MASK 0x00000700
#define APIC_ICR_VECTOR APIC_LVTT_VECTOR_MASK
#define APIC_ICR_DM_FIXED (0 << 8)
#define APIC_ICR_DM_LOWEST_PRIORITY (1 << 8)
#define APIC_ICR_DM_SMI (2 << 8)
#define APIC_ICR_DM_RESERVED (3 << 8)
#define APIC_ICR_DM_NMI (4 << 8)
#define APIC_ICR_DM_INIT (5 << 8)
#define APIC_ICR_DM_STARTUP (6 << 8)
#define APIC_ICR_DM_EXTINT (7 << 8)
#define APIC_ICR_DM_PHYSICAL (0 << 11)
#define APIC_ICR_DM_LOGICAL (1 << 11)
#define APIC_ICR_DELIVERY_PENDING (1 << 12)
#define APIC_ICR_INT_POLARITY (1 << 13)
#define APIC_ICR_LEVEL_ASSERT (1 << 14)
#define APIC_ICR_LEVEL_DEASSERT (0 << 14)
#define APIC_ICR_TRIGGER (1 << 15)
#define APIC_ICR_INT_MASK (1 << 16)
#define APIC_ICR_DEST_FIELD (0 << 18)
#define APIC_ICR_DEST_SELF (1 << 18)
#define APIC_ICR_DEST_ALL (2 << 18)
#define APIC_ICR_DEST_ALL_BUT_SELF (3 << 18)
#define LOCAL_APIC_DEF_ADDR 0xfee00000 /* default local apic address */ #define LOCAL_APIC_DEF_ADDR 0xfee00000 /* default local apic address */
#define IO_APIC_DEF_ADDR 0xfec00000 /* default i/o apic address */ #define IO_APIC_DEF_ADDR 0xfec00000 /* default i/o apic address */

View file

@ -188,8 +188,8 @@ struct proc *t;
kprintf("\n%s\n", ep->msg); kprintf("\n%s\n", ep->msg);
kprintf("is_nested = %d ", is_nested); kprintf("is_nested = %d ", is_nested);
kprintf("vec_nr= %d, trap_errno= 0x%x, eip= 0x%x, cs= 0x%x, eflags= 0x%x\n", kprintf("vec_nr= %d, trap_errno= 0x%x, eip= 0x%x, cs= 0x%x, eflags= 0x%x trap_esp 0x%08x\n",
frame->vector, frame->errcode, frame->eip, frame->cs, frame->eflags); frame->vector, frame->errcode, frame->eip, frame->cs, frame->eflags, frame);
/* TODO should we enable this only when compiled for some debug mode? */ /* TODO should we enable this only when compiled for some debug mode? */
if (saved_proc) { if (saved_proc) {
kprintf("scheduled was: process %d (%s), ", proc_nr(saved_proc), saved_proc->p_name); kprintf("scheduled was: process %d (%s), ", proc_nr(saved_proc), saved_proc->p_name);

View file

@ -20,6 +20,9 @@
#ifdef CONFIG_APIC #ifdef CONFIG_APIC
#include "apic.h" #include "apic.h"
#ifdef CONFIG_WATCHDOG
#include "../../watchdog.h"
#endif
#endif #endif
PRIVATE int psok = 0; PRIVATE int psok = 0;
@ -1073,5 +1076,15 @@ PUBLIC int arch_enable_paging(void)
lapic_eoi_addr = LAPIC_EOI; lapic_eoi_addr = LAPIC_EOI;
} }
#endif #endif
#ifdef CONFIG_WATCHDOG
/*
* We make sure that we don't enable the watchdog until paging is turned
* on as we might get a NMI while switching and we might still use wrong
* lapic address. Bad things would happen. It is unfortunate but such is
* life
*/
level0(i386_watchdog_start);
#endif
return OK; return OK;
} }

View file

@ -42,6 +42,8 @@
* and modified by the boot monitor before the kernel starts. * and modified by the boot monitor before the kernel starts.
*/ */
#include "../../kernel.h" /* configures the kernel */
/* sections */ /* sections */
#include <sys/vm_i386.h> #include <sys/vm_i386.h>
@ -466,31 +468,43 @@ restart:
call schedcheck call schedcheck
/* %eax is set by schedcheck() to the process to run */ /* %eax is set by schedcheck() to the process to run */
mov %eax, %esp /* will assume P_STACKBASE == 0 */ mov %eax, %ebp /* will assume P_STACKBASE == 0 */
lldt P_LDT_SEL(%esp) /* enable process' segment descriptors */ cmpl $0, P_CR3(%ebp)
cmpl $0, P_CR3(%esp)
jz 0f jz 0f
/* /*
* test if the cr3 is loaded with the current value to avoid unnecessary * test if the cr3 is loaded with the current value to avoid unnecessary
* TLB flushes * TLB flushes
*/ */
mov P_CR3(%esp), %eax mov P_CR3(%ebp), %eax
mov %cr3, %ecx mov %cr3, %ecx
cmp %ecx, %eax cmp %ecx, %eax
jz 0f jz 0f
mov %eax, %cr3 mov %eax, %cr3
mov %esp, ptproc mov %ebp, ptproc
movl $0, dirtypde movl $0, dirtypde
0: 0:
popw %gs /* reconstruct the stack for iret */
popw %fs movl SSREG(%ebp), %eax
popw %es push %eax
popw %ds movl SPREG(%ebp), %eax
popal push %eax
add $4, %esp /* skip return adr FIXME unused value */ movl PSWREG(%ebp), %eax
push %eax
movl CSREG(%ebp), %eax
push %eax
movl PCREG(%ebp), %eax
push %eax
RESTORE_GP_REGS(%ebp)
lldt P_LDT_SEL(%ebp) /* enable process' segment descriptors */
RESTORE_SEGS(%ebp)
movl %ss:BPREG(%ebp), %ebp
iret /* continue process */ iret /* continue process */
/*===========================================================================*/ /*===========================================================================*/
@ -512,7 +526,50 @@ single_step_exception:
EXCEPTION_NO_ERR_CODE(DEBUG_VECTOR) EXCEPTION_NO_ERR_CODE(DEBUG_VECTOR)
nmi: nmi:
#ifndef CONFIG_WATCHDOG
EXCEPTION_NO_ERR_CODE(NMI_VECTOR) EXCEPTION_NO_ERR_CODE(NMI_VECTOR)
#else
/*
* We have to be very careful as this interrupt can occur anytime. On
* the other hand, if it interrupts a user process, we will resume the
* same process which makes things a little simpler. We know that we are
* already on kernel stack whenever it happened and we can be
* conservative and save everything as we don't need to be extremely
* efficient as the interrupt is infrequent and some overhead is already
* expected.
*/
/*
* save the important registers. We don't save %cs and %ss and they are
* saved and restored by CPU
*/
pushw %ds
pushw %es
pushw %fs
pushw %gs
pusha
/*
* We cannot be sure about the state of the kernel segment register,
* however, we always set %ds and %es to the same as %ss
*/
mov %ss, %si
mov %si, %ds
mov %si, %es
push %esp
call nmi_watchdog_handler
add $4, %esp
/* restore all the important registers as they were before the trap */
popa
popw %gs
popw %fs
popw %es
popw %ds
iret
#endif
breakpoint_exception: breakpoint_exception:
EXCEPTION_NO_ERR_CODE(BREAKPOINT_VECTOR) EXCEPTION_NO_ERR_CODE(BREAKPOINT_VECTOR)

View file

@ -90,7 +90,10 @@
#define RESTORE_KERNEL_SEGS \ #define RESTORE_KERNEL_SEGS \
mov %ss, %si ;\ mov %ss, %si ;\
mov %si, %ds ;\ mov %si, %ds ;\
mov %si, %es ; mov %si, %es ;\
movw $0, %si ;\
mov %si, %gs ;\
mov %si, %fs ;
#define SAVE_GP_REGS(pptr) \ #define SAVE_GP_REGS(pptr) \
mov %eax, %ss:AXREG(pptr) ;\ mov %eax, %ss:AXREG(pptr) ;\

View file

@ -280,7 +280,7 @@ PRIVATE void ser_debug(int c)
{ {
int u = 0; int u = 0;
do_serial_debug++; serial_debug_active = 1;
/* Disable interrupts so that we get a consistent state. */ /* Disable interrupts so that we get a consistent state. */
if(!intr_disabled()) { lock; u = 1; }; if(!intr_disabled()) { lock; u = 1; };
@ -314,7 +314,7 @@ PRIVATE void ser_debug(int c)
TOGGLECASE('9', VF_PICKPROC) TOGGLECASE('9', VF_PICKPROC)
#endif #endif
} }
do_serial_debug--; serial_debug_active = 0;
if(u) { unlock; } if(u) { unlock; }
} }

133
kernel/arch/i386/watchdog.c Normal file
View file

@ -0,0 +1,133 @@
#include "../../kernel.h"
#include "../../watchdog.h"
#include "apic.h"
/*
 * Bit position tested in EBX after CPUID is executed with EAX = 0xA.
 * NOTE: despite the name, arch_watchdog_init() treats this bit being SET as
 * "the unhalted core cycles event is NOT usable" and bails out.
 */
#define CPUID_UNHALTED_CORE_CYCLES_AVAILABLE 0
/* MSR holding the value of performance counter 0 (written on init/reinit) */
#define MSR_PERFMON_CRT0 0xc1
/* MSR holding the event selection for performance counter 0 */
#define MSR_PERFMON_SEL0 0x186
/* bit OR-ed into the event selection to start the counter */
#define MSR_PERFMON_SEL0_ENABLE (1 << 22)
/*
* Intel architecture performance counters watchdog
*/
PRIVATE void intel_arch_watchdog_init(int cpu)
{
	/*
	 * Program performance counter 0 so that it overflows (and raises an
	 * NMI through the local APIC) after roughly 0.5-1s worth of unhalted
	 * core cycles on the given cpu.
	 *
	 * cpu - index of the cpu whose calibrated frequency is used as the
	 *       counter period.
	 *
	 * The order of the MSR/LAPIC writes below matters: the counter is
	 * cleared and the event is selected while the counter is still
	 * disabled, and the NMI is unmasked only after everything is set up.
	 */
	u32_t cpuf;
	u32_t val;

	/* start from a clean counter */
	ia32_msr_write(MSR_PERFMON_CRT0, 0, 0);

	/* Int, OS, USR, Core cycles */
	val = 1 << 20 | 1 << 17 | 1 << 16 | 0x3c;
	ia32_msr_write(MSR_PERFMON_SEL0, 0, val);

	/*
	 * should give us a tick approx. every 0.5-1s, the perf counter has only
	 * lowest 31 bits writable :(
	 *
	 * A 32-bit frequency that does not fit into 31 bits fits after being
	 * halved once, which yields the documented 0.5s period. (Shifting by
	 * two would quarter the period and make the watchdog fire twice as
	 * often as intended.)
	 */
	cpuf = cpu_get_freq(cpu);
	if (cpuf > 0x7fffffffU)
		cpuf >>= 1;
	watchdog->resetval = cpuf;

	/* the counter counts up, so load the negated period */
	ia32_msr_write(MSR_PERFMON_CRT0, 0, -cpuf);

	ia32_msr_write(MSR_PERFMON_SEL0, 0, val | MSR_PERFMON_SEL0_ENABLE);

	/* unmask the performance counter interrupt */
	lapic_write(LAPIC_LVTPCR, APIC_ICR_DM_NMI);
}
/*
 * Re-arm the watchdog after an NMI was handled: write the NMI delivery mode
 * (without the mask bit) to the LAPIC performance counter LVT entry again and
 * reload counter 0 with the negated period stored by the init method.
 * The cpu argument is not used here.
 */
PRIVATE void intel_arch_watchdog_reinit(int cpu)
{
lapic_write(LAPIC_LVTPCR, APIC_ICR_DM_NMI);
ia32_msr_write(MSR_PERFMON_CRT0, 0, -watchdog->resetval);
}
/*
 * Method table for the Intel architectural performance counter watchdog.
 * resetval is not listed and therefore starts out as zero; it is filled in by
 * intel_arch_watchdog_init().
 */
PRIVATE struct arch_watchdog intel_arch_watchdog = {
/*.init = */ intel_arch_watchdog_init,
/*.reinit = */ intel_arch_watchdog_reinit
};
/*
 * Detect whether this CPU can drive the NMI watchdog and, if so, set it up.
 *
 * Returns 0 on success and -1 if the watchdog cannot be used (no local APIC
 * mapped, or the CPU lacks the required performance monitoring support). On
 * success the global `watchdog` points to the selected implementation.
 */
int arch_watchdog_init(void)
{
	reg_t eax, ebx = 0, ecx = 0, edx = 0;

	/*
	 * The watchdog NMI is delivered through the local APIC. Without a
	 * LAPIC we must neither touch its registers nor report success.
	 */
	if (!lapic_addr)
		return -1;

	eax = 0xA;

	_cpuid(&eax, &ebx, &ecx, &edx);

	/* FIXME currently we support only watchdog based on the intel
	 * architectural performance counters. Some Intel CPUs don't have this
	 * feature
	 */
	/* a set bit means the unhalted core cycles event is NOT available */
	if (ebx & (1 << CPUID_UNHALTED_CORE_CYCLES_AVAILABLE))
		return -1;
	/*
	 * bits 8-15 of EAX must be non-zero (per the Intel SDM this field is
	 * the number of general-purpose performance counters)
	 */
	if (!((((eax >> 8)) & 0xff) > 0))
		return -1;

	watchdog = &intel_arch_watchdog;

	/* Set up the PC LVT entry as NMI for the watchdog, masked for now */
	lapic_write(LAPIC_LVTPCR, APIC_ICR_INT_MASK | APIC_ICR_DM_NMI);
	/* read back, presumably to make sure the write has taken effect */
	lapic_read(LAPIC_LVTPCR);

	if (watchdog_enabled && watchdog->init) {
		watchdog->init(cpuid);
	}

	return 0;
}
/*
 * Called by the generic NMI handler once it has decided that the kernel is
 * locked up. Prints the register values found in the NMI frame and then
 * panics.
 *
 * frame - register snapshot taken by the low level NMI entry code.
 *
 * Note that the esp member of the frame is not part of the dump.
 */
void arch_watchdog_lockup(struct nmi_frame * frame)
{
kprintf("KERNEL LOCK UP\n"
"eax 0x%08x\n"
"ecx 0x%08x\n"
"edx 0x%08x\n"
"ebx 0x%08x\n"
"ebp 0x%08x\n"
"esi 0x%08x\n"
"edi 0x%08x\n"
"gs 0x%08x\n"
"fs 0x%08x\n"
"es 0x%08x\n"
"ds 0x%08x\n"
"pc 0x%08x\n"
"cs 0x%08x\n"
"eflags 0x%08x\n",
frame->eax,
frame->ecx,
frame->edx,
frame->ebx,
frame->ebp,
frame->esi,
frame->edi,
frame->gs,
frame->fs,
frame->es,
frame->ds,
frame->pc,
frame->cs,
frame->eflags
);
/* a detected lockup is fatal */
minix_panic("Kernel lockup\n", NO_NUM);
}
/*
 * Start the NMI watchdog if it was requested (watchdog_enabled is non-zero).
 * When the architecture specific initialization fails, a warning is printed
 * and the watchdog is switched off again.
 */
void i386_watchdog_start(void)
{
	/* nothing to do unless the watchdog was asked for */
	if (!watchdog_enabled)
		return;

	if (arch_watchdog_init() == 0) {
		BOOT_VERBOSE(kprintf("Watchdog enabled\n"););
		return;
	}

	/* initialization failed, fall back to running without a watchdog */
	kprintf("WARNING watchdog initialization "
		"failed! Disabled\n");
	watchdog_enabled = 0;
}

View file

@ -0,0 +1,26 @@
#ifndef __I386_WATCHDOG_H__
#define __I386_WATCHDOG_H__
#include "../../kernel.h"
/*
 * Register snapshot built on the stack by the low level NMI handler.
 *
 * The handler executes pushw %ds, %es, %fs, %gs followed by pusha and then
 * passes the resulting stack pointer as the frame. The members must therefore
 * be listed from the lowest address upwards, i.e. in the REVERSE order of the
 * pushes: pusha stores eax, ecx, edx, ebx, esp, ebp, esi, edi in that order,
 * which leaves edi at the lowest address and eax at the highest one. Above
 * the general purpose registers come the segment registers (gs was pushed
 * last) and finally what the CPU saved on the trap.
 */
struct nmi_frame {
	/* stored by pusha, last pushed register first */
	reg_t edi;
	reg_t esi;
	reg_t ebp;
	reg_t esp;	/* stack pointer value as stored by pusha */
	reg_t ebx;
	reg_t edx;
	reg_t ecx;
	reg_t eax;
	/* stored by the four pushw instructions */
	u16_t gs;
	u16_t fs;
	u16_t es;
	u16_t ds;
	/* stored by the CPU when the NMI was taken */
	reg_t pc; /* arch independent name for program counter */
	reg_t cs;
	reg_t eflags;
};
/*
 * Initialize the watchdog if watchdog_enabled is non-zero; clears
 * watchdog_enabled again when the initialization fails.
 */
void i386_watchdog_start(void);
#endif /* __I386_WATCHDOG_H__ */

View file

@ -38,6 +38,10 @@
#include "clock.h" #include "clock.h"
#ifdef CONFIG_WATCHDOG
#include "watchdog.h"
#endif
/* Function prototype for PRIVATE functions. /* Function prototype for PRIVATE functions.
*/ */
FORWARD _PROTOTYPE( void init_clock, (void) ); FORWARD _PROTOTYPE( void init_clock, (void) );
@ -232,6 +236,15 @@ PUBLIC int ap_timer_int_handler(void)
IDLE_STOP; IDLE_STOP;
#ifdef CONFIG_WATCHDOG
/*
* we need to know whether local timer ticks are happening or whether
* the kernel is locked up. We don't care about overflows as we only
* need to know that it's still ticking or not
*/
watchdog_local_timer_ticks++;
#endif
/* Update user and system accounting times. Charge the current process /* Update user and system accounting times. Charge the current process
* for user time. If the current process is not billable, that is, if a * for user time. If the current process is not billable, that is, if a
* non-user process is running, charge the billable process for system * non-user process is running, charge the billable process for system

View file

@ -72,6 +72,11 @@ EXTERN u64_t idle_stop;
EXTERN int idle_active; EXTERN int idle_active;
#endif #endif
EXTERN unsigned cpu_hz[CONFIG_MAX_CPUS];
#define cpu_set_freq(cpu, freq) do {cpu_hz[cpu] = freq;} while (0)
#define cpu_get_freq(cpu) cpu_hz[cpu]
/* VM */ /* VM */
EXTERN int vm_running; EXTERN int vm_running;
EXTERN int catch_pagefaults; EXTERN int catch_pagefaults;
@ -85,4 +90,6 @@ extern struct boot_image image[]; /* system image processes */
extern char *t_stack[]; /* task stack space */ extern char *t_stack[]; /* task stack space */
extern struct segdesc_s gdt[]; /* global descriptor table */ extern struct segdesc_s gdt[]; /* global descriptor table */
EXTERN volatile int serial_debug_active;
#endif /* GLO_H */ #endif /* GLO_H */

View file

@ -5,6 +5,14 @@
#define CONFIG_APIC #define CONFIG_APIC
/* boot verbose */ /* boot verbose */
#define CONFIG_BOOT_VERBOSE #define CONFIG_BOOT_VERBOSE
/*
* compile in the nmi watchdog by default. It is not enabled until watchdog=1
* (non-zero) is set in monitor
*/
#define CONFIG_WATCHDOG
/* We only support 1 cpu now */
#define CONFIG_MAX_CPUS 1
#define cpuid 0
/* measure cumulative idle timestamp counter ticks */ /* measure cumulative idle timestamp counter ticks */
#undef CONFIG_IDLE_TSC #undef CONFIG_IDLE_TSC
@ -15,6 +23,12 @@
#define _MINIX 1 /* tell headers to include MINIX stuff */ #define _MINIX 1 /* tell headers to include MINIX stuff */
#define _SYSTEM 1 /* tell headers that this is the kernel */ #define _SYSTEM 1 /* tell headers that this is the kernel */
/*
* we need the defines above in assembly files to configure the kernel
* correctly. However we don't need the rest
*/
#ifndef __ASSEMBLY__
/* The following are so basic, all the *.c files get them automatically. */ /* The following are so basic, all the *.c files get them automatically. */
#include <minix/config.h> /* global configuration, MUST be first */ #include <minix/config.h> /* global configuration, MUST be first */
#include <ansi.h> /* C style: ANSI or K&R, MUST be second */ #include <ansi.h> /* C style: ANSI or K&R, MUST be second */
@ -36,4 +50,6 @@
#include "profile.h" /* system profiling */ #include "profile.h" /* system profiling */
#include "debug.h" /* debugging, MUST be last kernel header */ #include "debug.h" /* debugging, MUST be last kernel header */
#endif /* __ASSEMBLY__ */
#endif /* KERNEL_H */ #endif /* KERNEL_H */

View file

@ -8,6 +8,10 @@
#include <archconst.h> #include <archconst.h>
#include "proto.h" #include "proto.h"
#ifdef CONFIG_WATCHDOG
#include "watchdog.h"
#endif
FORWARD _PROTOTYPE( char *get_value, (_CONST char *params, _CONST char *key)); FORWARD _PROTOTYPE( char *get_value, (_CONST char *params, _CONST char *key));
/*===========================================================================* /*===========================================================================*
* cstart * * cstart *
@ -84,6 +88,12 @@ U16_t parmoff, parmsize; /* boot parameters offset and length */
config_no_apic = 1; config_no_apic = 1;
#endif #endif
#ifdef CONFIG_WATCHDOG
value = get_value(params_buffer, "watchdog");
if (value)
watchdog_enabled = atoi(value);
#endif
/* Return to assembler code to switch to protected mode (if 286), /* Return to assembler code to switch to protected mode (if 286),
* reload selectors and call main(). * reload selectors and call main().
*/ */

52
kernel/watchdog.c Normal file
View file

@ -0,0 +1,52 @@
/*
* This is the arch independent part of the NMI watchdog implementation. It is
* used to detect kernel lockups and to help debugging. Each architecture must
* add its own low level code that triggers periodic checks.
*/
#include "watchdog.h"
/* incremented by the local timer interrupt handler; compared between NMIs */
unsigned watchdog_local_timer_ticks;
/* methods of the watchdog implementation selected by the arch code */
struct arch_watchdog *watchdog;
/* non-zero if the watchdog is enabled */
int watchdog_enabled;
/*
 * Generic NMI watchdog handler, called by the arch specific low level NMI
 * code on every watchdog NMI.
 *
 * It compares the local timer tick counter with the value seen on the
 * previous NMI. If the counter did not move for 10 consecutive NMIs the
 * kernel is considered locked up and the arch code is asked to handle it.
 * In all other cases the watchdog is re-armed and execution continues.
 *
 * frame - register snapshot taken by the low level handler; only handed over
 *         to arch_watchdog_lockup().
 */
void nmi_watchdog_handler(struct nmi_frame * frame)
{
	/* FIXME this should be CPU local */
	static unsigned no_ticks;
	static unsigned last_tick_count = (unsigned) -1;

	/*
	 * when debugging on serial console, printing takes a lot of time some
	 * times while the kernel is certainly not locked up. We don't want to
	 * report a lockup in such situation
	 */
	if (serial_debug_active)
		goto reset_and_continue;

	if (last_tick_count != watchdog_local_timer_ticks) {
		/*
		 * The timer is ticking again. Forget about ANY number of
		 * missed ticks, not only about exactly one, otherwise the
		 * stale count would add up across unrelated short stalls and
		 * eventually trigger a false alarm.
		 */
		if (no_ticks > 0) {
			kprintf("watchdog : kernel unlocked\n");
			no_ticks = 0;
		}
		/* we are still ticking, everything seems good */
		last_tick_count = watchdog_local_timer_ticks;
		goto reset_and_continue;
	}

	/*
	 * if watchdog_local_timer_ticks didn't change since last time, give it
	 * some more time and only if it is still dead, trigger the watchdog
	 * alarm
	 */
	if (++no_ticks < 10) {
		if (no_ticks == 1)
			kprintf("WARNING watchdog : possible kernel lockup\n");
		goto reset_and_continue;
	}

	arch_watchdog_lockup(frame);

reset_and_continue:
	/*
	 * An NMI may arrive although no watchdog implementation has been
	 * selected; there is nothing to re-arm in that case.
	 */
	if (watchdog && watchdog->reinit)
		watchdog->reinit(cpuid);
}

36
kernel/watchdog.h Normal file
View file

@ -0,0 +1,36 @@
#ifndef __WATCHDOG_H__
#define __WATCHDOG_H__
#include "kernel.h"
#include "arch/i386/watchdog.h"
/* if set to non-zero the watchdog is enabled */
extern int watchdog_enabled; /* if set to non-zero the watch dog is enabled */
/* incremented on local timer ticks, checked by the NMI handler */
extern unsigned watchdog_local_timer_ticks; /* is timer still ticking? */
/*
* as the implementation is not only architecture dependent but like in x86 case
* very much model specific, we need to keep a collection of methods that
* implement it in runtime after the correct arch/model was detected
*/
/* a watchdog method; callers in this code pass the cpu number (cpuid) */
typedef void (* arch_watchdog_method_t)(int);
/* set of methods and state of one watchdog implementation */
struct arch_watchdog {
arch_watchdog_method_t init; /* initial setup */
arch_watchdog_method_t reinit; /* reinitialization after a tick */
/* counter period; stored by the init method and reused by reinit */
unsigned resetval;
};
extern struct arch_watchdog *watchdog;
/*
 * let the arch code do whatever it needs to set up the watchdog; a non-zero
 * return value means that the initialization failed
 */
int arch_watchdog_init(void);
/* if the watchdog detects a lockup, let the arch code handle it */
void arch_watchdog_lockup(struct nmi_frame * frame);
/* generic NMI handler. Takes one argument which points to where the arch
 * specific low level handler dumped CPU information and can be inspected by the
 * arch specific code of the watchdog implementation */
void nmi_watchdog_handler(struct nmi_frame * frame);
#endif /* __WATCHDOG_H__ */