NMI sampling

- if profile --nmi is used, the kernel samples via the NMI watchdog, which
  is based on Intel architecture performance counters

- using NMI makes kernel profiling possible

- watchdog kernel lockup detection is disabled while sampling as we
  may get unpredictable interrupts in kernel and thus possibly many
  false positives

- if watchdog is not enabled at boot time, profiling enables it and
  turns it off again when done
This commit is contained in:
Tomas Hruby 2010-09-23 10:49:45 +00:00
parent 74c5cd7668
commit e63b85a50b
8 changed files with 197 additions and 45 deletions

View file

@ -18,7 +18,9 @@
* Intel architecture performance counters watchdog * Intel architecture performance counters watchdog
*/ */
PRIVATE void intel_arch_watchdog_init(int cpu) PRIVATE struct arch_watchdog intel_arch_watchdog;
PRIVATE void intel_arch_watchdog_init(const unsigned cpu)
{ {
u64_t cpuf; u64_t cpuf;
u32_t val; u32_t val;
@ -36,7 +38,7 @@ PRIVATE void intel_arch_watchdog_init(int cpu)
cpuf = cpu_get_freq(cpu); cpuf = cpu_get_freq(cpu);
while (cpuf.hi || cpuf.lo > 0x7fffffffU) while (cpuf.hi || cpuf.lo > 0x7fffffffU)
cpuf = div64u64(cpuf, 2); cpuf = div64u64(cpuf, 2);
watchdog->resetval = cpuf.lo; watchdog->resetval = watchdog->watchdog_resetval = cpuf.lo;
ia32_msr_write(MSR_PERFMON_CRT0, 0, -cpuf.lo); ia32_msr_write(MSR_PERFMON_CRT0, 0, -cpuf.lo);
@ -46,21 +48,21 @@ PRIVATE void intel_arch_watchdog_init(int cpu)
lapic_write(LAPIC_LVTPCR, APIC_ICR_DM_NMI); lapic_write(LAPIC_LVTPCR, APIC_ICR_DM_NMI);
} }
PRIVATE void intel_arch_watchdog_reinit(const int cpu) PRIVATE void intel_arch_watchdog_reinit(const unsigned cpu)
{ {
lapic_write(LAPIC_LVTPCR, APIC_ICR_DM_NMI); lapic_write(LAPIC_LVTPCR, APIC_ICR_DM_NMI);
ia32_msr_write(MSR_PERFMON_CRT0, 0, -watchdog->resetval); ia32_msr_write(MSR_PERFMON_CRT0, 0, -watchdog->resetval);
} }
PRIVATE struct arch_watchdog intel_arch_watchdog = { PUBLIC int arch_watchdog_init(void)
/*.init = */ intel_arch_watchdog_init,
/*.reinit = */ intel_arch_watchdog_reinit
};
int arch_watchdog_init(void)
{ {
u32_t eax, ebx, ecx, edx; u32_t eax, ebx, ecx, edx;
if (!lapic_addr) {
printf("ERROR : Cannot use NMI watchdog if APIC is not enabled\n");
return -1;
}
eax = 0xA; eax = 0xA;
_cpuid(&eax, &ebx, &ecx, &edx); _cpuid(&eax, &ebx, &ecx, &edx);
@ -81,14 +83,18 @@ int arch_watchdog_init(void)
(void) lapic_read(LAPIC_LVTPCR); (void) lapic_read(LAPIC_LVTPCR);
/* double check if LAPIC is enabled */ /* double check if LAPIC is enabled */
if (lapic_addr && watchdog_enabled && watchdog->init) { if (lapic_addr && watchdog->init) {
watchdog->init(cpuid); watchdog->init(cpuid);
} }
return 0; return 0;
} }
void arch_watchdog_lockup(const struct nmi_frame * frame) PUBLIC void arch_watchdog_stop(void)
{
}
PUBLIC void arch_watchdog_lockup(const struct nmi_frame * frame)
{ {
printf("KERNEL LOCK UP\n" printf("KERNEL LOCK UP\n"
"eax 0x%08x\n" "eax 0x%08x\n"
@ -123,15 +129,44 @@ void arch_watchdog_lockup(const struct nmi_frame * frame)
panic("Kernel lockup"); panic("Kernel lockup");
} }
void i386_watchdog_start(void) PUBLIC int i386_watchdog_start(void)
{ {
if (watchdog_enabled) {
if (arch_watchdog_init()) { if (arch_watchdog_init()) {
printf("WARNING watchdog initialization " printf("WARNING watchdog initialization "
"failed! Disabled\n"); "failed! Disabled\n");
watchdog_enabled = 0; watchdog_enabled = 0;
return -1;
} }
else else
BOOT_VERBOSE(printf("Watchdog enabled\n");); BOOT_VERBOSE(printf("Watchdog enabled\n"););
}
return 0;
} }
PRIVATE int intel_arch_watchdog_profile_init(const unsigned freq)
{
u64_t cpuf;
/* FIXME works only if all CPUs have the same freq */
cpuf = cpu_get_freq(cpuid);
cpuf = div64u64(cpuf, freq);
/*
* if freq is too low and the cpu freq too high we may get in a range of
* insane value which cannot be handled by the 31bit CPU perf counter
*/
if (cpuf.hi != 0 || cpuf.lo > 0x7fffffffU) {
printf("ERROR : nmi watchdog ticks exceed 31bits, use higher frequency\n");
return EINVAL;
}
watchdog->profile_resetval = cpuf.lo;
return OK;
}
/*
 * Method table of the Intel architecture performance counters watchdog:
 * initial setup, reinitialization after a tick and profiling setup. The
 * reset value members are not listed here; they are filled in by the init
 * and profile_init methods.
 */
PRIVATE struct arch_watchdog intel_arch_watchdog = {
/*.init = */ intel_arch_watchdog_init,
/*.reinit = */ intel_arch_watchdog_reinit,
/*.profile_init = */ intel_arch_watchdog_profile_init
};

View file

@ -21,6 +21,8 @@ struct nmi_frame {
reg_t eflags; reg_t eflags;
}; };
void i386_watchdog_start(void); _PROTOTYPE(int i386_watchdog_start, (void));
#define nmi_in_kernel(f) ((f)->cs == CS_SELECTOR)
#endif /* __I386_WATCHDOG_H__ */ #endif /* __I386_WATCHDOG_H__ */

View file

@ -1103,10 +1103,11 @@ PUBLIC int arch_enable_paging(struct proc * caller, const message * m_ptr)
#ifdef CONFIG_WATCHDOG #ifdef CONFIG_WATCHDOG
/* /*
* We make sure that we don't enable the watchdog until paging is turned * We make sure that we don't enable the watchdog until paging is turned
* on as we might get a NMI while switching and we might still use wrong * on as we might get an NMI while switching and we might still use wrong
* lapic address. Bad things would happen. It is unfortunate but such is * lapic address. Bad things would happen. It is unfortunate but such is
* life * life
*/ */
if (watchdog_enabled)
i386_watchdog_start(); i386_watchdog_start();
#endif #endif

View file

@ -26,6 +26,7 @@
#if SPROFILE #if SPROFILE
#include <string.h> #include <string.h>
#include "watchdog.h"
/* Function prototype for the profiling clock handler. */ /* Function prototype for the profiling clock handler. */
FORWARD _PROTOTYPE( int profile_clock_handler, (irq_hook_t *hook) ); FORWARD _PROTOTYPE( int profile_clock_handler, (irq_hook_t *hook) );
@ -90,25 +91,20 @@ PRIVATE sprof_save_proc(struct proc * p)
sprof_info.mem_used += sizeof(s); sprof_info.mem_used += sizeof(s);
} }
/*===========================================================================* PRIVATE void profile_sample(struct proc * p)
* profile_clock_handler *
*===========================================================================*/
PRIVATE int profile_clock_handler(irq_hook_t *hook)
{ {
struct proc * p;
/* This executes on every tick of the CMOS timer. */ /* This executes on every tick of the CMOS timer. */
/* Are we profiling, and profiling memory not full? */ /* Are we profiling, and profiling memory not full? */
if (!sprofiling || sprof_info.mem_used == -1) return (1); if (!sprofiling || sprof_info.mem_used == -1)
return;
/* Check if enough memory available before writing sample. */ /* Check if enough memory available before writing sample. */
if (sprof_info.mem_used + sizeof(sprof_info) > sprof_mem_size) { if (sprof_info.mem_used + sizeof(sprof_info) > sprof_mem_size) {
sprof_info.mem_used = -1; sprof_info.mem_used = -1;
return(1); return;
} }
p = get_cpulocal_var(proc_ptr);
if (!(p->p_misc_flags & MF_SPROF_SEEN)) { if (!(p->p_misc_flags & MF_SPROF_SEEN)) {
p->p_misc_flags |= MF_SPROF_SEEN; p->p_misc_flags |= MF_SPROF_SEEN;
sprof_save_proc(p); sprof_save_proc(p);
@ -126,6 +122,17 @@ PRIVATE int profile_clock_handler(irq_hook_t *hook)
} }
sprof_info.total_samples++; sprof_info.total_samples++;
}
/*===========================================================================*
* profile_clock_handler *
*===========================================================================*/
PRIVATE int profile_clock_handler(irq_hook_t *hook)
{
struct proc * p;
p = get_cpulocal_var(proc_ptr);
profile_sample(p);
/* Acknowledge interrupt if necessary. */ /* Acknowledge interrupt if necessary. */
arch_ack_profile_clock(); arch_ack_profile_clock();
@ -133,6 +140,25 @@ PRIVATE int profile_clock_handler(irq_hook_t *hook)
return(1); /* reenable interrupts */ return(1); /* reenable interrupts */
} }
/*
 * Record profiling samples for an NMI described by frame.
 */
PUBLIC void nmi_sprofile_handler(struct nmi_frame * frame)
{
	struct proc *kernel_proc;

	/*
	 * If the NMI interrupted the kernel, first save a sample for the
	 * kernel, using the interrupted pc taken from the NMI frame.
	 */
	if (nmi_in_kernel(frame)) {
		kernel_proc = proc_addr(KERNEL);
		kernel_proc->p_reg.pc = frame->pc;
		profile_sample(kernel_proc);
	}

	/* the current process gets a sample in either case */
	profile_sample(get_cpulocal_var(proc_ptr));
}
#endif /* SPROFILE */ #endif /* SPROFILE */
#if CPROFILE #if CPROFILE

View file

@ -5,12 +5,17 @@
#if SPROFILE /* statistical profiling */ #if SPROFILE /* statistical profiling */
#include "arch_watchdog.h"
EXTERN int sprofiling; /* whether profiling is running */ EXTERN int sprofiling; /* whether profiling is running */
EXTERN int sprofiling_type; /* profiling interrupt type (PROF_RTC or PROF_NMI) */
EXTERN int sprof_mem_size; /* available user memory for data */ EXTERN int sprof_mem_size; /* available user memory for data */
EXTERN struct sprof_info_s sprof_info; /* profiling info for user program */ EXTERN struct sprof_info_s sprof_info; /* profiling info for user program */
EXTERN vir_bytes sprof_data_addr_vir; /* user address to write data */ EXTERN vir_bytes sprof_data_addr_vir; /* user address to write data */
EXTERN endpoint_t sprof_ep; /* user process */ EXTERN endpoint_t sprof_ep; /* user process */
_PROTOTYPE(void nmi_sprofile_handler, (struct nmi_frame * frame));
#endif /* SPROFILE */ #endif /* SPROFILE */

View file

@ -34,6 +34,7 @@ PRIVATE clean_seen_flag(void)
PUBLIC int do_sprofile(struct proc * caller, message * m_ptr) PUBLIC int do_sprofile(struct proc * caller, message * m_ptr)
{ {
int proc_nr; int proc_nr;
int err;
switch(m_ptr->PROF_ACTION) { switch(m_ptr->PROF_ACTION) {
@ -66,7 +67,19 @@ PUBLIC int do_sprofile(struct proc * caller, message * m_ptr)
sprof_mem_size = m_ptr->PROF_MEM_SIZE; sprof_mem_size = m_ptr->PROF_MEM_SIZE;
switch (sprofiling_type = m_ptr->PROF_INTR_TYPE) {
case PROF_RTC:
init_profile_clock(m_ptr->PROF_FREQ); init_profile_clock(m_ptr->PROF_FREQ);
break;
case PROF_NMI:
err = nmi_watchdog_start_profiling(m_ptr->PROF_FREQ);
if (err)
return err;
break;
default:
printf("ERROR : unknown profiling interrupt type\n");
return EINVAL;
}
sprofiling = 1; sprofiling = 1;
@ -87,7 +100,14 @@ PUBLIC int do_sprofile(struct proc * caller, message * m_ptr)
sprofiling = 0; sprofiling = 0;
switch (sprofiling_type) {
case PROF_RTC:
stop_profile_clock(); stop_profile_clock();
break;
case PROF_NMI:
nmi_watchdog_stop_profiling();
break;
}
data_copy(KERNEL, (vir_bytes) &sprof_info, data_copy(KERNEL, (vir_bytes) &sprof_info,
sprof_ep, sprof_info_addr_vir, sizeof(sprof_info)); sprof_ep, sprof_info_addr_vir, sizeof(sprof_info));

View file

@ -6,12 +6,13 @@
#include "watchdog.h" #include "watchdog.h"
#include "arch/i386/glo.h" #include "arch/i386/glo.h"
#include "profile.h"
unsigned watchdog_local_timer_ticks = 0U; unsigned watchdog_local_timer_ticks = 0U;
struct arch_watchdog *watchdog; struct arch_watchdog *watchdog;
int watchdog_enabled; int watchdog_enabled;
void nmi_watchdog_handler(struct nmi_frame * frame) PRIVATE void lockup_check(struct nmi_frame * frame)
{ {
/* FIXME this should be CPU local */ /* FIXME this should be CPU local */
static unsigned no_ticks; static unsigned no_ticks;
@ -23,7 +24,7 @@ void nmi_watchdog_handler(struct nmi_frame * frame)
* report a lockup in such situation * report a lockup in such situation
*/ */
if (serial_debug_active) if (serial_debug_active)
goto reset_and_continue; return;
if (last_tick_count != watchdog_local_timer_ticks) { if (last_tick_count != watchdog_local_timer_ticks) {
if (no_ticks == 1) { if (no_ticks == 1) {
@ -32,7 +33,7 @@ void nmi_watchdog_handler(struct nmi_frame * frame)
} }
/* we are still ticking, everything seems good */ /* we are still ticking, everything seems good */
last_tick_count = watchdog_local_timer_ticks; last_tick_count = watchdog_local_timer_ticks;
goto reset_and_continue; return;
} }
/* /*
@ -42,12 +43,63 @@ void nmi_watchdog_handler(struct nmi_frame * frame)
if (++no_ticks < 10) { if (++no_ticks < 10) {
if (no_ticks == 1) if (no_ticks == 1)
printf("WARNING watchdog : possible kernel lockup\n"); printf("WARNING watchdog : possible kernel lockup\n");
goto reset_and_continue; return;
} }
/* if we get this far, the kernel is locked up */
arch_watchdog_lockup(frame); arch_watchdog_lockup(frame);
}
reset_and_continue: PUBLIC void nmi_watchdog_handler(struct nmi_frame * frame)
if (watchdog->reinit) {
/*
* Do not check for lockups while profiling, it is extremely likely that
* a false positive is detected if the frequency is high
*/
if (watchdog_enabled && !sprofiling)
lockup_check(frame);
if (sprofiling)
nmi_sprofile_handler(frame);
if ((watchdog_enabled || sprofiling) && watchdog->reinit)
watchdog->reinit(cpuid); watchdog->reinit(cpuid);
} }
/*
 * Switch the NMI watchdog to profiling mode with sampling frequency freq.
 *
 * If the watchdog is not enabled, it is initialized first. On success the
 * active reset value becomes the profiling reset value computed by the
 * architecture specific profile_init method.
 *
 * Returns OK on success, ENODEV if the watchdog cannot be initialized or
 * does not support profiling, or the error returned by profile_init.
 */
int nmi_watchdog_start_profiling(const unsigned freq)
{
	int err;

	/* if watchdog hasn't been enabled, we must enable it now */
	if (!watchdog_enabled) {
		if (arch_watchdog_init())
			return ENODEV;
	}

	if (!watchdog->profile_init) {
		printf("WARNING NMI watchdog profiling not supported\n");
		nmi_watchdog_stop_profiling();
		return ENODEV;
	}

	err = watchdog->profile_init(freq);
	if (err != OK) {
		/*
		 * undo the setup above the same way as when profiling is not
		 * supported, so that every failure path leaves the same state
		 */
		nmi_watchdog_stop_profiling();
		return err;
	}

	watchdog->resetval = watchdog->profile_resetval;

	return OK;
}
/*
 * Leave profiling mode: restore the regular watchdog reset value and, if the
 * watchdog itself is not enabled, let the arch code stop it.
 */
void nmi_watchdog_stop_profiling(void)
{
	/*
	 * If the watchdog keeps running, it must use its normal reset value
	 * again. If the NMI source is not rearmed, this is harmless.
	 */
	if (watchdog) {
		watchdog->resetval = watchdog->watchdog_resetval;
	}

	/* the watchdog stays on, nothing to stop */
	if (watchdog_enabled) {
		return;
	}

	arch_watchdog_stop();
}

View file

@ -13,18 +13,23 @@ extern unsigned watchdog_local_timer_ticks; /* is timer still ticking? */
* implement it in runtime after the correct arch/model was detected * implement it in runtime after the correct arch/model was detected
*/ */
typedef void (* arch_watchdog_method_t)(int); typedef void (* arch_watchdog_method_t)(const unsigned);
typedef int (* arch_watchdog_profile_init_t)(const unsigned);
struct arch_watchdog { struct arch_watchdog {
arch_watchdog_method_t init; /* initial setup */ arch_watchdog_method_t init; /* initial setup */
arch_watchdog_method_t reinit; /* reinitialization after a tick */ arch_watchdog_method_t reinit; /* reinit after a tick */
arch_watchdog_profile_init_t profile_init;
unsigned resetval; unsigned resetval;
unsigned watchdog_resetval;
unsigned profile_resetval;
}; };
extern struct arch_watchdog *watchdog; extern struct arch_watchdog *watchdog;
/* let the arch code do whatever it needs to setup the watchdog */ /* let the arch code do whatever it needs to setup or quit the watchdog */
int arch_watchdog_init(void); int arch_watchdog_init(void);
void arch_watchdog_stop(void);
/* if the watchdog detects lockup, let the arch code to handle it */ /* if the watchdog detects lockup, let the arch code to handle it */
void arch_watchdog_lockup(const struct nmi_frame * frame); void arch_watchdog_lockup(const struct nmi_frame * frame);
@ -33,4 +38,10 @@ void arch_watchdog_lockup(const struct nmi_frame * frame);
* arch specific code of the watchdog implementation */ * arch specific code of the watchdog implementation */
void nmi_watchdog_handler(struct nmi_frame * frame); void nmi_watchdog_handler(struct nmi_frame * frame);
/*
* start and stop profiling using the NMI watchdog
*/
int nmi_watchdog_start_profiling(const unsigned freq);
void nmi_watchdog_stop_profiling(void);
#endif /* __WATCHDOG_H__ */ #endif /* __WATCHDOG_H__ */