From a665ae3de1b6f420ff48f03d5963351f22b66f69 Mon Sep 17 00:00:00 2001 From: Tomas Hruby Date: Sun, 19 Sep 2010 15:52:12 +0000 Subject: [PATCH] Userspace scheduling - exporting stats - contributed by Bjorn Swift - adds process accounting, for example counting the number of messages sent, how often the process was preempted and how much time it spent in the run queue. These statistics, along with the current cpu load, are sent back to the user-space scheduler in the Out Of Quantum message. - the user-space scheduler may choose to make use of these statistics when making scheduling decisions. For instance the cpu load becomes especially useful when scheduling on multiple cores. --- commands/profile/sprofalyze.pl | 2 + include/minix/com.h | 14 ++++++- kernel/arch/i386/arch_clock.c | 49 ++++++++++++++++++++++++- kernel/clock.h | 1 + kernel/cpulocals.h | 5 +++ kernel/main.c | 2 + kernel/proc.c | 67 +++++++++++++++++++++++++++++++--- kernel/proc.h | 10 +++++ kernel/proto.h | 4 +- kernel/system/do_fork.c | 1 + 10 files changed, 146 insertions(+), 9 deletions(-) diff --git a/commands/profile/sprofalyze.pl b/commands/profile/sprofalyze.pl index 2971cf115..f0dec85ee 100755 --- a/commands/profile/sprofalyze.pl +++ b/commands/profile/sprofalyze.pl @@ -38,9 +38,11 @@ servers/rs/rs servers/sched/sched servers/vfs/vfs servers/vm/vm +servers/sched/sched commands/service/service drivers/ahci/ahci +drivers/acpi/acpi drivers/amddev/amddev drivers/at_wini/at_wini drivers/atl2/atl2 diff --git a/include/minix/com.h b/include/minix/com.h index 250e5e21a..53e36ce6b 100644 --- a/include/minix/com.h +++ b/include/minix/com.h @@ -1141,13 +1141,23 @@ #define SCHEDULING_BASE 0xF00 #define SCHEDULING_NO_QUANTUM (SCHEDULING_BASE+1) +# define SCHEDULING_ACNT_DEQS m9_l1 +# define SCHEDULING_ACNT_IPC_SYNC m9_l2 +# define SCHEDULING_ACNT_IPC_ASYNC m9_l3 +# define SCHEDULING_ACNT_PREEMPT m9_l4 +# define SCHEDULING_ACNT_QUEUE m9_l5 +# define SCHEDULING_ACNT_CPU m9_s1 +# define SCHEDULING_ACNT_CPU_LOAD 
m9_s2 +/* These are used for SYS_SCHEDULE, a reply to SCHEDULING_NO_QUANTUM */ # define SCHEDULING_ENDPOINT m9_l1 # define SCHEDULING_QUANTUM m9_l2 # define SCHEDULING_PRIORITY m9_s1 # define SCHEDULING_CPU m9_l4 -/* SCHEDULING_START uses _ENDPOINT, _PRIORITY and _QUANTUM from - * SCHEDULING_NO_QUANTUM */ +/* + * SCHEDULING_START uses _ENDPOINT, _PRIORITY and _QUANTUM from + * SCHEDULING_NO_QUANTUM/SYS_SCHEDULE + */ #define SCHEDULING_START (SCHEDULING_BASE+2) # define SCHEDULING_SCHEDULER m9_l1 /* Overrides _ENDPOINT on return*/ # define SCHEDULING_PARENT m9_l3 diff --git a/kernel/arch/i386/arch_clock.c b/kernel/arch/i386/arch_clock.c index e7aae786e..db5b65c9a 100644 --- a/kernel/arch/i386/arch_clock.c +++ b/kernel/arch/i386/arch_clock.c @@ -183,7 +183,12 @@ PUBLIC int register_local_timer_handler(const irq_handler_t handler) PUBLIC void cycles_accounting_init(void) { - read_tsc_64(get_cpulocal_var_ptr(tsc_ctr_switch)); + unsigned cpu = cpuid; + + read_tsc_64(get_cpu_var_ptr(cpu, tsc_ctr_switch)); + + make_zero64(get_cpu_var(cpu, cpu_last_tsc)); + make_zero64(get_cpu_var(cpu, cpu_last_idle)); } PUBLIC void context_stop(struct proc * p) @@ -273,3 +278,45 @@ PUBLIC u64_t ms_2_cpu_time(unsigned ms) { return mul64u(tsc_per_ms[cpuid], ms); } + +PUBLIC unsigned cpu_time_2_ms(u64_t cpu_time) +{ + return div64u(cpu_time, tsc_per_ms[cpuid]); +} + +PUBLIC short cpu_load(void) +{ + u64_t current_tsc, *current_idle; + u64_t tsc_delta, idle_delta, busy; + struct proc *idle; + short load; + unsigned cpu = cpuid; + + u64_t *last_tsc, *last_idle; + + last_tsc = get_cpu_var_ptr(cpu, cpu_last_tsc); + last_idle = get_cpu_var_ptr(cpu, cpu_last_idle); + + idle = get_cpu_var_ptr(cpu, idle_proc);; + read_tsc_64(&current_tsc); + current_idle = &idle->p_cycles; /* ptr to idle proc */ + + /* calculate load since last cpu_load invocation */ + if (!is_zero64(*last_tsc)) { + tsc_delta = sub64(current_tsc, *last_tsc); + idle_delta = sub64(*current_idle, *last_idle); + + busy = sub64(tsc_delta, 
idle_delta); + busy = mul64(busy, make64(100, 0)); + load = div64(busy, tsc_delta).lo; + printf("CPULOAD %d\n", load); + + if (load > 100) + load = 100; + } else + load = 0; + + *last_tsc = current_tsc; + *last_idle = *current_idle; + return load; +} diff --git a/kernel/clock.h b/kernel/clock.h index 10813de79..8556d223d 100644 --- a/kernel/clock.h +++ b/kernel/clock.h @@ -17,5 +17,6 @@ _PROTOTYPE(void restart_local_timer, (void)); _PROTOTYPE(int register_local_timer_handler, (irq_handler_t handler)); _PROTOTYPE( u64_t ms_2_cpu_time, (unsigned ms)); +_PROTOTYPE( unsigned cpu_time_2_ms, (u64_t cpu_time)); #endif /* __CLOCK_H__ */ diff --git a/kernel/cpulocals.h b/kernel/cpulocals.h index 6c0437cf3..0d3c0ade4 100644 --- a/kernel/cpulocals.h +++ b/kernel/cpulocals.h @@ -78,6 +78,11 @@ DECLARE_CPULOCAL(int, cpu_is_idle); /* let the others know that you are idle */ DECLARE_CPULOCAL(u64_t ,tsc_ctr_switch); /* when did we switched time accounting */ +/* last values read from cpu when sending ooq msg to scheduler */ +DECLARE_CPULOCAL(u64_t, cpu_last_tsc); +DECLARE_CPULOCAL(u64_t, cpu_last_idle); + + DECLARE_CPULOCAL(char ,fpu_presence); /* whether the cpu has FPU or not */ DECLARE_CPULOCAL(struct proc * ,fpu_owner); /* who owns the FPU of the local cpu */ diff --git a/kernel/main.c b/kernel/main.c index d191ee9e0..1576502df 100644 --- a/kernel/main.c +++ b/kernel/main.c @@ -158,6 +158,8 @@ PUBLIC int main(void) ip->endpoint = rp->p_endpoint; /* ipc endpoint */ make_zero64(rp->p_cpu_time_left); strncpy(rp->p_name, ip->proc_name, P_NAME_LEN); /* set process name */ + + reset_proc_accounting(rp); /* See if this process is immediately schedulable. * In that case, set its privileges now and allow it to run. 
diff --git a/kernel/proc.c b/kernel/proc.c index f0289ea78..a3f339917 100644 --- a/kernel/proc.c +++ b/kernel/proc.c @@ -560,16 +560,24 @@ PUBLIC int do_ipc(reg_t r1, reg_t r2, reg_t r3) case RECEIVE: case NOTIFY: case SENDNB: + { + /* Process accounting for scheduling */ + caller_ptr->p_accounting.ipc_sync++; + return do_sync_ipc(caller_ptr, call_nr, (endpoint_t) r2, (message *) r3); + } case SENDA: { - /* + /* * Get and check the size of the argument in bytes as it is a * table */ size_t msg_size = (size_t) r2; + /* Process accounting for scheduling */ + caller_ptr->p_accounting.ipc_async++; + /* Limit size to something reasonable. An arbitrary choice is 16 * times the number of process table entries. */ @@ -1350,6 +1358,10 @@ PUBLIC void enqueue( } #endif + /* Make note of when this process was added to queue */ + read_tsc_64(&(get_cpulocal_var(proc_ptr)->p_accounting.enter_queue)); + + #if DEBUG_SANITYCHECKS assert(runqueues_ok_local()); #endif @@ -1394,6 +1406,14 @@ PRIVATE void enqueue_head(struct proc *rp) rp->p_nextready = rdy_head[q]; /* chain head of queue */ rdy_head[q] = rp; /* set new queue head */ + /* Make note of when this process was added to queue */ + read_tsc_64(&(get_cpulocal_var(proc_ptr->p_accounting.enter_queue))); + + + /* Process accounting for scheduling */ + rp->p_accounting.dequeues--; + rp->p_accounting.preempted++; + #if DEBUG_SANITYCHECKS assert(runqueues_ok_local()); #endif @@ -1402,7 +1422,7 @@ PRIVATE void enqueue_head(struct proc *rp) /*===========================================================================* * dequeue * *===========================================================================*/ -PUBLIC void dequeue(const struct proc *rp) +PUBLIC void dequeue(struct proc *rp) /* this process is no longer runnable */ { /* A process must be removed from the scheduling queues, for example, because @@ -1412,9 +1432,10 @@ PUBLIC void dequeue(const struct proc *rp) * This function can operate x-cpu as it always removes the 
process from the * queue of the cpu the process is currently assigned to. */ - register int q = rp->p_priority; /* queue to use */ - register struct proc **xpp; /* iterate over queue */ - register struct proc *prev_xp; + int q = rp->p_priority; /* queue to use */ + struct proc **xpp; /* iterate over queue */ + struct proc *prev_xp; + u64_t tsc, tsc_delta; struct proc **rdy_tail; @@ -1444,6 +1465,22 @@ PUBLIC void dequeue(const struct proc *rp) prev_xp = *xpp; /* save previous in chain */ } + + /* Process accounting for scheduling */ + rp->p_accounting.dequeues++; + + /* this is not all that accurate on virtual machines, especially with + IO bound processes that only spend a short amount of time in the queue + at a time. */ + if (!is_zero64(rp->p_accounting.enter_queue)) { + read_tsc_64(&tsc); + tsc_delta = sub64(tsc, rp->p_accounting.enter_queue); + rp->p_accounting.time_in_queue = add64(rp->p_accounting.time_in_queue, + tsc_delta); + make_zero64(rp->p_accounting.enter_queue); + } + + #if DEBUG_SANITYCHECKS assert(runqueues_ok_local()); #endif @@ -1561,6 +1598,16 @@ PRIVATE void notify_scheduler(struct proc *p) */ m_no_quantum.m_source = p->p_endpoint; m_no_quantum.m_type = SCHEDULING_NO_QUANTUM; + m_no_quantum.SCHEDULING_ACNT_QUEUE = cpu_time_2_ms(p->p_accounting.time_in_queue); + m_no_quantum.SCHEDULING_ACNT_DEQS = p->p_accounting.dequeues; + m_no_quantum.SCHEDULING_ACNT_IPC_SYNC = p->p_accounting.ipc_sync; + m_no_quantum.SCHEDULING_ACNT_IPC_ASYNC = p->p_accounting.ipc_async; + m_no_quantum.SCHEDULING_ACNT_PREEMPT = p->p_accounting.preempted; + m_no_quantum.SCHEDULING_ACNT_CPU = cpuid; + m_no_quantum.SCHEDULING_ACNT_CPU_LOAD = cpu_load(); + + /* Reset accounting */ + reset_proc_accounting(p); if ((err = mini_send(p, p->p_scheduler->p_endpoint, &m_no_quantum, FROM_KERNEL))) { @@ -1586,6 +1633,16 @@ PUBLIC void proc_no_time(struct proc * p) #endif } } + +PUBLIC void reset_proc_accounting(struct proc *p) +{ + p->p_accounting.preempted = 0; + p->p_accounting.ipc_sync 
= 0; + p->p_accounting.ipc_async = 0; + p->p_accounting.dequeues = 0; + make_zero64(p->p_accounting.time_in_queue); + make_zero64(p->p_accounting.enter_queue); +} PUBLIC void copr_not_available_handler(void) { diff --git a/kernel/proc.h b/kernel/proc.h index e1fa0fc88..0a9c78bc4 100644 --- a/kernel/proc.h +++ b/kernel/proc.h @@ -39,6 +39,16 @@ struct proc { run on */ #endif + /* Accounting statistics that get passed to the process' scheduler */ + struct { + u64_t enter_queue; /* time when enqueued (cycles) */ + u64_t time_in_queue; /* time spent in queue */ + unsigned long dequeues; + unsigned long ipc_sync; + unsigned long ipc_async; + unsigned long preempted; + } p_accounting; + struct mem_map p_memmap[NR_LOCAL_SEGS]; /* memory map (T, D, S) */ clock_t p_user_time; /* user time in ticks */ diff --git a/kernel/proto.h b/kernel/proto.h index 76725ca7c..167e78912 100644 --- a/kernel/proto.h +++ b/kernel/proto.h @@ -49,7 +49,7 @@ _PROTOTYPE( int do_ipc, (reg_t r1, reg_t r2, reg_t r3) ); _PROTOTYPE( void proc_init, (void) ); _PROTOTYPE( int mini_notify, (const struct proc *src, endpoint_t dst) ); _PROTOTYPE( void enqueue, (struct proc *rp) ); -_PROTOTYPE( void dequeue, (const struct proc *rp) ); +_PROTOTYPE( void dequeue, (struct proc *rp) ); _PROTOTYPE( void switch_to_user, (void) ); _PROTOTYPE( struct proc * arch_finish_switch_to_user, (void) ); _PROTOTYPE( struct proc *endpoint_lookup, (endpoint_t ep) ); @@ -61,6 +61,8 @@ _PROTOTYPE( int isokendpt_f, (endpoint_t e, int *p, int f) ); #define isokendpt_d(e, p, f) isokendpt_f((e), (p), (f)) #endif _PROTOTYPE( void proc_no_time, (struct proc *p)); +_PROTOTYPE( void reset_proc_accounting, (struct proc *p)); +_PROTOTYPE( void flag_account, (struct proc *p, int flag)); /* start.c */ _PROTOTYPE( void cstart, (u16_t cs, u16_t ds, u16_t mds, diff --git a/kernel/system/do_fork.c b/kernel/system/do_fork.c index e73f75487..12c1610f9 100644 --- a/kernel/system/do_fork.c +++ b/kernel/system/do_fork.c @@ -85,6 +85,7 @@ PUBLIC int 
do_fork(struct proc * caller, message * m_ptr) /* the child process is not runnable until it's scheduled. */ RTS_SET(rpc, RTS_NO_QUANTUM); + reset_proc_accounting(rpc); make_zero64(rpc->p_cpu_time_left); make_zero64(rpc->p_cycles);