scheduling - time quantum in milliseconds

- Currently the cpu time quantum is timer-tick based. Thus the
  remaining quantum is decreased only if the process is interrupted
  by a timer tick. As processes block a lot, this typically does not
  happen for normal user processes. Also, the quantum depends on the
  frequency of the timer.

- This change makes the quantum milliseconds based. Internally the
  milliseconds are translated into cpu cycles. Every time userspace
  execution is interrupted by the kernel, the cycles just consumed by the
  current process are deducted from the remaining quantum.

- It makes the quantum system timer frequency independent.

- The boot processes' quanta are loosely derived from the tick-based
  quanta and a 60Hz timer, and are subject to future change

- the 64-bit arithmetic is a little ugly; it will be changed once we have
  compiler support for 64-bit integers (soon)
This commit is contained in:
Tomas Hruby 2010-05-25 08:06:14 +00:00
parent ac14a989b3
commit 451a6890d6
17 changed files with 188 additions and 102 deletions

View file

@ -35,4 +35,7 @@ unsigned long ex64lo(u64_t i);
unsigned long ex64hi(u64_t i);
u64_t make64(unsigned long lo, unsigned long hi);
#define is_zero64(i) ((i).lo == 0 && (i).hi == 0)
#define make_zero64(i) do { (i).lo = (i).hi = 0; } while(0)
#endif /* _MINIX__U64_H */

View file

@ -106,7 +106,7 @@ PRIVATE void apic_calibrate_clocks(void)
{
u32_t lvtt, val, lapic_delta;
u64_t tsc_delta;
u32_t cpu_freq;
u64_t cpu_freq;
irq_hook_t calib_clk;
@ -162,11 +162,9 @@ PRIVATE void apic_calibrate_clocks(void)
lapic_bus_freq[cpuid] = system_hz * lapic_delta / (PROBE_TICKS - 1);
BOOT_VERBOSE(printf("APIC bus freq %lu MHz\n",
lapic_bus_freq[cpuid] / 1000000));
cpu_freq = div64u(tsc_delta, PROBE_TICKS - 1) * system_hz;
BOOT_VERBOSE(printf("CPU %d freq %lu MHz\n", cpuid,
cpu_freq / 1000000));
cpu_freq = mul64(div64u64(tsc_delta, PROBE_TICKS - 1), make64(system_hz, 0));
cpu_set_freq(cpuid, cpu_freq);
BOOT_VERBOSE(cpu_print_freq(cpuid));
}
PRIVATE void lapic_set_timer_one_shot(const u32_t value)

View file

@ -30,6 +30,12 @@ PRIVATE u64_t tsc_ctr_switch; /* when did we switched time accounting */
PRIVATE irq_hook_t pic_timer_hook; /* interrupt handler hook */
PRIVATE unsigned probe_ticks;
PRIVATE u64_t tsc0, tsc1;
#define PROBE_TICKS (system_hz / 10)
PRIVATE unsigned tsc_per_ms[CONFIG_MAX_CPUS];
/*===========================================================================*
* init_8235A_timer *
*===========================================================================*/
@ -77,12 +83,64 @@ PRIVATE clock_t read_8253A_timer(void)
return count;
}
/*
 * Interrupt handler used as a probe while the cpu frequency is being
 * estimated. It counts the interrupts seen so far in probe_ticks and
 * latches the TSC twice: into tsc0 on the first interrupt and into tsc1
 * on interrupt number PROBE_TICKS. It always returns 1.
 */
PRIVATE int calib_cpu_handler(irq_hook_t * UNUSED(hook))
{
	u64_t now;

	probe_ticks++;
	read_tsc_64(&now);

	/* first sample opens the measurement window ... */
	if (probe_ticks == 1)
		tsc0 = now;
	/* ... and the PROBE_TICKS-th sample closes it */
	else if (probe_ticks == PROBE_TICKS)
		tsc1 = now;

	return 1;
}
/*
 * Estimate the frequency of the current cpu. The legacy timer (IRQ 0) is
 * run at system_hz with calib_cpu_handler attached; the TSC cycles that
 * elapse between the samples it takes are scaled to one second and stored
 * with cpu_set_freq().
 */
PRIVATE void estimate_cpu_freq(void)
{
	irq_hook_t probe_hook;
	u64_t cycles;
	u64_t cycles_per_tick;
	u64_t hz;

	/* attach the sampling handler to the legacy timer interrupt */
	put_irq_handler(&probe_hook, CLOCK_IRQ, calib_cpu_handler);

	/* start the PIC timer so that the handler gets called */
	intr_enable();
	init_8253A_timer(system_hz);

	/* spin with interrupts enabled until the handler has seen enough ticks */
	for (;;) {
		if (probe_ticks >= PROBE_TICKS)
			break;
		intr_enable();
	}

	intr_disable();
	stop_8253A_timer();

	/* the sampling handler is not needed anymore */
	rm_irq_handler(&probe_hook);

	/*
	 * tsc0 is taken on tick 1 and tsc1 on tick PROBE_TICKS, so the
	 * window spans PROBE_TICKS - 1 timer periods
	 */
	cycles = sub64(tsc1, tsc0);
	cycles_per_tick = div64u64(cycles, PROBE_TICKS - 1);
	hz = mul64(cycles_per_tick, make64(system_hz, 0));

	cpu_set_freq(cpuid, hz);
	BOOT_VERBOSE(cpu_print_freq(cpuid));
}
PUBLIC int arch_init_local_timer(unsigned freq)
{
#ifdef CONFIG_APIC
/* if we know the address, lapic is enabled and we should use it */
if (lapic_addr) {
unsigned cpu = cpuid;
lapic_set_timer_periodic(freq);
tsc_per_ms[cpu] = div64u(cpu_get_freq(cpu), 1000);
} else
{
BOOT_VERBOSE(printf("Initiating legacy i8253 timer\n"));
@ -90,6 +148,9 @@ PUBLIC int arch_init_local_timer(unsigned freq)
{
#endif
init_8253A_timer(freq);
estimate_cpu_freq();
/* always only 1 cpu in the system */
tsc_per_ms[0] = div64u(cpu_get_freq(0), 1000);
}
return 0;
@ -133,14 +194,40 @@ PUBLIC void cycles_accounting_init(void)
/*
 * Charge the cpu cycles elapsed since the last accounting point
 * (tsc_ctr_switch) to process p and start a new accounting period. For
 * processes with a non-negative endpoint the same cycles are also deducted
 * from the remaining quantum, p_cpu_time_left, which never drops below zero.
 *
 * NOTE(review): this listing carries both the pre-change and the
 * post-change lines of the function (tsc is declared twice and p_cycles is
 * updated twice); presumably only the later line of each pair belongs to
 * the new code - confirm against the real file.
 */
PUBLIC void context_stop(struct proc * p)
{
u64_t tsc;
u64_t tsc, tsc_delta;
read_tsc_64(&tsc);
p->p_cycles = add64(p->p_cycles, sub64(tsc, tsc_ctr_switch));
/* cycles consumed since the previous accounting point */
tsc_delta = sub64(tsc, tsc_ctr_switch);
p->p_cycles = add64(p->p_cycles, tsc_delta);
/* the next accounting period starts at this TSC value */
tsc_ctr_switch = tsc;
/*
* deduct the just consumed cpu cycles from the cpu time left for this
* process during its current quantum. Skip IDLE and other pseudo kernel
* tasks
*/
if (p->p_endpoint >= 0) {
#if DEBUG_RACE
/* with DEBUG_RACE the quantum is always marked as completely used up */
make_zero64(p->p_cpu_time_left);
#else
/* if (tsc_delta < p->p_cpu_time_left) in 64bit */
if (tsc_delta.hi < p->p_cpu_time_left.hi ||
(tsc_delta.hi == p->p_cpu_time_left.hi &&
tsc_delta.lo < p->p_cpu_time_left.lo))
p->p_cpu_time_left = sub64(p->p_cpu_time_left, tsc_delta);
else {
/* consumed at least as much as was left: clamp to zero */
make_zero64(p->p_cpu_time_left);
}
#endif
}
}
/*
 * Run context_stop() on behalf of the IDLE process so that the cycles
 * elapsed since the last accounting point are charged to it.
 */
PUBLIC void context_stop_idle(void)
{
	struct proc * idle_proc = proc_addr(IDLE);

	context_stop(idle_proc);
}
/*
 * Convert a time span given in milliseconds into a 64-bit number of cpu
 * cycles, using the tsc_per_ms[] entry of the current cpu.
 */
PUBLIC u64_t ms_2_cpu_time(unsigned ms)
{
	unsigned cycles_per_ms = tsc_per_ms[cpuid];

	return mul64u(cycles_per_ms, ms);
}

View file

@ -2,6 +2,7 @@
#include "kernel/watchdog.h"
#include "proto.h"
#include <minix/minlib.h>
#include <minix/u64.h>
#include "apic.h"
@ -18,7 +19,7 @@
PRIVATE void intel_arch_watchdog_init(int cpu)
{
u32_t cpuf;
u64_t cpuf;
u32_t val;
ia32_msr_write(MSR_PERFMON_CRT0, 0, 0);
@ -32,11 +33,11 @@ PRIVATE void intel_arch_watchdog_init(int cpu)
* lowest 31 bits writable :(
*/
cpuf = cpu_get_freq(cpu);
if (cpuf > 0x7fffffffU)
cpuf >>= 2;
watchdog->resetval = cpuf;
while (cpuf.hi || cpuf.lo > 0x7fffffffU)
cpuf = div64u64(cpuf, 2);
watchdog->resetval = cpuf.lo;
ia32_msr_write(MSR_PERFMON_CRT0, 0, -cpuf);
ia32_msr_write(MSR_PERFMON_CRT0, 0, -cpuf.lo);
ia32_msr_write(MSR_PERFMON_SEL0, 0, val | MSR_PERFMON_SEL0_ENABLE);

View file

@ -63,6 +63,7 @@ PRIVATE clock_t realtime = 0; /* real time clock */
* The boot processor timer interrupt handler. In addition to non-boot cpus it
* keeps real time and notifies the clock task if need be
*/
extern unsigned ooq_msg;
PUBLIC int bsp_timer_int_handler(void)
{
unsigned ticks;
@ -76,7 +77,6 @@ PUBLIC int bsp_timer_int_handler(void)
realtime += ticks;
ap_timer_int_handler();
assert(!proc_is_runnable(proc_ptr) || proc_ptr->p_ticks_left > 0);
/* if a timer expired, notify the clock task */
if ((next_timeout <= realtime)) {
@ -201,14 +201,7 @@ PUBLIC int ap_timer_int_handler(void)
p->p_user_time += ticks;
#if DEBUG_RACE
/* With DEBUG_RACE, every process gets interrupted. */
p->p_ticks_left = 0;
#else
if (priv(p)->s_flags & PREEMPTIBLE) {
p->p_ticks_left -= ticks;
}
#endif
/* FIXME make this ms too */
if (! (priv(p)->s_flags & BILLABLE)) {
billp->p_sys_time += ticks;
}
@ -243,9 +236,6 @@ PUBLIC int ap_timer_int_handler(void)
/* Update load average. */
load_update();
/* check if the processes still have some ticks left */
check_ticks_left(p);
return 1;
}

View file

@ -13,4 +13,6 @@ _PROTOTYPE(int arch_init_local_timer, (unsigned freq));
_PROTOTYPE(void arch_stop_local_timer, (void));
_PROTOTYPE(int arch_register_local_timer_handler, (irq_handler_t handler));
_PROTOTYPE( u64_t ms_2_cpu_time, (unsigned ms));
#endif /* __CLOCK_H__ */

View file

@ -58,7 +58,7 @@ EXTERN int verboseflags;
EXTERN int config_no_apic; /* optionaly turn off apic */
#endif
EXTERN unsigned cpu_hz[CONFIG_MAX_CPUS];
EXTERN u64_t cpu_hz[CONFIG_MAX_CPUS];
#define cpu_set_freq(cpu, freq) do {cpu_hz[cpu] = freq;} while (0)
#define cpu_get_freq(cpu) cpu_hz[cpu]

View file

@ -15,6 +15,7 @@
#include <a.out.h>
#include <minix/com.h>
#include <minix/endpoint.h>
#include <minix/u64.h>
#include "proc.h"
#include "debug.h"
#include "clock.h"
@ -81,8 +82,8 @@ PUBLIC void main(void)
ip->endpoint = rp->p_endpoint; /* ipc endpoint */
rp->p_scheduler = NULL; /* no user space scheduler */
rp->p_priority = ip->priority; /* current priority */
rp->p_quantum_size = ip->quantum; /* quantum size in ticks */
rp->p_ticks_left = ip->quantum; /* current credit */
rp->p_quantum_size_ms = ip->quantum; /* quantum size */
make_zero64(rp->p_cpu_time_left);
strncpy(rp->p_name, ip->proc_name, P_NAME_LEN); /* set process name */
/* See if this process is immediately schedulable.

View file

@ -40,6 +40,7 @@
#include "kernel.h"
#include "proc.h"
#include "vm.h"
#include "clock.h"
/* Scheduling and message passing functions */
FORWARD _PROTOTYPE( void idle, (void));
@ -112,7 +113,6 @@ PUBLIC void switch_to_user(void)
*/
if (proc_is_runnable(proc_ptr))
goto check_misc_flags;
/*
* if a process becomes not runnable while handling the misc flags, we
* need to pick a new one here and start from scratch. Also if the
@ -121,18 +121,12 @@ PUBLIC void switch_to_user(void)
not_runnable_pick_new:
if (proc_is_preempted(proc_ptr)) {
proc_ptr->p_rts_flags &= ~RTS_PREEMPTED;
if (proc_is_runnable(proc_ptr))
enqueue_head(proc_ptr);
}
/*
* If this process is scheduled by the kernel, we renew it's quantum
* and remove it's RTS_NO_QUANTUM flag.
*/
if (proc_no_quantum(proc_ptr) && proc_kernel_scheduler(proc_ptr)) {
/* give new quantum */
proc_ptr->p_ticks_left = proc_ptr->p_quantum_size;
RTS_UNSET(proc_ptr, RTS_NO_QUANTUM);
if (proc_is_runnable(proc_ptr)) {
if (!is_zero64(proc_ptr->p_cpu_time_left))
enqueue_head(proc_ptr);
else
enqueue(proc_ptr);
}
}
/*
@ -154,7 +148,6 @@ check_misc_flags:
assert(proc_ptr);
assert(proc_is_runnable(proc_ptr));
assert(proc_ptr->p_ticks_left > 0);
while (proc_ptr->p_misc_flags &
(MF_KCALL_RESUME | MF_DELIVERMSG |
MF_SC_DEFER | MF_SC_TRACE | MF_SC_ACTIVE)) {
@ -218,6 +211,13 @@ check_misc_flags:
if (!proc_is_runnable(proc_ptr))
break;
}
/*
* check the quantum left before it runs again. We must do it only here
* as we are sure that a possible out-of-quantum message to the
* scheduler will not collide with the regular ipc
*/
if (is_zero64(proc_ptr->p_cpu_time_left))
proc_no_time(proc_ptr);
/*
* After handling the misc flags the selected process might not be
* runnable anymore. We have to checkit and schedule another one
@ -233,7 +233,7 @@ check_misc_flags:
proc_ptr = arch_finish_switch_to_user();
assert(proc_ptr->p_ticks_left > 0);
assert(!is_zero64(proc_ptr->p_cpu_time_left));
context_stop(proc_addr(KERNEL));
@ -1206,7 +1206,7 @@ PRIVATE void enqueue_head(struct proc *rp)
* the process was runnable without its quantum expired when dequeued. A
* process with no time left should vahe been handled else and differently
*/
assert(rp->p_ticks_left > 0);
assert(!is_zero64(rp->p_cpu_time_left));
assert(q >= 0);
@ -1394,6 +1394,11 @@ const int fatalflag;
PRIVATE void notify_scheduler(struct proc *p)
{
message m_no_quantum;
int err;
assert(!proc_kernel_scheduler(p));
/* dequeue the process */
RTS_SET(p, RTS_NO_QUANTUM);
/*
@ -1401,46 +1406,30 @@ PRIVATE void notify_scheduler(struct proc *p)
* quantum. This is done by sending a message to the scheduler
* on the process's behalf
*/
if (proc_kernel_scheduler(p)) {
/*
* If a scheduler is scheduling itself or has no scheduler, and
* runs out of quantum, we don't send a message. The
* RTS_NO_QUANTUM flag will be removed in switch_to_user.
*/
m_no_quantum.m_source = p->p_endpoint;
m_no_quantum.m_type = SCHEDULING_NO_QUANTUM;
if ((err = mini_send(p, p->p_scheduler->p_endpoint,
&m_no_quantum, FROM_KERNEL))) {
panic("WARNING: Scheduling: mini_send returned %d\n", err);
}
}
PUBLIC void proc_no_time(struct proc * p)
{
if (!proc_kernel_scheduler(p) && priv(p)->s_flags & PREEMPTIBLE) {
/* this dequeues the process */
notify_scheduler(p);
}
else {
message m_no_quantum;
int err;
m_no_quantum.m_source = p->p_endpoint;
m_no_quantum.m_type = SCHEDULING_NO_QUANTUM;
if ((err = mini_send(p, p->p_scheduler->p_endpoint,
&m_no_quantum, FROM_KERNEL))) {
panic("WARNING: Scheduling: mini_send returned %d\n", err);
}
}
}
PUBLIC void check_ticks_left(struct proc * p)
{
if (p->p_ticks_left <= 0) {
p->p_ticks_left = 0;
if (priv(p)->s_flags & PREEMPTIBLE) {
/* this dequeues the process */
notify_scheduler(p);
}
else {
/*
* non-preemptible processes only need their quantum to
* be renewed. In fact, they by pass scheduling
*/
p->p_ticks_left = p->p_quantum_size;
/*
* non-preemptible processes only need their quantum to
* be renewed. In fact, they by pass scheduling
*/
p->p_cpu_time_left = ms_2_cpu_time(p->p_quantum_size_ms);
#if DEBUG_RACE
RTS_SET(proc_ptr, RTS_PREEMPTED);
RTS_UNSET(proc_ptr, RTS_PREEMPTED);
RTS_SET(proc_ptr, RTS_PREEMPTED);
RTS_UNSET(proc_ptr, RTS_PREEMPTED);
#endif
}
}
}

View file

@ -26,8 +26,9 @@ struct proc {
short p_misc_flags; /* flags that do not suspend the process */
char p_priority; /* current process priority */
char p_ticks_left; /* number of scheduling ticks left */
char p_quantum_size; /* quantum size in ticks */
u64_t p_cpu_time_left; /* time left to use the cpu */
unsigned p_quantum_size_ms; /* assigned time quantum in ms
FIXME remove this */
struct proc *p_scheduler; /* who should get out of quantum msg */
struct mem_map p_memmap[NR_LOCAL_SEGS]; /* memory map (T, D, S) */

View file

@ -50,7 +50,7 @@ _PROTOTYPE( int isokendpt_f, (const char *file, int line, endpoint_t e, int *p,
_PROTOTYPE( int isokendpt_f, (endpoint_t e, int *p, int f) );
#define isokendpt_d(e, p, f) isokendpt_f((e), (p), (f))
#endif
_PROTOTYPE( void check_ticks_left, (struct proc *p));
_PROTOTYPE( void proc_no_time, (struct proc *p));
/* start.c */
_PROTOTYPE( void cstart, (u16_t cs, u16_t ds, u16_t mds,
@ -184,4 +184,7 @@ _PROTOTYPE( int copy_msg_to_user, (struct proc * p, message * src,
message * user_mbuf));
_PROTOTYPE(void switch_address_space, (struct proc * p));
_PROTOTYPE(void release_address_space, (struct proc *pr));
/* utility.c */
_PROTOTYPE( void cpu_print_freq, (unsigned cpu));
#endif /* PROTO_H */

View file

@ -2,6 +2,7 @@
#include <signal.h>
#include <sys/sigcontext.h>
#include <minix/endpoint.h>
#include "kernel/clock.h"
/*===========================================================================*
* do_schedule *
@ -29,9 +30,9 @@ PUBLIC int do_schedule(struct proc * caller, message * m_ptr)
RTS_SET(p, RTS_NO_QUANTUM);
/* Clear the scheduling bit and enqueue the process */
p->p_priority = m_ptr->SCHEDULING_PRIORITY;
p->p_quantum_size = m_ptr->SCHEDULING_QUANTUM;
p->p_ticks_left = m_ptr->SCHEDULING_QUANTUM;
p->p_priority = m_ptr->SCHEDULING_PRIORITY;
p->p_quantum_size_ms = m_ptr->SCHEDULING_QUANTUM;
p->p_cpu_time_left = ms_2_cpu_time(m_ptr->SCHEDULING_QUANTUM);
RTS_UNSET(p, RTS_NO_QUANTUM);

View file

@ -10,6 +10,7 @@
#include "kernel/system.h"
#include <minix/endpoint.h>
#include <assert.h>
#if USE_SETALARM
@ -49,6 +50,7 @@ PUBLIC int do_setalarm(struct proc * caller, message * m_ptr)
reset_timer(tp);
} else {
tp->tmr_exp_time = (use_abs_time) ? exp_time : exp_time + get_uptime();
assert(tp->tmr_exp_time > get_uptime());
set_timer(tp, tp->tmr_exp_time, tp->tmr_func);
}
return(OK);

View file

@ -65,25 +65,25 @@ PUBLIC char *t_stack[TOT_STACK_SPACE / sizeof(char *)];
*/
PUBLIC struct boot_image image[] = {
/* process nr, flags, qs, queue, stack, name */
/* process nr, flags, ms, queue, stack, name */
{IDLE, 0, 0, 0, IDL_S, "idle" },
{CLOCK, 0, 0, 0, IDL_S, "clock" },
{SYSTEM, 0, 0, 0, IDL_S, "system"},
{HARDWARE, 0, 0, 0, IDL_S, "kernel"},
{DS_PROC_NR, BVM_F, 4, 4, 0, "ds" },
{RS_PROC_NR, 0, 4, 4, 0, "rs" },
{DS_PROC_NR, BVM_F, 50, 4, 0, "ds" },
{RS_PROC_NR, 0, 50, 4, 0, "rs" },
{PM_PROC_NR, 0, 32, 4, 0, "pm" },
{SCHED_PROC_NR, 0, 32, 4, 0, "sched" },
{FS_PROC_NR, 0, 32, 5, 0, "vfs" },
{MEM_PROC_NR, BVM_F, 4, 3, 0, "memory"},
{LOG_PROC_NR, BVM_F, 4, 2, 0, "log" },
{TTY_PROC_NR, BVM_F, 4, 1, 0, "tty" },
{MFS_PROC_NR, BVM_F, 32, 5, 0, "mfs" },
{VM_PROC_NR, 0, 32, 2, 0, "vm" },
{PFS_PROC_NR, BVM_F, 32, 5, 0, "pfs" },
{INIT_PROC_NR, BVM_F, 8, USER_Q, 0, "init" },
{PM_PROC_NR, 0,500, 4, 0, "pm" },
{SCHED_PROC_NR, 0,500, 4, 0, "sched" },
{FS_PROC_NR, 0,500, 5, 0, "vfs" },
{MEM_PROC_NR, BVM_F, 50, 3, 0, "memory"},
{LOG_PROC_NR, BVM_F, 50, 2, 0, "log" },
{TTY_PROC_NR, BVM_F, 50, 1, 0, "tty" },
{MFS_PROC_NR, BVM_F,500, 5, 0, "mfs" },
{VM_PROC_NR, 0,500, 2, 0, "vm" },
{PFS_PROC_NR, BVM_F,500, 5, 0, "pfs" },
{INIT_PROC_NR, BVM_F,200, USER_Q, 0, "init" },
};
/* Verify the size of the system image table at compile time. Also verify that

View file

@ -14,7 +14,7 @@ typedef struct { /* bitmap for system indexes */
struct boot_image {
proc_nr_t proc_nr; /* process number to use */
int flags; /* process flags */
unsigned char quantum; /* quantum (tick count) */
unsigned quantum; /* time quantum in ms */
int priority; /* scheduling priority */
int stksize; /* stack size for tasks */
char proc_name[P_NAME_LEN]; /* name in process table */

View file

@ -71,3 +71,11 @@ int c; /* character to append */
}
return;
}
/*
 * Print the frequency recorded for the given cpu, in MHz.
 *
 * cpu: index of the cpu whose frequency (as returned by cpu_get_freq())
 *      is printed
 */
PUBLIC void cpu_print_freq(unsigned cpu)
{
	u64_t freq;

	freq = cpu_get_freq(cpu);
	/* cpu is unsigned, hence %u; the 64-bit frequency is scaled down to MHz */
	printf("CPU %u freq %lu MHz\n", cpu, div64u(freq, 1000000));
}

View file

@ -397,10 +397,10 @@ PUBLIC void proctab_dmp()
size = rp->p_memmap[T].mem_len
+ ((rp->p_memmap[S].mem_phys + rp->p_memmap[S].mem_len) - data);
printf(" %5d %10d ", _ENDPOINT_G(rp->p_endpoint), rp->p_endpoint);
printf("%-8.8s %02u %02d/%02u %6lu %6lu ",
printf("%-8.8s %02u 0x%08x%08x/%02u %6lu %6lu ",
rp->p_name,
rp->p_priority,
rp->p_ticks_left, rp->p_quantum_size,
rp->p_cpu_time_left.lo, rp->p_cpu_time_left.hi, rp->p_quantum_size_ms,
rp->p_user_time, rp->p_sys_time);
PRINTRTS(rp);
printf("\n");