On 03/14/2007 01:31 PM, Jeremy Fitzhardinge wrote: > Dan Hecht wrote: >> Sounds good. I don't see this in your patchset you sent yesterday >> though; did you add it after sending out those patches? > > Yes. > >> if so, could you forward the new patch? does it explicitly prevent >> stolen time from getting accounted as user/system time or does it >> just rely on NO_HZ mode sort of happening to work that way (since the >> one shot timer is skipped ahead for missed ticks)? > > Hm, not sure. It doesn't care how often it gets called; it just > accumulates results up to that point, but I'm not sure if the time would > get double accounted. Perhaps it doesn't matter when using > xen_sched_clock(). > I think you might be double counting time in some cases. sched_clock() isn't really relevant to stolen time accounting (i.e. cpustat->steal). I think what you want is to make sure that the sum of the cputime passed to all of: account_user_time account_system_time account_steal_time adds up to the total amount of time that has passed. I think it is sort of working for you (i.e. doesn't always double count stolen ticks) since in NO_HZ mode, update_process_time (which calls account_user_time & account_system_time) happens to be skipped during periods of stolen time due to the hrtimer_forward()'ing of the one shot expiry. > Did the get_scheduled_time -> sched_clock make sense to you? > The get_scheduled_time change should work fine for vmi. > J > > > ------------------------------------------------------------------------ > > Signed-off-by: Jeremy Fitzhardinge <jeremy@xxxxxxxxxxxxx> > Cc: john stultz <johnstul@xxxxxxxxxx> > > --- > arch/i386/xen/time.c | 101 ++++++++++++++++++++++++++++++++++++++++++++++++++ > 1 file changed, 101 insertions(+) > > =================================================================== > --- a/arch/i386/xen/time.c > +++ b/arch/i386/xen/time.c > @@ -2,6 +2,7 @@ > #include <linux/interrupt.h> > #include <linux/clocksource.h> > #include <linux/clockchips.h> > +#include <linux/kernel_stat.h> > > #include <asm/xen/hypervisor.h> > #include <asm/xen/hypercall.h> > @@ -14,6 +15,7 @@ > > #define XEN_SHIFT 22 > #define TIMER_SLOP 100000 /* Xen may fire a timer up to this many ns early */ > +#define NS_PER_TICK (1000000000ll / HZ) > > static DEFINE_PER_CPU(struct clock_event_device, xen_clock_events); > > @@ -28,6 +30,99 @@ struct shadow_time_info { > > static DEFINE_PER_CPU(struct shadow_time_info, shadow_time); > > +/* runstate info updated by Xen */ > +static DEFINE_PER_CPU(struct vcpu_runstate_info, runstate); > + > +/* snapshots of runstate info */ > +static DEFINE_PER_CPU(struct vcpu_runstate_info, runstate_snapshot); > + > +/* unused ns of stolen and blocked time */ > +static DEFINE_PER_CPU(u64, residual_stolen); > +static DEFINE_PER_CPU(u64, residual_blocked); > + > +/* > + Runstate accounting > + */ > +static void get_runstate_snapshot(struct vcpu_runstate_info *res) > +{ > + u64 state_time; > + struct vcpu_runstate_info *state; > + > + preempt_disable(); > + > + state = &__get_cpu_var(runstate); > + > + do { > + state_time = state->state_entry_time; > + barrier(); > + *res = *state; > + barrier(); > + } while(state->state_entry_time != state_time); > + > + preempt_enable(); > +} > + > +static void setup_runstate_info(void) > +{ > + struct vcpu_register_runstate_memory_area area; > + > + area.addr.v = &__get_cpu_var(runstate); > + > + if (HYPERVISOR_vcpu_op(VCPUOP_register_runstate_memory_area, > + smp_processor_id(), &area)) > + BUG(); > + > + get_runstate_snapshot(&__get_cpu_var(runstate_snapshot)); > +} > + > +static void do_stolen_accounting(void) > +{ > + struct vcpu_runstate_info state; > + struct vcpu_runstate_info *snap; > + u64 blocked, runnable, offline, stolen; > + cputime_t ticks; > + > + get_runstate_snapshot(&state); > + > + WARN_ON(state.state != RUNSTATE_running); > + > + snap = &__get_cpu_var(runstate_snapshot); > + > + /* work out how much time the VCPU has not been runn*ing* */ > + blocked = state.time[RUNSTATE_blocked] - snap->time[RUNSTATE_blocked]; > + runnable = state.time[RUNSTATE_runnable] - snap->time[RUNSTATE_runnable]; > + offline = state.time[RUNSTATE_offline] - snap->time[RUNSTATE_offline]; > + > + *snap = state; > + > + /* Add the appropriate number of ticks of stolen time, > + including any left-overs from last time. Passing NULL to > + account_steal_time accounts the time as stolen. */ > + stolen = runnable + offline + __get_cpu_var(residual_stolen); > + ticks = 0; > + while(stolen >= NS_PER_TICK) { > + ticks++; > + stolen -= NS_PER_TICK; > + } > + __get_cpu_var(residual_stolen) = stolen; > + account_steal_time(NULL, ticks); > + > + /* Add the appropriate number of ticks of blocked time, > + including any left-overs from last time. Passing idle to > + account_steal_time accounts the time as idle/wait. */ > + blocked += __get_cpu_var(residual_blocked); > + ticks = 0; > + while(blocked >= NS_PER_TICK) { > + ticks++; > + blocked -= NS_PER_TICK; > + } > + __get_cpu_var(residual_blocked) = blocked; > + account_steal_time(idle_task(smp_processor_id()), ticks); > +} > + > + > + > +/* Get the CPU speed from Xen */ > unsigned long xen_cpu_khz(void) > { > u64 cpu_khz = 1000000ULL << 32; > @@ -264,6 +359,8 @@ static irqreturn_t xen_timerop_timer_int > ret = IRQ_HANDLED; > } > > + do_stolen_accounting(); > + > return ret; > } > > @@ -338,6 +435,8 @@ static irqreturn_t xen_vcpuop_timer_inte > ret = IRQ_HANDLED; > } > > + do_stolen_accounting(); > + > return ret; > } > > @@ -380,6 +479,8 @@ static void xen_setup_timer(int cpu) > evt->cpumask = cpumask_of_cpu(cpu); > evt->irq = irq; > clockevents_register_device(evt); > + > + setup_runstate_info(); > > put_cpu_var(xen_clock_events); > } > > > ------------------------------------------------------------------------ > > Subject: Implement xen_sched_clock > > Implement xen_sched_clock, which returns the number of ns the current > vcpu has been actually in the running state (vs blocked, > runnable-but-not-running, or offline) since boot. > > Signed-off-by: Jeremy Fitzhardinge <jeremy@xxxxxxxxxxxxx> > Cc: john stultz <johnstul@xxxxxxxxxx> > > --- > arch/i386/xen/enlighten.c | 2 +- > arch/i386/xen/time.c | 14 ++++++++++++++ > arch/i386/xen/xen-ops.h | 1 + > 3 files changed, 16 insertions(+), 1 deletion(-) > > =================================================================== > --- a/arch/i386/xen/enlighten.c > +++ b/arch/i386/xen/enlighten.c > @@ -664,7 +664,7 @@ static const struct paravirt_ops xen_par > .set_wallclock = xen_set_wallclock, > .get_wallclock = xen_get_wallclock, > .get_cpu_khz = xen_cpu_khz, > - .get_scheduled_cycles = native_read_tsc, > + .sched_clock = xen_sched_clock, > > #ifdef CONFIG_X86_LOCAL_APIC > .apic_write = paravirt_nop, > =================================================================== > --- a/arch/i386/xen/time.c > +++ b/arch/i386/xen/time.c > @@ -16,6 +16,8 @@ > #define XEN_SHIFT 22 > #define TIMER_SLOP 100000 /* Xen may fire a timer up to this many ns early */ > #define NS_PER_TICK (1000000000ll / HZ) > + > +static cycle_t xen_clocksource_read(void); > > static DEFINE_PER_CPU(struct clock_event_device, xen_clock_events); > > @@ -120,6 +122,18 @@ static void do_stolen_accounting(void) > account_steal_time(idle_task(smp_processor_id()), ticks); > } > > +/* Xen sched_clock implementation. Returns the number of RUNNING ns */ > +unsigned long long xen_sched_clock(void) > +{ > + struct vcpu_runstate_info state; > + cycle_t now = xen_clocksource_read(); > + > + get_runstate_snapshot(&state); > + > + WARN_ON(state.state != RUNSTATE_running); > + > + return state.time[RUNSTATE_running] + (now - state.state_entry_time); > +} > > > /* Get the CPU speed from Xen */ > =================================================================== > --- a/arch/i386/xen/xen-ops.h > +++ b/arch/i386/xen/xen-ops.h > @@ -14,6 +14,7 @@ void __init xen_time_init(void); > void __init xen_time_init(void); > unsigned long xen_get_wallclock(void); > int xen_set_wallclock(unsigned long time); > +unsigned long long xen_sched_clock(void); > > void xen_mark_init_mm_pinned(void); > _______________________________________________ Virtualization mailing list Virtualization@xxxxxxxxxxxxxx https://lists.osdl.org/mailman/listinfo/virtualization