On Tue, Aug 04, 2015 at 07:25:52PM +0900, Seiichi Ikarashi wrote: > I found miscalculation of irq_info->load for any irqs assigned to > topo_obj other than CPUs. With the debug option, you can see it as > follows: > > -- > Package 0: numa_node is 0 cpu mask is 0000003f (load 1770000000) > (snip) > Interrupt 102 node_num is -1 (ethernet/459999226:11117) > Interrupt 33 node_num is -1 (legacy/1:0) > Interrupt 0 node_num is -1 (other/1770000000:1) > Interrupt 64 node_num is -1 (other/1:0) > -- > > Though IRQ0 had just one interruption during the period, its load value > was wrongly calculated to be 1,770,000,000 nsec. > This is because compute_irq_branch_load_share() does not take into > account any irq counts of children and grandchildren, and does mis- > understand that only IRQ0 has consumed all the (irq + softirq) CPU time. > > To fix the problem, I introduce following two changes: > > - Add topo_obj->irq_count in order to accumulate interrupts effectively, > - Change topo_obj->load from the average of children's load to the sum > of them. I believe it would make sense from the topological point of > view. > > With the modification, the debug log shows: > > -- > Package 0: numa_node is 0 cpu mask is 0000003f (load 7340266498) > (snip) > Interrupt 102 node_num is -1 (ethernet/2459985766:61406) > Interrupt 33 node_num is -1 (legacy/1:0) > Interrupt 64 node_num is -1 (other/1:0) > Interrupt 62 node_num is -1 (other/1:0) > Interrupt 60 node_num is -1 (other/1:0) > Interrupt 58 node_num is -1 (other/1:0) > Interrupt 9 node_num is -1 (other/1:0) > Interrupt 4 node_num is -1 (other/1:0) > Interrupt 0 node_num is -1 (other/20815:1) > -- > > The load of IRQ0 is now calculated to be 20,815 nsec. It looks more > accurate. > > Signed-off-by: Seiichi Ikarashi <s.ikarashi at jp.fujitsu.com> > > --- > procinterrupts.c | 70 ++++++++++++++++++++++++++++++++++++++--------------- > types.h | 1 + > 2 files changed, 51 insertions(+), 20 deletions(-) > > diff --git a/procinterrupts.c b/procinterrupts.c > index f4dcdc7..b7fb8ee 100644 > --- a/procinterrupts.c > +++ b/procinterrupts.c > @@ -211,13 +211,6 @@ void parse_proc_interrupts(void) > } > > > -static void accumulate_irq_count(struct irq_info *info, void *data) > -{ > - uint64_t *acc = data; > - > - *acc += (info->irq_count - info->last_irq_count); > -} > - > static void assign_load_slice(struct irq_info *info, void *data) > { > uint64_t *load_slice = data; > @@ -243,36 +236,68 @@ static uint64_t get_parent_branch_irq_count_share(struct topo_obj *d) > total_irq_count /= g_list_length((d->parent)->children); > } > > - if (g_list_length(d->interrupts) > 0) > - for_each_irq(d->interrupts, accumulate_irq_count, &total_irq_count); > + total_irq_count += d->irq_count; > > return total_irq_count; > } > > +static void get_children_branch_irq_count(struct topo_obj *d, void *data) > +{ > + uint64_t *total_irq_count = data; > + > + if (g_list_length(d->children) > 0) > + for_each_object(d->children, get_children_branch_irq_count, total_irq_count); > + > + *total_irq_count += d->irq_count; > +} > + > static void compute_irq_branch_load_share(struct topo_obj *d, void *data __attribute__((unused))) > { > uint64_t local_irq_counts = 0; > uint64_t load_slice; > - int load_divisor = g_list_length(d->children); > - > - d->load /= (load_divisor ? load_divisor : 1); > > if (g_list_length(d->interrupts) > 0) { > local_irq_counts = get_parent_branch_irq_count_share(d); > + if (g_list_length(d->children) > 0) > + for_each_object(d->children, get_children_branch_irq_count, &local_irq_counts); > load_slice = local_irq_counts ? (d->load / local_irq_counts) : 1; > for_each_irq(d->interrupts, assign_load_slice, &load_slice); > } > > - if (d->parent) > - d->parent->load += d->load; > } > > -static void reset_load(struct topo_obj *d, void *data __attribute__((unused))) > +static void accumulate_irq_count(struct irq_info *info, void *data) > +{ > + uint64_t *acc = data; > + > + *acc += (info->irq_count - info->last_irq_count); > +} > + > +static void accumulate_interrupts(struct topo_obj *d, void *data __attribute__((unused))) > +{ > + if (g_list_length(d->children) > 0) { > + for_each_object(d->children, accumulate_interrupts, NULL); > + } > + > + d->irq_count = 0; > + if (g_list_length(d->interrupts) > 0) > + for_each_irq(d->interrupts, accumulate_irq_count, &(d->irq_count)); > +} > + > +static void accumulate_load(struct topo_obj *d, void *data) > { > - if (d->parent) > - reset_load(d->parent, NULL); > + uint64_t *load = data; > + > + *load += d->load; > +} > > - d->load = 0; > +static void set_load(struct topo_obj *d, void *data __attribute__((unused))) > +{ > + if (g_list_length(d->children) > 0) { > + for_each_object(d->children, set_load, NULL); > + d->load = 0; > + for_each_object(d->children, accumulate_load, &(d->load)); > + } > } > > void parse_proc_stat(void) > @@ -347,9 +372,14 @@ void parse_proc_stat(void) > } > > /* > - * Reset the load values for all objects above cpus > + * Set the load values for all objects above cpus > + */ > + for_each_object(numa_nodes, set_load, NULL); > + > + /* > + * Collect local irq_count on each object > */ > - for_each_object(cache_domains, reset_load, NULL); > + for_each_object(numa_nodes, accumulate_interrupts, NULL); > > /* > * Now that we have load for each cpu attribute a fair share of the load > diff --git a/types.h b/types.h > index 71ce6a2..d022154 100644 > --- a/types.h > +++ b/types.h > @@ -46,6 +46,7 @@ enum obj_type_e { > struct topo_obj { > uint64_t load; > uint64_t last_load; > + uint64_t irq_count; > enum obj_type_e obj_type; > int number; > int powersave_mode; > Applied, thanks Neil