On Tue, Jan 15, 2019 at 10:15:01AM +0000, Patrick Bellasi wrote:
> +#ifdef CONFIG_UCLAMP_TASK
> +struct uclamp_bucket {
> +	unsigned long value : bits_per(SCHED_CAPACITY_SCALE);
> +	unsigned long tasks : BITS_PER_LONG - bits_per(SCHED_CAPACITY_SCALE);
> +};
> +struct uclamp_cpu {
> +	unsigned int value;

	/* 4 byte hole */

> +	struct uclamp_bucket bucket[UCLAMP_BUCKETS];
> +};

With the default of 5, this makes UCLAMP_BUCKETS := 6, so struct uclamp_cpu
ends up being 7 'unsigned long's, or 56 bytes on 64-bit (with a 4-byte
hole).

> +#endif /* CONFIG_UCLAMP_TASK */
> +
> /*
>  * This is the main, per-CPU runqueue data structure.
>  *
> @@ -835,6 +879,11 @@ struct rq {
> 	unsigned long nr_load_updates;
> 	u64 nr_switches;
>
> +#ifdef CONFIG_UCLAMP_TASK
> +	/* Utilization clamp values based on CPU's RUNNABLE tasks */
> +	struct uclamp_cpu uclamp[UCLAMP_CNT] ____cacheline_aligned;

That makes this 112 bytes with 8 bytes in 2 holes, which is short of
two 64-byte cachelines (128 bytes). Is that the best layout?

> +#endif
> +
> 	struct cfs_rq cfs;
> 	struct rt_rq rt;
> 	struct dl_rq dl;
> --
> 2.19.2
>