Nitpicks ahead... On 28/04/21 10:32, Beata Michalska wrote: > @@ -1958,65 +1958,308 @@ static bool topology_span_sane(struct sched_domain_topology_level *tl, > > return true; > } > +/** > + * Asym capacity bits > + */ > + > +/** > + * Cached cpu masks for those sched domains, at a given topology level, > + * that do represent CPUs with asymmetric capacities. > + * > + * Each topology level will get the cached data assigned, > + * with asym cap sched_flags (SD_ASYM_CPUCAPACITY and SD_ASYM_CPUCAPACITY_FULL > + * accordingly) and the corresponding cpumask for: > + * - domains that do span CPUs with different capacities > + * - domains where all CPU capacities are visible for all CPUs within > + * the domain > + * > + * Within a single topology level there might be domains > + * with different scope of asymmetry: > + * none -> . > + * partial -> SD_ASYM_CPUCAPACITY > + * full -> SD_ASYM_CPUCAPACITY|SD_ASYM_CPUCAPACITY_FULL > + */ > +struct asym_cache_data { > + ^ That should go [...] > - if (!asym) > - return NULL; > + /* No asymmetry detected so skip the rest */ > + if (!(cap_count > 1)) > + goto leave; > + > + if (!alloc_cpumask_var(&cpu_mask, GFP_KERNEL)) > + goto leave; > > + /* Get the number of topology levels */ > + for_each_sd_topology(tl) level_count++; > /* > - * Examine topology from all CPU's point of views to detect the lowest > - * sched_domain_topology_level where a highest capacity CPU is visible > - * to everyone. > + * Allocate an array to store cached data per each topology level > */ That comment can be squashed into a single line. > - for_each_cpu(i, cpu_map) { > - unsigned long max_capacity = arch_scale_cpu_capacity(i); > - int tl_id = 0; > + scan_data = kcalloc(level_count, sizeof(*scan_data), GFP_KERNEL); > + if (!scan_data) { > + free_cpumask_var(cpu_mask); > + goto leave; > + } > > - for_each_sd_topology(tl) { > - if (tl_id < asym_level) > - goto next_level; > + level_count = 0; > + > + for_each_sd_topology(tl) { > + unsigned int local_cap_count; > + bool full_asym = true; > + const struct cpumask *mask; > + struct asym_cache_data *data = &scan_data[level_count++]; > > - for_each_cpu_and(j, tl->mask(i), cpu_map) { > - unsigned long capacity; > +#ifdef CONFIG_NUMA > + /* > + * For NUMA we might end-up in a sched domain that spans numa > + * nodes with cpus with different capacities which would not be > + * caught by the above scan as those will have separate ^ Stray whitespace >>>>>>>>>^ > + * cpumasks - subject to numa level > + * @see: sched_domains_curr_level & sd_numa_mask > + * Considered to be a no-go > + */ > + if (WARN_ON_ONCE(tl->numa_level && !full_asym)) > + goto leave; > +#endif > > - capacity = arch_scale_cpu_capacity(j); > + if (asym_tl) { > + data->sched_flags = SD_ASYM_CPUCAPACITY | > + SD_ASYM_CPUCAPACITY_FULL; > + continue; > + } > > - if (capacity <= max_capacity) > - continue; > + cpumask_copy(cpu_mask, cpu_map); > + cpu = cpumask_first(cpu_mask); > + > + while (cpu < nr_cpu_ids) { > + int i; > + > + /* > + * Tracking each CPU capacity 'scan' id to distinguish > + * discovered capacity sets between different CPU masks > + * at each topology level: capturing unique capacity > + * values at each scan stage > + */ > + ++scan_id; > + local_cap_count = 0; > > - max_capacity = capacity; > - asym_level = tl_id; > - asym_tl = tl; > + mask = tl->mask(cpu); > + for_each_cpu_and(i, mask, cpu_map) { > + ^ This should go. > + capacity = arch_scale_cpu_capacity(i); > + > + list_for_each_entry(entry, &capacity_set, link) { > + if (entry->capacity == capacity && > + entry->scan_level < scan_id) { > + entry->scan_level = scan_id; > + ++local_cap_count; > + } > + } > + __cpumask_clear_cpu(i, cpu_mask); > } > -next_level: > - tl_id++; > + if (cap_count != local_cap_count) > + full_asym = false; > + if (local_cap_count > 1) { > + int flags = (cap_count != local_cap_count) > + ? SD_ASYM_CPUCAPACITY > + : SD_ASYM_CPUCAPACITY > + | SD_ASYM_CPUCAPACITY_FULL; I haven't found many ternaries split over several lines, but those seem to still follow the "operand at EOL" thing. The end result isn't particularly pretty here (which is quite subjective, I know); another way to express this would be: int flags = SD_ASYM_CPUCAPACITY | SD_ASYM_CPUCAPACITY_FULL * (cap_count == local_cap_count); which is... Meh.