Re: [RFC PATCH v2 2/3] sched/topology: Rework CPU capacity asymmetry detection

Valentin Schneider <valentin.schneider@xxxxxxx> · Wed, 05 May 2021 19:17:39 +0100

Nitpicks ahead...

On 28/04/21 10:32, Beata Michalska wrote:
> @@ -1958,65 +1958,308 @@ static bool topology_span_sane(struct sched_domain_topology_level *tl,
>
>       return true;
>  }
> +/**
> + * Asym capacity bits
> + */
> +
> +/**
> + * Cached cpu masks for those sched domains, at a given topology level,
> + * that do represent CPUs with asymmetric capacities.
> + *
> + * Each topology level will get the cached data assigned,
> + * with asym cap sched_flags (SD_ASYM_CPUCAPACITY and SD_ASYM_CPUCAPACITY_FULL
> + * accordingly) and the corresponding cpumask for:
> + * - domains that do span CPUs with different capacities
> + * - domains where all CPU capacities are visible for all CPUs within
> + *   the domain
> + *
> + * Within a single topology level there might be domains
> + * with different scope of asymmetry:
> + *	none     -> .
> + *	partial  -> SD_ASYM_CPUCAPACITY
> + *	full     -> SD_ASYM_CPUCAPACITY|SD_ASYM_CPUCAPACITY_FULL
> + */
> +struct asym_cache_data {
> +
  ^
That should go

[...]

> -	if (!asym)
> -		return NULL;
> +	/* No asymmetry detected so skip the rest */
> +	if (!(cap_count > 1))
> +		goto leave;
> +
> +	if (!alloc_cpumask_var(&cpu_mask, GFP_KERNEL))
> +		goto leave;
>
> +	/* Get the number of topology levels */
> +	for_each_sd_topology(tl) level_count++;
>       /*
> -	 * Examine topology from all CPU's point of views to detect the lowest
> -	 * sched_domain_topology_level where a highest capacity CPU is visible
> -	 * to everyone.
> +	 * Allocate an array to store cached data per each topology level
>        */

That comment can be squashed into a single line.

> -	for_each_cpu(i, cpu_map) {
> -		unsigned long max_capacity = arch_scale_cpu_capacity(i);
> -		int tl_id = 0;
> +	scan_data = kcalloc(level_count, sizeof(*scan_data), GFP_KERNEL);
> +	if (!scan_data) {
> +		free_cpumask_var(cpu_mask);
> +		goto leave;
> +	}
>
> -		for_each_sd_topology(tl) {
> -			if (tl_id < asym_level)
> -				goto next_level;
> +	level_count = 0;
> +
> +	for_each_sd_topology(tl) {
> +		unsigned int local_cap_count;
> +		bool full_asym = true;
> +		const struct cpumask *mask;
> +		struct asym_cache_data *data = &scan_data[level_count++];
>
> -			for_each_cpu_and(j, tl->mask(i), cpu_map) {
> -				unsigned long capacity;
> +#ifdef CONFIG_NUMA
> +		/*
> +		 * For NUMA we might end-up in a sched domain that spans numa
> +		 * nodes with cpus with different capacities which would not be
> +		 * caught  by the above scan as those will have separate
                          ^
Stray whitespace >>>>>>>>>^

> +		 * cpumasks - subject to numa level
> +		 * @see: sched_domains_curr_level & sd_numa_mask
> +		 * Considered to be a no-go
> +		 */
> +		if (WARN_ON_ONCE(tl->numa_level && !full_asym))
> +			goto leave;
> +#endif
>
> -				capacity = arch_scale_cpu_capacity(j);
> +		if (asym_tl) {
> +			data->sched_flags = SD_ASYM_CPUCAPACITY |
> +					    SD_ASYM_CPUCAPACITY_FULL;
> +			continue;
> +		}
>
> -				if (capacity <= max_capacity)
> -					continue;
> +		cpumask_copy(cpu_mask, cpu_map);
> +		cpu = cpumask_first(cpu_mask);
> +
> +		while (cpu < nr_cpu_ids) {
> +			int i;
> +
> +			/*
> +			 * Tracking each CPU capacity 'scan' id to distinguish
> +			 * discovered capacity sets between different CPU masks
> +			 * at each topology level: capturing unique capacity
> +			 * values at each scan stage
> +			 */
> +			++scan_id;
> +			local_cap_count = 0;
>
> -				max_capacity = capacity;
> -				asym_level = tl_id;
> -				asym_tl = tl;
> +			mask = tl->mask(cpu);
> +			for_each_cpu_and(i, mask, cpu_map) {
> +
  ^
This should go.

> +				capacity = arch_scale_cpu_capacity(i);
> +
> +				list_for_each_entry(entry, &capacity_set, link) {
> +					if (entry->capacity == capacity &&
> +					    entry->scan_level < scan_id) {
> +						entry->scan_level = scan_id;
> +						++local_cap_count;
> +					}
> +				}
> +				__cpumask_clear_cpu(i, cpu_mask);
>                       }
> -next_level:
> -			tl_id++;
> +			if (cap_count != local_cap_count)
> +				full_asym = false;
> +			if (local_cap_count > 1) {
> +				int flags = (cap_count != local_cap_count)
> +					 ? SD_ASYM_CPUCAPACITY
> +					 : SD_ASYM_CPUCAPACITY
> +					 | SD_ASYM_CPUCAPACITY_FULL;

I haven't found many ternaries split over several lines, but those seem to
still follow the "operand at EOL" thing. The end result isn't particularly
pretty here (which is quite subjective, I know); another way to express
this would be:

                                int flags = SD_ASYM_CPUCAPACITY |
                                        SD_ASYM_CPUCAPACITY_FULL *
                                        (cap_count == local_cap_count);

which is... Meh.