Re: [PATCH v14 04/13] x86/mm: use INVLPGB for kernel TLB flushes

Dave Hansen <dave.hansen@xxxxxxxxx> · Fri, 28 Feb 2025 11:00:52 -0800

On 2/25/25 19:00, Rik van Riel wrote:
> Use broadcast TLB invalidation for kernel addresses when available.
> 
> Remove the need to send IPIs for kernel TLB flushes.

Nit: the changelog doesn't address the refactoring.

*Ideally*, you'd create the helpers and move the code there in one patch
and then actually "use INVLPGB for kernel TLB flushes" in the next. It's
compact enough here that it's not a deal breaker.

> +static void invlpgb_kernel_range_flush(struct flush_tlb_info *info)
> +{
> +	unsigned long addr, nr;
> +
> +	for (addr = info->start; addr < info->end; addr += nr << PAGE_SHIFT) {
> +		nr = (info->end - addr) >> PAGE_SHIFT;
> +		nr = clamp_val(nr, 1, invlpgb_count_max);
> +		invlpgb_flush_addr_nosync(addr, nr);
> +	}
> +	__tlbsync();
> +}

This needs a comment or two. One should explain that the function can take
large sizes:

/*
 * Flush an arbitrarily large range of memory with INVLPGB
 */

But it is also important to point out that the _instruction_ can not.  A
comment like this would be great in the loop just above the clamp:

		/*
		 * INVLPGB has a limit on the size of ranges
		 * it can flush. Break large flushes up.
		 */

>  static void do_kernel_range_flush(void *info)
>  {
>  	struct flush_tlb_info *f = info;
> @@ -1087,6 +1099,22 @@ static void do_kernel_range_flush(void *info)
>  		flush_tlb_one_kernel(addr);
>  }
>  
> +static void kernel_tlb_flush_all(struct flush_tlb_info *info)
> +{
> +	if (cpu_feature_enabled(X86_FEATURE_INVLPGB))
> +		invlpgb_flush_all();
> +	else
> +		on_each_cpu(do_flush_tlb_all, NULL, 1);
> +}
> +
> +static void kernel_tlb_flush_range(struct flush_tlb_info *info)
> +{
> +	if (cpu_feature_enabled(X86_FEATURE_INVLPGB))
> +		invlpgb_kernel_range_flush(info);
> +	else
> +		on_each_cpu(do_kernel_range_flush, info, 1);
> +}
> +
>  void flush_tlb_kernel_range(unsigned long start, unsigned long end)
>  {
>  	struct flush_tlb_info *info;
> @@ -1097,9 +1125,9 @@ void flush_tlb_kernel_range(unsigned long start, unsigned long end)
>  				  TLB_GENERATION_INVALID);
>  
>  	if (info->end == TLB_FLUSH_ALL)
> -		on_each_cpu(do_flush_tlb_all, NULL, 1);
> +		kernel_tlb_flush_all(info);
>  	else
> -		on_each_cpu(do_kernel_range_flush, info, 1);
> +		kernel_tlb_flush_range(info);
>  
>  	put_flush_tlb_info();
>  }

But the structure of this code is much better than in previous versions.
With the comments fixed:

Acked-by: Dave Hansen <dave.hansen@xxxxxxxxx>