On Mon, Dec 30, 2024 at 12:53:07PM -0500, Rik van Riel wrote:
> Use broadcast TLB invalidation for kernel addresses when available.
>
> This stops us from having to send IPIs for kernel TLB flushes.
>
> Signed-off-by: Rik van Riel <riel@xxxxxxxxxxx>
> ---
>  arch/x86/mm/tlb.c | 31 +++++++++++++++++++++++++++++++
>  1 file changed, 31 insertions(+)
>
> diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
> index 6cf881a942bb..29207dc5b807 100644
> --- a/arch/x86/mm/tlb.c
> +++ b/arch/x86/mm/tlb.c
> @@ -1077,6 +1077,32 @@ void flush_tlb_all(void)
>  	on_each_cpu(do_flush_tlb_all, NULL, 1);
>  }
>
> +static void broadcast_kernel_range_flush(unsigned long start, unsigned long end)
> +{
> +	unsigned long addr;
> +	unsigned long maxnr = invlpgb_count_max;
> +	unsigned long threshold = tlb_single_page_flush_ceiling * maxnr;

The tip-tree preferred ordering of variable declarations at the
beginning of a function is reverse fir tree order::

	struct long_struct_name *descriptive_name;
	unsigned long foo, bar;
	unsigned int tmp;
	int ret;

The above is faster to parse than the reverse ordering::

	int ret;
	unsigned int tmp;
	unsigned long foo, bar;
	struct long_struct_name *descriptive_name;

And even more so than random ordering::

	unsigned long foo, bar;
	int ret;
	struct long_struct_name *descriptive_name;
	unsigned int tmp;

And you can get rid of maxnr and get the reversed xmas tree order:

diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 29207dc5b807..8a85acd9483d 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -1079,9 +1079,8 @@ void flush_tlb_all(void)
 
 static void broadcast_kernel_range_flush(unsigned long start, unsigned long end)
 {
+	unsigned long threshold = tlb_single_page_flush_ceiling * invlpgb_count_max;
 	unsigned long addr;
-	unsigned long maxnr = invlpgb_count_max;
-	unsigned long threshold = tlb_single_page_flush_ceiling * maxnr;
 
 	/*
 	 * TLBSYNC only waits for flushes originating on the same CPU.
@@ -1095,7 +1094,7 @@ static void broadcast_kernel_range_flush(unsigned long start, unsigned long end)
 	} else {
 		unsigned long nr;
 		for (addr = start; addr < end; addr += nr << PAGE_SHIFT) {
-			nr = min((end - addr) >> PAGE_SHIFT, maxnr);
+			nr = min((end - addr) >> PAGE_SHIFT, invlpgb_count_max);
 			invlpgb_flush_addr(addr, nr);
 		}
 	}

> +	/*
> +	 * TLBSYNC only waits for flushes originating on the same CPU.
> +	 * Disabling migration allows us to wait on all flushes.
> +	 */
> +	guard(preempt)();

Migration? Why not migrate_disable() then?

Although there's a big, thorny comment in include/linux/preempt.h about
its influence on sched.

> +
> +	if (end == TLB_FLUSH_ALL ||
> +	    (end - start) > threshold << PAGE_SHIFT) {
> +		invlpgb_flush_all();
> +	} else {
> +		unsigned long nr;
> +		for (addr = start; addr < end; addr += nr << PAGE_SHIFT) {
> +			nr = min((end - addr) >> PAGE_SHIFT, maxnr);
> +			invlpgb_flush_addr(addr, nr);
> +		}
> +	}
> +
> +	tlbsync();
> +}

-- 
Regards/Gruss,
    Boris.

https://people.kernel.org/tglx/notes-about-netiquette
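
For illustration, a minimal sketch of the migrate_disable() variant the
review asks about, reusing the helpers and variables from Rik's patch
(invlpgb_flush_all(), invlpgb_flush_addr(), tlbsync(), invlpgb_count_max,
tlb_single_page_flush_ceiling). This is only a sketch of the alternative
under discussion, not the patch as posted and not a statement of what the
final code should do::

	/*
	 * Sketch only: same flush logic as the posted patch, but pinning
	 * the task with migrate_disable()/migrate_enable() instead of
	 * disabling preemption. TLBSYNC waits for broadcast flushes
	 * issued from the current CPU, so the task must stay on this CPU
	 * between issuing the INVLPGB flushes and executing tlbsync().
	 */
	static void broadcast_kernel_range_flush(unsigned long start, unsigned long end)
	{
		unsigned long threshold = tlb_single_page_flush_ceiling * invlpgb_count_max;
		unsigned long addr;

		/* Pin the task to this CPU; preemption stays enabled. */
		migrate_disable();

		if (end == TLB_FLUSH_ALL ||
		    (end - start) > threshold << PAGE_SHIFT) {
			invlpgb_flush_all();
		} else {
			unsigned long nr;

			for (addr = start; addr < end; addr += nr << PAGE_SHIFT) {
				nr = min((end - addr) >> PAGE_SHIFT, invlpgb_count_max);
				invlpgb_flush_addr(addr, nr);
			}
		}

		/* Wait for the flushes issued above from this CPU. */
		tlbsync();

		migrate_enable();
	}

Unlike guard(preempt)(), migrate_disable() leaves preemption on, so other
tasks may run on this CPU in between; that is harmless for TLBSYNC, which
only requires the flushes and the wait to happen on the same CPU. The
trade-off is the scheduler impact described in the include/linux/preempt.h
comment the review refers to, which is exactly the open question.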