On Wed, Dec 18, 2019 at 11:05:30AM +0530, Aneesh Kumar K.V wrote: > --- a/include/asm-generic/tlb.h > +++ b/include/asm-generic/tlb.h > @@ -402,7 +402,12 @@ tlb_update_vma_flags(struct mmu_gather *tlb, struct vm_area_struct *vma) { } > > static inline void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb) > { > - if (!tlb->end) > + /* > + * Anything calling __tlb_adjust_range() also sets at least one of > + * these bits. > + */ > + if (!(tlb->freed_tables || tlb->cleared_ptes || tlb->cleared_pmds || > + tlb->cleared_puds || tlb->cleared_p4ds)) > return; FWIW, I looked at the GCC-generated assembly output for this (x86_64), and it did a single load and mask as expected.