On Mon, 21 Mar 2022, Sebastian Andrzej Siewior wrote:
> The access to mlock_pvec is protected by disabling preemption via
> get_cpu_var() or implicitly by having preemption disabled by the
> caller (in the mlock_page_drain() case).
> This breaks on PREEMPT_RT since folio_lruvec_lock_irq() acquires a
> sleeping lock in this section.
> 
> Create struct mlock_pvec, which consists of the local_lock_t and the
> pagevec. Acquire the local_lock() before accessing the per-CPU
> pagevec.
> Replace mlock_page_drain() with a _local() version which is invoked
> on the local CPU and acquires the local_lock_t, and a _remote()
> version which uses the pagevec from a remote CPU which is offline.
> 
> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@xxxxxxxxxxxxx>

Acked-by: Hugh Dickins <hughd@xxxxxxxxxx>

Thanks a lot for catching this: your patch looks exactly right to me.
Bad mistake on my part to have missed all the local_lock'ing in
mm/swap.c when I rebased my old mm/munlock implementation to the
current tree.

> ---
>  mm/internal.h   |  6 ++++--
>  mm/migrate.c    |  2 +-
>  mm/mlock.c      | 46 ++++++++++++++++++++++++++++++++++++----------
>  mm/page_alloc.c |  1 +
>  mm/rmap.c       |  4 ++--
>  mm/swap.c       |  4 +++-
>  6 files changed, 47 insertions(+), 16 deletions(-)
> 
> diff --git a/mm/internal.h b/mm/internal.h
> index 9be0227ccc942..50c3fd71d7ddd 100644
> --- a/mm/internal.h
> +++ b/mm/internal.h
> @@ -456,7 +456,8 @@ static inline void munlock_vma_page(struct page *page,
>  }
>  void mlock_new_page(struct page *page);
>  bool need_mlock_page_drain(int cpu);
> -void mlock_page_drain(int cpu);
> +void mlock_page_drain_local(void);
> +void mlock_page_drain_remote(int cpu);
> 
>  extern pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma);
> 
> @@ -547,7 +548,8 @@ static inline void munlock_vma_page(struct page *page,
>  			struct vm_area_struct *vma, bool compound) { }
>  static inline void mlock_new_page(struct page *page) { }
>  static inline bool need_mlock_page_drain(int cpu) { return false; }
> -static inline void mlock_page_drain(int cpu) { }
> +static inline void mlock_page_drain_local(void) { }
> +static inline void mlock_page_drain_remote(int cpu) { }
>  static inline void vunmap_range_noflush(unsigned long start, unsigned long end)
>  {
>  }
> diff --git a/mm/migrate.c b/mm/migrate.c
> index 3d60823afd2d3..de175e2fdba5d 100644
> --- a/mm/migrate.c
> +++ b/mm/migrate.c
> @@ -246,7 +246,7 @@ static bool remove_migration_pte(struct folio *folio,
>  			set_pte_at(vma->vm_mm, pvmw.address, pvmw.pte, pte);
>  		}
>  		if (vma->vm_flags & VM_LOCKED)
> -			mlock_page_drain(smp_processor_id());
> +			mlock_page_drain_local();
> 
>  		trace_remove_migration_pte(pvmw.address, pte_val(pte),
>  					   compound_order(new));
> diff --git a/mm/mlock.c b/mm/mlock.c
> index efd2dd2943dec..6e503b8690f52 100644
> --- a/mm/mlock.c
> +++ b/mm/mlock.c
> @@ -28,7 +28,14 @@
> 
>  #include "internal.h"
> 
> -static DEFINE_PER_CPU(struct pagevec, mlock_pvec);
> +struct mlock_pvec {
> +	local_lock_t lock;
> +	struct pagevec vec;
> +};
> +
> +static DEFINE_PER_CPU(struct mlock_pvec, mlock_pvec) = {
> +	.lock = INIT_LOCAL_LOCK(lock),
> +};
> 
>  bool can_do_mlock(void)
>  {
> @@ -203,18 +210,30 @@ static void mlock_pagevec(struct pagevec *pvec)
>  	pagevec_reinit(pvec);
>  }
> 
> -void mlock_page_drain(int cpu)
> +void mlock_page_drain_local(void)
> {
>  	struct pagevec *pvec;
> 
> -	pvec = &per_cpu(mlock_pvec, cpu);
> +	local_lock(&mlock_pvec.lock);
> +	pvec = this_cpu_ptr(&mlock_pvec.vec);
> +	if (pagevec_count(pvec))
> +		mlock_pagevec(pvec);
> +	local_unlock(&mlock_pvec.lock);
> +}
> +
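[ An aside for readers following the conversion pattern, not a comment
on the patch itself: get_cpu_var() only disables preemption, which no
longer suffices once the protected section can take a sleeping lock on
PREEMPT_RT; pairing the per-CPU data with a local_lock_t is the usual
fix. A minimal sketch of the idiom, with made-up names:

	#include <linux/local_lock.h>
	#include <linux/percpu.h>

	struct demo_pcp {
		local_lock_t lock;	/* protects @count */
		int count;
	};

	static DEFINE_PER_CPU(struct demo_pcp, demo_pcp) = {
		.lock = INIT_LOCAL_LOCK(lock),
	};

	static void demo_update(void)
	{
		struct demo_pcp *p;

		/* preempt_disable() on !RT, a per-CPU spinlock on RT */
		local_lock(&demo_pcp.lock);
		p = this_cpu_ptr(&demo_pcp);
		p->count++;
		local_unlock(&demo_pcp.lock);
	}

On !PREEMPT_RT this compiles down to the same preempt_disable() as
before, so the common case pays nothing extra. ]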
> +void mlock_page_drain_remote(int cpu)
> +{
> +	struct pagevec *pvec;
> +
> +	WARN_ON_ONCE(cpu_online(cpu));
> +	pvec = &per_cpu(mlock_pvec.vec, cpu);
>  	if (pagevec_count(pvec))
>  		mlock_pagevec(pvec);
>  }
> 
>  bool need_mlock_page_drain(int cpu)
>  {
> -	return pagevec_count(&per_cpu(mlock_pvec, cpu));
> +	return pagevec_count(&per_cpu(mlock_pvec.vec, cpu));
>  }
> 
>  /**
> @@ -223,7 +242,10 @@ bool need_mlock_page_drain(int cpu)
>   */
>  void mlock_folio(struct folio *folio)
>  {
> -	struct pagevec *pvec = &get_cpu_var(mlock_pvec);
> +	struct pagevec *pvec;
> +
> +	local_lock(&mlock_pvec.lock);
> +	pvec = this_cpu_ptr(&mlock_pvec.vec);
> 
>  	if (!folio_test_set_mlocked(folio)) {
>  		int nr_pages = folio_nr_pages(folio);
> @@ -236,7 +258,7 @@ void mlock_folio(struct folio *folio)
>  	if (!pagevec_add(pvec, mlock_lru(&folio->page)) ||
>  	    folio_test_large(folio) || lru_cache_disabled())
>  		mlock_pagevec(pvec);
> -	put_cpu_var(mlock_pvec);
> +	local_unlock(&mlock_pvec.lock);
>  }
> 
>  /**
> @@ -245,9 +267,11 @@ void mlock_folio(struct folio *folio)
>   */
>  void mlock_new_page(struct page *page)
>  {
> -	struct pagevec *pvec = &get_cpu_var(mlock_pvec);
> +	struct pagevec *pvec;
>  	int nr_pages = thp_nr_pages(page);
> 
> +	local_lock(&mlock_pvec.lock);
> +	pvec = this_cpu_ptr(&mlock_pvec.vec);
>  	SetPageMlocked(page);
>  	mod_zone_page_state(page_zone(page), NR_MLOCK, nr_pages);
>  	__count_vm_events(UNEVICTABLE_PGMLOCKED, nr_pages);
> @@ -256,7 +280,7 @@ void mlock_new_page(struct page *page)
>  	if (!pagevec_add(pvec, mlock_new(page)) ||
>  	    PageHead(page) || lru_cache_disabled())
>  		mlock_pagevec(pvec);
> -	put_cpu_var(mlock_pvec);
> +	local_unlock(&mlock_pvec.lock);
>  }
> 
>  /**
> @@ -265,8 +289,10 @@ void mlock_new_page(struct page *page)
>   */
>  void munlock_page(struct page *page)
>  {
> -	struct pagevec *pvec = &get_cpu_var(mlock_pvec);
> +	struct pagevec *pvec;
> 
> +	local_lock(&mlock_pvec.lock);
> +	pvec = this_cpu_ptr(&mlock_pvec.vec);
>  	/*
>  	 * TestClearPageMlocked(page) must be left to __munlock_page(),
>  	 * which will check whether the page is multiply mlocked.
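[ For readers wondering why the _remote() variant above can touch
another CPU's pagevec without taking the local_lock_t: it is only ever
called for a CPU that has already gone offline, so nothing can be
running on that CPU to race with the drain, and the
WARN_ON_ONCE(cpu_online(cpu)) asserts exactly that. A hypothetical
hotplug "dead" callback, just to illustrate the calling convention
(demo_cpu_dead() is not in the patch; the real callsite is the
page_alloc_cpu_dead() hunk below):

	/* sketch only: runs after @cpu has been taken offline */
	static int demo_cpu_dead(unsigned int cpu)
	{
		/* @cpu is offline: its pagevec cannot be in use */
		mlock_page_drain_remote(cpu);
		return 0;
	}
]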
> @@ -276,7 +302,7 @@ void munlock_page(struct page *page)
>  	if (!pagevec_add(pvec, page) ||
>  	    PageHead(page) || lru_cache_disabled())
>  		mlock_pagevec(pvec);
> -	put_cpu_var(mlock_pvec);
> +	local_unlock(&mlock_pvec.lock);
>  }
> 
>  static int mlock_pte_range(pmd_t *pmd, unsigned long addr,
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index f648decfe39d1..94f515845d53a 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -8355,6 +8355,7 @@ static int page_alloc_cpu_dead(unsigned int cpu)
>  	struct zone *zone;
> 
>  	lru_add_drain_cpu(cpu);
> +	mlock_page_drain_remote(cpu);
>  	drain_pages(cpu);
> 
>  	/*
> diff --git a/mm/rmap.c b/mm/rmap.c
> index 3eb95fcde7000..90b4956096ef3 100644
> --- a/mm/rmap.c
> +++ b/mm/rmap.c
> @@ -1727,7 +1727,7 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
>  		 */
>  		page_remove_rmap(subpage, vma, folio_test_hugetlb(folio));
>  		if (vma->vm_flags & VM_LOCKED)
> -			mlock_page_drain(smp_processor_id());
> +			mlock_page_drain_local();
>  		folio_put(folio);
>  	}
> 
> @@ -2005,7 +2005,7 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma,
>  		 */
>  		page_remove_rmap(subpage, vma, folio_test_hugetlb(folio));
>  		if (vma->vm_flags & VM_LOCKED)
> -			mlock_page_drain(smp_processor_id());
> +			mlock_page_drain_local();
>  		folio_put(folio);
>  	}
> 
> diff --git a/mm/swap.c b/mm/swap.c
> index bceff0cb559c9..7e320ec08c6ae 100644
> --- a/mm/swap.c
> +++ b/mm/swap.c
> @@ -624,7 +624,6 @@ void lru_add_drain_cpu(int cpu)
>  		pagevec_lru_move_fn(pvec, lru_lazyfree_fn);
> 
>  	activate_page_drain(cpu);
> -	mlock_page_drain(cpu);
>  }
> 
>  /**
> @@ -706,6 +705,7 @@ void lru_add_drain(void)
>  	local_lock(&lru_pvecs.lock);
>  	lru_add_drain_cpu(smp_processor_id());
>  	local_unlock(&lru_pvecs.lock);
> +	mlock_page_drain_local();
>  }
> 
>  /*
> @@ -720,6 +720,7 @@ static void lru_add_and_bh_lrus_drain(void)
>  	lru_add_drain_cpu(smp_processor_id());
>  	local_unlock(&lru_pvecs.lock);
>  	invalidate_bh_lrus_cpu();
> +	mlock_page_drain_local();
>  }
> 
>  void lru_add_drain_cpu_zone(struct zone *zone)
> @@ -728,6 +729,7 @@ void lru_add_drain_cpu_zone(struct zone *zone)
>  	lru_add_drain_cpu(smp_processor_id());
>  	drain_local_pages(zone);
>  	local_unlock(&lru_pvecs.lock);
> +	mlock_page_drain_local();
>  }
> 
>  #ifdef CONFIG_SMP
> -- 
> 2.35.1
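[ One note on the mm/swap.c hunks above, for future readers: the mlock
drain has to move out of lru_add_drain_cpu() because that function is
also called for a remote (dead) CPU from page_alloc_cpu_dead(), while
local_lock() can only protect the current CPU's data. So the
local-CPU callers now drain the mlock pagevec themselves, after
dropping lru_pvecs.lock, and the hotplug path uses the lockless
_remote() variant instead. For reference, the resulting
lru_add_drain() reads:

	void lru_add_drain(void)
	{
		local_lock(&lru_pvecs.lock);
		lru_add_drain_cpu(smp_processor_id());
		local_unlock(&lru_pvecs.lock);
		/* takes and releases mlock_pvec.lock itself */
		mlock_page_drain_local();
	}
]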