Make pin_user_pages*() leave a ZERO_PAGE unpinned if it extracts a pointer to it from the page tables and make unpin_user_page*() correspondingly ignore a ZERO_PAGE when unpinning. We don't want to risk overrunning a zero page's refcount as we're only allowed ~2 million pins on it - something that userspace can conceivably trigger. Add a pair of functions to test whether a page or a folio is a ZERO_PAGE. Signed-off-by: David Howells <dhowells@xxxxxxxxxx> cc: Christoph Hellwig <hch@xxxxxxxxxxxxx> cc: David Hildenbrand <david@xxxxxxxxxx> cc: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> cc: Jens Axboe <axboe@xxxxxxxxx> cc: Al Viro <viro@xxxxxxxxxxxxxxxxxx> cc: Matthew Wilcox <willy@xxxxxxxxxxxxx> cc: Jan Kara <jack@xxxxxxx> cc: Jeff Layton <jlayton@xxxxxxxxxx> cc: Jason Gunthorpe <jgg@xxxxxxxxxx> cc: Logan Gunthorpe <logang@xxxxxxxxxxxx> cc: Hillf Danton <hdanton@xxxxxxxx> cc: Christian Brauner <brauner@xxxxxxxxxx> cc: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx> cc: linux-fsdevel@xxxxxxxxxxxxxxx cc: linux-block@xxxxxxxxxxxxxxx cc: linux-kernel@xxxxxxxxxxxxxxx cc: linux-mm@xxxxxxxxx --- Notes: ver #2) - Fix use of ZERO_PAGE(). - Add is_zero_page() and is_zero_folio() wrappers. - Return the zero page obtained, not ZERO_PAGE(0) unconditionally. include/linux/pgtable.h | 10 ++++++++++ mm/gup.c | 25 ++++++++++++++++++++++++- 2 files changed, 34 insertions(+), 1 deletion(-) diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index c5a51481bbb9..2b0431a11de2 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -1245,6 +1245,16 @@ static inline unsigned long my_zero_pfn(unsigned long addr) } #endif /* CONFIG_MMU */ +static inline bool is_zero_page(const struct page *page) +{ + return is_zero_pfn(page_to_pfn(page)); +} + +static inline bool is_zero_folio(const struct folio *folio) +{ + return is_zero_page(&folio->page); +} + #ifdef CONFIG_MMU #ifndef CONFIG_TRANSPARENT_HUGEPAGE diff --git a/mm/gup.c b/mm/gup.c index bbe416236593..69b002628f5d 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -51,7 +51,8 @@ static inline void sanity_check_pinned_pages(struct page **pages, struct page *page = *pages; struct folio *folio = page_folio(page); - if (!folio_test_anon(folio)) + if (is_zero_page(page) || + !folio_test_anon(folio)) continue; if (!folio_test_large(folio) || folio_test_hugetlb(folio)) VM_BUG_ON_PAGE(!PageAnonExclusive(&folio->page), page); @@ -131,6 +132,13 @@ struct folio *try_grab_folio(struct page *page, int refs, unsigned int flags) else if (flags & FOLL_PIN) { struct folio *folio; + /* + * Don't take a pin on the zero page - it's not going anywhere + * and it is used in a *lot* of places. + */ + if (is_zero_page(page)) + return page_folio(page); + /* * Can't do FOLL_LONGTERM + FOLL_PIN gup fast path if not in a * right zone, so fail and let the caller fall back to the slow @@ -180,6 +188,8 @@ struct folio *try_grab_folio(struct page *page, int refs, unsigned int flags) static void gup_put_folio(struct folio *folio, int refs, unsigned int flags) { if (flags & FOLL_PIN) { + if (is_zero_folio(folio)) + return; node_stat_mod_folio(folio, NR_FOLL_PIN_RELEASED, refs); if (folio_test_large(folio)) atomic_sub(refs, &folio->_pincount); @@ -224,6 +234,13 @@ int __must_check try_grab_page(struct page *page, unsigned int flags) if (flags & FOLL_GET) folio_ref_inc(folio); else if (flags & FOLL_PIN) { + /* + * Don't take a pin on the zero page - it's not going anywhere + * and it is used in a *lot* of places. + */ + if (is_zero_page(page)) + return 0; + /* * Similar to try_grab_folio(): be sure to *also* * increment the normal page refcount field at least once, @@ -3079,6 +3096,9 @@ EXPORT_SYMBOL_GPL(get_user_pages_fast); * * FOLL_PIN means that the pages must be released via unpin_user_page(). Please * see Documentation/core-api/pin_user_pages.rst for further details. + * + * Note that if the zero_page is amongst the returned pages, it will not have + * pins in it and unpin_user_page() will not remove pins from it. */ int pin_user_pages_fast(unsigned long start, int nr_pages, unsigned int gup_flags, struct page **pages) @@ -3161,6 +3181,9 @@ EXPORT_SYMBOL(pin_user_pages); * pin_user_pages_unlocked() is the FOLL_PIN variant of * get_user_pages_unlocked(). Behavior is the same, except that this one sets * FOLL_PIN and rejects FOLL_GET. + * + * Note that if the zero_page is amongst the returned pages, it will not have + * pins in it and unpin_user_page() will not remove pins from it. */ long pin_user_pages_unlocked(unsigned long start, unsigned long nr_pages, struct page **pages, unsigned int gup_flags)