We can use folio->_pincount directly, since all users are guarded by tests of compound/large. Signed-off-by: Matthew Wilcox (Oracle) <willy@xxxxxxxxxxxxx> --- Documentation/core-api/pin_user_pages.rst | 29 +++++++++++------------ include/linux/mm.h | 14 ++--------- include/linux/mm_types.h | 5 ---- mm/debug.c | 4 ++-- mm/gup.c | 8 +++---- mm/huge_memory.c | 4 ++-- mm/hugetlb.c | 4 ++-- mm/page_alloc.c | 9 ++++--- 8 files changed, 32 insertions(+), 45 deletions(-) diff --git a/Documentation/core-api/pin_user_pages.rst b/Documentation/core-api/pin_user_pages.rst index facafbdecb95..9fb0b1080d3b 100644 --- a/Documentation/core-api/pin_user_pages.rst +++ b/Documentation/core-api/pin_user_pages.rst @@ -55,18 +55,17 @@ flags the caller provides. The caller is required to pass in a non-null struct pages* array, and the function then pins pages by incrementing each by a special value: GUP_PIN_COUNTING_BIAS. -For compound pages, the GUP_PIN_COUNTING_BIAS scheme is not used. Instead, -an exact form of pin counting is achieved, by using the 2nd struct page -in the compound page. A new struct page field, compound_pincount, has -been added in order to support this. - -This approach for compound pages avoids the counting upper limit problems that -are discussed below. Those limitations would have been aggravated severely by -huge pages, because each tail page adds a refcount to the head page. And in -fact, testing revealed that, without a separate compound_pincount field, -page overflows were seen in some huge page stress tests. - -This also means that huge pages and compound pages do not suffer +For large folios, the GUP_PIN_COUNTING_BIAS scheme is not used. Instead, +the extra space available in the struct folio is used to store the +pincount directly. + +This approach for large folios avoids the counting upper limit problems +that are discussed below. Those limitations would have been aggravated +severely by huge pages, because each tail page adds a refcount to the +head page. And in fact, testing revealed that, without a separate pincount +field, refcount overflows were seen in some huge page stress tests. + +This also means that huge pages and large folios do not suffer from the false positives problem that is mentioned below.:: Function @@ -264,9 +263,9 @@ place.) Other diagnostics ================= -dump_page() has been enhanced slightly, to handle these new counting -fields, and to better report on compound pages in general. Specifically, -for compound pages, the exact (compound_pincount) pincount is reported. +dump_page() has been enhanced slightly to handle these new counting +fields, and to better report on large folios in general. Specifically, +for large folios, the exact pincount is reported. References ========== diff --git a/include/linux/mm.h b/include/linux/mm.h index 49e40766adb6..5683a25ce08e 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1011,11 +1011,6 @@ static inline void folio_set_compound_dtor(struct folio *folio, void destroy_large_folio(struct folio *folio); -static inline int head_compound_pincount(struct page *head) -{ - return atomic_read(compound_pincount_ptr(head)); -} - static inline void set_compound_order(struct page *page, unsigned int order) { page[1].compound_order = order; @@ -1641,11 +1636,6 @@ static inline struct folio *pfn_folio(unsigned long pfn) return page_folio(pfn_to_page(pfn)); } -static inline atomic_t *folio_pincount_ptr(struct folio *folio) -{ - return &folio_page(folio, 1)->compound_pincount; -} - /** * folio_maybe_dma_pinned - Report if a folio may be pinned for DMA. * @folio: The folio. @@ -1663,7 +1653,7 @@ static inline atomic_t *folio_pincount_ptr(struct folio *folio) * expected to be able to deal gracefully with a false positive. * * For large folios, the result will be exactly correct. That's because - * we have more tracking data available: the compound_pincount is used + * we have more tracking data available: the _pincount field is used * instead of the GUP_PIN_COUNTING_BIAS scheme. * * For more information, please see Documentation/core-api/pin_user_pages.rst. @@ -1674,7 +1664,7 @@ static inline atomic_t *folio_pincount_ptr(struct folio *folio) static inline bool folio_maybe_dma_pinned(struct folio *folio) { if (folio_test_large(folio)) - return atomic_read(folio_pincount_ptr(folio)) > 0; + return atomic_read(&folio->_pincount) > 0; /* * folio_ref_count() is signed. If that refcount overflows, then diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 2b0a0595fc9e..c225d81eae83 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -443,11 +443,6 @@ static inline atomic_t *subpages_mapcount_ptr(struct page *page) return &page[1].subpages_mapcount; } -static inline atomic_t *compound_pincount_ptr(struct page *page) -{ - return &page[1].compound_pincount; -} - /* * Used for sizing the vmemmap region on some architectures */ diff --git a/mm/debug.c b/mm/debug.c index 7f8e5f744e42..893c9dbf76ca 100644 --- a/mm/debug.c +++ b/mm/debug.c @@ -94,11 +94,11 @@ static void __dump_page(struct page *page) page, page_ref_count(head), mapcount, mapping, page_to_pgoff(page), page_to_pfn(page)); if (compound) { - pr_warn("head:%p order:%u compound_mapcount:%d subpages_mapcount:%d compound_pincount:%d\n", + pr_warn("head:%p order:%u compound_mapcount:%d subpages_mapcount:%d pincount:%d\n", head, compound_order(head), head_compound_mapcount(head), head_subpages_mapcount(head), - head_compound_pincount(head)); + atomic_read(&folio->_pincount)); } #ifdef CONFIG_MEMCG diff --git a/mm/gup.c b/mm/gup.c index f45a3a5be53a..38ba1697dd61 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -111,7 +111,7 @@ static inline struct folio *try_get_folio(struct page *page, int refs) * FOLL_GET: folio's refcount will be incremented by @refs. * * FOLL_PIN on large folios: folio's refcount will be incremented by - * @refs, and its compound_pincount will be incremented by @refs. + * @refs, and its pincount will be incremented by @refs. * * FOLL_PIN on single-page folios: folio's refcount will be incremented by * @refs * GUP_PIN_COUNTING_BIAS. @@ -157,7 +157,7 @@ struct folio *try_grab_folio(struct page *page, int refs, unsigned int flags) * try_get_folio() is left intact. */ if (folio_test_large(folio)) - atomic_add(refs, folio_pincount_ptr(folio)); + atomic_add(refs, &folio->_pincount); else folio_ref_add(folio, refs * (GUP_PIN_COUNTING_BIAS - 1)); @@ -182,7 +182,7 @@ static void gup_put_folio(struct folio *folio, int refs, unsigned int flags) if (flags & FOLL_PIN) { node_stat_mod_folio(folio, NR_FOLL_PIN_RELEASED, refs); if (folio_test_large(folio)) - atomic_sub(refs, folio_pincount_ptr(folio)); + atomic_sub(refs, &folio->_pincount); else refs *= GUP_PIN_COUNTING_BIAS; } @@ -232,7 +232,7 @@ int __must_check try_grab_page(struct page *page, unsigned int flags) */ if (folio_test_large(folio)) { folio_ref_add(folio, 1); - atomic_add(1, folio_pincount_ptr(folio)); + atomic_add(1, &folio->_pincount); } else { folio_ref_add(folio, GUP_PIN_COUNTING_BIAS); } diff --git a/mm/huge_memory.c b/mm/huge_memory.c index c13b1f67d14e..9570f03cdee4 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2477,9 +2477,9 @@ static void __split_huge_page_tail(struct page *head, int tail, * of swap cache pages that store the swp_entry_t in tail pages. * Fix up and warn once if private is unexpectedly set. * - * What of 32-bit systems, on which head[1].compound_pincount overlays + * What of 32-bit systems, on which folio->_pincount overlays * head[1].private? No problem: THP_SWAP is not enabled on 32-bit, and - * compound_pincount must be 0 for folio_ref_freeze() to have succeeded. + * pincount must be 0 for folio_ref_freeze() to have succeeded. */ if (!folio_test_swapcache(page_folio(head))) { VM_WARN_ON_ONCE_PAGE(page_tail->private != 0, page_tail); diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 273a6522aa4c..15b2707c1600 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1476,7 +1476,7 @@ static void __destroy_compound_gigantic_folio(struct folio *folio, atomic_set(folio_mapcount_ptr(folio), 0); atomic_set(folio_subpages_mapcount_ptr(folio), 0); - atomic_set(folio_pincount_ptr(folio), 0); + atomic_set(&folio->_pincount, 0); for (i = 1; i < nr_pages; i++) { p = folio_page(folio, i); @@ -1998,7 +1998,7 @@ static bool __prep_compound_gigantic_folio(struct folio *folio, } atomic_set(folio_mapcount_ptr(folio), -1); atomic_set(folio_subpages_mapcount_ptr(folio), 0); - atomic_set(folio_pincount_ptr(folio), 0); + atomic_set(&folio->_pincount, 0); return true; out_error: diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 4d9afa1048ea..d1e5ec875fd0 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -775,11 +775,13 @@ void free_compound_page(struct page *page) static void prep_compound_head(struct page *page, unsigned int order) { + struct folio *folio = (struct folio *)page; + set_compound_page_dtor(page, COMPOUND_PAGE_DTOR); set_compound_order(page, order); atomic_set(compound_mapcount_ptr(page), -1); atomic_set(subpages_mapcount_ptr(page), 0); - atomic_set(compound_pincount_ptr(page), 0); + atomic_set(&folio->_pincount, 0); } static void prep_compound_tail(struct page *head, int tail_idx) @@ -1291,6 +1293,7 @@ static inline bool free_page_is_bad(struct page *page) static int free_tail_pages_check(struct page *head_page, struct page *page) { + struct folio *folio = (struct folio *)head_page; int ret = 1; /* @@ -1314,8 +1317,8 @@ static int free_tail_pages_check(struct page *head_page, struct page *page) bad_page(page, "nonzero subpages_mapcount"); goto out; } - if (unlikely(head_compound_pincount(head_page))) { - bad_page(page, "nonzero compound_pincount"); + if (unlikely(atomic_read(&folio->_pincount))) { + bad_page(page, "nonzero pincount"); goto out; } break; -- 2.35.1