Let's make folio_mapped_shared() precise by using our rmap ID magic to
identify if a single MM is responsible for all mappings.

If there is a lot of concurrent (un)map activity, we could theoretically
spin for quite a while. But we only look at the rmap values when we
haven't already identified the folio as "obviously shared". In most
cases, there should only be one or a handful of page tables involved.

For current THPs with ~512 .. 2048 subpages, we really shouldn't see a
lot of concurrent updates that keep us spinning for a long time. Anyhow,
if this ever becomes a problem, it can be optimized later if there is
real demand.

Signed-off-by: David Hildenbrand <david@xxxxxxxxxx>
---
 include/linux/mm.h   | 21 ++++++++++++---
 include/linux/rmap.h |  2 ++
 mm/rmap_id.c         | 63 ++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 82 insertions(+), 4 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 765e688690f1..1081a8faa1a3 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2113,6 +2113,17 @@ static inline size_t folio_size(struct folio *folio)
 	return PAGE_SIZE << folio_order(folio);
 }
 
+#ifdef CONFIG_RMAP_ID
+bool __folio_large_mapped_shared(struct folio *folio, struct mm_struct *mm);
+#else
+static inline bool __folio_large_mapped_shared(struct folio *folio,
+		struct mm_struct *mm)
+{
+	/* ... guess based on the mapcount of the first page of the folio. */
+	return atomic_read(&folio->page._mapcount) > 0;
+}
+#endif
+
 /**
  * folio_mapped_shared - Report if a folio is certainly mapped by
  *			 multiple entities in their page tables
@@ -2141,8 +2152,11 @@ static inline size_t folio_size(struct folio *folio)
  * PMD-mapped PMD-sized THP), the result will be exactly correct.
  *
  * For all other (partially-mappable) folios, such as PTE-mapped THP, the
- * return value is partially fuzzy: true is not fuzzy, because it means
- * "certainly mapped shared", but false means "maybe mapped exclusively".
+ * return value is partially fuzzy without CONFIG_RMAP_ID: true is not fuzzy,
+ * because it means "certainly mapped shared", but false means
+ * "maybe mapped exclusively".
+ *
+ * With CONFIG_RMAP_ID, the result will be exactly correct.
  *
  * Note that this function only considers *current* page table mappings
  * tracked via rmap -- that properly adjusts the folio mapcount(s) -- and
@@ -2177,8 +2191,7 @@ static inline bool folio_mapped_shared(struct folio *folio,
 	 */
 	if (total_mapcount > folio_nr_pages(folio))
 		return true;
-	/* ... guess based on the mapcount of the first page of the folio. */
-	return atomic_read(&folio->page._mapcount) > 0;
+	return __folio_large_mapped_shared(folio, mm);
 }
 
 #ifndef HAVE_ARCH_MAKE_PAGE_ACCESSIBLE
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 19c9dc3216df..a73e146d82d1 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -253,6 +253,8 @@ void __folio_set_large_rmap_val(struct folio *folio, int count,
 		struct mm_struct *mm);
 void __folio_add_large_rmap_val(struct folio *folio, int count,
 		struct mm_struct *mm);
+bool __folio_has_large_matching_rmap_val(struct folio *folio, int count,
+		struct mm_struct *mm);
 #else
 static inline void __folio_prep_large_rmap(struct folio *folio)
 {
diff --git a/mm/rmap_id.c b/mm/rmap_id.c
index e66b0f5aea2d..85a61c830f19 100644
--- a/mm/rmap_id.c
+++ b/mm/rmap_id.c
@@ -322,6 +322,69 @@ void __folio_add_large_rmap_val(struct folio *folio, int count,
 	}
 }
 
+bool __folio_has_large_matching_rmap_val(struct folio *folio, int count,
+		struct mm_struct *mm)
+{
+	const unsigned int order = folio_order(folio);
+	unsigned long diff = 0;
+
+	switch (order) {
+#if MAX_ORDER >= RMAP_SUBID_6_MIN_ORDER
+	case RMAP_SUBID_6_MIN_ORDER ... RMAP_SUBID_6_MAX_ORDER:
+		diff |= atomic_long_read(&folio->_rmap_val0) ^ (get_rmap_subid_6(mm, 0) * count);
+		diff |= atomic_long_read(&folio->_rmap_val1) ^ (get_rmap_subid_6(mm, 1) * count);
+		diff |= atomic_long_read(&folio->_rmap_val2) ^ (get_rmap_subid_6(mm, 2) * count);
+		diff |= atomic_long_read(&folio->_rmap_val3) ^ (get_rmap_subid_6(mm, 3) * count);
+		diff |= atomic_long_read(&folio->_rmap_val4) ^ (get_rmap_subid_6(mm, 4) * count);
+		diff |= atomic_long_read(&folio->_rmap_val5) ^ (get_rmap_subid_6(mm, 5) * count);
+		break;
+#endif
+#if MAX_ORDER >= RMAP_SUBID_5_MIN_ORDER
+	case RMAP_SUBID_5_MIN_ORDER ... RMAP_SUBID_5_MAX_ORDER:
+		diff |= atomic_long_read(&folio->_rmap_val0) ^ (get_rmap_subid_5(mm, 0) * count);
+		diff |= atomic_long_read(&folio->_rmap_val1) ^ (get_rmap_subid_5(mm, 1) * count);
+		diff |= atomic_long_read(&folio->_rmap_val2) ^ (get_rmap_subid_5(mm, 2) * count);
+		diff |= atomic_long_read(&folio->_rmap_val3) ^ (get_rmap_subid_5(mm, 3) * count);
+		diff |= atomic_long_read(&folio->_rmap_val4) ^ (get_rmap_subid_5(mm, 4) * count);
+		break;
+#endif
+	default:
+		diff |= atomic_long_read(&folio->_rmap_val0) ^ (get_rmap_subid_4(mm, 0) * count);
+		diff |= atomic_long_read(&folio->_rmap_val1) ^ (get_rmap_subid_4(mm, 1) * count);
+		diff |= atomic_long_read(&folio->_rmap_val2) ^ (get_rmap_subid_4(mm, 2) * count);
+		diff |= atomic_long_read(&folio->_rmap_val3) ^ (get_rmap_subid_4(mm, 3) * count);
+		break;
+	}
+	return !diff;
+}
+
+bool __folio_large_mapped_shared(struct folio *folio, struct mm_struct *mm)
+{
+	unsigned long start;
+	bool exclusive;
+	int mapcount;
+
+	VM_WARN_ON_ONCE(!folio_test_large_rmappable(folio));
+	VM_WARN_ON_ONCE(folio_test_hugetlb(folio));
+
+	/*
+	 * Livelocking here is unlikely, as the caller already handles the
+	 * "obviously shared" cases. If this ever becomes an issue and there is
+	 * too much concurrent (un)mapping happening (using different page
+	 * tables), we could stop earlier and just return "shared".
+	 */
+	do {
+		start = raw_read_atomic_seqcount_begin(&folio->_rmap_atomic_seqcount);
+		mapcount = folio_mapcount(folio);
+		if (unlikely(mapcount > folio_nr_pages(folio)))
+			return true;
+		exclusive = __folio_has_large_matching_rmap_val(folio, mapcount, mm);
+	} while (raw_read_atomic_seqcount_retry(&folio->_rmap_atomic_seqcount,
+						start));
+
+	return !exclusive;
+}
+
 int alloc_rmap_id(void)
 {
 	int id;
-- 
2.41.0
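
As a rough illustration of the matching trick used above, here is a
minimal userspace C sketch. It is a toy model only: it collapses the
kernel's multiple _rmap_val0.._rmap_val5 counters into a single 64-bit
sum, skips the seqcount snapshotting entirely, and every name in it
(toy_mm, toy_folio, toy_map, toy_unmap, toy_mapped_exclusively) is made
up for illustration and does not exist in the kernel:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Toy stand-ins; none of these names exist in the kernel. */
struct toy_mm {
	uint64_t rmap_subid;	/* unique per-MM value, cf. get_rmap_subid_*() */
};

struct toy_folio {
	uint64_t rmap_val;	/* running sum of sub-IDs, cf. _rmap_val0..5 */
	unsigned int mapcount;	/* total number of page table mappings */
};

/* (Un)mapping one page adjusts the sum and the mapcount together. */
static void toy_map(struct toy_folio *folio, struct toy_mm *mm)
{
	folio->rmap_val += mm->rmap_subid;
	folio->mapcount++;
}

static void toy_unmap(struct toy_folio *folio, struct toy_mm *mm)
{
	folio->rmap_val -= mm->rmap_subid;
	folio->mapcount--;
}

/*
 * If a single MM is responsible for all mappings, the accumulated sum
 * must equal subid * mapcount; this is the comparison that
 * __folio_has_large_matching_rmap_val() performs per counter via XOR.
 */
static bool toy_mapped_exclusively(struct toy_folio *folio, struct toy_mm *mm)
{
	return folio->rmap_val == mm->rmap_subid * folio->mapcount;
}

int main(void)
{
	struct toy_mm a = { .rmap_subid = 0x1000 };
	struct toy_mm b = { .rmap_subid = 0x2000 };
	struct toy_folio folio = { 0, 0 };

	toy_map(&folio, &a);
	toy_map(&folio, &a);
	printf("a only : exclusive=%d\n", toy_mapped_exclusively(&folio, &a));

	toy_map(&folio, &b);
	printf("a and b: exclusive=%d\n", toy_mapped_exclusively(&folio, &a));

	toy_unmap(&folio, &b);
	printf("b gone : exclusive=%d\n", toy_mapped_exclusively(&folio, &a));
	return 0;
}

Note that a single naive sum like this could collide (e.g., sub-IDs
3 + 5 vs. 4 + 4), which is presumably why the real scheme derives
structured sub-IDs and, as the switch on folio_order() above shows,
spreads them over more counters for larger folios, so that a matching
value really does imply a single MM.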