We can reduce the number of atomic RMW operations when we are the
single exclusive writer -- the common case. So instead of always
requiring

(1) 2 atomic RMW operations for adjusting the atomic seqcount
(2) 1 atomic RMW operation for adjusting the total mapcount
(3) 1 to 6 atomic RMW operations for adjusting the rmap values

We can avoid (2) and (3) if we are the exclusive writer and limit it
to the 2 atomic RMW operations from (1).

Signed-off-by: David Hildenbrand <david@xxxxxxxxxx>
---
 include/linux/rmap.h | 81 +++++++++++++++++++++++++++++++++-----------
 mm/rmap_id.c         | 52 ++++++++++++++++++++++++++++
 2 files changed, 114 insertions(+), 19 deletions(-)
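Illustration only, not part of the change: the guarantee that makes the
exclusive fast path safe is the usual seqcount one. Below is a minimal
userspace sketch with C11 atomics -- all names in it are invented -- of
a writer that is known to be exclusive: it pays only the two seqcount
RMW operations from (1), and updates the protected values with plain
load + store. Readers that race with it observe an odd or changed
sequence and retry, so torn or stale intermediate values are never
used. The kernel helpers additionally detect at runtime whether the
writer really is exclusive and otherwise fall back to atomic RMW
updates, exactly as the diff below does.

#include <stdatomic.h>

static atomic_uint seq;		/* odd while the (single) writer is active */
static atomic_long total;	/* stand-in for folio->_total_mapcount */
static atomic_long val0;	/* stand-in for folio->_rmap_val0 */

/* Exclusive writer: 2 atomic RMWs for the seqcount, plain stores otherwise. */
static void writer_add(long count, long subid0)
{
	atomic_fetch_add_explicit(&seq, 1, memory_order_acq_rel);

	/* Like atomic_set(v, atomic_read(v) + x): no RMW, tearing is harmless. */
	atomic_store_explicit(&total,
		atomic_load_explicit(&total, memory_order_relaxed) + count,
		memory_order_relaxed);
	atomic_store_explicit(&val0,
		atomic_load_explicit(&val0, memory_order_relaxed) + subid0 * count,
		memory_order_relaxed);

	atomic_fetch_add_explicit(&seq, 1, memory_order_acq_rel);
}

/* Reader: retry until an even, unchanged sequence frames both loads. */
static void reader_snapshot(long *total_out, long *val0_out)
{
	unsigned int s1, s2;

	do {
		do {
			s1 = atomic_load_explicit(&seq, memory_order_acquire);
		} while (s1 & 1);	/* writer active, wait */
		*total_out = atomic_load_explicit(&total, memory_order_relaxed);
		*val0_out = atomic_load_explicit(&val0, memory_order_relaxed);
		atomic_thread_fence(memory_order_acquire);
		s2 = atomic_load_explicit(&seq, memory_order_relaxed);
	} while (s1 != s2);
}

This is also why __folio_add_large_rmap_val_exclusive() below can poke
->counter directly: any store tearing is hidden behind the seqcount
retry of the readers.
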
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 0758dddc5528..538c23d3c0c9 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -291,23 +291,36 @@ static inline void __folio_undo_large_rmap(struct folio *folio)
 #endif
 }
 
-static inline void __folio_write_large_rmap_begin(struct folio *folio)
+static inline bool __folio_write_large_rmap_begin(struct folio *folio)
 {
+	bool exclusive;
+
 	VM_WARN_ON_FOLIO(!folio_test_large_rmappable(folio), folio);
 	VM_WARN_ON_FOLIO(folio_test_hugetlb(folio), folio);
-	raw_write_atomic_seqcount_begin(&folio->_rmap_atomic_seqcount,
-					false);
+
+	exclusive = raw_write_atomic_seqcount_begin(&folio->_rmap_atomic_seqcount,
+						    true);
+	if (likely(exclusive)) {
+		prefetchw(&folio->_rmap_val0);
+		if (unlikely(folio_order(folio) > RMAP_SUBID_4_MAX_ORDER))
+			prefetchw(&folio->_rmap_val4);
+	}
+	return exclusive;
 }
 
-static inline void __folio_write_large_rmap_end(struct folio *folio)
+static inline void __folio_write_large_rmap_end(struct folio *folio,
+		bool exclusive)
 {
-	raw_write_atomic_seqcount_end(&folio->_rmap_atomic_seqcount, false);
+	raw_write_atomic_seqcount_end(&folio->_rmap_atomic_seqcount,
+				      exclusive);
 }
 
 void __folio_set_large_rmap_val(struct folio *folio, int count,
 		struct mm_struct *mm);
 void __folio_add_large_rmap_val(struct folio *folio, int count,
 		struct mm_struct *mm);
+void __folio_add_large_rmap_val_exclusive(struct folio *folio, int count,
+		struct mm_struct *mm);
 bool __folio_has_large_matching_rmap_val(struct folio *folio, int count,
 		struct mm_struct *mm);
 #else
@@ -317,12 +330,14 @@ static inline void __folio_prep_large_rmap(struct folio *folio)
 static inline void __folio_undo_large_rmap(struct folio *folio)
 {
 }
-static inline void __folio_write_large_rmap_begin(struct folio *folio)
+static inline bool __folio_write_large_rmap_begin(struct folio *folio)
 {
 	VM_WARN_ON_FOLIO(!folio_test_large_rmappable(folio), folio);
 	VM_WARN_ON_FOLIO(folio_test_hugetlb(folio), folio);
+	return false;
 }
-static inline void __folio_write_large_rmap_end(struct folio *folio)
+static inline void __folio_write_large_rmap_end(struct folio *folio,
+		bool exclusive)
 {
 }
 static inline void __folio_set_large_rmap_val(struct folio *folio, int count,
@@ -333,6 +348,10 @@ static inline void __folio_add_large_rmap_val(struct folio *folio, int count,
 		struct mm_struct *mm)
 {
 }
+static inline void __folio_add_large_rmap_val_exclusive(struct folio *folio,
+		int count, struct mm_struct *mm)
+{
+}
 #endif /* CONFIG_RMAP_ID */
 
 static inline void folio_set_large_mapcount(struct folio *folio,
@@ -348,28 +367,52 @@ static inline void folio_set_large_mapcount(struct folio *folio,
 static inline void folio_inc_large_mapcount(struct folio *folio,
 		struct vm_area_struct *vma)
 {
-	__folio_write_large_rmap_begin(folio);
-	atomic_inc(&folio->_total_mapcount);
-	__folio_add_large_rmap_val(folio, 1, vma->vm_mm);
-	__folio_write_large_rmap_end(folio);
+	bool exclusive;
+
+	exclusive = __folio_write_large_rmap_begin(folio);
+	if (likely(exclusive)) {
+		atomic_set(&folio->_total_mapcount,
+			   atomic_read(&folio->_total_mapcount) + 1);
+		__folio_add_large_rmap_val_exclusive(folio, 1, vma->vm_mm);
+	} else {
+		atomic_inc(&folio->_total_mapcount);
+		__folio_add_large_rmap_val(folio, 1, vma->vm_mm);
+	}
+	__folio_write_large_rmap_end(folio, exclusive);
 }
 
 static inline void folio_add_large_mapcount(struct folio *folio,
 		int count, struct vm_area_struct *vma)
 {
-	__folio_write_large_rmap_begin(folio);
-	atomic_add(count, &folio->_total_mapcount);
-	__folio_add_large_rmap_val(folio, count, vma->vm_mm);
-	__folio_write_large_rmap_end(folio);
+	bool exclusive;
+
+	exclusive = __folio_write_large_rmap_begin(folio);
+	if (likely(exclusive)) {
+		atomic_set(&folio->_total_mapcount,
+			   atomic_read(&folio->_total_mapcount) + count);
+		__folio_add_large_rmap_val_exclusive(folio, count, vma->vm_mm);
+	} else {
+		atomic_add(count, &folio->_total_mapcount);
+		__folio_add_large_rmap_val(folio, count, vma->vm_mm);
+	}
+	__folio_write_large_rmap_end(folio, exclusive);
 }
 
 static inline void folio_dec_large_mapcount(struct folio *folio,
 		struct vm_area_struct *vma)
 {
-	__folio_write_large_rmap_begin(folio);
-	atomic_dec(&folio->_total_mapcount);
-	__folio_add_large_rmap_val(folio, -1, vma->vm_mm);
-	__folio_write_large_rmap_end(folio);
+	bool exclusive;
+
+	exclusive = __folio_write_large_rmap_begin(folio);
+	if (likely(exclusive)) {
+		atomic_set(&folio->_total_mapcount,
+			   atomic_read(&folio->_total_mapcount) - 1);
+		__folio_add_large_rmap_val_exclusive(folio, -1, vma->vm_mm);
+	} else {
+		atomic_dec(&folio->_total_mapcount);
+		__folio_add_large_rmap_val(folio, -1, vma->vm_mm);
+	}
+	__folio_write_large_rmap_end(folio, exclusive);
 }
 
 /* RMAP flags, currently only relevant for some anon rmap operations. */
diff --git a/mm/rmap_id.c b/mm/rmap_id.c
index 421d8d2b646c..5009c6e43965 100644
--- a/mm/rmap_id.c
+++ b/mm/rmap_id.c
@@ -379,6 +379,58 @@ void __folio_add_large_rmap_val(struct folio *folio, int count,
 	}
 }
 
+void __folio_add_large_rmap_val_exclusive(struct folio *folio, int count,
+		struct mm_struct *mm)
+{
+	const unsigned int order = folio_order(folio);
+
+	/*
+	 * Concurrent rmap value modifications are impossible. We don't care
+	 * about store tearing because readers will realize the concurrent
+	 * updates using the seqcount and simply retry. So adjust the bare
+	 * atomic counter instead.
+	 */
+	switch (order) {
+#if MAX_ORDER >= RMAP_SUBID_6_MIN_ORDER
+	case RMAP_SUBID_6_MIN_ORDER ... RMAP_SUBID_6_MAX_ORDER:
+		folio->_rmap_val0.counter += get_rmap_subid_6(mm, 0) * count;
+		folio->_rmap_val1.counter += get_rmap_subid_6(mm, 1) * count;
+		folio->_rmap_val2.counter += get_rmap_subid_6(mm, 2) * count;
+		folio->_rmap_val3.counter += get_rmap_subid_6(mm, 3) * count;
+		folio->_rmap_val4.counter += get_rmap_subid_6(mm, 4) * count;
+		folio->_rmap_val5.counter += get_rmap_subid_6(mm, 5) * count;
+		break;
+#endif
+#if MAX_ORDER >= RMAP_SUBID_5_MIN_ORDER
+	case RMAP_SUBID_5_MIN_ORDER ... RMAP_SUBID_5_MAX_ORDER:
+		folio->_rmap_val0.counter += get_rmap_subid_5(mm, 0) * count;
+		folio->_rmap_val1.counter += get_rmap_subid_5(mm, 1) * count;
+		folio->_rmap_val2.counter += get_rmap_subid_5(mm, 2) * count;
+		folio->_rmap_val3.counter += get_rmap_subid_5(mm, 3) * count;
+		folio->_rmap_val4.counter += get_rmap_subid_5(mm, 4) * count;
+		break;
+#endif
+	case RMAP_SUBID_4_MIN_ORDER ... RMAP_SUBID_4_MAX_ORDER:
+		folio->_rmap_val0.counter += get_rmap_subid_4(mm, 0) * count;
+		folio->_rmap_val1.counter += get_rmap_subid_4(mm, 1) * count;
+		folio->_rmap_val2.counter += get_rmap_subid_4(mm, 2) * count;
+		folio->_rmap_val3.counter += get_rmap_subid_4(mm, 3) * count;
+		break;
+	case RMAP_SUBID_3_MIN_ORDER ... RMAP_SUBID_3_MAX_ORDER:
+		folio->_rmap_val0.counter += get_rmap_subid_3(mm, 0) * count;
+		folio->_rmap_val1.counter += get_rmap_subid_3(mm, 1) * count;
+		folio->_rmap_val2.counter += get_rmap_subid_3(mm, 2) * count;
+		break;
+	case RMAP_SUBID_2_MIN_ORDER ... RMAP_SUBID_2_MAX_ORDER:
+		folio->_rmap_val0.counter += get_rmap_subid_2(mm, 0) * count;
+		folio->_rmap_val1.counter += get_rmap_subid_2(mm, 1) * count;
+		break;
+	default:
+		folio->_rmap_val0.counter += get_rmap_subid_1(mm) * count;
+		break;
+	}
+}
+
 bool __folio_has_large_matching_rmap_val(struct folio *folio, int count,
 		struct mm_struct *mm)
 {
-- 
2.41.0