From: Kairui Song <kasong@xxxxxxxxxxx>

Split the xarray locking and storing part of __filemap_add_folio() into
a standalone helper, __filemap_lock_store(), so that the swap cache can
reuse it for multi-index support. There is no performance change on the
page cache side beyond noise:

Tested in an 8G memory cgroup with a 16G brd ramdisk:

  echo 3 > /proc/sys/vm/drop_caches

  fio -name=cached --numjobs=16 --filename=/mnt/test.img \
    --buffered=1 --ioengine=mmap --rw=randread --time_based \
    --ramp_time=30s --runtime=5m --group_reporting

Before:
  bw (  MiB/s): min=  493, max= 3947, per=100.00%, avg=2625.56, stdev=25.74, samples=8651
  iops        : min=126454, max=1010681, avg=672142.61, stdev=6590.48, samples=8651

After:
  bw (  MiB/s): min=  298, max= 3840, per=100.00%, avg=2614.34, stdev=23.77, samples=8689
  iops        : min=76464, max=983045, avg=669270.35, stdev=6084.31, samples=8689

Test result with THP (do a THP randread first, then switch to 4K page
reads, in the hope of issuing a lot of splits):

  echo 3 > /proc/sys/vm/drop_caches

  fio -name=cached --numjobs=16 --filename=/mnt/test.img \
    --buffered=1 --ioengine=mmap -thp=1 --readonly \
    --rw=randread --time_based --ramp_time=30s --runtime=10m \
    --group_reporting

  fio -name=cached --numjobs=16 --filename=/mnt/test.img \
    --buffered=1 --ioengine=mmap \
    --rw=randread --time_based --runtime=5s --group_reporting

Before:
  bw (  KiB/s): min= 4611, max=15370, per=100.00%, avg=8928.74, stdev=105.17, samples=19146
  iops        : min= 1151, max= 3842, avg=2231.27, stdev=26.29, samples=19146
  READ: bw=4635B/s (4635B/s), 4635B/s-4635B/s (4635B/s-4635B/s), io=64.0KiB (65.5kB), run=14137-14137msec

After:
  bw (  KiB/s): min= 4691, max=15666, per=100.00%, avg=8890.30, stdev=104.53, samples=19056
  iops        : min= 1167, max= 3913, avg=2218.68, stdev=26.15, samples=19056
  READ: bw=4590B/s (4590B/s), 4590B/s-4590B/s (4590B/s-4590B/s), io=64.0KiB (65.5kB), run=14275-14275msec

Signed-off-by: Kairui Song <kasong@xxxxxxxxxxx>
---
 mm/filemap.c | 124 +++++++++++++++++++++++++++------------------------
 1 file changed, 65 insertions(+), 59 deletions(-)
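Not part of this patch, just a rough sketch of the intended reuse: a
swap cache insertion path could drive __filemap_lock_store() roughly as
below, assuming the helper is later made visible outside filemap.c. The
caller name swap_cache_add_folio() and its setup details are
illustrative assumptions only, not anything this series adds:

  /* Hypothetical caller, assuming __filemap_lock_store() is shared. */
  static int swap_cache_add_folio(struct address_space *mapping,
  		struct folio *folio, pgoff_t index, void **shadowp)
  {
  	XA_STATE(xas, &mapping->i_pages, index);
  	long nr = folio_nr_pages(folio);
  	int ret;

  	/* Multi-index store: cover every slot of a large folio at once. */
  	xas_set_order(&xas, index, folio_order(folio));
  	folio_ref_add(folio, nr);
  	folio->mapping = mapping;
  	folio->index = xas.xa_index;

  	/* Resolves shadow-entry conflicts, splitting larger entries. */
  	ret = __filemap_lock_store(&xas, folio, index, GFP_KERNEL, shadowp);
  	if (!ret) {
  		/* Success: the helper returns with xa_lock still held. */
  		mapping->nrpages += nr;
  		xas_unlock_irq(&xas);
  	} else {
  		/* Failure: lock already dropped, roll back our refs. */
  		folio->mapping = NULL;
  		folio_put_refs(folio, nr);
  	}
  	return ret;
  }

The key contract is that the helper returns 0 with the xarray lock
still held, so the caller's accounting stays atomic with the store;
on error the lock has already been dropped.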
diff --git a/mm/filemap.c b/mm/filemap.c
index 90b86f22a9df..0ccdc9e92764 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -848,38 +848,23 @@ void replace_page_cache_folio(struct folio *old, struct folio *new)
 }
 EXPORT_SYMBOL_GPL(replace_page_cache_folio);
 
-noinline int __filemap_add_folio(struct address_space *mapping,
-		struct folio *folio, pgoff_t index, gfp_t gfp, void **shadowp)
+static int __filemap_lock_store(struct xa_state *xas, struct folio *folio,
+		pgoff_t index, gfp_t gfp, void **shadowp)
 {
-	XA_STATE(xas, &mapping->i_pages, index);
-	void *alloced_shadow = NULL;
-	int alloced_order = 0;
-	bool huge;
-	long nr;
-
-	VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
-	VM_BUG_ON_FOLIO(folio_test_swapbacked(folio), folio);
-	mapping_set_update(&xas, mapping);
-
-	VM_BUG_ON_FOLIO(index & (folio_nr_pages(folio) - 1), folio);
-	xas_set_order(&xas, index, folio_order(folio));
-	huge = folio_test_hugetlb(folio);
-	nr = folio_nr_pages(folio);
-
+	void *entry, *old, *alloced_shadow = NULL;
+	int order, split_order, alloced_order = 0;
 	gfp &= GFP_RECLAIM_MASK;
-	folio_ref_add(folio, nr);
-	folio->mapping = mapping;
-	folio->index = xas.xa_index;
 
 	for (;;) {
-		int order = -1, split_order = 0;
-		void *entry, *old = NULL;
+		order = -1;
+		split_order = 0;
+		old = NULL;
 
-		xas_lock_irq(&xas);
-		xas_for_each_conflict(&xas, entry) {
+		xas_lock_irq(xas);
+		xas_for_each_conflict(xas, entry) {
 			old = entry;
 			if (!xa_is_value(entry)) {
-				xas_set_err(&xas, -EEXIST);
+				xas_set_err(xas, -EEXIST);
 				goto unlock;
 			}
 			/*
@@ -887,72 +872,93 @@ noinline int __filemap_add_folio(struct address_space *mapping,
 			 * it will be the first and only entry iterated.
 			 */
 			if (order == -1)
-				order = xas_get_order(&xas);
+				order = xas_get_order(xas);
 		}
 
 		/* entry may have changed before we re-acquire the lock */
 		if (alloced_order && (old != alloced_shadow || order != alloced_order)) {
-			xas_destroy(&xas);
+			xas_destroy(xas);
 			alloced_order = 0;
 		}
 
 		if (old) {
 			if (order > 0 && order > folio_order(folio)) {
-				/* How to handle large swap entries? */
-				BUG_ON(shmem_mapping(mapping));
 				if (!alloced_order) {
 					split_order = order;
 					goto unlock;
 				}
-				xas_split(&xas, old, order);
-				xas_reset(&xas);
+				xas_split(xas, old, order);
+				xas_reset(xas);
 			}
 			if (shadowp)
 				*shadowp = old;
 		}
 
-		xas_store(&xas, folio);
-		if (xas_error(&xas))
-			goto unlock;
-
-		mapping->nrpages += nr;
-
-		/* hugetlb pages do not participate in page cache accounting */
-		if (!huge) {
-			__lruvec_stat_mod_folio(folio, NR_FILE_PAGES, nr);
-			if (folio_test_pmd_mappable(folio))
-				__lruvec_stat_mod_folio(folio,
-						NR_FILE_THPS, nr);
-		}
-
+		xas_store(xas, folio);
+		if (!xas_error(xas))
+			return 0;
 unlock:
-		xas_unlock_irq(&xas);
+		xas_unlock_irq(xas);
 
 		/* split needed, alloc here and retry. */
 		if (split_order) {
-			xas_split_alloc(&xas, old, split_order, gfp);
-			if (xas_error(&xas))
+			xas_split_alloc(xas, old, split_order, gfp);
+			if (xas_error(xas))
 				goto error;
 			alloced_shadow = old;
 			alloced_order = split_order;
-			xas_reset(&xas);
+			xas_reset(xas);
 			continue;
 		}
 
-		if (!xas_nomem(&xas, gfp))
+		if (!xas_nomem(xas, gfp))
 			break;
 	}
 
-	if (xas_error(&xas))
-		goto error;
-
-	trace_mm_filemap_add_to_page_cache(folio);
-	return 0;
 error:
-	folio->mapping = NULL;
-	/* Leave page->index set: truncation relies upon it */
-	folio_put_refs(folio, nr);
-	return xas_error(&xas);
+	return xas_error(xas);
+}
+
+noinline int __filemap_add_folio(struct address_space *mapping,
+		struct folio *folio, pgoff_t index, gfp_t gfp, void **shadowp)
+{
+	XA_STATE(xas, &mapping->i_pages, index);
+	bool huge;
+	long nr;
+	int ret;
+
+	VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
+	VM_BUG_ON_FOLIO(folio_test_swapbacked(folio), folio);
+	mapping_set_update(&xas, mapping);
+
+	VM_BUG_ON_FOLIO(index & (folio_nr_pages(folio) - 1), folio);
+	xas_set_order(&xas, index, folio_order(folio));
+	huge = folio_test_hugetlb(folio);
+	nr = folio_nr_pages(folio);
+
+	folio_ref_add(folio, nr);
+	folio->mapping = mapping;
+	folio->index = xas.xa_index;
+
+	ret = __filemap_lock_store(&xas, folio, index, gfp, shadowp);
+	if (!ret) {
+		mapping->nrpages += nr;
+		/* hugetlb pages do not participate in page cache accounting */
+		if (!huge) {
+			__lruvec_stat_mod_folio(folio, NR_FILE_PAGES, nr);
+			if (folio_test_pmd_mappable(folio))
+				__lruvec_stat_mod_folio(folio,
+						NR_FILE_THPS, nr);
+		}
+		xas_unlock_irq(&xas);
+		trace_mm_filemap_add_to_page_cache(folio);
+	} else {
+		folio->mapping = NULL;
+		/* Leave page->index set: truncation relies upon it */
+		folio_put_refs(folio, nr);
+	}
+
+	return ret;
 }
 ALLOW_ERROR_INJECTION(__filemap_add_folio, ERRNO);
 
-- 
2.43.0