Hi peifeng,

On Tue, Feb 20, 2024 at 2:43 PM 李培锋 <lipeifeng@xxxxxxxx> wrote:
>
> add more experts from Linux and Google.
>
>
> On 2024/2/19 22:17, lipeifeng@xxxxxxxx wrote:
> > From: lipeifeng <lipeifeng@xxxxxxxx>
> >
> > The patch supports controlling the behavior of rmap_walk through
> > folio_referenced(): it lets some threads hold the rmap lock
> > instead of using try_lock when calling folio_referenced().

Please describe what problem the patch is trying to address, and why
this modification is needed, in the commit message.

Btw, who sets rw_try_lock to 0, and what is the benefit? (A sketch of
the two locking modes follows the patch below.)

> >
> > Signed-off-by: lipeifeng <lipeifeng@xxxxxxxx>
> > ---
> >  include/linux/rmap.h |  5 +++--
> >  mm/rmap.c            |  5 +++--
> >  mm/vmscan.c          | 16 ++++++++++++++--
> >  3 files changed, 20 insertions(+), 6 deletions(-)
> >
> > diff --git a/include/linux/rmap.h b/include/linux/rmap.h
> > index b7944a8..846b261 100644
> > --- a/include/linux/rmap.h
> > +++ b/include/linux/rmap.h
> > @@ -623,7 +623,8 @@ static inline int folio_try_share_anon_rmap_pmd(struct folio *folio,
> >   * Called from mm/vmscan.c to handle paging out
> >   */
> >  int folio_referenced(struct folio *, int is_locked,
> > -                    struct mem_cgroup *memcg, unsigned long *vm_flags);
> > +                    struct mem_cgroup *memcg, unsigned long *vm_flags,
> > +                    unsigned int rw_try_lock);
> >
> >  void try_to_migrate(struct folio *folio, enum ttu_flags flags);
> >  void try_to_unmap(struct folio *, enum ttu_flags flags);
> > @@ -739,7 +740,7 @@ struct anon_vma *folio_lock_anon_vma_read(struct folio *folio,
> >
> >  static inline int folio_referenced(struct folio *folio, int is_locked,
> >                                     struct mem_cgroup *memcg,
> > -                                   unsigned long *vm_flags)
> > +                                   unsigned long *vm_flags, unsigned int rw_try_lock)
> >  {
> >          *vm_flags = 0;
> >          return 0;
> > diff --git a/mm/rmap.c b/mm/rmap.c
> > index f5d43ed..15d1fba 100644
> > --- a/mm/rmap.c
> > +++ b/mm/rmap.c
> > @@ -952,6 +952,7 @@ static bool invalid_folio_referenced_vma(struct vm_area_struct *vma, void *arg)
> >   * @is_locked: Caller holds lock on the folio.
> >   * @memcg: target memory cgroup
> >   * @vm_flags: A combination of all the vma->vm_flags which referenced the folio.
> > + * @rw_try_lock: if try_lock in rmap_walk
> >   *
> >   * Quick test_and_clear_referenced for all mappings of a folio,
> >   *
> > @@ -959,7 +960,7 @@ static bool invalid_folio_referenced_vma(struct vm_area_struct *vma, void *arg)
> >   * the function bailed out due to rmap lock contention.
> >   */
> >  int folio_referenced(struct folio *folio, int is_locked,
> > -                  struct mem_cgroup *memcg, unsigned long *vm_flags)
> > +                  struct mem_cgroup *memcg, unsigned long *vm_flags, unsigned int rw_try_lock)
> >  {
> >          int we_locked = 0;
> >          struct folio_referenced_arg pra = {
> > @@ -970,7 +971,7 @@ int folio_referenced(struct folio *folio, int is_locked,
> >                  .rmap_one = folio_referenced_one,
> >                  .arg = (void *)&pra,
> >                  .anon_lock = folio_lock_anon_vma_read,
> > -                .try_lock = true,
> > +                .try_lock = rw_try_lock ? true : false,
> >                  .invalid_vma = invalid_folio_referenced_vma,
> >          };
> >
> > diff --git a/mm/vmscan.c b/mm/vmscan.c
> > index 4f9c854..0296d48 100644
> > --- a/mm/vmscan.c
> > +++ b/mm/vmscan.c
> > @@ -136,6 +136,9 @@ struct scan_control {
> >          /* Always discard instead of demoting to lower tier memory */
> >          unsigned int no_demotion:1;
> >
> > +        /* if try_lock in rmap_walk */
> > +        unsigned int rw_try_lock:1;
> > +
> >          /* Allocation order */
> >          s8 order;
> >
> > @@ -827,7 +830,7 @@ static enum folio_references folio_check_references(struct folio *folio,
> >          unsigned long vm_flags;
> >
> >          referenced_ptes = folio_referenced(folio, 1, sc->target_mem_cgroup,
> > -                                           &vm_flags);
> > +                                           &vm_flags, sc->rw_try_lock);
> >          referenced_folio = folio_test_clear_referenced(folio);
> >
> >          /*
> > @@ -1501,6 +1504,7 @@ unsigned int reclaim_clean_pages_from_list(struct zone *zone,
> >          struct scan_control sc = {
> >                  .gfp_mask = GFP_KERNEL,
> >                  .may_unmap = 1,
> > +                .rw_try_lock = 1,
> >          };
> >          struct reclaim_stat stat;
> >          unsigned int nr_reclaimed;
> > @@ -2038,7 +2042,7 @@ static void shrink_active_list(unsigned long nr_to_scan,
> >
> >          /* Referenced or rmap lock contention: rotate */
> >          if (folio_referenced(folio, 0, sc->target_mem_cgroup,
> > -                             &vm_flags) != 0) {
> > +                             &vm_flags, sc->rw_try_lock) != 0) {
> >                  /*
> >                   * Identify referenced, file-backed active folios and
> >                   * give them one more trip around the active list. So
> > @@ -2096,6 +2100,7 @@ static unsigned int reclaim_folio_list(struct list_head *folio_list,
> >                  .may_unmap = 1,
> >                  .may_swap = 1,
> >                  .no_demotion = 1,
> > +                .rw_try_lock = 1,
> >          };
> >
> >          nr_reclaimed = shrink_folio_list(folio_list, pgdat, &sc, &dummy_stat, false);
> > @@ -5442,6 +5447,7 @@ static ssize_t lru_gen_seq_write(struct file *file, const char __user *src,
> >                  .may_swap = true,
> >                  .reclaim_idx = MAX_NR_ZONES - 1,
> >                  .gfp_mask = GFP_KERNEL,
> > +                .rw_try_lock = 1,
> >          };
> >
> >          buf = kvmalloc(len + 1, GFP_KERNEL);
> > @@ -6414,6 +6420,7 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
> >                  .may_writepage = !laptop_mode,
> >                  .may_unmap = 1,
> >                  .may_swap = 1,
> > +                .rw_try_lock = 1,
> >          };
> >
> >          /*
> > @@ -6459,6 +6466,7 @@ unsigned long mem_cgroup_shrink_node(struct mem_cgroup *memcg,
> >                  .may_unmap = 1,
> >                  .reclaim_idx = MAX_NR_ZONES - 1,
> >                  .may_swap = !noswap,
> > +                .rw_try_lock = 1,
> >          };
> >
> >          WARN_ON_ONCE(!current->reclaim_state);
> > @@ -6503,6 +6511,7 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
> >                  .may_unmap = 1,
> >                  .may_swap = !!(reclaim_options & MEMCG_RECLAIM_MAY_SWAP),
> >                  .proactive = !!(reclaim_options & MEMCG_RECLAIM_PROACTIVE),
> > +                .rw_try_lock = 1,
> >          };
> >          /*
> >           * Traverse the ZONELIST_FALLBACK zonelist of the current node to put
> > @@ -6764,6 +6773,7 @@ static int balance_pgdat(pg_data_t *pgdat, int order, int highest_zoneidx)
> >                  .gfp_mask = GFP_KERNEL,
> >                  .order = order,
> >                  .may_unmap = 1,
> > +                .rw_try_lock = 1,
> >          };
> >
> >          set_task_reclaim_state(current, &sc.reclaim_state);
> > @@ -7223,6 +7233,7 @@ unsigned long shrink_all_memory(unsigned long nr_to_reclaim)
> >                  .may_unmap = 1,
> >                  .may_swap = 1,
> >                  .hibernation_mode = 1,
> > +                .rw_try_lock = 1,
> >          };
> >          struct zonelist *zonelist = node_zonelist(numa_node_id(), sc.gfp_mask);
> >          unsigned long nr_reclaimed;
> > @@ -7381,6 +7392,7 @@ static int __node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned in
> >                  .may_unmap = !!(node_reclaim_mode & RECLAIM_UNMAP),
> >                  .may_swap = 1,
> >                  .reclaim_idx = gfp_zone(gfp_mask),
> > +                .rw_try_lock = 1,
> >          };
> >          unsigned long pflags;
> >

Thanks
Barry
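
P.S. to make the question above concrete, here is a minimal userspace
sketch of the two locking modes that rw_try_lock selects. This is for
illustration only, not kernel code: a pthread mutex stands in for the
rmap lock, folio_referenced_mock and scan_mappings are hypothetical
names, and the -1 sentinel is an assumption modelled on the "bailed
out due to rmap lock contention" case documented in the patch.

/*
 * Userspace analogue of folio_referenced()'s locking modes.
 * Build with: gcc -pthread sketch.c
 */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t rmap_lock = PTHREAD_MUTEX_INITIALIZER;

/* Stand-in for the reference scan done under the rmap lock. */
static int scan_mappings(void)
{
        return 0;       /* pretend no mapping referenced the folio */
}

static int folio_referenced_mock(unsigned int rw_try_lock)
{
        int referenced;

        if (rw_try_lock) {
                /*
                 * Opportunistic mode: if the lock is contended, bail
                 * out with a sentinel so the caller rotates the folio
                 * instead of sleeping on the lock.
                 */
                if (pthread_mutex_trylock(&rmap_lock) != 0)
                        return -1;
        } else {
                /*
                 * rw_try_lock == 0: sleep until the lock is ours, so
                 * the walk always produces a real answer.
                 */
                pthread_mutex_lock(&rmap_lock);
        }

        referenced = scan_mappings();
        pthread_mutex_unlock(&rmap_lock);
        return referenced;
}

int main(void)
{
        printf("try_lock mode: %d\n", folio_referenced_mock(1));
        printf("blocking mode: %d\n", folio_referenced_mock(0));
        return 0;
}

With rw_try_lock == 1 a contended walk returns at once and the caller
takes the "Referenced or rmap lock contention: rotate" path quoted
above; with rw_try_lock == 0 the walker waits for the lock, trading
reclaim latency for an accurate reference count, which is presumably
what a caller setting it to 0 would want.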