Hi, Jan, All,

Please take a look at this patch again. Although this may not be a
graceful approach, I can't think of any other way to fix the problem
except using workingset information.

Thanks,

On 2024/2/1 18:08, Liu Shixin wrote:
> When the pagefault is not for write and the refault distance is close,
> the page will be activated directly. If there are too many such pages in
> a file, that means the pages may be reclaimed immediately.
> In such a situation, read-ahead has no positive effect since it will
> only waste IO. So collect the number of such pages, and when the number is
> too large, stop bothering with read-ahead for a while until it decreases
> automatically.
>
> Define 'too large' as 10000 empirically, which solves the problem and is
> not affected by occasional active refaults.
>
> Signed-off-by: Liu Shixin <liushixin2@xxxxxxxxxx>
> ---
>  include/linux/fs.h      |  2 ++
>  include/linux/pagemap.h |  1 +
>  mm/filemap.c            | 16 ++++++++++++++++
>  mm/readahead.c          |  4 ++++
>  4 files changed, 23 insertions(+)
>
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index ed5966a704951..f2a1825442f5a 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -960,6 +960,7 @@ struct fown_struct {
>   *	the first of these pages is accessed.
>   * @ra_pages: Maximum size of a readahead request, copied from the bdi.
>   * @mmap_miss: How many mmap accesses missed in the page cache.
> + * @active_refault: Number of active page refaults.
>   * @prev_pos: The last byte in the most recent read request.
>   *
>   * When this structure is passed to ->readahead(), the "most recent"
> @@ -971,6 +972,7 @@ struct file_ra_state {
>  	unsigned int async_size;
>  	unsigned int ra_pages;
>  	unsigned int mmap_miss;
> +	unsigned int active_refault;
>  	loff_t prev_pos;
>  };
>
> diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
> index 2df35e65557d2..da9eaf985dec4 100644
> --- a/include/linux/pagemap.h
> +++ b/include/linux/pagemap.h
> @@ -1256,6 +1256,7 @@ struct readahead_control {
>  	pgoff_t _index;
>  	unsigned int _nr_pages;
>  	unsigned int _batch_count;
> +	unsigned int _active_refault;
>  	bool _workingset;
>  	unsigned long _pflags;
>  };
> diff --git a/mm/filemap.c b/mm/filemap.c
> index 750e779c23db7..4de80592ab270 100644
> --- a/mm/filemap.c
> +++ b/mm/filemap.c
> @@ -3037,6 +3037,7 @@ loff_t mapping_seek_hole_data(struct address_space *mapping, loff_t start,
>
>  #ifdef CONFIG_MMU
>  #define MMAP_LOTSAMISS  (100)
> +#define ACTIVE_REFAULT_LIMIT	(10000)
>  /*
>   * lock_folio_maybe_drop_mmap - lock the page, possibly dropping the mmap_lock
>   * @vmf - the vm_fault for this fault.
> @@ -3142,6 +3143,18 @@ static struct file *do_sync_mmap_readahead(struct vm_fault *vmf)
>  	if (mmap_miss > MMAP_LOTSAMISS)
>  		return fpin;
>
> +	ractl._active_refault = READ_ONCE(ra->active_refault);
> +	if (ractl._active_refault)
> +		WRITE_ONCE(ra->active_refault, --ractl._active_refault);
> +
> +	/*
> +	 * If there are a lot of refaults of active pages in this file,
> +	 * that means memory reclaim is ongoing. Stop bothering with
> +	 * read-ahead since it will only waste IO.
> +	 */
> +	if (ractl._active_refault >= ACTIVE_REFAULT_LIMIT)
> +		return fpin;
> +
>  	/*
>  	 * mmap read-around
>  	 */
> @@ -3151,6 +3164,9 @@ static struct file *do_sync_mmap_readahead(struct vm_fault *vmf)
>  	ra->async_size = ra->ra_pages / 4;
>  	ractl._index = ra->start;
>  	page_cache_ra_order(&ractl, ra, 0);
> +
> +	WRITE_ONCE(ra->active_refault, ractl._active_refault);
> +
>  	return fpin;
>  }
>
> diff --git a/mm/readahead.c b/mm/readahead.c
> index cc4abb67eb223..d79bb70a232c4 100644
> --- a/mm/readahead.c
> --- a/mm/readahead.c
> +++ b/mm/readahead.c
> @@ -263,6 +263,10 @@ void page_cache_ra_unbounded(struct readahead_control *ractl,
>  		folio_set_readahead(folio);
>  		ractl->_workingset |= folio_test_workingset(folio);
>  		ractl->_nr_pages++;
> +		if (unlikely(folio_test_workingset(folio)))
> +			ractl->_active_refault++;
> +		else if (unlikely(ractl->_active_refault))
> +			ractl->_active_refault--;
>  	}
>
>  	/*
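To make the intended hysteresis easier to reason about, here is a minimal
userspace model of the counter behavior (an illustrative sketch, not
kernel code: it collapses ra->active_refault and ractl._active_refault
into one variable, and the 32-folio read-ahead batch plus the main()
driver loop are made up for illustration; the limit and the
increment/decay rules mirror the hunks above):

#include <stdbool.h>
#include <stdio.h>

#define ACTIVE_REFAULT_LIMIT	(10000)

static unsigned int active_refault;

/* Models the do_sync_mmap_readahead() hunk: decay by one per fault,
 * then skip read-ahead while the counter is still above the limit. */
static bool should_do_readahead(void)
{
	if (active_refault)
		active_refault--;
	return active_refault < ACTIVE_REFAULT_LIMIT;
}

/* Models the page_cache_ra_unbounded() hunk: count workingset
 * (active-refault) folios, decay the counter on every other folio. */
static void account_folio(bool workingset)
{
	if (workingset)
		active_refault++;
	else if (active_refault)
		active_refault--;
}

int main(void)
{
	int faults = 0;

	/* Refault storm: every fault reads ahead a batch of 32 folios
	 * (batch size made up for illustration) that all turn out to be
	 * active refaults, until read-ahead shuts itself off. */
	while (should_do_readahead()) {
		for (int i = 0; i < 32; i++)
			account_folio(true);
		faults++;
	}
	printf("faults until read-ahead disabled: %d (counter=%u)\n",
	       faults, active_refault);

	/* Afterwards each fault only decays the counter by one, so
	 * read-ahead resumes once it drops back below the limit. */
	faults = 0;
	while (!should_do_readahead())
		faults++;
	printf("faults until read-ahead resumes: %d\n", faults);
	return 0;
}

With these made-up numbers, read-ahead shuts off after roughly
ACTIVE_REFAULT_LIMIT / 31 ~= 323 faults of pure active refaults, and
while it is off each fault drains the counter by one, so it re-enables
itself automatically once the reclaim pressure subsides.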