From: Kairui Song <kasong@xxxxxxxxxxx> There isn't any feature change, just move the refault distance checking logic into a standalone helper so it can be reused later. Signed-off-by: Kairui Song <kasong@xxxxxxxxxxx> --- mm/workingset.c | 137 ++++++++++++++++++++++++++++-------------------- 1 file changed, 79 insertions(+), 58 deletions(-) diff --git a/mm/workingset.c b/mm/workingset.c index 8613945fc66e..b0704cbfc667 100644 --- a/mm/workingset.c +++ b/mm/workingset.c @@ -170,9 +170,10 @@ */ #define WORKINGSET_SHIFT 1 -#define EVICTION_SHIFT ((BITS_PER_LONG - BITS_PER_XA_VALUE) + \ +#define EVICTION_SHIFT ((BITS_PER_LONG - BITS_PER_XA_VALUE) + \ WORKINGSET_SHIFT + NODES_SHIFT + \ MEM_CGROUP_ID_SHIFT) +#define EVICTION_BITS (BITS_PER_LONG - (EVICTION_SHIFT)) #define EVICTION_MASK (~0UL >> EVICTION_SHIFT) /* @@ -216,6 +217,79 @@ static void unpack_shadow(void *shadow, int *memcgidp, pg_data_t **pgdat, *workingsetp = workingset; } +/* + * Get the refault distance timestamp reading at eviction time. + */ +static inline unsigned long lru_eviction(struct lruvec *lruvec, + int bits, int bucket_order) +{ + unsigned long eviction = atomic_long_read(&lruvec->nonresident_age); + + eviction >>= bucket_order; + eviction &= ~0UL >> (BITS_PER_LONG - bits); + + return eviction; +} + +/* + * Calculate and test refault distance. + */ +static inline bool lru_test_refault(struct mem_cgroup *memcg, + struct lruvec *lruvec, + unsigned long eviction, bool file, + int bits, int bucket_order) +{ + unsigned long refault, distance; + unsigned long active, inactive_file, inactive_anon; + + eviction <<= bucket_order; + refault = atomic_long_read(&lruvec->nonresident_age); + + /* + * The unsigned subtraction here gives an accurate distance + * across nonresident_age overflows in most cases. There is a + * special case: usually, shadow entries have a short lifetime + * and are either refaulted or reclaimed along with the inode + * before they get too old. But it is not impossible for the + * nonresident_age to lap a shadow entry in the field, which + * can then result in a false small refault distance, leading + * to a false activation should this old entry actually + * refault again. However, earlier kernels used to deactivate + * unconditionally with *every* reclaim invocation for the + * longest time, so the occasional inappropriate activation + * leading to pressure on the active list is not a problem. + */ + distance = (refault - eviction) & (~0UL >> (BITS_PER_LONG - bits)); + + /* + * Compare the distance to the existing workingset size. We + * don't activate pages that couldn't stay resident even if + * all the memory was available to the workingset. Whether + * workingset competition needs to consider anon or not depends + * on having free swap space. + */ + active = lruvec_page_state(lruvec, NR_ACTIVE_FILE); + inactive_file = lruvec_page_state(lruvec, NR_INACTIVE_FILE); + + if (mem_cgroup_get_nr_swap_pages(memcg) > 0) { + active += lruvec_page_state(lruvec, NR_ACTIVE_ANON); + inactive_anon = lruvec_page_state(lruvec, NR_INACTIVE_ANON); + } else { + inactive_anon = 0; + } + + /* + * When there are already enough active pages, be less aggressive + * on reactivating pages, challenge an large set of established + * active pages with one time refaulted page may not be a good idea. + */ + if (active >= inactive_anon + inactive_file) + return distance < inactive_anon + inactive_file; + else + return distance < active + \ + (file ? inactive_anon : inactive_file); +} + #ifdef CONFIG_LRU_GEN static void *lru_gen_eviction(struct folio *folio) @@ -386,11 +460,10 @@ void *workingset_eviction(struct folio *folio, struct mem_cgroup *target_memcg) lruvec = mem_cgroup_lruvec(target_memcg, pgdat); /* XXX: target_memcg can be NULL, go through lruvec */ memcgid = mem_cgroup_id(lruvec_memcg(lruvec)); - eviction = atomic_long_read(&lruvec->nonresident_age); - eviction >>= bucket_order; + eviction = lru_eviction(lruvec, EVICTION_BITS, bucket_order); workingset_age_nonresident(lruvec, folio_nr_pages(folio)); return pack_shadow(memcgid, pgdat, eviction, - folio_test_workingset(folio)); + folio_test_workingset(folio)); } /** @@ -408,11 +481,6 @@ bool workingset_test_recent(void *shadow, bool file, bool *workingset) { struct mem_cgroup *eviction_memcg; struct lruvec *eviction_lruvec; - unsigned long refault_distance; - unsigned long inactive_file; - unsigned long inactive_anon; - unsigned long refault; - unsigned long active; int memcgid; struct pglist_data *pgdat; unsigned long eviction; @@ -421,7 +489,6 @@ bool workingset_test_recent(void *shadow, bool file, bool *workingset) return lru_gen_test_recent(shadow, file, &eviction_lruvec, &eviction, workingset); unpack_shadow(shadow, &memcgid, &pgdat, &eviction, workingset); - eviction <<= bucket_order; /* * Look up the memcg associated with the stored ID. It might @@ -442,56 +509,10 @@ bool workingset_test_recent(void *shadow, bool file, bool *workingset) eviction_memcg = mem_cgroup_from_id(memcgid); if (!mem_cgroup_disabled() && !eviction_memcg) return false; - eviction_lruvec = mem_cgroup_lruvec(eviction_memcg, pgdat); - refault = atomic_long_read(&eviction_lruvec->nonresident_age); - /* - * Calculate the refault distance - * - * The unsigned subtraction here gives an accurate distance - * across nonresident_age overflows in most cases. There is a - * special case: usually, shadow entries have a short lifetime - * and are either refaulted or reclaimed along with the inode - * before they get too old. But it is not impossible for the - * nonresident_age to lap a shadow entry in the field, which - * can then result in a false small refault distance, leading - * to a false activation should this old entry actually - * refault again. However, earlier kernels used to deactivate - * unconditionally with *every* reclaim invocation for the - * longest time, so the occasional inappropriate activation - * leading to pressure on the active list is not a problem. - */ - refault_distance = (refault - eviction) & EVICTION_MASK; - - /* - * Compare the distance to the existing workingset size. We - * don't activate pages that couldn't stay resident even if - * all the memory was available to the workingset. Whether - * workingset competition needs to consider anon or not depends - * on having free swap space. - */ - active = lruvec_page_state(eviction_lruvec, NR_ACTIVE_FILE); - inactive_file = lruvec_page_state(eviction_lruvec, NR_INACTIVE_FILE); - - if (mem_cgroup_get_nr_swap_pages(eviction_memcg) > 0) { - active += lruvec_page_state(eviction_lruvec, - NR_ACTIVE_ANON); - inactive_anon = lruvec_page_state(eviction_lruvec, - NR_INACTIVE_ANON); - } else { - inactive_anon = 0; - } - - /* - * When there are already enough active pages, be less aggressive - * on reactivating pages, challenge an large set of established - * active pages with one time refaulted page may not be a good idea. - */ - if (active >= inactive_anon + inactive_file) - return refault_distance < inactive_anon + inactive_file; - else - return refault_distance < active + (file ? inactive_anon : inactive_file); + return lru_test_refault(eviction_memcg, eviction_lruvec, eviction, + file, EVICTION_BITS, bucket_order); } /** -- 2.41.0