Thanks a lot for your comments.

On 2024/06/11 5:27, Yosry Ahmed wrote:
>>                 unsigned long nr_to_walk = 1;
>>
>> +               if (!list_lru_count_one(&zswap_list_lru, nid, memcg))
>> +                       continue;
>> +               ++stored;
>>                 shrunk += list_lru_walk_one(&zswap_list_lru, nid, memcg,
>>                                             &shrink_memcg_cb, NULL, &nr_to_walk);
>>         }
>> +
>> +       if (!stored)
>> +               return -ENOENT;
>> +
>
> Can't we just check nr_to_walk here and return -ENOENT if it remains as 1?
>
> Something like:
>
> if (nr_to_walk)
>         return -ENOENT;
> if (!shrunk)
>         return -EAGAIN;
> return 0;
>

Ah, the counting step can be removed. I will change it in v2. A rough
sketch of what I have in mind is at the end of this mail.

>>         return shrunk ? 0 : -EAGAIN;
>>  }
>>
>> @@ -1418,12 +1425,18 @@ static void shrink_worker(struct work_struct *w)
>>  {
>>         struct mem_cgroup *memcg = NULL;
>>         struct mem_cgroup *next_memcg;
>> -       int ret, failures = 0;
>> +       int ret, failures = 0, progress;
>>         unsigned long thr;
>>
>>         /* Reclaim down to the accept threshold */
>>         thr = zswap_accept_thr_pages();
>>
>> +       /*
>> +        * We might start from the last memcg.
>> +        * That is not a failure.
>> +        */
>> +       progress = 1;
>> +
>>         /* global reclaim will select cgroup in a round-robin fashion.
>>          *
>>          * We save iteration cursor memcg into zswap_next_shrink,
>> @@ -1461,9 +1474,12 @@ static void shrink_worker(struct work_struct *w)
>>          */
>>         if (!memcg) {
>>                 spin_unlock(&zswap_shrink_lock);
>> -               if (++failures == MAX_RECLAIM_RETRIES)
>> +
>> +               /* tree walk completed but no progress */
>> +               if (!progress && ++failures == MAX_RECLAIM_RETRIES)
>>                         break;
>
> It seems like we may keep iterating the entire hierarchy a lot of
> times as long as we are making any type of progress. This doesn't seem
> right.
>

Since shrink_worker() evicts only one page per tree walk when there is
only one memcg using zswap, I believe this is the intended behavior.
Even if we broke out of the loop more aggressively, it would only
postpone the problem, because pool_limit_hit would trigger the worker
again. I agree the existing approach is inefficient; it might be better
to rethink the one-page-per-walk round-robin strategy itself. See the
second sketch at the end of this mail for one possible direction.

>>
>> +               progress = 0;
>>                 goto resched;
>>         }
>>
>> @@ -1493,10 +1509,15 @@ static void shrink_worker(struct work_struct *w)
>>         /* drop the extra reference */
>>         mem_cgroup_put(memcg);
>>
>> -       if (ret == -EINVAL)
>> -               break;
>> +       /* not a writeback candidate memcg */
>> +       if (ret == -EINVAL || ret == -ENOENT)
>> +               continue;
>> +
>
> We should probably return -ENOENT for memcg with writeback disabled as well.
>
>>         if (ret && ++failures == MAX_RECLAIM_RETRIES)
>>                 break;
>> +
>> +       ++progress;
>> +       /* reschedule as we performed some IO */
>>  resched:
>>                 cond_resched();
>>         } while (zswap_total_pages() > thr);
>> --
>> 2.43.0
>>
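
For reference, here is a rough, untested sketch of the v2 shrink_memcg()
I have in mind. It drops the counting step as you suggested, inferring
an empty LRU from nr_to_walk being left untouched by
list_lru_walk_one(), and it returns -ENOENT for writeback-disabled
memcgs per your other comment. The `walked` flag is just my shorthand
here; the actual v2 may use different names:

static int shrink_memcg(struct mem_cgroup *memcg)
{
        int nid, shrunk = 0;
        bool walked = false;

        /* treat writeback-disabled memcgs as "skip", as suggested */
        if (!mem_cgroup_zswap_writeback_enabled(memcg))
                return -ENOENT;

        for_each_node_state(nid, N_NORMAL_MEMORY) {
                unsigned long nr_to_walk = 1;

                shrunk += list_lru_walk_one(&zswap_list_lru, nid, memcg,
                                            &shrink_memcg_cb, NULL, &nr_to_walk);
                /* nr_to_walk drops to 0 iff this node's LRU had an entry */
                if (!nr_to_walk)
                        walked = true;
        }

        /* every per-node LRU was empty: nothing stored for this memcg */
        if (!walked)
                return -ENOENT;

        return shrunk ? 0 : -EAGAIN;
}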
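
And the second sketch, on the inefficiency point: a purely illustrative
direction (not part of this series) would be to give each walk a small
per-node batch instead of a single page, so a single-memcg workload does
not pay a full tree walk per evicted page. ZSWAP_SHRINK_BATCH below is a
made-up constant for illustration, not an existing one, and the loop is
just the fragment of shrink_memcg() that would change:

/* hypothetical batch size, for illustration only */
#define ZSWAP_SHRINK_BATCH 8UL

        for_each_node_state(nid, N_NORMAL_MEMORY) {
                /* evict up to a small batch per node instead of one page */
                unsigned long nr_to_walk = ZSWAP_SHRINK_BATCH;

                shrunk += list_lru_walk_one(&zswap_list_lru, nid, memcg,
                                            &shrink_memcg_cb, NULL, &nr_to_walk);
        }

Whether the batch should be fixed or scale with how far we are above the
accept threshold is an open question; this is only meant to show the
shape of the change.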