zswap does not have a suitable method to select objects that have not been accessed for a long time; it just shrinks the pool when the limit is hit. There is a high probability of wasting memory in zswap if the limit is too high. This patch adds a new interface, writeback_time_threshold, to shrink the zswap pool proactively based on a time threshold in seconds, e.g.:: echo 600 > /sys/module/zswap/parameters/writeback_time_threshold If zswap entries have not been accessed for more than 600 seconds, they will be written back to swap. If set to 0, all of them will be written back. Signed-off-by: Zhongkun He <hezhongkun.hzk@xxxxxxxxxxxxx> --- Documentation/admin-guide/mm/zswap.rst | 9 +++ mm/zswap.c | 76 ++++++++++++++++++++++++++ 2 files changed, 85 insertions(+) diff --git a/Documentation/admin-guide/mm/zswap.rst b/Documentation/admin-guide/mm/zswap.rst index 45b98390e938..9ffaed26c3c0 100644 --- a/Documentation/admin-guide/mm/zswap.rst +++ b/Documentation/admin-guide/mm/zswap.rst @@ -153,6 +153,15 @@ attribute, e. g.:: Setting this parameter to 100 will disable the hysteresis. +When there is a lot of cold memory according to the store time in the zswap, +it can be swapout and save memory in userspace proactively. User can write +writeback time threshold in second to enable it, e.g.:: + + echo 600 > /sys/module/zswap/parameters/writeback_time_threshold + +If zswap_entrys have not been accessed for more than 600 seconds, they will be +swapout. if set to 0, all of them will be swapout. + A debugfs interface is provided for various statistic about pool size, number of pages stored, same-value filled pages and various counters for the reasons pages are rejected. 
diff --git a/mm/zswap.c b/mm/zswap.c index 083c693602b8..c3a19b56a29b 100644 --- a/mm/zswap.c +++ b/mm/zswap.c @@ -141,6 +141,16 @@ static bool zswap_exclusive_loads_enabled = IS_ENABLED( CONFIG_ZSWAP_EXCLUSIVE_LOADS_DEFAULT_ON); module_param_named(exclusive_loads, zswap_exclusive_loads_enabled, bool, 0644); +/* zswap writeback time threshold in second */ +static unsigned int zswap_writeback_time_thr; +static int zswap_writeback_time_thr_param_set(const char *, const struct kernel_param *); +static const struct kernel_param_ops zswap_writeback_param_ops = { + .set = zswap_writeback_time_thr_param_set, + .get = param_get_uint, +}; +module_param_cb(writeback_time_threshold, &zswap_writeback_param_ops, + &zswap_writeback_time_thr, 0644); + /* Number of zpools in zswap_pool (empirically determined for scalability) */ #define ZSWAP_NR_ZPOOLS 32 @@ -197,6 +207,7 @@ struct zswap_pool { * value - value of the same-value filled pages which have same content * objcg - the obj_cgroup that the compressed memory is charged to * lru - handle to the pool's lru used to evict pages. + * sto_time - the store time of zswap_entry. 
*/ struct zswap_entry { struct rb_node rbnode; @@ -210,6 +221,7 @@ struct zswap_entry { }; struct obj_cgroup *objcg; struct list_head lru; + ktime_t sto_time; }; /* @@ -288,6 +300,31 @@ static void zswap_update_total_size(void) zswap_pool_total_size = total; } +static void zswap_reclaim_entry_by_timethr(void); + +static bool zswap_reach_timethr(struct zswap_pool *pool) +{ + struct zswap_entry *entry; + ktime_t expire_time = 0; + bool ret = false; + + spin_lock(&pool->lru_lock); + + if (list_empty(&pool->lru)) + goto out; + + entry = list_last_entry(&pool->lru, struct zswap_entry, lru); + expire_time = ktime_add(entry->sto_time, + ns_to_ktime(zswap_writeback_time_thr * NSEC_PER_SEC)); + + if (ktime_after(ktime_get_boottime(), expire_time)) + ret = true; +out: + spin_unlock(&pool->lru_lock); + return ret; +} + + /********************************* * zswap entry functions **********************************/ @@ -395,6 +432,7 @@ static void zswap_free_entry(struct zswap_entry *entry) else { spin_lock(&entry->pool->lru_lock); list_del(&entry->lru); + entry->sto_time = 0; spin_unlock(&entry->pool->lru_lock); zpool_free(zswap_find_zpool(entry), entry->handle); zswap_pool_put(entry->pool); @@ -709,6 +747,28 @@ static void shrink_worker(struct work_struct *w) zswap_pool_put(pool); } +static void zswap_reclaim_entry_by_timethr(void) +{ + struct zswap_pool *pool = zswap_pool_current_get(); + int ret, failures = 0; + + if (!pool) + return; + + while (zswap_reach_timethr(pool)) { + ret = zswap_reclaim_entry(pool); + if (ret) { + zswap_reject_reclaim_fail++; + if (ret != -EAGAIN) + break; + if (++failures == MAX_RECLAIM_RETRIES) + break; + } + cond_resched(); + } + zswap_pool_put(pool); +} + static struct zswap_pool *zswap_pool_create(char *type, char *compressor) { int i; @@ -1037,6 +1097,21 @@ static int zswap_enabled_param_set(const char *val, return ret; } +static int zswap_writeback_time_thr_param_set(const char *val, + const struct kernel_param *kp) +{ + int ret = -ENODEV; + 
+ /* if this is load-time (pre-init) param setting, just return. */ + if (system_state != SYSTEM_RUNNING) + return ret; + + ret = param_set_uint(val, kp); + if (!ret) + zswap_reclaim_entry_by_timethr(); + return ret; +} + /********************************* * writeback code **********************************/ @@ -1360,6 +1435,7 @@ bool zswap_store(struct folio *folio) if (entry->length) { spin_lock(&entry->pool->lru_lock); list_add(&entry->lru, &entry->pool->lru); + entry->sto_time = ktime_get_boottime(); spin_unlock(&entry->pool->lru_lock); } spin_unlock(&tree->lock); -- 2.25.1