To reduce grace periods and improve kfree() performance, we have
recently added batching, dramatically bringing down the number of
grace periods while allowing us to use kfree_bulk() for efficient
freeing.

However, this has increased the likelihood of an OOM condition under
a heavy kfree_rcu() flood on systems with little memory.

This patch introduces a shrinker that starts grace periods right away
when the system is under memory pressure due to objects that have not
yet started a grace period.

With this patch, I no longer observe an OOM on a system with 512MB
RAM and 8 CPUs, using the following rcuperf options:

  rcuperf.kfree_loops=20000
  rcuperf.kfree_alloc_num=8000
  rcuperf.kfree_rcu_test=1
  rcuperf.kfree_mult=2

NOTE: On systems with no memory pressure, the patch has no effect, as
intended.

Cc: urezki@xxxxxxxxx
Signed-off-by: Joel Fernandes (Google) <joel@xxxxxxxxxxxxxxxxx>
---
 kernel/rcu/tree.c | 58 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 58 insertions(+)

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index d91c9156fab2e..28ec35e15529d 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -2723,6 +2723,8 @@ struct kfree_rcu_cpu {
 	struct delayed_work monitor_work;
 	bool monitor_todo;
 	bool initialized;
+	// Number of objects for which GP not started
+	int count;
 };
 
 static DEFINE_PER_CPU(struct kfree_rcu_cpu, krc);
@@ -2791,6 +2793,7 @@ static inline bool queue_kfree_rcu_work(struct kfree_rcu_cpu *krcp)
 
 	krwp->head_free = krcp->head;
 	krcp->head = NULL;
+	krcp->count = 0;
 	INIT_RCU_WORK(&krwp->rcu_work, kfree_rcu_work);
 	queue_rcu_work(system_wq, &krwp->rcu_work);
 	return true;
@@ -2864,6 +2867,7 @@ void kfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
 	head->func = func;
 	head->next = krcp->head;
 	krcp->head = head;
+	krcp->count++;
 
 	// Set timer to drain after KFREE_DRAIN_JIFFIES.
 	if (rcu_scheduler_active == RCU_SCHEDULER_RUNNING &&
@@ -2879,6 +2883,58 @@
 }
 EXPORT_SYMBOL_GPL(kfree_call_rcu);
 
+static unsigned long
+kfree_rcu_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
+{
+	int cpu;
+	unsigned long flags, count = 0;
+
+	/* Snapshot count of all CPUs */
+	for_each_online_cpu(cpu) {
+		struct kfree_rcu_cpu *krcp = per_cpu_ptr(&krc, cpu);
+
+		spin_lock_irqsave(&krcp->lock, flags);
+		count += krcp->count;
+		spin_unlock_irqrestore(&krcp->lock, flags);
+	}
+
+	return count;
+}
+
+static unsigned long
+kfree_rcu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
+{
+	int cpu, freed = 0;
+	unsigned long flags;
+
+	for_each_online_cpu(cpu) {
+		int count;
+		struct kfree_rcu_cpu *krcp = per_cpu_ptr(&krc, cpu);
+
+		count = krcp->count;
+		spin_lock_irqsave(&krcp->lock, flags);
+		if (krcp->monitor_todo)
+			kfree_rcu_drain_unlock(krcp, flags);
+		else
+			spin_unlock_irqrestore(&krcp->lock, flags);
+
+		sc->nr_to_scan -= count;
+		freed += count;
+
+		if (sc->nr_to_scan <= 0)
+			break;
+	}
+
+	return freed;
+}
+
+static struct shrinker kfree_rcu_shrinker = {
+	.count_objects = kfree_rcu_shrink_count,
+	.scan_objects = kfree_rcu_shrink_scan,
+	.batch = 0,
+	.seeks = DEFAULT_SEEKS,
+};
+
 void __init kfree_rcu_scheduler_running(void)
 {
 	int cpu;
@@ -3774,6 +3830,8 @@ static void __init kfree_rcu_batch_init(void)
 		INIT_DELAYED_WORK(&krcp->monitor_work, kfree_rcu_monitor);
 		krcp->initialized = true;
 	}
+	if (register_shrinker(&kfree_rcu_shrinker))
+		pr_err("Failed to register kfree_rcu() shrinker!\n");
 }
 
 void __init rcu_init(void)
-- 
2.25.0.265.gbab2e86ba0-goog
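
P.S. For reviewers less familiar with the shrinker API this patch hooks
into, here is a minimal, self-contained sketch of the ->count_objects /
->scan_objects contract. It is not part of the patch and assumes the
current (v5.6-era) API, where register_shrinker() takes just the
shrinker pointer; demo_count and the demo_* names are made-up
placeholders standing in for a real freeable-object counter such as
krcp->count.

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/shrinker.h>
#include <linux/atomic.h>

/* Hypothetical counter of objects we could free under memory pressure. */
static atomic_long_t demo_count = ATOMIC_LONG_INIT(0);

/*
 * ->count_objects: report how many objects are freeable right now.
 * Returning 0 tells the VM there is nothing to scan on this pass.
 */
static unsigned long demo_shrink_count(struct shrinker *shrink,
				       struct shrink_control *sc)
{
	return atomic_long_read(&demo_count);
}

/*
 * ->scan_objects: attempt to free up to sc->nr_to_scan objects and
 * return the number actually freed (or SHRINK_STOP to give up).
 */
static unsigned long demo_shrink_scan(struct shrinker *shrink,
				      struct shrink_control *sc)
{
	unsigned long freed = min_t(unsigned long, sc->nr_to_scan,
				    atomic_long_read(&demo_count));

	atomic_long_sub(freed, &demo_count); /* pretend we freed them */
	return freed;
}

static struct shrinker demo_shrinker = {
	.count_objects	= demo_shrink_count,
	.scan_objects	= demo_shrink_scan,
	.seeks		= DEFAULT_SEEKS,
};

static int __init demo_init(void)
{
	return register_shrinker(&demo_shrinker);
}

static void __exit demo_exit(void)
{
	unregister_shrinker(&demo_shrinker);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");

The patch follows the same shape: kfree_rcu_shrink_count() sums the
per-CPU krcp->count values, while kfree_rcu_shrink_scan() "frees"
objects not by releasing memory directly but by forcing the drain (and
hence a grace period) via kfree_rcu_drain_unlock().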