From: Hou Tao <houtao1@xxxxxxxxxx> When there are concurrent uref release and bpf timer init operations, the following sequence diagram is possible and it will lead to memory leak: bpf program X bpf_timer_init() lock timer->lock read timer->timer as NULL read map->usercnt != 0 process Y close(map_fd) // put last uref bpf_map_put_uref() atomic_dec_and_test(map->usercnt) array_map_free_timers() bpf_timer_cancel_and_free() // just return and lead to memory leak read timer->timer is NULL t = bpf_map_kmalloc_node() timer->timer = t unlock timer->lock Fix the problem by checking map->usercnt again after timer->timer is assigned, so when there are concurrent uref release and bpf timer init, either bpf_timer_cancel_and_free() from uref release reads a no-NULL timer and the newly-added check of map->usercnt reads a zero usercnt. Because atomic_dec_and_test(map->usercnt) and READ_ONCE(timer->timer) in bpf_timer_cancel_and_free() are not protected by a lock, so add a memory barrier to guarantee the order between map->usercnt and timer->timer. Also use WRITE_ONCE(timer->timer, x) to match the lockless read of timer->timer. Reported-by: Hsin-Wei Hung <hsinweih@xxxxxxx> Closes: https://lore.kernel.org/bpf/CABcoxUaT2k9hWsS1tNgXyoU3E-=PuOgMn737qK984fbFmfYixQ@xxxxxxxxxxxxxx Fixes: b00628b1c7d5 ("bpf: Introduce bpf timers.") Signed-off-by: Hou Tao <houtao1@xxxxxxxxxx> --- kernel/bpf/helpers.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 757b99c1e613f..a7d92c3ddc3dd 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -1156,7 +1156,7 @@ BPF_CALL_3(bpf_timer_init, struct bpf_timer_kern *, timer, struct bpf_map *, map u64, flags) { clockid_t clockid = flags & (MAX_CLOCKS - 1); - struct bpf_hrtimer *t; + struct bpf_hrtimer *t, *to_free = NULL; int ret = 0; BUILD_BUG_ON(MAX_CLOCKS != 16); @@ -1197,9 +1197,21 @@ BPF_CALL_3(bpf_timer_init, struct bpf_timer_kern *, timer, struct bpf_map *, map rcu_assign_pointer(t->callback_fn, NULL); hrtimer_init(&t->timer, clockid, HRTIMER_MODE_REL_SOFT); t->timer.function = bpf_timer_cb; - timer->timer = t; + WRITE_ONCE(timer->timer, t); + /* Guarantee order between timer->timer and map->usercnt. So when + * there are concurrent uref release and bpf timer init, either + * bpf_timer_cancel_and_free() called by uref release reads a no-NULL + * timer or atomic64_read() below reads a zero usercnt. + */ + smp_mb(); + if (!atomic64_read(&map->usercnt)) { + WRITE_ONCE(timer->timer, NULL); + to_free = t; + ret = -EPERM; + } out: __bpf_spin_unlock_irqrestore(&timer->lock); + kfree(to_free); return ret; } @@ -1372,7 +1384,7 @@ void bpf_timer_cancel_and_free(void *val) /* The subsequent bpf_timer_start/cancel() helpers won't be able to use * this timer, since it won't be initialized. */ - timer->timer = NULL; + WRITE_ONCE(timer->timer, NULL); out: __bpf_spin_unlock_irqrestore(&timer->lock); if (!t) -- 2.29.2