The pattern of wait_event(percpu_ref_is_zero()) is used in several
kernel components, and it carries the following risk:

- percpu_ref_is_zero() can return true in the window between
  atomic_long_sub_and_test() and ref->data->release(ref)
- since the refcount is observed as zero, percpu_ref_exit() may be
  called and the host data structure freed
- a use-after-free is then triggered in ->release(), which is still
  running when the host data structure is freed after
  percpu_ref_exit() returns

A minimal sketch of this racy caller pattern is appended below the
patch.

Reported-by: Zhong Jinghua <zhongjinghua@xxxxxxxxxx>
Fixes: 2b0d3d3e4fcf ("percpu_ref: reduce memory footprint of percpu_ref in fast path")
Signed-off-by: Ming Lei <ming.lei@xxxxxxxxxx>
---
 include/linux/percpu-refcount.h | 41 ++++++++++++++++++++++-----------
 lib/percpu-refcount.c           | 22 ++++++++++++++++++
 2 files changed, 50 insertions(+), 13 deletions(-)

diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h
index 006c6aae261e..6ef29ebffd58 100644
--- a/include/linux/percpu-refcount.h
+++ b/include/linux/percpu-refcount.h
@@ -55,6 +55,7 @@
 #include <linux/rcupdate.h>
 #include <linux/types.h>
 #include <linux/gfp.h>
+#include <linux/sched.h>
 
 struct percpu_ref;
 typedef void (percpu_ref_func_t)(struct percpu_ref *);
@@ -104,6 +105,7 @@ struct percpu_ref_data {
 	bool force_atomic:1;
 	bool allow_reinit:1;
 	bool auto_exit:1;
+	bool being_release:1;
 	struct rcu_head rcu;
 	struct percpu_ref *ref;
 };
@@ -137,6 +139,7 @@ void percpu_ref_kill_and_confirm(struct percpu_ref *ref,
 void percpu_ref_resurrect(struct percpu_ref *ref);
 void percpu_ref_reinit(struct percpu_ref *ref);
 bool percpu_ref_is_zero(struct percpu_ref *ref);
+wait_queue_head_t *percpu_ref_get_switch_waitq(void);
 
 /**
  * percpu_ref_kill - drop the initial ref
@@ -319,6 +322,29 @@ static inline bool percpu_ref_tryget_live(struct percpu_ref *ref)
 	return ret;
 }
 
+/* Internal helper, do not call it directly */
+static inline void __percpu_ref_put_many(struct percpu_ref *ref,
+		unsigned long nr)
+{
+	struct percpu_ref_data *data = ref->data;
+	struct percpu_ref copy = *ref;
+	bool release = false;
+
+	data->being_release = 1;
+	if (unlikely(atomic_long_sub_and_test(nr, &data->count))) {
+		data->release(ref);
+		release = true;
+	}
+	data->being_release = 0;
+
+	if (release) {
+		if (data->auto_exit)
+			percpu_ref_exit(&copy);
+		/* re-use the switch waitq to ack that the release is done */
+		wake_up_all(percpu_ref_get_switch_waitq());
+	}
+}
+
 /**
  * percpu_ref_put_many - decrement a percpu refcount
  * @ref: percpu_ref to put
@@ -337,19 +363,8 @@ static inline void percpu_ref_put_many(struct percpu_ref *ref, unsigned long nr)
 
 	if (__ref_is_percpu(ref, &percpu_count))
 		this_cpu_sub(*percpu_count, nr);
-	else {
-		struct percpu_ref_data *data = ref->data;
-		struct percpu_ref copy = *ref;
-		bool release = false;
-
-		if (unlikely(atomic_long_sub_and_test(nr, &data->count))) {
-			data->release(ref);
-			release = true;
-		}
-
-		if (release && data->auto_exit)
-			percpu_ref_exit(&copy);
-	}
+	else
+		__percpu_ref_put_many(ref, nr);
 
 	rcu_read_unlock();
 }
diff --git a/lib/percpu-refcount.c b/lib/percpu-refcount.c
index c0cadf92948f..fd50eda233ed 100644
--- a/lib/percpu-refcount.c
+++ b/lib/percpu-refcount.c
@@ -140,6 +140,22 @@ void percpu_ref_exit(struct percpu_ref *ref)
 	if (!data)
 		return;
 
+	/*
+	 * We may reach here because wait_event(percpu_ref_is_zero())
+	 * returned while ->release() had not completed or even started
+	 * yet, causing a use-after-free, so drain ->release() here.
+	 */
+	if (!data->auto_exit) {
+		/*
+		 * Order reading the atomic count in percpu_ref_is_zero()
+		 * against reading data->being_release. The pairing
+		 * barrier is the one implied by atomic_long_sub_and_test()
+		 * called from __percpu_ref_put_many().
+		 */
+		smp_rmb();
+		wait_event(percpu_ref_switch_waitq, !data->being_release);
+	}
+
 	spin_lock_irqsave(&percpu_ref_switch_lock, flags);
 	ref->percpu_count_ptr |= atomic_long_read(&ref->data->count) <<
 		__PERCPU_REF_FLAG_BITS;
@@ -480,3 +496,9 @@ void percpu_ref_resurrect(struct percpu_ref *ref)
 	spin_unlock_irqrestore(&percpu_ref_switch_lock, flags);
 }
 EXPORT_SYMBOL_GPL(percpu_ref_resurrect);
+
+wait_queue_head_t *percpu_ref_get_switch_waitq(void)
+{
+	return &percpu_ref_switch_waitq;
+}
+EXPORT_SYMBOL_GPL(percpu_ref_get_switch_waitq);
-- 
2.38.1
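
For reference, a minimal sketch of the racy caller pattern described in
the changelog; struct foo, foo_release() and foo->wq are illustrative
names only, not taken from any in-tree user:

	/* hypothetical user embedding a percpu_ref in its own object */
	struct foo {
		struct percpu_ref ref;
		wait_queue_head_t wq;
	};

	static void foo_release(struct percpu_ref *ref)
	{
		struct foo *foo = container_of(ref, struct foo, ref);

		/* dereferences *foo after the count has hit zero */
		wake_up(&foo->wq);
	}

	/* setup: percpu_ref_init(&foo->ref, foo_release, 0, GFP_KERNEL); */

	/* teardown */
	percpu_ref_kill(&foo->ref);
	/*
	 * percpu_ref_is_zero() can observe zero in the window between
	 * atomic_long_sub_and_test() and ->release(), so the wait may
	 * finish while foo_release() is still running on *foo ...
	 */
	wait_event(foo->wq, percpu_ref_is_zero(&foo->ref));
	percpu_ref_exit(&foo->ref);
	kfree(foo);	/* ... and this frees *foo under it: UAF */

With this patch, percpu_ref_exit() observes data->being_release and
waits on percpu_ref_switch_waitq until the in-flight ->release() has
finished, so the kfree() above can no longer race with foo_release().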