atomic_t variables are currently used to implement reference counters with the following properties: - counter is initialized to 1 using atomic_set() - a resource is freed upon counter reaching zero - once counter reaches zero, its further increments aren't allowed - counter schema uses basic atomic operations (set, inc, inc_not_zero, dec_and_test, etc.) Such atomic variables should be converted to a newly provided refcount_t type and API that prevents accidental counter overflows and underflows. This is important since overflows and underflows can lead to use-after-free situation and be exploitable. The variable task_struct.stack_refcount is used as pure reference counter. Convert it to refcount_t and fix up the operations. **Important note for maintainers: Some functions from refcount_t API defined in lib/refcount.c have different memory ordering guarantees than their atomic counterparts. The full comparison can be seen in https://lkml.org/lkml/2017/11/15/57 and it is hopefully soon in state to be merged to the documentation tree. Normally the differences should not matter since refcount_t provides enough guarantees to satisfy the refcounting use cases, but in some rare cases it might matter. Please double check that you don't have some undocumented memory guarantees for this variable usage. For the task_struct.stack_refcount it might make a difference in following places: - try_get_task_stack(): increment in refcount_inc_not_zero() only guarantees control dependency on success vs. fully ordered atomic counterpart - put_task_stack(): decrement in refcount_dec_and_test() only provides RELEASE ordering and control dependency on success vs. fully ordered atomic counterpart Suggested-by: Kees Cook <keescook@xxxxxxxxxxxx> Reviewed-by: David Windsor <dwindsor@xxxxxxxxx> Reviewed-by: Hans Liljestrand <ishkamiel@xxxxxxxxx> Signed-off-by: Elena Reshetova <elena.reshetova@xxxxxxxxx> --- include/linux/init_task.h | 3 ++- include/linux/sched.h | 2 +- include/linux/sched/task_stack.h | 2 +- kernel/fork.c | 6 +++--- 4 files changed, 7 insertions(+), 6 deletions(-) diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 1e35fce..6a87579 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -13,6 +13,7 @@ #include <linux/securebits.h> #include <linux/seqlock.h> #include <linux/rbtree.h> +#include <linux/refcount.h> #include <linux/sched/autogroup.h> #include <net/net_namespace.h> #include <linux/sched/rt.h> @@ -207,7 +208,7 @@ extern struct cred init_cred; #ifdef CONFIG_THREAD_INFO_IN_TASK # define INIT_TASK_TI(tsk) \ .thread_info = INIT_THREAD_INFO(tsk), \ - .stack_refcount = ATOMIC_INIT(1), + .stack_refcount = REFCOUNT_INIT(1), #else # define INIT_TASK_TI(tsk) #endif diff --git a/include/linux/sched.h b/include/linux/sched.h index 924a812..c8c6d17 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1098,7 +1098,7 @@ struct task_struct { #endif #ifdef CONFIG_THREAD_INFO_IN_TASK /* A live task holds one reference: */ - atomic_t stack_refcount; + refcount_t stack_refcount; #endif #ifdef CONFIG_LIVEPATCH int patch_state; diff --git a/include/linux/sched/task_stack.h b/include/linux/sched/task_stack.h index cb4828a..4559316 100644 --- a/include/linux/sched/task_stack.h +++ b/include/linux/sched/task_stack.h @@ -61,7 +61,7 @@ static inline unsigned long *end_of_stack(struct task_struct *p) #ifdef CONFIG_THREAD_INFO_IN_TASK static inline void *try_get_task_stack(struct task_struct *tsk) { - return atomic_inc_not_zero(&tsk->stack_refcount) ? + return refcount_inc_not_zero(&tsk->stack_refcount) ? task_stack_page(tsk) : NULL; } diff --git a/kernel/fork.c b/kernel/fork.c index 16df4f5..822efa2 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -362,7 +362,7 @@ static void release_task_stack(struct task_struct *tsk) #ifdef CONFIG_THREAD_INFO_IN_TASK void put_task_stack(struct task_struct *tsk) { - if (atomic_dec_and_test(&tsk->stack_refcount)) + if (refcount_dec_and_test(&tsk->stack_refcount)) release_task_stack(tsk); } #endif @@ -380,7 +380,7 @@ void free_task(struct task_struct *tsk) * If the task had a separate stack allocation, it should be gone * by now. */ - WARN_ON_ONCE(atomic_read(&tsk->stack_refcount) != 0); + WARN_ON_ONCE(refcount_read(&tsk->stack_refcount) != 0); #endif rt_mutex_debug_task_free(tsk); ftrace_graph_exit_task(tsk); @@ -795,7 +795,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node) tsk->stack_vm_area = stack_vm_area; #endif #ifdef CONFIG_THREAD_INFO_IN_TASK - atomic_set(&tsk->stack_refcount, 1); + refcount_set(&tsk->stack_refcount, 1); #endif if (err) -- 2.7.4