Hi Jens,
Syzbot found a use-after-free (UAF) bug in __fdget_raw [1].
The issue triggers on 5.10 stable but does not trigger on mainline.
I was able to bisect to the commit that fixes it:
commit fb3a1f6c745c ("io-wq: have manager wait for all workers to exit")
The fix went into the 5.12 kernel and was part of a bigger series of io_uring fixes:
https://lore.kernel.org/all/20210304002700.374417-3-axboe@xxxxxxxxx/
I then found out that one more fix is needed on top of it:
"io-wq: fix race in freeing 'wq' and worker access"
https://lore.kernel.org/all/20210310224358.1494503-2-axboe@xxxxxxxxx/
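As I understand it, the two fixes boil down to one scheme: every worker takes a
reference on the wq (worker_refs), the exiting worker that drops the last
reference completes worker_done, and the manager waits for that completion
before the wq can be torn down. Below is a rough userspace sketch of that
scheme, just to illustrate my understanding; the pthread mutex/condvar pair is
my stand-in for struct completion, and giving the manager a reference of its
own is a choice I made in the sketch so the final drop also works when no
workers were ever created:

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

#define NR_WORKERS 4

static atomic_int worker_refs;                  /* plays wq->worker_refs */
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
static int completed;                           /* plays wq->worker_done */

/* drop a ref; whoever drops the last one "completes" worker_done */
static void put_worker_ref(void)
{
        if (atomic_fetch_sub(&worker_refs, 1) == 1) {
                pthread_mutex_lock(&lock);
                completed = 1;
                pthread_cond_signal(&cond);
                pthread_mutex_unlock(&lock);
        }
}

static void *worker(void *arg)
{
        (void)arg;
        /* ... do work ... */
        put_worker_ref();                       /* as in io_worker_exit() */
        return NULL;
}

int main(void)
{
        pthread_t tids[NR_WORKERS];

        /* manager keeps a ref of its own (my choice, see above) */
        atomic_store(&worker_refs, 1);

        for (int i = 0; i < NR_WORKERS; i++) {
                atomic_fetch_add(&worker_refs, 1);  /* as in create_io_worker() */
                pthread_create(&tids[i], NULL, worker, NULL);
        }

        /* manager exit path: drop our ref, then wait for the last worker */
        put_worker_ref();
        pthread_mutex_lock(&lock);
        while (!completed)
                pthread_cond_wait(&cond, &lock);
        pthread_mutex_unlock(&lock);

        /* only now would it be safe to free the wq */
        for (int i = 0; i < NR_WORKERS; i++)
                pthread_join(tids[i], NULL);
        puts("all workers exited");
        return 0;
}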
I have backported the two patches to 5.10 (see the patch below), but the issue
still triggers; see trace [2].
Could you take a look and see what else might be missing? Any suggestions
would be appreciated.
--
Thanks,
Tadeusz
[1] https://syzkaller.appspot.com/bug?id=54c4ddb7a0d44bd9fbdc22d19caff5f2098081fe
[2] https://pastebin.linaro.org/view/raw/263a8d9f
diff --git a/fs/io-wq.c b/fs/io-wq.c
index 3d5fc76b92d0..c39568971288 100644
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -125,6 +125,9 @@ struct io_wq {
refcount_t refs;
struct completion done;
+ atomic_t worker_refs;
+ struct completion worker_done;
+
struct hlist_node cpuhp_node;
refcount_t use_refs;
@@ -250,6 +253,10 @@ static void io_worker_exit(struct io_worker *worker)
raw_spin_unlock_irq(&wqe->lock);
kfree_rcu(worker, rcu);
+
+ if (atomic_dec_and_test(&wqe->wq->worker_refs))
+ complete(&wqe->wq->worker_done);
+
if (refcount_dec_and_test(&wqe->wq->refs))
complete(&wqe->wq->done);
}
@@ -691,6 +698,7 @@ static bool create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index)
return false;
refcount_set(&worker->ref, 1);
+ atomic_inc(&wq->worker_refs);
worker->nulls_node.pprev = NULL;
worker->wqe = wqe;
spin_lock_init(&worker->lock);
@@ -821,6 +829,14 @@ static int io_wq_manager(void *data)
if (current->task_works)
task_work_run();
+ rcu_read_lock();
+ for_each_node(node)
+ io_wq_for_each_worker(wq->wqes[node], io_wq_worker_wake, NULL);
+ rcu_read_unlock();
+
+ if (atomic_dec_and_test(&wq->worker_refs))
+ complete(&wq->worker_done);
+ wait_for_completion(&wq->worker_done);
out:
if (refcount_dec_and_test(&wq->refs)) {
complete(&wq->done);
@@ -1134,6 +1150,8 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
}
init_completion(&wq->done);
+ init_completion(&wq->worker_done);
+ atomic_set(&wq->worker_refs, 0);
wq->manager = kthread_create(io_wq_manager, wq, "io_wq_manager");
if (!IS_ERR(wq->manager)) {
@@ -1179,11 +1197,6 @@ static void __io_wq_destroy(struct io_wq *wq)
if (wq->manager)
kthread_stop(wq->manager);
- rcu_read_lock();
- for_each_node(node)
- io_wq_for_each_worker(wq->wqes[node], io_wq_worker_wake, NULL);
- rcu_read_unlock();
-
wait_for_completion(&wq->done);
for_each_node(node)