On 12/9/24 14:56, Bernd Schubert wrote:
When the fuse-server terminates while the fuse-client or kernel
still has queued URING_CMDs, these commands retain references
to the struct file used by the fuse connection. This prevents
fuse_dev_release() from being invoked, resulting in a hung mount
point.
This patch addresses the issue by making queued URING_CMDs
cancelable, allowing fuse_dev_release() to proceed as expected
and preventing the mount point from hanging.
io_uring bits look good
Signed-off-by: Bernd Schubert <bschubert@xxxxxxx>
---
fs/fuse/dev_uring.c | 87 ++++++++++++++++++++++++++++++++++++++++++---------
fs/fuse/dev_uring_i.h | 12 +++++++
2 files changed, 85 insertions(+), 14 deletions(-)
diff --git a/fs/fuse/dev_uring.c b/fs/fuse/dev_uring.c
index 8bdfb6fcfa51976cd121bee7f2e8dec1ff9aa916..be7eaf7cc569ff77f8ebdff323634b84ea0a3f63 100644
--- a/fs/fuse/dev_uring.c
+++ b/fs/fuse/dev_uring.c
...
@@ -294,24 +302,27 @@ static void fuse_uring_stop_fuse_req_end(struct fuse_ring_ent *ent)
/*
* Release a request/entry on connection tear down
*/
-static void fuse_uring_entry_teardown(struct fuse_ring_ent *ent,
- bool need_cmd_done)
+static void fuse_uring_entry_teardown(struct fuse_ring_ent *ent)
{
- /*
- * fuse_request_end() might take other locks like fi->lock and
- * can lead to lock ordering issues
- */
- lockdep_assert_not_held(&ent->queue->lock);
+ struct fuse_ring_queue *queue = ent->queue;
- if (need_cmd_done)
+ if (ent->need_cmd_done)
io_uring_cmd_done(ent->cmd, -ENOTCONN, 0,
IO_URING_F_UNLOCKED);
nit: might be better to pair all io_uring_cmd_done() with
ent->cmd = NULL;
since after the call the request is released and can't be used
by fuse anymore.
if (ent->fuse_req)
fuse_uring_stop_fuse_req_end(ent);
- list_del_init(&ent->list);
- kfree(ent);
+ /*
+ * The entry must not be freed immediately: IO_URING_F_CANCEL accesses
+ * entries through a direct pointer, so there is a risk of a race with
+ * daemon termination (which triggers IO_URING_F_CANCEL and accesses
+ * entries without checking the list state first).
+ */
+ spin_lock(&queue->lock);
+ list_move(&ent->list, &queue->ent_released);
+ ent->state = FRRS_RELEASED;
+ spin_unlock(&queue->lock);
...
+ * Handle IO_URING_F_CANCEL, typically should come on daemon termination.
+ *
+ * Releasing the last entry should trigger fuse_dev_release() if
+ * the daemon was terminated
+ */
+static int fuse_uring_cancel(struct io_uring_cmd *cmd, unsigned int issue_flags)
+{
+ struct fuse_ring_ent *ent = fuse_uring_cmd_to_ring_ent(cmd);
+ struct fuse_ring_queue *queue;
+ bool need_cmd_done = false;
+ int ret = 0;
+
+ /*
+ * direct access on ent - it must not be destructed as long as
+ * IO_URING_F_CANCEL might come up
+ */
+ queue = ent->queue;
+ spin_lock(&queue->lock);
+ if (ent->state == FRRS_WAIT) {
+ ent->state = FRRS_USERSPACE;
+ list_move(&ent->list, &queue->ent_in_userspace);
+ need_cmd_done = true;
+ }
+ spin_unlock(&queue->lock);
+
+ if (need_cmd_done) {
+ io_uring_cmd_done(cmd, -ENOTCONN, 0, issue_flags);
+ } else {
+ /* io-uring handles resending */
+ ret = -EAGAIN;
FWIW, apparently io_uring ignores the error code returned from here.
It only cares whether or not the request is removed from a list via
io_uring_cmd_done().
+ }
+
+ return ret;
+}
+
+static void fuse_uring_prepare_cancel(struct io_uring_cmd *cmd, int issue_flags,
+ struct fuse_ring_ent *ring_ent)
+{
+ fuse_uring_cmd_set_ring_ent(cmd, ring_ent);
+ io_uring_cmd_mark_cancelable(cmd, issue_flags);
+}
+
--
Pavel Begunkov