On Wed, May 29, 2024 at 08:00:45PM +0200, Bernd Schubert wrote:
> This adds basic support for ring SQEs (with opcode=IORING_OP_URING_CMD).
> For now only FUSE_URING_REQ_FETCH is handled to register queue entries.
>
> Signed-off-by: Bernd Schubert <bschubert@xxxxxxx>
> ---
>  fs/fuse/dev.c             |   1 +
>  fs/fuse/dev_uring.c       | 267 ++++++++++++++++++++++++++++++++++++++++++++++
>  fs/fuse/dev_uring_i.h     |  12 +++
>  include/uapi/linux/fuse.h |  33 ++++++
>  4 files changed, 313 insertions(+)
>
> diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
> index cd5dc6ae9272..05a87731b5c3 100644
> --- a/fs/fuse/dev.c
> +++ b/fs/fuse/dev.c
> @@ -2510,6 +2510,7 @@ const struct file_operations fuse_dev_operations = {
>  	.compat_ioctl	= compat_ptr_ioctl,
>  #if IS_ENABLED(CONFIG_FUSE_IO_URING)
>  	.mmap		= fuse_uring_mmap,
> +	.uring_cmd	= fuse_uring_cmd,
>  #endif
>  };
>  EXPORT_SYMBOL_GPL(fuse_dev_operations);
> diff --git a/fs/fuse/dev_uring.c b/fs/fuse/dev_uring.c
> index 2c0ccb378908..48b1118b64f4 100644
> --- a/fs/fuse/dev_uring.c
> +++ b/fs/fuse/dev_uring.c
> @@ -31,6 +31,27 @@
>  #include <linux/topology.h>
>  #include <linux/io_uring/cmd.h>
>
> +static void fuse_ring_ring_ent_unset_userspace(struct fuse_ring_ent *ent)
> +{
> +	clear_bit(FRRS_USERSPACE, &ent->state);
> +	list_del_init(&ent->list);
> +}
> +
> +/* Update conn limits according to ring values */
> +static void fuse_uring_conn_cfg_limits(struct fuse_ring *ring)
> +{
> +	struct fuse_conn *fc = ring->fc;
> +
> +	WRITE_ONCE(fc->max_pages, min_t(unsigned int, fc->max_pages,
> +					ring->req_arg_len / PAGE_SIZE));
> +
> +	/* This not ideal, as multiplication with nr_queue assumes the limit
> +	 * gets reached when all queues are used, but a single threaded
> +	 * application might already do that.
> +	 */
> +	WRITE_ONCE(fc->max_background, ring->nr_queues * ring->max_nr_async);
> +}
> +
>  /*
>   * Basic ring setup for this connection based on the provided configuration
>   */
> @@ -329,3 +350,249 @@ int fuse_uring_queue_cfg(struct fuse_ring *ring,
>  	return 0;
>  }
>
> +/*
> + * Put a ring request onto hold, it is no longer used for now.
> + */
> +static void fuse_uring_ent_avail(struct fuse_ring_ent *ring_ent,
> +				 struct fuse_ring_queue *queue)
> +	__must_hold(&queue->lock)

Sorry I'm just now bringing this up, but I'd love to see a
lockdep_assert_held(<whatever lock>); in every place where you use
__must_hold, so I get a nice big warning when I'm running stuff.  I
don't always run sparse, but I always test with lockdep on, and that'll
help me notice problems.
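Just a sketch of what I mean, assuming queue->lock is the lock the
annotation above refers to (untested, for illustration only):

	static void fuse_uring_ent_avail(struct fuse_ring_ent *ring_ent,
					 struct fuse_ring_queue *queue)
		__must_hold(&queue->lock)
	{
		/* runtime check that complements the sparse-only annotation */
		lockdep_assert_held(&queue->lock);
		...
	}

That way lockdep-enabled test runs will complain loudly if a caller
ever shows up without the lock held.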
> +{
> +	struct fuse_ring *ring = queue->ring;
> +
> +	/* unsets all previous flags - basically resets */
> +	pr_devel("%s ring=%p qid=%d tag=%d state=%lu async=%d\n", __func__,
> +		 ring, ring_ent->queue->qid, ring_ent->tag, ring_ent->state,
> +		 ring_ent->async);
> +
> +	if (WARN_ON(test_bit(FRRS_USERSPACE, &ring_ent->state))) {
> +		pr_warn("%s qid=%d tag=%d state=%lu async=%d\n", __func__,
> +			ring_ent->queue->qid, ring_ent->tag, ring_ent->state,
> +			ring_ent->async);
> +		return;
> +	}
> +
> +	WARN_ON_ONCE(!list_empty(&ring_ent->list));
> +
> +	if (ring_ent->async)
> +		list_add(&ring_ent->list, &queue->async_ent_avail_queue);
> +	else
> +		list_add(&ring_ent->list, &queue->sync_ent_avail_queue);
> +
> +	set_bit(FRRS_WAIT, &ring_ent->state);
> +}
> +
> +/*
> + * fuse_uring_req_fetch command handling
> + */
> +static int fuse_uring_fetch(struct fuse_ring_ent *ring_ent,
> +			    struct io_uring_cmd *cmd, unsigned int issue_flags)
> +	__must_hold(ring_ent->queue->lock)
> +{
> +	struct fuse_ring_queue *queue = ring_ent->queue;
> +	struct fuse_ring *ring = queue->ring;
> +	int ret = 0;
> +	int nr_ring_sqe;
> +
> +	/* register requests for foreground requests first, then backgrounds */
> +	if (queue->nr_req_sync >= ring->max_nr_sync) {
> +		queue->nr_req_async++;
> +		ring_ent->async = 1;
> +	} else
> +		queue->nr_req_sync++;

IIRC the style guidelines say if you use { in any part of the if,
you've got to use them for all of it.  But that may just be what we do
in btrfs.  Normally I wouldn't nit about it but I have comments
elsewhere for this patch.

> +
> +	fuse_uring_ent_avail(ring_ent, queue);
> +
> +	if (queue->nr_req_sync + queue->nr_req_async > ring->queue_depth) {
> +		/* should be caught by ring state before and queue depth
> +		 * check before
> +		 */
> +		WARN_ON(1);
> +		pr_info("qid=%d tag=%d req cnt (fg=%d async=%d exceeds depth=%zu",
> +			queue->qid, ring_ent->tag, queue->nr_req_sync,
> +			queue->nr_req_async, ring->queue_depth);
> +		ret = -ERANGE;
> +	}
> +
> +	if (ret)
> +		goto out; /* erange */

This can just be

	if (whatever) {
		WARN_ON_ONCE(1);
		return -ERANGE;
	}

instead of the goto out thing.

> +
> +	WRITE_ONCE(ring_ent->cmd, cmd);
> +
> +	nr_ring_sqe = ring->queue_depth * ring->nr_queues;
> +	if (atomic_inc_return(&ring->nr_sqe_init) == nr_ring_sqe) {
> +		fuse_uring_conn_cfg_limits(ring);
> +		ring->ready = 1;
> +	}
> +
> +out:
> +	return ret;

And this can just be return 0 here with the above change.
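To be concrete, with both changes the tail of fuse_uring_fetch() could
read roughly like this (same names as the quoted patch, untested sketch
only):

	if (queue->nr_req_sync + queue->nr_req_async > ring->queue_depth) {
		/* should have been caught by the earlier ring state and
		 * queue depth checks
		 */
		WARN_ON_ONCE(1);
		pr_info("qid=%d tag=%d req cnt (fg=%d async=%d) exceeds depth=%zu\n",
			queue->qid, ring_ent->tag, queue->nr_req_sync,
			queue->nr_req_async, ring->queue_depth);
		return -ERANGE;
	}

	WRITE_ONCE(ring_ent->cmd, cmd);

	nr_ring_sqe = ring->queue_depth * ring->nr_queues;
	if (atomic_inc_return(&ring->nr_sqe_init) == nr_ring_sqe) {
		fuse_uring_conn_cfg_limits(ring);
		ring->ready = 1;
	}

	return 0;

No out label, no ret bookkeeping.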
> +}
> +
> +static struct fuse_ring_queue *
> +fuse_uring_get_verify_queue(struct fuse_ring *ring,
> +			    const struct fuse_uring_cmd_req *cmd_req,
> +			    unsigned int issue_flags)
> +{
> +	struct fuse_conn *fc = ring->fc;
> +	struct fuse_ring_queue *queue;
> +	int ret;
> +
> +	if (!(issue_flags & IO_URING_F_SQE128)) {
> +		pr_info("qid=%d tag=%d SQE128 not set\n", cmd_req->qid,
> +			cmd_req->tag);
> +		ret = -EINVAL;
> +		goto err;
> +	}
> +
> +	if (unlikely(!fc->connected)) {
> +		ret = -ENOTCONN;
> +		goto err;
> +	}
> +
> +	if (unlikely(!ring->configured)) {
> +		pr_info("command for a connection that is not ring configured\n");
> +		ret = -ENODEV;
> +		goto err;
> +	}
> +
> +	if (unlikely(cmd_req->qid >= ring->nr_queues)) {
> +		pr_devel("qid=%u >= nr-queues=%zu\n", cmd_req->qid,
> +			 ring->nr_queues);
> +		ret = -EINVAL;
> +		goto err;
> +	}
> +
> +	queue = fuse_uring_get_queue(ring, cmd_req->qid);
> +	if (unlikely(queue == NULL)) {
> +		pr_info("Got NULL queue for qid=%d\n", cmd_req->qid);
> +		ret = -EIO;
> +		goto err;
> +	}
> +
> +	if (unlikely(!queue->configured || queue->stopped)) {
> +		pr_info("Ring or queue (qid=%u) not ready.\n", cmd_req->qid);
> +		ret = -ENOTCONN;
> +		goto err;
> +	}
> +
> +	if (cmd_req->tag > ring->queue_depth) {
> +		pr_info("tag=%u > queue-depth=%zu\n", cmd_req->tag,
> +			ring->queue_depth);
> +		ret = -EINVAL;
> +		goto err;
> +	}
> +
> +	return queue;
> +
> +err:
> +	return ERR_PTR(ret);

There's no cleanup here, so just make all the above return
ERR_PTR(-whatever) instead of the goto err thing.

> +}
> +
> +/**
> + * Entry function from io_uring to handle the given passthrough command
> + * (op cocde IORING_OP_URING_CMD)
> + */

Docstyle thing.

> +int fuse_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
> +{
> +	const struct fuse_uring_cmd_req *cmd_req = io_uring_sqe_cmd(cmd->sqe);
> +	struct fuse_dev *fud = fuse_get_dev(cmd->file);
> +	struct fuse_conn *fc = fud->fc;
> +	struct fuse_ring *ring = fc->ring;
> +	struct fuse_ring_queue *queue;
> +	struct fuse_ring_ent *ring_ent = NULL;
> +	u32 cmd_op = cmd->cmd_op;
> +	int ret = 0;
> +
> +	if (!ring) {
> +		ret = -ENODEV;
> +		goto out;
> +	}
> +
> +	queue = fuse_uring_get_verify_queue(ring, cmd_req, issue_flags);
> +	if (IS_ERR(queue)) {
> +		ret = PTR_ERR(queue);
> +		goto out;
> +	}
> +
> +	ring_ent = &queue->ring_ent[cmd_req->tag];
> +
> +	pr_devel("%s:%d received: cmd op %d qid %d (%p) tag %d (%p)\n",
> +		 __func__, __LINE__, cmd_op, cmd_req->qid, queue, cmd_req->tag,
> +		 ring_ent);
> +
> +	spin_lock(&queue->lock);
> +	if (unlikely(queue->stopped)) {
> +		/* XXX how to ensure queue still exists? Add
> +		 * an rw ring->stop lock? And take that at the beginning
> +		 * of this function? Better would be to advise uring
> +		 * not to call this function at all? Or free the queue memory
> +		 * only, on daemon PF_EXITING?
> +		 */
> +		ret = -ENOTCONN;
> +		goto err_unlock;
> +	}
> +
> +	if (current == queue->server_task)
> +		queue->uring_cmd_issue_flags = issue_flags;
> +
> +	switch (cmd_op) {
> +	case FUSE_URING_REQ_FETCH:

This is all organized kind of oddly, I think I'd prefer if you put all
the code from above where we grab the queue lock and the bit below into
a helper.
So instead of

	spin_lock(&queue->lock);
	blah
	switch (cmd_op) {
	case FUSE_URING_REQ_FETCH:
		blah
	default:
		ret = -EINVAL;
	}

you have

	static int fuse_uring_req_fetch(queue, cmd, issue_flags)
	{
		ring_ent = blah;
		spin_lock(&queue->lock);
		<blah>
		spin_unlock(&queue->lock);
		return ret;
	}

then

	switch (cmd_op) {
	case FUSE_URING_REQ_FETCH:
		ret = fuse_uring_req_fetch(queue, cmd, issue_flags);
		break;
	default:
		ret = -EINVAL;
		break;
	}

Alternatively just push all the queue stuff down into the
case FUSE_URING_REQ_FETCH part, but I think the helper is cleaner.

> +		if (queue->server_task == NULL) {
> +			queue->server_task = current;
> +			queue->uring_cmd_issue_flags = issue_flags;
> +		}
> +
> +		/* No other bit must be set here */
> +		if (ring_ent->state != BIT(FRRS_INIT)) {
> +			pr_info_ratelimited(
> +				"qid=%d tag=%d register req state %lu expected %lu",
> +				cmd_req->qid, cmd_req->tag, ring_ent->state,
> +				BIT(FRRS_INIT));
> +			ret = -EINVAL;
> +			goto err_unlock;
> +		}
> +
> +		fuse_ring_ring_ent_unset_userspace(ring_ent);
> +
> +		ret = fuse_uring_fetch(ring_ent, cmd, issue_flags);
> +		if (ret)
> +			goto err_unlock;
> +
> +		/*
> +		 * The ring entry is registered now and needs to be handled
> +		 * for shutdown.
> +		 */
> +		atomic_inc(&ring->queue_refs);
> +
> +		spin_unlock(&queue->lock);
> +		break;
> +	default:
> +		ret = -EINVAL;
> +		pr_devel("Unknown uring command %d", cmd_op);
> +		goto err_unlock;
> +	}
> +out:
> +	pr_devel("uring cmd op=%d, qid=%d tag=%d ret=%d\n", cmd_op,
> +		 cmd_req->qid, cmd_req->tag, ret);
> +
> +	if (ret < 0) {
> +		if (ring_ent != NULL) {

You don't pull anything from ring_ent in the pr_info, so maybe drop the
extra if statement?

Thanks,

Josef