[PATCH 12/13] fuse: Add uring sqe commit and fetch support

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



This adds support for fuse request completion through ring SQEs
(FUSE_URING_REQ_COMMIT_AND_FETCH handling). After committing
the ring entry it becomes available for new fuse requests.
Handling of requests through the ring (SQE/CQE handling)
is complete now.

Fuse request data are copied through the mmaped ring buffer,
there is no support for any zero copy yet.

Signed-off-by: Bernd Schubert <bschubert@xxxxxxx>
cc: Miklos Szeredi <miklos@xxxxxxxxxx>
cc: linux-fsdevel@xxxxxxxxxxxxxxx
cc: Amir Goldstein <amir73il@xxxxxxxxx>
cc: fuse-devel@xxxxxxxxxxxxxxxxxxxxx
---
 fs/fuse/dev.c         |   1 +
 fs/fuse/dev_uring.c   | 408 +++++++++++++++++++++++++++++++++++++++++-
 fs/fuse/dev_uring_i.h |   1 +
 3 files changed, 401 insertions(+), 9 deletions(-)

diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index de9193f66c8b..cce55eaed8a3 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -2330,6 +2330,7 @@ const struct file_operations fuse_dev_operations = {
 	.unlocked_ioctl = fuse_dev_ioctl,
 	.compat_ioctl   = compat_ptr_ioctl,
 	.mmap		= fuse_uring_mmap,
+	.uring_cmd	= fuse_uring_cmd,
 };
 EXPORT_SYMBOL_GPL(fuse_dev_operations);
 
diff --git a/fs/fuse/dev_uring.c b/fs/fuse/dev_uring.c
index 744a38064131..5c41f9f71410 100644
--- a/fs/fuse/dev_uring.c
+++ b/fs/fuse/dev_uring.c
@@ -34,6 +34,9 @@ module_param(enable_uring, bool, 0644);
 MODULE_PARM_DESC(enable_uring,
 	"Enable uring userspace communication through uring.");
 
+static bool fuse_uring_ent_release_and_fetch(struct fuse_ring_ent *ring_ent);
+static void fuse_uring_send_to_ring(struct fuse_ring_ent *ring_ent);
+
 static struct fuse_ring_queue *
 fuse_uring_get_queue(struct fuse_conn *fc, int qid)
 {
@@ -47,15 +50,6 @@ fuse_uring_get_queue(struct fuse_conn *fc, int qid)
 	return (struct fuse_ring_queue *)(ptr + qid * fc->ring.queue_size);
 }
 
-/* dummy function will be replaced in later commits */
-static void fuse_uring_bit_set(struct fuse_ring_ent *ent, bool is_bg,
-			       const char *str)
-{
-	(void)ent;
-	(void)is_bg;
-	(void)str;
-}
-
 /* Abort all list queued request on the given ring queue */
 static void fuse_uring_end_queue_requests(struct fuse_ring_queue *queue)
 {
@@ -81,6 +75,363 @@ void fuse_uring_end_requests(struct fuse_conn *fc)
 	}
 }
 
+/*
+ * Finalize a fuse request, then fetch and send the next entry, if available
+ *
+ * has lock/unlock/lock to avoid holding the lock on calling fuse_request_end
+ */
+static void
+fuse_uring_req_end_and_get_next(struct fuse_ring_ent *ring_ent, bool set_err,
+				int error)
+{
+	bool already = false;
+	struct fuse_req *req = ring_ent->fuse_req;
+	bool send;
+
+	spin_lock(&ring_ent->queue->lock);
+	if (ring_ent->state & FRRS_FUSE_REQ_END || !ring_ent->need_req_end)
+		already = true;
+	else {
+		ring_ent->state |= FRRS_FUSE_REQ_END;
+		ring_ent->need_req_end = 0;
+	}
+	spin_unlock(&ring_ent->queue->lock);
+
+	if (already) {
+		struct fuse_ring_queue *queue = ring_ent->queue;
+
+		if (!queue->aborted) {
+			pr_info("request end not needed state=%llu end-bit=%d\n",
+				ring_ent->state, ring_ent->need_req_end);
+			WARN_ON(1);
+		}
+		return;
+	}
+
+	if (set_err)
+		req->out.h.error = error;
+
+	fuse_request_end(ring_ent->fuse_req);
+	ring_ent->fuse_req = NULL;
+
+	send = fuse_uring_ent_release_and_fetch(ring_ent);
+	if (send)
+		fuse_uring_send_to_ring(ring_ent);
+}
+
+/*
+ * Copy data from the req to the ring buffer
+ */
+static int fuse_uring_copy_to_ring(struct fuse_conn *fc,
+				   struct fuse_req *req,
+				   struct fuse_ring_req *rreq)
+{
+	struct fuse_copy_state cs;
+	struct fuse_args *args = req->args;
+	int err;
+
+	fuse_copy_init(&cs, 1, NULL);
+	cs.is_uring = 1;
+	cs.ring.buf = rreq->in_out_arg;
+	cs.ring.buf_sz = fc->ring.req_arg_len;
+	cs.req = req;
+
+	pr_devel("%s:%d buf=%p len=%d args=%d\n", __func__, __LINE__,
+		 cs.ring.buf, cs.ring.buf_sz, args->out_numargs);
+
+	err = fuse_copy_args(&cs, args->in_numargs, args->in_pages,
+			     (struct fuse_arg *) args->in_args, 0);
+	rreq->in_out_arg_len = cs.ring.offset;
+
+	pr_devel("%s:%d buf=%p len=%d args=%d err=%d\n", __func__, __LINE__,
+		 cs.ring.buf, cs.ring.buf_sz, args->out_numargs, err);
+
+	return err;
+}
+
+/*
+ * Copy data from the ring buffer to the fuse request
+ */
+static int fuse_uring_copy_from_ring(struct fuse_conn *fc,
+				     struct fuse_req *req,
+				     struct fuse_ring_req *rreq)
+{
+	struct fuse_copy_state cs;
+	struct fuse_args *args = req->args;
+
+	fuse_copy_init(&cs, 0, NULL);
+	cs.is_uring = 1;
+	cs.ring.buf = rreq->in_out_arg;
+
+	if (rreq->in_out_arg_len > fc->ring.req_arg_len) {
+		pr_devel("Max ring buffer len exceeded (%u vs %zu\n",
+			 rreq->in_out_arg_len,  fc->ring.req_arg_len);
+		return -EINVAL;
+	}
+	cs.ring.buf_sz = rreq->in_out_arg_len;
+	cs.req = req;
+
+	pr_devel("%s:%d buf=%p len=%d args=%d\n", __func__, __LINE__,
+		 cs.ring.buf, cs.ring.buf_sz, args->out_numargs);
+
+	return fuse_copy_out_args(&cs, args, rreq->in_out_arg_len);
+}
+
+/**
+ * Write data to the ring buffer and send the request to userspace,
+ * userspace will read it
+ * This is comparable with classical read(/dev/fuse)
+ */
+static void fuse_uring_send_to_ring(struct fuse_ring_ent *ring_ent)
+{
+	struct fuse_conn *fc = ring_ent->queue->fc;
+	struct fuse_ring_req *rreq = ring_ent->rreq;
+	struct fuse_req *req = ring_ent->fuse_req;
+	int err = 0;
+
+	pr_devel("%s:%d ring-req=%p fuse_req=%p state=%llu args=%p\n", __func__,
+		 __LINE__, ring_ent, ring_ent->fuse_req, ring_ent->state, req->args);
+
+	spin_lock(&ring_ent->queue->lock);
+	if (unlikely((ring_ent->state & FRRS_USERSPACE) ||
+		     (ring_ent->state & FRRS_FREED))) {
+		pr_err("ring-req=%p buf_req=%p invalid state %llu on send\n",
+		       ring_ent, rreq, ring_ent->state);
+		WARN_ON(1);
+		err = -EIO;
+	} else
+		ring_ent->state |= FRRS_USERSPACE;
+
+	ring_ent->need_cmd_done = 0;
+	spin_unlock(&ring_ent->queue->lock);
+	if (err)
+		goto err;
+
+	err = fuse_uring_copy_to_ring(fc, req, rreq);
+	if (unlikely(err)) {
+		ring_ent->state &= ~FRRS_USERSPACE;
+		ring_ent->need_cmd_done = 1;
+		goto err;
+	}
+
+	/* ring req go directly into the shared memory buffer */
+	rreq->in = req->in.h;
+
+	pr_devel("%s qid=%d tag=%d state=%llu cmd-done op=%d unique=%llu\n",
+		__func__, ring_ent->queue->qid, ring_ent->tag, ring_ent->state,
+		rreq->in.opcode, rreq->in.unique);
+
+	io_uring_cmd_done(ring_ent->cmd, 0, 0);
+	return;
+
+err:
+	fuse_uring_req_end_and_get_next(ring_ent, true, err);
+}
+
+/**
+ * Set the given ring entry as available in the queue bitmap
+ */
+static void fuse_uring_bit_set(struct fuse_ring_ent *ring_ent, bool bg,
+			       const char *str)
+__must_hold(ring_ent->queue->lock)
+{
+	int old;
+	struct fuse_ring_queue *queue = ring_ent->queue;
+	const struct fuse_conn *fc = queue->fc;
+	int tag = ring_ent->tag;
+
+	old = test_and_set_bit(tag, queue->req_avail_map);
+	if (unlikely(old != 0)) {
+		pr_warn("%8s invalid bit value on clear for qid=%d tag=%d",
+			str, queue->qid, tag);
+		WARN_ON(1);
+	}
+	if (bg)
+		queue->req_bg++;
+	else
+		queue->req_fg++;
+
+	pr_devel("%35s bit set fc=%p is_bg=%d qid=%d tag=%d fg=%d bg=%d bgq: %d\n",
+		 str, fc, bg, queue->qid, ring_ent->tag, queue->req_fg,
+		 queue->req_bg, !list_empty(&queue->bg_queue));
+}
+
+/**
+ * Mark the ring entry as not available for other requests
+ */
+static int fuse_uring_bit_clear(struct fuse_ring_ent *ring_ent, int is_bg,
+				const char *str)
+__must_hold(ring_ent->queue->lock)
+{
+	int old;
+	struct fuse_ring_queue *queue = ring_ent->queue;
+	const struct fuse_conn *fc = queue->fc;
+	int tag = ring_ent->tag;
+	int *value = is_bg ? &queue->req_bg : &queue->req_fg;
+
+	if (unlikely(*value <= 0)) {
+		pr_warn("%s qid=%d tag=%d is_bg=%d zero req avail fg=%d bg=%d\n",
+			str, queue->qid, ring_ent->tag, is_bg,
+			queue->req_bg, queue->req_fg);
+		WARN_ON(1);
+		return -EINVAL;
+	}
+
+	old = test_and_clear_bit(tag, queue->req_avail_map);
+	if (unlikely(old != 1)) {
+		pr_warn("%8s invalid bit value on clear for qid=%d tag=%d",
+			str, queue->qid, tag);
+		WARN_ON(1);
+		return -EIO;
+	}
+
+	ring_ent->rreq->flags = 0;
+
+	if (is_bg) {
+		ring_ent->rreq->flags |= FUSE_RING_REQ_FLAG_BACKGROUND;
+		queue->req_bg--;
+	} else
+		queue->req_fg--;
+
+	pr_devel("%35s ring bit clear fc=%p is_bg=%d qid=%d tag=%d fg=%d bg=%d\n",
+		 str, fc, is_bg, queue->qid, ring_ent->tag,
+		 queue->req_fg, queue->req_bg);
+
+	ring_ent->state |= FRRS_FUSE_REQ;
+
+	return 0;
+}
+
+/*
+ * Assign a fuse queue entry to the given entry
+ *
+ */
+static bool fuse_uring_assign_ring_entry(struct fuse_ring_ent *ring_ent,
+					     struct list_head *head,
+					     int is_bg)
+__must_hold(&queue.waitq.lock)
+{
+	struct fuse_req *req;
+	int res;
+
+	if (list_empty(head))
+		return false;
+
+	res = fuse_uring_bit_clear(ring_ent, is_bg, __func__);
+	if (unlikely(res))
+		return false;
+
+	req = list_first_entry(head, struct fuse_req, list);
+	list_del_init(&req->list);
+	clear_bit(FR_PENDING, &req->flags);
+	ring_ent->fuse_req = req;
+	ring_ent->need_req_end = 1;
+
+	return true;
+}
+
+/*
+ * Checks for errors and stores it into the request
+ */
+static int fuse_uring_ring_ent_has_err(struct fuse_conn *fc,
+				       struct fuse_ring_ent *ring_ent)
+{
+	struct fuse_req *req = ring_ent->fuse_req;
+	struct fuse_out_header *oh = &req->out.h;
+	int err;
+
+	if (oh->unique == 0) {
+		/* Not supportd through request based uring, this needs another
+		 * ring from user space to kernel
+		 */
+		pr_warn("Unsupported fuse-notify\n");
+		err = -EINVAL;
+		goto seterr;
+	}
+
+	if (oh->error <= -512 || oh->error > 0) {
+		err = -EINVAL;
+		goto seterr;
+	}
+
+	if (oh->error) {
+		err = oh->error;
+		pr_devel("%s:%d err=%d op=%d req-ret=%d",
+			 __func__, __LINE__, err, req->args->opcode,
+			 req->out.h.error);
+		goto err; /* error already set */
+	}
+
+	if ((oh->unique & ~FUSE_INT_REQ_BIT) != req->in.h.unique) {
+
+		pr_warn("Unpexted seqno mismatch, expected: %llu got %llu\n",
+			req->in.h.unique, oh->unique & ~FUSE_INT_REQ_BIT);
+		err = -ENOENT;
+		goto seterr;
+	}
+
+	/* Is it an interrupt reply ID?	 */
+	if (oh->unique & FUSE_INT_REQ_BIT) {
+		err = 0;
+		if (oh->error == -ENOSYS)
+			fc->no_interrupt = 1;
+		else if (oh->error == -EAGAIN) {
+			/* XXX Needs to copy to the next cq and submit it */
+			// err = queue_interrupt(req);
+			pr_warn("Intrerupt EAGAIN not supported yet");
+			err = -EINVAL;
+		}
+
+		goto seterr;
+	}
+
+	return 0;
+
+seterr:
+	pr_devel("%s:%d err=%d op=%d req-ret=%d",
+		 __func__, __LINE__, err, req->args->opcode,
+		 req->out.h.error);
+	oh->error = err;
+err:
+	pr_devel("%s:%d err=%d op=%d req-ret=%d",
+		 __func__, __LINE__, err, req->args->opcode,
+		 req->out.h.error);
+	return err;
+}
+
+/**
+ * Read data from the ring buffer, which user space has written to
+ * This is comparible with handling of classical write(/dev/fuse).
+ * Also make the ring request available again for new fuse requests.
+ */
+static void fuse_uring_commit_and_release(struct fuse_dev *fud,
+					  struct fuse_ring_ent *ring_ent)
+{
+	struct fuse_ring_req *rreq = ring_ent->rreq;
+	struct fuse_req *req = ring_ent->fuse_req;
+	ssize_t err = 0;
+	bool set_err = false;
+
+	req->out.h = rreq->out;
+
+	err = fuse_uring_ring_ent_has_err(fud->fc, ring_ent);
+	if (err) {
+		/* req->out.h.error already set */
+		pr_devel("%s:%d err=%zd oh->err=%d\n",
+			 __func__, __LINE__, err, req->out.h.error);
+		goto out;
+	}
+
+	err = fuse_uring_copy_from_ring(fud->fc, req, rreq);
+	if (err)
+		set_err = true;
+
+out:
+	pr_devel("%s:%d ret=%zd op=%d req-ret=%d\n",
+		 __func__, __LINE__, err, req->args->opcode, req->out.h.error);
+	fuse_uring_req_end_and_get_next(ring_ent, set_err, err);
+}
+
 /*
  * Release a ring request, it is no longer needed and can handle new data
  *
@@ -119,6 +470,25 @@ __must_hold(&queue->lock)
 	ring_ent->rreq->flags = 0;
 	ring_ent->state = FRRS_FUSE_WAIT;
 }
+
+/*
+ * Release a uring entry and fetch the next fuse request if available
+ */
+static bool fuse_uring_ent_release_and_fetch(struct fuse_ring_ent *ring_ent)
+{
+	struct fuse_ring_queue *queue = ring_ent->queue;
+	bool is_bg = !!(ring_ent->rreq->flags & FUSE_RING_REQ_FLAG_BACKGROUND);
+	bool send = false;
+	struct list_head *head = is_bg ? &queue->bg_queue : &queue->fg_queue;
+
+	spin_lock(&ring_ent->queue->lock);
+	fuse_uring_ent_release(ring_ent, queue, is_bg);
+	send = fuse_uring_assign_ring_entry(ring_ent, head, is_bg);
+	spin_unlock(&ring_ent->queue->lock);
+
+	return send;
+}
+
 /**
  * Simplified ring-entry release function, for shutdown only
  */
@@ -810,6 +1180,26 @@ int fuse_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
 
 		ret = fuse_uring_fetch(ring_ent, cmd);
 		break;
+	case FUSE_URING_REQ_COMMIT_AND_FETCH:
+		if (unlikely(!fc->ring.ready)) {
+			pr_info("commit and fetch, but the ring is not ready yet");
+			goto out;
+		}
+
+		if (!(prev_state & FRRS_USERSPACE)) {
+			pr_info("qid=%d tag=%d state %llu misses %d\n",
+				queue->qid, ring_ent->tag, ring_ent->state,
+				FRRS_USERSPACE);
+			goto out;
+		}
+
+		/* XXX Test inject error */
+
+		WRITE_ONCE(ring_ent->cmd, cmd);
+		fuse_uring_commit_and_release(fud, ring_ent);
+
+		ret = 0;
+		break;
 	default:
 		ret = -EINVAL;
 		pr_devel("Unknown uring command %d", cmd_op);
diff --git a/fs/fuse/dev_uring_i.h b/fs/fuse/dev_uring_i.h
index 4032dccca8b6..b0ef36215b80 100644
--- a/fs/fuse/dev_uring_i.h
+++ b/fs/fuse/dev_uring_i.h
@@ -13,6 +13,7 @@ void fuse_uring_end_requests(struct fuse_conn *fc);
 int fuse_uring_ioctl(struct file *file, struct fuse_uring_cfg *cfg);
 void fuse_uring_ring_destruct(struct fuse_conn *fc);
 int fuse_uring_mmap(struct file *filp, struct vm_area_struct *vma);
+int fuse_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags);
 #endif
 
 
-- 
2.37.2




[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [NTFS 3]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [NTFS 3]     [Samba]     [Device Mapper]     [CEPH Development]

  Powered by Linux