Re: IORING_SEND_NOTIF_REPORT_USAGE (was Re: IORING_CQE_F_COPIED)

On 10/20/22 11:04, Stefan Metzmacher wrote:
Hi Pavel,
[...]

So far I came up with an IORING_SEND_NOTIF_REPORT_USAGE opt-in flag,
and the reporting is done in cqe.res with IORING_NOTIF_USAGE_ZC_USED (0x00000001)
and/or IORING_NOTIF_USAGE_ZC_COPIED (0x80000000). So the caller is also
able to notice that some parts used zero copy, while other
fragments were copied.
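
For illustration, a userspace caller might consume this roughly as follows
(untested sketch; liburing's io_uring_prep_send_zc() and an already-connected
sockfd/buf/len are assumed):

    struct io_uring ring;
    struct io_uring_cqe *cqe;
    struct io_uring_sqe *sqe;

    io_uring_queue_init(8, &ring, 0);

    sqe = io_uring_get_sqe(&ring);
    /* opt in via the zc_flags argument, which lands in sqe->ioprio */
    io_uring_prep_send_zc(sqe, sockfd, buf, len, 0,
                          IORING_SEND_NOTIF_REPORT_USAGE);
    sqe->user_data = 42;
    io_uring_submit(&ring);

    /* two CQEs: the request CQE (IORING_CQE_F_MORE set), then the notif CQE */
    for (int i = 0; i < 2; i++) {
            io_uring_wait_cqe(&ring, &cqe);
            if (cqe->flags & IORING_CQE_F_NOTIF) {
                    if (cqe->res & IORING_NOTIF_USAGE_ZC_USED)
                            ;       /* at least part of the data went out zero-copy */
                    if (cqe->res & IORING_NOTIF_USAGE_ZC_COPIED)
                            ;       /* at least part of the data was copied */
            }
            io_uring_cqe_seen(&ring, cqe);
    }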

Are we really interested in multihoming and some probably very rare edge cases?
I'd argue we're not and it should be a single bool hint indicating whether
zc is viable or not. It can do more complex calculations _if_ needed, e.g.
looking inside skbs and figuring out how many bytes were copied, but as for me
it should better be turned into a single bool in the end. Could also be the
number of bytes copied, but I don't think we can have the accuracy for
that (e.g. what are we going to return if some protocol duplicates an skb
and sends to 2 different devices or is processing it in a pipeline?)

So the question is what is the use case for having 2 flags?

Btw, now we've got another example of why the report flag is a good idea:
we can't use cqe.res unconditionally because we want to have a "one CQE
per request" mode, but it's fine if we make it and the report flag
mutually exclusive.
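
For example, prep could simply reject the combination (sketch;
IORING_SEND_ZC_SINGLE_CQE is a purely hypothetical name for the one-CQE mode):

    if ((zc->flags & IORING_SEND_NOTIF_REPORT_USAGE) &&
        (zc->flags & IORING_SEND_ZC_SINGLE_CQE))    /* hypothetical flag */
            return -EINVAL;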


I haven't tested it yet, but I want to post it early...

What do you think?

Keeping in mind potential backporting, let's make it as simple and
short as possible first and then do optimisations on top.

diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index ab7458033ee3..751fc4eff8d1 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -296,10 +296,28 @@ enum io_uring_op {
   *
   * IORING_RECVSEND_FIXED_BUF    Use registered buffers, the index is stored in
   *                the buf_index field.
+ *
+ * IORING_SEND_NOTIF_REPORT_USAGE
+ *                If SEND[MSG]_ZC should report
+ *                the zerocopy usage in cqe.res
+ *                for the IORING_CQE_F_NOTIF cqe.
+ *                IORING_NOTIF_USAGE_ZC_USED if zero copy was used
+ *                (at least partially).
+ *                IORING_NOTIF_USAGE_ZC_COPIED if data was copied
+ *                (at least partially).
   */
  #define IORING_RECVSEND_POLL_FIRST    (1U << 0)
  #define IORING_RECV_MULTISHOT        (1U << 1)
  #define IORING_RECVSEND_FIXED_BUF    (1U << 2)
+#define IORING_SEND_NOTIF_REPORT_USAGE    (1U << 3)
+
+/*
+ * cqe.res for IORING_CQE_F_NOTIF if
+ * IORING_SEND_NOTIF_REPORT_USAGE was requested
+ */
+#define IORING_NOTIF_USAGE_ZC_USED    (1U << 0)
+#define IORING_NOTIF_USAGE_ZC_COPIED    (1U << 31)
+

  /*
   * accept flags stored in sqe->ioprio
diff --git a/io_uring/net.c b/io_uring/net.c
index 735eec545115..a79d7d349e19 100644
--- a/io_uring/net.c
+++ b/io_uring/net.c
@@ -946,9 +946,11 @@ int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)

      zc->flags = READ_ONCE(sqe->ioprio);
      if (zc->flags & ~(IORING_RECVSEND_POLL_FIRST |
-              IORING_RECVSEND_FIXED_BUF))
+              IORING_RECVSEND_FIXED_BUF |
+              IORING_SEND_NOTIF_REPORT_USAGE))
          return -EINVAL;
-    notif = zc->notif = io_alloc_notif(ctx);
+    notif = zc->notif = io_alloc_notif(ctx,
+                       zc->flags & IORING_SEND_NOTIF_REPORT_USAGE);
      if (!notif)
          return -ENOMEM;
      notif->cqe.user_data = req->cqe.user_data;
diff --git a/io_uring/notif.c b/io_uring/notif.c
index e37c6569d82e..3844e3c8ad7e 100644
--- a/io_uring/notif.c
+++ b/io_uring/notif.c
@@ -3,13 +3,14 @@
  #include <linux/file.h>
  #include <linux/slab.h>
  #include <linux/net.h>
+#include <linux/errqueue.h>

Is it needed?

  #include <linux/io_uring.h>

  #include "io_uring.h"
  #include "notif.h"
  #include "rsrc.h"

-static void __io_notif_complete_tw(struct io_kiocb *notif, bool *locked)
+static inline void __io_notif_complete_tw(struct io_kiocb *notif, bool *locked)

Let's remove this hunk with inlining and do it later

  {
      struct io_notif_data *nd = io_notif_to_data(notif);
      struct io_ring_ctx *ctx = notif->ctx;
@@ -21,20 +22,46 @@ static void __io_notif_complete_tw(struct io_kiocb *notif, bool *locked)
      io_req_task_complete(notif, locked);
  }

-static void io_uring_tx_zerocopy_callback(struct sk_buff *skb,
-                      struct ubuf_info *uarg,
-                      bool success)
+static inline void io_uring_tx_zerocopy_callback(struct sk_buff *skb,
+                         struct ubuf_info *uarg,
+                         bool success)

This one as well.


  {
      struct io_notif_data *nd = container_of(uarg, struct io_notif_data, uarg);
      struct io_kiocb *notif = cmd_to_io_kiocb(nd);

      if (refcount_dec_and_test(&uarg->refcnt)) {
-        notif->io_task_work.func = __io_notif_complete_tw;
          io_req_task_work_add(notif);
      }
  }

-struct io_kiocb *io_alloc_notif(struct io_ring_ctx *ctx)
+static void __io_notif_complete_tw_report_usage(struct io_kiocb *notif, bool *locked)

Just shove all that into __io_notif_complete_tw().

+{
+    struct io_notif_data *nd = io_notif_to_data(notif);
+
+    if (likely(nd->zc_used))
+        notif->cqe.res |= IORING_NOTIF_USAGE_ZC_USED;
+
+    if (unlikely(nd->zc_copied))
+        notif->cqe.res |= IORING_NOTIF_USAGE_ZC_COPIED;
+
+    __io_notif_complete_tw(notif, locked);
+}
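
Concretely, the merged version could look roughly like this (untested sketch;
a hypothetical nd->zc_report bool stands in for "reporting was requested",
since a single completion path can no longer rely on which callback was set):

    static void __io_notif_complete_tw(struct io_kiocb *notif, bool *locked)
    {
            struct io_notif_data *nd = io_notif_to_data(notif);
            struct io_ring_ctx *ctx = notif->ctx;

            if (nd->zc_report) {            /* hypothetical field */
                    if (nd->zc_used)
                            notif->cqe.res |= IORING_NOTIF_USAGE_ZC_USED;
                    if (unlikely(nd->zc_copied))
                            notif->cqe.res |= IORING_NOTIF_USAGE_ZC_COPIED;
            }

            /* ... existing unaccounting ... */
            io_req_task_complete(notif, locked);
    }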
+
+static void io_uring_tx_zerocopy_callback_report_usage(struct sk_buff *skb,
+                            struct ubuf_info *uarg,
+                            bool success)
+{
+    struct io_notif_data *nd = container_of(uarg, struct io_notif_data, uarg);
+
+    if (success && !nd->zc_used && skb)
+        nd->zc_used = true;
+    else if (unlikely(!success && !nd->zc_copied))
+        nd->zc_copied = true;

It's fine but racy, so let's WRITE_ONCE() to indicate it.
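
E.g. (sketch):

    if (success && !nd->zc_used && skb)
            WRITE_ONCE(nd->zc_used, true);
    else if (unlikely(!success && !nd->zc_copied))
            WRITE_ONCE(nd->zc_copied, true);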

+
+    io_uring_tx_zerocopy_callback(skb, uarg, success);
+}
+
+struct io_kiocb *io_alloc_notif(struct io_ring_ctx *ctx, bool report_usage)
      __must_hold(&ctx->uring_lock)

And it's better to kill this argument and init zc_used/copied
unconditionally.
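
I.e., roughly (untested sketch):

    struct io_kiocb *io_alloc_notif(struct io_ring_ctx *ctx)
            __must_hold(&ctx->uring_lock)
    {
            ...
            nd = io_notif_to_data(notif);
            nd->account_pages = 0;
            nd->zc_used = nd->zc_copied = false;    /* always initialised */
            nd->uarg.flags = SKBFL_ZEROCOPY_FRAG | SKBFL_DONT_ORPHAN;
            nd->uarg.callback = io_uring_tx_zerocopy_callback;
            refcount_set(&nd->uarg.refcnt, 1);
            return notif;
    }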

  {
      struct io_kiocb *notif;
@@ -54,7 +81,14 @@ struct io_kiocb *io_alloc_notif(struct io_ring_ctx *ctx)
      nd = io_notif_to_data(notif);
      nd->account_pages = 0;
      nd->uarg.flags = SKBFL_ZEROCOPY_FRAG | SKBFL_DONT_ORPHAN;
-    nd->uarg.callback = io_uring_tx_zerocopy_callback;
+    if (report_usage) {
+        nd->zc_used = nd->zc_copied = false;
+        nd->uarg.callback = io_uring_tx_zerocopy_callback_report_usage;
+        notif->io_task_work.func = __io_notif_complete_tw_report_usage;
+    } else {
+        nd->uarg.callback = io_uring_tx_zerocopy_callback;
+        notif->io_task_work.func = __io_notif_complete_tw;
+    }
      refcount_set(&nd->uarg.refcnt, 1);
      return notif;
  }
@@ -66,7 +100,6 @@ void io_notif_flush(struct io_kiocb *notif)

      /* drop slot's master ref */
      if (refcount_dec_and_test(&nd->uarg.refcnt)) {
-        notif->io_task_work.func = __io_notif_complete_tw;
          io_req_task_work_add(notif);
      }
  }
diff --git a/io_uring/notif.h b/io_uring/notif.h
index 5b4d710c8ca5..5ac7a2745e52 100644
--- a/io_uring/notif.h
+++ b/io_uring/notif.h
@@ -13,10 +13,12 @@ struct io_notif_data {
      struct file        *file;
      struct ubuf_info    uarg;
      unsigned long        account_pages;
+    bool            zc_used;
+    bool            zc_copied;

IIRC io_notif_data is fully packed in 6.1, so placing zc_{used,copied}
there might complicate backporting (if any). We can place them in io_kiocb
directly and move them in 6.2. Alternatively, account_pages doesn't have to be
a long.
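
E.g. (sketch, if shrinking account_pages turns out to be the preferred route):

    struct io_notif_data {
            struct file             *file;
            struct ubuf_info        uarg;
            unsigned                account_pages;  /* was unsigned long */
            bool                    zc_used;
            bool                    zc_copied;
    };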

  };

  void io_notif_flush(struct io_kiocb *notif);
-struct io_kiocb *io_alloc_notif(struct io_ring_ctx *ctx);
+struct io_kiocb *io_alloc_notif(struct io_ring_ctx *ctx, bool report_usage);

  static inline struct io_notif_data *io_notif_to_data(struct io_kiocb *notif)
  {


--
Pavel Begunkov


