On 15/10/2020 22:11, Pavel Begunkov wrote: > see [3/4] and [4/4] for motivation, the other two are just preps. > I wasn't expecting to find a performance difference, but my naive nop > test yields 5030 vs 5160 KIOPS, before and after [3/4] respectively. Forgot to add that I want to review it thoroughly for silly bugs, so until then it's more of an RFC. > The test submits 32 linked nops and waits for them to complete. > The testing is tuned for consistency, and the results are consistent > across reboots. The test was basically like the diff below. diff --git a/tools/io_uring/io_uring-bench.c b/tools/io_uring/io_uring-bench.c index 7703f0118385..84c4487c4d4e 100644 --- a/tools/io_uring/io_uring-bench.c +++ b/tools/io_uring/io_uring-bench.c @@ -96,13 +96,13 @@ static volatile int finish; /* * OPTIONS: Set these to test the various features of io_uring. */ -static int polled = 1; /* use IO polling */ +static int polled = 0; /* use IO polling */ static int fixedbufs = 1; /* use fixed user buffers */ static int register_files = 1; /* use fixed files */ static int buffered = 0; /* use buffered IO, not O_DIRECT */ static int sq_thread_poll = 0; /* use kernel submission/poller thread */ static int sq_thread_cpu = -1; /* pin above thread to this CPU */ -static int do_nop = 0; /* no-op SQ ring commands */ +static int do_nop = 1; /* no-op SQ ring commands */ static int io_uring_register_buffers(struct submitter *s) { @@ -149,6 +149,7 @@ static void init_io(struct submitter *s, unsigned index) if (do_nop) { sqe->opcode = IORING_OP_NOP; + sqe->flags = IOSQE_IO_LINK; return; } -- Pavel Begunkov