Re: [PATCH blktests v3 1/2] src: add mini ublk source code

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 2/19/2023 7:46 PM, Ming Lei wrote:
> Prepare for adding ublk related test:
> 
> 1) ublk delete is sync removal, this way is convenient to
>     blkg/queue/disk instance leak issue
> 
> 2) mini ublk has two builtin target(null, loop), and loop IO is
> handled by io_uring, so we can use ublk to cover part of io_uring
> workloads
> 
> 3) not like loop/nbd, ublk won't pre-allocate/add disk, and always
> add/delete disk dynamically, this way may cover disk plug & unplug
> tests
> 
> 4) ublk specific test given people starts to use it, so better to
> let blktest cover ublk related tests
> 
> Add mini ublk source for test purpose only, which is easy to use:
> 
> ./miniublk add -t {null|loop} [-q nr_queues] [-d depth] [-n dev_id]
> 	 default: nr_queues=2(max 4), depth=128(max 128), dev_id=-1(auto allocation)
> 	 -t loop -f backing_file
> 	 -t null
> ./miniublk del [-n dev_id] -a
> 	 -a delete all devices, -n delete specified device
> ./miniublk list [-n dev_id] -a
> 	 -a list all devices, -n list specified device, default -a
> 
> miniublk depends on liburing 2.2, adds HAVE_LIBURING for checking if
> liburing 2.2 exists; also add HAVE_UBLK_HEADER for checking ublk kernel
> UAPI header exists. If either of two dependencies can't be met, simply
> ignore miniublk target.
> 
> Also v6.0 is the 1st linux kernel release with ublk.
> 
> Signed-off-by: Ming Lei <ming.lei@xxxxxxxxxx>
> ---
>   src/.gitignore |    1 +
>   src/Makefile   |   18 +
>   src/miniublk.c | 1376 ++++++++++++++++++++++++++++++++++++++++++++++++
>   3 files changed, 1395 insertions(+)
>   create mode 100644 src/miniublk.c
> 
> diff --git a/src/.gitignore b/src/.gitignore
> index 355bed3..df7aff5 100644
> --- a/src/.gitignore
> +++ b/src/.gitignore
> @@ -8,3 +8,4 @@
>   /sg/dxfer-from-dev
>   /sg/syzkaller1
>   /zbdioctl
> +/miniublk
> diff --git a/src/Makefile b/src/Makefile
> index 3b587f6..81c6541 100644
> --- a/src/Makefile
> +++ b/src/Makefile
> @@ -2,6 +2,10 @@ HAVE_C_HEADER = $(shell if echo "\#include <$(1)>" |		\
>   		$(CC) -E - > /dev/null 2>&1; then echo "$(2)";	\
>   		else echo "$(3)"; fi)
>   
> +HAVE_C_MACRO = $(shell if echo "#include <$(1)>" |	\
> +		$(CC) -E - 2>&1 /dev/null | grep $(2) > /dev/null 2>&1; \
> +		then echo 1;else echo 0; fi)
> +
>   C_TARGETS := \
>   	loblksize \
>   	loop_change_fd \
> @@ -13,16 +17,27 @@ C_TARGETS := \
>   	sg/syzkaller1 \
>   	zbdioctl
>   
> +C_MINIUBLK := miniublk
> +
> +HAVE_LIBURING := $(call HAVE_C_MACRO,liburing.h,IORING_OP_URING_CMD)
> +HAVE_UBLK_HEADER := $(call HAVE_C_HEADER,linux/ublk_cmd.h,1)
> +
>   CXX_TARGETS := \
>   	discontiguous-io
>   
> +ifeq ($(HAVE_LIBURING)$(HAVE_UBLK_HEADER), 11)
> +TARGETS := $(C_TARGETS) $(CXX_TARGETS) $(C_MINIUBLK)
> +else
> +$(info Skip $(C_MINIUBLK) build due to missing kernel header(v6.0+) or liburing(2.2+))
>   TARGETS := $(C_TARGETS) $(CXX_TARGETS)
> +endif
>   
>   CONFIG_DEFS := $(call HAVE_C_HEADER,linux/blkzoned.h,-DHAVE_LINUX_BLKZONED_H)
>   
>   override CFLAGS   := -O2 -Wall -Wshadow $(CFLAGS) $(CONFIG_DEFS)
>   override CXXFLAGS := -O2 -std=c++11 -Wall -Wextra -Wshadow -Wno-sign-compare \
>   		     -Werror $(CXXFLAGS) $(CONFIG_DEFS)
> +MINIUBLK_FLAGS :=  -D_GNU_SOURCE -lpthread -luring
>   
>   all: $(TARGETS)
>   
> @@ -39,4 +54,7 @@ $(C_TARGETS): %: %.c
>   $(CXX_TARGETS): %: %.cpp
>   	$(CXX) $(CPPFLAGS) $(CXXFLAGS) -o $@ $^
>   
> +$(C_MINIUBLK): %: miniublk.c
> +	$(CC) $(CFLAGS) $(MINIUBLK_FLAGS) -o $@ miniublk.c
> +
>   .PHONY: all clean install
> diff --git a/src/miniublk.c b/src/miniublk.c
> new file mode 100644
> index 0000000..e84ba41
> --- /dev/null
> +++ b/src/miniublk.c
> @@ -0,0 +1,1376 @@
> +// SPDX-License-Identifier: GPL-3.0+
> +// Copyright (C) 2023 Ming Lei
> +
> +/*
> + * io_uring based mini ublk implementation with null/loop target,
> + * for test purpose only.
> + *
> + * So please keep it clean & simple & reliable.
> + */
> +
> +#include <unistd.h>
> +#include <stdlib.h>
> +#include <assert.h>
> +#include <stdio.h>
> +#include <stdarg.h>
> +#include <string.h>
> +#include <pthread.h>
> +#include <getopt.h>
> +#include <limits.h>
> +#include <sys/syscall.h>
> +#include <sys/mman.h>
> +#include <sys/ioctl.h>
> +#include <liburing.h>
> +#include <linux/ublk_cmd.h>
> +
> +#define CTRL_DEV		"/dev/ublk-control"
> +#define UBLKC_DEV		"/dev/ublkc"
> +#define UBLK_CTRL_RING_DEPTH            32
> +
> +/* queue idle timeout */
> +#define UBLKSRV_IO_IDLE_SECS		20
> +
> +#define UBLK_IO_MAX_BYTES               65536
> +#define UBLK_MAX_QUEUES                 4
> +#define UBLK_QUEUE_DEPTH                128
> +
> +#define UBLK_DBG_DEV            (1U << 0)
> +#define UBLK_DBG_QUEUE          (1U << 1)
> +#define UBLK_DBG_IO_CMD         (1U << 2)
> +#define UBLK_DBG_IO             (1U << 3)
> +#define UBLK_DBG_CTRL_CMD       (1U << 4)
> +#define UBLK_LOG		(1U << 5)
> +
> +struct ublk_dev;
> +struct ublk_queue;
> +
> +struct ublk_ctrl_cmd_data {
> +	unsigned short cmd_op;

perhaps use enum type to avoid any type mismatach errors in future..

> +#define CTRL_CMD_HAS_DATA	1
> +#define CTRL_CMD_HAS_BUF	2
> +	unsigned short flags;

I'd keep falgs u64 or u32 just incase..

> +
> +	__u64 data[2];
> +	__u64 addr;
> +	__u32 len;
> +};
> +
> +struct ublk_io {
> +	char *buf_addr;
> +
> +#define UBLKSRV_NEED_FETCH_RQ		(1UL << 0)
> +#define UBLKSRV_NEED_COMMIT_RQ_COMP	(1UL << 1)
> +#define UBLKSRV_IO_FREE			(1UL << 2)
> +	unsigned int flags;
> +
> +	unsigned int result;
> +};
> +
> +struct ublk_tgt_ops {
> +	const char *name;
> +	int (*init_tgt)(struct ublk_dev *);
> +	void (*deinit_tgt)(struct ublk_dev *);
> +
> +	int (*queue_io)(struct ublk_queue *, int tag);
> +	void (*tgt_io_done)(struct ublk_queue *,
> +			int tag, const struct io_uring_cqe *);
> +};
> +
> +struct ublk_tgt {
> +	unsigned long dev_size;
> +	const struct ublk_tgt_ops *ops;
> +	int argc;
> +	char **argv;
> +	struct ublk_params params;
> +};
> +
> +struct ublk_queue {
> +	int q_id;
> +	int q_depth;

is there a reason to not use unsigned int for qid and qd above ?

> +	unsigned int cmd_inflight;
> +	unsigned int io_inflight;
> +	struct ublk_dev *dev;
> +	const struct ublk_tgt_ops *tgt_ops;
> +	char *io_cmd_buf;
> +	struct io_uring ring;
> +	struct ublk_io ios[UBLK_QUEUE_DEPTH];
> +#define UBLKSRV_QUEUE_STOPPING	(1U << 0)
> +#define UBLKSRV_QUEUE_IDLE	(1U << 1)
> +	unsigned state;
> +	int tid;

instead of int pid_t  ?

> +	pthread_t thread;
> +};
> +
> +struct ublk_dev {
> +	struct ublk_tgt tgt;
> +	struct ublksrv_ctrl_dev_info  dev_info;
> +	struct ublk_queue q[UBLK_MAX_QUEUES];
> +
> +	int fds[2];	/* fds[0] points to /dev/ublkcN */
> +	int nr_fds;
> +	int ctrl_fd;
> +	struct io_uring ring;
> +};
> +
> +#ifndef offsetof
> +#define offsetof(TYPE, MEMBER)  ((size_t)&((TYPE *)0)->MEMBER)
> +#endif
> +
> +#ifndef container_of
> +#define container_of(ptr, type, member) ({                              \
> +	unsigned long __mptr = (unsigned long)(ptr);                    \
> +	((type *)(__mptr - offsetof(type, member))); })
> +#endif
> +
> +#define round_up(val, rnd) \
> +	(((val) + ((rnd) - 1)) & ~((rnd) - 1))
> +
> +#define ublk_assert(x)  do { \
> +	if (!(x)) {     \
> +		ublk_err("%s %d: assert!\n", __func__, __LINE__); \
> +		assert(x);      \
> +	}       \
> +} while (0)
> +
> +static const struct ublk_tgt_ops *ublk_find_tgt(const char *name);
> +
> +static unsigned int ublk_dbg_mask = UBLK_LOG;
> +
> +static inline unsigned ilog2(unsigned x)
> +{
> +    return sizeof(unsigned) * 8 - 1 - __builtin_clz(x);
> +}
> +
> +static inline int is_target_io(__u64 user_data)
> +{
> +	return (user_data & (1ULL << 63)) != 0;
> +}
> +
> +static inline __u64 build_user_data(unsigned tag, unsigned op,
> +		unsigned tgt_data, unsigned is_target_io)
> +{
> +	assert(!(tag >> 16) && !(op >> 8) && !(tgt_data >> 16));
> +
> +	return tag | (op << 16) | (tgt_data << 24) | (__u64)is_target_io << 63;
> +}
> +
> +static inline unsigned int user_data_to_tag(__u64 user_data)
> +{
> +	return user_data & 0xffff;
> +}
> +
> +static inline unsigned int user_data_to_op(__u64 user_data)
> +{
> +	return (user_data >> 16) & 0xff;
> +}
> +
> +static void ublk_err(const char *fmt, ...)
> +{
> +	va_list ap;
> +
> +	va_start(ap, fmt);
> +	vfprintf(stderr, fmt, ap);
> +}
> +
> +static void ublk_log(const char *fmt, ...)
> +{
> +	if (ublk_dbg_mask & UBLK_LOG) {
> +		va_list ap;
> +
> +		va_start(ap, fmt);
> +		vfprintf(stdout, fmt, ap);
> +	}
> +}
> +
> +static void ublk_dbg(int level, const char *fmt, ...)
> +{
> +	if (level & ublk_dbg_mask) {
> +		va_list ap;
> +		va_start(ap, fmt);
> +		vfprintf(stdout, fmt, ap);
> +        }
> +}
> +
> +static inline void *ublk_get_sqe_cmd(const struct io_uring_sqe *sqe)
> +{
> +	return (void *)&sqe->addr3;
> +}
> +
> +static inline void ublk_mark_io_done(struct ublk_io *io, int res)
> +{
> +	io->flags |= (UBLKSRV_NEED_COMMIT_RQ_COMP | UBLKSRV_IO_FREE);
> +	io->result = res;
> +}
> +
> +static inline const struct ublksrv_io_desc *ublk_get_iod(
> +                const struct ublk_queue *q, int tag)
> +{
> +        return (struct ublksrv_io_desc *)
> +                &(q->io_cmd_buf[tag * sizeof(struct ublksrv_io_desc)]);
> +}
> +
> +static inline void ublk_set_sqe_cmd_op(struct io_uring_sqe *sqe,
> +		__u32 cmd_op)
> +{
> +        __u32 *addr = (__u32 *)&sqe->off;
> +
> +        addr[0] = cmd_op;
> +        addr[1] = 0;
> +}
> +
> +static inline int ublk_setup_ring(struct io_uring *r, int depth,
> +		int cq_depth, unsigned flags)
> +{
> +	struct io_uring_params p;
> +
> +	memset(&p, 0, sizeof(p));
> +	p.flags = flags | IORING_SETUP_CQSIZE;
> +	p.cq_entries = cq_depth;
> +
> +	return io_uring_queue_init_params(depth, r, &p);
> +}
> +
> +static inline void ublk_ctrl_init_cmd(struct ublk_dev *dev,
> +		struct io_uring_sqe *sqe,
> +		struct ublk_ctrl_cmd_data *data)
> +{
> +	struct ublksrv_ctrl_dev_info *info = &dev->dev_info;
> +	struct ublksrv_ctrl_cmd *cmd = (struct ublksrv_ctrl_cmd *)ublk_get_sqe_cmd(sqe);
> +
> +	sqe->fd = dev->ctrl_fd;
> +	sqe->opcode = IORING_OP_URING_CMD;
> +	sqe->ioprio = 0;
> +
> +	if (data->flags & CTRL_CMD_HAS_BUF) {
> +		cmd->addr = data->addr;
> +		cmd->len = data->len;
> +	}
> +
> +	if (data->flags & CTRL_CMD_HAS_DATA)
> +		cmd->data[0] = data->data[0];
> +
> +	cmd->dev_id = info->dev_id;
> +	cmd->queue_id = -1;
> +
> +	ublk_set_sqe_cmd_op(sqe, data->cmd_op);
> +
> +	io_uring_sqe_set_data(sqe, cmd);
> +}
> +
> +static int __ublk_ctrl_cmd(struct ublk_dev *dev,
> +		struct ublk_ctrl_cmd_data *data)
> +{
> +	struct io_uring_sqe *sqe;
> +	struct io_uring_cqe *cqe;
> +	int ret = -EINVAL;
> +
> +	sqe = io_uring_get_sqe(&dev->ring);
> +	if (!sqe) {
> +		ublk_err("%s: can't get sqe ret %d\n", __func__, ret);
> +		return ret;
> +	}
> +
> +	ublk_ctrl_init_cmd(dev, sqe, data);
> +
> +	ret = io_uring_submit(&dev->ring);
> +	if (ret < 0) {
> +		ublk_err("uring submit ret %d\n", ret);
> +		return ret;
> +	}
> +
> +	ret = io_uring_wait_cqe(&dev->ring, &cqe);
> +	if (ret < 0) {
> +		ublk_err("wait cqe: %s\n", strerror(-ret));
> +		return ret;
> +	}
> +	io_uring_cqe_seen(&dev->ring, cqe);
> +
> +	return cqe->res;
> +}
> +
> +int ublk_ctrl_stop_dev(struct ublk_dev *dev)
> +{
> +	struct ublk_ctrl_cmd_data data = {
> +		.cmd_op	= UBLK_CMD_STOP_DEV,
> +	};
> +	int ret;
> +
> +	ret = __ublk_ctrl_cmd(dev, &data);
> +	return ret;

you can just return directly from here and remove local var ret..

> +}
> +
> +int ublk_ctrl_start_dev(struct ublk_dev *dev,
> +		int daemon_pid)
> +{
> +	struct ublk_ctrl_cmd_data data = {
> +		.cmd_op	= UBLK_CMD_START_DEV,
> +		.flags	= CTRL_CMD_HAS_DATA,
> +	};
> +	int ret;
> +
> +	dev->dev_info.ublksrv_pid = data.data[0] = daemon_pid;
> +
> +	ret = __ublk_ctrl_cmd(dev, &data);
> +
> +	return ret;

same here as previous comment ...

> +}
> +
> +int ublk_ctrl_add_dev(struct ublk_dev *dev)
> +{
> +	struct ublk_ctrl_cmd_data data = {
> +		.cmd_op	= UBLK_CMD_ADD_DEV,
> +		.flags	= CTRL_CMD_HAS_BUF,
> +		.addr = (__u64)&dev->dev_info,
> +		.len = sizeof(struct ublksrv_ctrl_dev_info),
> +	};
> +
> +	return __ublk_ctrl_cmd(dev, &data);
> +}
> +
> +int ublk_ctrl_del_dev(struct ublk_dev *dev)
> +{
> +	struct ublk_ctrl_cmd_data data = {
> +		.cmd_op = UBLK_CMD_DEL_DEV,
> +		.flags = 0,
> +	};
> +
> +	return __ublk_ctrl_cmd(dev, &data);
> +}
> +
> +int ublk_ctrl_get_info(struct ublk_dev *dev)
> +{
> +	struct ublk_ctrl_cmd_data data = {
> +		.cmd_op	= UBLK_CMD_GET_DEV_INFO,
> +		.flags	= CTRL_CMD_HAS_BUF,
> +		.addr = (__u64)&dev->dev_info,
> +		.len = sizeof(struct ublksrv_ctrl_dev_info),
> +	};
> +
> +	return __ublk_ctrl_cmd(dev, &data);
> +}
> +
> +int ublk_ctrl_set_params(struct ublk_dev *dev,
> +		struct ublk_params *params)
> +{
> +	struct ublk_ctrl_cmd_data data = {
> +		.cmd_op	= UBLK_CMD_SET_PARAMS,
> +		.flags	= CTRL_CMD_HAS_BUF,
> +		.addr = (__u64)params,
> +		.len = sizeof(*params),
> +	};
> +	params->len = sizeof(*params);
> +	return __ublk_ctrl_cmd(dev, &data);
> +}
> +
> +static int ublk_ctrl_get_params(struct ublk_dev *dev,
> +		struct ublk_params *params)
> +{
> +	struct ublk_ctrl_cmd_data data = {
> +		.cmd_op	= UBLK_CMD_GET_PARAMS,
> +		.flags	= CTRL_CMD_HAS_BUF,
> +		.addr = (__u64)params,
> +		.len = sizeof(*params),
> +	};
> +	int ret;
> +
> +	params->len = sizeof(*params);
> +
> +	ret = __ublk_ctrl_cmd(dev, &data);
> +	return ret;

same here as previous comment ...

> +}
> +
> +static const char *ublk_dev_state_desc(struct ublk_dev *dev)
> +{
> +	switch (dev->dev_info.state) {
> +	case UBLK_S_DEV_DEAD:
> +		return "DEAD";
> +	case UBLK_S_DEV_LIVE:
> +		return "LIVE";
> +	default:
> +		return "UNKNOWN";
> +	};
> +}
> +
> +static void ublk_ctrl_dump(struct ublk_dev *dev, bool show_queue)
> +{
> +	struct ublksrv_ctrl_dev_info *info = &dev->dev_info;
> +	int ret;
> +	struct ublk_params p;
> +
> +	ret = ublk_ctrl_get_params(dev, &p);
> +	if (ret < 0) {
> +		ublk_err("failed to get params %m\n");
> +		return;
> +	}
> +
> +	ublk_log("dev id %d: nr_hw_queues %d queue_depth %d block size %d dev_capacity %lld\n",
> +			info->dev_id,
> +                        info->nr_hw_queues, info->queue_depth,
> +                        1 << p.basic.logical_bs_shift, p.basic.dev_sectors);
> +	ublk_log("\tmax rq size %d daemon pid %d flags 0x%llx state %s\n",
> +                        info->max_io_buf_bytes,
> +			info->ublksrv_pid, info->flags,
> +			ublk_dev_state_desc(dev));
> +	if (show_queue) {
> +		int i;
> +
> +		for (i = 0; i < dev->dev_info.nr_hw_queues; i++)
> +			ublk_log("\tqueue 0 tid: %d\n", dev->q[i].tid);
> +	}
> +	fflush(stdout);
> +}
> +
> +static void ublk_ctrl_deinit(struct ublk_dev *dev)
> +{
> +	close(dev->ctrl_fd);
> +	free(dev);
> +}
> +
> +static struct ublk_dev *ublk_ctrl_init()

perhaps specify void -> ublk_ctrl_init(void)

> +{
> +	struct ublk_dev *dev = (struct ublk_dev *)calloc(1, sizeof(*dev));
> +	struct ublksrv_ctrl_dev_info *info = &dev->dev_info;
> +	int ret;
> +
> +	dev->ctrl_fd = open(CTRL_DEV, O_RDWR);
> +	if (dev->ctrl_fd < 0) {
> +		ublk_err("control dev %s can't be opened: %m\n", CTRL_DEV);

perhaps also print the errno that is set by the open system call unless
ublk_err is doing that implicitly which I'm not sure.

also I didn't understand two format specifiers for above call and only
one argument its a bit confusing to read the code...

> +		exit(dev->ctrl_fd);
> +	}
> +	info->max_io_buf_bytes = UBLK_IO_MAX_BYTES;
> +
> +	ret = ublk_setup_ring(&dev->ring, UBLK_CTRL_RING_DEPTH,
> +			UBLK_CTRL_RING_DEPTH, IORING_SETUP_SQE128);
> +	if (ret < 0) {
> +		ublk_err("queue_init: %s\n", strerror(-ret));
> +		free(dev);
> +		return NULL;
> +	}
> +	dev->nr_fds = 1;
> +
> +	return dev;
> +}
> +
> +static int ublk_queue_cmd_buf_sz(struct ublk_queue *q)
> +{
> +	int size =  q->q_depth * sizeof(struct ublksrv_io_desc);
> +	unsigned int page_sz = getpagesize();
> +
> +	return round_up(size, page_sz);
> +}
> +
> +static void ublk_queue_deinit(struct ublk_queue *q)
> +{
> +	int i;
> +	int nr_ios = q->q_depth;
> +
> +	io_uring_unregister_ring_fd(&q->ring);
> +
> +	if (q->ring.ring_fd > 0) {
> +		io_uring_unregister_files(&q->ring);
> +		close(q->ring.ring_fd);
> +		q->ring.ring_fd = -1;
> +	}
> +
> +	if (q->io_cmd_buf)
> +		munmap(q->io_cmd_buf, ublk_queue_cmd_buf_sz(q));
> +
> +	for (i = 0; i < nr_ios; i++)
> +		free(q->ios[i].buf_addr);
> +}
> +
> +static int ublk_queue_init(struct ublk_queue *q)
> +{
> +	struct ublk_dev *dev = q->dev;
> +	int depth = dev->dev_info.queue_depth;
> +	int i, ret = -1;
> +	int cmd_buf_size, io_buf_size;
> +	unsigned long off;
> +	int ring_depth = depth, cq_depth = depth;
> +
> +	q->tgt_ops = dev->tgt.ops;
> +	q->state = 0;
> +	q->q_depth = depth;
> +	q->cmd_inflight = 0;
> +	q->tid = gettid();
> +
> +	cmd_buf_size = ublk_queue_cmd_buf_sz(q);
> +	off = UBLKSRV_CMD_BUF_OFFSET +
> +		q->q_id * (UBLK_MAX_QUEUE_DEPTH * sizeof(struct ublksrv_io_desc));
> +	q->io_cmd_buf = (char *)mmap(0, cmd_buf_size, PROT_READ,
> +			MAP_SHARED | MAP_POPULATE, dev->fds[0], off);
> +	if (q->io_cmd_buf == MAP_FAILED) {
> +		ublk_err("ublk dev %d queue %d map io_cmd_buf failed\n",
> +				q->dev->dev_info.dev_id, q->q_id);

also add strerror(errno) to message ?

> +		goto fail;
> +	}
> +
> +	io_buf_size = dev->dev_info.max_io_buf_bytes;
> +	for (i = 0; i < q->q_depth; i++) {
> +		q->ios[i].buf_addr = NULL;
> +
> +		if (posix_memalign((void **)&q->ios[i].buf_addr,
> +					getpagesize(), io_buf_size)) {
> +			ublk_err("ublk dev %d queue %d io %d posix_memalign failed\n",
> +					dev->dev_info.dev_id, q->q_id, i);
> +			goto fail;
> +		}

same as previous comment ..

> +		q->ios[i].flags = UBLKSRV_NEED_FETCH_RQ | UBLKSRV_IO_FREE;
> +	}
> +
> +	ret = ublk_setup_ring(&q->ring, ring_depth, cq_depth,
> +			IORING_SETUP_SQE128 | IORING_SETUP_COOP_TASKRUN);
> +	if (ret < 0) {
> +		ublk_err("ublk dev %d queue %d setup io_uring failed %d\n",
> +				q->dev->dev_info.dev_id, q->q_id, ret);
> +		goto fail;
> +	}
> +
> +	io_uring_register_ring_fd(&q->ring);
> +
> +	ret = io_uring_register_files(&q->ring, dev->fds, dev->nr_fds);
> +	if (ret) {
> +		ublk_err("ublk dev %d queue %d register files failed %d\n",
> +				q->dev->dev_info.dev_id, q->q_id, ret);
> +		goto fail;
> +	}
> +
> +	return 0;
> + fail:
> +	ublk_queue_deinit(q);
> +	ublk_err("ublk dev %d queue %d failed\n",
> +			dev->dev_info.dev_id, q->q_id);
> +	return -ENOMEM;
> +}
> +
> +static int ublk_dev_prep(struct ublk_dev *dev)
> +{
> +	int dev_id = dev->dev_info.dev_id;
> +	char buf[64];
> +	int ret;
> +
> +	snprintf(buf, 64, "%s%d", UBLKC_DEV, dev_id);
> +	dev->fds[0] = open(buf, O_RDWR);
> +	if (dev->fds[0] < 0) {
> +		ret = -EBADF;
> +		ublk_err("can't open %s, ret %d\n", buf, dev->fds[0]);
> +		goto fail;
> +	}
> +
> +	if (dev->tgt.ops->init_tgt)
> +		ret = dev->tgt.ops->init_tgt(dev);
> +	else
> +		ret = 0;
> +

if ret is initialize to 0 at the time of declaration in above function
we can safely get rid of else above ?

> +	return ret;
> +fail:
> +	close(dev->fds[0]);
> +	return ret;
> +}
> +
> +static void ublk_dev_unprep(struct ublk_dev *dev)
> +{
> +	if (dev->tgt.ops->deinit_tgt)
> +		dev->tgt.ops->deinit_tgt(dev);
> +	close(dev->fds[0]);
> +}
> +
> +static int ublk_queue_io_cmd(struct ublk_queue *q,
> +		struct ublk_io *io, unsigned tag)
> +{
> +	struct ublksrv_io_cmd *cmd;
> +	struct io_uring_sqe *sqe;
> +	unsigned int cmd_op = 0;
> +	__u64 user_data;
> +
> +	/* only freed io can be issued */
> +	if (!(io->flags & UBLKSRV_IO_FREE))
> +		return 0;
> +
> +	/* we issue because we need either fetching or committing */
> +	if (!(io->flags &
> +		(UBLKSRV_NEED_FETCH_RQ | UBLKSRV_NEED_COMMIT_RQ_COMP)))
> +		return 0;
> +
> +	if (io->flags & UBLKSRV_NEED_COMMIT_RQ_COMP)
> +		cmd_op = UBLK_IO_COMMIT_AND_FETCH_REQ;
> +	else if (io->flags & UBLKSRV_NEED_FETCH_RQ)
> +		cmd_op = UBLK_IO_FETCH_REQ;
> +
> +	sqe = io_uring_get_sqe(&q->ring);
> +	if (!sqe) {
> +		ublk_err("%s: run out of sqe %d, tag %d\n",
> +				__func__, q->q_id, tag);
> +		return -1;
> +	}
> +
> +	cmd = (struct ublksrv_io_cmd *)ublk_get_sqe_cmd(sqe);
> +
> +	if (cmd_op == UBLK_IO_COMMIT_AND_FETCH_REQ)
> +		cmd->result = io->result;
> +
> +	/* These fields should be written once, never change */
> +	ublk_set_sqe_cmd_op(sqe, cmd_op);
> +	sqe->fd		= 0;	/* dev->fds[0] */
> +	sqe->opcode	= IORING_OP_URING_CMD;
> +	sqe->flags	= IOSQE_FIXED_FILE;
> +	sqe->rw_flags	= 0;
> +	cmd->tag	= tag;
> +	cmd->addr	= (__u64)io->buf_addr;
> +	cmd->q_id	= q->q_id;
> +
> +	user_data = build_user_data(tag, cmd_op, 0, 0);
> +	io_uring_sqe_set_data64(sqe, user_data);
> +
> +	io->flags = 0;
> +
> +	q->cmd_inflight += 1;
> +
> +	ublk_dbg(UBLK_DBG_IO_CMD, "%s: (qid %d tag %u cmd_op %u) iof %x stopping %d\n",
> +			__func__, q->q_id, tag, cmd_op,
> +			io->flags, !!(q->state & UBLKSRV_QUEUE_STOPPING));
> +	return 1;
> +}
> +
> +static int ublk_complete_io(struct ublk_queue *q,
> +		unsigned tag, int res)
> +{
> +	struct ublk_io *io = &q->ios[tag];
> +
> +	ublk_mark_io_done(io, res);
> +
> +	return ublk_queue_io_cmd(q, io, tag);
> +}
> +
> +static void ublk_submit_fetch_commands(struct ublk_queue *q)
> +{
> +	int i = 0;
> +
> +	for (i = 0; i < q->q_depth; i++)
> +		ublk_queue_io_cmd(q, &q->ios[i], i);
> +}
> +
> +static int ublk_queue_is_idle(struct ublk_queue *q)
> +{
> +	return !io_uring_sq_ready(&q->ring) && !q->io_inflight;
> +}
> +
> +static int ublk_queue_is_done(struct ublk_queue *q)
> +{
> +	return (q->state & UBLKSRV_QUEUE_STOPPING) && ublk_queue_is_idle(q);
> +}
> +
> +static void ublk_queue_discard_io_pages(struct ublk_queue *q)
> +{
> +	const struct ublk_dev *dev = q->dev;
> +	unsigned int io_buf_size = dev->dev_info.max_io_buf_bytes;
> +	int i = 0;
> +
> +	for (i = 0; i < q->q_depth; i++)
> +		madvise(q->ios[i].buf_addr, io_buf_size, MADV_DONTNEED);
> +}
> +
> +static void ublk_queue_idle_enter(struct ublk_queue *q)
> +{
> +	if (q->state & UBLKSRV_QUEUE_IDLE)
> +		return;
> +
> +	ublk_dbg(UBLK_DBG_QUEUE, "dev%d-q%d: enter idle %x\n",
> +			q->dev->dev_info.dev_id, q->q_id, q->state);
> +	ublk_queue_discard_io_pages(q);
> +	q->state |= UBLKSRV_QUEUE_IDLE;
> +}
> +
> +static void ublk_queue_idle_exit(struct ublk_queue *q)
> +{
> +	if (q->state & UBLKSRV_QUEUE_IDLE) {
> +		ublk_dbg(UBLK_DBG_QUEUE, "dev%d-q%d: exit idle %x\n",
> +			q->dev->dev_info.dev_id, q->q_id, q->state);
> +		q->state &= ~UBLKSRV_QUEUE_IDLE;
> +	}
> +}
> +
> +static inline void ublksrv_handle_tgt_cqe(struct ublk_queue *q,
> +		struct io_uring_cqe *cqe)
> +{
> +	unsigned tag = user_data_to_tag(cqe->user_data);
> +
> +	if (cqe->res < 0 && cqe->res != -EAGAIN)
> +		ublk_err("%s: failed tgt io: res %d qid %u tag %u, cmd_op %u\n",
> +			__func__, cqe->res, q->q_id,
> +			user_data_to_tag(cqe->user_data),
> +			user_data_to_op(cqe->user_data));
> +
> +	if (q->tgt_ops->tgt_io_done)
> +		q->tgt_ops->tgt_io_done(q, tag, cqe);
> +}
> +
> +static void ublk_handle_cqe(struct io_uring *r,
> +		struct io_uring_cqe *cqe, void *data)
> +{
> +	struct ublk_queue *q = container_of(r, struct ublk_queue, ring);
> +	unsigned tag = user_data_to_tag(cqe->user_data);
> +	unsigned cmd_op = user_data_to_op(cqe->user_data);
> +	int fetch = (cqe->res != UBLK_IO_RES_ABORT) &&
> +		!(q->state & UBLKSRV_QUEUE_STOPPING);
> +	struct ublk_io *io;
> +
> +	ublk_dbg(UBLK_DBG_IO_CMD, "%s: res %d (qid %d tag %u cmd_op %u target %d) stopping %d\n",
> +			__func__, cqe->res, q->q_id, tag, cmd_op,
> +			is_target_io(cqe->user_data),
> +			(q->state & UBLKSRV_QUEUE_STOPPING));
> +
> +	/* Don't retrieve io in case of target io */
> +	if (is_target_io(cqe->user_data)) {
> +		ublksrv_handle_tgt_cqe(q, cqe);
> +		return;
> +	}
> +
> +	io = &q->ios[tag];
> +	q->cmd_inflight--;
> +
> +	if (!fetch) {
> +		q->state |= UBLKSRV_QUEUE_STOPPING;
> +		io->flags &= ~UBLKSRV_NEED_FETCH_RQ;
> +	}
> +
> +	if (cqe->res == UBLK_IO_RES_OK) {
> +		ublk_assert(tag < q->q_depth);
> +		q->tgt_ops->queue_io(q, tag);
> +	} else {
> +		/*
> +		 * COMMIT_REQ will be completed immediately since no fetching
> +		 * piggyback is required.
> +		 *
> +		 * Marking IO_FREE only, then this io won't be issued since
> +		 * we only issue io with (UBLKSRV_IO_FREE | UBLKSRV_NEED_*)
> +		 *
> +		 * */
> +		io->flags = UBLKSRV_IO_FREE;
> +	}

if you can return; from above if we can remove the else, no biggy
though..
> +}
> +
> +static int ublk_reap_events_uring(struct io_uring *r)
> +{
> +	struct io_uring_cqe *cqe;
> +	unsigned head;
> +	int count = 0;
> +
> +	io_uring_for_each_cqe(r, head, cqe) {
> +		ublk_handle_cqe(r, cqe, NULL);
> +		count += 1;
> +	}
> +	io_uring_cq_advance(r, count);
> +
> +	return count;
> +}
> +
> +static int ublk_process_io(struct ublk_queue *q)
> +{
> +	int ret, reapped;
> +	struct __kernel_timespec ts = {
> +		.tv_sec = UBLKSRV_IO_IDLE_SECS,
> +		.tv_nsec = 0
> +        };
> +	struct __kernel_timespec *tsp = (q->state & UBLKSRV_QUEUE_IDLE) ?
> +		NULL : &ts;
> +	struct io_uring_cqe *cqe;
> +
> +	ublk_dbg(UBLK_DBG_QUEUE, "dev%d-q%d: to_submit %d inflight cmd %u stopping %d\n",
> +				q->dev->dev_info.dev_id,
> +				q->q_id, io_uring_sq_ready(&q->ring),
> +				q->cmd_inflight,
> +				(q->state & UBLKSRV_QUEUE_STOPPING));
> +
> +	if (ublk_queue_is_done(q))
> +		return -ENODEV;
> +
> +	ret = io_uring_submit_and_wait_timeout(&q->ring, &cqe, 1, tsp, NULL);
> +	reapped = ublk_reap_events_uring(&q->ring);
> +
> +	ublk_dbg(UBLK_DBG_QUEUE, "submit result %d, reapped %d stop %d idle %d\n",
> +			ret, reapped, (q->state & UBLKSRV_QUEUE_STOPPING),
> +			(q->state & UBLKSRV_QUEUE_IDLE));
> +
> +	if (!(q->state & UBLKSRV_QUEUE_STOPPING)) {
> +		if (ret == -ETIME && reapped == 0 && ublk_queue_is_idle(q))
> +			ublk_queue_idle_enter(q);
> +		else
> +			ublk_queue_idle_exit(q);
> +	}
> +	return reapped;
> +}
> +
> +static void *ublk_io_handler_fn(void *data)
> +{
> +	struct ublk_queue *q = data;
> +	int dev_id = q->dev->dev_info.dev_id;
> +	int ret;
> +
> +	ret = ublk_queue_init(q);
> +	if (ret) {
> +		ublk_err("ublk dev %d queue %d init queue failed\n",
> +				dev_id, q->q_id);
> +		return NULL;
> +	}
> +
> +	/* submit all io commands to ublk driver */
> +	ublk_submit_fetch_commands(q);
> +
> +	ublk_dbg(UBLK_DBG_QUEUE, "tid %d: ublk dev %d queue %d started\n",
> +			gettid(),
> +			dev_id, q->q_id);
> +	do {
> +		if (ublk_process_io(q) < 0)
> +			break;
> +	} while (1);
> +
> +	ublk_dbg(UBLK_DBG_QUEUE, "ublk dev %d queue %d exited\n", dev_id, q->q_id);
> +	ublk_queue_deinit(q);
> +	return NULL;
> +}
> +
> +static void ublk_set_parameters(struct ublk_dev *dev)
> +{
> +	int ret;
> +
> +	ret = ublk_ctrl_set_params(dev, &dev->tgt.params);
> +	if (ret)
> +		ublk_err("dev %d set basic parameter failed %d\n",
> +				dev->dev_info.dev_id, ret);
> +}
> +
> +static int ublk_start_daemon(struct ublk_dev *dev)
> +{
> +	int ret, i;
> +	void *thread_ret;
> +	const struct ublksrv_ctrl_dev_info *dinfo = &dev->dev_info;
> +
> +	daemon(1, 1);
> +
> +	ublk_dbg(UBLK_DBG_DEV, "%s enter\n", __func__);
> +
> +	ret = ublk_dev_prep(dev);
> +	if (!dev)
> +		return -ENOMEM;
> +

can dev be NULL ever here ? do we need to check ret instaed of dev?
(I don't think so since looking it ublk_dev_prep() it will not change
dev value and it is unconditionally deferenced before it's use).

also shuldn't we return ret here as ublk_dev_prep() will retrun non
-ENOMEM value ?

> +	for (i = 0; i < dinfo->nr_hw_queues; i++) {
> +		dev->q[i].dev = dev;
> +		dev->q[i].q_id = i;
> +		pthread_create(&dev->q[i].thread, NULL,
> +				ublk_io_handler_fn,
> +				&dev->q[i]);
> +	}
> +
> +	ublk_set_parameters(dev);
> +
> +	/* everything is fine now, start us */
> +	ret = ublk_ctrl_start_dev(dev, getpid());
> +	if (ret < 0)
> +		goto fail;
> +
> +	ublk_ctrl_get_info(dev);
> +	ublk_ctrl_dump(dev, true);
> +
> +	/* wait until we are terminated */
> +	for (i = 0; i < dinfo->nr_hw_queues; i++)
> +		pthread_join(dev->q[i].thread, &thread_ret);
> + fail:
> +	ublk_dev_unprep(dev);
> +	ublk_dbg(UBLK_DBG_DEV, "%s exit\n", __func__);
> +
> +	return ret;
> +}
> +
> +static int cmd_dev_add(int argc, char *argv[])
> +{
> +	static const struct option longopts[] = {
> +		{ "type",		1,	NULL, 't' },
> +		{ "number",		1,	NULL, 'n' },
> +		{ "queues",		1,	NULL, 'q' },
> +		{ "depth",		1,	NULL, 'd' },
> +		{ "debug_mask",	1,	NULL, 0},
> +		{ "quiet",	0,	NULL, 0},
> +		{ NULL }
> +	};
> +	const struct ublk_tgt_ops *ops;
> +	struct ublksrv_ctrl_dev_info *info;
> +	struct ublk_dev *dev;
> +	int ret, option_idx, opt;
> +	const char *tgt_type = NULL;
> +	int dev_id = -1;
> +	unsigned nr_queues = 2, depth = UBLK_QUEUE_DEPTH;
> +
> +	while ((opt = getopt_long(argc, argv, "-:t:n:d:q:",
> +				  longopts, &option_idx)) != -1) {
> +		switch (opt) {
> +		case 'n':
> +			dev_id = strtol(optarg, NULL, 10);
> +			break;
> +		case 't':
> +			tgt_type = optarg;
> +			break;
> +		case 'q':
> +			nr_queues = strtol(optarg, NULL, 10);
> +			break;
> +		case 'd':
> +			depth = strtol(optarg, NULL, 10);
> +			break;
> +		case 0:
> +			if (!strcmp(longopts[option_idx].name, "debug_mask"))
> +				ublk_dbg_mask = strtol(optarg, NULL, 16);
> +			if (!strcmp(longopts[option_idx].name, "quiet"))
> +				ublk_dbg_mask = 0;
> +			break;
> +		}
> +	}
> +
> +	optind = 0;
> +
> +	ops = ublk_find_tgt(tgt_type);
> +	if (!ops) {
> +		ublk_err("%s: no such tgt type, type %s\n",
> +				__func__, tgt_type);
> +		return -ENODEV;
> +	}
> +
> +	if (nr_queues > UBLK_MAX_QUEUES || depth > UBLK_QUEUE_DEPTH) {
> +		ublk_err("%s: invalid nr_queues or depth queues %u depth %u\n",
> +				__func__, nr_queues, depth);
> +		return -EINVAL;
> +	}
> +
> +	dev = ublk_ctrl_init();
> +	if (!dev) {
> +		ublk_err("%s: can't alloc dev id %d, type %s\n",
> +				__func__, dev_id, tgt_type);
> +		return -ENOMEM;
> +	}
> +
> +	info = &dev->dev_info;
> +	info->dev_id = dev_id;
> +        info->nr_hw_queues = nr_queues;
> +        info->queue_depth = depth;
> +	dev->tgt.ops = ops;
> +	dev->tgt.argc = argc;
> +	dev->tgt.argv = argv;
> +
> +	ret = ublk_ctrl_add_dev(dev);
> +	if (ret < 0) {
> +		ublk_err("%s: can't add dev id %d, type %s ret %d\n",
> +				__func__, dev_id, tgt_type, ret);
> +		goto fail;
> +	}
> +
> +	ret = ublk_start_daemon(dev);
> +	if (ret < 0) {
> +		ublk_err("%s: can't start daemon id %d, type %s\n",
> +				__func__, dev_id, tgt_type);
> +		goto fail_del;
> +	}
> +fail_del:
> +	ublk_ctrl_del_dev(dev);
> +fail:
> +	ublk_ctrl_deinit(dev);
> +	return ret;
> +}
> +
> +static int ublk_stop_io_daemon(const struct ublk_dev *dev)
> +{
> +	int daemon_pid = dev->dev_info.ublksrv_pid;
> +	int cnt = 0, ret;
> +
> +	if (daemon_pid == -1)
> +		return 0;
> +
> +	/* wait until daemon is exited, or timeout after 3 seconds */
> +	do {
> +		ret = kill(daemon_pid, 0);
> +		if (ret)
> +			break;
> +		usleep(500000);
> +		cnt++;
> +	} while (!ret && cnt < 6);
> +
> +	ublk_dbg(UBLK_DBG_DEV, "%s: pid %d ret %d\n", __func__, daemon_pid, ret);
> +
> +	return ret != 0 ? 0 : -1;
> +}
> +
> +static int __cmd_dev_del(int number, bool log)
> +{
> +	struct ublk_dev *dev;
> +	int ret;
> +
> +	dev = ublk_ctrl_init();
> +	dev->dev_info.dev_id = number;
> +
> +	ret = ublk_ctrl_get_info(dev);
> +	if (ret < 0) {
> +		goto fail;
> +	}
> +
> +	ret = ublk_ctrl_stop_dev(dev);
> +	if (ret < 0) {
> +		if (log)
> +			ublk_err("stop dev %d failed\n", number);
> +		goto fail;
> +	}
> +
> +	ret = ublk_stop_io_daemon(dev);
> +	if (ret < 0) {
> +		if (log)
> +			ublk_err("stop daemon %d failed\n", number);
> +	}
> +
> +	ublk_ctrl_del_dev(dev);
> +fail:
> +	ublk_ctrl_deinit(dev);
> +	return ret;
> +}
> +
> +static int cmd_dev_del(int argc, char *argv[])
> +{
> +	static const struct option longopts[] = {
> +		{ "number",		1,	NULL, 'n' },
> +		{ "all",		0,	NULL, 'a' },
> +		{ "debug_mask",	1,	NULL, 0},
> +		{ NULL }
> +	};
> +	int number = -2;
> +	int opt, i, option_idx;
> +
> +	while ((opt = getopt_long(argc, argv, "n:a",
> +				  longopts, &option_idx)) != -1) {
> +		switch (opt) {
> +		case 'a':
> +			number = -1;
> +			break;
> +
> +		case 'n':
> +			number = strtol(optarg, NULL, 10);
> +			break;
> +		case 0:
> +			if (!strcmp(longopts[option_idx].name, "debug_mask"))
> +				ublk_dbg_mask = strtol(optarg, NULL, 16);
> +			break;
> +		}
> +	}
> +
> +	if (number >= 0)
> +		return __cmd_dev_del(number, true);
> +	else if (number != -1) {
> +		ublk_err("%s: pass wrong devid or not delete via -a\n");
> +		return -EINVAL;
> +	}
> +
> +	for (i = 0; i < 255; i++)
> +		__cmd_dev_del(i, false);
> +
> +	return 0;
> +}
> +
> +static int __cmd_dev_list(int number, bool log)
> +{
> +	struct ublk_dev *dev = ublk_ctrl_init();
> +	int ret;
> +
> +	dev->dev_info.dev_id = number;
> +
> +	ret = ublk_ctrl_get_info(dev);
> +	if (ret < 0) {
> +		if (log)
> +			ublk_err("%s: can't get dev info from %d: %d\n",
> +					__func__, number, ret);
> +	} else {
> +		ublk_ctrl_dump(dev, false);
> +	}
> +
> +	ublk_ctrl_deinit(dev);
> +
> +	return ret;
> +}
> +
> +
> +static int cmd_dev_list(int argc, char *argv[])
> +{
> +	static const struct option longopts[] = {
> +		{ "number",		1,	NULL, 'n' },
> +		{ "all",		0,	NULL, 'a' },
> +		{ NULL }
> +	};
> +	int number = -1;
> +	int opt, i;
> +
> +	while ((opt = getopt_long(argc, argv, "n:a",
> +				  longopts, NULL)) != -1) {
> +		switch (opt) {
> +		case 'a':
> +			break;
> +
> +		case 'n':
> +			number = strtol(optarg, NULL, 10);
> +			break;
> +		}
> +	}
> +
> +	if (number >= 0)
> +		return __cmd_dev_list(number, true);
> +
> +	for (i = 0; i < 255; i++)
> +		__cmd_dev_list(i, false);
> +
> +	return 0;
> +}
> +
> +static int cmd_dev_help(int argc, char *argv[])
> +{
> +	printf("%s add -t {null|loop} [-q nr_queues] [-d depth] [-n dev_id] \n",
> +			argv[0]);
> +	printf("\t default: nr_queues=2(max 4), depth=128(max 128), dev_id=-1(auto allocation)\n");
> +	printf("\t -t loop -f backing_file \n");
> +	printf("\t -t null\n");
> +	printf("%s del [-n dev_id] -a \n", argv[0]);
> +	printf("\t -a delete all devices -n delete specified device\n");
> +	printf("%s list [-n dev_id] -a \n", argv[0]);
> +	printf("\t -a list all devices, -n list specified device, default -a \n");
> +
> +	return 0;
> +}
> +
> +static int ublk_null_tgt_init(struct ublk_dev *dev)
> +{
> +	const struct ublksrv_ctrl_dev_info *info = &dev->dev_info;
> +	unsigned long dev_size = 250UL << 30;
> +
> +	dev->tgt.dev_size = dev_size;
> +	dev->tgt.params = (struct ublk_params) {
> +		.types = UBLK_PARAM_TYPE_BASIC,
> +		.basic = {
> +			.logical_bs_shift	= 9,
> +			.physical_bs_shift	= 12,
> +			.io_opt_shift		= 12,
> +			.io_min_shift		= 9,
> +			.max_sectors		= info->max_io_buf_bytes >> 9,
> +			.dev_sectors		= dev_size >> 9,
> +		},
> +	};
> +
> +	return 0;
> +}
> +
> +static int ublk_null_queue_io(struct ublk_queue *q, int tag)
> +{
> +	const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag);
> +
> +	ublk_complete_io(q, tag, iod->nr_sectors << 9);
> +
> +	return 0;
> +}
> +
> +static int loop_queue_tgt_io(struct ublk_queue *q, int tag)
> +{
> +	const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag);
> +	struct io_uring_sqe *sqe = io_uring_get_sqe(&q->ring);
> +	unsigned ublk_op = ublksrv_get_op(iod);
> +
> +	if (!sqe)
> +		return -ENOMEM;
> +
> +	switch (ublk_op) {
> +	case UBLK_IO_OP_FLUSH:
> +		io_uring_prep_sync_file_range(sqe, 1 /*fds[1]*/,
> +				iod->nr_sectors << 9,
> +				iod->start_sector << 9,
> +				IORING_FSYNC_DATASYNC);
> +		io_uring_sqe_set_flags(sqe, IOSQE_FIXED_FILE);
> +		break;
> +	case UBLK_IO_OP_WRITE_ZEROES:
> +	case UBLK_IO_OP_DISCARD:
> +		return -ENOTSUP;
> +	case UBLK_IO_OP_READ:
> +		io_uring_prep_read(sqe, 1 /*fds[1]*/,
> +				(void *)iod->addr,
> +				iod->nr_sectors << 9,
> +				iod->start_sector << 9);
> +		io_uring_sqe_set_flags(sqe, IOSQE_FIXED_FILE);
> +		break;
> +	case UBLK_IO_OP_WRITE:
> +		io_uring_prep_write(sqe, 1 /*fds[1]*/,
> +				(void *)iod->addr,
> +				iod->nr_sectors << 9,
> +				iod->start_sector << 9);
> +		io_uring_sqe_set_flags(sqe, IOSQE_FIXED_FILE);
> +		break;
> +	default:
> +		return -EINVAL;
> +	}
> +
> +	q->io_inflight++;
> +	/* bit63 marks us as tgt io */
> +	sqe->user_data = build_user_data(tag, ublk_op, 0, 1);
> +
> +	ublk_dbg(UBLK_DBG_IO, "%s: tag %d ublk io %x %llx %u\n", __func__, tag,
> +			iod->op_flags, iod->start_sector, iod->nr_sectors << 9);
> +	return 1;
> +}
> +
> +static int ublk_loop_queue_io(struct ublk_queue *q, int tag)
> +{
> +	int queued = loop_queue_tgt_io(q, tag);
> +
> +	if (queued < 0)
> +		ublk_complete_io(q, tag, queued);
> +
> +	return 0;
> +}
> +
> +static void ublk_loop_io_done(struct ublk_queue *q, int tag,
> +		const struct io_uring_cqe *cqe)
> +{
> +	int cqe_tag = user_data_to_tag(cqe->user_data);
> +
> +	ublk_assert(tag == cqe_tag);
> +	ublk_complete_io(q, tag, cqe->res);
> +	q->io_inflight--;
> +}
> +
> +static void ublk_loop_tgt_deinit(struct ublk_dev *dev)
> +{
> +	fsync(dev->fds[1]);
> +	close(dev->fds[1]);
> +}
> +
> +static int ublk_loop_tgt_init(struct ublk_dev *dev)
> +{
> +	static const struct option lo_longopts[] = {
> +		{ "file",		1,	NULL, 'f' },
> +		{ NULL }
> +	};
> +	unsigned long long bytes;
> +	char **argv = dev->tgt.argv;
> +	int argc = dev->tgt.argc;
> +	char *file = NULL;
> +	struct stat st;
> +	int fd, opt;
> +	struct ublk_params p = {
> +		.types = UBLK_PARAM_TYPE_BASIC,
> +		.basic = {
> +			.logical_bs_shift	= 9,
> +			.physical_bs_shift	= 12,
> +			.io_opt_shift	= 12,
> +			.io_min_shift	= 9,
> +			.max_sectors = dev->dev_info.max_io_buf_bytes >> 9,
> +		},
> +	};
> +
> +	while ((opt = getopt_long(argc, argv, "-:f:",
> +				  lo_longopts, NULL)) != -1) {
> +		switch (opt) {
> +		case 'f':
> +			file = strdup(optarg);
> +			break;
> +		}
> +	}
> +
> +	ublk_dbg(UBLK_DBG_DEV, "%s: file %s\n", __func__, file);
> +
> +	if (!file)
> +		return -EINVAL;
> +
> +	fd = open(file, O_RDWR);
> +	if (fd < 0) {
> +		ublk_err( "%s: backing file %s can't be opened\n",
> +				__func__, file);
> +		return -EBADF;
> +	}
> +
> +	if (fstat(fd, &st) < 0) {
> +		close(fd);
> +		return -EBADF;
> +	}
> +
> +	if (S_ISBLK(st.st_mode)) {
> +		unsigned int bs, pbs;
> +
> +		if (ioctl(fd, BLKGETSIZE64, &bytes) != 0)
> +			return -EBADF;
> +		if (ioctl(fd, BLKSSZGET, &bs) != 0)
> +			return -1;
> +		if (ioctl(fd, BLKPBSZGET, &pbs) != 0)
> +			return -1;
> +		p.basic.logical_bs_shift = ilog2(bs);
> +		p.basic.physical_bs_shift = ilog2(pbs);
> +	} else if (S_ISREG(st.st_mode)) {
> +		bytes = st.st_size;
> +	} else {
> +		bytes = 0;
> +	}
> +
> +	if (fcntl(fd, F_SETFL, O_DIRECT)) {
> +		p.basic.logical_bs_shift = 9;
> +		p.basic.physical_bs_shift = 12;
> +		ublk_log("%s: ublk-loop fallback to buffered IO\n", __func__);
> +	}
> +
> +	dev->tgt.dev_size = bytes;
> +	p.basic.dev_sectors = bytes >> 9;
> +	dev->fds[1] = fd;
> +	dev->nr_fds += 1;
> +	dev->tgt.params = p;
> +
> +	return 0;
> +}
> +
> +const struct ublk_tgt_ops tgt_ops_list[] = {
> +	{
> +		.name = "null",
> +		.init_tgt = ublk_null_tgt_init,
> +		.queue_io = ublk_null_queue_io,
> +	},
> +
> +	{
> +		.name = "loop",
> +		.init_tgt = ublk_loop_tgt_init,
> +		.deinit_tgt = ublk_loop_tgt_deinit,
> +		.queue_io = ublk_loop_queue_io,
> +		.tgt_io_done = ublk_loop_io_done,
> +	},
> +};
> +
> +static const struct ublk_tgt_ops *ublk_find_tgt(const char *name)
> +{
> +	const struct ublk_tgt_ops *ops;
> +	int i;
> +
> +	if (name == NULL)
> +		return NULL;
> +
> +	for (i = 0; sizeof(tgt_ops_list) / sizeof(*ops); i++)
> +		if (strcmp(tgt_ops_list[i].name, name) == 0)
> +			return &tgt_ops_list[i];
> +	return NULL;
> +}
> +
> +int main(int argc, char *argv[])
> +{
> +	const char *cmd = argv[1];
> +	int ret = -EINVAL;
> +
> +	if (argc == 1)
> +		goto out;
> +
> +	if (!strcmp(cmd, "add"))
> +		ret = cmd_dev_add(argc, argv);
> +	else if (!strcmp(cmd, "del"))
> +		ret = cmd_dev_del(argc, argv);
> +	else if (!strcmp(cmd, "list"))
> +		ret = cmd_dev_list(argc, argv);
> +	else if (!strcmp(cmd, "help"))
> +		ret = cmd_dev_help(argc, argv);
> +out:
> +	if (ret)
> +		cmd_dev_help(argc, argv);
> +
> +	return ret;
> +}




[Index of Archives]     [Linux RAID]     [Linux SCSI]     [Linux ATA RAID]     [IDE]     [Linux Wireless]     [Linux Kernel]     [ATH6KL]     [Linux Bluetooth]     [Linux Netdev]     [Kernel Newbies]     [Security]     [Git]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Device Mapper]

  Powered by Linux