Re: [PATCH blktests v3 1/2] src: add mini ublk source code

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi Chaitanya,

Thanks for the review!

On Tue, Feb 21, 2023 at 07:58:48PM +0000, Chaitanya Kulkarni wrote:
> On 2/19/2023 7:46 PM, Ming Lei wrote:
> > Prepare for adding ublk related test:
> > 
> > 1) ublk delete is sync removal, this way is convenient to
> >     blkg/queue/disk instance leak issue
> > 
> > 2) mini ublk has two builtin target(null, loop), and loop IO is
> > handled by io_uring, so we can use ublk to cover part of io_uring
> > workloads
> > 
> > 3) not like loop/nbd, ublk won't pre-allocate/add disk, and always
> > add/delete disk dynamically, this way may cover disk plug & unplug
> > tests
> > 
> > 4) ublk specific test given people starts to use it, so better to
> > let blktest cover ublk related tests
> > 
> > Add mini ublk source for test purpose only, which is easy to use:
> > 
> > ./miniublk add -t {null|loop} [-q nr_queues] [-d depth] [-n dev_id]
> > 	 default: nr_queues=2(max 4), depth=128(max 128), dev_id=-1(auto allocation)
> > 	 -t loop -f backing_file
> > 	 -t null
> > ./miniublk del [-n dev_id] -a
> > 	 -a delete all devices, -n delete specified device
> > ./miniublk list [-n dev_id] -a
> > 	 -a list all devices, -n list specified device, default -a
> > 
> > miniublk depends on liburing 2.2, adds HAVE_LIBURING for checking if
> > liburing 2.2 exists; also add HAVE_UBLK_HEADER for checking ublk kernel
> > UAPI header exists. If either of two dependencies can't be met, simply
> > ignore miniublk target.
> > 
> > Also v6.0 is the 1st linux kernel release with ublk.
> > 
> > Signed-off-by: Ming Lei <ming.lei@xxxxxxxxxx>
> > ---
> >   src/.gitignore |    1 +
> >   src/Makefile   |   18 +
> >   src/miniublk.c | 1376 ++++++++++++++++++++++++++++++++++++++++++++++++
> >   3 files changed, 1395 insertions(+)
> >   create mode 100644 src/miniublk.c
> > 
> > diff --git a/src/.gitignore b/src/.gitignore
> > index 355bed3..df7aff5 100644
> > --- a/src/.gitignore
> > +++ b/src/.gitignore
> > @@ -8,3 +8,4 @@
> >   /sg/dxfer-from-dev
> >   /sg/syzkaller1
> >   /zbdioctl
> > +/miniublk
> > diff --git a/src/Makefile b/src/Makefile
> > index 3b587f6..81c6541 100644
> > --- a/src/Makefile
> > +++ b/src/Makefile
> > @@ -2,6 +2,10 @@ HAVE_C_HEADER = $(shell if echo "\#include <$(1)>" |		\
> >   		$(CC) -E - > /dev/null 2>&1; then echo "$(2)";	\
> >   		else echo "$(3)"; fi)
> >   
> > +HAVE_C_MACRO = $(shell if echo "#include <$(1)>" |	\
> > +		$(CC) -E - 2>&1 /dev/null | grep $(2) > /dev/null 2>&1; \
> > +		then echo 1;else echo 0; fi)
> > +
> >   C_TARGETS := \
> >   	loblksize \
> >   	loop_change_fd \
> > @@ -13,16 +17,27 @@ C_TARGETS := \
> >   	sg/syzkaller1 \
> >   	zbdioctl
> >   
> > +C_MINIUBLK := miniublk
> > +
> > +HAVE_LIBURING := $(call HAVE_C_MACRO,liburing.h,IORING_OP_URING_CMD)
> > +HAVE_UBLK_HEADER := $(call HAVE_C_HEADER,linux/ublk_cmd.h,1)
> > +
> >   CXX_TARGETS := \
> >   	discontiguous-io
> >   
> > +ifeq ($(HAVE_LIBURING)$(HAVE_UBLK_HEADER), 11)
> > +TARGETS := $(C_TARGETS) $(CXX_TARGETS) $(C_MINIUBLK)
> > +else
> > +$(info Skip $(C_MINIUBLK) build due to missing kernel header(v6.0+) or liburing(2.2+))
> >   TARGETS := $(C_TARGETS) $(CXX_TARGETS)
> > +endif
> >   
> >   CONFIG_DEFS := $(call HAVE_C_HEADER,linux/blkzoned.h,-DHAVE_LINUX_BLKZONED_H)
> >   
> >   override CFLAGS   := -O2 -Wall -Wshadow $(CFLAGS) $(CONFIG_DEFS)
> >   override CXXFLAGS := -O2 -std=c++11 -Wall -Wextra -Wshadow -Wno-sign-compare \
> >   		     -Werror $(CXXFLAGS) $(CONFIG_DEFS)
> > +MINIUBLK_FLAGS :=  -D_GNU_SOURCE -lpthread -luring
> >   
> >   all: $(TARGETS)
> >   
> > @@ -39,4 +54,7 @@ $(C_TARGETS): %: %.c
> >   $(CXX_TARGETS): %: %.cpp
> >   	$(CXX) $(CPPFLAGS) $(CXXFLAGS) -o $@ $^
> >   
> > +$(C_MINIUBLK): %: miniublk.c
> > +	$(CC) $(CFLAGS) $(MINIUBLK_FLAGS) -o $@ miniublk.c
> > +
> >   .PHONY: all clean install
> > diff --git a/src/miniublk.c b/src/miniublk.c
> > new file mode 100644
> > index 0000000..e84ba41
> > --- /dev/null
> > +++ b/src/miniublk.c
> > @@ -0,0 +1,1376 @@
> > +// SPDX-License-Identifier: GPL-3.0+
> > +// Copyright (C) 2023 Ming Lei
> > +
> > +/*
> > + * io_uring based mini ublk implementation with null/loop target,
> > + * for test purpose only.
> > + *
> > + * So please keep it clean & simple & reliable.
> > + */
> > +
> > +#include <unistd.h>
> > +#include <stdlib.h>
> > +#include <assert.h>
> > +#include <stdio.h>
> > +#include <stdarg.h>
> > +#include <string.h>
> > +#include <pthread.h>
> > +#include <getopt.h>
> > +#include <limits.h>
> > +#include <sys/syscall.h>
> > +#include <sys/mman.h>
> > +#include <sys/ioctl.h>
> > +#include <liburing.h>
> > +#include <linux/ublk_cmd.h>
> > +
> > +#define CTRL_DEV		"/dev/ublk-control"
> > +#define UBLKC_DEV		"/dev/ublkc"
> > +#define UBLK_CTRL_RING_DEPTH            32
> > +
> > +/* queue idle timeout */
> > +#define UBLKSRV_IO_IDLE_SECS		20
> > +
> > +#define UBLK_IO_MAX_BYTES               65536
> > +#define UBLK_MAX_QUEUES                 4
> > +#define UBLK_QUEUE_DEPTH                128
> > +
> > +#define UBLK_DBG_DEV            (1U << 0)
> > +#define UBLK_DBG_QUEUE          (1U << 1)
> > +#define UBLK_DBG_IO_CMD         (1U << 2)
> > +#define UBLK_DBG_IO             (1U << 3)
> > +#define UBLK_DBG_CTRL_CMD       (1U << 4)
> > +#define UBLK_LOG		(1U << 5)
> > +
> > +struct ublk_dev;
> > +struct ublk_queue;
> > +
> > +struct ublk_ctrl_cmd_data {
> > +	unsigned short cmd_op;
> 
> perhaps use enum type to avoid any type mismatach errors in future..

Sounds good.

> 
> > +#define CTRL_CMD_HAS_DATA	1
> > +#define CTRL_CMD_HAS_BUF	2
> > +	unsigned short flags;
> 
> I'd keep falgs u64 or u32 just incase..

Fine.

> 
> > +
> > +	__u64 data[2];
> > +	__u64 addr;
> > +	__u32 len;
> > +};
> > +
> > +struct ublk_io {
> > +	char *buf_addr;
> > +
> > +#define UBLKSRV_NEED_FETCH_RQ		(1UL << 0)
> > +#define UBLKSRV_NEED_COMMIT_RQ_COMP	(1UL << 1)
> > +#define UBLKSRV_IO_FREE			(1UL << 2)
> > +	unsigned int flags;
> > +
> > +	unsigned int result;
> > +};
> > +
> > +struct ublk_tgt_ops {
> > +	const char *name;
> > +	int (*init_tgt)(struct ublk_dev *);
> > +	void (*deinit_tgt)(struct ublk_dev *);
> > +
> > +	int (*queue_io)(struct ublk_queue *, int tag);
> > +	void (*tgt_io_done)(struct ublk_queue *,
> > +			int tag, const struct io_uring_cqe *);
> > +};
> > +
> > +struct ublk_tgt {
> > +	unsigned long dev_size;
> > +	const struct ublk_tgt_ops *ops;
> > +	int argc;
> > +	char **argv;
> > +	struct ublk_params params;
> > +};
> > +
> > +struct ublk_queue {
> > +	int q_id;
> > +	int q_depth;
> 
> is there a reason to not use unsigned int for qid and qd above ?

Because int is enough for holding nr_queues and depth.

> 
> > +	unsigned int cmd_inflight;
> > +	unsigned int io_inflight;
> > +	struct ublk_dev *dev;
> > +	const struct ublk_tgt_ops *tgt_ops;
> > +	char *io_cmd_buf;
> > +	struct io_uring ring;
> > +	struct ublk_io ios[UBLK_QUEUE_DEPTH];
> > +#define UBLKSRV_QUEUE_STOPPING	(1U << 0)
> > +#define UBLKSRV_QUEUE_IDLE	(1U << 1)
> > +	unsigned state;
> > +	int tid;
> 
> instead of int pid_t  ?

OK.

> 
> > +	pthread_t thread;
> > +};
> > +
> > +struct ublk_dev {
> > +	struct ublk_tgt tgt;
> > +	struct ublksrv_ctrl_dev_info  dev_info;
> > +	struct ublk_queue q[UBLK_MAX_QUEUES];
> > +
> > +	int fds[2];	/* fds[0] points to /dev/ublkcN */
> > +	int nr_fds;
> > +	int ctrl_fd;
> > +	struct io_uring ring;
> > +};
> > +
> > +#ifndef offsetof
> > +#define offsetof(TYPE, MEMBER)  ((size_t)&((TYPE *)0)->MEMBER)
> > +#endif
> > +
> > +#ifndef container_of
> > +#define container_of(ptr, type, member) ({                              \
> > +	unsigned long __mptr = (unsigned long)(ptr);                    \
> > +	((type *)(__mptr - offsetof(type, member))); })
> > +#endif
> > +
> > +#define round_up(val, rnd) \
> > +	(((val) + ((rnd) - 1)) & ~((rnd) - 1))
> > +
> > +#define ublk_assert(x)  do { \
> > +	if (!(x)) {     \
> > +		ublk_err("%s %d: assert!\n", __func__, __LINE__); \
> > +		assert(x);      \
> > +	}       \
> > +} while (0)
> > +
> > +static const struct ublk_tgt_ops *ublk_find_tgt(const char *name);
> > +
> > +static unsigned int ublk_dbg_mask = UBLK_LOG;
> > +
> > +static inline unsigned ilog2(unsigned x)
> > +{
> > +    return sizeof(unsigned) * 8 - 1 - __builtin_clz(x);
> > +}
> > +
> > +static inline int is_target_io(__u64 user_data)
> > +{
> > +	return (user_data & (1ULL << 63)) != 0;
> > +}
> > +
> > +static inline __u64 build_user_data(unsigned tag, unsigned op,
> > +		unsigned tgt_data, unsigned is_target_io)
> > +{
> > +	assert(!(tag >> 16) && !(op >> 8) && !(tgt_data >> 16));
> > +
> > +	return tag | (op << 16) | (tgt_data << 24) | (__u64)is_target_io << 63;
> > +}
> > +
> > +static inline unsigned int user_data_to_tag(__u64 user_data)
> > +{
> > +	return user_data & 0xffff;
> > +}
> > +
> > +static inline unsigned int user_data_to_op(__u64 user_data)
> > +{
> > +	return (user_data >> 16) & 0xff;
> > +}
> > +
> > +static void ublk_err(const char *fmt, ...)
> > +{
> > +	va_list ap;
> > +
> > +	va_start(ap, fmt);
> > +	vfprintf(stderr, fmt, ap);
> > +}
> > +
> > +static void ublk_log(const char *fmt, ...)
> > +{
> > +	if (ublk_dbg_mask & UBLK_LOG) {
> > +		va_list ap;
> > +
> > +		va_start(ap, fmt);
> > +		vfprintf(stdout, fmt, ap);
> > +	}
> > +}
> > +
> > +static void ublk_dbg(int level, const char *fmt, ...)
> > +{
> > +	if (level & ublk_dbg_mask) {
> > +		va_list ap;
> > +		va_start(ap, fmt);
> > +		vfprintf(stdout, fmt, ap);
> > +        }
> > +}
> > +
> > +static inline void *ublk_get_sqe_cmd(const struct io_uring_sqe *sqe)
> > +{
> > +	return (void *)&sqe->addr3;
> > +}
> > +
> > +static inline void ublk_mark_io_done(struct ublk_io *io, int res)
> > +{
> > +	io->flags |= (UBLKSRV_NEED_COMMIT_RQ_COMP | UBLKSRV_IO_FREE);
> > +	io->result = res;
> > +}
> > +
> > +static inline const struct ublksrv_io_desc *ublk_get_iod(
> > +                const struct ublk_queue *q, int tag)
> > +{
> > +        return (struct ublksrv_io_desc *)
> > +                &(q->io_cmd_buf[tag * sizeof(struct ublksrv_io_desc)]);
> > +}
> > +
> > +static inline void ublk_set_sqe_cmd_op(struct io_uring_sqe *sqe,
> > +		__u32 cmd_op)
> > +{
> > +        __u32 *addr = (__u32 *)&sqe->off;
> > +
> > +        addr[0] = cmd_op;
> > +        addr[1] = 0;
> > +}
> > +
> > +static inline int ublk_setup_ring(struct io_uring *r, int depth,
> > +		int cq_depth, unsigned flags)
> > +{
> > +	struct io_uring_params p;
> > +
> > +	memset(&p, 0, sizeof(p));
> > +	p.flags = flags | IORING_SETUP_CQSIZE;
> > +	p.cq_entries = cq_depth;
> > +
> > +	return io_uring_queue_init_params(depth, r, &p);
> > +}
> > +
> > +static inline void ublk_ctrl_init_cmd(struct ublk_dev *dev,
> > +		struct io_uring_sqe *sqe,
> > +		struct ublk_ctrl_cmd_data *data)
> > +{
> > +	struct ublksrv_ctrl_dev_info *info = &dev->dev_info;
> > +	struct ublksrv_ctrl_cmd *cmd = (struct ublksrv_ctrl_cmd *)ublk_get_sqe_cmd(sqe);
> > +
> > +	sqe->fd = dev->ctrl_fd;
> > +	sqe->opcode = IORING_OP_URING_CMD;
> > +	sqe->ioprio = 0;
> > +
> > +	if (data->flags & CTRL_CMD_HAS_BUF) {
> > +		cmd->addr = data->addr;
> > +		cmd->len = data->len;
> > +	}
> > +
> > +	if (data->flags & CTRL_CMD_HAS_DATA)
> > +		cmd->data[0] = data->data[0];
> > +
> > +	cmd->dev_id = info->dev_id;
> > +	cmd->queue_id = -1;
> > +
> > +	ublk_set_sqe_cmd_op(sqe, data->cmd_op);
> > +
> > +	io_uring_sqe_set_data(sqe, cmd);
> > +}
> > +
> > +static int __ublk_ctrl_cmd(struct ublk_dev *dev,
> > +		struct ublk_ctrl_cmd_data *data)
> > +{
> > +	struct io_uring_sqe *sqe;
> > +	struct io_uring_cqe *cqe;
> > +	int ret = -EINVAL;
> > +
> > +	sqe = io_uring_get_sqe(&dev->ring);
> > +	if (!sqe) {
> > +		ublk_err("%s: can't get sqe ret %d\n", __func__, ret);
> > +		return ret;
> > +	}
> > +
> > +	ublk_ctrl_init_cmd(dev, sqe, data);
> > +
> > +	ret = io_uring_submit(&dev->ring);
> > +	if (ret < 0) {
> > +		ublk_err("uring submit ret %d\n", ret);
> > +		return ret;
> > +	}
> > +
> > +	ret = io_uring_wait_cqe(&dev->ring, &cqe);
> > +	if (ret < 0) {
> > +		ublk_err("wait cqe: %s\n", strerror(-ret));
> > +		return ret;
> > +	}
> > +	io_uring_cqe_seen(&dev->ring, cqe);
> > +
> > +	return cqe->res;
> > +}
> > +
> > +int ublk_ctrl_stop_dev(struct ublk_dev *dev)
> > +{
> > +	struct ublk_ctrl_cmd_data data = {
> > +		.cmd_op	= UBLK_CMD_STOP_DEV,
> > +	};
> > +	int ret;
> > +
> > +	ret = __ublk_ctrl_cmd(dev, &data);
> > +	return ret;
> 
> you can just return directly from here and remove local var ret..

OK.

> 
> > +}
> > +
> > +int ublk_ctrl_start_dev(struct ublk_dev *dev,
> > +		int daemon_pid)
> > +{
> > +	struct ublk_ctrl_cmd_data data = {
> > +		.cmd_op	= UBLK_CMD_START_DEV,
> > +		.flags	= CTRL_CMD_HAS_DATA,
> > +	};
> > +	int ret;
> > +
> > +	dev->dev_info.ublksrv_pid = data.data[0] = daemon_pid;
> > +
> > +	ret = __ublk_ctrl_cmd(dev, &data);
> > +
> > +	return ret;
> 
> same here as previous comment ...
> 
> > +}
> > +
> > +int ublk_ctrl_add_dev(struct ublk_dev *dev)
> > +{
> > +	struct ublk_ctrl_cmd_data data = {
> > +		.cmd_op	= UBLK_CMD_ADD_DEV,
> > +		.flags	= CTRL_CMD_HAS_BUF,
> > +		.addr = (__u64)&dev->dev_info,
> > +		.len = sizeof(struct ublksrv_ctrl_dev_info),
> > +	};
> > +
> > +	return __ublk_ctrl_cmd(dev, &data);
> > +}
> > +
> > +int ublk_ctrl_del_dev(struct ublk_dev *dev)
> > +{
> > +	struct ublk_ctrl_cmd_data data = {
> > +		.cmd_op = UBLK_CMD_DEL_DEV,
> > +		.flags = 0,
> > +	};
> > +
> > +	return __ublk_ctrl_cmd(dev, &data);
> > +}
> > +
> > +int ublk_ctrl_get_info(struct ublk_dev *dev)
> > +{
> > +	struct ublk_ctrl_cmd_data data = {
> > +		.cmd_op	= UBLK_CMD_GET_DEV_INFO,
> > +		.flags	= CTRL_CMD_HAS_BUF,
> > +		.addr = (__u64)&dev->dev_info,
> > +		.len = sizeof(struct ublksrv_ctrl_dev_info),
> > +	};
> > +
> > +	return __ublk_ctrl_cmd(dev, &data);
> > +}
> > +
> > +int ublk_ctrl_set_params(struct ublk_dev *dev,
> > +		struct ublk_params *params)
> > +{
> > +	struct ublk_ctrl_cmd_data data = {
> > +		.cmd_op	= UBLK_CMD_SET_PARAMS,
> > +		.flags	= CTRL_CMD_HAS_BUF,
> > +		.addr = (__u64)params,
> > +		.len = sizeof(*params),
> > +	};
> > +	params->len = sizeof(*params);
> > +	return __ublk_ctrl_cmd(dev, &data);
> > +}
> > +
> > +static int ublk_ctrl_get_params(struct ublk_dev *dev,
> > +		struct ublk_params *params)
> > +{
> > +	struct ublk_ctrl_cmd_data data = {
> > +		.cmd_op	= UBLK_CMD_GET_PARAMS,
> > +		.flags	= CTRL_CMD_HAS_BUF,
> > +		.addr = (__u64)params,
> > +		.len = sizeof(*params),
> > +	};
> > +	int ret;
> > +
> > +	params->len = sizeof(*params);
> > +
> > +	ret = __ublk_ctrl_cmd(dev, &data);
> > +	return ret;
> 
> same here as previous comment ...
> 
> > +}
> > +
> > +static const char *ublk_dev_state_desc(struct ublk_dev *dev)
> > +{
> > +	switch (dev->dev_info.state) {
> > +	case UBLK_S_DEV_DEAD:
> > +		return "DEAD";
> > +	case UBLK_S_DEV_LIVE:
> > +		return "LIVE";
> > +	default:
> > +		return "UNKNOWN";
> > +	};
> > +}
> > +
> > +static void ublk_ctrl_dump(struct ublk_dev *dev, bool show_queue)
> > +{
> > +	struct ublksrv_ctrl_dev_info *info = &dev->dev_info;
> > +	int ret;
> > +	struct ublk_params p;
> > +
> > +	ret = ublk_ctrl_get_params(dev, &p);
> > +	if (ret < 0) {
> > +		ublk_err("failed to get params %m\n");
> > +		return;
> > +	}
> > +
> > +	ublk_log("dev id %d: nr_hw_queues %d queue_depth %d block size %d dev_capacity %lld\n",
> > +			info->dev_id,
> > +                        info->nr_hw_queues, info->queue_depth,
> > +                        1 << p.basic.logical_bs_shift, p.basic.dev_sectors);
> > +	ublk_log("\tmax rq size %d daemon pid %d flags 0x%llx state %s\n",
> > +                        info->max_io_buf_bytes,
> > +			info->ublksrv_pid, info->flags,
> > +			ublk_dev_state_desc(dev));
> > +	if (show_queue) {
> > +		int i;
> > +
> > +		for (i = 0; i < dev->dev_info.nr_hw_queues; i++)
> > +			ublk_log("\tqueue 0 tid: %d\n", dev->q[i].tid);
> > +	}
> > +	fflush(stdout);
> > +}
> > +
> > +static void ublk_ctrl_deinit(struct ublk_dev *dev)
> > +{
> > +	close(dev->ctrl_fd);
> > +	free(dev);
> > +}
> > +
> > +static struct ublk_dev *ublk_ctrl_init()
> 
> perhaps specify void -> ublk_ctrl_init(void)
> 
> > +{
> > +	struct ublk_dev *dev = (struct ublk_dev *)calloc(1, sizeof(*dev));
> > +	struct ublksrv_ctrl_dev_info *info = &dev->dev_info;
> > +	int ret;
> > +
> > +	dev->ctrl_fd = open(CTRL_DEV, O_RDWR);
> > +	if (dev->ctrl_fd < 0) {
> > +		ublk_err("control dev %s can't be opened: %m\n", CTRL_DEV);
> 
> perhaps also print the errno that is set by the open system call unless
> ublk_err is doing that implicitly which I'm not sure.
> 
> also I didn't understand two format specifiers for above call and only
> one argument its a bit confusing to read the code...

%m is supposed to print message of errno, but it is fine to add the
errno too.

> 
> > +		exit(dev->ctrl_fd);
> > +	}
> > +	info->max_io_buf_bytes = UBLK_IO_MAX_BYTES;
> > +
> > +	ret = ublk_setup_ring(&dev->ring, UBLK_CTRL_RING_DEPTH,
> > +			UBLK_CTRL_RING_DEPTH, IORING_SETUP_SQE128);
> > +	if (ret < 0) {
> > +		ublk_err("queue_init: %s\n", strerror(-ret));
> > +		free(dev);
> > +		return NULL;
> > +	}
> > +	dev->nr_fds = 1;
> > +
> > +	return dev;
> > +}
> > +
> > +static int ublk_queue_cmd_buf_sz(struct ublk_queue *q)
> > +{
> > +	int size =  q->q_depth * sizeof(struct ublksrv_io_desc);
> > +	unsigned int page_sz = getpagesize();
> > +
> > +	return round_up(size, page_sz);
> > +}
> > +
> > +static void ublk_queue_deinit(struct ublk_queue *q)
> > +{
> > +	int i;
> > +	int nr_ios = q->q_depth;
> > +
> > +	io_uring_unregister_ring_fd(&q->ring);
> > +
> > +	if (q->ring.ring_fd > 0) {
> > +		io_uring_unregister_files(&q->ring);
> > +		close(q->ring.ring_fd);
> > +		q->ring.ring_fd = -1;
> > +	}
> > +
> > +	if (q->io_cmd_buf)
> > +		munmap(q->io_cmd_buf, ublk_queue_cmd_buf_sz(q));
> > +
> > +	for (i = 0; i < nr_ios; i++)
> > +		free(q->ios[i].buf_addr);
> > +}
> > +
> > +static int ublk_queue_init(struct ublk_queue *q)
> > +{
> > +	struct ublk_dev *dev = q->dev;
> > +	int depth = dev->dev_info.queue_depth;
> > +	int i, ret = -1;
> > +	int cmd_buf_size, io_buf_size;
> > +	unsigned long off;
> > +	int ring_depth = depth, cq_depth = depth;
> > +
> > +	q->tgt_ops = dev->tgt.ops;
> > +	q->state = 0;
> > +	q->q_depth = depth;
> > +	q->cmd_inflight = 0;
> > +	q->tid = gettid();
> > +
> > +	cmd_buf_size = ublk_queue_cmd_buf_sz(q);
> > +	off = UBLKSRV_CMD_BUF_OFFSET +
> > +		q->q_id * (UBLK_MAX_QUEUE_DEPTH * sizeof(struct ublksrv_io_desc));
> > +	q->io_cmd_buf = (char *)mmap(0, cmd_buf_size, PROT_READ,
> > +			MAP_SHARED | MAP_POPULATE, dev->fds[0], off);
> > +	if (q->io_cmd_buf == MAP_FAILED) {
> > +		ublk_err("ublk dev %d queue %d map io_cmd_buf failed\n",
> > +				q->dev->dev_info.dev_id, q->q_id);
> 
> also add strerror(errno) to message ?

%m could be easier.

> 
> > +		goto fail;
> > +	}
> > +
> > +	io_buf_size = dev->dev_info.max_io_buf_bytes;
> > +	for (i = 0; i < q->q_depth; i++) {
> > +		q->ios[i].buf_addr = NULL;
> > +
> > +		if (posix_memalign((void **)&q->ios[i].buf_addr,
> > +					getpagesize(), io_buf_size)) {
> > +			ublk_err("ublk dev %d queue %d io %d posix_memalign failed\n",
> > +					dev->dev_info.dev_id, q->q_id, i);
> > +			goto fail;
> > +		}
> 
> same as previous comment ..

OK.

> 
> > +		q->ios[i].flags = UBLKSRV_NEED_FETCH_RQ | UBLKSRV_IO_FREE;
> > +	}
> > +
> > +	ret = ublk_setup_ring(&q->ring, ring_depth, cq_depth,
> > +			IORING_SETUP_SQE128 | IORING_SETUP_COOP_TASKRUN);
> > +	if (ret < 0) {
> > +		ublk_err("ublk dev %d queue %d setup io_uring failed %d\n",
> > +				q->dev->dev_info.dev_id, q->q_id, ret);
> > +		goto fail;
> > +	}
> > +
> > +	io_uring_register_ring_fd(&q->ring);
> > +
> > +	ret = io_uring_register_files(&q->ring, dev->fds, dev->nr_fds);
> > +	if (ret) {
> > +		ublk_err("ublk dev %d queue %d register files failed %d\n",
> > +				q->dev->dev_info.dev_id, q->q_id, ret);
> > +		goto fail;
> > +	}
> > +
> > +	return 0;
> > + fail:
> > +	ublk_queue_deinit(q);
> > +	ublk_err("ublk dev %d queue %d failed\n",
> > +			dev->dev_info.dev_id, q->q_id);
> > +	return -ENOMEM;
> > +}
> > +
> > +static int ublk_dev_prep(struct ublk_dev *dev)
> > +{
> > +	int dev_id = dev->dev_info.dev_id;
> > +	char buf[64];
> > +	int ret;
> > +
> > +	snprintf(buf, 64, "%s%d", UBLKC_DEV, dev_id);
> > +	dev->fds[0] = open(buf, O_RDWR);
> > +	if (dev->fds[0] < 0) {
> > +		ret = -EBADF;
> > +		ublk_err("can't open %s, ret %d\n", buf, dev->fds[0]);
> > +		goto fail;
> > +	}
> > +
> > +	if (dev->tgt.ops->init_tgt)
> > +		ret = dev->tgt.ops->init_tgt(dev);
> > +	else
> > +		ret = 0;
> > +
> 
> if ret is initialize to 0 at the time of declaration in above function
> we can safely get rid of else above ?

OK.

> 
> > +	return ret;
> > +fail:
> > +	close(dev->fds[0]);
> > +	return ret;
> > +}
> > +
> > +static void ublk_dev_unprep(struct ublk_dev *dev)
> > +{
> > +	if (dev->tgt.ops->deinit_tgt)
> > +		dev->tgt.ops->deinit_tgt(dev);
> > +	close(dev->fds[0]);
> > +}
> > +
> > +static int ublk_queue_io_cmd(struct ublk_queue *q,
> > +		struct ublk_io *io, unsigned tag)
> > +{
> > +	struct ublksrv_io_cmd *cmd;
> > +	struct io_uring_sqe *sqe;
> > +	unsigned int cmd_op = 0;
> > +	__u64 user_data;
> > +
> > +	/* only freed io can be issued */
> > +	if (!(io->flags & UBLKSRV_IO_FREE))
> > +		return 0;
> > +
> > +	/* we issue because we need either fetching or committing */
> > +	if (!(io->flags &
> > +		(UBLKSRV_NEED_FETCH_RQ | UBLKSRV_NEED_COMMIT_RQ_COMP)))
> > +		return 0;
> > +
> > +	if (io->flags & UBLKSRV_NEED_COMMIT_RQ_COMP)
> > +		cmd_op = UBLK_IO_COMMIT_AND_FETCH_REQ;
> > +	else if (io->flags & UBLKSRV_NEED_FETCH_RQ)
> > +		cmd_op = UBLK_IO_FETCH_REQ;
> > +
> > +	sqe = io_uring_get_sqe(&q->ring);
> > +	if (!sqe) {
> > +		ublk_err("%s: run out of sqe %d, tag %d\n",
> > +				__func__, q->q_id, tag);
> > +		return -1;
> > +	}
> > +
> > +	cmd = (struct ublksrv_io_cmd *)ublk_get_sqe_cmd(sqe);
> > +
> > +	if (cmd_op == UBLK_IO_COMMIT_AND_FETCH_REQ)
> > +		cmd->result = io->result;
> > +
> > +	/* These fields should be written once, never change */
> > +	ublk_set_sqe_cmd_op(sqe, cmd_op);
> > +	sqe->fd		= 0;	/* dev->fds[0] */
> > +	sqe->opcode	= IORING_OP_URING_CMD;
> > +	sqe->flags	= IOSQE_FIXED_FILE;
> > +	sqe->rw_flags	= 0;
> > +	cmd->tag	= tag;
> > +	cmd->addr	= (__u64)io->buf_addr;
> > +	cmd->q_id	= q->q_id;
> > +
> > +	user_data = build_user_data(tag, cmd_op, 0, 0);
> > +	io_uring_sqe_set_data64(sqe, user_data);
> > +
> > +	io->flags = 0;
> > +
> > +	q->cmd_inflight += 1;
> > +
> > +	ublk_dbg(UBLK_DBG_IO_CMD, "%s: (qid %d tag %u cmd_op %u) iof %x stopping %d\n",
> > +			__func__, q->q_id, tag, cmd_op,
> > +			io->flags, !!(q->state & UBLKSRV_QUEUE_STOPPING));
> > +	return 1;
> > +}
> > +
> > +static int ublk_complete_io(struct ublk_queue *q,
> > +		unsigned tag, int res)
> > +{
> > +	struct ublk_io *io = &q->ios[tag];
> > +
> > +	ublk_mark_io_done(io, res);
> > +
> > +	return ublk_queue_io_cmd(q, io, tag);
> > +}
> > +
> > +static void ublk_submit_fetch_commands(struct ublk_queue *q)
> > +{
> > +	int i = 0;
> > +
> > +	for (i = 0; i < q->q_depth; i++)
> > +		ublk_queue_io_cmd(q, &q->ios[i], i);
> > +}
> > +
> > +static int ublk_queue_is_idle(struct ublk_queue *q)
> > +{
> > +	return !io_uring_sq_ready(&q->ring) && !q->io_inflight;
> > +}
> > +
> > +static int ublk_queue_is_done(struct ublk_queue *q)
> > +{
> > +	return (q->state & UBLKSRV_QUEUE_STOPPING) && ublk_queue_is_idle(q);
> > +}
> > +
> > +static void ublk_queue_discard_io_pages(struct ublk_queue *q)
> > +{
> > +	const struct ublk_dev *dev = q->dev;
> > +	unsigned int io_buf_size = dev->dev_info.max_io_buf_bytes;
> > +	int i = 0;
> > +
> > +	for (i = 0; i < q->q_depth; i++)
> > +		madvise(q->ios[i].buf_addr, io_buf_size, MADV_DONTNEED);
> > +}
> > +
> > +static void ublk_queue_idle_enter(struct ublk_queue *q)
> > +{
> > +	if (q->state & UBLKSRV_QUEUE_IDLE)
> > +		return;
> > +
> > +	ublk_dbg(UBLK_DBG_QUEUE, "dev%d-q%d: enter idle %x\n",
> > +			q->dev->dev_info.dev_id, q->q_id, q->state);
> > +	ublk_queue_discard_io_pages(q);
> > +	q->state |= UBLKSRV_QUEUE_IDLE;
> > +}
> > +
> > +static void ublk_queue_idle_exit(struct ublk_queue *q)
> > +{
> > +	if (q->state & UBLKSRV_QUEUE_IDLE) {
> > +		ublk_dbg(UBLK_DBG_QUEUE, "dev%d-q%d: exit idle %x\n",
> > +			q->dev->dev_info.dev_id, q->q_id, q->state);
> > +		q->state &= ~UBLKSRV_QUEUE_IDLE;
> > +	}
> > +}
> > +
> > +static inline void ublksrv_handle_tgt_cqe(struct ublk_queue *q,
> > +		struct io_uring_cqe *cqe)
> > +{
> > +	unsigned tag = user_data_to_tag(cqe->user_data);
> > +
> > +	if (cqe->res < 0 && cqe->res != -EAGAIN)
> > +		ublk_err("%s: failed tgt io: res %d qid %u tag %u, cmd_op %u\n",
> > +			__func__, cqe->res, q->q_id,
> > +			user_data_to_tag(cqe->user_data),
> > +			user_data_to_op(cqe->user_data));
> > +
> > +	if (q->tgt_ops->tgt_io_done)
> > +		q->tgt_ops->tgt_io_done(q, tag, cqe);
> > +}
> > +
> > +static void ublk_handle_cqe(struct io_uring *r,
> > +		struct io_uring_cqe *cqe, void *data)
> > +{
> > +	struct ublk_queue *q = container_of(r, struct ublk_queue, ring);
> > +	unsigned tag = user_data_to_tag(cqe->user_data);
> > +	unsigned cmd_op = user_data_to_op(cqe->user_data);
> > +	int fetch = (cqe->res != UBLK_IO_RES_ABORT) &&
> > +		!(q->state & UBLKSRV_QUEUE_STOPPING);
> > +	struct ublk_io *io;
> > +
> > +	ublk_dbg(UBLK_DBG_IO_CMD, "%s: res %d (qid %d tag %u cmd_op %u target %d) stopping %d\n",
> > +			__func__, cqe->res, q->q_id, tag, cmd_op,
> > +			is_target_io(cqe->user_data),
> > +			(q->state & UBLKSRV_QUEUE_STOPPING));
> > +
> > +	/* Don't retrieve io in case of target io */
> > +	if (is_target_io(cqe->user_data)) {
> > +		ublksrv_handle_tgt_cqe(q, cqe);
> > +		return;
> > +	}
> > +
> > +	io = &q->ios[tag];
> > +	q->cmd_inflight--;
> > +
> > +	if (!fetch) {
> > +		q->state |= UBLKSRV_QUEUE_STOPPING;
> > +		io->flags &= ~UBLKSRV_NEED_FETCH_RQ;
> > +	}
> > +
> > +	if (cqe->res == UBLK_IO_RES_OK) {
> > +		ublk_assert(tag < q->q_depth);
> > +		q->tgt_ops->queue_io(q, tag);
> > +	} else {
> > +		/*
> > +		 * COMMIT_REQ will be completed immediately since no fetching
> > +		 * piggyback is required.
> > +		 *
> > +		 * Marking IO_FREE only, then this io won't be issued since
> > +		 * we only issue io with (UBLKSRV_IO_FREE | UBLKSRV_NEED_*)
> > +		 *
> > +		 * */
> > +		io->flags = UBLKSRV_IO_FREE;
> > +	}
> 
> if you can return; from above if we can remove the else, no biggy
> though..

I think "else" is just fine.

> > +}
> > +
> > +static int ublk_reap_events_uring(struct io_uring *r)
> > +{
> > +	struct io_uring_cqe *cqe;
> > +	unsigned head;
> > +	int count = 0;
> > +
> > +	io_uring_for_each_cqe(r, head, cqe) {
> > +		ublk_handle_cqe(r, cqe, NULL);
> > +		count += 1;
> > +	}
> > +	io_uring_cq_advance(r, count);
> > +
> > +	return count;
> > +}
> > +
> > +static int ublk_process_io(struct ublk_queue *q)
> > +{
> > +	int ret, reapped;
> > +	struct __kernel_timespec ts = {
> > +		.tv_sec = UBLKSRV_IO_IDLE_SECS,
> > +		.tv_nsec = 0
> > +        };
> > +	struct __kernel_timespec *tsp = (q->state & UBLKSRV_QUEUE_IDLE) ?
> > +		NULL : &ts;
> > +	struct io_uring_cqe *cqe;
> > +
> > +	ublk_dbg(UBLK_DBG_QUEUE, "dev%d-q%d: to_submit %d inflight cmd %u stopping %d\n",
> > +				q->dev->dev_info.dev_id,
> > +				q->q_id, io_uring_sq_ready(&q->ring),
> > +				q->cmd_inflight,
> > +				(q->state & UBLKSRV_QUEUE_STOPPING));
> > +
> > +	if (ublk_queue_is_done(q))
> > +		return -ENODEV;
> > +
> > +	ret = io_uring_submit_and_wait_timeout(&q->ring, &cqe, 1, tsp, NULL);
> > +	reapped = ublk_reap_events_uring(&q->ring);
> > +
> > +	ublk_dbg(UBLK_DBG_QUEUE, "submit result %d, reapped %d stop %d idle %d\n",
> > +			ret, reapped, (q->state & UBLKSRV_QUEUE_STOPPING),
> > +			(q->state & UBLKSRV_QUEUE_IDLE));
> > +
> > +	if (!(q->state & UBLKSRV_QUEUE_STOPPING)) {
> > +		if (ret == -ETIME && reapped == 0 && ublk_queue_is_idle(q))
> > +			ublk_queue_idle_enter(q);
> > +		else
> > +			ublk_queue_idle_exit(q);
> > +	}
> > +	return reapped;
> > +}
> > +
> > +static void *ublk_io_handler_fn(void *data)
> > +{
> > +	struct ublk_queue *q = data;
> > +	int dev_id = q->dev->dev_info.dev_id;
> > +	int ret;
> > +
> > +	ret = ublk_queue_init(q);
> > +	if (ret) {
> > +		ublk_err("ublk dev %d queue %d init queue failed\n",
> > +				dev_id, q->q_id);
> > +		return NULL;
> > +	}
> > +
> > +	/* submit all io commands to ublk driver */
> > +	ublk_submit_fetch_commands(q);
> > +
> > +	ublk_dbg(UBLK_DBG_QUEUE, "tid %d: ublk dev %d queue %d started\n",
> > +			gettid(),
> > +			dev_id, q->q_id);
> > +	do {
> > +		if (ublk_process_io(q) < 0)
> > +			break;
> > +	} while (1);
> > +
> > +	ublk_dbg(UBLK_DBG_QUEUE, "ublk dev %d queue %d exited\n", dev_id, q->q_id);
> > +	ublk_queue_deinit(q);
> > +	return NULL;
> > +}
> > +
> > +static void ublk_set_parameters(struct ublk_dev *dev)
> > +{
> > +	int ret;
> > +
> > +	ret = ublk_ctrl_set_params(dev, &dev->tgt.params);
> > +	if (ret)
> > +		ublk_err("dev %d set basic parameter failed %d\n",
> > +				dev->dev_info.dev_id, ret);
> > +}
> > +
> > +static int ublk_start_daemon(struct ublk_dev *dev)
> > +{
> > +	int ret, i;
> > +	void *thread_ret;
> > +	const struct ublksrv_ctrl_dev_info *dinfo = &dev->dev_info;
> > +
> > +	daemon(1, 1);
> > +
> > +	ublk_dbg(UBLK_DBG_DEV, "%s enter\n", __func__);
> > +
> > +	ret = ublk_dev_prep(dev);
> > +	if (!dev)
> > +		return -ENOMEM;
> > +
> 
> can dev be NULL ever here ? do we need to check ret instaed of dev?
> (I don't think so since looking it ublk_dev_prep() it will not change
> dev value and it is unconditionally deferenced before it's use).
> 
> also shuldn't we return ret here as ublk_dev_prep() will retrun non
> -ENOMEM value ?

Good catch, it is really a bug.

> 
> > +	for (i = 0; i < dinfo->nr_hw_queues; i++) {
> > +		dev->q[i].dev = dev;
> > +		dev->q[i].q_id = i;
> > +		pthread_create(&dev->q[i].thread, NULL,
> > +				ublk_io_handler_fn,
> > +				&dev->q[i]);
> > +	}
> > +
> > +	ublk_set_parameters(dev);
> > +
> > +	/* everything is fine now, start us */
> > +	ret = ublk_ctrl_start_dev(dev, getpid());
> > +	if (ret < 0)
> > +		goto fail;
> > +
> > +	ublk_ctrl_get_info(dev);
> > +	ublk_ctrl_dump(dev, true);
> > +
> > +	/* wait until we are terminated */
> > +	for (i = 0; i < dinfo->nr_hw_queues; i++)
> > +		pthread_join(dev->q[i].thread, &thread_ret);
> > + fail:
> > +	ublk_dev_unprep(dev);
> > +	ublk_dbg(UBLK_DBG_DEV, "%s exit\n", __func__);
> > +
> > +	return ret;
> > +}
> > +
> > +static int cmd_dev_add(int argc, char *argv[])
> > +{
> > +	static const struct option longopts[] = {
> > +		{ "type",		1,	NULL, 't' },
> > +		{ "number",		1,	NULL, 'n' },
> > +		{ "queues",		1,	NULL, 'q' },
> > +		{ "depth",		1,	NULL, 'd' },
> > +		{ "debug_mask",	1,	NULL, 0},
> > +		{ "quiet",	0,	NULL, 0},
> > +		{ NULL }
> > +	};
> > +	const struct ublk_tgt_ops *ops;
> > +	struct ublksrv_ctrl_dev_info *info;
> > +	struct ublk_dev *dev;
> > +	int ret, option_idx, opt;
> > +	const char *tgt_type = NULL;
> > +	int dev_id = -1;
> > +	unsigned nr_queues = 2, depth = UBLK_QUEUE_DEPTH;
> > +
> > +	while ((opt = getopt_long(argc, argv, "-:t:n:d:q:",
> > +				  longopts, &option_idx)) != -1) {
> > +		switch (opt) {
> > +		case 'n':
> > +			dev_id = strtol(optarg, NULL, 10);
> > +			break;
> > +		case 't':
> > +			tgt_type = optarg;
> > +			break;
> > +		case 'q':
> > +			nr_queues = strtol(optarg, NULL, 10);
> > +			break;
> > +		case 'd':
> > +			depth = strtol(optarg, NULL, 10);
> > +			break;
> > +		case 0:
> > +			if (!strcmp(longopts[option_idx].name, "debug_mask"))
> > +				ublk_dbg_mask = strtol(optarg, NULL, 16);
> > +			if (!strcmp(longopts[option_idx].name, "quiet"))
> > +				ublk_dbg_mask = 0;
> > +			break;
> > +		}
> > +	}
> > +
> > +	optind = 0;
> > +
> > +	ops = ublk_find_tgt(tgt_type);
> > +	if (!ops) {
> > +		ublk_err("%s: no such tgt type, type %s\n",
> > +				__func__, tgt_type);
> > +		return -ENODEV;
> > +	}
> > +
> > +	if (nr_queues > UBLK_MAX_QUEUES || depth > UBLK_QUEUE_DEPTH) {
> > +		ublk_err("%s: invalid nr_queues or depth queues %u depth %u\n",
> > +				__func__, nr_queues, depth);
> > +		return -EINVAL;
> > +	}
> > +
> > +	dev = ublk_ctrl_init();
> > +	if (!dev) {
> > +		ublk_err("%s: can't alloc dev id %d, type %s\n",
> > +				__func__, dev_id, tgt_type);
> > +		return -ENOMEM;
> > +	}
> > +
> > +	info = &dev->dev_info;
> > +	info->dev_id = dev_id;
> > +        info->nr_hw_queues = nr_queues;
> > +        info->queue_depth = depth;
> > +	dev->tgt.ops = ops;
> > +	dev->tgt.argc = argc;
> > +	dev->tgt.argv = argv;
> > +
> > +	ret = ublk_ctrl_add_dev(dev);
> > +	if (ret < 0) {
> > +		ublk_err("%s: can't add dev id %d, type %s ret %d\n",
> > +				__func__, dev_id, tgt_type, ret);
> > +		goto fail;
> > +	}
> > +
> > +	ret = ublk_start_daemon(dev);
> > +	if (ret < 0) {
> > +		ublk_err("%s: can't start daemon id %d, type %s\n",
> > +				__func__, dev_id, tgt_type);
> > +		goto fail_del;
> > +	}
> > +fail_del:
> > +	ublk_ctrl_del_dev(dev);
> > +fail:
> > +	ublk_ctrl_deinit(dev);
> > +	return ret;
> > +}
> > +
> > +static int ublk_stop_io_daemon(const struct ublk_dev *dev)
> > +{
> > +	int daemon_pid = dev->dev_info.ublksrv_pid;
> > +	int cnt = 0, ret;
> > +
> > +	if (daemon_pid == -1)
> > +		return 0;
> > +
> > +	/* wait until daemon is exited, or timeout after 3 seconds */
> > +	do {
> > +		ret = kill(daemon_pid, 0);
> > +		if (ret)
> > +			break;
> > +		usleep(500000);
> > +		cnt++;
> > +	} while (!ret && cnt < 6);
> > +
> > +	ublk_dbg(UBLK_DBG_DEV, "%s: pid %d ret %d\n", __func__, daemon_pid, ret);
> > +
> > +	return ret != 0 ? 0 : -1;
> > +}
> > +
> > +static int __cmd_dev_del(int number, bool log)
> > +{
> > +	struct ublk_dev *dev;
> > +	int ret;
> > +
> > +	dev = ublk_ctrl_init();
> > +	dev->dev_info.dev_id = number;
> > +
> > +	ret = ublk_ctrl_get_info(dev);
> > +	if (ret < 0) {
> > +		goto fail;
> > +	}
> > +
> > +	ret = ublk_ctrl_stop_dev(dev);
> > +	if (ret < 0) {
> > +		if (log)
> > +			ublk_err("stop dev %d failed\n", number);
> > +		goto fail;
> > +	}
> > +
> > +	ret = ublk_stop_io_daemon(dev);
> > +	if (ret < 0) {
> > +		if (log)
> > +			ublk_err("stop daemon %d failed\n", number);
> > +	}
> > +
> > +	ublk_ctrl_del_dev(dev);
> > +fail:
> > +	ublk_ctrl_deinit(dev);
> > +	return ret;
> > +}
> > +
> > +static int cmd_dev_del(int argc, char *argv[])
> > +{
> > +	static const struct option longopts[] = {
> > +		{ "number",		1,	NULL, 'n' },
> > +		{ "all",		0,	NULL, 'a' },
> > +		{ "debug_mask",	1,	NULL, 0},
> > +		{ NULL }
> > +	};
> > +	int number = -2;
> > +	int opt, i, option_idx;
> > +
> > +	while ((opt = getopt_long(argc, argv, "n:a",
> > +				  longopts, &option_idx)) != -1) {
> > +		switch (opt) {
> > +		case 'a':
> > +			number = -1;
> > +			break;
> > +
> > +		case 'n':
> > +			number = strtol(optarg, NULL, 10);
> > +			break;
> > +		case 0:
> > +			if (!strcmp(longopts[option_idx].name, "debug_mask"))
> > +				ublk_dbg_mask = strtol(optarg, NULL, 16);
> > +			break;
> > +		}
> > +	}
> > +
> > +	if (number >= 0)
> > +		return __cmd_dev_del(number, true);
> > +	else if (number != -1) {
> > +		ublk_err("%s: pass wrong devid or not delete via -a\n");
> > +		return -EINVAL;
> > +	}
> > +
> > +	for (i = 0; i < 255; i++)
> > +		__cmd_dev_del(i, false);
> > +
> > +	return 0;
> > +}
> > +
> > +static int __cmd_dev_list(int number, bool log)
> > +{
> > +	struct ublk_dev *dev = ublk_ctrl_init();
> > +	int ret;
> > +
> > +	dev->dev_info.dev_id = number;
> > +
> > +	ret = ublk_ctrl_get_info(dev);
> > +	if (ret < 0) {
> > +		if (log)
> > +			ublk_err("%s: can't get dev info from %d: %d\n",
> > +					__func__, number, ret);
> > +	} else {
> > +		ublk_ctrl_dump(dev, false);
> > +	}
> > +
> > +	ublk_ctrl_deinit(dev);
> > +
> > +	return ret;
> > +}
> > +
> > +
> > +static int cmd_dev_list(int argc, char *argv[])
> > +{
> > +	static const struct option longopts[] = {
> > +		{ "number",		1,	NULL, 'n' },
> > +		{ "all",		0,	NULL, 'a' },
> > +		{ NULL }
> > +	};
> > +	int number = -1;
> > +	int opt, i;
> > +
> > +	while ((opt = getopt_long(argc, argv, "n:a",
> > +				  longopts, NULL)) != -1) {
> > +		switch (opt) {
> > +		case 'a':
> > +			break;
> > +
> > +		case 'n':
> > +			number = strtol(optarg, NULL, 10);
> > +			break;
> > +		}
> > +	}
> > +
> > +	if (number >= 0)
> > +		return __cmd_dev_list(number, true);
> > +
> > +	for (i = 0; i < 255; i++)
> > +		__cmd_dev_list(i, false);
> > +
> > +	return 0;
> > +}
> > +
> > +static int cmd_dev_help(int argc, char *argv[])
> > +{
> > +	printf("%s add -t {null|loop} [-q nr_queues] [-d depth] [-n dev_id] \n",
> > +			argv[0]);
> > +	printf("\t default: nr_queues=2(max 4), depth=128(max 128), dev_id=-1(auto allocation)\n");
> > +	printf("\t -t loop -f backing_file \n");
> > +	printf("\t -t null\n");
> > +	printf("%s del [-n dev_id] -a \n", argv[0]);
> > +	printf("\t -a delete all devices -n delete specified device\n");
> > +	printf("%s list [-n dev_id] -a \n", argv[0]);
> > +	printf("\t -a list all devices, -n list specified device, default -a \n");
> > +
> > +	return 0;
> > +}
> > +
> > +static int ublk_null_tgt_init(struct ublk_dev *dev)
> > +{
> > +	const struct ublksrv_ctrl_dev_info *info = &dev->dev_info;
> > +	unsigned long dev_size = 250UL << 30;
> > +
> > +	dev->tgt.dev_size = dev_size;
> > +	dev->tgt.params = (struct ublk_params) {
> > +		.types = UBLK_PARAM_TYPE_BASIC,
> > +		.basic = {
> > +			.logical_bs_shift	= 9,
> > +			.physical_bs_shift	= 12,
> > +			.io_opt_shift		= 12,
> > +			.io_min_shift		= 9,
> > +			.max_sectors		= info->max_io_buf_bytes >> 9,
> > +			.dev_sectors		= dev_size >> 9,
> > +		},
> > +	};
> > +
> > +	return 0;
> > +}
> > +
> > +static int ublk_null_queue_io(struct ublk_queue *q, int tag)
> > +{
> > +	const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag);
> > +
> > +	ublk_complete_io(q, tag, iod->nr_sectors << 9);
> > +
> > +	return 0;
> > +}
> > +
> > +static int loop_queue_tgt_io(struct ublk_queue *q, int tag)
> > +{
> > +	const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag);
> > +	struct io_uring_sqe *sqe = io_uring_get_sqe(&q->ring);
> > +	unsigned ublk_op = ublksrv_get_op(iod);
> > +
> > +	if (!sqe)
> > +		return -ENOMEM;
> > +
> > +	switch (ublk_op) {
> > +	case UBLK_IO_OP_FLUSH:
> > +		io_uring_prep_sync_file_range(sqe, 1 /*fds[1]*/,
> > +				iod->nr_sectors << 9,
> > +				iod->start_sector << 9,
> > +				IORING_FSYNC_DATASYNC);
> > +		io_uring_sqe_set_flags(sqe, IOSQE_FIXED_FILE);
> > +		break;
> > +	case UBLK_IO_OP_WRITE_ZEROES:
> > +	case UBLK_IO_OP_DISCARD:
> > +		return -ENOTSUP;
> > +	case UBLK_IO_OP_READ:
> > +		io_uring_prep_read(sqe, 1 /*fds[1]*/,
> > +				(void *)iod->addr,
> > +				iod->nr_sectors << 9,
> > +				iod->start_sector << 9);
> > +		io_uring_sqe_set_flags(sqe, IOSQE_FIXED_FILE);
> > +		break;
> > +	case UBLK_IO_OP_WRITE:
> > +		io_uring_prep_write(sqe, 1 /*fds[1]*/,
> > +				(void *)iod->addr,
> > +				iod->nr_sectors << 9,
> > +				iod->start_sector << 9);
> > +		io_uring_sqe_set_flags(sqe, IOSQE_FIXED_FILE);
> > +		break;
> > +	default:
> > +		return -EINVAL;
> > +	}
> > +
> > +	q->io_inflight++;
> > +	/* bit63 marks us as tgt io */
> > +	sqe->user_data = build_user_data(tag, ublk_op, 0, 1);
> > +
> > +	ublk_dbg(UBLK_DBG_IO, "%s: tag %d ublk io %x %llx %u\n", __func__, tag,
> > +			iod->op_flags, iod->start_sector, iod->nr_sectors << 9);
> > +	return 1;
> > +}
> > +
> > +static int ublk_loop_queue_io(struct ublk_queue *q, int tag)
> > +{
> > +	int queued = loop_queue_tgt_io(q, tag);
> > +
> > +	if (queued < 0)
> > +		ublk_complete_io(q, tag, queued);
> > +
> > +	return 0;
> > +}
> > +
> > +static void ublk_loop_io_done(struct ublk_queue *q, int tag,
> > +		const struct io_uring_cqe *cqe)
> > +{
> > +	int cqe_tag = user_data_to_tag(cqe->user_data);
> > +
> > +	ublk_assert(tag == cqe_tag);
> > +	ublk_complete_io(q, tag, cqe->res);
> > +	q->io_inflight--;
> > +}
> > +
> > +static void ublk_loop_tgt_deinit(struct ublk_dev *dev)
> > +{
> > +	fsync(dev->fds[1]);
> > +	close(dev->fds[1]);
> > +}
> > +
> > +static int ublk_loop_tgt_init(struct ublk_dev *dev)
> > +{
> > +	static const struct option lo_longopts[] = {
> > +		{ "file",		1,	NULL, 'f' },
> > +		{ NULL }
> > +	};
> > +	unsigned long long bytes;
> > +	char **argv = dev->tgt.argv;
> > +	int argc = dev->tgt.argc;
> > +	char *file = NULL;
> > +	struct stat st;
> > +	int fd, opt;
> > +	struct ublk_params p = {
> > +		.types = UBLK_PARAM_TYPE_BASIC,
> > +		.basic = {
> > +			.logical_bs_shift	= 9,
> > +			.physical_bs_shift	= 12,
> > +			.io_opt_shift	= 12,
> > +			.io_min_shift	= 9,
> > +			.max_sectors = dev->dev_info.max_io_buf_bytes >> 9,
> > +		},
> > +	};
> > +
> > +	while ((opt = getopt_long(argc, argv, "-:f:",
> > +				  lo_longopts, NULL)) != -1) {
> > +		switch (opt) {
> > +		case 'f':
> > +			file = strdup(optarg);
> > +			break;
> > +		}
> > +	}
> > +
> > +	ublk_dbg(UBLK_DBG_DEV, "%s: file %s\n", __func__, file);
> > +
> > +	if (!file)
> > +		return -EINVAL;
> > +
> > +	fd = open(file, O_RDWR);
> > +	if (fd < 0) {
> > +		ublk_err( "%s: backing file %s can't be opened\n",
> > +				__func__, file);
> > +		return -EBADF;
> > +	}
> > +
> > +	if (fstat(fd, &st) < 0) {
> > +		close(fd);
> > +		return -EBADF;
> > +	}
> > +
> > +	if (S_ISBLK(st.st_mode)) {
> > +		unsigned int bs, pbs;
> > +
> > +		if (ioctl(fd, BLKGETSIZE64, &bytes) != 0)
> > +			return -EBADF;
> > +		if (ioctl(fd, BLKSSZGET, &bs) != 0)
> > +			return -1;
> > +		if (ioctl(fd, BLKPBSZGET, &pbs) != 0)
> > +			return -1;
> > +		p.basic.logical_bs_shift = ilog2(bs);
> > +		p.basic.physical_bs_shift = ilog2(pbs);
> > +	} else if (S_ISREG(st.st_mode)) {
> > +		bytes = st.st_size;
> > +	} else {
> > +		bytes = 0;
> > +	}
> > +
> > +	if (fcntl(fd, F_SETFL, O_DIRECT)) {
> > +		p.basic.logical_bs_shift = 9;
> > +		p.basic.physical_bs_shift = 12;
> > +		ublk_log("%s: ublk-loop fallback to buffered IO\n", __func__);
> > +	}
> > +
> > +	dev->tgt.dev_size = bytes;
> > +	p.basic.dev_sectors = bytes >> 9;
> > +	dev->fds[1] = fd;
> > +	dev->nr_fds += 1;
> > +	dev->tgt.params = p;
> > +
> > +	return 0;
> > +}
> > +
> > +const struct ublk_tgt_ops tgt_ops_list[] = {
> > +	{
> > +		.name = "null",
> > +		.init_tgt = ublk_null_tgt_init,
> > +		.queue_io = ublk_null_queue_io,
> > +	},
> > +
> > +	{
> > +		.name = "loop",
> > +		.init_tgt = ublk_loop_tgt_init,
> > +		.deinit_tgt = ublk_loop_tgt_deinit,
> > +		.queue_io = ublk_loop_queue_io,
> > +		.tgt_io_done = ublk_loop_io_done,
> > +	},
> > +};
> > +
> > +static const struct ublk_tgt_ops *ublk_find_tgt(const char *name)
> > +{
> > +	const struct ublk_tgt_ops *ops;
> > +	int i;
> > +
> > +	if (name == NULL)
> > +		return NULL;
> > +
> > +	for (i = 0; sizeof(tgt_ops_list) / sizeof(*ops); i++)
> > +		if (strcmp(tgt_ops_list[i].name, name) == 0)
> > +			return &tgt_ops_list[i];
> > +	return NULL;
> > +}
> > +
> > +int main(int argc, char *argv[])
> > +{
> > +	const char *cmd = argv[1];
> > +	int ret = -EINVAL;
> > +
> > +	if (argc == 1)
> > +		goto out;
> > +
> > +	if (!strcmp(cmd, "add"))
> > +		ret = cmd_dev_add(argc, argv);
> > +	else if (!strcmp(cmd, "del"))
> > +		ret = cmd_dev_del(argc, argv);
> > +	else if (!strcmp(cmd, "list"))
> > +		ret = cmd_dev_list(argc, argv);
> > +	else if (!strcmp(cmd, "help"))
> > +		ret = cmd_dev_help(argc, argv);
> > +out:
> > +	if (ret)
> > +		cmd_dev_help(argc, argv);
> > +
> > +	return ret;
> > +}

thanks,
Ming




[Index of Archives]     [Linux RAID]     [Linux SCSI]     [Linux ATA RAID]     [IDE]     [Linux Wireless]     [Linux Kernel]     [ATH6KL]     [Linux Bluetooth]     [Linux Netdev]     [Kernel Newbies]     [Security]     [Git]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Device Mapper]

  Powered by Linux