[RFC PATCH 21/22] selftests: add tests for covering both bpf aio and split

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Add ublk-stripe for covering both bpf aio and io split features.

Signed-off-by: Ming Lei <tom.leiming@xxxxxxxxx>
---
 tools/testing/selftests/ublk/Makefile         |   3 +
 .../selftests/ublk/progs/ublk_stripe.c        | 319 ++++++++++++++++++
 .../testing/selftests/ublk/test_stripe_01.sh  |  35 ++
 .../testing/selftests/ublk/test_stripe_02.sh  |  26 ++
 tools/testing/selftests/ublk/ublk_bpf.c       |  88 ++++-
 5 files changed, 468 insertions(+), 3 deletions(-)
 create mode 100644 tools/testing/selftests/ublk/progs/ublk_stripe.c
 create mode 100755 tools/testing/selftests/ublk/test_stripe_01.sh
 create mode 100755 tools/testing/selftests/ublk/test_stripe_02.sh

diff --git a/tools/testing/selftests/ublk/Makefile b/tools/testing/selftests/ublk/Makefile
index 2540ae7a75a3..7c30c5728694 100644
--- a/tools/testing/selftests/ublk/Makefile
+++ b/tools/testing/selftests/ublk/Makefile
@@ -27,6 +27,9 @@ TEST_PROGS += test_null_04.sh
 TEST_PROGS += test_loop_01.sh
 TEST_PROGS += test_loop_02.sh
 
+TEST_PROGS += test_stripe_01.sh
+TEST_PROGS += test_stripe_02.sh
+
 # Order correspond to 'make run_tests' order
 TEST_GEN_PROGS_EXTENDED = ublk_bpf
 
diff --git a/tools/testing/selftests/ublk/progs/ublk_stripe.c b/tools/testing/selftests/ublk/progs/ublk_stripe.c
new file mode 100644
index 000000000000..98a59239047c
--- /dev/null
+++ b/tools/testing/selftests/ublk/progs/ublk_stripe.c
@@ -0,0 +1,319 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+#include <linux/const.h>
+#include <linux/errno.h>
+#include <linux/falloc.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+//#define DEBUG
+#include "ublk_bpf.h"
+
+/* libbpf v1.4.5 is required for struct_ops to work */
+
+struct ublk_stripe {
+#define MAX_BACKFILES	4
+	unsigned char chunk_shift;	/* log2 of the chunk size in bytes */
+	unsigned char nr_backfiles;	/* number of entries of fds[] in use */
+	int fds[MAX_BACKFILES];		/* backing file descriptors handed to bpf_aio_submit() */
+};
+
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(max_entries, 128);
+	__type(key, unsigned int);	/* dev id */
+	__type(value, struct ublk_stripe);	/* stripe setting */
+} stripe_map SEC(".maps");
+
+/* todo: make it writable payload of ublk_bpf_io */
+struct ublk_io_payload {
+	unsigned int ref;
+	int res;
+};
+
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(max_entries, 10240);
+	__type(key, unsigned long long);	/* dev_id + q_id + tag */
+	__type(value, struct ublk_io_payload);	/* io payload */
+} io_map SEC(".maps");
+
+static inline void dec_stripe_io_ref(const struct ublk_bpf_io *io, struct ublk_io_payload *pv, int ret)
+{
+	if (!pv)
+		return;
+
+	if (pv->res >= 0)	/* once a negative result is recorded, keep it */
+		pv->res = ret;
+
+	if (!__sync_sub_and_fetch(&pv->ref, 1)) {	/* last reference: complete the io command */
+		unsigned rw = (io->iod->op_flags & 0xff);
+
+		if (pv->res >= 0 && (rw <= 1))	/* opcode 0/1 is READ/WRITE: report the full length */
+			pv->res = io->iod->nr_sectors << 9;
+		ublk_bpf_complete_io(io, pv->res);
+	}
+}
+
+static inline void ublk_stripe_comp_and_release_aio(struct bpf_aio *aio, int ret)
+{
+	struct ublk_bpf_io *io = ublk_bpf_acquire_io_from_aio(aio);
+	struct ublk_io_payload *pv = NULL;
+	unsigned long long io_key;
+
+	if (!io)
+		return;
+
+	io_key = build_io_key(io);	/* built only after io is known to be non-NULL */
+	pv = bpf_map_lookup_elem(&io_map, &io_key);
+
+	/* record this aio's result and drop its reference on the io command */
+	dec_stripe_io_ref(io, pv, ret);
+	ublk_bpf_release_io_from_aio(io);
+
+	ublk_bpf_dettach_and_complete_aio(aio);
+	bpf_aio_release(aio);
+}
+
+SEC("struct_ops/bpf_aio_complete_cb")
+void BPF_PROG(ublk_stripe_comp_cb, struct bpf_aio *aio, long ret)
+{
+	BPF_DBG("aio result %d, back_file %s pos %llx", ret,
+			aio->iocb.ki_filp->f_path.dentry->d_name.name,
+			aio->iocb.ki_pos);
+	ublk_stripe_comp_and_release_aio(aio, ret);
+}
+
+SEC(".struct_ops.link")
+struct bpf_aio_complete_ops stripe_ublk_bpf_aio_ops = {
+	.id = 32,
+	.bpf_aio_complete_cb = (void *)ublk_stripe_comp_cb,
+};
+
+static inline int ublk_stripe_submit_backing_io(const struct ublk_bpf_io *io,
+		int backfile_fd, unsigned long backfile_off,
+		unsigned int backfile_bytes,
+		unsigned int buf_off)
+{
+	const struct ublksrv_io_desc *iod = io->iod;
+	unsigned int op_flags = 0;
+	struct bpf_aio *aio;
+	int res = -EINVAL;
+	int op;
+
+	/* translate the ublk opcode (low 8 bits of op_flags) into a bpf aio opcode plus fallocate flags */
+	switch (iod->op_flags & 0xff) {
+	case 0 /*UBLK_IO_OP_READ*/:
+		op = BPF_AIO_OP_FS_READ;
+		break;
+	case 1 /*UBLK_IO_OP_WRITE*/:
+		op = BPF_AIO_OP_FS_WRITE;
+		break;
+	case 2 /*UBLK_IO_OP_FLUSH*/:
+		op = BPF_AIO_OP_FS_FSYNC;
+		break;
+	case 3 /*UBLK_IO_OP_DISCARD*/:
+		op = BPF_AIO_OP_FS_FALLOCATE;
+		op_flags = FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE;
+		break;
+	case 4 /*UBLK_IO_OP_WRITE_SAME*/:	/* NOTE(review): handled like DISCARD (hole punch) - confirm intended */
+		op = BPF_AIO_OP_FS_FALLOCATE;
+		op_flags = FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE;
+		break;
+	case 5 /*UBLK_IO_OP_WRITE_ZEROES*/:
+		op = BPF_AIO_OP_FS_FALLOCATE;
+		op_flags = FALLOC_FL_ZERO_RANGE | FALLOC_FL_KEEP_SIZE;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	res = -ENOMEM;
+	aio = bpf_aio_alloc(op, 0);
+	if (!aio)
+		goto fail;
+
+	/* attach aio to the range of this io command described by buf_off and backfile_bytes */
+	res = ublk_bpf_attach_and_prep_aio(io, buf_off, backfile_bytes, aio);
+	if (res < 0) {
+		bpf_printk("bpf aio attaching failed %d\n", res);
+		goto fail;
+	}
+
+	/* submit this aio onto the backing file; a failure is fed through the completion helper */
+	res = bpf_aio_submit(aio, backfile_fd, backfile_off, backfile_bytes, op_flags);
+	if (res < 0) {
+		bpf_printk("aio submit failed %d\n", res);
+		ublk_stripe_comp_and_release_aio(aio, res);
+	}
+	return 0;	/* also returned after a submit failure, which was reported just above */
+fail:
+	return res;
+}
+
+static int calculate_backfile_off_bytes(const struct ublk_stripe *stripe,
+		unsigned long stripe_off, unsigned int stripe_bytes,
+		unsigned long *backfile_off,
+		unsigned int *backfile_bytes)
+{
+	unsigned long chunk_size = 1UL << stripe->chunk_shift;
+	unsigned long nr_bf = stripe->nr_backfiles;
+	unsigned long unit_chunk_size = nr_bf << stripe->chunk_shift;	/* bytes in one row across all members */
+	unsigned long start_off = stripe_off & ~(chunk_size - 1);	/* start of the chunk holding stripe_off */
+	unsigned long unit_start_off = stripe_off - (stripe_off % unit_chunk_size);	/* row start; no power-of-two assumption */
+	unsigned int idx = (start_off - unit_start_off) >> stripe->chunk_shift;	/* member index within the row */
+
+	*backfile_bytes = stripe_bytes;
+	*backfile_off = (unit_start_off / nr_bf) + (stripe_off - start_off);	/* row * chunk_size + offset inside the chunk */
+
+	return stripe->fds[idx % MAX_BACKFILES];
+}
+
+static unsigned int calculate_stripe_off_bytes(const struct ublk_stripe *stripe,
+		const struct ublksrv_io_desc *iod, unsigned int this_off,
+		unsigned long *stripe_off)
+{
+	unsigned long off, next_off;
+	unsigned long chunk_size = 1UL << stripe->chunk_shift;	/* same width as off, so the mask keeps off's high bits */
+	unsigned int max_size = (iod->nr_sectors << 9) - this_off;	/* bytes of the request not yet handled */
+
+	off = (iod->start_sector << 9) + this_off;
+	next_off = (off & ~(chunk_size  - 1)) + chunk_size;	/* first byte of the following chunk */
+
+	*stripe_off = off;
+
+	if (max_size < next_off - off)
+		return max_size;
+	return next_off - off;
+}
+
+static inline ublk_bpf_return_t __ublk_stripe_handle_io_cmd(const struct ublk_bpf_io *io, unsigned int off)
+{
+	ublk_bpf_return_t ret = ublk_bpf_return_val(UBLK_BPF_IO_QUEUED, 0);
+	unsigned long stripe_off, backfile_off;
+	unsigned int stripe_bytes = 0, backfile_bytes;
+	int dev_id = ublk_bpf_get_dev_id(io);
+	const struct ublksrv_io_desc *iod;
+	const struct ublk_stripe *stripe;
+	int res = -EINVAL;
+	int backfile_fd;
+	unsigned long long io_key = build_io_key(io);
+	struct ublk_io_payload pl = {
+		.ref = 2,
+		.res = 0,
+	};
+	struct ublk_io_payload *pv = NULL;
+
+	iod = ublk_bpf_get_iod(io);
+	if (!iod) {
+		ublk_bpf_complete_io(io, res);
+		return ret;
+	}
+
+	BPF_DBG("ublk dev %u qid %u: handle io cmd tag %u op %u %lx-%d off %u",
+			ublk_bpf_get_dev_id(io),
+			ublk_bpf_get_queue_id(io),
+			ublk_bpf_get_io_tag(io),
+			iod->op_flags & 0xff,
+			iod->start_sector << 9,
+			iod->nr_sectors << 9, off);
+
+	/* retrieve stripe settings; fail the io via the exit path (res is still -EINVAL) */
+	stripe = bpf_map_lookup_elem(&stripe_map, &dev_id);
+	if (!stripe) {
+		bpf_printk("can't get FD from %d\n", dev_id);
+		goto exit;
+	}
+
+	/* todo: build as big chunk as possible for each underlying files/disks */
+	stripe_bytes = calculate_stripe_off_bytes(stripe, iod, off, &stripe_off);
+	backfile_fd = calculate_backfile_off_bytes(stripe, stripe_off, stripe_bytes,
+			&backfile_off, &backfile_bytes);
+	BPF_DBG("\t <chunk_shift %u files %u> stripe(%lx %lu) backfile(%d %lx %lu)",
+			stripe->chunk_shift, stripe->nr_backfiles,
+			stripe_off, stripe_bytes,
+			backfile_fd, backfile_off, backfile_bytes);
+
+	if (!stripe_bytes) {
+		bpf_printk("empty stripe chunk at off %u\n", off);
+		res = -EINVAL;
+		goto exit;
+	}
+
+	/* grab one submission reference, and one extra for the whole batch */
+	if (!off) {
+		res = bpf_map_update_elem(&io_map, &io_key, &pl, BPF_ANY);
+		if (res) {
+			bpf_printk("update io map element failed %d key %llx\n", res, io_key);
+			goto exit;
+		}
+	} else {
+		pv = bpf_map_lookup_elem(&io_map, &io_key);
+		if (pv)
+			__sync_fetch_and_add(&pv->ref, 1);
+	}
+
+	/* handle this io command by submitting IOs on backing file */
+	res = ublk_stripe_submit_backing_io(io, backfile_fd, backfile_off, backfile_bytes, off);
+
+exit:
+	/* any failure up to here completes the io command with the error */
+	if (res < 0) {
+		bpf_printk("submit bpf aio failed %d\n", res);
+		ublk_bpf_complete_io(io, res);
+		return ret;
+	}
+
+	/* io cmd can't be completed until the extra batch reference is dropped */
+	if (off + stripe_bytes == iod->nr_sectors << 9) {
+		if (!pv)
+			pv = bpf_map_lookup_elem(&io_map, &io_key);
+		dec_stripe_io_ref(io, pv, pv ? pv->res : 0);
+	}
+
+	return ublk_bpf_return_val(UBLK_BPF_IO_CONTINUE, stripe_bytes);
+}
+
+SEC("struct_ops/ublk_bpf_release_io_cmd")
+void BPF_PROG(ublk_stripe_release_io_cmd, struct ublk_bpf_io *io)
+{
+	BPF_DBG("%s: complete io command %d", __func__, io->res);
+}
+
+SEC("struct_ops.s/ublk_bpf_queue_io_cmd_daemon")
+ublk_bpf_return_t BPF_PROG(ublk_stripe_handle_io_cmd, struct ublk_bpf_io *io, unsigned int off)
+{
+	return __ublk_stripe_handle_io_cmd(io, off);
+}
+
+SEC("struct_ops/ublk_bpf_attach_dev")
+int BPF_PROG(ublk_stripe_attach_dev, int dev_id)
+{
+	const struct ublk_stripe *stripe;
+
+	/* validate the stripe settings stored in stripe_map for this device */
+	stripe = bpf_map_lookup_elem(&stripe_map, &dev_id);
+	if (!stripe) {
+		bpf_printk("can't get FD from %d\n", dev_id);
+		return -EINVAL;
+	}
+
+	if (!stripe->nr_backfiles || stripe->nr_backfiles > MAX_BACKFILES)	/* 1..MAX_BACKFILES slots of fds[] */
+		return -EINVAL;
+
+	if (stripe->chunk_shift < 12 || stripe->chunk_shift > 29)	/* keeps nr_backfiles << chunk_shift within 32 bits */
+		return -EINVAL;
+
+	return 0;
+}
+
+SEC(".struct_ops.link")
+struct ublk_bpf_ops stripe_ublk_bpf_ops = {
+	.id = 32,
+	.attach_dev = (void *)ublk_stripe_attach_dev,
+	.queue_io_cmd_daemon = (void *)ublk_stripe_handle_io_cmd,
+	.release_io_cmd = (void *)ublk_stripe_release_io_cmd,
+};
+
+char LICENSE[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/ublk/test_stripe_01.sh b/tools/testing/selftests/ublk/test_stripe_01.sh
new file mode 100755
index 000000000000..3c21f7db495a
--- /dev/null
+++ b/tools/testing/selftests/ublk/test_stripe_01.sh
@@ -0,0 +1,35 @@
+#!/bin/bash
+
+. test_common.sh
+
+TID="stripe_01"
+ERR_CODE=0
+
+# prepare & register and pin bpf prog
+_prep_bpf_test "stripe" ublk_stripe.bpf.o
+
+backfile_0=`_create_backfile 256M`
+backfile_1=`_create_backfile 256M`
+
+# add one ublk stripe disk over the two backing files, using the pinned bpf prog
+_add_ublk_dev -t stripe -n 0 --bpf_prog 32 --bpf_aio_prog 32 --quiet $backfile_0 $backfile_1
+
+# run fio over the ublk disk
+fio --name=write_and_verify \
+    --filename=/dev/ublkb0 \
+    --ioengine=libaio --iodepth=4 \
+    --rw=write \
+    --size=256M \
+    --direct=1 \
+    --verify=crc32c \
+    --do_verify=1 \
+    --bs=4k > /dev/null 2>&1
+ERR_CODE=$?
+
+# cleanup & unregister and unpin the bpf prog
+_cleanup_bpf_test "stripe"
+
+_remove_backfile $backfile_0
+_remove_backfile $backfile_1
+
+_show_result $TID $ERR_CODE
diff --git a/tools/testing/selftests/ublk/test_stripe_02.sh b/tools/testing/selftests/ublk/test_stripe_02.sh
new file mode 100755
index 000000000000..fdbb81dc53d8
--- /dev/null
+++ b/tools/testing/selftests/ublk/test_stripe_02.sh
@@ -0,0 +1,26 @@
+#!/bin/bash
+
+. test_common.sh
+
+TID="stripe_02"
+ERR_CODE=0
+
+# prepare & register and pin bpf prog
+_prep_bpf_test "stripe" ublk_stripe.bpf.o
+
+backfile_0=`_create_backfile 256M`
+backfile_1=`_create_backfile 256M`
+
+# add one ublk stripe disk over the two backing files, using the pinned bpf prog
+_add_ublk_dev -t stripe -n 0 --bpf_prog 32 --bpf_aio_prog 32 --quiet $backfile_0 $backfile_1
+
+_mkfs_mount_test /dev/ublkb0
+ERR_CODE=$?
+
+# cleanup & unregister and unpin the bpf prog
+_cleanup_bpf_test "stripe"
+
+_remove_backfile $backfile_0
+_remove_backfile $backfile_1
+
+_show_result $TID $ERR_CODE
diff --git a/tools/testing/selftests/ublk/ublk_bpf.c b/tools/testing/selftests/ublk/ublk_bpf.c
index c24d5e18a1b1..85b2b4a09e05 100644
--- a/tools/testing/selftests/ublk/ublk_bpf.c
+++ b/tools/testing/selftests/ublk/ublk_bpf.c
@@ -1283,14 +1283,14 @@ static int cmd_dev_reg_bpf(struct dev_ctx *ctx)
 
 static int cmd_dev_help(char *exe)
 {
-	printf("%s add -t [null|loop] [-q nr_queues] [-d depth] [-n dev_id] [--bpf_prog ublk_prog_id] [--bpf_aio_prog ublk_aio_prog_id] [backfile1] [backfile2] ...\n", exe);
+	printf("%s add -t [null|loop|stripe] [-q nr_queues] [-d depth] [-n dev_id] [--bpf_prog ublk_prog_id] [--bpf_aio_prog ublk_aio_prog_id] [backfile1] [backfile2] ...\n", exe);
 	printf("\t default: nr_queues=2(max 4), depth=128(max 128), dev_id=-1(auto allocation)\n");
 	printf("%s del [-n dev_id] -a \n", exe);
 	printf("\t -a delete all devices -n delete specified device\n");
 	printf("%s list [-n dev_id] -a \n", exe);
 	printf("\t -a list all devices, -n list specified device, default -a \n");
-	printf("%s reg -t [null|loop] bpf_prog_obj_path \n", exe);
-	printf("%s unreg -t [null|loop]\n", exe);
+	printf("%s reg -t [null|loop|stripe] bpf_prog_obj_path \n", exe);
+	printf("%s unreg -t [null|loop|stripe]\n", exe);
 	return 0;
 }
 
@@ -1475,6 +1475,83 @@ static int ublk_loop_tgt_init(struct ublk_dev *dev)
 	return 0;
 }
 
+struct ublk_stripe_params {
+	unsigned char chunk_shift;
+	unsigned char nr_backfiles;
+	int fds[MAX_BACK_FILES];
+};
+
+static int stripe_bpf_setup_parameters(struct ublk_dev *dev, unsigned int chunk_shift)
+{
+	int dev_id = dev->dev_info.dev_id;
+	struct ublk_stripe_params stripe = {
+		.chunk_shift	=	chunk_shift,
+		.nr_backfiles	=	dev->nr_fds - 1,
+	};
+	int map_fd;
+	int err, i;
+
+	for (i = 0; i < stripe.nr_backfiles; i++)
+		stripe.fds[i] = dev->fds[i + 1];	/* dev->fds[0] is skipped */
+
+	map_fd = bpf_obj_get("/sys/fs/bpf/ublk/stripe/stripe_map");
+	if (map_fd < 0) {
+		ublk_err("Error getting map file descriptor\n");
+		return -EINVAL;
+	}
+
+	err = bpf_map_update_elem(map_fd, &dev_id, &stripe, BPF_ANY);	/* entry is keyed by dev id */
+	if (err) {
+		ublk_err("Error updating map element: %d\n", errno);
+		return -EINVAL;
+	}
+
+	return 0;	/* NOTE(review): map_fd is not closed on any return path */
+}
+
+static int ublk_stripe_tgt_init(struct ublk_dev *dev)
+{
+	unsigned long long bytes = 0;
+	unsigned chunk_shift = 12;
+	int ret, i;
+	struct ublk_params p = {
+		.types = UBLK_PARAM_TYPE_BASIC | UBLK_PARAM_TYPE_BPF,
+		.basic = {
+			.logical_bs_shift	= 9,
+			.physical_bs_shift	= 12,
+			.io_opt_shift	= 12,
+			.io_min_shift	= 9,
+			.max_sectors = dev->dev_info.max_io_buf_bytes >> 9,
+		},
+		.bpf = {
+			.flags = UBLK_BPF_HAS_OPS_ID | UBLK_BPF_HAS_AIO_OPS_ID,
+			.ops_id = dev->bpf_prog_id,
+			.aio_ops_id = dev->bpf_aio_prog_id,
+		},
+	};
+
+	ret = backing_file_tgt_init(dev);
+	if (!ret)	/* real call, not inside assert(): NDEBUG would compile it out */
+		ret = stripe_bpf_setup_parameters(dev, chunk_shift);
+	if (ret)
+		return ret;
+
+	for (i = 0; i < dev->nr_fds - 1; i++) {
+		unsigned long size = dev->tgt.backing_file_size[i];
+
+		if (size != dev->tgt.backing_file_size[0])	/* all members must have the same size */
+			return -EINVAL;
+		if (size & ((1 << chunk_shift) - 1))	/* and be a whole number of chunks */
+			return -EINVAL;
+		bytes += size;
+	}
+
+	dev->tgt.dev_size = bytes;	/* device capacity is the sum of all members */
+	p.basic.dev_sectors = bytes >> 9;
+	dev->tgt.params = p;
+
+	return 0;
+}
 
 static const struct ublk_tgt_ops tgt_ops_list[] = {
 	{
@@ -1487,6 +1564,11 @@ static const struct ublk_tgt_ops tgt_ops_list[] = {
 		.init_tgt = ublk_loop_tgt_init,
 		.deinit_tgt = backing_file_tgt_deinit,
 	},
+	{
+		.name = "stripe",
+		.init_tgt = ublk_stripe_tgt_init,
+		.deinit_tgt = backing_file_tgt_deinit,
+	},
 };
 
 static const struct ublk_tgt_ops *ublk_find_tgt(const char *name)
-- 
2.47.0





[Index of Archives]     [Linux RAID]     [Linux SCSI]     [Linux ATA RAID]     [IDE]     [Linux Wireless]     [Linux Kernel]     [ATH6KL]     [Linux Bluetooth]     [Linux Netdev]     [Kernel Newbies]     [Security]     [Git]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Device Mapper]

  Powered by Linux