Re: [PATCH] bcache: Use bcache without formatting existing device

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 




> 2022年7月4日 23:13,Andrea Tomassetti <andrea.tomassetti-opensource@xxxxxxxx> 写道:
> 
> Introducing a bcache control device (/dev/bcache_ctrl)
> that allows communicating to the driver from user space
> via IOCTL. The only IOCTL commands currently implemented,
> receives a struct cache_sb and uses it to register the
> backing device without any need of formatting them.
> 
> Signed-off-by: Andrea Tomassetti <andrea.tomassetti-opensource@xxxxxxxx>
> ---
> Hi all,
> At Devo we started to think of using bcache in our production systems
> to boost performance. But, at the very beginning, we were faced with
> one annoying issue, at least for our use-case: bcache needs the backing
> devices to be formatted before being able to use them. What's really
> needed is just to wipe any FS signature out of them. This is definitely
> not an option we will consider in our production systems because we would
> need to move hundreds of terabytes of data.
> 
> To circumvent the "formatting" problem, in the past weeks I worked on some
> modifications to the actual bcache module. In particular, I added a bcache
> control device (exported to /dev/bcache_ctrl) that allows communicating to
> the driver from userspace via IOCTL. One of the IOCTL commands that I
> implemented receives a struct cache_sb and uses it to register the backing
> device. The modifications are really small and retro compatible. To then
> re-create the same configuration after every boot (because the backing
> devices now do not present the bcache super block anymore) I created an
> udev rule that invokes a python script that will re-create the same
> scenario based on a yaml configuration file.
> 
> I'm re-sending this patch without any confidentiality footer, sorry for that.

Thanks for removing that confidential and legal statement, that’s the reason I was not able to reply your email.

Back to the patch, I don’t support this idea. For the problem you are solving, indeed people uses device mapper linear target for many years, and it works perfectly without any code modification.

That is, create a 8K image and set it as a loop device, then write a dm table to combine it with the existing hard drive. Then you run “bcache make -B <combined dm target>”, you will get a bcache device whose first 8K in the loop device and existing super block of the hard driver located at expected offset.

It is unnecessary to create a new IOCTL interface, and I feel the way how it works is really unconvinced for potential security risk.

Thanks.

Coly Li

> 
> drivers/md/bcache/Makefile      |   2 +-
> drivers/md/bcache/control.c     | 117 ++++++++++++++++++++++++++++++++
> drivers/md/bcache/control.h     |  12 ++++
> drivers/md/bcache/ioctl_codes.h |  19 ++++++
> drivers/md/bcache/super.c       |  50 +++++++++++---
> 5 files changed, 189 insertions(+), 11 deletions(-)
> create mode 100644 drivers/md/bcache/control.c
> create mode 100644 drivers/md/bcache/control.h
> create mode 100644 drivers/md/bcache/ioctl_codes.h
> 
> diff --git a/drivers/md/bcache/Makefile b/drivers/md/bcache/Makefile
> index 5b87e59676b8..46ed41baed7a 100644
> --- a/drivers/md/bcache/Makefile
> +++ b/drivers/md/bcache/Makefile
> @@ -4,4 +4,4 @@ obj-$(CONFIG_BCACHE)	+= bcache.o
> 
> bcache-y		:= alloc.o bset.o btree.o closure.o debug.o extents.o\
> 	io.o journal.o movinggc.o request.o stats.o super.o sysfs.o trace.o\
> -	util.o writeback.o features.o
> +	util.o writeback.o features.o control.o
> diff --git a/drivers/md/bcache/control.c b/drivers/md/bcache/control.c
> new file mode 100644
> index 000000000000..69b5e554d093
> --- /dev/null
> +++ b/drivers/md/bcache/control.c
> @@ -0,0 +1,117 @@
> +// SPDX-License-Identifier: GPL-2.0
> +#include <linux/cdev.h>
> +#include <linux/fs.h>
> +#include <linux/vmalloc.h>
> +
> +#include "control.h"
> +
> +struct bch_ctrl_device {
> +	struct cdev cdev;
> +	struct class *class;
> +	dev_t dev;
> +};
> +
> +static struct bch_ctrl_device _control_device;
> +
> +/* this handles IOCTL for /dev/bcache_ctrl */
> +/*********************************************/
> +static long bch_service_ioctl_ctrl(struct file *filp, unsigned int cmd,
> +		unsigned long arg)
> +{
> +	int retval = 0;
> +
> +	if (_IOC_TYPE(cmd) != BCH_IOCTL_MAGIC)
> +		return -EINVAL;
> +
> +	if (!capable(CAP_SYS_ADMIN)) {
> +		/* Must be root to issue ioctls */
> +		return -EPERM;
> +	}
> +
> +	switch (cmd) {
> +	case BCH_IOCTL_REGISTER_DEVICE: {
> +		struct bch_register_device *cmd_info;
> +
> +		cmd_info = vmalloc(sizeof(struct bch_register_device));
> +		if (!cmd_info)
> +			return -ENOMEM;
> +
> +		if (copy_from_user(cmd_info, (void __user *)arg,
> +				sizeof(struct bch_register_device))) {
> +			pr_err("Cannot copy cmd info from user space\n");
> +			vfree(cmd_info);
> +			return -EINVAL;
> +		}
> +
> +		retval = register_bcache_ioctl(cmd_info);
> +
> +		vfree(cmd_info);
> +		return retval;
> +	}
> +
> +	default:
> +		return -EINVAL;
> +	}
> +}
> +
> +static const struct file_operations _ctrl_dev_fops = {
> +	.owner = THIS_MODULE,
> +	.unlocked_ioctl = bch_service_ioctl_ctrl
> +};
> +
> +int __init bch_ctrl_device_init(void)
> +{
> +	struct bch_ctrl_device *ctrl = &_control_device;
> +	struct device *device;
> +	int result = 0;
> +
> +	result = alloc_chrdev_region(&ctrl->dev, 0, 1, "bcache");
> +	if (result) {
> +		pr_err("Cannot allocate control chrdev number.\n");
> +		goto error_alloc_chrdev_region;
> +	}
> +
> +	cdev_init(&ctrl->cdev, &_ctrl_dev_fops);
> +
> +	result = cdev_add(&ctrl->cdev, ctrl->dev, 1);
> +	if (result) {
> +		pr_err("Cannot add control chrdev.\n");
> +		goto error_cdev_add;
> +	}
> +
> +	ctrl->class = class_create(THIS_MODULE, "bcache");
> +	if (IS_ERR(ctrl->class)) {
> +		pr_err("Cannot create control chrdev class.\n");
> +		result = PTR_ERR(ctrl->class);
> +		goto error_class_create;
> +	}
> +
> +	device = device_create(ctrl->class, NULL, ctrl->dev, NULL,
> +			"bcache_ctrl");
> +	if (IS_ERR(device)) {
> +		pr_err("Cannot create control chrdev.\n");
> +		result = PTR_ERR(device);
> +		goto error_device_create;
> +	}
> +
> +	return result;
> +
> +error_device_create:
> +	class_destroy(ctrl->class);
> +error_class_create:
> +	cdev_del(&ctrl->cdev);
> +error_cdev_add:
> +	unregister_chrdev_region(ctrl->dev, 1);
> +error_alloc_chrdev_region:
> +	return result;
> +}
> +
> +void bch_ctrl_device_deinit(void)
> +{
> +	struct bch_ctrl_device *ctrl = &_control_device;
> +
> +	device_destroy(ctrl->class, ctrl->dev);
> +	class_destroy(ctrl->class);
> +	cdev_del(&ctrl->cdev);
> +	unregister_chrdev_region(ctrl->dev, 1);
> +}
> diff --git a/drivers/md/bcache/control.h b/drivers/md/bcache/control.h
> new file mode 100644
> index 000000000000..3e4273db02a3
> --- /dev/null
> +++ b/drivers/md/bcache/control.h
> @@ -0,0 +1,12 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +#ifndef __BCACHE_CONTROL_H__
> +#define __BCACHE_CONTROL_H__
> +
> +#include "ioctl_codes.h"
> +
> +int __init bch_ctrl_device_init(void);
> +void bch_ctrl_device_deinit(void);
> +
> +ssize_t register_bcache_ioctl(struct bch_register_device *brd);
> +
> +#endif
> diff --git a/drivers/md/bcache/ioctl_codes.h b/drivers/md/bcache/ioctl_codes.h
> new file mode 100644
> index 000000000000..b004d60c29ff
> --- /dev/null
> +++ b/drivers/md/bcache/ioctl_codes.h
> @@ -0,0 +1,19 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +#ifndef __BCACHE_IOCTL_CODES_H__
> +#define __BCACHE_IOCTL_CODES_H__
> +
> +#include <linux/ioctl.h>
> +#include <linux/types.h>
> +
> +struct bch_register_device {
> +	const char *dev_name;
> +	size_t size;
> +	struct cache_sb *sb;
> +};
> +
> +#define BCH_IOCTL_MAGIC (0xBC)
> +
> +/* Register a new backing device */
> +#define BCH_IOCTL_REGISTER_DEVICE	_IOWR(BCH_IOCTL_MAGIC, 1, struct bch_register_device)
> +
> +#endif
> diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
> index 140f35dc0c45..339a11d00fef 100644
> --- a/drivers/md/bcache/super.c
> +++ b/drivers/md/bcache/super.c
> @@ -14,6 +14,7 @@
> #include "request.h"
> #include "writeback.h"
> #include "features.h"
> +#include "control.h"
> 
> #include <linux/blkdev.h>
> #include <linux/pagemap.h>
> @@ -340,6 +341,9 @@ void bch_write_bdev_super(struct cached_dev *dc, struct closure *parent)
> 	struct closure *cl = &dc->sb_write;
> 	struct bio *bio = &dc->sb_bio;
> 
> +	if (!dc->sb_disk)
> +		return;
> +
> 	down(&dc->sb_write_mutex);
> 	closure_init(cl, parent);
> 
> @@ -2403,14 +2407,14 @@ static int register_cache(struct cache_sb *sb, struct cache_sb_disk *sb_disk,
> 
> /* Global interfaces/init */
> 
> -static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
> +static ssize_t register_bcache_sysfs(struct kobject *k, struct kobj_attribute *attr,
> 			       const char *buffer, size_t size);
> static ssize_t bch_pending_bdevs_cleanup(struct kobject *k,
> 					 struct kobj_attribute *attr,
> 					 const char *buffer, size_t size);
> 
> -kobj_attribute_write(register,		register_bcache);
> -kobj_attribute_write(register_quiet,	register_bcache);
> +kobj_attribute_write(register,		register_bcache_sysfs);
> +kobj_attribute_write(register_quiet,	register_bcache_sysfs);
> kobj_attribute_write(pendings_cleanup,	bch_pending_bdevs_cleanup);
> 
> static bool bch_is_open_backing(dev_t dev)
> @@ -2465,7 +2469,8 @@ static void register_bdev_worker(struct work_struct *work)
> 	dc = kzalloc(sizeof(*dc), GFP_KERNEL);
> 	if (!dc) {
> 		fail = true;
> -		put_page(virt_to_page(args->sb_disk));
> +		if (args->sb_disk)
> +			put_page(virt_to_page(args->sb_disk));
> 		blkdev_put(args->bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
> 		goto out;
> 	}
> @@ -2495,7 +2500,8 @@ static void register_cache_worker(struct work_struct *work)
> 	ca = kzalloc(sizeof(*ca), GFP_KERNEL);
> 	if (!ca) {
> 		fail = true;
> -		put_page(virt_to_page(args->sb_disk));
> +		if (args->sb_disk)
> +			put_page(virt_to_page(args->sb_disk));
> 		blkdev_put(args->bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
> 		goto out;
> 	}
> @@ -2525,7 +2531,7 @@ static void register_device_async(struct async_reg_args *args)
> 	queue_delayed_work(system_wq, &args->reg_work, 10);
> }
> 
> -static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
> +static ssize_t register_bcache_common(void *k, struct kobj_attribute *attr,
> 			       const char *buffer, size_t size)
> {
> 	const char *err;
> @@ -2587,9 +2593,14 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
> 	if (set_blocksize(bdev, 4096))
> 		goto out_blkdev_put;
> 
> -	err = read_super(sb, bdev, &sb_disk);
> -	if (err)
> -		goto out_blkdev_put;
> +	if (!k) {
> +		err = read_super(sb, bdev, &sb_disk);
> +		if (err)
> +			goto out_blkdev_put;
> +	} else {
> +		sb_disk =  NULL;
> +		memcpy(sb, (struct cache_sb *)k, sizeof(struct cache_sb));
> +	}
> 
> 	err = "failed to register device";
> 
> @@ -2651,7 +2662,8 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
> 	return size;
> 
> out_put_sb_page:
> -	put_page(virt_to_page(sb_disk));
> +	if (!k)
> +		put_page(virt_to_page(sb_disk));
> out_blkdev_put:
> 	blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
> out_free_sb:
> @@ -2666,6 +2678,18 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
> 	return ret;
> }
> 
> +static ssize_t register_bcache_sysfs(struct kobject *k, struct kobj_attribute *attr,
> +			       const char *buffer, size_t size)
> +{
> +	return register_bcache_common(NULL, attr, buffer, size);
> +}
> +
> +ssize_t register_bcache_ioctl(struct bch_register_device *brd)
> +{
> +	return register_bcache_common((void *)brd->sb, NULL, brd->dev_name, brd->size);
> +}
> +
> +
> 
> struct pdev {
> 	struct list_head list;
> @@ -2819,6 +2843,7 @@ static void bcache_exit(void)
> {
> 	bch_debug_exit();
> 	bch_request_exit();
> +	bch_ctrl_device_deinit();
> 	if (bcache_kobj)
> 		kobject_put(bcache_kobj);
> 	if (bcache_wq)
> @@ -2918,6 +2943,11 @@ static int __init bcache_init(void)
> 	bch_debug_init();
> 	closure_debug_init();
> 
> +	if (bch_ctrl_device_init()) {
> +		pr_err("Cannot initialize control device\n");
> +		goto err;
> +	}
> +
> 	bcache_is_reboot = false;
> 
> 	return 0;
> --
> 2.37.0
> 





[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Index of Archives]     [Linux ARM Kernel]     [Linux Filesystem Development]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Annouce]     [Security]     [Bugtraq]     [Linux OMAP]     [Linux MIPS]     [ECOS]     [Asterisk Internet PBX]     [Linux API]

  Powered by Linux