Re: [PATCH v3] bcache: Use bcache without formatting existing device

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



ping...  any comments?

Hi all,
At Devo we started to think of using bcache in our production systems
to boost performance. But, at the very beginning, we were faced with
one annoying issue, at least for our use-case: bcache needs the backing
devices to be formatted before being able to use them. What's really
needed is just to wipe any FS signature out of them. This is definitely
not an option we will consider in our production systems because we would
need to move hundreds of terabytes of data.

To circumvent the "formatting" problem, in the past weeks I worked on some
modifications to the actual bcache module. In particular, I added a bcache
control device (exported to /dev/bcache_ctrl) that allows communicating to
the driver from userspace via IOCTL. One of the IOCTL commands that I
implemented receives a struct cache_sb and uses it to register the backing
device. The modifications are really small and retro compatible. To then
re-create the same configuration after every boot (because the backing
devices now do not present the bcache super block anymore) I created an
udev rule that invokes a python script that will re-create the same
scenario based on a yaml configuration file.

On Thu, Mar 10, 2022 at 12:44 PM Andrea Tomassetti
<andrea.tomassetti@xxxxxxxx> wrote:
>
> v3: fix build warning reported by kernel test robot
>
> >> drivers/md/bcache/control.c:18:6: warning: no
>    previous prototype for function 'bch_service_ioctl_ctrl'
>    [-Wmissing-prototypes]
>    long bch_service_ioctl_ctrl(struct file *filp, unsigned int cmd,
>         ^
>    drivers/md/bcache/control.c:18:1: note: declare 'static' if the
>    function is not intended to be used outside of this translation unit
>    long bch_service_ioctl_ctrl(struct file *filp, unsigned int cmd,
>    ^
>    static
>    1 warning generated.
>
> Reported-by: kernel test robot <lkp@xxxxxxxxx>
>
> v2: Fixed small typos
>
> Introducing a bcache control device (/dev/bcache_ctrl)
> that allows communicating to the driver from user space
> via IOCTL. The only IOCTL commands currently implemented,
> receives a struct cache_sb and uses it to register the
> backing device.
>
> Signed-off-by: Andrea Tomassetti <andrea.tomassetti@xxxxxxxx>
> ---
>  drivers/md/bcache/Makefile      |   2 +-
>  drivers/md/bcache/control.c     | 117 ++++++++++++++++++++++++++++++++
>  drivers/md/bcache/control.h     |  12 ++++
>  drivers/md/bcache/ioctl_codes.h |  19 ++++++
>  drivers/md/bcache/super.c       |  62 ++++++++++++-----
>  drivers/md/bcache/sysfs.c       |   4 ++
>  drivers/md/bcache/writeback.h   |   2 +-
>  7 files changed, 200 insertions(+), 18 deletions(-)
>  create mode 100644 drivers/md/bcache/control.c
>  create mode 100644 drivers/md/bcache/control.h
>  create mode 100644 drivers/md/bcache/ioctl_codes.h
>
> diff --git a/drivers/md/bcache/Makefile b/drivers/md/bcache/Makefile
> index 5b87e59676b8..46ed41baed7a 100644
> --- a/drivers/md/bcache/Makefile
> +++ b/drivers/md/bcache/Makefile
> @@ -4,4 +4,4 @@ obj-$(CONFIG_BCACHE)    += bcache.o
>
>  bcache-y               := alloc.o bset.o btree.o closure.o debug.o extents.o\
>         io.o journal.o movinggc.o request.o stats.o super.o sysfs.o trace.o\
> -       util.o writeback.o features.o
> +       util.o writeback.o features.o control.o
> diff --git a/drivers/md/bcache/control.c b/drivers/md/bcache/control.c
> new file mode 100644
> index 000000000000..69b5e554d093
> --- /dev/null
> +++ b/drivers/md/bcache/control.c
> @@ -0,0 +1,117 @@
> +// SPDX-License-Identifier: GPL-2.0
> +#include <linux/cdev.h>
> +#include <linux/fs.h>
> +#include <linux/vmalloc.h>
> +
> +#include "control.h"
> +
> +struct bch_ctrl_device {
> +       struct cdev cdev;
> +       struct class *class;
> +       dev_t dev;
> +};
> +
> +static struct bch_ctrl_device _control_device;
> +
> +/* this handles IOCTL for /dev/bcache_ctrl */
> +/*********************************************/
> +static long bch_service_ioctl_ctrl(struct file *filp, unsigned int cmd,
> +               unsigned long arg)
> +{
> +       int retval = 0;
> +
> +       if (_IOC_TYPE(cmd) != BCH_IOCTL_MAGIC)
> +               return -EINVAL;
> +
> +       if (!capable(CAP_SYS_ADMIN)) {
> +               /* Must be root to issue ioctls */
> +               return -EPERM;
> +       }
> +
> +       switch (cmd) {
> +       case BCH_IOCTL_REGISTER_DEVICE: {
> +               struct bch_register_device *cmd_info;
> +
> +               cmd_info = vmalloc(sizeof(struct bch_register_device));
> +               if (!cmd_info)
> +                       return -ENOMEM;
> +
> +               if (copy_from_user(cmd_info, (void __user *)arg,
> +                               sizeof(struct bch_register_device))) {
> +                       pr_err("Cannot copy cmd info from user space\n");
> +                       vfree(cmd_info);
> +                       return -EINVAL;
> +               }
> +
> +               retval = register_bcache_ioctl(cmd_info);
> +
> +               vfree(cmd_info);
> +               return retval;
> +       }
> +
> +       default:
> +               return -EINVAL;
> +       }
> +}
> +
> +static const struct file_operations _ctrl_dev_fops = {
> +       .owner = THIS_MODULE,
> +       .unlocked_ioctl = bch_service_ioctl_ctrl
> +};
> +
> +int __init bch_ctrl_device_init(void)
> +{
> +       struct bch_ctrl_device *ctrl = &_control_device;
> +       struct device *device;
> +       int result = 0;
> +
> +       result = alloc_chrdev_region(&ctrl->dev, 0, 1, "bcache");
> +       if (result) {
> +               pr_err("Cannot allocate control chrdev number.\n");
> +               goto error_alloc_chrdev_region;
> +       }
> +
> +       cdev_init(&ctrl->cdev, &_ctrl_dev_fops);
> +
> +       result = cdev_add(&ctrl->cdev, ctrl->dev, 1);
> +       if (result) {
> +               pr_err("Cannot add control chrdev.\n");
> +               goto error_cdev_add;
> +       }
> +
> +       ctrl->class = class_create(THIS_MODULE, "bcache");
> +       if (IS_ERR(ctrl->class)) {
> +               pr_err("Cannot create control chrdev class.\n");
> +               result = PTR_ERR(ctrl->class);
> +               goto error_class_create;
> +       }
> +
> +       device = device_create(ctrl->class, NULL, ctrl->dev, NULL,
> +                       "bcache_ctrl");
> +       if (IS_ERR(device)) {
> +               pr_err("Cannot create control chrdev.\n");
> +               result = PTR_ERR(device);
> +               goto error_device_create;
> +       }
> +
> +       return result;
> +
> +error_device_create:
> +       class_destroy(ctrl->class);
> +error_class_create:
> +       cdev_del(&ctrl->cdev);
> +error_cdev_add:
> +       unregister_chrdev_region(ctrl->dev, 1);
> +error_alloc_chrdev_region:
> +       return result;
> +}
> +
> +void bch_ctrl_device_deinit(void)
> +{
> +       struct bch_ctrl_device *ctrl = &_control_device;
> +
> +       device_destroy(ctrl->class, ctrl->dev);
> +       class_destroy(ctrl->class);
> +       cdev_del(&ctrl->cdev);
> +       unregister_chrdev_region(ctrl->dev, 1);
> +}
> diff --git a/drivers/md/bcache/control.h b/drivers/md/bcache/control.h
> new file mode 100644
> index 000000000000..3e4273db02a3
> --- /dev/null
> +++ b/drivers/md/bcache/control.h
> @@ -0,0 +1,12 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +#ifndef __BCACHE_CONTROL_H__
> +#define __BCACHE_CONTROL_H__
> +
> +#include "ioctl_codes.h"
> +
> +int __init bch_ctrl_device_init(void);
> +void bch_ctrl_device_deinit(void);
> +
> +ssize_t register_bcache_ioctl(struct bch_register_device *brd);
> +
> +#endif
> diff --git a/drivers/md/bcache/ioctl_codes.h b/drivers/md/bcache/ioctl_codes.h
> new file mode 100644
> index 000000000000..b004d60c29ff
> --- /dev/null
> +++ b/drivers/md/bcache/ioctl_codes.h
> @@ -0,0 +1,19 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +#ifndef __BCACHE_IOCTL_CODES_H__
> +#define __BCACHE_IOCTL_CODES_H__
> +
> +#include <linux/ioctl.h>
> +#include <linux/types.h>
> +
> +struct bch_register_device {
> +       const char *dev_name;
> +       size_t size;
> +       struct cache_sb *sb;
> +};
> +
> +#define BCH_IOCTL_MAGIC (0xBC)
> +
> +/* Register a new backing device */
> +#define BCH_IOCTL_REGISTER_DEVICE      _IOWR(BCH_IOCTL_MAGIC, 1, struct bch_register_device)
> +
> +#endif
> diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
> index 140f35dc0c45..95db3785a6e0 100644
> --- a/drivers/md/bcache/super.c
> +++ b/drivers/md/bcache/super.c
> @@ -14,6 +14,7 @@
>  #include "request.h"
>  #include "writeback.h"
>  #include "features.h"
> +#include "control.h"
>
>  #include <linux/blkdev.h>
>  #include <linux/pagemap.h>
> @@ -1069,7 +1070,7 @@ int bch_cached_dev_run(struct cached_dev *dc)
>                 goto out;
>         }
>
> -       if (!d->c &&
> +       if (!d->c && dc->sb_disk &&
>             BDEV_STATE(&dc->sb) != BDEV_STATE_NONE) {
>                 struct closure cl;
>
> @@ -1259,9 +1260,6 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c,
>          */
>
>         if (bch_is_zero(u->uuid, 16)) {
> -               struct closure cl;
> -
> -               closure_init_stack(&cl);
>
>                 memcpy(u->uuid, dc->sb.uuid, 16);
>                 memcpy(u->label, dc->sb.label, SB_LABEL_SIZE);
> @@ -1271,8 +1269,14 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c,
>                 memcpy(dc->sb.set_uuid, c->set_uuid, 16);
>                 SET_BDEV_STATE(&dc->sb, BDEV_STATE_CLEAN);
>
> -               bch_write_bdev_super(dc, &cl);
> -               closure_sync(&cl);
> +               if (dc->sb_disk) {
> +                       struct closure cl;
> +
> +                       closure_init_stack(&cl);
> +                       bch_write_bdev_super(dc, &cl);
> +                       closure_sync(&cl);
> +               }
> +
>         } else {
>                 u->last_reg = rtime;
>                 bch_uuid_write(c);
> @@ -2403,14 +2407,14 @@ static int register_cache(struct cache_sb *sb, struct cache_sb_disk *sb_disk,
>
>  /* Global interfaces/init */
>
> -static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
> +static ssize_t register_bcache_sysfs(struct kobject *k, struct kobj_attribute *attr,
>                                const char *buffer, size_t size);
>  static ssize_t bch_pending_bdevs_cleanup(struct kobject *k,
>                                          struct kobj_attribute *attr,
>                                          const char *buffer, size_t size);
>
> -kobj_attribute_write(register,         register_bcache);
> -kobj_attribute_write(register_quiet,   register_bcache);
> +kobj_attribute_write(register,         register_bcache_sysfs);
> +kobj_attribute_write(register_quiet,   register_bcache_sysfs);
>  kobj_attribute_write(pendings_cleanup, bch_pending_bdevs_cleanup);
>
>  static bool bch_is_open_backing(dev_t dev)
> @@ -2465,7 +2469,8 @@ static void register_bdev_worker(struct work_struct *work)
>         dc = kzalloc(sizeof(*dc), GFP_KERNEL);
>         if (!dc) {
>                 fail = true;
> -               put_page(virt_to_page(args->sb_disk));
> +               if (args->sb_disk)
> +                       put_page(virt_to_page(args->sb_disk));
>                 blkdev_put(args->bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
>                 goto out;
>         }
> @@ -2495,7 +2500,8 @@ static void register_cache_worker(struct work_struct *work)
>         ca = kzalloc(sizeof(*ca), GFP_KERNEL);
>         if (!ca) {
>                 fail = true;
> -               put_page(virt_to_page(args->sb_disk));
> +               if (args->sb_disk)
> +                       put_page(virt_to_page(args->sb_disk));
>                 blkdev_put(args->bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
>                 goto out;
>         }
> @@ -2525,7 +2531,7 @@ static void register_device_async(struct async_reg_args *args)
>         queue_delayed_work(system_wq, &args->reg_work, 10);
>  }
>
> -static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
> +static ssize_t register_bcache_common(void *k, struct kobj_attribute *attr,
>                                const char *buffer, size_t size)
>  {
>         const char *err;
> @@ -2587,9 +2593,14 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
>         if (set_blocksize(bdev, 4096))
>                 goto out_blkdev_put;
>
> -       err = read_super(sb, bdev, &sb_disk);
> -       if (err)
> -               goto out_blkdev_put;
> +       if (!k) {
> +               err = read_super(sb, bdev, &sb_disk);
> +               if (err)
> +                       goto out_blkdev_put;
> +       } else {
> +               sb_disk =  NULL;
> +               memcpy(sb, (struct cache_sb *)k, sizeof(struct cache_sb));
> +       }
>
>         err = "failed to register device";
>
> @@ -2651,7 +2662,8 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
>         return size;
>
>  out_put_sb_page:
> -       put_page(virt_to_page(sb_disk));
> +       if (!k)
> +               put_page(virt_to_page(sb_disk));
>  out_blkdev_put:
>         blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
>  out_free_sb:
> @@ -2666,6 +2678,18 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
>         return ret;
>  }
>
> +static ssize_t register_bcache_sysfs(struct kobject *k, struct kobj_attribute *attr,
> +                              const char *buffer, size_t size)
> +{
> +       return register_bcache_common(NULL, attr, buffer, size);
> +}
> +
> +ssize_t register_bcache_ioctl(struct bch_register_device *brd)
> +{
> +       return register_bcache_common((void *)brd->sb, NULL, brd->dev_name, brd->size);
> +}
> +
> +
>
>  struct pdev {
>         struct list_head list;
> @@ -2819,6 +2843,7 @@ static void bcache_exit(void)
>  {
>         bch_debug_exit();
>         bch_request_exit();
> +       bch_ctrl_device_deinit();
>         if (bcache_kobj)
>                 kobject_put(bcache_kobj);
>         if (bcache_wq)
> @@ -2918,6 +2943,11 @@ static int __init bcache_init(void)
>         bch_debug_init();
>         closure_debug_init();
>
> +       if (bch_ctrl_device_init()) {
> +               pr_err("Cannot initialize control device\n");
> +               goto err;
> +       }
> +
>         bcache_is_reboot = false;
>
>         return 0;
> diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
> index 1f0dce30fa75..2df5b114e821 100644
> --- a/drivers/md/bcache/sysfs.c
> +++ b/drivers/md/bcache/sysfs.c
> @@ -379,6 +379,10 @@ STORE(__cached_dev)
>                 if (v < 0)
>                         return v;
>
> +               // Devices created via IOCTL don't support changing cache mode
> +               if (!dc->sb_disk)
> +                       return -EINVAL;
> +
>                 if ((unsigned int) v != BDEV_CACHE_MODE(&dc->sb)) {
>                         SET_BDEV_CACHE_MODE(&dc->sb, v);
>                         bch_write_bdev_super(dc, NULL);
> diff --git a/drivers/md/bcache/writeback.h b/drivers/md/bcache/writeback.h
> index 02b2f9df73f6..bd7b95bd2da7 100644
> --- a/drivers/md/bcache/writeback.h
> +++ b/drivers/md/bcache/writeback.h
> @@ -135,7 +135,7 @@ static inline void bch_writeback_add(struct cached_dev *dc)
>  {
>         if (!atomic_read(&dc->has_dirty) &&
>             !atomic_xchg(&dc->has_dirty, 1)) {
> -               if (BDEV_STATE(&dc->sb) != BDEV_STATE_DIRTY) {
> +               if (dc->sb_disk && BDEV_STATE(&dc->sb) != BDEV_STATE_DIRTY) {
>                         SET_BDEV_STATE(&dc->sb, BDEV_STATE_DIRTY);
>                         /* XXX: should do this synchronously */
>                         bch_write_bdev_super(dc, NULL);
> --
> 2.25.1
>

-- 







The contents of this email are confidential. If the reader of this 
message is not the intended recipient, you are hereby notified that any 
dissemination, distribution or copying of this communication is strictly 
prohibited. If you have received this communication in error, please notify 
us immediately by replying to this message and deleting it from your 
computer. Thank you. Devo, Inc; arco@xxxxxxxx <mailto:arco@xxxxxxxx>;  
Calle Estébanez Calderón 3-5, 5th Floor. Madrid, Spain 28020





[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Index of Archives]     [Linux ARM Kernel]     [Linux Filesystem Development]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Annouce]     [Security]     [Bugtraq]     [Linux OMAP]     [Linux MIPS]     [ECOS]     [Asterisk Internet PBX]     [Linux API]

  Powered by Linux