Re: [PATCH 4/8] block: support delayed holder registration

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi,

On 04.08.2021 11:41, Christoph Hellwig wrote:
> device mapper needs to register holders before it is ready to do I/O.
> Currently it does so by registering the disk early, which can leave
> the disk and queue in a weird half state where the queue is registered
> with the disk, except for sysfs and the elevator.  And this state has
> been a bit promlematic before, and will get more so when sorting out
> the responsibilities between the queue and the disk.
>
> Support registering holders on an initialized but not registered disk
> instead by delaying the sysfs registration until the disk is registered.
>
> Signed-off-by: Christoph Hellwig <hch@xxxxxx>
> Reviewed-by: Mike Snitzer <snitzer@xxxxxxxxxx>

This patch landed in today's linux-next (20210810) as commit 
d62633873590 ("block: support delayed holder registration"). It triggers 
a following lockdep warning on ARM64's virt 'machine' on QEmu:

======================================================
WARNING: possible circular locking dependency detected
5.14.0-rc4+ #10642 Not tainted
------------------------------------------------------
systemd-udevd/227 is trying to acquire lock:
ffffb6b41952d628 (mtd_table_mutex){+.+.}-{3:3}, at: blktrans_open+0x40/0x250

but task is already holding lock:
ffff0eacc403bb18 (&disk->open_mutex){+.+.}-{3:3}, at: 
blkdev_get_by_dev+0x110/0x2f8

which lock already depends on the new lock.


the existing dependency chain (in reverse order) is:

-> #1 (&disk->open_mutex){+.+.}-{3:3}:
        __mutex_lock+0xa4/0x978
        mutex_lock_nested+0x54/0x60
        bd_register_pending_holders+0x2c/0x118
        __device_add_disk+0x1d8/0x368
        device_add_disk+0x10/0x18
        add_mtd_blktrans_dev+0x2dc/0x428
        mtdblock_add_mtd+0x68/0x98
        blktrans_notify_add+0x44/0x70
        add_mtd_device+0x430/0x4a0
        mtd_device_parse_register+0x1a4/0x2b0
        physmap_flash_probe+0x44c/0x780
        platform_probe+0x90/0xd8
        really_probe+0x138/0x2d0
        __driver_probe_device+0x78/0xd8
        driver_probe_device+0x40/0x110
        __driver_attach+0xcc/0x118
        bus_for_each_dev+0x68/0xc8
        driver_attach+0x20/0x28
        bus_add_driver+0x168/0x1f8
        driver_register+0x60/0x110
        __platform_driver_register+0x24/0x30
        physmap_init+0x18/0x20
        do_one_initcall+0x84/0x450
        kernel_init_freeable+0x31c/0x38c
        kernel_init+0x20/0x120
        ret_from_fork+0x10/0x18

-> #0 (mtd_table_mutex){+.+.}-{3:3}:
        __lock_acquire+0xff4/0x1840
        lock_acquire+0x130/0x3e8
        __mutex_lock+0xa4/0x978
        mutex_lock_nested+0x54/0x60
        blktrans_open+0x40/0x250
        blkdev_get_whole+0x28/0x120
        blkdev_get_by_dev+0x15c/0x2f8
        blkdev_open+0x50/0xb0
        do_dentry_open+0x238/0x3c0
        vfs_open+0x28/0x30
        path_openat+0x720/0x938
        do_filp_open+0x80/0x108
        do_sys_openat2+0x1b4/0x2c8
        do_sys_open+0x68/0x88
        __arm64_compat_sys_openat+0x1c/0x28
        invoke_syscall+0x40/0xf8
        el0_svc_common+0x60/0x100
        do_el0_svc_compat+0x1c/0x48
        el0_svc_compat+0x20/0x30
        el0t_32_sync_handler+0xec/0x140
        el0t_32_sync+0x168/0x16c

other info that might help us debug this:

  Possible unsafe locking scenario:

        CPU0                    CPU1
        ----                    ----
   lock(&disk->open_mutex);
                                lock(mtd_table_mutex);
                                lock(&disk->open_mutex);
   lock(mtd_table_mutex);

  *** DEADLOCK ***

1 lock held by systemd-udevd/227:
  #0: ffff0eacc403bb18 (&disk->open_mutex){+.+.}-{3:3}, at: 
blkdev_get_by_dev+0x110/0x2f8

stack backtrace:
CPU: 1 PID: 227 Comm: systemd-udevd Not tainted 5.14.0-rc4+ #10642
Hardware name: linux,dummy-virt (DT)
Call trace:
  dump_backtrace+0x0/0x1d0
  show_stack+0x14/0x20
  dump_stack_lvl+0x88/0xb0
  dump_stack+0x14/0x2c
  print_circular_bug.isra.50+0x1ac/0x200
  check_noncircular+0x134/0x148
  __lock_acquire+0xff4/0x1840
  lock_acquire+0x130/0x3e8
  __mutex_lock+0xa4/0x978
  mutex_lock_nested+0x54/0x60
  blktrans_open+0x40/0x250
  blkdev_get_whole+0x28/0x120
  blkdev_get_by_dev+0x15c/0x2f8
  blkdev_open+0x50/0xb0
  do_dentry_open+0x238/0x3c0
  vfs_open+0x28/0x30
  path_openat+0x720/0x938
  do_filp_open+0x80/0x108
  do_sys_openat2+0x1b4/0x2c8
  do_sys_open+0x68/0x88
  __arm64_compat_sys_openat+0x1c/0x28
  invoke_syscall+0x40/0xf8
  el0_svc_common+0x60/0x100
  do_el0_svc_compat+0x1c/0x48
  el0_svc_compat+0x20/0x30
  el0t_32_sync_handler+0xec/0x140
  el0t_32_sync+0x168/0x16c

If this is a false positive, then it should be annotated as such.

> ---
>   block/genhd.c         | 10 +++++++
>   block/holder.c        | 68 ++++++++++++++++++++++++++++++++-----------
>   include/linux/genhd.h |  5 ++++
>   3 files changed, 66 insertions(+), 17 deletions(-)
>
> diff --git a/block/genhd.c b/block/genhd.c
> index cd4eab744667..db916f779077 100644
> --- a/block/genhd.c
> +++ b/block/genhd.c
> @@ -447,6 +447,16 @@ static void register_disk(struct device *parent, struct gendisk *disk,
>   		kobject_create_and_add("holders", &ddev->kobj);
>   	disk->slave_dir = kobject_create_and_add("slaves", &ddev->kobj);
>   
> +	/*
> +	 * XXX: this is a mess, can't wait for real error handling in add_disk.
> +	 * Make sure ->slave_dir is NULL if we failed some of the registration
> +	 * so that the cleanup in bd_unlink_disk_holder works properly.
> +	 */
> +	if (bd_register_pending_holders(disk) < 0) {
> +		kobject_put(disk->slave_dir);
> +		disk->slave_dir = NULL;
> +	}
> +
>   	if (disk->flags & GENHD_FL_HIDDEN)
>   		return;
>   
> diff --git a/block/holder.c b/block/holder.c
> index 11e65d99a9fb..4568cc4f6827 100644
> --- a/block/holder.c
> +++ b/block/holder.c
> @@ -28,6 +28,19 @@ static void del_symlink(struct kobject *from, struct kobject *to)
>   	sysfs_remove_link(from, kobject_name(to));
>   }
>   
> +static int __link_disk_holder(struct block_device *bdev, struct gendisk *disk)
> +{
> +	int ret;
> +
> +	ret = add_symlink(disk->slave_dir, bdev_kobj(bdev));
> +	if (ret)
> +		return ret;
> +	ret = add_symlink(bdev->bd_holder_dir, &disk_to_dev(disk)->kobj);
> +	if (ret)
> +		del_symlink(disk->slave_dir, bdev_kobj(bdev));
> +	return ret;
> +}
> +
>   /**
>    * bd_link_disk_holder - create symlinks between holding disk and slave bdev
>    * @bdev: the claimed slave bdev
> @@ -66,7 +79,7 @@ int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk)
>   	WARN_ON_ONCE(!bdev->bd_holder);
>   
>   	/* FIXME: remove the following once add_disk() handles errors */
> -	if (WARN_ON(!disk->slave_dir || !bdev->bd_holder_dir))
> +	if (WARN_ON(!bdev->bd_holder_dir))
>   		goto out_unlock;
>   
>   	holder = bd_find_holder_disk(bdev, disk);
> @@ -84,28 +97,28 @@ int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk)
>   	INIT_LIST_HEAD(&holder->list);
>   	holder->bdev = bdev;
>   	holder->refcnt = 1;
> -
> -	ret = add_symlink(disk->slave_dir, bdev_kobj(bdev));
> -	if (ret)
> -		goto out_free;
> -
> -	ret = add_symlink(bdev->bd_holder_dir, &disk_to_dev(disk)->kobj);
> -	if (ret)
> -		goto out_del;
> +	if (disk->slave_dir) {
> +		ret = __link_disk_holder(bdev, disk);
> +		if (ret) {
> +			kfree(holder);
> +			goto out_unlock;
> +		}
> +	}
>   
>   	list_add(&holder->list, &disk->slave_bdevs);
> -	goto out_unlock;
> -
> -out_del:
> -	del_symlink(disk->slave_dir, bdev_kobj(bdev));
> -out_free:
> -	kfree(holder);
>   out_unlock:
>   	mutex_unlock(&disk->open_mutex);
>   	return ret;
>   }
>   EXPORT_SYMBOL_GPL(bd_link_disk_holder);
>   
> +static void __unlink_disk_holder(struct block_device *bdev,
> +		struct gendisk *disk)
> +{
> +	del_symlink(disk->slave_dir, bdev_kobj(bdev));
> +	del_symlink(bdev->bd_holder_dir, &disk_to_dev(disk)->kobj);
> +}
> +
>   /**
>    * bd_unlink_disk_holder - destroy symlinks created by bd_link_disk_holder()
>    * @bdev: the calimed slave bdev
> @@ -123,11 +136,32 @@ void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk)
>   	mutex_lock(&disk->open_mutex);
>   	holder = bd_find_holder_disk(bdev, disk);
>   	if (!WARN_ON_ONCE(holder == NULL) && !--holder->refcnt) {
> -		del_symlink(disk->slave_dir, bdev_kobj(bdev));
> -		del_symlink(bdev->bd_holder_dir, &disk_to_dev(disk)->kobj);
> +		if (disk->slave_dir)
> +			__unlink_disk_holder(bdev, disk);
>   		list_del_init(&holder->list);
>   		kfree(holder);
>   	}
>   	mutex_unlock(&disk->open_mutex);
>   }
>   EXPORT_SYMBOL_GPL(bd_unlink_disk_holder);
> +
> +int bd_register_pending_holders(struct gendisk *disk)
> +{
> +	struct bd_holder_disk *holder;
> +	int ret;
> +
> +	mutex_lock(&disk->open_mutex);
> +	list_for_each_entry(holder, &disk->slave_bdevs, list) {
> +		ret = __link_disk_holder(holder->bdev, disk);
> +		if (ret)
> +			goto out_undo;
> +	}
> +	mutex_unlock(&disk->open_mutex);
> +	return 0;
> +
> +out_undo:
> +	list_for_each_entry_continue_reverse(holder, &disk->slave_bdevs, list)
> +		__unlink_disk_holder(holder->bdev, disk);
> +	mutex_unlock(&disk->open_mutex);
> +	return ret;
> +}
> diff --git a/include/linux/genhd.h b/include/linux/genhd.h
> index 0721807d76ee..80952f038d79 100644
> --- a/include/linux/genhd.h
> +++ b/include/linux/genhd.h
> @@ -323,6 +323,7 @@ long compat_blkdev_ioctl(struct file *, unsigned, unsigned long);
>   #ifdef CONFIG_BLOCK_HOLDER_DEPRECATED
>   int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk);
>   void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk);
> +int bd_register_pending_holders(struct gendisk *disk);
>   #else
>   static inline int bd_link_disk_holder(struct block_device *bdev,
>   				      struct gendisk *disk)
> @@ -333,6 +334,10 @@ static inline void bd_unlink_disk_holder(struct block_device *bdev,
>   					 struct gendisk *disk)
>   {
>   }
> +static inline int bd_register_pending_holders(struct gendisk *disk)
> +{
> +	return 0;
> +}
>   #endif /* CONFIG_BLOCK_HOLDER_DEPRECATED */
>   
>   dev_t part_devt(struct gendisk *disk, u8 partno);

Best regards
-- 
Marek Szyprowski, PhD
Samsung R&D Institute Poland




[Index of Archives]     [Linux RAID]     [Linux SCSI]     [Linux ATA RAID]     [IDE]     [Linux Wireless]     [Linux Kernel]     [ATH6KL]     [Linux Bluetooth]     [Linux Netdev]     [Kernel Newbies]     [Security]     [Git]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Device Mapper]

  Powered by Linux