Re: [PATCH stable 5.10 1/3] block: look up holders by bdev

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Mon, Aug 01, 2022 at 09:39:30PM +0800, Yu Kuai wrote:
> Hi, Greg
> 
> 在 2022/08/01 21:17, Greg KH 写道:
> > On Mon, Aug 01, 2022 at 08:25:27PM +0800, Yu Kuai wrote:
> > > Hi, Greg
> > > 
> > > 在 2022/08/01 19:19, Greg KH 写道:
> > > > On Fri, Jul 29, 2022 at 02:23:54PM +0800, Yu Kuai wrote:
> > > > > From: Christoph Hellwig <hch@xxxxxx>
> > > > > 
> > > > > commit 0dbcfe247f22a6d73302dfa691c48b3c14d31c4c upstream.
> > > > > 
> > > > > > Invert the way the holder relations are tracked.  This very
> > > > > slightly reduces the memory overhead for partitioned devices.
> > > > > 
> > > > > Signed-off-by: Christoph Hellwig <hch@xxxxxx>
> > > > > Signed-off-by: Yu Kuai <yukuai3@xxxxxxxxxx>
> > > > > ---
> > > > >    block/genhd.c             |  3 +++
> > > > >    fs/block_dev.c            | 31 +++++++++++++++++++------------
> > > > >    include/linux/blk_types.h |  3 ---
> > > > >    include/linux/genhd.h     |  4 +++-
> > > > >    4 files changed, 25 insertions(+), 16 deletions(-)
> > > > > 
> > > > > diff --git a/block/genhd.c b/block/genhd.c
> > > > > index 796baf761202..2b11a2735285 100644
> > > > > --- a/block/genhd.c
> > > > > +++ b/block/genhd.c
> > > > > @@ -1760,6 +1760,9 @@ struct gendisk *__alloc_disk_node(int minors, int node_id)
> > > > >    	disk_to_dev(disk)->class = &block_class;
> > > > >    	disk_to_dev(disk)->type = &disk_type;
> > > > >    	device_initialize(disk_to_dev(disk));
> > > > > +#ifdef CONFIG_SYSFS
> > > > > +	INIT_LIST_HEAD(&disk->slave_bdevs);
> > > > > +#endif
> > > > >    	return disk;
> > > > >    out_free_part0:
> > > > > diff --git a/fs/block_dev.c b/fs/block_dev.c
> > > > > index 29f020c4b2d0..a202c76fcf7f 100644
> > > > > --- a/fs/block_dev.c
> > > > > +++ b/fs/block_dev.c
> > > > > @@ -823,9 +823,6 @@ static void init_once(void *foo)
> > > > >    	memset(bdev, 0, sizeof(*bdev));
> > > > >    	mutex_init(&bdev->bd_mutex);
> > > > > -#ifdef CONFIG_SYSFS
> > > > > -	INIT_LIST_HEAD(&bdev->bd_holder_disks);
> > > > > -#endif
> > > > >    	bdev->bd_bdi = &noop_backing_dev_info;
> > > > >    	inode_init_once(&ei->vfs_inode);
> > > > >    	/* Initialize mutex for freeze. */
> > > > > @@ -1188,7 +1185,7 @@ EXPORT_SYMBOL(bd_abort_claiming);
> > > > >    #ifdef CONFIG_SYSFS
> > > > >    struct bd_holder_disk {
> > > > >    	struct list_head	list;
> > > > > -	struct gendisk		*disk;
> > > > > +	struct block_device	*bdev;
> > > > >    	int			refcnt;
> > > > >    };
> > > > > @@ -1197,8 +1194,8 @@ static struct bd_holder_disk *bd_find_holder_disk(struct block_device *bdev,
> > > > >    {
> > > > >    	struct bd_holder_disk *holder;
> > > > > -	list_for_each_entry(holder, &bdev->bd_holder_disks, list)
> > > > > -		if (holder->disk == disk)
> > > > > +	list_for_each_entry(holder, &disk->slave_bdevs, list)
> > > > > +		if (holder->bdev == bdev)
> > > > >    			return holder;
> > > > >    	return NULL;
> > > > >    }
> > > > > @@ -1244,9 +1241,13 @@ static void del_symlink(struct kobject *from, struct kobject *to)
> > > > >    int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk)
> > > > >    {
> > > > >    	struct bd_holder_disk *holder;
> > > > > +	struct block_device *bdev_holder = bdget_disk(disk, 0);
> > > > >    	int ret = 0;
> > > > > -	mutex_lock(&bdev->bd_mutex);
> > > > > +	if (WARN_ON_ONCE(!bdev_holder))
> > > > > +		return -ENOENT;
> > > > > +
> > > > > +	mutex_lock(&bdev_holder->bd_mutex);
> > > > >    	WARN_ON_ONCE(!bdev->bd_holder);
> > > > > @@ -1267,7 +1268,7 @@ int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk)
> > > > >    	}
> > > > >    	INIT_LIST_HEAD(&holder->list);
> > > > > -	holder->disk = disk;
> > > > > +	holder->bdev = bdev;
> > > > >    	holder->refcnt = 1;
> > > > >    	ret = add_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
> > > > > @@ -1283,7 +1284,7 @@ int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk)
> > > > >    	 */
> > > > >    	kobject_get(bdev->bd_part->holder_dir);
> > > > > -	list_add(&holder->list, &bdev->bd_holder_disks);
> > > > > +	list_add(&holder->list, &disk->slave_bdevs);
> > > > >    	goto out_unlock;
> > > > >    out_del:
> > > > > @@ -1291,7 +1292,8 @@ int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk)
> > > > >    out_free:
> > > > >    	kfree(holder);
> > > > >    out_unlock:
> > > > > -	mutex_unlock(&bdev->bd_mutex);
> > > > > +	mutex_unlock(&bdev_holder->bd_mutex);
> > > > > +	bdput(bdev_holder);
> > > > >    	return ret;
> > > > >    }
> > > > >    EXPORT_SYMBOL_GPL(bd_link_disk_holder);
> > > > > @@ -1309,8 +1311,12 @@ EXPORT_SYMBOL_GPL(bd_link_disk_holder);
> > > > >    void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk)
> > > > >    {
> > > > >    	struct bd_holder_disk *holder;
> > > > > +	struct block_device *bdev_holder = bdget_disk(disk, 0);
> > > > > -	mutex_lock(&bdev->bd_mutex);
> > > > > +	if (WARN_ON_ONCE(!bdev_holder))
> > > > > +		return;
> > > > > +
> > > > > +	mutex_lock(&bdev_holder->bd_mutex);
> > > > >    	holder = bd_find_holder_disk(bdev, disk);
> > > > > @@ -1323,7 +1329,8 @@ void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk)
> > > > >    		kfree(holder);
> > > > >    	}
> > > > > -	mutex_unlock(&bdev->bd_mutex);
> > > > > +	mutex_unlock(&bdev_holder->bd_mutex);
> > > > > +	bdput(bdev_holder);
> > > > >    }
> > > > >    EXPORT_SYMBOL_GPL(bd_unlink_disk_holder);
> > > > >    #endif
> > > > > diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
> > > > > index d9b69bbde5cc..1b84ecb34c18 100644
> > > > > --- a/include/linux/blk_types.h
> > > > > +++ b/include/linux/blk_types.h
> > > > > @@ -29,9 +29,6 @@ struct block_device {
> > > > >    	void *			bd_holder;
> > > > >    	int			bd_holders;
> > > > >    	bool			bd_write_holder;
> > > > > -#ifdef CONFIG_SYSFS
> > > > > -	struct list_head	bd_holder_disks;
> > > > > -#endif
> > > > >    	struct block_device *	bd_contains;
> > > > >    	u8			bd_partno;
> > > > >    	struct hd_struct *	bd_part;
> > > > > diff --git a/include/linux/genhd.h b/include/linux/genhd.h
> > > > > index 03da3f603d30..3e5049a527e6 100644
> > > > > --- a/include/linux/genhd.h
> > > > > +++ b/include/linux/genhd.h
> > > > > @@ -195,7 +195,9 @@ struct gendisk {
> > > > >    #define GD_NEED_PART_SCAN		0
> > > > >    	struct rw_semaphore lookup_sem;
> > > > >    	struct kobject *slave_dir;
> > > > > -
> > > > > +#ifdef CONFIG_SYSFS
> > > > > +	struct list_head	slave_bdevs;
> > > > > +#endif
> > > > 
> > > > This is very different from the upstream version, and forces the change
> > > > onto everyone, not just those who had CONFIG_BLOCK_HOLDER_DEPRECATED
> > > > enabled like was done in the main kernel tree.
> > > > 
> > > > Why force this on all and not just use the same option?
> > > > 
> > > > I would need a strong ACK from the original developers and maintainers
> > > > of these subsystems before being able to take these into the 5.10 tree.
> > > 
> > > Yes, I agree that Christoph must take a look first.
> > > > 
> > > > What prevents you from just using 5.15 for those systems that run into
> > > > these issues?
> > > 
> > > The null pointer problem was reported against our product (related to
> > > dm-mpath), which has been using 5.10 for a long time. Switching
> > > kernels for a commercial product would require lots of work, so it's
> > > not an option for now.
> > 
> > It should be the same validation and verification and testing path for
> > 5.15.y as for an intrusive kernel change as this one, right?  What makes
> > it any different to prevent 5.15 from being used?
> 
> No, they are not the same. If we try to use 5.15.y, we have to test all
> the other things that our product currently uses (ext4, block, scsi and
> lots of other modules). With this patch, we only need to make sure
> dm works fine.

That is a very odd way to test a monolithic kernel image, where any
change in any part can affect any other part of the kernel.  You touched
the block layer; why you wouldn't think that all block-related things
would also have to be tested is very strange to me, as those are directly
related.  And while you are at it, you should test all other subsystems,
as the system is released as a whole, not as individual changes that are
unrelated.

I think you need to revisit your testing strategy, good luck!

greg k-h



[Index of Archives]     [Linux Kernel]     [Kernel Development Newbies]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite Hiking]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux