Re: [RFC v4 linux-next 19/19] fs & block: remove bdev->bd_inode

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi,

在 2024/03/19 7:22, Christoph Hellwig 写道:
On Mon, Mar 18, 2024 at 03:19:03PM +0800, Yu Kuai wrote:
I came up with an idea:

When the block_device is opened for the first time, store the newly
generated file in "bd_inode->i_private", and release it after the last
opener closes the block_device.

The advantages are:
  - multiple openers can share the same bdev_file;
  - raw block device ops can use the bdev_file as well, and there is no
need to distinguish iomap/buffer_head for raw block_device;

Please let me know what you think.

That does sound very reasonable to me.

I just implemented the idea with the following patch (not fully tested,
just a boot and some blktests).

Please let me know what you think.
Thanks!
Kuai

diff --git a/block/bdev.c b/block/bdev.c
index d42a6bc73474..8bc8962c59a5 100644
--- a/block/bdev.c
+++ b/block/bdev.c
@@ -899,14 +899,6 @@ int bdev_open(struct block_device *bdev, blk_mode_t mode, void *holder,
        if (unblock_events)
                disk_unblock_events(disk);

-       bdev_file->f_flags |= O_LARGEFILE;
-       bdev_file->f_mode |= FMODE_BUF_RASYNC | FMODE_CAN_ODIRECT;
-       if (bdev_nowait(bdev))
-               bdev_file->f_mode |= FMODE_NOWAIT;
-       bdev_file->f_mapping = bdev_mapping(bdev);
-       bdev_file->f_wb_err = filemap_sample_wb_err(bdev_file->f_mapping);
-       bdev_file->private_data = holder;
-
        return 0;
 put_module:
        module_put(disk->fops->owner);
@@ -948,12 +940,66 @@ static unsigned blk_to_file_flags(blk_mode_t mode)
        return flags;
 }

+struct file *alloc_and_init_bdev_file(struct block_device *bdev,
+                                     blk_mode_t mode, void *holder)
+{
+       struct file *bdev_file = alloc_file_pseudo_noaccount(bdev_inode(bdev),
+                       blockdev_mnt, "", blk_to_file_flags(mode) | O_LARGEFILE,
+                       &def_blk_fops);
+
+       if (IS_ERR(bdev_file))
+               return bdev_file;
+
+       bdev_file->f_flags |= O_LARGEFILE;
+       bdev_file->f_mode |= FMODE_BUF_RASYNC | FMODE_CAN_ODIRECT;
+       if (bdev_nowait(bdev))
+               bdev_file->f_mode |= FMODE_NOWAIT;
+       bdev_file->f_mapping = bdev_mapping(bdev);
+       bdev_file->f_wb_err = filemap_sample_wb_err(bdev_file->f_mapping);
+       bdev_file->private_data = holder;
+
+       return bdev_file;
+}
+
+void get_bdev_file(struct block_device *bdev, struct file *bdev_file)
+{
+       struct inode *bd_inode = bdev_inode(bdev);
+       struct file *file;
+
+       mutex_lock(&bdev->bd_disk->open_mutex);
+       file = bd_inode->i_private;
+
+       if (!file) {
+               get_file(bdev_file);
+               bd_inode->i_private = bdev_file;
+       } else {
+               get_file(file);
+       }
+
+       mutex_unlock(&bdev->bd_disk->open_mutex);
+}
+
+void put_bdev_file(struct block_device *bdev)
+{
+       struct file *file = NULL;
+       struct inode *bd_inode = bdev_inode(bdev);
+
+       mutex_lock(&bdev->bd_disk->open_mutex);
+       file = bd_inode->i_private;
+
+       if (!atomic_read(&bdev->bd_openers))
+               bd_inode->i_private = NULL;
+
+       mutex_unlock(&bdev->bd_disk->open_mutex);
+
+       fput(file);
+}
+
struct file *bdev_file_open_by_dev(dev_t dev, blk_mode_t mode, void *holder,
                                   const struct blk_holder_ops *hops)
 {
        struct file *bdev_file;
        struct block_device *bdev;
-       unsigned int flags;
        int ret;

        ret = bdev_permission(dev, mode, holder);
@@ -964,20 +1010,20 @@ struct file *bdev_file_open_by_dev(dev_t dev, blk_mode_t mode, void *holder,
        if (!bdev)
                return ERR_PTR(-ENXIO);

-       flags = blk_to_file_flags(mode);
-       bdev_file = alloc_file_pseudo_noaccount(bdev_inode(bdev),
- blockdev_mnt, "", flags | O_LARGEFILE, &def_blk_fops);
+       bdev_file = alloc_and_init_bdev_file(bdev, mode, holder);
        if (IS_ERR(bdev_file)) {
                blkdev_put_no_open(bdev);
                return bdev_file;
        }
        ihold(bdev_inode(bdev));
+       get_bdev_file(bdev, bdev_file);

        ret = bdev_open(bdev, mode, holder, hops, bdev_file);
        if (ret) {
/* We failed to open the block device. Let ->release() know. */
                bdev_file->private_data = ERR_PTR(ret);
                fput(bdev_file);
+               put_bdev_file(bdev);
                return ERR_PTR(ret);
        }
        return bdev_file;
@@ -1049,6 +1095,7 @@ void bdev_release(struct file *bdev_file)

        module_put(disk->fops->owner);
 put_no_open:
+       put_bdev_file(bdev);
        blkdev_put_no_open(bdev);
 }

diff --git a/block/blk.h b/block/blk.h
index 5ac293179bfb..ebe99dc9cff5 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -518,6 +518,10 @@ static inline int req_ref_read(struct request *req)
        return atomic_read(&req->ref);
 }

+struct file *alloc_and_init_bdev_file(struct block_device *bdev,
+                                     blk_mode_t mode, void *holder);
+void get_bdev_file(struct block_device *bdev, struct file *bdev_file);
+void put_bdev_file(struct block_device *bdev);
 void bdev_release(struct file *bdev_file);
 int bdev_open(struct block_device *bdev, blk_mode_t mode, void *holder,
              const struct blk_holder_ops *hops, struct file *bdev_file);
diff --git a/block/fops.c b/block/fops.c
index 4037ae72a919..059f6c7d3c09 100644
--- a/block/fops.c
+++ b/block/fops.c
@@ -382,7 +382,7 @@ static ssize_t blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
 static int blkdev_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
                unsigned int flags, struct iomap *iomap, struct iomap *srcmap)
 {
-       struct block_device *bdev = I_BDEV(inode);
+       struct block_device *bdev = file_bdev(inode->i_private);
        loff_t isize = i_size_read(inode);

        iomap->bdev = bdev;
@@ -404,7 +404,7 @@ static const struct iomap_ops blkdev_iomap_ops = {
 static int blkdev_get_block(struct inode *inode, sector_t iblock,
                struct buffer_head *bh, int create)
 {
-       bh->b_bdev = I_BDEV(inode);
+       bh->b_bdev = file_bdev(inode->i_private);
        bh->b_blocknr = iblock;
        set_buffer_mapped(bh);
        return 0;
@@ -598,6 +598,7 @@ blk_mode_t file_to_blk_mode(struct file *file)

 static int blkdev_open(struct inode *inode, struct file *filp)
 {
+       struct file *bdev_file;
        struct block_device *bdev;
        blk_mode_t mode;
        int ret;
@@ -614,9 +615,28 @@ static int blkdev_open(struct inode *inode, struct file *filp)
        if (!bdev)
                return -ENXIO;

+       bdev_file = alloc_and_init_bdev_file(bdev,
+                       BLK_OPEN_READ | BLK_OPEN_WRITE, NULL);
+       if (IS_ERR(bdev_file)) {
+               blkdev_put_no_open(bdev);
+               return PTR_ERR(bdev_file);
+       }
+
+       bdev_file->private_data = ERR_PTR(-EINVAL);
+       get_bdev_file(bdev, bdev_file);
        ret = bdev_open(bdev, mode, filp->private_data, NULL, filp);
-       if (ret)
+       if (ret) {
+               put_bdev_file(bdev);
                blkdev_put_no_open(bdev);
+       } else {
+               filp->f_flags |= O_LARGEFILE;
+               filp->f_mode |= FMODE_BUF_RASYNC | FMODE_CAN_ODIRECT;
+               if (bdev_nowait(bdev))
+                       filp->f_mode |= FMODE_NOWAIT;
+               filp->f_mapping = bdev_mapping(bdev);
+ filp->f_wb_err = filemap_sample_wb_err(bdev_file->f_mapping);
+       }
+
        return ret;
 }

.






[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [NTFS 3]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [NTFS 3]     [Samba]     [Device Mapper]     [CEPH Development]

  Powered by Linux