Hi,
在 2024/03/19 7:22, Christoph Hellwig 写道:
On Mon, Mar 18, 2024 at 03:19:03PM +0800, Yu Kuai wrote:
I came up with an idea:
When the block_device is opened for the first time, store the newly
generated file in "bd_inode->i_private", and release it after the last
opener closes the block_device.
The advantages are:
- multiple openers can share the same bdev_file;
- raw block device ops can use the bdev_file as well, and there is no
need to distinguish iomap/buffer_head for raw block_device;
Please let me know what you think.
That does sound very reasonable to me.
I just implemented the idea with the following patch (not fully tested,
just a boot test and some blktests).
Please let me know what you think.
Thanks!
Kuai
diff --git a/block/bdev.c b/block/bdev.c
index d42a6bc73474..8bc8962c59a5 100644
--- a/block/bdev.c
+++ b/block/bdev.c
@@ -899,14 +899,6 @@ int bdev_open(struct block_device *bdev, blk_mode_t
mode, void *holder,
if (unblock_events)
disk_unblock_events(disk);
- bdev_file->f_flags |= O_LARGEFILE;
- bdev_file->f_mode |= FMODE_BUF_RASYNC | FMODE_CAN_ODIRECT;
- if (bdev_nowait(bdev))
- bdev_file->f_mode |= FMODE_NOWAIT;
- bdev_file->f_mapping = bdev_mapping(bdev);
- bdev_file->f_wb_err = filemap_sample_wb_err(bdev_file->f_mapping);
- bdev_file->private_data = holder;
-
return 0;
put_module:
module_put(disk->fops->owner);
@@ -948,12 +940,66 @@ static unsigned blk_to_file_flags(blk_mode_t mode)
return flags;
}
+struct file *alloc_and_init_bdev_file(struct block_device *bdev,
+ blk_mode_t mode, void *holder)
+{
+ struct file *bdev_file =
alloc_file_pseudo_noaccount(bdev_inode(bdev),
+ blockdev_mnt, "", blk_to_file_flags(mode) |
O_LARGEFILE,
+ &def_blk_fops);
+
+ if (IS_ERR(bdev_file))
+ return bdev_file;
+
+ bdev_file->f_flags |= O_LARGEFILE;
+ bdev_file->f_mode |= FMODE_BUF_RASYNC | FMODE_CAN_ODIRECT;
+ if (bdev_nowait(bdev))
+ bdev_file->f_mode |= FMODE_NOWAIT;
+ bdev_file->f_mapping = bdev_mapping(bdev);
+ bdev_file->f_wb_err = filemap_sample_wb_err(bdev_file->f_mapping);
+ bdev_file->private_data = holder;
+
+ return bdev_file;
+}
+
+void get_bdev_file(struct block_device *bdev, struct file *bdev_file)
+{
+ struct inode *bd_inode = bdev_inode(bdev);
+ struct file *file;
+
+ mutex_lock(&bdev->bd_disk->open_mutex);
+ file = bd_inode->i_private;
+
+ if (!file) {
+ get_file(bdev_file);
+ bd_inode->i_private = bdev_file;
+ } else {
+ get_file(file);
+ }
+
+ mutex_unlock(&bdev->bd_disk->open_mutex);
+}
+
+void put_bdev_file(struct block_device *bdev)
+{
+ struct file *file = NULL;
+ struct inode *bd_inode = bdev_inode(bdev);
+
+ mutex_lock(&bdev->bd_disk->open_mutex);
+ file = bd_inode->i_private;
+
+ if (!atomic_read(&bdev->bd_openers))
+ bd_inode->i_private = NULL;
+
+ mutex_unlock(&bdev->bd_disk->open_mutex);
+
+ fput(file);
+}
+
struct file *bdev_file_open_by_dev(dev_t dev, blk_mode_t mode, void
*holder,
const struct blk_holder_ops *hops)
{
struct file *bdev_file;
struct block_device *bdev;
- unsigned int flags;
int ret;
ret = bdev_permission(dev, mode, holder);
@@ -964,20 +1010,20 @@ struct file *bdev_file_open_by_dev(dev_t dev,
blk_mode_t mode, void *holder,
if (!bdev)
return ERR_PTR(-ENXIO);
- flags = blk_to_file_flags(mode);
- bdev_file = alloc_file_pseudo_noaccount(bdev_inode(bdev),
- blockdev_mnt, "", flags | O_LARGEFILE,
&def_blk_fops);
+ bdev_file = alloc_and_init_bdev_file(bdev, mode, holder);
if (IS_ERR(bdev_file)) {
blkdev_put_no_open(bdev);
return bdev_file;
}
ihold(bdev_inode(bdev));
+ get_bdev_file(bdev, bdev_file);
ret = bdev_open(bdev, mode, holder, hops, bdev_file);
if (ret) {
/* We failed to open the block device. Let ->release()
know. */
bdev_file->private_data = ERR_PTR(ret);
fput(bdev_file);
+ put_bdev_file(bdev);
return ERR_PTR(ret);
}
return bdev_file;
@@ -1049,6 +1095,7 @@ void bdev_release(struct file *bdev_file)
module_put(disk->fops->owner);
put_no_open:
+ put_bdev_file(bdev);
blkdev_put_no_open(bdev);
}
diff --git a/block/blk.h b/block/blk.h
index 5ac293179bfb..ebe99dc9cff5 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -518,6 +518,10 @@ static inline int req_ref_read(struct request *req)
return atomic_read(&req->ref);
}
+struct file *alloc_and_init_bdev_file(struct block_device *bdev,
+ blk_mode_t mode, void *holder);
+void get_bdev_file(struct block_device *bdev, struct file *bdev_file);
+void put_bdev_file(struct block_device *bdev);
void bdev_release(struct file *bdev_file);
int bdev_open(struct block_device *bdev, blk_mode_t mode, void *holder,
const struct blk_holder_ops *hops, struct file *bdev_file);
diff --git a/block/fops.c b/block/fops.c
index 4037ae72a919..059f6c7d3c09 100644
--- a/block/fops.c
+++ b/block/fops.c
@@ -382,7 +382,7 @@ static ssize_t blkdev_direct_IO(struct kiocb *iocb,
struct iov_iter *iter)
static int blkdev_iomap_begin(struct inode *inode, loff_t offset,
loff_t length,
unsigned int flags, struct iomap *iomap, struct iomap
*srcmap)
{
- struct block_device *bdev = I_BDEV(inode);
+ struct block_device *bdev = file_bdev(inode->i_private);
loff_t isize = i_size_read(inode);
iomap->bdev = bdev;
@@ -404,7 +404,7 @@ static const struct iomap_ops blkdev_iomap_ops = {
static int blkdev_get_block(struct inode *inode, sector_t iblock,
struct buffer_head *bh, int create)
{
- bh->b_bdev = I_BDEV(inode);
+ bh->b_bdev = file_bdev(inode->i_private);
bh->b_blocknr = iblock;
set_buffer_mapped(bh);
return 0;
@@ -598,6 +598,7 @@ blk_mode_t file_to_blk_mode(struct file *file)
static int blkdev_open(struct inode *inode, struct file *filp)
{
+ struct file *bdev_file;
struct block_device *bdev;
blk_mode_t mode;
int ret;
@@ -614,9 +615,28 @@ static int blkdev_open(struct inode *inode, struct
file *filp)
if (!bdev)
return -ENXIO;
+ bdev_file = alloc_and_init_bdev_file(bdev,
+ BLK_OPEN_READ | BLK_OPEN_WRITE, NULL);
+ if (IS_ERR(bdev_file)) {
+ blkdev_put_no_open(bdev);
+ return PTR_ERR(bdev_file);
+ }
+
+ bdev_file->private_data = ERR_PTR(-EINVAL);
+ get_bdev_file(bdev, bdev_file);
ret = bdev_open(bdev, mode, filp->private_data, NULL, filp);
- if (ret)
+ if (ret) {
+ put_bdev_file(bdev);
blkdev_put_no_open(bdev);
+ } else {
+ filp->f_flags |= O_LARGEFILE;
+ filp->f_mode |= FMODE_BUF_RASYNC | FMODE_CAN_ODIRECT;
+ if (bdev_nowait(bdev))
+ filp->f_mode |= FMODE_NOWAIT;
+ filp->f_mapping = bdev_mapping(bdev);
+ filp->f_wb_err =
filemap_sample_wb_err(bdev_file->f_mapping);
+ }
+
return ret;
}
.