[PATCH] dax: allow DAX to look up an inode's block device

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



There are a number of places in dax.c that look up the struct block_device
associated with an inode.  Previously this was done by just using
inode->i_sb->s_bdev.  This is correct in some cases, such as when using
ext2 and ext4.

However, for raw block devices and for XFS with a real-time device, the
value in inode->i_sb->s_bdev is not correct.  With the code as it is
currently written, an fsync or msync to a DAX-enabled raw block device will
cause a NULL pointer dereference kernel BUG.  For this to work correctly, we
need to ask the block device or filesystem what struct block_device is
appropriate for our inode.

To that end, add a get_bdev(struct inode *) entry point to struct
super_operations.  If this function pointer is non-NULL, DAX calls it to
look up the correct block_device.  If inode->i_sb->s_op->get_bdev is NULL,
DAX defaults to inode->i_sb->s_bdev.

I added the function to super_operations instead of another alternative
like inode_operations because the function pointer varies by filesystem or
block device, not per inode.  I believe that this will also save memory
because there is only one struct super_operations per mounted filesystem
but there could be many struct inode_operations and there is no need to
keep many copies of the same function pointer in memory.

Signed-off-by: Ross Zwisler <ross.zwisler@xxxxxxxxxxxxxxx>
---
 fs/block_dev.c     |  6 ++++++
 fs/dax.c           | 20 ++++++++++++++------
 fs/xfs/xfs_aops.c  |  2 +-
 fs/xfs/xfs_aops.h  |  1 +
 fs/xfs/xfs_super.c |  1 +
 include/linux/fs.h |  1 +
 6 files changed, 24 insertions(+), 7 deletions(-)

diff --git a/fs/block_dev.c b/fs/block_dev.c
index fa0507a..845b049 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -156,6 +156,11 @@ blkdev_get_block(struct inode *inode, sector_t iblock,
 	return 0;
 }
 
+static struct block_device *blkdev_get_bdev(struct inode *inode)
+{
+	return I_BDEV(inode);
+}
+
 static struct inode *bdev_file_inode(struct file *file)
 {
 	return file->f_mapping->host;
@@ -569,6 +574,7 @@ static const struct super_operations bdev_sops = {
 	.alloc_inode = bdev_alloc_inode,
 	.destroy_inode = bdev_destroy_inode,
 	.drop_inode = generic_delete_inode,
+	.get_bdev = blkdev_get_bdev,
 	.evict_inode = bdev_evict_inode,
 };
 
diff --git a/fs/dax.c b/fs/dax.c
index 227974a..c701ea4 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -32,6 +32,14 @@
 #include <linux/pfn_t.h>
 #include <linux/sizes.h>
 
+static struct block_device *dax_get_bdev(struct inode *inode)
+{
+	if (inode->i_sb->s_op->get_bdev)
+		return inode->i_sb->s_op->get_bdev(inode);
+	else
+		return inode->i_sb->s_bdev;
+}
+
 static long dax_map_atomic(struct block_device *bdev, struct blk_dax_ctl *dax)
 {
 	struct request_queue *q = bdev->bd_queue;
@@ -85,7 +93,7 @@ struct page *read_dax_sector(struct block_device *bdev, sector_t n)
  */
 int dax_clear_blocks(struct inode *inode, sector_t block, long _size)
 {
-	struct block_device *bdev = inode->i_sb->s_bdev;
+	struct block_device *bdev = dax_get_bdev(inode);
 	struct blk_dax_ctl dax = {
 		.sector = block << (inode->i_blkbits - 9),
 		.size = _size,
@@ -266,7 +274,7 @@ ssize_t dax_do_io(struct kiocb *iocb, struct inode *inode,
 	loff_t end = pos + iov_iter_count(iter);
 
 	memset(&bh, 0, sizeof(bh));
-	bh.b_bdev = inode->i_sb->s_bdev;
+	bh.b_bdev = dax_get_bdev(inode);
 
 	if ((flags & DIO_LOCKING) && iov_iter_rw(iter) == READ) {
 		struct address_space *mapping = inode->i_mapping;
@@ -488,7 +496,7 @@ int dax_writeback_mapping_range(struct address_space *mapping, loff_t start,
 		loff_t end)
 {
 	struct inode *inode = mapping->host;
-	struct block_device *bdev = inode->i_sb->s_bdev;
+	struct block_device *bdev = dax_get_bdev(inode);
 	pgoff_t start_index, end_index, pmd_index;
 	pgoff_t indices[PAGEVEC_SIZE];
 	struct pagevec pvec;
@@ -628,7 +636,7 @@ int __dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
 
 	memset(&bh, 0, sizeof(bh));
 	block = (sector_t)vmf->pgoff << (PAGE_SHIFT - blkbits);
-	bh.b_bdev = inode->i_sb->s_bdev;
+	bh.b_bdev = dax_get_bdev(inode);
 	bh.b_size = PAGE_SIZE;
 
  repeat:
@@ -847,7 +855,7 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address,
 	}
 
 	memset(&bh, 0, sizeof(bh));
-	bh.b_bdev = inode->i_sb->s_bdev;
+	bh.b_bdev = dax_get_bdev(inode);
 	block = (sector_t)pgoff << (PAGE_SHIFT - blkbits);
 
 	bh.b_size = PMD_SIZE;
@@ -1100,7 +1108,7 @@ int dax_zero_page_range(struct inode *inode, loff_t from, unsigned length,
 	BUG_ON((offset + length) > PAGE_CACHE_SIZE);
 
 	memset(&bh, 0, sizeof(bh));
-	bh.b_bdev = inode->i_sb->s_bdev;
+	bh.b_bdev = dax_get_bdev(inode);
 	bh.b_size = PAGE_CACHE_SIZE;
 	err = get_block(inode, index, &bh, 0);
 	if (err < 0)
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 379c089..fc20518 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -55,7 +55,7 @@ xfs_count_page_state(
 	} while ((bh = bh->b_this_page) != head);
 }
 
-STATIC struct block_device *
+struct block_device *
 xfs_find_bdev_for_inode(
 	struct inode		*inode)
 {
diff --git a/fs/xfs/xfs_aops.h b/fs/xfs/xfs_aops.h
index f6ffc9a..a4343c6 100644
--- a/fs/xfs/xfs_aops.h
+++ b/fs/xfs/xfs_aops.h
@@ -62,5 +62,6 @@ int	xfs_get_blocks_dax_fault(struct inode *inode, sector_t offset,
 			         struct buffer_head *map_bh, int create);
 
 extern void xfs_count_page_state(struct page *, int *, int *);
+extern struct block_device *xfs_find_bdev_for_inode(struct inode *);
 
 #endif /* __XFS_AOPS_H__ */
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 59c9b7b..26e7051 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -1623,6 +1623,7 @@ static const struct super_operations xfs_super_operations = {
 	.destroy_inode		= xfs_fs_destroy_inode,
 	.evict_inode		= xfs_fs_evict_inode,
 	.drop_inode		= xfs_fs_drop_inode,
+	.get_bdev		= xfs_find_bdev_for_inode,
 	.put_super		= xfs_fs_put_super,
 	.sync_fs		= xfs_fs_sync_fs,
 	.freeze_fs		= xfs_fs_freeze,
diff --git a/include/linux/fs.h b/include/linux/fs.h
index b10002d..5b636eb 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1730,6 +1730,7 @@ struct super_operations {
 	int (*write_inode) (struct inode *, struct writeback_control *wbc);
 	int (*drop_inode) (struct inode *);
 	void (*evict_inode) (struct inode *);
+	struct block_device *(*get_bdev) (struct inode *);
 	void (*put_super) (struct super_block *);
 	int (*sync_fs)(struct super_block *sb, int wait);
 	int (*freeze_super) (struct super_block *);
-- 
2.5.0

--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html



[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [Samba]     [Device Mapper]     [CEPH Development]
  Powered by Linux