[PATCH -v3] vfs: add releasepages hooks to block devices which can be used by file systems

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: Toshiyuki Okajima <toshi.okajima@xxxxxxxxxxxxxx>

Implement blkdev_releasepage() to release the buffer_heads and page
after we release private data which belongs to a client of the block
device, such as a filesystem.

blkdev_releasepage() call the client's releasepage() which is
registered by blkdev_register_client_releasepage() to release its
private data.

Signed-off-by: Toshiyuki Okajima <toshi.okajima@xxxxxxxxxxxxxx>
Signed-off-by: "Theodore Ts'o" <tytso@xxxxxxx>
Cc: linux-fsdevel@xxxxxxxxxxxxxxx
---
 fs/block_dev.c     |   68 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/super.c         |   22 ++++++++++++++++
 include/linux/fs.h |    9 +++++++
 3 files changed, 99 insertions(+), 0 deletions(-)

diff --git a/fs/block_dev.c b/fs/block_dev.c
index db831ef..bac0a38 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -29,6 +29,9 @@
 
 struct bdev_inode {
 	struct block_device bdev;
+	void *client;
+	int (*client_releasepage)(void*, struct page*, gfp_t);
+	rwlock_t client_lock;
 	struct inode vfs_inode;
 };
 
@@ -260,6 +263,9 @@ static struct inode *bdev_alloc_inode(struct super_block *sb)
 	struct bdev_inode *ei = kmem_cache_alloc(bdev_cachep, GFP_KERNEL);
 	if (!ei)
 		return NULL;
+	ei->client = NULL;
+	ei->client_releasepage = NULL;
+	rwlock_init(&ei->client_lock);
 	return &ei->vfs_inode;
 }
 
@@ -1208,6 +1214,67 @@ static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg)
 	return blkdev_ioctl(bdev, mode, cmd, arg);
 }
 
+/*
+ * blkdev_releasepage: execute ei->client_releasepage() if it exists.
+ * Otherwise, execute try_to_free_buffers().
+ * ei->client_releasepage() releases private client's page if possible.
+ * Because a buffer_head's using counter is bigger than 0 if a client has
+ * a page for private usage. If so, try_to_free_buffers() cannot release it.
+ * Therefore a client must try to release a page itself.
+ */
+static int blkdev_releasepage(struct page *page, gfp_t wait)
+{
+	struct bdev_inode *ei = BDEV_I(page->mapping->host);
+	int ret;
+
+	read_lock(&ei->client_lock);
+	if (ei->client_releasepage != NULL)
+		/*
+		 * Since we are holding a spinlock (ei->client_lock),
+		 * make sure the client_releasepage function
+		 * understands that it must not block.
+		 */
+		ret = (*ei->client_releasepage)(ei->client, page,
+						wait & ~__GFP_WAIT);
+	else
+		ret = try_to_free_buffers(page);
+	read_unlock(&ei->client_lock);
+	return ret;
+}
+
+/*
+ * blkdev_register_client_releasepage: register client_releasepage.
+ */
+int blkdev_register_client_releasepage(struct block_device *bdev,
+	void *client, int (*releasepage)(void*, struct page*, gfp_t))
+{
+	struct bdev_inode *ei = BDEV_I(bdev->bd_inode);
+	int ret = 1;
+
+	write_lock(&ei->client_lock);
+	if (ei->client == NULL && ei->client_releasepage == NULL) {
+		ei->client = client;
+		ei->client_releasepage = releasepage;
+	} else if (ei->client != client 
+			|| ei->client_releasepage != releasepage)
+		ret = 0;
+	write_unlock(&ei->client_lock);
+	return ret;
+}
+
+/*
+ * blkdev_unregister_client_releasepage: unregister client_releasepage.
+ */
+void blkdev_unregister_client_releasepage(struct block_device *bdev)
+{
+	struct bdev_inode *ei = BDEV_I(bdev->bd_inode);
+
+	write_lock(&ei->client_lock);
+	ei->client = NULL;
+	ei->client_releasepage = NULL;
+	write_unlock(&ei->client_lock);
+}
+
 static const struct address_space_operations def_blk_aops = {
 	.readpage	= blkdev_readpage,
 	.writepage	= blkdev_writepage,
@@ -1215,6 +1282,7 @@ static const struct address_space_operations def_blk_aops = {
 	.write_begin	= blkdev_write_begin,
 	.write_end	= blkdev_write_end,
 	.writepages	= generic_writepages,
+	.releasepage	= blkdev_releasepage,
 	.direct_IO	= blkdev_direct_IO,
 };
 
diff --git a/fs/super.c b/fs/super.c
index 400a760..fd254eb 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -801,6 +801,18 @@ int get_sb_bdev(struct file_system_type *fs_type,
 
 		s->s_flags |= MS_ACTIVE;
 	}
+	/*
+	 * register a client function which releases a page whose mapping is
+	 * block device 
+	 */
+	if (fs_type->release_metadata != NULL
+		&& !blkdev_register_client_releasepage(bdev, s,
+			fs_type->release_metadata)) {
+		up_write(&s->s_umount);
+		deactivate_super(s);
+		error = -EBUSY;
+		goto error_bdev;
+	}
 
 	return simple_set_mnt(mnt, s);
 
@@ -819,6 +831,16 @@ void kill_block_super(struct super_block *sb)
 	struct block_device *bdev = sb->s_bdev;
 	fmode_t mode = sb->s_mode;
 
+	/*
+	 * unregister a client function which releases a page whose mapping is
+	 * block device
+	 *
+	 * This is sure to be unmounting here, and it releases all own data
+	 * itself. Therefore the filesystem's function which is owned by the
+	 * block device, which releases its data is not needed any more.
+	 */
+	if (sb->s_type->release_metadata != NULL)
+		blkdev_unregister_client_releasepage(bdev);
 	generic_shutdown_super(sb);
 	sync_blockdev(bdev);
 	close_bdev_exclusive(bdev, mode);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 0dcdd94..398c8ed 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1538,6 +1538,7 @@ struct file_system_type {
 	int (*get_sb) (struct file_system_type *, int,
 		       const char *, void *, struct vfsmount *);
 	void (*kill_sb) (struct super_block *);
+	int (*release_metadata)(void*, struct page*, gfp_t);
 	struct module *owner;
 	struct file_system_type * next;
 	struct list_head fs_supers;
@@ -1699,8 +1700,16 @@ extern void bd_set_size(struct block_device *, loff_t size);
 extern void bd_forget(struct inode *inode);
 extern void bdput(struct block_device *);
 extern struct block_device *open_by_devnum(dev_t, fmode_t);
+extern int blkdev_register_client_releasepage(struct block_device *,
+	void *, int (*releasepage)(void *, struct page*, gfp_t));
+extern void blkdev_unregister_client_releasepage(struct block_device *);
 #else
 static inline void bd_forget(struct inode *inode) {}
+static inline int blkdev_register_client_releasepage(struct block_device *,
+	void *, int (*releasepage)(void *, struct page*, gfp_t))
+{ return 1; }
+static inline void blkdev_unregister_client_releasepage(struct block_device *)
+{}
 #endif
 extern const struct file_operations def_blk_fops;
 extern const struct file_operations def_chr_fops;
-- 
1.6.0.4.8.g36f27.dirty

--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [Samba]     [Device Mapper]     [CEPH Development]
  Powered by Linux