[patch] fs: avoid buffer_head

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



This is the more interesting patch. Do any filesystem people have
particular requests for the callbacks? Also, I didn't add a ->release
equivalent of the invalidate_inode_buffers call to clear_inode, because
I was hoping filesystems could do that themselves before calling it.
But maybe it is required in some cases?

--

Introduce new address space operations sync and release, which can be used
by a filesystem to synchronize and release per-address_space private metadata.
They generalise sync_mapping_buffers, invalidate_inode_buffers, and
remove_inode_buffers calls, and get another step closer to divorcing
buffer heads from core mm/fs code.

---
 fs/buffer.c                 |    4 ++--
 fs/inode.c                  |   42 ++++++++++++++++++++++++++++++++----------
 fs/libfs.c                  |    7 ++++++-
 include/linux/buffer_head.h |    2 --
 include/linux/fs.h          |   35 +++++++++++++++++++++++++++++++++++
 5 files changed, 75 insertions(+), 15 deletions(-)

Index: linux-2.6/fs/buffer.c
===================================================================
--- linux-2.6.orig/fs/buffer.c
+++ linux-2.6/fs/buffer.c
@@ -476,9 +476,9 @@ static void __remove_assoc_queue(struct
 	bh->b_assoc_map = NULL;
 }
 
-int inode_has_buffers(struct inode *inode)
+static int inode_has_buffers(struct inode *inode)
 {
-	return !list_empty(&inode->i_data.private_list);
+	return mapping_has_private(&inode->i_data);
 }
 
 /*
Index: linux-2.6/include/linux/buffer_head.h
===================================================================
--- linux-2.6.orig/include/linux/buffer_head.h
+++ linux-2.6/include/linux/buffer_head.h
@@ -159,7 +159,6 @@ void end_buffer_async_write(struct buffe
 
 /* Things to do with buffers at mapping->private_list */
 void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode);
-int inode_has_buffers(struct inode *);
 void invalidate_inode_buffers(struct inode *);
 int remove_inode_buffers(struct inode *inode);
 int sync_mapping_buffers(struct address_space *mapping);
@@ -335,7 +334,6 @@ extern int __set_page_dirty_buffers(stru
 
 static inline void buffer_init(void) {}
 static inline int try_to_free_buffers(struct page *page) { return 1; }
-static inline int inode_has_buffers(struct inode *inode) { return 0; }
 static inline void invalidate_inode_buffers(struct inode *inode) {}
 static inline int remove_inode_buffers(struct inode *inode) { return 1; }
 static inline int sync_mapping_buffers(struct address_space *mapping) { return 0; }
Index: linux-2.6/fs/inode.c
===================================================================
--- linux-2.6.orig/fs/inode.c
+++ linux-2.6/fs/inode.c
@@ -28,11 +28,11 @@
 
 /*
  * This is needed for the following functions:
- *  - inode_has_buffers
  *  - invalidate_inode_buffers
- *  - invalidate_bdev
+ *  - remove_inode_buffers
  *
  * FIXME: remove all knowledge of the buffer layer from this file
+ * (by converting filesystems to ->release and ->sync aops)
  */
 #include <linux/buffer_head.h>
 
@@ -224,7 +224,8 @@ static struct inode *alloc_inode(struct
 
 void __destroy_inode(struct inode *inode)
 {
-	BUG_ON(inode_has_buffers(inode));
+	BUG_ON(mapping_has_private(&inode->i_data));
+	BUG_ON(inode->i_data.nrpages);
 	security_inode_free(inode);
 	fsnotify_inode_delete(inode);
 #ifdef CONFIG_FS_POSIX_ACL
@@ -306,10 +307,15 @@ void __iget(struct inode *inode)
  */
 void clear_inode(struct inode *inode)
 {
+	struct address_space *mapping = &inode->i_data;
+
 	might_sleep();
-	invalidate_inode_buffers(inode);
+	/* XXX: filesystems should invalidate this before calling */
+	if (!mapping->a_ops->release)
+		invalidate_inode_buffers(inode);
 
-	BUG_ON(inode->i_data.nrpages);
+	BUG_ON(mapping_has_private(mapping));
+	BUG_ON(mapping->nrpages);
 	BUG_ON(!(inode->i_state & I_FREEING));
 	BUG_ON(inode->i_state & I_CLEAR);
 	inode_sync_wait(inode);
@@ -370,6 +376,7 @@ static int invalidate_list(struct list_h
 	for (;;) {
 		struct list_head *tmp = next;
 		struct inode *inode;
+		struct address_space *mapping;
 
 		/*
 		 * We can reschedule here without worrying about the list's
@@ -385,7 +392,12 @@ static int invalidate_list(struct list_h
 		inode = list_entry(tmp, struct inode, i_sb_list);
 		if (inode->i_state & I_NEW)
 			continue;
-		invalidate_inode_buffers(inode);
+		mapping = &inode->i_data;
+		if (!mapping->a_ops->release)
+			invalidate_inode_buffers(inode);
+		else
+			mapping->a_ops->release(mapping, AOP_RELEASE_FORCE);
+		BUG_ON(mapping_has_private(mapping));
 		if (!atomic_read(&inode->i_count)) {
 			list_move(&inode->i_list, dispose);
 			WARN_ON(inode->i_state & I_NEW);
@@ -429,13 +441,15 @@ EXPORT_SYMBOL(invalidate_inodes);
 
 static int can_unuse(struct inode *inode)
 {
+	struct address_space *mapping = &inode->i_data;
+
 	if (inode->i_state)
 		return 0;
-	if (inode_has_buffers(inode))
+	if (mapping_has_private(mapping))
 		return 0;
 	if (atomic_read(&inode->i_count))
 		return 0;
-	if (inode->i_data.nrpages)
+	if (mapping->nrpages)
 		return 0;
 	return 1;
 }
@@ -464,6 +478,7 @@ static void prune_icache(int nr_to_scan)
 	spin_lock(&inode_lock);
 	for (nr_scanned = 0; nr_scanned < nr_to_scan; nr_scanned++) {
 		struct inode *inode;
+		struct address_space *mapping;
 
 		if (list_empty(&inode_unused))
 			break;
@@ -474,10 +489,17 @@ static void prune_icache(int nr_to_scan)
 			list_move(&inode->i_list, &inode_unused);
 			continue;
 		}
-		if (inode_has_buffers(inode) || inode->i_data.nrpages) {
+		mapping = &inode->i_data;
+		if (mapping_has_private(mapping) || mapping->nrpages) {
+			int ret;
+
 			__iget(inode);
 			spin_unlock(&inode_lock);
-			if (remove_inode_buffers(inode))
+			if (mapping->a_ops->release)
+				ret = mapping->a_ops->release(mapping, 0);
+			else	/* helper is nonzero when buffers are gone; map to 0 */
+				ret = !remove_inode_buffers(inode);
+			if (!ret)	/* 0 == private data released, pages may go */
 				reap += invalidate_mapping_pages(&inode->i_data,
 								0, -1);
 			iput(inode);
Index: linux-2.6/include/linux/fs.h
===================================================================
--- linux-2.6.orig/include/linux/fs.h
+++ linux-2.6/include/linux/fs.h
@@ -569,6 +569,17 @@ typedef struct {
 typedef int (*read_actor_t)(read_descriptor_t *, struct page *,
 		unsigned long, unsigned long);
 
+/*
+ * Flags for address_space_operations.release operations.
+ */
+#define AOP_RELEASE_FORCE	0x01	/* Release dirty and in-use data */
+
+/*
+ * Flags for address_space_operations.sync operations.
+ */
+#define AOP_SYNC_WRITE		0x01	/* Begin writeout */
+#define AOP_SYNC_WAIT		0x02	/* Wait for started writeout */
+
 struct address_space_operations {
 	int (*writepage)(struct page *page, struct writeback_control *wbc);
 	int (*readpage)(struct file *, struct page *);
@@ -604,6 +615,22 @@ struct address_space_operations {
 	int (*launder_page) (struct page *);
 	int (*is_partially_uptodate) (struct page *, read_descriptor_t *,
 					unsigned long);
+	/*
+	 * release releases any private data on the mapping so that
+	 * it may be reclaimed.
+	 * Second parameter is flags (see above).
+	 * Returns 0 on success, or -errno.
+	 */
+	int (*release)(struct address_space *, unsigned int);
+
+	/*
+	 * sync writes back and waits for any private data on the mapping,
+	 * as a data consistency operation.
+	 * Second parameter is flags (see above).
+	 * Returns 0 on success, or -errno.
+	 */
+	int (*sync)(struct address_space *, unsigned int);
+
 	int (*error_remove_page)(struct address_space *, struct page *);
 };
 
@@ -688,6 +715,14 @@ struct block_device {
 int mapping_tagged(struct address_space *mapping, int tag);
 
 /*
+ * Does this mapping have anything on its private list?
+ */
+static inline int mapping_has_private(struct address_space *mapping)
+{
+	return !list_empty(&mapping->private_list);
+}
+
+/*
  * Might pages of this file be mapped into userspace?
  */
 static inline int mapping_mapped(struct address_space *mapping)
Index: linux-2.6/fs/libfs.c
===================================================================
--- linux-2.6.orig/fs/libfs.c
+++ linux-2.6/fs/libfs.c
@@ -823,10 +823,15 @@ int simple_fsync(struct file *file, stru
 		.nr_to_write = 0, /* metadata-only; caller takes care of data */
 	};
 	struct inode *inode = dentry->d_inode;
+	struct address_space *mapping = inode->i_mapping;
 	int err;
 	int ret;
 
-	ret = sync_mapping_buffers(inode->i_mapping);
+	if (!mapping->a_ops->sync)
+		ret = sync_mapping_buffers(mapping);
+	else
+		ret = mapping->a_ops->sync(mapping, AOP_SYNC_WRITE|AOP_SYNC_WAIT);
+
 	if (!(inode->i_state & I_DIRTY))
 		return ret;
 	if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [Samba]     [Device Mapper]     [CEPH Development]
  Powered by Linux