Convert the use of i_data.private_list for metadata buffer heads to the
generic optional inode field infrastructure. For now we just set the field up
so that it still uses i_data.private_list. In later patches we switch
filesystems one by one to use their own fields.

Signed-off-by: Jan Kara <jack@xxxxxxx>
---
 fs/buffer.c                 | 98 +++++++++++++++++++--------------------------
 fs/super.c                  |  2 +
 include/linux/buffer_head.h | 12 +++++-
 include/linux/fs.h          |  2 +
 4 files changed, 57 insertions(+), 57 deletions(-)

diff --git a/fs/buffer.c b/fs/buffer.c
index d7a88a0ab0d4..9441889d8383 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -43,7 +43,7 @@
 #include <linux/bit_spinlock.h>
 #include <trace/events/block.h>
 
-static int fsync_buffers_list(spinlock_t *lock, struct list_head *list);
+static int fsync_buffers_list(spinlock_t *lock, struct inode_meta_bhs *mbh);
 
 #define BH_ENTRY(list) list_entry((list), struct buffer_head, b_assoc_buffers)
 
@@ -460,44 +460,27 @@ EXPORT_SYMBOL(mark_buffer_async_write);
  *
  * The functions mark_buffer_inode_dirty(), fsync_inode_buffers(),
  * inode_has_buffers() and invalidate_inode_buffers() are provided for the
- * management of a list of dependent buffers at ->i_mapping->private_list.
- *
- * Locking is a little subtle: try_to_free_buffers() will remove buffers
- * from their controlling inode's queue when they are being freed. But
- * try_to_free_buffers() will be operating against the *blockdev* mapping
- * at the time, not against the S_ISREG file which depends on those buffers.
- * So the locking for private_list is via the private_lock in the address_space
- * which backs the buffers. Which is different from the address_space
- * against which the buffers are listed. So for a particular address_space,
- * mapping->private_lock does *not* protect mapping->private_list! In fact,
- * mapping->private_list will always be protected by the backing blockdev's
- * ->private_lock.
- *
- * Which introduces a requirement: all buffers on an address_space's
- * ->private_list must be from the same address_space: the blockdev's.
- *
- * address_spaces which do not place buffers at ->private_list via these
- * utility functions are free to use private_lock and private_list for
- * whatever they want. The only requirement is that list_empty(private_list)
- * be true at clear_inode() time.
- *
- * FIXME: clear_inode should not call invalidate_inode_buffers(). The
- * filesystems should do that. invalidate_inode_buffers() should just go
- * BUG_ON(!list_empty).
+ * management of a list of dependent buffers anchored at the inode_meta_bhs
+ * structure in the filesystem's inode.
+ *
+ * Locking is a little subtle: try_to_free_buffers() will remove buffers from
+ * their controlling inode's queue when they are being freed. But
+ * try_to_free_buffers() will be operating against the *blockdev* mapping at
+ * the time, not against the S_ISREG file which depends on those buffers. So
+ * the locking for inode_meta_bhs list is via the private_lock in the
+ * address_space which backs the buffers. Which is different from the
+ * address_space against which the buffers are listed. So for a particular
+ * address_space, mapping->private_lock does *not* protect inode's
+ * inode_meta_bhs list! In fact, inode_meta_bhs list will always be protected
+ * by the backing blockdev's ->private_lock.
+ *
+ * Which introduces a requirement: all buffers on an inode's inode_meta_bhs
+ * list must be from the same address_space: the blockdev's.
  *
  * FIXME: mark_buffer_dirty_inode() is a data-plane operation. It should
  * take an address_space, not an inode. And it should be called
  * mark_buffer_dirty_fsync() to clearly define why those buffers are being
  * queued up.
- *
- * FIXME: mark_buffer_dirty_inode() doesn't need to add the buffer to the
- * list if it is already on a list. Because if the buffer is on a list,
- * it *must* already be on the right one. If not, the filesystem is being
- * silly. This will save a ton of locking. But first we have to ensure
- * that buffers are taken *off* the old inode's list when they are freed
- * (presumably in truncate). That requires careful auditing of all
- * filesystems (do it inside bforget()). It could also be done by bringing
- * b_inode back.
  */
 
 /*
@@ -514,7 +497,9 @@ static void __remove_assoc_queue(struct buffer_head *bh)
 
 int inode_has_buffers(struct inode *inode)
 {
-        return !list_empty(&inode->i_data.private_list);
+        struct inode_meta_bhs *mbh = inode_get_field(inode, IF_META_BHS);
+
+        return mbh && !list_empty(&mbh->list);
 }
 
 /*
@@ -592,8 +577,8 @@ static inline struct address_space *inode_buffer_mapping(struct inode *inode)
  * sync_mapping_buffers - write out & wait upon a mapping's "associated" buffers
  * @mapping: the mapping which wants those buffers written
  *
- * Starts I/O against the buffers at mapping->private_list, and waits upon
- * that I/O.
+ * Starts I/O against the buffers at mapping->host's inode_meta_bhs list, and
+ * waits upon that I/O.
  *
  * Basically, this is a convenience function for fsync().
  * @mapping is a file or directory which needs those buffers to be written for
@@ -601,14 +586,15 @@ static inline struct address_space *inode_buffer_mapping(struct inode *inode)
  */
 int sync_mapping_buffers(struct address_space *mapping)
 {
+        struct inode *inode = mapping->host;
+        struct inode_meta_bhs *mbh = inode_get_field(inode, IF_META_BHS);
         struct address_space *buffer_mapping;
 
-        if (list_empty(&mapping->private_list))
+        if (!inode_has_buffers(inode))
                 return 0;
 
-        buffer_mapping = inode_buffer_mapping(mapping->host);
-        return fsync_buffers_list(&buffer_mapping->private_lock,
-                                  &mapping->private_list);
+        buffer_mapping = inode_buffer_mapping(inode);
+        return fsync_buffers_list(&buffer_mapping->private_lock, mbh);
 }
 EXPORT_SYMBOL(sync_mapping_buffers);
 
@@ -633,12 +619,12 @@ void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode)
 {
         struct address_space *mapping = inode->i_mapping;
         struct address_space *buffer_mapping = inode_buffer_mapping(inode);
+        struct inode_meta_bhs *mbh = inode_get_field(inode, IF_META_BHS);
 
         mark_buffer_dirty(bh);
         if (!bh->b_assoc_map) {
                 spin_lock(&buffer_mapping->private_lock);
-                list_move_tail(&bh->b_assoc_buffers,
-                               &mapping->private_list);
+                list_move_tail(&bh->b_assoc_buffers, &mbh->list);
                 bh->b_assoc_map = mapping;
                 spin_unlock(&buffer_mapping->private_lock);
         }
@@ -739,7 +725,7 @@ EXPORT_SYMBOL(__set_page_dirty_buffers);
  * the osync code to catch these locked, dirty buffers without requeuing
  * any newly dirty buffers for write.
  */
-static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
+static int fsync_buffers_list(spinlock_t *lock, struct inode_meta_bhs *mbh)
 {
         struct buffer_head *bh;
         struct list_head tmp;
@@ -751,8 +737,8 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
         blk_start_plug(&plug);
 
         spin_lock(lock);
-        while (!list_empty(list)) {
-                bh = BH_ENTRY(list->next);
+        while (!list_empty(&mbh->list)) {
+                bh = BH_ENTRY(mbh->list.next);
                 mapping = bh->b_assoc_map;
                 __remove_assoc_queue(bh);
                 /* Avoid race with mark_buffer_dirty_inode() which does
@@ -799,7 +785,7 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
                 smp_mb();
                 if (buffer_dirty(bh)) {
                         list_add(&bh->b_assoc_buffers,
-                                 &mapping->private_list);
+                                 &mbh->list);
                         bh->b_assoc_map = mapping;
                 }
                 spin_unlock(lock);
@@ -811,7 +797,7 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
         }
 
         spin_unlock(lock);
-        err2 = osync_buffers_list(lock, list);
+        err2 = osync_buffers_list(lock, &mbh->list);
         if (err)
                 return err;
         else
@@ -830,14 +816,14 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
 void invalidate_inode_buffers(struct inode *inode)
 {
         if (inode_has_buffers(inode)) {
-                struct address_space *mapping = &inode->i_data;
-                struct list_head *list = &mapping->private_list;
+                struct inode_meta_bhs *mbh;
                 struct address_space *buffer_mapping;
 
+                mbh = inode_get_field(inode, IF_META_BHS);
                 buffer_mapping = inode_buffer_mapping(inode);
                 spin_lock(&buffer_mapping->private_lock);
-                while (!list_empty(list))
-                        __remove_assoc_queue(BH_ENTRY(list->next));
+                while (!list_empty(&mbh->list))
+                        __remove_assoc_queue(BH_ENTRY(mbh->list.next));
                 spin_unlock(&buffer_mapping->private_lock);
         }
 }
@@ -854,14 +840,14 @@ int remove_inode_buffers(struct inode *inode)
         int ret = 1;
 
         if (inode_has_buffers(inode)) {
-                struct address_space *mapping = &inode->i_data;
-                struct list_head *list = &mapping->private_list;
+                struct inode_meta_bhs *mbh;
                 struct address_space *buffer_mapping;
 
+                mbh = inode_get_field(inode, IF_META_BHS);
                 buffer_mapping = inode_buffer_mapping(inode);
                 spin_lock(&buffer_mapping->private_lock);
-                while (!list_empty(list)) {
-                        struct buffer_head *bh = BH_ENTRY(list->next);
+                while (!list_empty(&mbh->list)) {
+                        struct buffer_head *bh = BH_ENTRY(mbh->list.next);
                         if (buffer_dirty(bh)) {
                                 ret = 0;
                                 break;
diff --git a/fs/super.c b/fs/super.c
index 80d5cf2ca765..4192e3356e37 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -219,6 +219,8 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags)
         s->s_op = &default_op;
         s->s_time_gran = 1000000000;
         s->cleancache_poolid = -1;
+        s->s_inode_fields[IF_META_BHS] = offsetof(struct inode,
+                                                  i_data.private_list);
 
         s->s_shrink.seeks = DEFAULT_SEEKS;
         s->s_shrink.scan_objects = super_cache_scan;
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index d77797a52b7b..060acb847240 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -131,6 +131,16 @@ BUFFER_FNS(Meta, meta)
 BUFFER_FNS(Prio, prio)
 BUFFER_FNS(Defer_Completion, defer_completion)
 
+/* List of metadata buffers associated with the inode */
+struct inode_meta_bhs {
+        struct list_head list;
+};
+
+static inline void inode_mbhs_init_once(struct inode_meta_bhs *mbh)
+{
+        INIT_LIST_HEAD(&mbh->list);
+}
+
 #define bh_offset(bh)   ((unsigned long)(bh)->b_data & ~PAGE_MASK)
 
 /* If we *know* page->private refers to buffer_heads */
@@ -162,7 +172,7 @@ void end_buffer_read_sync(struct buffer_head *bh, int uptodate);
 void end_buffer_write_sync(struct buffer_head *bh, int uptodate);
 void end_buffer_async_write(struct buffer_head *bh, int uptodate);
 
-/* Things to do with buffers at mapping->private_list */
+/* Things to do with inode's associated metadata buffers */
 void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode);
 int inode_has_buffers(struct inode *);
 void invalidate_inode_buffers(struct inode *);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index cc811b2f1a39..37bc82f7a02d 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -614,6 +614,8 @@ struct inode {
 
 /* Optional inode fields (stored in filesystems inode if the fs needs them) */
 enum {
+        IF_META_BHS,    /* List of metadata buffer heads for inode
+                           (struct inode_meta_bhs) */
         IF_FIELD_NR     /* Number of optional inode fields */
 };
 
-- 
1.8.1.4
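
For readers picking this patch up in isolation: inode_get_field() and the
s_inode_fields[] table used above come from the earlier patches in this series
that add the optional inode field infrastructure; their definitions are not
part of this diff. Below is a minimal sketch of how such an offset-based
lookup could work and how a filesystem might later register its own embedded
struct inode_meta_bhs. The helper body, the "unset offset means field not
provided" convention, and all foofs_* names are assumptions made purely for
illustration and may differ from the real infrastructure patches.

/*
 * Sketch only, not part of this patch.  Assumed semantics: the superblock
 * records, per optional field, the offset of that field from the VFS inode,
 * and an unset entry means the filesystem does not provide the field (which
 * is why inode_has_buffers() above checks for a NULL return).
 */
static inline void *inode_get_field(struct inode *inode, int field)
{
        long off = inode->i_sb->s_inode_fields[field];

        if (!off)
                return NULL;    /* field not provided by this filesystem */
        return (void *)inode + off;
}

/*
 * How a later per-filesystem patch might stop relying on the default
 * i_data.private_list offset installed in alloc_super() and use its own
 * embedded list instead ("foofs" is a made-up example filesystem).
 */
struct foofs_inode_info {
        struct inode            vfs_inode;
        struct inode_meta_bhs   i_meta_bhs;     /* metadata buffer heads */
};

/* kmem_cache constructor for the filesystem's inode slab */
static void foofs_init_once(void *obj)
{
        struct foofs_inode_info *fi = obj;

        inode_mbhs_init_once(&fi->i_meta_bhs);
        inode_init_once(&fi->vfs_inode);
}

static void foofs_set_inode_fields(struct super_block *sb)
{
        sb->s_inode_fields[IF_META_BHS] =
                offsetof(struct foofs_inode_info, i_meta_bhs) -
                offsetof(struct foofs_inode_info, vfs_inode);
}

The subtraction expresses the offset of the embedded list relative to the VFS
inode, which matches the struct inode relative offset that alloc_super()
installs as the default in the fs/super.c hunk above.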