Convert the use of i_data.private_list for metadata buffer heads to the
generic optional inode field infrastructure. For now we just set the field up
so that it still uses i_data.private_list. In later patches we switch
filesystems one by one to use their own fields.

Signed-off-by: Jan Kara <jack@xxxxxxx>
---
 fs/buffer.c                 | 98 +++++++++++++++++++--------------------------
 fs/super.c                  |  2 +
 include/linux/buffer_head.h | 12 +++++-
 include/linux/fs.h          |  2 +
 4 files changed, 57 insertions(+), 57 deletions(-)

diff --git a/fs/buffer.c b/fs/buffer.c
index d7a88a0ab0d4..9441889d8383 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -43,7 +43,7 @@
 #include <linux/bit_spinlock.h>
 #include <trace/events/block.h>
 
-static int fsync_buffers_list(spinlock_t *lock, struct list_head *list);
+static int fsync_buffers_list(spinlock_t *lock, struct inode_meta_bhs *mbh);
 
 #define BH_ENTRY(list) list_entry((list), struct buffer_head, b_assoc_buffers)
 
@@ -460,44 +460,27 @@ EXPORT_SYMBOL(mark_buffer_async_write);
  *
  * The functions mark_buffer_inode_dirty(), fsync_inode_buffers(),
  * inode_has_buffers() and invalidate_inode_buffers() are provided for the
- * management of a list of dependent buffers at ->i_mapping->private_list.
- *
- * Locking is a little subtle: try_to_free_buffers() will remove buffers
- * from their controlling inode's queue when they are being freed. But
- * try_to_free_buffers() will be operating against the *blockdev* mapping
- * at the time, not against the S_ISREG file which depends on those buffers.
- * So the locking for private_list is via the private_lock in the address_space
- * which backs the buffers. Which is different from the address_space
- * against which the buffers are listed. So for a particular address_space,
- * mapping->private_lock does *not* protect mapping->private_list! In fact,
- * mapping->private_list will always be protected by the backing blockdev's
- * ->private_lock.
- *
- * Which introduces a requirement: all buffers on an address_space's
- * ->private_list must be from the same address_space: the blockdev's.
- *
- * address_spaces which do not place buffers at ->private_list via these
- * utility functions are free to use private_lock and private_list for
- * whatever they want. The only requirement is that list_empty(private_list)
- * be true at clear_inode() time.
- *
- * FIXME: clear_inode should not call invalidate_inode_buffers(). The
- * filesystems should do that. invalidate_inode_buffers() should just go
- * BUG_ON(!list_empty).
+ * management of a list of dependent buffers anchored at the inode_meta_bhs
+ * structure in the filesystem's inode.
+ *
+ * Locking is a little subtle: try_to_free_buffers() will remove buffers from
+ * their controlling inode's queue when they are being freed. But
+ * try_to_free_buffers() will be operating against the *blockdev* mapping at
+ * the time, not against the S_ISREG file which depends on those buffers. So
+ * the locking for inode_meta_bhs list is via the private_lock in the
+ * address_space which backs the buffers. Which is different from the
+ * address_space against which the buffers are listed. So for a particular
+ * address_space, mapping->private_lock does *not* protect inode's
+ * inode_meta_bhs list! In fact, inode_meta_bhs list will always be protected
+ * by the backing blockdev's ->private_lock.
+ *
+ * Which introduces a requirement: all buffers on an inode's inode_meta_bhs
+ * list must be from the same address_space: the blockdev's.
  *
  * FIXME: mark_buffer_dirty_inode() is a data-plane operation. It should
  * take an address_space, not an inode. And it should be called
  * mark_buffer_dirty_fsync() to clearly define why those buffers are being
  * queued up.
- *
- * FIXME: mark_buffer_dirty_inode() doesn't need to add the buffer to the
- * list if it is already on a list. Because if the buffer is on a list,
- * it *must* already be on the right one. If not, the filesystem is being
- * silly. This will save a ton of locking. But first we have to ensure
- * that buffers are taken *off* the old inode's list when they are freed
- * (presumably in truncate). That requires careful auditing of all
- * filesystems (do it inside bforget()). It could also be done by bringing
- * b_inode back.
  */
 
 /*
@@ -514,7 +497,9 @@ static void __remove_assoc_queue(struct buffer_head *bh)
 
 int inode_has_buffers(struct inode *inode)
 {
-        return !list_empty(&inode->i_data.private_list);
+        struct inode_meta_bhs *mbh = inode_get_field(inode, IF_META_BHS);
+
+        return mbh && !list_empty(&mbh->list);
 }
 
 /*
@@ -592,8 +577,8 @@ static inline struct address_space *inode_buffer_mapping(struct inode *inode)
  * sync_mapping_buffers - write out & wait upon a mapping's "associated" buffers
  * @mapping: the mapping which wants those buffers written
  *
- * Starts I/O against the buffers at mapping->private_list, and waits upon
- * that I/O.
+ * Starts I/O against the buffers at mapping->host's inode_meta_bhs list, and
+ * waits upon that I/O.
  *
  * Basically, this is a convenience function for fsync().
  * @mapping is a file or directory which needs those buffers to be written for
@@ -601,14 +586,15 @@ static inline struct address_space *inode_buffer_mapping(struct inode *inode)
  */
 int sync_mapping_buffers(struct address_space *mapping)
 {
+        struct inode *inode = mapping->host;
+        struct inode_meta_bhs *mbh = inode_get_field(inode, IF_META_BHS);
         struct address_space *buffer_mapping;
 
-        if (list_empty(&mapping->private_list))
+        if (!inode_has_buffers(inode))
                 return 0;
 
-        buffer_mapping = inode_buffer_mapping(mapping->host);
-        return fsync_buffers_list(&buffer_mapping->private_lock,
-                                  &mapping->private_list);
+        buffer_mapping = inode_buffer_mapping(inode);
+        return fsync_buffers_list(&buffer_mapping->private_lock, mbh);
 }
 EXPORT_SYMBOL(sync_mapping_buffers);
 
@@ -633,12 +619,12 @@ void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode)
 {
         struct address_space *mapping = inode->i_mapping;
         struct address_space *buffer_mapping = inode_buffer_mapping(inode);
+        struct inode_meta_bhs *mbh = inode_get_field(inode, IF_META_BHS);
 
         mark_buffer_dirty(bh);
         if (!bh->b_assoc_map) {
                 spin_lock(&buffer_mapping->private_lock);
-                list_move_tail(&bh->b_assoc_buffers,
-                               &mapping->private_list);
+                list_move_tail(&bh->b_assoc_buffers, &mbh->list);
                 bh->b_assoc_map = mapping;
                 spin_unlock(&buffer_mapping->private_lock);
         }
@@ -739,7 +725,7 @@ EXPORT_SYMBOL(__set_page_dirty_buffers);
  * the osync code to catch these locked, dirty buffers without requeuing
  * any newly dirty buffers for write.
  */
-static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
+static int fsync_buffers_list(spinlock_t *lock, struct inode_meta_bhs *mbh)
 {
         struct buffer_head *bh;
         struct list_head tmp;
@@ -751,8 +737,8 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
         blk_start_plug(&plug);
 
         spin_lock(lock);
-        while (!list_empty(list)) {
-                bh = BH_ENTRY(list->next);
+        while (!list_empty(&mbh->list)) {
+                bh = BH_ENTRY(mbh->list.next);
                 mapping = bh->b_assoc_map;
                 __remove_assoc_queue(bh);
                 /* Avoid race with mark_buffer_dirty_inode() which does
@@ -799,7 +785,7 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
                 smp_mb();
                 if (buffer_dirty(bh)) {
                         list_add(&bh->b_assoc_buffers,
-                                 &mapping->private_list);
+                                 &mbh->list);
                         bh->b_assoc_map = mapping;
                 }
                 spin_unlock(lock);
@@ -811,7 +797,7 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
         }
 
         spin_unlock(lock);
-        err2 = osync_buffers_list(lock, list);
+        err2 = osync_buffers_list(lock, &mbh->list);
         if (err)
                 return err;
         else
@@ -830,14 +816,14 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
 void invalidate_inode_buffers(struct inode *inode)
 {
         if (inode_has_buffers(inode)) {
-                struct address_space *mapping = &inode->i_data;
-                struct list_head *list = &mapping->private_list;
+                struct inode_meta_bhs *mbh;
                 struct address_space *buffer_mapping;
 
+                mbh = inode_get_field(inode, IF_META_BHS);
                 buffer_mapping = inode_buffer_mapping(inode);
                 spin_lock(&buffer_mapping->private_lock);
-                while (!list_empty(list))
-                        __remove_assoc_queue(BH_ENTRY(list->next));
+                while (!list_empty(&mbh->list))
+                        __remove_assoc_queue(BH_ENTRY(mbh->list.next));
                 spin_unlock(&buffer_mapping->private_lock);
         }
 }
@@ -854,14 +840,14 @@ int remove_inode_buffers(struct inode *inode)
         int ret = 1;
 
         if (inode_has_buffers(inode)) {
-                struct address_space *mapping = &inode->i_data;
-                struct list_head *list = &mapping->private_list;
+                struct inode_meta_bhs *mbh;
                 struct address_space *buffer_mapping;
 
+                mbh = inode_get_field(inode, IF_META_BHS);
                 buffer_mapping = inode_buffer_mapping(inode);
                 spin_lock(&buffer_mapping->private_lock);
-                while (!list_empty(list)) {
-                        struct buffer_head *bh = BH_ENTRY(list->next);
+                while (!list_empty(&mbh->list)) {
+                        struct buffer_head *bh = BH_ENTRY(mbh->list.next);
                         if (buffer_dirty(bh)) {
                                 ret = 0;
                                 break;
diff --git a/fs/super.c b/fs/super.c
index 80d5cf2ca765..4192e3356e37 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -219,6 +219,8 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags)
         s->s_op = &default_op;
         s->s_time_gran = 1000000000;
         s->cleancache_poolid = -1;
+        s->s_inode_fields[IF_META_BHS] = offsetof(struct inode,
+                                                  i_data.private_list);
 
         s->s_shrink.seeks = DEFAULT_SEEKS;
         s->s_shrink.scan_objects = super_cache_scan;
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index d77797a52b7b..060acb847240 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -131,6 +131,16 @@ BUFFER_FNS(Meta, meta)
 BUFFER_FNS(Prio, prio)
 BUFFER_FNS(Defer_Completion, defer_completion)
 
+/* List of metadata buffers associated with the inode */
+struct inode_meta_bhs {
+        struct list_head list;
+};
+
+static inline void inode_mbhs_init_once(struct inode_meta_bhs *mbh)
+{
+        INIT_LIST_HEAD(&mbh->list);
+}
+
 #define bh_offset(bh)   ((unsigned long)(bh)->b_data & ~PAGE_MASK)
 
 /* If we *know* page->private refers to buffer_heads */
@@ -162,7 +172,7 @@ void end_buffer_read_sync(struct buffer_head *bh, int uptodate);
 void end_buffer_write_sync(struct buffer_head *bh, int uptodate);
 void end_buffer_async_write(struct buffer_head *bh, int uptodate);
 
-/* Things to do with buffers at mapping->private_list */
+/* Things to do with inode's associated metadata buffers */
 void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode);
 int inode_has_buffers(struct inode *);
 void invalidate_inode_buffers(struct inode *);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index cc811b2f1a39..37bc82f7a02d 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -614,6 +614,8 @@ struct inode {
 
 /* Optional inode fields (stored in filesystems inode if the fs needs them) */
 enum {
+        IF_META_BHS,    /* List of metadata buffer heads for inode
+                           (struct inode_meta_bhs) */
         IF_FIELD_NR     /* Number of optional inode fields */
 };
 
-- 
1.8.1.4
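
For readers picking this patch up in isolation: inode_get_field() and the
s_inode_fields[] table used above come from the earlier patches in this series
that add the optional inode field infrastructure; their definitions are not
part of this diff. Below is a minimal sketch of how such an offset-based
lookup could work and how a filesystem might later register its own embedded
struct inode_meta_bhs. The helper body, the "unset offset means field not
provided" convention, and all foofs_* names are assumptions made purely for
illustration and may differ from the real infrastructure patches.

/*
 * Sketch only, not part of this patch.  Assumed semantics: the superblock
 * records, per optional field, the offset of that field from the VFS inode,
 * and an unset entry means the filesystem does not provide the field (which
 * is why inode_has_buffers() above checks for a NULL return).
 */
static inline void *inode_get_field(struct inode *inode, int field)
{
        long off = inode->i_sb->s_inode_fields[field];

        if (!off)
                return NULL;    /* field not provided by this filesystem */
        return (void *)inode + off;
}

/*
 * How a later per-filesystem patch might stop relying on the default
 * i_data.private_list offset installed in alloc_super() and use its own
 * embedded list instead ("foofs" is a made-up example filesystem).
 */
struct foofs_inode_info {
        struct inode            vfs_inode;
        struct inode_meta_bhs   i_meta_bhs;     /* metadata buffer heads */
};

/* kmem_cache constructor for the filesystem's inode slab */
static void foofs_init_once(void *obj)
{
        struct foofs_inode_info *fi = obj;

        inode_mbhs_init_once(&fi->i_meta_bhs);
        inode_init_once(&fi->vfs_inode);
}

static void foofs_set_inode_fields(struct super_block *sb)
{
        sb->s_inode_fields[IF_META_BHS] =
                offsetof(struct foofs_inode_info, i_meta_bhs) -
                offsetof(struct foofs_inode_info, vfs_inode);
}

The subtraction expresses the offset of the embedded list relative to the VFS
inode, which matches the struct inode relative offset that alloc_super()
installs as the default in the fs/super.c hunk above.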