This is the more interesting patch. I wonder if any filesystem people have interesting requests of the callbacks? Also, I didn't put the equivalent invalidate_inode_buffers callback in clear_inode because I was hoping filesystems could do that. But maybe it is required in some cases? -- Introduce new address space operations sync and release, which can be used by a filesystem to synchronize and release per-address_space private metadata. They generalise sync_mapping_buffers, invalidate_inode_buffers, and remove_inode_buffers calls, and get another step closer to divorcing buffer heads from core mm/fs code. --- fs/buffer.c | 4 ++-- fs/inode.c | 42 ++++++++++++++++++++++++++++++++---------- fs/libfs.c | 7 ++++++- include/linux/buffer_head.h | 2 -- include/linux/fs.h | 35 +++++++++++++++++++++++++++++++++++ 5 files changed, 75 insertions(+), 15 deletions(-) Index: linux-2.6/fs/buffer.c =================================================================== --- linux-2.6.orig/fs/buffer.c +++ linux-2.6/fs/buffer.c @@ -476,9 +476,9 @@ static void __remove_assoc_queue(struct bh->b_assoc_map = NULL; } -int inode_has_buffers(struct inode *inode) +static int inode_has_buffers(struct inode *inode) { - return !list_empty(&inode->i_data.private_list); + return mapping_has_private(&inode->i_data); } /* Index: linux-2.6/include/linux/buffer_head.h =================================================================== --- linux-2.6.orig/include/linux/buffer_head.h +++ linux-2.6/include/linux/buffer_head.h @@ -159,7 +159,6 @@ void end_buffer_async_write(struct buffe /* Things to do with buffers at mapping->private_list */ void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode); -int inode_has_buffers(struct inode *); void invalidate_inode_buffers(struct inode *); int remove_inode_buffers(struct inode *inode); int sync_mapping_buffers(struct address_space *mapping); @@ -335,7 +334,6 @@ extern int __set_page_dirty_buffers(stru static inline void buffer_init(void) {} static 
inline int try_to_free_buffers(struct page *page) { return 1; } -static inline int inode_has_buffers(struct inode *inode) { return 0; } static inline void invalidate_inode_buffers(struct inode *inode) {} static inline int remove_inode_buffers(struct inode *inode) { return 1; } static inline int sync_mapping_buffers(struct address_space *mapping) { return 0; } Index: linux-2.6/fs/inode.c =================================================================== --- linux-2.6.orig/fs/inode.c +++ linux-2.6/fs/inode.c @@ -28,11 +28,11 @@ /* * This is needed for the following functions: - * - inode_has_buffers * - invalidate_inode_buffers - * - invalidate_bdev + * - remove_inode_buffers * * FIXME: remove all knowledge of the buffer layer from this file + * (by converting filesystems to ->release and ->sync aops) */ #include <linux/buffer_head.h> @@ -224,7 +224,8 @@ static struct inode *alloc_inode(struct void __destroy_inode(struct inode *inode) { - BUG_ON(inode_has_buffers(inode)); + BUG_ON(mapping_has_private(&inode->i_data)); + BUG_ON(inode->i_data.nrpages); security_inode_free(inode); fsnotify_inode_delete(inode); #ifdef CONFIG_FS_POSIX_ACL @@ -306,10 +307,15 @@ void __iget(struct inode *inode) */ void clear_inode(struct inode *inode) { + struct address_space *mapping = &inode->i_data; + might_sleep(); - invalidate_inode_buffers(inode); + /* XXX: filesystems should invalidate this before calling */ + if (!mapping->a_ops->release) + invalidate_inode_buffers(inode); - BUG_ON(inode->i_data.nrpages); + BUG_ON(mapping_has_private(mapping)); + BUG_ON(mapping->nrpages); BUG_ON(!(inode->i_state & I_FREEING)); BUG_ON(inode->i_state & I_CLEAR); inode_sync_wait(inode); @@ -370,6 +376,7 @@ static int invalidate_list(struct list_h for (;;) { struct list_head *tmp = next; struct inode *inode; + struct address_space *mapping; /* * We can reschedule here without worrying about the list's @@ -385,7 +392,12 @@ static int invalidate_list(struct list_h inode = list_entry(tmp, struct inode, 
i_sb_list); if (inode->i_state & I_NEW) continue; - invalidate_inode_buffers(inode); + mapping = &inode->i_data; + if (!mapping->a_ops->release) + invalidate_inode_buffers(inode); + else + mapping->a_ops->release(mapping, AOP_RELEASE_FORCE); + BUG_ON(mapping_has_private(mapping)); if (!atomic_read(&inode->i_count)) { list_move(&inode->i_list, dispose); WARN_ON(inode->i_state & I_NEW); @@ -429,13 +441,15 @@ EXPORT_SYMBOL(invalidate_inodes); static int can_unuse(struct inode *inode) { + struct address_space *mapping = &inode->i_data; + if (inode->i_state) return 0; - if (inode_has_buffers(inode)) + if (mapping_has_private(mapping)) return 0; if (atomic_read(&inode->i_count)) return 0; - if (inode->i_data.nrpages) + if (mapping->nrpages) return 0; return 1; } @@ -464,6 +478,7 @@ static void prune_icache(int nr_to_scan) spin_lock(&inode_lock); for (nr_scanned = 0; nr_scanned < nr_to_scan; nr_scanned++) { struct inode *inode; + struct address_space *mapping; if (list_empty(&inode_unused)) break; @@ -474,10 +489,17 @@ static void prune_icache(int nr_to_scan) list_move(&inode->i_list, &inode_unused); continue; } - if (inode_has_buffers(inode) || inode->i_data.nrpages) { + mapping = &inode->i_data; + if (mapping_has_private(mapping) || mapping->nrpages) { + int ret; + __iget(inode); spin_unlock(&inode_lock); - if (remove_inode_buffers(inode)) + if (mapping->a_ops->release) + ret = mapping->a_ops->release(mapping, 0); + else + ret = !remove_inode_buffers(inode); + if (ret) reap += invalidate_mapping_pages(&inode->i_data, 0, -1); iput(inode); Index: linux-2.6/include/linux/fs.h =================================================================== --- linux-2.6.orig/include/linux/fs.h +++ linux-2.6/include/linux/fs.h @@ -569,6 +569,17 @@ typedef struct { typedef int (*read_actor_t)(read_descriptor_t *, struct page *, unsigned long, unsigned long); +/* + * Flags for address_space_operations.release operations. 
+ */ +#define AOP_RELEASE_FORCE 0x01 /* Release dirty and in-use data */ + +/* + * Flags for address_space_operations.sync operations. + */ +#define AOP_SYNC_WRITE 0x01 /* Begin writeout */ +#define AOP_SYNC_WAIT 0x02 /* Wait for started writeout */ + struct address_space_operations { int (*writepage)(struct page *page, struct writeback_control *wbc); int (*readpage)(struct file *, struct page *); @@ -604,6 +615,22 @@ struct address_space_operations { int (*launder_page) (struct page *); int (*is_partially_uptodate) (struct page *, read_descriptor_t *, unsigned long); + /* + * release releases any private data on the mapping so that + * it may be reclaimed. + * Second parameter is flags (see above). + * Returns 0 on success, or -errno. + */ + int (*release)(struct address_space *, unsigned int); + + /* + * sync writes back and waits for any private data on the mapping, + * as a data consistency operation. + * Second parameter is flags (see above). + * Returns 0 on success, or -errno. + */ + int (*sync)(struct address_space *, unsigned int); + int (*error_remove_page)(struct address_space *, struct page *); }; @@ -688,6 +715,14 @@ struct block_device { int mapping_tagged(struct address_space *mapping, int tag); /* + * Does this mapping have anything on its private list? + */ +static inline int mapping_has_private(struct address_space *mapping) +{ + return !list_empty(&mapping->private_list); +} + +/* * Might pages of this file be mapped into userspace? 
*/ static inline int mapping_mapped(struct address_space *mapping) Index: linux-2.6/fs/libfs.c =================================================================== --- linux-2.6.orig/fs/libfs.c +++ linux-2.6/fs/libfs.c @@ -823,10 +823,15 @@ int simple_fsync(struct file *file, stru .nr_to_write = 0, /* metadata-only; caller takes care of data */ }; struct inode *inode = dentry->d_inode; + struct address_space *mapping = inode->i_mapping; int err; int ret; - ret = sync_mapping_buffers(inode->i_mapping); + if (!mapping->a_ops->sync) + ret = sync_mapping_buffers(mapping); + else + ret = mapping->a_ops->sync(mapping, AOP_SYNC_WRITE|AOP_SYNC_WAIT); + if (!(inode->i_state & I_DIRTY)) return ret; if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) -- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html