Hello, In order to add generic FIEMAP support for all filesystems that do not necessarily have extents (e.g. ext2/3), it is necessary to have direct access to the filesystem's get_block function. The reason is that in certain cases (again ext2/3) the filesystem has the ability to map many contiguous blocks together at once, which would be far more efficient than calling ->bmap() over and over for all of the blocks in the inode. In order to accomplish this I would like to expose the filesystem's get_block function via an inode operation. This would allow me to create a simple generic FIEMAP function that could be used on all fs's and be a bit more efficient than FIBMAP, and it would clean up where we are passing get_block_t everywhere. This is just the vfs layer patch, since I want to make sure this sort of idea is acceptable before I go doing too much more work. I've already started converting fs's over to use this new way of doing things and I haven't seen any conflicts from doing things this way yet. Thanks much, Josef Index: linux-2.6/fs/buffer.c =================================================================== --- linux-2.6.orig/fs/buffer.c +++ linux-2.6/fs/buffer.c @@ -1640,7 +1640,7 @@ EXPORT_SYMBOL(unmap_underlying_metadata) * prevents this contention from occurring. 
*/ static int __block_write_full_page(struct inode *inode, struct page *page, - get_block_t *get_block, struct writeback_control *wbc) + struct writeback_control *wbc) { int err; sector_t block; @@ -1690,7 +1690,7 @@ static int __block_write_full_page(struc set_buffer_uptodate(bh); } else if (!buffer_mapped(bh) && buffer_dirty(bh)) { WARN_ON(bh->b_size != blocksize); - err = get_block(inode, block, bh, 1); + err = inode->i_op->get_block(inode, block, bh, 1); if (err) goto recover; if (buffer_new(bh)) { @@ -1842,7 +1842,7 @@ void page_zero_new_buffers(struct page * EXPORT_SYMBOL(page_zero_new_buffers); static int __block_prepare_write(struct inode *inode, struct page *page, - unsigned from, unsigned to, get_block_t *get_block) + unsigned from, unsigned to) { unsigned block_start, block_end; sector_t block; @@ -1877,7 +1877,7 @@ static int __block_prepare_write(struct clear_buffer_new(bh); if (!buffer_mapped(bh)) { WARN_ON(bh->b_size != blocksize); - err = get_block(inode, block, bh, 1); + err = inode->i_op->get_block(inode, block, bh, 1); if (err) break; if (buffer_new(bh)) { @@ -1966,8 +1966,7 @@ static int __block_commit_write(struct i */ int block_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, - struct page **pagep, void **fsdata, - get_block_t *get_block) + struct page **pagep, void **fsdata) { struct inode *inode = mapping->host; int status = 0; @@ -1992,7 +1991,7 @@ int block_write_begin(struct file *file, } else BUG_ON(!PageLocked(page)); - status = __block_prepare_write(inode, page, start, end, get_block); + status = __block_prepare_write(inode, page, start, end); if (unlikely(status)) { ClearPageUptodate(page); @@ -2087,7 +2086,7 @@ EXPORT_SYMBOL(generic_write_end); * set/clear_buffer_uptodate() functions propagate buffer state into the * page struct once IO has completed. 
*/ -int block_read_full_page(struct page *page, get_block_t *get_block) +int block_read_full_page(struct page *page) { struct inode *inode = page->mapping->host; sector_t iblock, lblock; @@ -2118,7 +2117,8 @@ int block_read_full_page(struct page *pa fully_mapped = 0; if (iblock < lblock) { WARN_ON(bh->b_size != blocksize); - err = get_block(inode, iblock, bh, 0); + err = inode->i_op->get_block(inode, iblock, bh, + 0); if (err) SetPageError(page); } @@ -2281,8 +2281,7 @@ out: */ int cont_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, - struct page **pagep, void **fsdata, - get_block_t *get_block, loff_t *bytes) + struct page **pagep, void **fsdata, loff_t *bytes) { struct inode *inode = mapping->host; unsigned blocksize = 1 << inode->i_blkbits; @@ -2301,16 +2300,15 @@ int cont_write_begin(struct file *file, *pagep = NULL; err = block_write_begin(file, mapping, pos, len, - flags, pagep, fsdata, get_block); + flags, pagep, fsdata); out: return err; } -int block_prepare_write(struct page *page, unsigned from, unsigned to, - get_block_t *get_block) +int block_prepare_write(struct page *page, unsigned from, unsigned to) { struct inode *inode = page->mapping->host; - int err = __block_prepare_write(inode, page, from, to, get_block); + int err = __block_prepare_write(inode, page, from, to); if (err) ClearPageUptodate(page); return err; @@ -2356,8 +2354,7 @@ int generic_commit_write(struct file *fi * unlock the page. 
*/ int -block_page_mkwrite(struct vm_area_struct *vma, struct page *page, - get_block_t get_block) +block_page_mkwrite(struct vm_area_struct *vma, struct page *page) { struct inode *inode = vma->vm_file->f_path.dentry->d_inode; unsigned long end; @@ -2378,7 +2375,7 @@ block_page_mkwrite(struct vm_area_struct else end = PAGE_CACHE_SIZE; - ret = block_prepare_write(page, 0, end, get_block); + ret = block_prepare_write(page, 0, end); if (!ret) ret = block_commit_write(page, 0, end); @@ -2427,8 +2424,7 @@ static void attach_nobh_buffers(struct p */ int nobh_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, - struct page **pagep, void **fsdata, - get_block_t *get_block) + struct page **pagep, void **fsdata) { struct inode *inode = mapping->host; const unsigned blkbits = inode->i_blkbits; @@ -2459,7 +2455,7 @@ int nobh_write_begin(struct file *file, page_cache_release(page); *pagep = NULL; return block_write_begin(file, mapping, pos, len, flags, pagep, - fsdata, get_block); + fsdata); } if (PageMappedToDisk(page)) @@ -2497,8 +2493,9 @@ int nobh_write_begin(struct file *file, create = 1; if (block_start >= to) create = 0; - ret = get_block(inode, block_in_file + block_in_page, - bh, create); + ret = inode->i_op->get_block(inode, + block_in_file + block_in_page, + bh, create); if (ret) goto failed; if (!buffer_mapped(bh)) @@ -2610,8 +2607,7 @@ EXPORT_SYMBOL(nobh_write_end); * that it tries to operate without attaching bufferheads to * the page. 
*/ -int nobh_writepage(struct page *page, get_block_t *get_block, - struct writeback_control *wbc) +int nobh_writepage(struct page *page, struct writeback_control *wbc) { struct inode * const inode = page->mapping->host; loff_t i_size = i_size_read(inode); @@ -2649,15 +2645,14 @@ int nobh_writepage(struct page *page, ge */ zero_user_segment(page, offset, PAGE_CACHE_SIZE); out: - ret = mpage_writepage(page, get_block, wbc); + ret = mpage_writepage(page, wbc); if (ret == -EAGAIN) - ret = __block_write_full_page(inode, page, get_block, wbc); + ret = __block_write_full_page(inode, page, wbc); return ret; } EXPORT_SYMBOL(nobh_writepage); -int nobh_truncate_page(struct address_space *mapping, - loff_t from, get_block_t *get_block) +int nobh_truncate_page(struct address_space *mapping, loff_t from) { pgoff_t index = from >> PAGE_CACHE_SHIFT; unsigned offset = from & (PAGE_CACHE_SIZE-1); @@ -2688,7 +2683,7 @@ int nobh_truncate_page(struct address_sp has_buffers: unlock_page(page); page_cache_release(page); - return block_truncate_page(mapping, from, get_block); + return block_truncate_page(mapping, from); } /* Find the buffer that contains "offset" */ @@ -2698,7 +2693,7 @@ has_buffers: pos += blocksize; } - err = get_block(inode, iblock, &map_bh, 0); + err = inode->i_op->get_block(inode, iblock, &map_bh, 0); if (err) goto unlock; /* unmapped? It's a hole - nothing to do */ @@ -2732,8 +2727,7 @@ out: } EXPORT_SYMBOL(nobh_truncate_page); -int block_truncate_page(struct address_space *mapping, - loff_t from, get_block_t *get_block) +int block_truncate_page(struct address_space *mapping, loff_t from) { pgoff_t index = from >> PAGE_CACHE_SHIFT; unsigned offset = from & (PAGE_CACHE_SIZE-1); @@ -2775,7 +2769,7 @@ int block_truncate_page(struct address_s err = 0; if (!buffer_mapped(bh)) { WARN_ON(bh->b_size != blocksize); - err = get_block(inode, iblock, bh, 0); + err = inode->i_op->get_block(inode, iblock, bh, 0); if (err) goto unlock; /* unmapped? 
It's a hole - nothing to do */ @@ -2810,8 +2804,7 @@ out: /* * The generic ->writepage function for buffer-backed address_spaces */ -int block_write_full_page(struct page *page, get_block_t *get_block, - struct writeback_control *wbc) +int block_write_full_page(struct page *page, struct writeback_control *wbc) { struct inode * const inode = page->mapping->host; loff_t i_size = i_size_read(inode); @@ -2820,7 +2813,7 @@ int block_write_full_page(struct page *p /* Is the page fully inside i_size? */ if (page->index < end_index) - return __block_write_full_page(inode, page, get_block, wbc); + return __block_write_full_page(inode, page, wbc); /* Is the page fully outside i_size? (truncate in progress) */ offset = i_size & (PAGE_CACHE_SIZE-1); @@ -2843,18 +2836,17 @@ int block_write_full_page(struct page *p * writes to that region are not written out to the file." */ zero_user_segment(page, offset, PAGE_CACHE_SIZE); - return __block_write_full_page(inode, page, get_block, wbc); + return __block_write_full_page(inode, page, wbc); } -sector_t generic_block_bmap(struct address_space *mapping, sector_t block, - get_block_t *get_block) +sector_t generic_block_bmap(struct address_space *mapping, sector_t block) { struct buffer_head tmp; struct inode *inode = mapping->host; tmp.b_state = 0; tmp.b_blocknr = 0; tmp.b_size = 1 << inode->i_blkbits; - get_block(inode, block, &tmp, 0); + inode->i_op->get_block(inode, block, &tmp, 0); return tmp.b_blocknr; } Index: linux-2.6/fs/direct-io.c =================================================================== --- linux-2.6.orig/fs/direct-io.c +++ linux-2.6/fs/direct-io.c @@ -87,7 +87,6 @@ struct dio { unsigned first_block_in_page; /* doesn't change, Used only once */ int boundary; /* prev block is at a boundary */ int reap_counter; /* rate limit reaping */ - get_block_t *get_block; /* block mapping function */ dio_iodone_t *end_io; /* IO completion function */ sector_t final_block_in_bio; /* current final block in bio + 1 */ sector_t 
next_block_for_io; /* next block to be put under IO, @@ -538,8 +537,8 @@ static int get_more_blocks(struct dio *d * at a higher level for inside-i_size block-instantiating * writes. */ - ret = (*dio->get_block)(dio->inode, fs_startblk, - map_bh, create); + ret = dio->inode->i_op->get_block(dio->inode, fs_startblk, + map_bh, create); } return ret; } @@ -939,8 +938,7 @@ out: static ssize_t direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, const struct iovec *iov, loff_t offset, unsigned long nr_segs, - unsigned blkbits, get_block_t get_block, dio_iodone_t end_io, - struct dio *dio) + unsigned blkbits, dio_iodone_t end_io, struct dio *dio) { unsigned long user_addr; unsigned long flags; @@ -955,7 +953,6 @@ direct_io_worker(int rw, struct kiocb *i dio->blkfactor = inode->i_blkbits - blkbits; dio->block_in_file = offset >> blkbits; - dio->get_block = get_block; dio->end_io = end_io; dio->final_block_in_bio = -1; dio->next_block_for_io = -1; @@ -1116,8 +1113,7 @@ direct_io_worker(int rw, struct kiocb *i ssize_t __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, struct block_device *bdev, const struct iovec *iov, loff_t offset, - unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io, - int dio_lock_type) + unsigned long nr_segs, dio_iodone_t end_io, int dio_lock_type) { int seg; size_t size; @@ -1213,7 +1209,7 @@ __blockdev_direct_IO(int rw, struct kioc (end > i_size_read(inode))); retval = direct_io_worker(rw, iocb, inode, iov, offset, - nr_segs, blkbits, get_block, end_io, dio); + nr_segs, blkbits, end_io, dio); if (rw == READ && dio_lock_type == DIO_LOCKING) release_i_mutex = 0; Index: linux-2.6/fs/mpage.c =================================================================== --- linux-2.6.orig/fs/mpage.c +++ linux-2.6/fs/mpage.c @@ -167,7 +167,7 @@ map_buffer_to_page(struct page *page, st static struct bio * do_mpage_readpage(struct bio *bio, struct page *page, unsigned nr_pages, sector_t *last_block_in_bio, struct 
buffer_head *map_bh, - unsigned long *first_logical_block, get_block_t get_block) + unsigned long *first_logical_block) { struct inode *inode = page->mapping->host; const unsigned blkbits = inode->i_blkbits; @@ -229,7 +229,8 @@ do_mpage_readpage(struct bio *bio, struc if (block_in_file < last_block) { map_bh->b_size = (last_block-block_in_file) << blkbits; - if (get_block(inode, block_in_file, map_bh, 0)) + if (inode->i_op->get_block(inode, block_in_file, + map_bh, 0)) goto confused; *first_logical_block = block_in_file; } @@ -318,7 +319,7 @@ confused: if (bio) bio = mpage_bio_submit(READ, bio); if (!PageUptodate(page)) - block_read_full_page(page, get_block); + block_read_full_page(page); else unlock_page(page); goto out; @@ -332,7 +333,6 @@ confused: * The page at @pages->prev has the lowest file offset, and reads should be * issued in @pages->prev to @pages->next order. * @nr_pages: The number of pages at *@pages - * @get_block: The filesystem's block mapper function. * * This function walks the pages and the blocks within each page, building and * emitting large BIOs. 
@@ -369,7 +369,7 @@ confused: */ int mpage_readpages(struct address_space *mapping, struct list_head *pages, - unsigned nr_pages, get_block_t get_block) + unsigned nr_pages) { struct bio *bio = NULL; unsigned page_idx; @@ -388,8 +388,7 @@ mpage_readpages(struct address_space *ma bio = do_mpage_readpage(bio, page, nr_pages - page_idx, &last_block_in_bio, &map_bh, - &first_logical_block, - get_block); + &first_logical_block); } page_cache_release(page); } @@ -403,7 +402,7 @@ EXPORT_SYMBOL(mpage_readpages); /* * This isn't called much at all */ -int mpage_readpage(struct page *page, get_block_t get_block) +int mpage_readpage(struct page *page) { struct bio *bio = NULL; sector_t last_block_in_bio = 0; @@ -412,7 +411,7 @@ int mpage_readpage(struct page *page, ge clear_buffer_mapped(&map_bh); bio = do_mpage_readpage(bio, page, 1, &last_block_in_bio, - &map_bh, &first_logical_block, get_block); + &map_bh, &first_logical_block); if (bio) mpage_bio_submit(READ, bio); return 0; @@ -438,7 +437,6 @@ EXPORT_SYMBOL(mpage_readpage); struct mpage_data { struct bio *bio; sector_t last_block_in_bio; - get_block_t *get_block; unsigned use_writepage; }; @@ -527,7 +525,7 @@ static int __mpage_writepage(struct page map_bh.b_state = 0; map_bh.b_size = 1 << blkbits; - if (mpd->get_block(inode, block_in_file, &map_bh, 1)) + if (inode->i_op->get_block(inode, block_in_file, &map_bh, 1)) goto confused; if (buffer_new(&map_bh)) unmap_underlying_metadata(map_bh.b_bdev, @@ -656,8 +654,7 @@ out: * mpage_writepages - walk the list of dirty pages of the given address space & writepage() all of them * @mapping: address space structure to write * @wbc: subtract the number of written pages from *@wbc->nr_to_write - * @get_block: the filesystem's block mapper function. - * If this is NULL then use a_ops->writepage. Otherwise, go + * @use_writepages: If this is 1 then use a_ops->writepage. Otherwise, go * direct-to-BIO. 
* * This is a library function, which implements the writepages() @@ -673,17 +670,16 @@ out: */ int mpage_writepages(struct address_space *mapping, - struct writeback_control *wbc, get_block_t get_block) + struct writeback_control *wbc, int use_writepages) { int ret; - if (!get_block) + if (use_writepages) ret = generic_writepages(mapping, wbc); else { struct mpage_data mpd = { .bio = NULL, .last_block_in_bio = 0, - .get_block = get_block, .use_writepage = 1, }; @@ -695,13 +691,11 @@ mpage_writepages(struct address_space *m } EXPORT_SYMBOL(mpage_writepages); -int mpage_writepage(struct page *page, get_block_t get_block, - struct writeback_control *wbc) +int mpage_writepage(struct page *page, struct writeback_control *wbc) { struct mpage_data mpd = { .bio = NULL, .last_block_in_bio = 0, - .get_block = get_block, .use_writepage = 0, }; int ret = __mpage_writepage(page, wbc, &mpd); Index: linux-2.6/include/linux/buffer_head.h =================================================================== --- linux-2.6.orig/include/linux/buffer_head.h +++ linux-2.6/include/linux/buffer_head.h @@ -53,7 +53,7 @@ typedef void (bh_end_io_t)(struct buffer * within a page, and of course as the unit of I/O through the * filesystem and block layers. Nowadays the basic I/O unit * is the bio, and buffer_heads are used for extracting block - * mappings (via a get_block_t call), for tracking state within + * mappings (via a get_block call), for tracking state within * a page (via a page_mapping) and for wrapping bio submission * for backward compatibility reasons (e.g. submit_bh). */ @@ -202,12 +202,11 @@ extern int buffer_heads_over_limit; * address_spaces. 
*/ void block_invalidatepage(struct page *page, unsigned long offset); -int block_write_full_page(struct page *page, get_block_t *get_block, - struct writeback_control *wbc); -int block_read_full_page(struct page*, get_block_t*); +int block_write_full_page(struct page *page, struct writeback_control *wbc); +int block_read_full_page(struct page*); int block_write_begin(struct file *, struct address_space *, loff_t, unsigned, unsigned, - struct page **, void **, get_block_t*); + struct page **, void **); int block_write_end(struct file *, struct address_space *, loff_t, unsigned, unsigned, struct page *, void *); @@ -215,28 +214,25 @@ int generic_write_end(struct file *, str loff_t, unsigned, unsigned, struct page *, void *); void page_zero_new_buffers(struct page *page, unsigned from, unsigned to); -int block_prepare_write(struct page*, unsigned, unsigned, get_block_t*); +int block_prepare_write(struct page*, unsigned, unsigned); int cont_write_begin(struct file *, struct address_space *, loff_t, - unsigned, unsigned, struct page **, void **, - get_block_t *, loff_t *); + unsigned, unsigned, struct page **, void **, loff_t *); int generic_cont_expand_simple(struct inode *inode, loff_t size); int block_commit_write(struct page *page, unsigned from, unsigned to); -int block_page_mkwrite(struct vm_area_struct *vma, struct page *page, - get_block_t get_block); +int block_page_mkwrite(struct vm_area_struct *vma, struct page *page); void block_sync_page(struct page *); -sector_t generic_block_bmap(struct address_space *, sector_t, get_block_t *); +sector_t generic_block_bmap(struct address_space *, sector_t); int generic_commit_write(struct file *, struct page *, unsigned, unsigned); -int block_truncate_page(struct address_space *, loff_t, get_block_t *); +int block_truncate_page(struct address_space *, loff_t); int file_fsync(struct file *, struct dentry *, int); int nobh_write_begin(struct file *, struct address_space *, loff_t, unsigned, unsigned, - struct page **, 
void **, get_block_t*); + struct page **, void **); int nobh_write_end(struct file *, struct address_space *, loff_t, unsigned, unsigned, struct page *, void *); -int nobh_truncate_page(struct address_space *, loff_t, get_block_t *); -int nobh_writepage(struct page *page, get_block_t *get_block, - struct writeback_control *wbc); +int nobh_truncate_page(struct address_space *, loff_t); +int nobh_writepage(struct page *page, struct writeback_control *wbc); void buffer_init(void); Index: linux-2.6/include/linux/fs.h =================================================================== --- linux-2.6.orig/include/linux/fs.h +++ linux-2.6/include/linux/fs.h @@ -1272,6 +1272,8 @@ struct inode_operations { void (*truncate_range)(struct inode *, loff_t, loff_t); long (*fallocate)(struct inode *inode, int mode, loff_t offset, loff_t len); + int (*get_block)(struct inode *inode, sector_t iblock, + struct buffer_head *bh_result, int create); }; struct seq_file; @@ -1903,8 +1905,7 @@ static inline int xip_truncate_page(stru #ifdef CONFIG_BLOCK ssize_t __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, struct block_device *bdev, const struct iovec *iov, loff_t offset, - unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io, - int lock_type); + unsigned long nr_segs, dio_iodone_t end_io, int lock_type); enum { DIO_LOCKING = 1, /* need locking between buffered and direct access */ @@ -1914,29 +1915,26 @@ enum { static inline ssize_t blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, struct block_device *bdev, const struct iovec *iov, - loff_t offset, unsigned long nr_segs, get_block_t get_block, - dio_iodone_t end_io) + loff_t offset, unsigned long nr_segs, dio_iodone_t end_io) { return __blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset, - nr_segs, get_block, end_io, DIO_LOCKING); + nr_segs, end_io, DIO_LOCKING); } static inline ssize_t blockdev_direct_IO_no_locking(int rw, struct kiocb *iocb, struct inode *inode, struct 
block_device *bdev, const struct iovec *iov, - loff_t offset, unsigned long nr_segs, get_block_t get_block, - dio_iodone_t end_io) + loff_t offset, unsigned long nr_segs, dio_iodone_t end_io) { return __blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset, - nr_segs, get_block, end_io, DIO_NO_LOCKING); + nr_segs, end_io, DIO_NO_LOCKING); } static inline ssize_t blockdev_direct_IO_own_locking(int rw, struct kiocb *iocb, struct inode *inode, struct block_device *bdev, const struct iovec *iov, - loff_t offset, unsigned long nr_segs, get_block_t get_block, - dio_iodone_t end_io) + loff_t offset, unsigned long nr_segs, dio_iodone_t end_io) { return __blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset, - nr_segs, get_block, end_io, DIO_OWN_LOCKING); + nr_segs, end_io, DIO_OWN_LOCKING); } #endif Index: linux-2.6/include/linux/mpage.h =================================================================== --- linux-2.6.orig/include/linux/mpage.h +++ linux-2.6/include/linux/mpage.h @@ -14,11 +14,10 @@ struct writeback_control; int mpage_readpages(struct address_space *mapping, struct list_head *pages, - unsigned nr_pages, get_block_t get_block); -int mpage_readpage(struct page *page, get_block_t get_block); + unsigned nr_pages); +int mpage_readpage(struct page *page); int mpage_writepages(struct address_space *mapping, - struct writeback_control *wbc, get_block_t get_block); -int mpage_writepage(struct page *page, get_block_t *get_block, - struct writeback_control *wbc); + struct writeback_control *wbc, int use_writepages); +int mpage_writepage(struct page *page, struct writeback_control *wbc); #endif -- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html