Rework the generic block "cont" routines to handle the new aops. Supporting cont_prepare_write alongside the new aops would take quite a lot of code, so remove it instead (later patches convert all filesystems to use cont_write_begin). write_begin gets passed AOP_FLAG_CONT_EXPAND when called from generic_cont_expand, so filesystems can avoid the old hacks they used. Cc: hirofumi@xxxxxxxxxxxxxxxxxx Cc: Linux Filesystems <linux-fsdevel@xxxxxxxxxxxxxxx> Signed-off-by: Nick Piggin <npiggin@xxxxxxx> fs/buffer.c | 204 +++++++++++++++++++++----------------------- include/linux/buffer_head.h | 5 - include/linux/fs.h | 1 mm/filemap.c | 5 + 4 files changed, 110 insertions(+), 105 deletions(-) Index: linux-2.6/fs/buffer.c =================================================================== --- linux-2.6.orig/fs/buffer.c +++ linux-2.6/fs/buffer.c @@ -2027,6 +2027,7 @@ int generic_write_end(struct file *file, loff_t pos, unsigned len, unsigned copied, struct page *page, void *fsdata) { + struct inode *inode = mapping->host; copied = block_write_end(file, mapping, pos, len, copied, page, fsdata); unlock_page(page); @@ -2041,6 +2042,8 @@ int generic_write_end(struct file *file, i_size_write(inode, pos+copied); mark_inode_dirty(inode); } + + return copied; } EXPORT_SYMBOL(generic_write_end); @@ -2142,14 +2145,14 @@ int block_read_full_page(struct page *pa } /* utility function for filesystems that need to do work on expanding - * truncates. Uses prepare/commit_write to allow the filesystem to + * truncates. Uses filesystem pagecache writes to allow the filesystem to * deal with the hole. 
*/ -static int __generic_cont_expand(struct inode *inode, loff_t size, - pgoff_t index, unsigned int offset) +int generic_cont_expand_simple(struct inode *inode, loff_t size) { struct address_space *mapping = inode->i_mapping; struct page *page; + void *fsdata; unsigned long limit; int err; @@ -2162,146 +2165,141 @@ static int __generic_cont_expand(struct if (size > inode->i_sb->s_maxbytes) goto out; - err = -ENOMEM; - page = grab_cache_page(mapping, index); - if (!page) - goto out; - err = mapping->a_ops->prepare_write(NULL, page, offset, offset); - if (err) { - /* - * ->prepare_write() may have instantiated a few blocks - * outside i_size. Trim these off again. - */ - unlock_page(page); - page_cache_release(page); - vmtruncate(inode, inode->i_size); + err = pagecache_write_begin(NULL, mapping, size, 0, + AOP_FLAG_UNINTERRUPTIBLE|AOP_FLAG_CONT_EXPAND, + &page, &fsdata); + if (err) goto out; - } - err = mapping->a_ops->commit_write(NULL, page, offset, offset); + err = pagecache_write_end(NULL, mapping, size, 0, 0, page, fsdata); + BUG_ON(err > 0); - unlock_page(page); - page_cache_release(page); - if (err > 0) - err = 0; out: return err; } int generic_cont_expand(struct inode *inode, loff_t size) { - pgoff_t index; unsigned int offset; offset = (size & (PAGE_CACHE_SIZE - 1)); /* Within page */ /* ugh. in prepare/commit_write, if from==to==start of block, we - ** skip the prepare. make sure we never send an offset for the start - ** of a block - */ + * skip the prepare. make sure we never send an offset for the start + * of a block. + * XXX: actually, this should be handled in those filesystems by + * checking for the AOP_FLAG_CONT_EXPAND flag. + */ if ((offset & (inode->i_sb->s_blocksize - 1)) == 0) { /* caller must handle this extra byte. 
*/ - offset++; + size++; } - index = size >> PAGE_CACHE_SHIFT; - - return __generic_cont_expand(inode, size, index, offset); + return generic_cont_expand_simple(inode, size); } -int generic_cont_expand_simple(struct inode *inode, loff_t size) +int cont_expand_zero(struct file *file, struct address_space *mapping, + loff_t pos, loff_t *bytes) { - loff_t pos = size - 1; - pgoff_t index = pos >> PAGE_CACHE_SHIFT; - unsigned int offset = (pos & (PAGE_CACHE_SIZE - 1)) + 1; - - /* prepare/commit_write can handle even if from==to==start of block. */ - return __generic_cont_expand(inode, size, index, offset); -} - -/* - * For moronic filesystems that do not allow holes in file. - * We may have to extend the file. - */ - -int cont_prepare_write(struct page *page, unsigned offset, - unsigned to, get_block_t *get_block, loff_t *bytes) -{ - struct address_space *mapping = page->mapping; struct inode *inode = mapping->host; - struct page *new_page; - pgoff_t pgpos; - long status; - unsigned zerofrom; unsigned blocksize = 1 << inode->i_blkbits; + struct page *page; + void *fsdata; + pgoff_t index, curidx; + loff_t curpos; + unsigned zerofrom, offset, len; void *kaddr; + int err = 0; - while(page->index > (pgpos = *bytes>>PAGE_CACHE_SHIFT)) { - status = -ENOMEM; - new_page = grab_cache_page(mapping, pgpos); - if (!new_page) - goto out; - /* we might sleep */ - if (*bytes>>PAGE_CACHE_SHIFT != pgpos) { - unlock_page(new_page); - page_cache_release(new_page); - continue; - } - zerofrom = *bytes & ~PAGE_CACHE_MASK; + index = pos >> PAGE_CACHE_SHIFT; + offset = pos & ~PAGE_CACHE_MASK; + + while (index > (curidx = (curpos = *bytes)>>PAGE_CACHE_SHIFT)) { + zerofrom = curpos & ~PAGE_CACHE_MASK; if (zerofrom & (blocksize-1)) { *bytes |= (blocksize-1); (*bytes)++; } - status = __block_prepare_write(inode, new_page, zerofrom, - PAGE_CACHE_SIZE, get_block); - if (status) - goto out_unmap; - kaddr = kmap_atomic(new_page, KM_USER0); - memset(kaddr+zerofrom, 0, PAGE_CACHE_SIZE-zerofrom); - 
flush_dcache_page(new_page); + len = PAGE_CACHE_SIZE - zerofrom; + + err = pagecache_write_begin(file, mapping, curpos, len, + AOP_FLAG_UNINTERRUPTIBLE, + &page, &fsdata); + if (err) + goto out; + kaddr = kmap_atomic(page, KM_USER0); + memset(kaddr+zerofrom, 0, len); + flush_dcache_page(page); kunmap_atomic(kaddr, KM_USER0); - generic_commit_write(NULL, new_page, zerofrom, PAGE_CACHE_SIZE); - unlock_page(new_page); - page_cache_release(new_page); + err = pagecache_write_end(file, mapping, curpos, len, len, + page, fsdata); + if (err < 0) + goto out; + BUG_ON(err != len); + err = 0; } - if (page->index < pgpos) { - /* completely inside the area */ - zerofrom = offset; - } else { - /* page covers the boundary, find the boundary offset */ - zerofrom = *bytes & ~PAGE_CACHE_MASK; - + /* page covers the boundary, find the boundary offset */ + if (index == curidx) { + zerofrom = curpos & ~PAGE_CACHE_MASK; /* if we will expand the thing last block will be filled */ - if (to > zerofrom && (zerofrom & (blocksize-1))) { + if (offset <= zerofrom) { + goto out; + } + if (zerofrom & (blocksize-1)) { *bytes |= (blocksize-1); (*bytes)++; } + len = offset - zerofrom; - /* starting below the boundary? 
Nothing to zero out */ - if (offset <= zerofrom) - zerofrom = offset; - } - status = __block_prepare_write(inode, page, zerofrom, to, get_block); - if (status) - goto out1; - if (zerofrom < offset) { + err = pagecache_write_begin(file, mapping, curpos, len, + AOP_FLAG_UNINTERRUPTIBLE, + &page, &fsdata); + if (err) + goto out; kaddr = kmap_atomic(page, KM_USER0); - memset(kaddr+zerofrom, 0, offset-zerofrom); + memset(kaddr+zerofrom, 0, len); flush_dcache_page(page); kunmap_atomic(kaddr, KM_USER0); - __block_commit_write(inode, page, zerofrom, offset); + err = pagecache_write_end(file, mapping, curpos, len, len, + page, fsdata); + if (err < 0) + goto out; + BUG_ON(err != len); + err = 0; } - return 0; -out1: - ClearPageUptodate(page); - return status; - -out_unmap: - ClearPageUptodate(new_page); - unlock_page(new_page); - page_cache_release(new_page); out: - return status; + return err; +} + +/* + * For moronic filesystems that do not allow holes in file. + * We may have to extend the file. 
+ */ +int cont_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata, + get_block_t *get_block, loff_t *bytes) +{ + struct inode *inode = mapping->host; + unsigned blocksize = 1 << inode->i_blkbits; + unsigned zerofrom; + int err; + + err = cont_expand_zero(file, mapping, pos, bytes); + if (err) + goto out; + + zerofrom = *bytes & ~PAGE_CACHE_MASK; + if (pos+len > *bytes && zerofrom & (blocksize-1)) { + *bytes |= (blocksize-1); + (*bytes)++; + } + + *pagep = NULL; + err = block_write_begin(file, mapping, pos, len, + flags, pagep, fsdata, get_block); +out: + return err; } int block_prepare_write(struct page *page, unsigned from, unsigned to, @@ -3160,7 +3158,7 @@ EXPORT_SYMBOL(block_read_full_page); EXPORT_SYMBOL(block_sync_page); EXPORT_SYMBOL(block_truncate_page); EXPORT_SYMBOL(block_write_full_page); -EXPORT_SYMBOL(cont_prepare_write); +EXPORT_SYMBOL(cont_write_begin); EXPORT_SYMBOL(end_buffer_read_sync); EXPORT_SYMBOL(end_buffer_write_sync); EXPORT_SYMBOL(file_fsync); Index: linux-2.6/include/linux/buffer_head.h =================================================================== --- linux-2.6.orig/include/linux/buffer_head.h +++ linux-2.6/include/linux/buffer_head.h @@ -213,8 +213,9 @@ int generic_write_end(struct file *, str struct page *, void *); void page_zero_new_buffers(struct page *page, unsigned from, unsigned to); int block_prepare_write(struct page*, unsigned, unsigned, get_block_t*); -int cont_prepare_write(struct page*, unsigned, unsigned, get_block_t*, - loff_t *); +int cont_write_begin(struct file *, struct address_space *, loff_t, + unsigned, unsigned, struct page **, void **, + get_block_t *, loff_t *); int generic_cont_expand(struct inode *inode, loff_t size); int generic_cont_expand_simple(struct inode *inode, loff_t size); int block_commit_write(struct page *page, unsigned from, unsigned to); Index: linux-2.6/include/linux/fs.h 
=================================================================== --- linux-2.6.orig/include/linux/fs.h +++ linux-2.6/include/linux/fs.h @@ -392,6 +392,7 @@ enum positive_aop_returns { }; #define AOP_FLAG_UNINTERRUPTIBLE 0x0001 /* will not do a short write */ +#define AOP_FLAG_CONT_EXPAND 0x0002 /* called from cont_expand */ /* * oh the beauties of C type declarations. Index: linux-2.6/mm/filemap.c =================================================================== --- linux-2.6.orig/mm/filemap.c +++ linux-2.6/mm/filemap.c @@ -1789,6 +1789,7 @@ size_t iov_iter_copy_from_user_atomic(st return copied; } +EXPORT_SYMBOL(iov_iter_copy_from_user_atomic); /* * This has the same sideeffects and return value as @@ -1815,6 +1816,7 @@ size_t iov_iter_copy_from_user(struct pa kunmap(page); return copied; } +EXPORT_SYMBOL(iov_iter_copy_from_user); static void __iov_iter_advance_iov(struct iov_iter *i, size_t bytes) { @@ -1846,6 +1848,7 @@ void iov_iter_advance(struct iov_iter *i __iov_iter_advance_iov(i, bytes); i->count -= bytes; } +EXPORT_SYMBOL(iov_iter_advance); int iov_iter_fault_in_readable(struct iov_iter *i) { @@ -1853,6 +1856,7 @@ int iov_iter_fault_in_readable(struct io char __user *buf = i->iov->iov_base + i->iov_offset; return fault_in_pages_readable(buf, seglen); } +EXPORT_SYMBOL(iov_iter_fault_in_readable); /* * Return the count of just the current iov_iter segment. @@ -1865,6 +1869,7 @@ size_t iov_iter_single_seg_count(struct else return min(i->count, iov->iov_len - i->iov_offset); } +EXPORT_SYMBOL(iov_iter_single_seg_count); /* * Performs necessary checks before doing a write -- - To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html