Almost all read/write operations handle data in chunks (segments or pages), and the result has integral behaviour for the following scenario: for_each_chunk() { res = op(....); if(IS_ERROR(res)) return progress ? progress : res; progress += res; } prepare_write may have integral behaviour in the case of blksize < pgsize: for example, we successfully allocated/read some blocks, but not all of them, and then some error happened. Currently we discard this progress by doing vmtruncate() after prepare_write has failed. It would be good to have the ability to signal this progress. Interpret a positive prepare_write() return code as the number of bytes the fs is ready to handle at this moment. I've asked SAW; he thinks it is sane. This size is always less than PAGE_CACHE_SIZE, so it is less than AOP_TRUNCATED_PAGE too. BTW: This approach was used in the OpenVZ 2.6.9 kernel in order to make FS with delayed allocation more correct, and it works well. I think not everybody will be happy about this, but let's discuss all the advantages and disadvantages of this change. Signed-off-by: Dmitriy Monakhov <dmonakhov@xxxxxxxxxx> -------------
diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 62632f5..b4f6eac 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -239,7 +239,11 @@ static int do_lo_send_aops(struct loop_device *lo, struct bio_vec *bvec, page_cache_release(page); continue; } - goto unlock; + if (ret > 0 && ret < PAGE_CACHE_SIZE) + /* prepare_write limits the number of bytes. */ + size = min(size, (unsigned)ret); + else + goto unlock; } transfer_result = lo_do_transfer(lo, WRITE, page, offset, bvec->bv_page, bv_offs, size, IV); diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c index 1e5d2ba..b5eebe8 100644 --- a/fs/ecryptfs/mmap.c +++ b/fs/ecryptfs/mmap.c @@ -454,6 +454,17 @@ static int ecryptfs_write_inode_size_to_header(struct file *lower_file, } lower_a_ops = lower_inode->i_mapping->a_ops; rc = lower_a_ops->prepare_write(lower_file, header_page, 0, 8); + if (unlikely(rc > 0 && rc < PAGE_CACHE_SIZE)) { + /* + * prepare_write can handle fewer bytes than we need. This is not likely + * to really happen because usually we need only one block. In order to + * preserve the prepare/commit balance, invoke commit and fail. + */ + int ret; + ret = lower_a_ops->commit_write(lower_file, header_page, 0, rc); + rc = ret ? ret : -ENOSPC; + goto unlock; + } file_size = (u64)i_size_read(inode); ecryptfs_printk(KERN_DEBUG, "Writing size: [0x%.16x]\n", file_size); file_size = cpu_to_be64(file_size); @@ -462,6 +473,7 @@ static int ecryptfs_write_inode_size_to_header(struct file *lower_file, kunmap_atomic(header_virt, KM_USER0); flush_dcache_page(header_page); rc = lower_a_ops->commit_write(lower_file, header_page, 0, 8); +unlock: if (rc < 0) ecryptfs_printk(KERN_ERR, "Error commiting header page " "write\n"); diff --git a/fs/namei.c b/fs/namei.c index b305589..723db81 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2704,6 +2704,16 @@ retry: page_cache_release(page); goto retry; } + if (unlikely(err > 0 && err < PAGE_CACHE_SIZE)) { + /* + * prepare_write can handle fewer bytes than we need. 
This is not likely + * to really happen because usually we need only one block. In order to + * preserve the prepare/commit balance, invoke commit and fail. + */ + int ret; + ret = mapping->a_ops->commit_write(NULL, page, 0, err); + err = ret ? ret : -ENOSPC; + } if (err) goto fail_map; kaddr = kmap_atomic(page, KM_USER0); diff --git a/fs/splice.c b/fs/splice.c index 2fca6eb..d2b92bf 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -652,6 +652,17 @@ find_page: if (unlikely(ret)) { loff_t isize = i_size_read(mapping->host); + if (ret > 0 && ret < PAGE_CACHE_SIZE) { + /* + * prepare_write limits the number of bytes. In order to + * preserve the prepare/commit balance, invoke commit and fail. + * The initial i_size was saved, so vmtruncate can safely restore it later. + */ + int ret2; + ret2 = mapping->a_ops->commit_write(file, page, offset, + offset + ret); + ret = ret2 ? ret2 : -ENOSPC; + } if (ret != AOP_TRUNCATED_PAGE) unlock_page(page); page_cache_release(page); diff --git a/mm/filemap.c b/mm/filemap.c index 5fe315a..529eb9e 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2188,6 +2188,11 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov, } status = a_ops->prepare_write(file, page, offset, offset+bytes); + if (unlikely(status > 0 && status < PAGE_CACHE_SIZE)) { + /* prepare_write limits the number of bytes at this iteration. */ + bytes = min(bytes, (size_t)status); + status = 0; + } if (unlikely(status)) { loff_t isize = i_size_read(inode);