I'm wondering whether we should add an fsync_range syscall, like the one AIX and some BSDs have. It's pretty simple for the pagecache, since it already implements the full sync with range syncs anyway. For filesystems and user programs, I imagine it is a bit easier to convert from fsync to fsync_range than to use the sync_file_range syscall. Having a flags argument is nice, but AIX seems to use O_SYNC as the flag; I wonder if we should follow suit? The patch isn't complete yet... --- fs/sync.c | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 50 insertions(+), 4 deletions(-) Index: linux-2.6/fs/sync.c =================================================================== --- linux-2.6.orig/fs/sync.c +++ linux-2.6/fs/sync.c @@ -76,10 +76,12 @@ int file_fsync(struct file *filp, struct } /** - * vfs_fsync - perform a fsync or fdatasync on a file + * vfs_fsync_range - perform a fsync or fdatasync on part of a file * @file: file to sync * @dentry: dentry of @file * @data: only perform a fdatasync operation + * @start: first byte to be synced + * @end: last byte to be synced * * Write back data and metadata for @file to disk. If @datasync is * set only metadata needed to access modified file data is written. @@ -88,7 +90,8 @@ int file_fsync(struct file *filp, struct * only @dentry is set. This can only happen when the filesystem * implements the export_operations API. 
*/ -int vfs_fsync(struct file *file, struct dentry *dentry, int datasync) +int vfs_fsync_range(struct file *file, struct dentry *dentry, int datasync, + loff_t start, loff_t end) { const struct file_operations *fop; struct address_space *mapping; @@ -112,7 +115,7 @@ int vfs_fsync(struct file *file, struct goto out; } - ret = filemap_fdatawrite(mapping); + ret = filemap_fdatawrite_range(mapping, start, end); /* * We need to protect against concurrent writers, which could cause @@ -123,12 +126,32 @@ int vfs_fsync(struct file *file, struct if (!ret) ret = err; mutex_unlock(&mapping->host->i_mutex); - err = filemap_fdatawait(mapping); + err = wait_on_page_writeback_range(mapping, + start >> PAGE_CACHE_SHIFT, end >> PAGE_CACHE_SHIFT); if (!ret) ret = err; out: return ret; } +EXPORT_SYMBOL(vfs_fsync_range); + +/** + * vfs_fsync - perform a fsync or fdatasync on a file + * @file: file to sync + * @dentry: dentry of @file + * @data: only perform a fdatasync operation + * + * Write back data and metadata for @file to disk. If @datasync is + * set only metadata needed to access modified file data is written. + * + * In case this function is called from nfsd @file may be %NULL and + * only @dentry is set. This can only happen when the filesystem + * implements the export_operations API. 
+ */ +int vfs_fsync(struct file *file, struct dentry *dentry, int datasync) +{ + return vfs_fsync_range(file, dentry, datasync, 0, LLONG_MAX); +} EXPORT_SYMBOL(vfs_fsync); static int do_fsync(unsigned int fd, int datasync) @@ -154,6 +177,29 @@ SYSCALL_DEFINE1(fdatasync, unsigned int, return do_fsync(fd, 1); } +SYSCALL_DEFINE(fsync_range)(int fd, int how, loff_t start, loff_t length) +{ + struct file *file; + loff_t end; + int ret = -EBADF; + + if (how != O_DSYNC && how != O_SYNC) + return -EINVAL; + + if (length == 0) + end = LLONG_MAX; + else + end = start + length - 1; + + file = fget(fd); + if (file) { + ret = vfs_fsync_range(file, file->f_path.dentry, how == O_DSYNC, + start, end); + fput(file); + } + return ret; +} + /* * sys_sync_file_range() permits finely controlled syncing over a segment of * a file in the range offset .. (offset+nbytes-1) inclusive. If nbytes is -- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html