From: Boaz Harrosh <boazh@xxxxxxxxxx>

Add more file operations. Some call stubs in other files.

Signed-off-by: Boaz Harrosh <boazh@xxxxxxxxxx>
---
 fs/zuf/_extern.h  |   4 +
 fs/zuf/file.c     | 470 ++++++++++++++++++++++++++++++++++++++++++++++
 fs/zuf/rw.c       |  12 ++
 fs/zuf/zuf-core.c |   4 +
 fs/zuf/zus_api.h  |  45 +++++
 5 files changed, 535 insertions(+)

diff --git a/fs/zuf/_extern.h b/fs/zuf/_extern.h
index 32a381ac4bd7..391484b0e125 100644
--- a/fs/zuf/_extern.h
+++ b/fs/zuf/_extern.h
@@ -48,6 +48,10 @@ uint zuf_prepare_symname(struct zufs_ioc_new_inode *ioc_new_inode,


 /* rw.c */
+ssize_t zuf_rw_read_iter(struct super_block *sb, struct inode *inode,
+			 struct kiocb *kiocb, struct iov_iter *ii);
+ssize_t zuf_rw_write_iter(struct super_block *sb, struct inode *inode,
+			  struct kiocb *kiocb, struct iov_iter *ii);
 int zuf_trim_edge(struct inode *inode, ulong filepos, uint len);

 /* super.c */
diff --git a/fs/zuf/file.c b/fs/zuf/file.c
index c6c8ca71e957..0e62145e923a 100644
--- a/fs/zuf/file.c
+++ b/fs/zuf/file.c
@@ -13,14 +13,484 @@
  *	Sagi Manole <sagim@xxxxxxxxxx>"
  */

+#include <linux/fs.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/uio.h>
+#include <linux/mm.h>
+#include <linux/uaccess.h>
+#include <linux/falloc.h>
+#include <linux/mman.h>
+#include <linux/fadvise.h>
+#include <linux/delay.h>
 #include "zuf.h"

+/*
+ * zuf_fallocate - ->fallocate() for regular files.
+ *
+ * For ZERO_RANGE/PUNCH_HOLE the partial blocks at both edges are handled
+ * through zuf_trim_edge() (and, for ZERO_RANGE, cut from the dispatched
+ * range); the request is then sent to the server as ZUFS_OP_FALLOCATE.
+ * Runs under the inode write lock. Returns 0 or a negative errno.
+ */
+static long zuf_fallocate(struct file *file, int mode, loff_t offset,
+			   loff_t len)
+{
+	struct inode *inode = file->f_path.dentry->d_inode;
+	struct zuf_inode_info *zii = ZUII(inode);
+	struct zufs_ioc_range ioc_range = {
+		.hdr.in_len = sizeof(ioc_range),
+		.hdr.operation = ZUFS_OP_FALLOCATE,
+		.zus_ii = zii->zus_ii,
+		.offset = offset,
+		.length = len,
+		.opflags = mode,
+	};
+	enum {FALLOC_RETRY = 7};
+	int retry = 0;
+	int err = 0;
+
+	zuf_dbg_vfs("[%ld] mode=0x%x offset=0x%llx len=0x%llx\n",
+		     inode->i_ino, mode, offset, len);
+
+	if (!S_ISREG(inode->i_mode))
+		return -EINVAL;
+
+	zuf_w_lock(zii);
+
+	if (!(mode & FALLOC_FL_KEEP_SIZE) &&
+	    (i_size_read(inode) < offset + len)) {
+		err = inode_newsize_ok(inode, offset + len);
+		if (unlikely(err))
+			goto out;
+	}
+
+	zus_inode_cmtime_now(inode, zii->zi);
+
+	if (mode & (FALLOC_FL_ZERO_RANGE | FALLOC_FL_PUNCH_HOLE)) {
+		/* ASSUMING FS supports these two */
+		struct super_block *sb = inode->i_sb;
+		ulong off1 = offset & (sb->s_blocksize - 1);
+		ulong off2 = (offset + len) & (sb->s_blocksize - 1);
+
+		if (md_o2p(offset) == md_o2p(offset + len)) {
+			/* Same block. Just nullify the range and goto out */
+			err = zuf_trim_edge(inode, offset, off2 - off1);
+			goto out_update;
+		}
+		if (off1) {
+			uint l = sb->s_blocksize - off1;
+
+			err = zuf_trim_edge(inode, offset, l);
+			if (unlikely(err))
+				goto out;
+			if (mode & FALLOC_FL_ZERO_RANGE) {
+				ioc_range.offset += l;
+				ioc_range.length -= l;
+			}
+		}
+		if (off2) {
+			err = zuf_trim_edge(inode, (offset + len) - off2, off2);
+			if (unlikely(err))
+				goto out;
+			if (mode & FALLOC_FL_ZERO_RANGE)
+				ioc_range.length -= off2;
+		}
+	}
+
+	/* no length remains, but size might have changed in trim_edge */
+	if (!ioc_range.length)
+		goto out_update;
+
+again:
+	err = zufc_dispatch(ZUF_ROOT(SBI(inode->i_sb)), &ioc_range.hdr,
+			    NULL, 0);
+	if (unlikely(err)) {
+		if (err == -EZUFS_RETRY) {
+			if (FALLOC_RETRY < retry++) {
+				zuf_dbg_err("[%ld] retry=%d\n",
+					    inode->i_ino, retry);
+				msleep(retry - FALLOC_RETRY);
+			}
+			goto again;
+		}
+		zuf_dbg_err("[%ld] zufc_dispatch failed => %d\n",
+			    inode->i_ino, err);
+	}
+
+out_update:
+	i_size_write(inode, le64_to_cpu(zii->zi->i_size));
+	inode->i_blocks = le64_to_cpu(zii->zi->i_blocks);
+
+out:
+	zuf_w_unlock(zii);
+
+	return err;
+}
+
+/*
+ * zuf_llseek - ->llseek(); only SEEK_DATA/SEEK_HOLE are sent to the server,
+ * every other whence goes through generic_file_llseek().
+ * Returns the new file position or a negative errno.
+ */
+static loff_t zuf_llseek(struct file *file, loff_t offset, int whence)
+{
+	struct inode *inode = file->f_path.dentry->d_inode;
+	struct zuf_inode_info *zii = ZUII(inode);
+	struct zufs_ioc_seek ioc_seek = {
+		.hdr.in_len = sizeof(ioc_seek),
+		.hdr.out_len = sizeof(ioc_seek),
+		.hdr.operation = ZUFS_OP_LLSEEK,
+		.zus_ii = zii->zus_ii,
+		.offset_in = offset,
+		.whence = whence,
+	};
+	int err = 0;
+
+	zuf_dbg_vfs("[%ld] offset=0x%llx whence=%d\n",
+		     inode->i_ino, offset, whence);
+
+	if (whence != SEEK_DATA && whence != SEEK_HOLE)
+		return generic_file_llseek(file, offset, whence);
+
+	zuf_r_lock(zii);
+
+	if ((offset < 0 && !(file->f_mode & FMODE_UNSIGNED_OFFSET)) ||
+	    offset > inode->i_sb->s_maxbytes) {
+		err = -EINVAL;
+		goto out;
+	} else if (i_size_read(inode) <= offset) {
+		err = -ENXIO;
+		goto out;
+	} else if (!inode->i_blocks) {
+		if (whence == SEEK_HOLE) {
+			/* answered locally, but f_pos must still move */
+			ioc_seek.offset_out = i_size_read(inode);
+			goto out_setpos;
+		}
+		err = -ENXIO;
+		goto out;
+	}
+
+	err = zufc_dispatch(ZUF_ROOT(SBI(inode->i_sb)), &ioc_seek.hdr, NULL, 0);
+	if (unlikely(err)) {
+		zuf_dbg_err("zufc_dispatch failed => %d\n", err);
+		goto out;
+	}
+
+out_setpos:
+	if (ioc_seek.offset_out != file->f_pos) {
+		file->f_pos = ioc_seek.offset_out;
+		file->f_version = 0;
+	}
+
+out:
+	zuf_r_unlock(zii);
+
+	return err ?: ioc_seek.offset_out;
+}
+
+/* This callback is called when a file is closed */
+static int zuf_flush(struct file *file, fl_owner_t id)
+{
+	zuf_dbg_vfs("[%ld]\n", file->f_inode->i_ino);
+
+	return 0;
+}
+
+/* ->fiemap() stub: -EBADR on unsupported flags, else always -EOPNOTSUPP */
+static int tozu_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
+		       u64 offset, u64 len)
+{
+	int err = -EOPNOTSUPP;
+	ulong start_index = md_o2p(offset);
+	ulong end_index = md_o2p_up(offset + len);
+	struct zuf_inode_info *zii = ZUII(inode);
+
+	zuf_dbg_vfs(
+		"[%ld] offset=0x%llx len=0x%llx i-start=0x%lx i-end=0x%lx\n",
+		inode->i_ino, offset, len, start_index, end_index);
+
+	if (fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC))
+		return -EBADR;
+
+	zuf_r_lock(zii);
+
+	/* TODO: ZUS fiemap (&msi)*/
+
+	zuf_r_unlock(zii);
+	return err;
+}
+
+/*
+ * Take the write lock of both inodes in ascending address order so that two
+ * concurrent callers passing the same pair in opposite order cannot deadlock.
+ */
+static void _lock_two_ziis(struct zuf_inode_info *zii1,
+			   struct zuf_inode_info *zii2)
+{
+	if (zii1 > zii2)
+		swap(zii1, zii2);
+
+	zuf_w_lock(zii1);
+	if (zii1 != zii2)
+		zuf_w_lock_nested(zii2);
+}
+
+/* Release the locks taken by _lock_two_ziis(), nested lock first */
+static void _unlock_two_ziis(struct zuf_inode_info *zii1,
+			     struct zuf_inode_info *zii2)
+{
+	if (zii1 > zii2)
+		swap(zii1, zii2);
+
+	if (zii1 != zii2)
+		zuf_w_unlock(zii2);
+	zuf_w_unlock(zii1);
+}
+
+/*
+ * Dispatch @operation (a clone) of @len bytes from @src_inode:@pos_in to
+ * @dst_inode:@pos_out with both inodes write-locked, then refresh the
+ * destination's VFS size and block count from its zus_inode.
+ * Returns 0 or a negative errno.
+ */
+static int _clone_file_range(struct inode *src_inode, loff_t pos_in,
+			     struct inode *dst_inode, loff_t pos_out,
+			     u64 len, u64 len_up, int operation)
+{
+	struct zuf_inode_info *src_zii = ZUII(src_inode);
+	struct zuf_inode_info *dst_zii = ZUII(dst_inode);
+	struct zus_inode *dst_zi = dst_zii->zi;
+	struct super_block *sb = src_inode->i_sb;
+	struct zufs_ioc_clone ioc_clone = {
+		.hdr.in_len = sizeof(ioc_clone),
+		.hdr.out_len = sizeof(ioc_clone),
+		.hdr.operation = operation,
+		.src_zus_ii = src_zii->zus_ii,
+		.dst_zus_ii = dst_zii->zus_ii,
+		.pos_in = pos_in,
+		.pos_out = pos_out,
+		.len = len,
+		.len_up = len_up,
+	};
+	int err;
+
+	_lock_two_ziis(src_zii, dst_zii);
+
+	/* NOTE: len==0 means to-end-of-file which is what we want */
+	unmap_mapping_range(src_inode->i_mapping, pos_in, len, 0);
+	unmap_mapping_range(dst_inode->i_mapping, pos_out, len, 0);
+
+	zus_inode_cmtime_now(dst_inode, dst_zi);
+	err = zufc_dispatch(ZUF_ROOT(SBI(sb)), &ioc_clone.hdr, NULL, 0);
+	if (unlikely(err && err != -EINTR)) {
+		zuf_err("failed to clone %ld -> %ld ; err=%d\n",
+			src_inode->i_ino, dst_inode->i_ino, err);
+		goto out;
+	}
+
+	dst_inode->i_blocks = le64_to_cpu(dst_zi->i_blocks);
+	i_size_write(dst_inode, le64_to_cpu(dst_zi->i_size));
+
+out:
+	_unlock_two_ziis(src_zii, dst_zii);
+
+	return err;
+}
+
+/*
+ * zuf_clone_file_range - ->remap_file_range().
+ *
+ * Positions must be block aligned; an unaligned @len is accepted only when
+ * the range reaches or passes EOF of both files. Returns the number of
+ * bytes cloned (0 when source and destination are the very same range) or
+ * a negative errno.
+ */
+static loff_t zuf_clone_file_range(struct file *file_in, loff_t pos_in,
+				   struct file *file_out, loff_t pos_out,
+				   loff_t len, uint remap_flags)
+{
+	struct inode *src_inode = file_inode(file_in);
+	struct inode *dst_inode = file_inode(file_out);
+	ulong src_size = i_size_read(src_inode);
+	ulong dst_size = i_size_read(dst_inode);
+	struct super_block *sb = src_inode->i_sb;
+	ulong len_up = len;
+	int err;
+
+	zuf_dbg_vfs(
+		"ino-in=%ld ino-out=%ld pos_in=0x%llx pos_out=0x%llx length=0x%llx\n",
+		src_inode->i_ino, dst_inode->i_ino, pos_in, pos_out, len);
+
+	if (remap_flags & ~REMAP_FILE_ADVISORY)
+		return -EINVAL;
+
+	/* len==0 means up to end of SRC; resolve it before checking overlap */
+	if (!len)
+		len_up = len = src_size - pos_in;
+
+	if (src_inode == dst_inode) {
+		if (pos_in == pos_out) {
+			zuf_dbg_err("[%ld] Clone nothing!!\n",
+				src_inode->i_ino);
+			return 0;
+		}
+		if (pos_in < pos_out) {
+			if (pos_in + len > pos_out) {
+				zuf_dbg_err(
+					"[%ld] overlapping pos_in < pos_out?? => EINVAL\n",
+					src_inode->i_ino);
+				return -EINVAL;
+			}
+		} else {
+			if (pos_out + len > pos_in) {
+				zuf_dbg_err("[%ld] overlapping pos_out < pos_in?? => EINVAL\n",
+					src_inode->i_ino);
+				return -EINVAL;
+			}
+		}
+	}
+
+	if ((pos_in & (sb->s_blocksize - 1)) ||
+	    (pos_out & (sb->s_blocksize - 1))) {
+		zuf_err("[%ld] Not aligned len=0x%llx pos_in=0x%llx "
+			"pos_out=0x%llx src-size=0x%llx dst-size=0x%llx\n",
+			src_inode->i_ino, len, pos_in, pos_out,
+			i_size_read(src_inode), i_size_read(dst_inode));
+		return -EINVAL;
+	}
+
+	if (!pos_in && !pos_out && (src_size <= pos_in + len) &&
+	    (dst_size <= src_size)) {
+		len_up = 0;
+	} else if (len & (sb->s_blocksize - 1)) {
+		/* un-aligned len, see if it is beyond EOF */
+		if ((src_size > pos_in + len) ||
+		    (dst_size > pos_out + len)) {
+			zuf_err("[%ld] Not aligned len=0x%llx pos_in=0x%llx "
+				"pos_out=0x%llx src-size=0x%lx dst-size=0x%lx\n",
+				src_inode->i_ino, len, pos_in, pos_out,
+				src_size, dst_size);
+			return -EINVAL;
+		}
+		len_up = md_p2o(md_o2p_up(len));
+	}
+
+	err = _clone_file_range(src_inode, pos_in, dst_inode, pos_out, len,
+				len_up, ZUFS_OP_CLONE);
+	if (unlikely(err))
+		zuf_err("_clone_file_range failed => %d\n", err);
+
+	return err ? err : len;
+}
+
+/* ->copy_file_range(), implemented as a clone of the same range */
+static ssize_t zuf_copy_file_range(struct file *file_in, loff_t pos_in,
+				   struct file *file_out, loff_t pos_out,
+				   size_t len, uint flags)
+{
+	struct inode *src_inode = file_inode(file_in);
+	struct inode *dst_inode = file_inode(file_out);
+	ssize_t ret;
+
+	zuf_dbg_vfs("ino-in=%ld ino-out=%ld pos_in=0x%llx pos_out=0x%llx length=0x%zx\n",
+		     src_inode->i_ino, dst_inode->i_ino, pos_in, pos_out, len);
+
+	ret = zuf_clone_file_range(file_in, pos_in, file_out, pos_out, len,
+				   REMAP_FILE_ADVISORY);
+
+	return ret ?: len;
+}
+
+/* ZUFS:
+ * make sure we clean up the resources consumed by zufs_init()
+ */
+static int zuf_file_release(struct inode *inode, struct file *filp)
+{
+	if (unlikely(filp->private_data))
+		zuf_err("not yet\n");
+
+	return 0;
+}
+
+/* ->read_iter(): read through zuf_rw_read_iter() under the inode read lock */
+static ssize_t zuf_read_iter(struct kiocb *kiocb, struct iov_iter *ii)
+{
+	struct inode *inode = file_inode(kiocb->ki_filp);
+	struct zuf_inode_info *zii = ZUII(inode);
+	ssize_t ret;
+
+	zuf_dbg_vfs("[%ld] ppos=0x%llx len=0x%zx\n",
+		     inode->i_ino, kiocb->ki_pos, iov_iter_count(ii));
+
+	file_accessed(kiocb->ki_filp);
+
+	zuf_r_lock(zii);
+
+	ret = zuf_rw_read_iter(inode->i_sb, inode, kiocb, ii);
+
+	zuf_r_unlock(zii);
+
+	zuf_dbg_vfs("[%ld] => 0x%zx\n", inode->i_ino, ret);
+	return ret;
+}
+
+/*
+ * ->write_iter(): write through zuf_rw_write_iter(), then pull the updated
+ * size and block count from the zus_inode into the VFS inode.
+ */
+static ssize_t zuf_write_iter(struct kiocb *kiocb, struct iov_iter *ii)
+{
+	struct inode *inode = file_inode(kiocb->ki_filp);
+	struct zuf_inode_info *zii = ZUII(inode);
+	ssize_t ret;
+
+	ret = generic_write_checks(kiocb, ii);
+	if (unlikely(ret <= 0)) {	/* error, or nothing to write */
+		zuf_dbg_vfs("[%ld] generic_write_checks => 0x%zx\n",
+			    inode->i_ino, ret);
+		return ret;
+	}
+
+	zuf_r_lock(zii);
+
+	ret = file_remove_privs(kiocb->ki_filp);
+	if (unlikely(ret < 0))
+		goto out;
+
+	zus_inode_cmtime_now(inode, zii->zi);
+
+	ret = zuf_rw_write_iter(inode->i_sb, inode, kiocb, ii);
+	if (unlikely(ret < 0))
+		goto out;
+
+	if (i_size_read(inode) <= le64_to_cpu(zii->zi->i_size))
+		i_size_write(inode, le64_to_cpu(zii->zi->i_size));
+
+	inode->i_blocks = le64_to_cpu(zii->zi->i_blocks);
+
+out:
+	zuf_r_unlock(zii);
+
+	zuf_dbg_vfs("[%ld] => 0x%zx\n", inode->i_ino, ret);
+	return ret;
+}
+
 const struct file_operations zuf_file_operations = {
+	.llseek			= zuf_llseek,
+	.read_iter		= zuf_read_iter,
+	.write_iter		= zuf_write_iter,
 	.open			= generic_file_open,
+	.flush			= zuf_flush,
+	.release		= zuf_file_release,
+	.fallocate		= zuf_fallocate,
+	.copy_file_range	= zuf_copy_file_range,
+	.remap_file_range	= zuf_clone_file_range,
 };

 const struct inode_operations zuf_file_inode_operations = {
 	.setattr	= zuf_setattr,
 	.getattr	= zuf_getattr,
 	.update_time	= zuf_update_time,
+	.fiemap		= tozu_fiemap,
 };
diff --git a/fs/zuf/rw.c b/fs/zuf/rw.c
index 1eb8453da564..335bfd256499 100644
--- a/fs/zuf/rw.c
+++ b/fs/zuf/rw.c
@@ -23,3 +23,15 @@ int zuf_trim_edge(struct inode *inode, ulong filepos, uint len)
 {
 	return -EIO;
 }
+
+ssize_t zuf_rw_read_iter(struct super_block *sb, struct inode *inode,
+			 struct kiocb *kiocb, struct iov_iter *ii)
+{
+	return -EIO;
+}
+
+ssize_t zuf_rw_write_iter(struct super_block *sb, struct inode *inode,
+			  struct kiocb *kiocb, struct iov_iter *ii)
+{
+	return -EIO;
+}
diff --git a/fs/zuf/zuf-core.c b/fs/zuf/zuf-core.c
index 3a264e6475c4..96ffc6244daa 100644
--- a/fs/zuf/zuf-core.c
+++ b/fs/zuf/zuf-core.c
@@ -774,8 +774,12 @@ const char *zuf_op_name(enum e_zufs_operation op)
 		CASE_ENUM_NAME(ZUFS_OP_REMOVE_DENTRY	);
 		CASE_ENUM_NAME(ZUFS_OP_RENAME		);
 		CASE_ENUM_NAME(ZUFS_OP_READDIR		);
+		CASE_ENUM_NAME(ZUFS_OP_CLONE		);
+		CASE_ENUM_NAME(ZUFS_OP_COPY		);
 		CASE_ENUM_NAME(ZUFS_OP_GET_SYMLINK	);
 		CASE_ENUM_NAME(ZUFS_OP_SETATTR		);
+		CASE_ENUM_NAME(ZUFS_OP_FALLOCATE	);
+		CASE_ENUM_NAME(ZUFS_OP_LLSEEK		);
 		CASE_ENUM_NAME(ZUFS_OP_BREAK		);
 	default:
 		return "UNKNOWN";
diff --git a/fs/zuf/zus_api.h b/fs/zuf/zus_api.h
index 74f69a12a263..32e8c2cae518 100644
--- a/fs/zuf/zus_api.h
+++ b/fs/zuf/zus_api.h
@@ -337,9 +337,13 @@ enum e_zufs_operation {
 	ZUFS_OP_REMOVE_DENTRY,
 	ZUFS_OP_RENAME,
 	ZUFS_OP_READDIR,
+	ZUFS_OP_CLONE,
+	ZUFS_OP_COPY,

 	ZUFS_OP_GET_SYMLINK,
 	ZUFS_OP_SETATTR,
+	ZUFS_OP_FALLOCATE,
+	ZUFS_OP_LLSEEK,

 	ZUFS_OP_BREAK,		/* Kernel telling Server to exit */
 	ZUFS_OP_MAX_OPT,
@@ -528,6 +532,47 @@ struct zufs_ioc_attr {
 	__u32 pad;
 };

+enum ZUFS_RANGE_FLAGS {
+	ZUFS_RF_DONTNEED	= 0x00000001,
+};
+
+/* ZUFS_OP_ISYNC, ZUFS_OP_FALLOCATE */
+struct zufs_ioc_range {
+	struct zufs_ioc_hdr hdr;
+	/* IN */
+	struct zus_inode_info *zus_ii;
+	__u64 offset, length;
+	__u32 opflags;
+	__u32 ioc_flags;
+
+	/* OUT */
+	__u64 write_unmapped;
+};
+
+/* ZUFS_OP_CLONE */
+struct zufs_ioc_clone {
+	struct zufs_ioc_hdr hdr;
+	/* IN */
+	struct zus_inode_info *src_zus_ii;
+	struct zus_inode_info *dst_zus_ii;
+	__u64 pos_in, pos_out;
+	__u64 len;
+	__u64 len_up;
+};
+
+/* ZUFS_OP_LLSEEK */
+struct zufs_ioc_seek {
+	struct zufs_ioc_hdr hdr;
+	/* IN */
+	struct zus_inode_info *zus_ii;
+	__u64 offset_in;
+	__u32 whence;
+	__u32 pad;
+
+	/* OUT */
+	__u64 offset_out;
+};
+
 /* Allocate a special_file that will be a dual-port communication buffer with
  * user mode.
  * Server will access the buffer via the mmap of this file.
-- 
2.20.1