This commit introduces the famfs file_operations. We call
thp_get_unmapped_area() to force PMD page alignment. Our read and
write handlers (famfs_dax_read_iter() and famfs_dax_write_iter())
call dax_iomap_rw() to do the work.

famfs_file_invalid() checks for various ways a famfs file can be
in an invalid state so we can fail I/O or fault resolution in those
cases. Those cases include the following:

* No famfs metadata
* file i_size does not match the originally allocated size
* file is not flagged as DAX
* errors were detected previously on the file

An invalid file can often be fixed by replaying the log, or by
umount/mount/log replay - all of which are user space operations.

Signed-off-by: John Groves <john@xxxxxxxxxx>
---
Review changes relative to the original posting:
 - clamp reads/writes correctly when ki_pos is past EOF (the unsigned
   "i_size - ki_pos" used to wrap, so no truncation happened)
 - share the validate-and-clamp step via famfs_rw_prep()
 - use %zu for size_t and print i_ino instead of the raw inode pointer
 - drop the redundant meta->error store; fix the kernel-doc names
 - the "index" blob hashes below were not regenerated

 fs/famfs/famfs_file.c | 182 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 182 insertions(+)

diff --git a/fs/famfs/famfs_file.c b/fs/famfs/famfs_file.c
index fc667d5f7be8..5228e9de1e3b 100644
--- a/fs/famfs/famfs_file.c
+++ b/fs/famfs/famfs_file.c
@@ -19,6 +19,188 @@
 #include <uapi/linux/famfs_ioctl.h>
 #include "famfs_internal.h"
 
+/*********************************************************************
+ * file_operations
+ */
+
+/**
+ * famfs_file_invalid() - Reject I/O to files that aren't in a valid state
+ * @inode: inode of the famfs file to check
+ *
+ * Every failure except missing metadata also latches meta->error; this
+ * function never clears it, so the file stays invalid on later calls.
+ *
+ * Return: 0 if the file is usable, -EIO if the inode has no famfs metadata
+ * or an error was latched earlier, -ENXIO if i_size no longer matches
+ * meta->file_size or the inode is not DAX.
+ */
+static ssize_t
+famfs_file_invalid(struct inode *inode)
+{
+	struct famfs_file_meta *meta = inode->i_private;
+	size_t i_size = i_size_read(inode);
+
+	if (!meta) {
+		pr_err("%s: un-initialized famfs file\n", __func__);
+		return -EIO;
+	}
+	if (i_size != meta->file_size) {
+		/* Cast so the argument matches %zu whatever file_size's type is */
+		pr_err("%s: something changed the size from %zu to %zu\n",
+		       __func__, (size_t)meta->file_size, i_size);
+		meta->error = 1;
+		return -ENXIO;
+	}
+	if (!IS_DAX(inode)) {
+		pr_err("%s: inode %lu IS_DAX is false\n", __func__, inode->i_ino);
+		meta->error = 1;
+		return -ENXIO;
+	}
+	if (meta->error) {
+		pr_err("%s: previously detected metadata errors\n", __func__);
+		return -EIO;
+	}
+	return 0;
+}
+
+/**
+ * famfs_rw_prep() - Common checks for the read and write handlers
+ * @iocb: kiocb of the request; ki_pos is the starting file offset
+ * @ubuf: iterator describing the caller's buffer
+ *
+ * Fails the request if the file is invalid, otherwise shortens @ubuf so the
+ * transfer ends at EOF. A start position at or beyond EOF leaves @ubuf empty.
+ *
+ * Return: 0 on success (callers must still check for an empty @ubuf), or the
+ * negative errno from famfs_file_invalid().
+ */
+static ssize_t
+famfs_rw_prep(
+	struct kiocb    *iocb,
+	struct iov_iter *ubuf)
+{
+	struct inode *inode = iocb->ki_filp->f_mapping->host;
+	loff_t i_size = i_size_read(inode);
+	u64 max_count = 0;
+	ssize_t rc;
+
+	rc = famfs_file_invalid(inode);
+	if (rc)
+		return rc;
+
+	/*
+	 * Compare before subtracting: an unsigned "i_size - ki_pos" wraps to a
+	 * huge value when ki_pos is past EOF, which would disable the clamp.
+	 */
+	if (iocb->ki_pos < i_size)
+		max_count = i_size - iocb->ki_pos;
+
+	if (iov_iter_count(ubuf) > max_count)
+		iov_iter_truncate(ubuf, max_count);
+
+	return 0;
+}
+
+/**
+ * famfs_dax_read_iter() - read_iter handler for famfs files
+ * @iocb: kiocb of the read
+ * @to:   destination iterator
+ *
+ * Return: 0 if the read starts at or beyond EOF, a negative errno if the
+ * file is invalid, otherwise the result of dax_iomap_rw().
+ */
+static ssize_t
+famfs_dax_read_iter(
+	struct kiocb    *iocb,
+	struct iov_iter *to)
+{
+	ssize_t rc;
+
+	rc = famfs_rw_prep(iocb, to);
+	if (rc)
+		return rc;
+
+	if (!iov_iter_count(to))
+		return 0;
+
+	rc = dax_iomap_rw(iocb, to, &famfs_iomap_ops);
+
+	file_accessed(iocb->ki_filp);
+	return rc;
+}
+
+/**
+ * famfs_dax_write_iter() - write_iter handler for famfs files
+ * @iocb: kiocb of the write
+ * @from: source iterator
+ *
+ * We need our own write-iter in order to prevent append: famfs does not
+ * alloc-on-write, so a write that would go past EOF is truncated to end at
+ * EOF.
+ *
+ * Return: 0 if nothing can be written, a negative errno if the file is
+ * invalid, otherwise the result of dax_iomap_rw().
+ */
+static ssize_t
+famfs_dax_write_iter(
+	struct kiocb    *iocb,
+	struct iov_iter *from)
+{
+	ssize_t rc;
+
+	rc = famfs_rw_prep(iocb, from);
+	if (rc)
+		return rc;
+
+	if (!iov_iter_count(from))
+		return 0;
+
+	return dax_iomap_rw(iocb, from, &famfs_iomap_ops);
+}
+
+/**
+ * famfs_file_mmap() - mmap handler for famfs files
+ * @file: file being mapped
+ * @vma:  mapping to set up
+ *
+ * Return: 0 on success, or the negative errno from famfs_file_invalid().
+ */
+static int
+famfs_file_mmap(
+	struct file           *file,
+	struct vm_area_struct *vma)
+{
+	struct inode *inode = file_inode(file);
+	ssize_t rc;
+
+	rc = famfs_file_invalid(inode);
+	if (rc)
+		return (int)rc;
+
+	file_accessed(file);
+	vma->vm_ops = &famfs_file_vm_ops;
+	vm_flags_set(vma, VM_HUGEPAGE);
+	return 0;
+}
+
+const struct file_operations famfs_file_operations = {
+	.owner = THIS_MODULE,
+
+	/* Custom famfs operations */
+	.write_iter = famfs_dax_write_iter,
+	.read_iter = famfs_dax_read_iter,
+	.mmap = famfs_file_mmap,
+
+	/* Force PMD alignment for mmap */
+	.get_unmapped_area = thp_get_unmapped_area,
+
+	/* Generic Operations */
+	.fsync = noop_fsync,
+	.splice_read = filemap_splice_read,
+	.splice_write = iter_file_splice_write,
+	.llseek = generic_file_llseek,
+};
+
 /*********************************************************************
  * iomap_operations
  *
-- 
2.43.0