On Fri, Aug 20, 2021 at 04:44:26PM +0300, Nikolay Borisov wrote: > > > On 18.08.21 г. 0:06, Omar Sandoval wrote: > > From: Omar Sandoval <osandov@xxxxxx> > > > > The implementation resembles direct I/O: we have to flush any ordered > > extents, invalidate the page cache, and do the io tree/delalloc/extent > > map/ordered extent dance. From there, we can reuse the compression code > > with a minor modification to distinguish the write from writeback. This > > also creates inline extents when possible. > > > > Signed-off-by: Omar Sandoval <osandov@xxxxxx> > > <snip> > > > * Add an entry indicating a block group or device which is pinned by a > > diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c > > index 7a0a9c752624..13a0a65c6a43 100644 > > --- a/fs/btrfs/ioctl.c > > +++ b/fs/btrfs/ioctl.c > > @@ -103,6 +103,8 @@ struct btrfs_ioctl_encoded_io_args_32 { > > > > #define BTRFS_IOC_ENCODED_READ_32 _IOR(BTRFS_IOCTL_MAGIC, 64, \ > > struct btrfs_ioctl_encoded_io_args_32) > > +#define BTRFS_IOC_ENCODED_WRITE_32 _IOW(BTRFS_IOCTL_MAGIC, 64, \ > > + struct btrfs_ioctl_encoded_io_args_32) > > #endif > > > > /* Mask out flags that are inappropriate for the given type of inode. 
*/ > > @@ -4992,6 +4994,102 @@ static int btrfs_ioctl_encoded_read(struct file *file, void __user *argp, > > return ret; > > } > > > > +static int btrfs_ioctl_encoded_write(struct file *file, void __user *argp, > > + bool compat) > > +{ > > + struct btrfs_ioctl_encoded_io_args args; > > + struct iovec iovstack[UIO_FASTIOV]; > > + struct iovec *iov = iovstack; > > + struct iov_iter iter; > > + loff_t pos; > > + struct kiocb kiocb; > > + ssize_t ret; > > + > > + if (!capable(CAP_SYS_ADMIN)) { > > + ret = -EPERM; > > + goto out_acct; > > + } > > + > > + if (!(file->f_mode & FMODE_WRITE)) { > > + ret = -EBADF; > > + goto out_acct; > > + } > > + > > + if (compat) { > > +#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT) > > + struct btrfs_ioctl_encoded_io_args_32 args32; > > + > > + if (copy_from_user(&args32, argp, sizeof(args32))) { > > + ret = -EFAULT; > > + goto out_acct; > > + } > > + args.iov = compat_ptr(args32.iov); > > + args.iovcnt = args.iovcnt; > > + memcpy(&args.offset, &args32.offset, > > + sizeof(args) - > > + offsetof(struct btrfs_ioctl_encoded_io_args, offset)); > > +#else > > + return -ENOTTY; > > +#endif > > + } else { > > + if (copy_from_user(&args, argp, sizeof(args))) { > > + ret = -EFAULT; > > + goto out_acct; > > + } > > + } > > + > > + ret = -EINVAL; > > + if (args.flags != 0) > > + goto out_acct; > > + if (memchr_inv(args.reserved, 0, sizeof(args.reserved))) > > + goto out_acct; > > + if (args.compression == BTRFS_ENCODED_IO_COMPRESSION_NONE && > > + args.encryption == BTRFS_ENCODED_IO_ENCRYPTION_NONE) > > Do you intend on supporting encrypted data writeout in the future, given > that in btrfs_do_encoded_write EINVAL is returned if the data to be > written is encrypted? If not then this check could be moved earlier to > fail fast. We probably want to support it at some point in the future, yes. 
> > @@ -5138,9 +5236,13 @@ long btrfs_ioctl(struct file *file, unsigned int > > return fsverity_ioctl_measure(file, argp); > > case BTRFS_IOC_ENCODED_READ: > > return btrfs_ioctl_encoded_read(file, argp, false); > > + case BTRFS_IOC_ENCODED_WRITE: > > + return btrfs_ioctl_encoded_write(file, argp, false); > > #if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT) > > case BTRFS_IOC_ENCODED_READ_32: > > return btrfs_ioctl_encoded_read(file, argp, true); > > + case BTRFS_IOC_ENCODED_WRITE_32: > > + return btrfs_ioctl_encoded_write(file, argp, true); > > #endif > > } > > > > diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c > > index 550c34fa0e6d..180f302dee93 100644 > > --- a/fs/btrfs/ordered-data.c > > +++ b/fs/btrfs/ordered-data.c > > @@ -521,9 +521,15 @@ void btrfs_remove_ordered_extent(struct btrfs_inode *btrfs_inode, > > spin_lock(&btrfs_inode->lock); > > btrfs_mod_outstanding_extents(btrfs_inode, -1); > > spin_unlock(&btrfs_inode->lock); > > - if (root != fs_info->tree_root) > > - btrfs_delalloc_release_metadata(btrfs_inode, entry->num_bytes, > > - false); > > + if (root != fs_info->tree_root) { > > + u64 release; > > + > > + if (test_bit(BTRFS_ORDERED_ENCODED, &entry->flags)) > > + release = entry->disk_num_bytes; > > + else > > + release = entry->num_bytes; > > + btrfs_delalloc_release_metadata(btrfs_inode, release, false); > > + } > > > > percpu_counter_add_batch(&fs_info->ordered_bytes, -entry->num_bytes, > > fs_info->delalloc_batch); > > diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h > > index 0feb0c29839e..04588ccad34c 100644 > > --- a/fs/btrfs/ordered-data.h > > +++ b/fs/btrfs/ordered-data.h > > @@ -74,6 +74,8 @@ enum { > > BTRFS_ORDERED_LOGGED_CSUM, > > /* We wait for this extent to complete in the current transaction */ > > BTRFS_ORDERED_PENDING, > > + /* RWF_ENCODED I/O */ > > nit: RWF_ENCODED is no longer, we simply have ioctl-based encoded io. 
So > this needs to be renamed to avoid confusion for people not necessarily > familiar with the development history of the feature. Good catch, thanks.