On 18.08.21 г. 0:06, Omar Sandoval wrote: > From: Omar Sandoval <osandov@xxxxxx> > > The implementation resembles direct I/O: we have to flush any ordered > extents, invalidate the page cache, and do the io tree/delalloc/extent > map/ordered extent dance. From there, we can reuse the compression code > with a minor modification to distinguish the write from writeback. This > also creates inline extents when possible. > > Signed-off-by: Omar Sandoval <osandov@xxxxxx> <snip> > * Add an entry indicating a block group or device which is pinned by a > diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c > index 7a0a9c752624..13a0a65c6a43 100644 > --- a/fs/btrfs/ioctl.c > +++ b/fs/btrfs/ioctl.c > @@ -103,6 +103,8 @@ struct btrfs_ioctl_encoded_io_args_32 { > > #define BTRFS_IOC_ENCODED_READ_32 _IOR(BTRFS_IOCTL_MAGIC, 64, \ > struct btrfs_ioctl_encoded_io_args_32) > +#define BTRFS_IOC_ENCODED_WRITE_32 _IOW(BTRFS_IOCTL_MAGIC, 64, \ > + struct btrfs_ioctl_encoded_io_args_32) > #endif > > /* Mask out flags that are inappropriate for the given type of inode. 
*/ > @@ -4992,6 +4994,102 @@ static int btrfs_ioctl_encoded_read(struct file *file, void __user *argp, > return ret; > } > > +static int btrfs_ioctl_encoded_write(struct file *file, void __user *argp, > + bool compat) > +{ > + struct btrfs_ioctl_encoded_io_args args; > + struct iovec iovstack[UIO_FASTIOV]; > + struct iovec *iov = iovstack; > + struct iov_iter iter; > + loff_t pos; > + struct kiocb kiocb; > + ssize_t ret; > + > + if (!capable(CAP_SYS_ADMIN)) { > + ret = -EPERM; > + goto out_acct; > + } > + > + if (!(file->f_mode & FMODE_WRITE)) { > + ret = -EBADF; > + goto out_acct; > + } > + > + if (compat) { > +#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT) > + struct btrfs_ioctl_encoded_io_args_32 args32; > + > + if (copy_from_user(&args32, argp, sizeof(args32))) { > + ret = -EFAULT; > + goto out_acct; > + } > + args.iov = compat_ptr(args32.iov); > + args.iovcnt = args.iovcnt; > + memcpy(&args.offset, &args32.offset, > + sizeof(args) - > + offsetof(struct btrfs_ioctl_encoded_io_args, offset)); > +#else > + return -ENOTTY; > +#endif > + } else { > + if (copy_from_user(&args, argp, sizeof(args))) { > + ret = -EFAULT; > + goto out_acct; > + } > + } > + > + ret = -EINVAL; > + if (args.flags != 0) > + goto out_acct; > + if (memchr_inv(args.reserved, 0, sizeof(args.reserved))) > + goto out_acct; > + if (args.compression == BTRFS_ENCODED_IO_COMPRESSION_NONE && > + args.encryption == BTRFS_ENCODED_IO_ENCRYPTION_NONE) Do you intend on supporting encrypted data writeout in the future, given that in btrfs_do_encoded_write EINVAL is returned if the data to be written is encrypted? If not then this check could be moved earlier to fail fast. 
<snip> > @@ -5138,9 +5236,13 @@ long btrfs_ioctl(struct file *file, unsigned int > return fsverity_ioctl_measure(file, argp); > case BTRFS_IOC_ENCODED_READ: > return btrfs_ioctl_encoded_read(file, argp, false); > + case BTRFS_IOC_ENCODED_WRITE: > + return btrfs_ioctl_encoded_write(file, argp, false); > #if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT) > case BTRFS_IOC_ENCODED_READ_32: > return btrfs_ioctl_encoded_read(file, argp, true); > + case BTRFS_IOC_ENCODED_WRITE_32: > + return btrfs_ioctl_encoded_write(file, argp, true); > #endif > } > > diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c > index 550c34fa0e6d..180f302dee93 100644 > --- a/fs/btrfs/ordered-data.c > +++ b/fs/btrfs/ordered-data.c > @@ -521,9 +521,15 @@ void btrfs_remove_ordered_extent(struct btrfs_inode *btrfs_inode, > spin_lock(&btrfs_inode->lock); > btrfs_mod_outstanding_extents(btrfs_inode, -1); > spin_unlock(&btrfs_inode->lock); > - if (root != fs_info->tree_root) > - btrfs_delalloc_release_metadata(btrfs_inode, entry->num_bytes, > - false); > + if (root != fs_info->tree_root) { > + u64 release; > + > + if (test_bit(BTRFS_ORDERED_ENCODED, &entry->flags)) > + release = entry->disk_num_bytes; > + else > + release = entry->num_bytes; > + btrfs_delalloc_release_metadata(btrfs_inode, release, false); > + } > > percpu_counter_add_batch(&fs_info->ordered_bytes, -entry->num_bytes, > fs_info->delalloc_batch); > diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h > index 0feb0c29839e..04588ccad34c 100644 > --- a/fs/btrfs/ordered-data.h > +++ b/fs/btrfs/ordered-data.h > @@ -74,6 +74,8 @@ enum { > BTRFS_ORDERED_LOGGED_CSUM, > /* We wait for this extent to complete in the current transaction */ > BTRFS_ORDERED_PENDING, > + /* RWF_ENCODED I/O */ nit: RWF_ENCODED no longer exists; we simply have ioctl-based encoded I/O. So this comment needs to be updated to avoid confusion for people not necessarily familiar with the development history of the feature. 
> + BTRFS_ORDERED_ENCODED, > }; > > /* BTRFS_ORDERED_* flags that specify the type of the extent. */ > @@ -81,7 +83,8 @@ enum { > (1UL << BTRFS_ORDERED_NOCOW) | \ > (1UL << BTRFS_ORDERED_PREALLOC) | \ > (1UL << BTRFS_ORDERED_COMPRESSED) | \ > - (1UL << BTRFS_ORDERED_DIRECT)) > + (1UL << BTRFS_ORDERED_DIRECT) | \ > + (1UL << BTRFS_ORDERED_ENCODED)) > > struct btrfs_ordered_extent { > /* logical offset in the file */ >