On Mon, Aug 12, 2024 at 9:37 AM Bernd Schubert <bernd.schubert@xxxxxxxxxxx> wrote: > > Sorry, I had sent out the wrong/old patch file - it doesn't have one change > (handling of already aligned buffers). > Shall I sent v4? The correct version is below > > --- > > From: Bernd Schubert <bschubert@xxxxxxx> > Date: Fri, 21 Jun 2024 11:51:23 +0200 > Subject: [PATCH v3] fuse: Allow page aligned writes > > Write IOs should be page aligned as fuse server > might need to copy data to another buffer otherwise in > order to fulfill network or device storage requirements. > > Simple reproducer is with libfuse, example/passthrough* > and opening a file with O_DIRECT - without this change > writing to that file failed with -EINVAL if the underlying > file system was requiring alignment. > > Required server side changes: > Server needs to seek to the next page, without splice that is > just page size buffer alignment, with splice another splice > syscall is needed to seek over the unaligned area. > > Signed-off-by: Bernd Schubert <bschubert@xxxxxxx> > Reviewed-by: Joanne Koong <joannelkoong@xxxxxxxxx> > --- > > Changes since v2: > - Added a no-op return in fuse_copy_align for buffers that are > already aligned (cs->len == PAGE_SIZE && cs->offset == 0). Some > server implementations actually do that to compensate for fuse client > misalignment. And it could also happen by accident for non aligned > server allocation. > Added suggestions from Joannes review: > - Removed two sanity checks in fuse_copy_align() to have it > generic. > - Moved from args->in_args[0].align to args->in_args[1].align > to have it in the arg that actually needs the alignment > (for FUSE_WRITE) and updated fuse_copy_args() to align that arg. > - Slight update in the commit body (removed "Reads"). > > libfuse patch: > https://github.com/libfuse/libfuse/pull/983 > > From implmentation point of view it is debatable if request type > parsing should be done in fuse_copy_args() (or fuse_dev_do_read() > or if alignment should be stored in struct fuse_arg / fuse_in_arg. > There are pros and cons for both, I kept it in args as it is > more generic and also allows to later on align other request > types, for example FUSE_SETXATTR. > --- > fs/fuse/dev.c | 29 +++++++++++++++++++++++++++-- > fs/fuse/file.c | 6 ++++++ > fs/fuse/fuse_i.h | 6 ++++-- > include/uapi/linux/fuse.h | 9 ++++++++- > 4 files changed, 45 insertions(+), 5 deletions(-) > > diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c > index 9eb191b5c4de..072c7bacc4a7 100644 > --- a/fs/fuse/dev.c > +++ b/fs/fuse/dev.c > @@ -1009,6 +1009,25 @@ static int fuse_copy_one(struct fuse_copy_state *cs, void *val, unsigned size) > return 0; > } > > +/* Align to the next page */ > +static int fuse_copy_align(struct fuse_copy_state *cs) > +{ > + /* > + * This could happen if the userspace buffer is aligned in a way that > + * it compensates fuse headers. > + */ > + if (cs->len == PAGE_SIZE && cs->offset == 0) > + return 0; > + > + if (WARN_ON(cs->len == PAGE_SIZE || cs->offset == 0)) > + return -EIO; > + > + /* Seek to the next page */ > + cs->offset += cs->len; > + cs->len = 0; > + return 0; > +} > + > /* Copy request arguments to/from userspace buffer */ > static int fuse_copy_args(struct fuse_copy_state *cs, unsigned numargs, > unsigned argpages, struct fuse_arg *args, > @@ -1019,10 +1038,16 @@ static int fuse_copy_args(struct fuse_copy_state *cs, unsigned numargs, > > for (i = 0; !err && i < numargs; i++) { > struct fuse_arg *arg = &args[i]; > - if (i == numargs - 1 && argpages) > + if (i == numargs - 1 && argpages) { > + if (arg->align) { > + err = fuse_copy_align(cs); > + if (err) > + break; > + } > err = fuse_copy_pages(cs, arg->size, zeroing); > - else > + } else { > err = fuse_copy_one(cs, arg->value, arg->size); > + } > } > return err; > } > diff --git a/fs/fuse/file.c b/fs/fuse/file.c > index f39456c65ed7..9783d5809ec3 100644 > --- a/fs/fuse/file.c > +++ b/fs/fuse/file.c > @@ -1062,6 +1062,12 @@ static void fuse_write_args_fill(struct fuse_io_args *ia, struct fuse_file *ff, > args->in_args[0].size = FUSE_COMPAT_WRITE_IN_SIZE; > else > args->in_args[0].size = sizeof(ia->write.in); > + > + if (ff->open_flags & FOPEN_ALIGNED_WRITES) { > + args->in_args[1].align = 1; > + ia->write.in.write_flags |= FUSE_WRITE_ALIGNED; > + } > + > args->in_args[0].value = &ia->write.in; > args->in_args[1].size = count; > args->out_numargs = 1; > diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h > index f23919610313..1600bd7b1d0d 100644 > --- a/fs/fuse/fuse_i.h > +++ b/fs/fuse/fuse_i.h > @@ -275,13 +275,15 @@ struct fuse_file { > > /** One input argument of a request */ > struct fuse_in_arg { > - unsigned size; > + unsigned int size; > + unsigned int align:1; > const void *value; > }; > > /** One output argument of a request */ > struct fuse_arg { > - unsigned size; > + unsigned int size; > + unsigned int align:1; > void *value; > }; > > diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h > index d08b99d60f6f..742262c7c1eb 100644 > --- a/include/uapi/linux/fuse.h > +++ b/include/uapi/linux/fuse.h > @@ -217,6 +217,9 @@ > * - add backing_id to fuse_open_out, add FOPEN_PASSTHROUGH open flag > * - add FUSE_NO_EXPORT_SUPPORT init flag > * - add FUSE_NOTIFY_RESEND, add FUSE_HAS_RESEND init flag > + * > + * 7.41 > + * - add FOPEN_ALIGNED_WRITES open flag and FUSE_WRITE_ALIGNED write flag > */ > > #ifndef _LINUX_FUSE_H > @@ -252,7 +255,7 @@ > #define FUSE_KERNEL_VERSION 7 > > /** Minor version number of this interface */ > -#define FUSE_KERNEL_MINOR_VERSION 40 > +#define FUSE_KERNEL_MINOR_VERSION 41 > > /** The node ID of the root inode */ > #define FUSE_ROOT_ID 1 > @@ -360,6 +363,7 @@ struct fuse_file_lock { > * FOPEN_NOFLUSH: don't flush data cache on close (unless FUSE_WRITEBACK_CACHE) > * FOPEN_PARALLEL_DIRECT_WRITES: Allow concurrent direct writes on the same inode > * FOPEN_PASSTHROUGH: passthrough read/write io for this open file > + * FOPEN_ALIGNED_WRITES: Page align write io data > */ > #define FOPEN_DIRECT_IO (1 << 0) > #define FOPEN_KEEP_CACHE (1 << 1) > @@ -369,6 +373,7 @@ struct fuse_file_lock { > #define FOPEN_NOFLUSH (1 << 5) > #define FOPEN_PARALLEL_DIRECT_WRITES (1 << 6) > #define FOPEN_PASSTHROUGH (1 << 7) > +#define FOPEN_ALIGNED_WRITES (1 << 8) > > /** > * INIT request/reply flags > @@ -496,10 +501,12 @@ struct fuse_file_lock { > * FUSE_WRITE_CACHE: delayed write from page cache, file handle is guessed > * FUSE_WRITE_LOCKOWNER: lock_owner field is valid > * FUSE_WRITE_KILL_SUIDGID: kill suid and sgid bits > + * FUSE_WRITE_ALIGNED: Write io data are page aligned > */ > #define FUSE_WRITE_CACHE (1 << 0) > #define FUSE_WRITE_LOCKOWNER (1 << 1) > #define FUSE_WRITE_KILL_SUIDGID (1 << 2) > +#define FUSE_WRITE_ALIGNED (1 << 3) > > /* Obsolete alias; this flag implies killing suid/sgid only. */ > #define FUSE_WRITE_KILL_PRIV FUSE_WRITE_KILL_SUIDGID > -- > 2.43.0 > >