On 12/23/21 6:52 AM, Christian Brauner wrote: > On Wed, Dec 22, 2021 at 01:01:26PM -0800, Stefan Roesch wrote: >> This adds support to io_uring for the fsetxattr and setxattr API. >> >> Signed-off-by: Stefan Roesch <shr@xxxxxx> >> --- >> fs/io_uring.c | 170 ++++++++++++++++++++++++++++++++++ >> include/uapi/linux/io_uring.h | 6 +- >> 2 files changed, 175 insertions(+), 1 deletion(-) >> >> diff --git a/fs/io_uring.c b/fs/io_uring.c >> index c8258c784116..8b6c70d6cacc 100644 >> --- a/fs/io_uring.c >> +++ b/fs/io_uring.c >> @@ -82,6 +82,7 @@ >> #include <linux/audit.h> >> #include <linux/security.h> >> #include <linux/atomic-ref.h> >> +#include <linux/xattr.h> >> >> #define CREATE_TRACE_POINTS >> #include <trace/events/io_uring.h> >> @@ -726,6 +727,13 @@ struct io_async_rw { >> struct wait_page_queue wpq; >> }; >> >> +struct io_xattr { >> + struct file *file; >> + struct xattr_ctx ctx; >> + void *value; >> + struct filename *filename; >> +}; >> + >> enum { >> REQ_F_FIXED_FILE_BIT = IOSQE_FIXED_FILE_BIT, >> REQ_F_IO_DRAIN_BIT = IOSQE_IO_DRAIN_BIT, >> @@ -866,6 +874,7 @@ struct io_kiocb { >> struct io_symlink symlink; >> struct io_hardlink hardlink; >> struct io_getdents getdents; >> + struct io_xattr xattr; >> }; >> >> u8 opcode; >> @@ -1118,6 +1127,10 @@ static const struct io_op_def io_op_defs[] = { >> [IORING_OP_GETDENTS] = { >> .needs_file = 1, >> }, >> + [IORING_OP_FSETXATTR] = { >> + .needs_file = 1 >> + }, >> + [IORING_OP_SETXATTR] = {}, >> }; >> >> /* requests with any of those set should undergo io_disarm_next() */ >> @@ -3887,6 +3900,144 @@ static int io_renameat(struct io_kiocb *req, unsigned int issue_flags) >> return 0; >> } >> >> +static int __io_setxattr_prep(struct io_kiocb *req, >> + const struct io_uring_sqe *sqe, >> + struct user_namespace *user_ns) >> +{ >> + struct io_xattr *ix = &req->xattr; >> + const char __user *name; >> + void *ret; >> + >> + if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) >> + return -EINVAL; >> + if 
(unlikely(sqe->ioprio)) >> + return -EINVAL; >> + if (unlikely(req->flags & REQ_F_FIXED_FILE)) >> + return -EBADF; >> + >> + ix->filename = NULL; >> + name = u64_to_user_ptr(READ_ONCE(sqe->addr)); >> + ix->ctx.value = u64_to_user_ptr(READ_ONCE(sqe->addr2)); >> + ix->ctx.size = READ_ONCE(sqe->len); >> + ix->ctx.flags = READ_ONCE(sqe->xattr_flags); >> + >> + ix->ctx.kname = kmalloc(XATTR_NAME_MAX + 1, GFP_KERNEL); >> + if (!ix->ctx.kname) >> + return -ENOMEM; >> + ix->ctx.kname_sz = XATTR_NAME_MAX + 1; >> + >> + ret = setxattr_setup(user_ns, name, &ix->ctx); > > Looking at this a bit closer, the setxattr_setup() function converts the > vfs caps prior to vfs_setxattr(). That shouldn't be done there though. > The conversion should be done when mnt_want_write() is held in > __io_setxattr() exactly how we do for setxattr()-based calls in > fs/xattr.c. This will guard against changes of relevant mount properties > (current or future). It will also allow you to simplify your > setxattr_setup() function a bit and you don't need to retrieve the > mount's idmapping until __io_setxattr(). > > Right now you're splitting updating the xattrs over the prep and commit > stage and I worry that in fully async contexts this is easy to miss. So > I'd rather do it in one place. Since we can't move it all into > vfs_setxattr() similar to what we did for fscaps because it's used in a > bunch of contexts where the conversion isn't wanted we should simply > expose do_setxattr() similar to do_getxattr() you're adding. > > So on top of your current patchset I'd suggest you do something like the > following (completely untested): > Thanks for your review and the code. The only change I made to your patch below is in do_setxattr(): it no longer uses a local kvalue, since I assume you intended it to use xattr_val instead. 
> From 6bcd3efc3293bb91599ee73272262ac596ab4608 Mon Sep 17 00:00:00 2001 > From: Christian Brauner <christian.brauner@xxxxxxxxxx> > Date: Thu, 23 Dec 2021 15:23:14 +0100 > Subject: [PATCH] UNTESTED > > --- > fs/internal.h | 8 +++++--- > fs/io_uring.c | 21 +++++++++----------- > fs/xattr.c | 55 ++++++++++++++++++++++++++++++++++----------------- > 3 files changed, 51 insertions(+), 33 deletions(-) > > diff --git a/fs/internal.h b/fs/internal.h > index ea0433799dbc..08259fa98b2e 100644 > --- a/fs/internal.h > +++ b/fs/internal.h > @@ -222,6 +222,8 @@ ssize_t do_getxattr(struct user_namespace *mnt_userns, > void __user *value, > size_t size); > > -void *setxattr_setup(struct user_namespace *mnt_userns, > - const char __user *name, > - struct xattr_ctx *ctx); > +int do_setxattr(struct user_namespace *mnt_userns, struct dentry *dentry, > + struct xattr_ctx *ctx, void *xattr_val); > + > +int setxattr_copy(const char __user *name, struct xattr_ctx *ctx, > + void **xattr_val); > diff --git a/fs/io_uring.c b/fs/io_uring.c > index 5dd01f19d915..c910c29e1632 100644 > --- a/fs/io_uring.c > +++ b/fs/io_uring.c > @@ -4040,12 +4040,11 @@ static int io_getxattr(struct io_kiocb *req, unsigned int issue_flags) > } > > static int __io_setxattr_prep(struct io_kiocb *req, > - const struct io_uring_sqe *sqe, > - struct user_namespace *user_ns) > + const struct io_uring_sqe *sqe) > { > struct io_xattr *ix = &req->xattr; > const char __user *name; > - void *ret; > + int ret; > > if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) > return -EINVAL; > @@ -4065,13 +4064,12 @@ static int __io_setxattr_prep(struct io_kiocb *req, > return -ENOMEM; > ix->ctx.kname_sz = XATTR_NAME_MAX + 1; > > - ret = setxattr_setup(user_ns, name, &ix->ctx); > - if (IS_ERR(ret)) { > + ret = setxattr_copy(name, &ix->ctx, &ix->value); > + if (ret) { > kfree(ix->ctx.kname); > - return PTR_ERR(ret); > + return ret; > } > > - ix->value = ret; > req->flags |= REQ_F_NEED_CLEANUP; > return 0; > } > @@ -4083,7 +4081,7 @@ 
static int io_setxattr_prep(struct io_kiocb *req, > const char __user *path; > int ret; > > - ret = __io_setxattr_prep(req, sqe, current_user_ns()); > + ret = __io_setxattr_prep(req, sqe); > if (ret) > return ret; > > @@ -4101,7 +4099,7 @@ static int io_setxattr_prep(struct io_kiocb *req, > static int io_fsetxattr_prep(struct io_kiocb *req, > const struct io_uring_sqe *sqe) > { > - return __io_setxattr_prep(req, sqe, file_mnt_user_ns(req->file)); > + return __io_setxattr_prep(req, sqe); > } > > static int __io_setxattr(struct io_kiocb *req, unsigned int issue_flags, > @@ -4112,9 +4110,8 @@ static int __io_setxattr(struct io_kiocb *req, unsigned int issue_flags, > > ret = mnt_want_write(path->mnt); > if (!ret) { > - ret = vfs_setxattr(mnt_user_ns(path->mnt), path->dentry, > - ix->ctx.kname, ix->value, ix->ctx.size, > - ix->ctx.flags); > + ret = do_setxattr(mnt_user_ns(path->mnt), path->dentry, > + &ix->ctx, ix->value); > mnt_drop_write(path->mnt); > } > > diff --git a/fs/xattr.c b/fs/xattr.c > index a675c7f0ea0c..03a44c5895d1 100644 > --- a/fs/xattr.c > +++ b/fs/xattr.c > @@ -542,40 +542,59 @@ EXPORT_SYMBOL_GPL(vfs_removexattr); > * Extended attribute SET operations > */ > > -void *setxattr_setup(struct user_namespace *mnt_userns, const char __user *name, > - struct xattr_ctx *ctx) > +int setxattr_copy(const char __user *name, struct xattr_ctx *ctx, > + void **xattr_val) > { > void *kvalue = NULL; > int error; > > if (ctx->flags & ~(XATTR_CREATE|XATTR_REPLACE)) > - return ERR_PTR(-EINVAL); > + return -EINVAL; > > error = strncpy_from_user(ctx->kname, name, ctx->kname_sz); > if (error == 0 || error == ctx->kname_sz) > - return ERR_PTR(-ERANGE); > + return -ERANGE; > if (error < 0) > - return ERR_PTR(error); > + return error; > > if (ctx->size) { > if (ctx->size > XATTR_SIZE_MAX) > - return ERR_PTR(-E2BIG); > + return -E2BIG; > > kvalue = kvmalloc(ctx->size, GFP_KERNEL); > if (!kvalue) > - return ERR_PTR(-ENOMEM); > + return -ENOMEM; > > if (copy_from_user(kvalue, 
ctx->value, ctx->size)) { > kvfree(kvalue); > - return ERR_PTR(-EFAULT); > + return -EFAULT; > } > - > - if ((strcmp(ctx->kname, XATTR_NAME_POSIX_ACL_ACCESS) == 0) || > - (strcmp(ctx->kname, XATTR_NAME_POSIX_ACL_DEFAULT) == 0)) > - posix_acl_fix_xattr_from_user(mnt_userns, kvalue, ctx->size); > } > > - return kvalue; > + *xattr_val = kvalue; > + return 0; > +} > + > +static void setxattr_convert(struct user_namespace *mnt_userns, > + struct xattr_ctx *ctx, void *kvalue) > +{ > + if (ctx->size && > + ((strcmp(ctx->kname, XATTR_NAME_POSIX_ACL_ACCESS) == 0) || > + (strcmp(ctx->kname, XATTR_NAME_POSIX_ACL_DEFAULT) == 0))) > + posix_acl_fix_xattr_from_user(mnt_userns, kvalue, ctx->size); > +} > + > +int do_setxattr(struct user_namespace *mnt_userns, struct dentry *dentry, > + struct xattr_ctx *ctx, void *xattr_val) > +{ > + void *kvalue = NULL; > + int error; > + > + setxattr_convert(mnt_userns, ctx, kvalue); > + error = vfs_setxattr(mnt_userns, dentry, ctx->kname, > + kvalue, ctx->size, ctx->flags); > + kvfree(kvalue); > + return error; > } > > static long > @@ -591,14 +610,14 @@ setxattr(struct user_namespace *mnt_userns, struct dentry *d, > .kname_sz = sizeof(kname), > .flags = flags, > }; > - void *kvalue; > + void *kvalue = NULL; > int error; > > - kvalue = setxattr_setup(mnt_userns, name, &ctx); > - if (IS_ERR(kvalue)) > - return PTR_ERR(kvalue); > + error = setxattr_copy(name, &ctx, &kvalue); > + if (error) > + return error; > > - error = vfs_setxattr(mnt_userns, d, kname, kvalue, size, flags); > + error = do_setxattr(mnt_userns, d, &ctx, kvalue); > > kvfree(kvalue); > return error; >