On Wed, Nov 18, 2020 at 9:18 PM Omar Sandoval <osandov@xxxxxxxxxxx> wrote: > > From: Omar Sandoval <osandov@xxxxxx> > > The upcoming RWF_ENCODED operation introduces some security concerns: > > 1. Compressed writes will pass arbitrary data to decompression > algorithms in the kernel. > 2. Compressed reads can leak truncated/hole punched data. > > Therefore, we need to require privilege for RWF_ENCODED. It's not > possible to do the permissions checks at the time of the read or write > because, e.g., io_uring submits IO from a worker thread. So, add an open > flag which requires CAP_SYS_ADMIN. It can also be set and cleared with > fcntl(). The flag is not cleared in any way on fork or exec. It must be > combined with O_CLOEXEC when opening to avoid accidental leaks (if > needed, it may be set without O_CLOEXEC by using fnctl()). > > Note that the usual issue that unknown open flags are ignored doesn't > really matter for O_ALLOW_ENCODED; if the kernel doesn't support > O_ALLOW_ENCODED, then it doesn't support RWF_ENCODED, either. > > Signed-off-by: Omar Sandoval <osandov@xxxxxx> > --- > arch/alpha/include/uapi/asm/fcntl.h | 1 + > arch/parisc/include/uapi/asm/fcntl.h | 1 + > arch/sparc/include/uapi/asm/fcntl.h | 1 + > fs/fcntl.c | 10 ++++++++-- > fs/namei.c | 4 ++++ > fs/open.c | 7 +++++++ > include/linux/fcntl.h | 2 +- > include/uapi/asm-generic/fcntl.h | 4 ++++ > 8 files changed, 27 insertions(+), 3 deletions(-) > > diff --git a/arch/alpha/include/uapi/asm/fcntl.h b/arch/alpha/include/uapi/asm/fcntl.h > index 50bdc8e8a271..391e0d112e41 100644 > --- a/arch/alpha/include/uapi/asm/fcntl.h > +++ b/arch/alpha/include/uapi/asm/fcntl.h > @@ -34,6 +34,7 @@ > > #define O_PATH 040000000 > #define __O_TMPFILE 0100000000 > +#define O_ALLOW_ENCODED 0200000000 > > #define F_GETLK 7 > #define F_SETLK 8 > diff --git a/arch/parisc/include/uapi/asm/fcntl.h b/arch/parisc/include/uapi/asm/fcntl.h > index 03dee816cb13..72ea9bdf5f04 100644 > --- a/arch/parisc/include/uapi/asm/fcntl.h > +++ b/arch/parisc/include/uapi/asm/fcntl.h > @@ -19,6 +19,7 @@ > > #define O_PATH 020000000 > #define __O_TMPFILE 040000000 > +#define O_ALLOW_ENCODED 100000000 > > #define F_GETLK64 8 > #define F_SETLK64 9 > diff --git a/arch/sparc/include/uapi/asm/fcntl.h b/arch/sparc/include/uapi/asm/fcntl.h > index 67dae75e5274..ac3e8c9cb32c 100644 > --- a/arch/sparc/include/uapi/asm/fcntl.h > +++ b/arch/sparc/include/uapi/asm/fcntl.h > @@ -37,6 +37,7 @@ > > #define O_PATH 0x1000000 > #define __O_TMPFILE 0x2000000 > +#define O_ALLOW_ENCODED 0x8000000 > > #define F_GETOWN 5 /* for sockets. */ > #define F_SETOWN 6 /* for sockets. */ > diff --git a/fs/fcntl.c b/fs/fcntl.c > index 19ac5baad50f..9302f68fe698 100644 > --- a/fs/fcntl.c > +++ b/fs/fcntl.c > @@ -30,7 +30,8 @@ > #include <asm/siginfo.h> > #include <linux/uaccess.h> > > -#define SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT | O_NOATIME) > +#define SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT | O_NOATIME | \ > + O_ALLOW_ENCODED) > > static int setfl(int fd, struct file * filp, unsigned long arg) > { > @@ -49,6 +50,11 @@ static int setfl(int fd, struct file * filp, unsigned long arg) > if (!inode_owner_or_capable(inode)) > return -EPERM; > > + /* O_ALLOW_ENCODED can only be set by superuser */ > + if ((arg & O_ALLOW_ENCODED) && !(filp->f_flags & O_ALLOW_ENCODED) && > + !capable(CAP_SYS_ADMIN)) > + return -EPERM; > + > /* required for strict SunOS emulation */ > if (O_NONBLOCK != O_NDELAY) > if (arg & O_NDELAY) > @@ -1033,7 +1039,7 @@ static int __init fcntl_init(void) > * Exceptions: O_NONBLOCK is a two bit define on parisc; O_NDELAY > * is defined as O_NONBLOCK on some platforms and not on others. > */ > - BUILD_BUG_ON(21 - 1 /* for O_RDONLY being 0 */ != > + BUILD_BUG_ON(22 - 1 /* for O_RDONLY being 0 */ != > HWEIGHT32( > (VALID_OPEN_FLAGS & ~(O_NONBLOCK | O_NDELAY)) | > __FMODE_EXEC | __FMODE_NONOTIFY)); > diff --git a/fs/namei.c b/fs/namei.c > index d4a6dd772303..fbf64ce61088 100644 > --- a/fs/namei.c > +++ b/fs/namei.c > @@ -2890,6 +2890,10 @@ static int may_open(const struct path *path, int acc_mode, int flag) > if (flag & O_NOATIME && !inode_owner_or_capable(inode)) > return -EPERM; > > + /* O_ALLOW_ENCODED can only be set by superuser */ > + if ((flag & O_ALLOW_ENCODED) && !capable(CAP_SYS_ADMIN)) > + return -EPERM; > + > return 0; > } > > diff --git a/fs/open.c b/fs/open.c > index 9af548fb841b..f2863aaf78e7 100644 > --- a/fs/open.c > +++ b/fs/open.c > @@ -1040,6 +1040,13 @@ inline int build_open_flags(const struct open_how *how, struct open_flags *op) > acc_mode = 0; > } > > + /* > + * O_ALLOW_ENCODED must be combined with O_CLOEXEC to avoid accidentally > + * leaking encoded I/O privileges. > + */ > + if ((how->flags & (O_ALLOW_ENCODED | O_CLOEXEC)) == O_ALLOW_ENCODED) > + return -EINVAL; > + dup() can also result in accidental leak. We could fail dup() of fd without O_CLOEXEC. Should we? If we should than what error code should it be? We could return EPERM, but since we do allow to clear O_CLOEXEC or set O_ALLOW_ENCODED after open, EPERM seems a tad harsh. EINVAL seems inappropriate because the error has nothing to do with input args of dup() and EBADF would also be confusing. Thanks, Amir.