On Tue, Jul 14, 2020 at 08:16:36PM +0200, Mickaël Salaün wrote: > @@ -2849,7 +2855,7 @@ static int may_open(const struct path *path, int acc_mode, int flag) > case S_IFLNK: > return -ELOOP; > case S_IFDIR: > - if (acc_mode & (MAY_WRITE | MAY_EXEC)) > + if (acc_mode & (MAY_WRITE | MAY_EXEC | MAY_OPENEXEC)) > return -EISDIR; > break; (I need to figure out where "open for reading" rejects S_IFDIR, since it's clearly not here...) > case S_IFBLK: > @@ -2859,13 +2865,26 @@ static int may_open(const struct path *path, int acc_mode, int flag) > fallthrough; > case S_IFIFO: > case S_IFSOCK: > - if (acc_mode & MAY_EXEC) > + if (acc_mode & (MAY_EXEC | MAY_OPENEXEC)) > return -EACCES; > flag &= ~O_TRUNC; > break; This will immediately break a system that runs code with MAY_OPENEXEC set but reads from a block, char, fifo, or socket, even in the case of a sysadmin leaving the "file" sysctl disabled. > case S_IFREG: > - if ((acc_mode & MAY_EXEC) && path_noexec(path)) > - return -EACCES; > + if (path_noexec(path)) { > + if (acc_mode & MAY_EXEC) > + return -EACCES; > + if ((acc_mode & MAY_OPENEXEC) && > + (sysctl_open_mayexec_enforce & OPEN_MAYEXEC_ENFORCE_MOUNT)) > + return -EACCES; > + } > + if ((acc_mode & MAY_OPENEXEC) && > + (sysctl_open_mayexec_enforce & OPEN_MAYEXEC_ENFORCE_FILE)) > + /* > + * Because acc_mode may change here, the next and only > + * use of acc_mode should then be by the following call > + * to inode_permission(). > + */ > + acc_mode |= MAY_EXEC; > break; > } Likely very minor, but I'd like to avoid the path_noexec() call in the fast-path (it dereferences a couple pointers whereas doing bit tests on acc_mode is fast). Given that and the above observations, I think that may_open() likely needs to start with: if (acc_mode & MAY_OPENEXEC) { /* Reject all file types when mount enforcement set. */ if ((sysctl_open_mayexec_enforce & OPEN_MAYEXEC_ENFORCE_MOUNT) && path_noexec(path)) return -EACCES; /* Treat the same as MAY_EXEC. 
*/ if (sysctl_open_mayexec_enforce & OPEN_MAYEXEC_ENFORCE_FILE) acc_mode |= MAY_EXEC; } (Though I'm not 100% sure that path_noexec() is safe to be called for all file types: i.e. path->mnt and path->mnt->mnt_sb *always* non-NULL?) This change would also imply that OPEN_MAYEXEC_ENFORCE_FILE *includes* OPEN_MAYEXEC_ENFORCE_MOUNT (i.e. the sysctl should not be a bitfield), since path_noexec() would get checked for S_ISREG. I can't come up with a rationale where one would want OPEN_MAYEXEC_ENFORCE_FILE but _not_ OPEN_MAYEXEC_ENFORCE_MOUNT? (I can absolutely see wanting only OPEN_MAYEXEC_ENFORCE_MOUNT, or suddenly one has to go mark every loaded thing with the exec bit and most distros haven't done this to, for example, shared libraries. But setting the exec bit and then NOT wanting to enforce the mount check seems... not sensible?) Outside of this change, yes, I like this now -- it's much cleaner because we have all the checks in the same place where they belong. :) > diff --git a/kernel/sysctl.c b/kernel/sysctl.c > index db1ce7af2563..5008a2566e79 100644 > --- a/kernel/sysctl.c > +++ b/kernel/sysctl.c > @@ -113,6 +113,7 @@ static int sixty = 60; > > static int __maybe_unused neg_one = -1; > static int __maybe_unused two = 2; > +static int __maybe_unused three = 3; > static int __maybe_unused four = 4; > static unsigned long zero_ul; > static unsigned long one_ul = 1; Oh, are these still here? I thought they got removed (or at least made const). Where did that series go? Hmpf, see sysctl_vals, but yes, for now, this is fine. 
> @@ -888,7 +889,6 @@ static int proc_taint(struct ctl_table *table, int write, > return err; > } > > -#ifdef CONFIG_PRINTK > static int proc_dointvec_minmax_sysadmin(struct ctl_table *table, int write, > void *buffer, size_t *lenp, loff_t *ppos) > { > @@ -897,7 +897,6 @@ static int proc_dointvec_minmax_sysadmin(struct ctl_table *table, int write, > > return proc_dointvec_minmax(table, write, buffer, lenp, ppos); > } > -#endif > > /** > * struct do_proc_dointvec_minmax_conv_param - proc_dointvec_minmax() range checking structure > @@ -3264,6 +3263,15 @@ static struct ctl_table fs_table[] = { > .extra1 = SYSCTL_ZERO, > .extra2 = &two, > }, > + { > + .procname = "open_mayexec_enforce", > + .data = &sysctl_open_mayexec_enforce, > + .maxlen = sizeof(int), > + .mode = 0600, > + .proc_handler = proc_dointvec_minmax_sysadmin, > + .extra1 = SYSCTL_ZERO, > + .extra2 = &three, > + }, > #if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE) > { > .procname = "binfmt_misc", > -- > 2.27.0 > -- Kees Cook