On 15/07/2020 22:37, Kees Cook wrote: > On Tue, Jul 14, 2020 at 08:16:36PM +0200, Mickaël Salaün wrote: >> @@ -2849,7 +2855,7 @@ static int may_open(const struct path *path, int acc_mode, int flag) >> case S_IFLNK: >> return -ELOOP; >> case S_IFDIR: >> - if (acc_mode & (MAY_WRITE | MAY_EXEC)) >> + if (acc_mode & (MAY_WRITE | MAY_EXEC | MAY_OPENEXEC)) >> return -EISDIR; >> break; > > (I need to figure out where "open for reading" rejects S_IFDIR, since > it's clearly not here...) > >> case S_IFBLK: >> @@ -2859,13 +2865,26 @@ static int may_open(const struct path *path, int acc_mode, int flag) >> fallthrough; >> case S_IFIFO: >> case S_IFSOCK: >> - if (acc_mode & MAY_EXEC) >> + if (acc_mode & (MAY_EXEC | MAY_OPENEXEC)) >> return -EACCES; >> flag &= ~O_TRUNC; >> break; > > This will immediately break a system that runs code with MAY_OPENEXEC > set but reads from a block, char, fifo, or socket, even in the case of > a sysadmin leaving the "file" sysctl disabled. As documented, O_MAYEXEC is for regular files. The only legitimate use case seems to be with pipes, which should probably be allowed when enforcement is disabled. > >> case S_IFREG: >> - if ((acc_mode & MAY_EXEC) && path_noexec(path)) >> - return -EACCES; >> + if (path_noexec(path)) { >> + if (acc_mode & MAY_EXEC) >> + return -EACCES; >> + if ((acc_mode & MAY_OPENEXEC) && >> + (sysctl_open_mayexec_enforce & OPEN_MAYEXEC_ENFORCE_MOUNT)) >> + return -EACCES; >> + } >> + if ((acc_mode & MAY_OPENEXEC) && >> + (sysctl_open_mayexec_enforce & OPEN_MAYEXEC_ENFORCE_FILE)) >> + /* >> + * Because acc_mode may change here, the next and only >> + * use of acc_mode should then be by the following call >> + * to inode_permission(). >> + */ >> + acc_mode |= MAY_EXEC; >> break; >> } > > Likely very minor, but I'd like to avoid the path_noexec() call in the > fast-path (it dereferences a couple pointers whereas doing bit tests on > acc_mode is fast). 
> > Given that and the above observations, I think that may_open() likely > needs to start with: > > if (acc_mode & MAY_OPENEXEC) { > /* Reject all file types when mount enforcement set. */ > if ((sysctl_open_mayexec_enforce & OPEN_MAYEXEC_ENFORCE_MOUNT) && > path_noexec(path)) > return -EACCES; > /* Treat the same as MAY_EXEC. */ > if (sysctl_open_mayexec_enforce & OPEN_MAYEXEC_ENFORCE_FILE) > acc_mode |= MAY_EXEC; > } OK > > (Though I'm not 100% sure that path_noexec() is safe to be called for > all file types: i.e. path->mnt and path->mnt->mnt_sb *always* non-NULL?) path->mnt should always be non-NULL: https://lore.kernel.org/lkml/20200317164709.GA23230@xxxxxxxxxxxxxxxxxx/ > > This change would also imply that OPEN_MAYEXEC_ENFORCE_FILE *includes* > OPEN_MAYEXEC_ENFORCE_MOUNT (i.e. the sysctl should not be a bitfield), > since path_noexec() would get checked for S_ISREG. I can't come up with > a rationale where one would want OPEN_MAYEXEC_ENFORCE_FILE but _not_ > OPEN_MAYEXEC_ENFORCE_MOUNT? I don't see why it is an inclusion. > > (I can absolutely see wanting only OPEN_MAYEXEC_ENFORCE_MOUNT, or > suddenly one has to go mark every loaded thing with the exec bit and > most distros haven't done this to, for example, shared libraries. But > setting the exec bit and then NOT wanting to enforce the mount check > seems... not sensible?) > > Outside of this change, yes, I like this now -- it's much cleaner > because we have all the checks in the same place where they belong. :) > >> diff --git a/kernel/sysctl.c b/kernel/sysctl.c >> index db1ce7af2563..5008a2566e79 100644 >> --- a/kernel/sysctl.c >> +++ b/kernel/sysctl.c >> @@ -113,6 +113,7 @@ static int sixty = 60; >> >> static int __maybe_unused neg_one = -1; >> static int __maybe_unused two = 2; >> +static int __maybe_unused three = 3; >> static int __maybe_unused four = 4; >> static unsigned long zero_ul; >> static unsigned long one_ul = 1; > > Oh, are these still here? 
I thought they got removed (or at least made > const). Where did that series go? Hmpf, see sysctl_vals, but yes, for > now, this is fine. > >> @@ -888,7 +889,6 @@ static int proc_taint(struct ctl_table *table, int write, >> return err; >> } >> >> -#ifdef CONFIG_PRINTK >> static int proc_dointvec_minmax_sysadmin(struct ctl_table *table, int write, >> void *buffer, size_t *lenp, loff_t *ppos) >> { >> @@ -897,7 +897,6 @@ static int proc_dointvec_minmax_sysadmin(struct ctl_table *table, int write, >> >> return proc_dointvec_minmax(table, write, buffer, lenp, ppos); >> } >> -#endif >> >> /** >> * struct do_proc_dointvec_minmax_conv_param - proc_dointvec_minmax() range checking structure >> @@ -3264,6 +3263,15 @@ static struct ctl_table fs_table[] = { >> .extra1 = SYSCTL_ZERO, >> .extra2 = &two, >> }, >> + { >> + .procname = "open_mayexec_enforce", >> + .data = &sysctl_open_mayexec_enforce, >> + .maxlen = sizeof(int), >> + .mode = 0600, >> + .proc_handler = proc_dointvec_minmax_sysadmin, >> + .extra1 = SYSCTL_ZERO, >> + .extra2 = &three, >> + }, >> #if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE) >> { >> .procname = "binfmt_misc", >> -- >> 2.27.0 >> >