On Aug 5, 2024 Jann Horn <jannh@xxxxxxxxxx> wrote: > > keyctl_session_to_parent() involves posting task work to the parent task, > with work function key_change_session_keyring. > Because the task work in the parent runs asynchronously, no errors can be > returned back to the caller of keyctl_session_to_parent(), and therefore > the work function key_change_session_keyring() can't be allowed to fail due > to things like memory allocation failure or permission checks - all > allocations and checks have to happen in the child. > > This is annoying for two reasons: > > - It is the only reason why cred_alloc_blank() and > security_transfer_creds() are necessary. > - It means we can't do synchronous permission checks. > > Rewrite keyctl_session_to_parent() to run task work on the parent > synchronously, so that any errors that happen in the task work can be > plumbed back into the syscall return value in the child. > This allows us to get rid of cred_alloc_blank() and > security_transfer_creds() in a later commit, and it will make it possible > to write more reliable security checks for this operation. > > Note that this requires using TWA_SIGNAL instead of TWA_RESUME, so the > parent might observe some spurious -EAGAIN syscall returns or such; but the > parent likely anyway has to be ready to deal with the side effects of > receiving signals (since it'll probably get SIGCHLD when the child dies), > so that probably isn't an issue. > > Signed-off-by: Jann Horn <jannh@xxxxxxxxxx> > --- > security/keys/internal.h | 8 ++++ > security/keys/keyctl.c | 107 +++++++++++++------------------------------ > security/keys/process_keys.c | 86 ++++++++++++++++++---------------- > 3 files changed, 87 insertions(+), 114 deletions(-) ... > diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c > index ab927a142f51..e4cfe5c4594a 100644 > --- a/security/keys/keyctl.c > +++ b/security/keys/keyctl.c > @@ -1616,104 +1616,63 @@ long keyctl_get_security(key_serial_t keyid, > * parent process. > * > * The keyring must exist and must grant the caller LINK permission, and the > * parent process must be single-threaded and must have the same effective > * ownership as this process and mustn't be SUID/SGID. > * > - * The keyring will be emplaced on the parent when it next resumes userspace. > + * The keyring will be emplaced on the parent via a pseudo-signal. > * > * If successful, 0 will be returned. > */ > long keyctl_session_to_parent(void) > { > - struct task_struct *me, *parent; > - const struct cred *mycred, *pcred; > - struct callback_head *newwork, *oldwork; > + struct keyctl_session_to_parent_context ctx; > + struct task_struct *parent; > key_ref_t keyring_r; > - struct cred *cred; > int ret; > > keyring_r = lookup_user_key(KEY_SPEC_SESSION_KEYRING, 0, KEY_NEED_LINK); > if (IS_ERR(keyring_r)) > return PTR_ERR(keyring_r); > > - ret = -ENOMEM; > - > - /* our parent is going to need a new cred struct, a new tgcred struct > - * and new security data, so we allocate them here to prevent ENOMEM in > - * our parent */ > - cred = cred_alloc_blank(); > - if (!cred) > - goto error_keyring; > - newwork = &cred->rcu; > + write_lock_irq(&tasklist_lock); > + parent = get_task_struct(rcu_dereference_protected(current->real_parent, > + lockdep_is_held(&tasklist_lock))); > + write_unlock_irq(&tasklist_lock); > > - cred->session_keyring = key_ref_to_ptr(keyring_r); > - keyring_r = NULL; > - init_task_work(newwork, key_change_session_keyring); > + /* the parent mustn't be init and mustn't be a kernel thread */ > + if (is_global_init(parent) || (READ_ONCE(parent->flags) & PF_KTHREAD) != 0) > + goto put_task; I think we need to explicitly set @ret if we are failing here, yes? > - me = current; > - rcu_read_lock(); > - write_lock_irq(&tasklist_lock); > + ctx.new_session_keyring = key_ref_to_ptr(keyring_r); > + ctx.child_cred = current_cred(); > + init_completion(&ctx.done); > + init_task_work(&ctx.work, key_change_session_keyring); > + ret = task_work_add(parent, &ctx.work, TWA_SIGNAL); > + if (ret) > + goto put_task; > > - ret = -EPERM; > - oldwork = NULL; > - parent = rcu_dereference_protected(me->real_parent, > - lockdep_is_held(&tasklist_lock)); > + ret = wait_for_completion_interruptible(&ctx.done); > > - /* the parent mustn't be init and mustn't be a kernel thread */ > - if (parent->pid <= 1 || !parent->mm) > - goto unlock; > - > - /* the parent must be single threaded */ > - if (!thread_group_empty(parent)) > - goto unlock; > - > - /* the parent and the child must have different session keyrings or > - * there's no point */ > - mycred = current_cred(); > - pcred = __task_cred(parent); > - if (mycred == pcred || > - mycred->session_keyring == pcred->session_keyring) { > - ret = 0; > - goto unlock; > + if (task_work_cancel(parent, &ctx.work)) { > + /* > + * We got interrupted and the task work was canceled before it > + * could execute. > + * Use -ERESTARTNOINTR instead of -ERESTARTSYS for > + * compatibility - the manpage does not list -EINTR as a > + * possible error for keyctl(). > + */ > + ret = -ERESTARTNOINTR; > + } else { > + /* task work is running or has been executed */ > + wait_for_completion(&ctx.done); > + ret = ctx.result; > } > > - /* the parent must have the same effective ownership and mustn't be > - * SUID/SGID */ > - if (!uid_eq(pcred->uid, mycred->euid) || > - !uid_eq(pcred->euid, mycred->euid) || > - !uid_eq(pcred->suid, mycred->euid) || > - !gid_eq(pcred->gid, mycred->egid) || > - !gid_eq(pcred->egid, mycred->egid) || > - !gid_eq(pcred->sgid, mycred->egid)) > - goto unlock; > - > - /* the keyrings must have the same UID */ > - if ((pcred->session_keyring && > - !uid_eq(pcred->session_keyring->uid, mycred->euid)) || > - !uid_eq(mycred->session_keyring->uid, mycred->euid)) > - goto unlock; > - > - /* cancel an already pending keyring replacement */ > - oldwork = task_work_cancel_func(parent, key_change_session_keyring); > - > - /* the replacement session keyring is applied just prior to userspace > - * restarting */ > - ret = task_work_add(parent, newwork, TWA_RESUME); > - if (!ret) > - newwork = NULL; > -unlock: > - write_unlock_irq(&tasklist_lock); > - rcu_read_unlock(); > - if (oldwork) > - put_cred(container_of(oldwork, struct cred, rcu)); > - if (newwork) > - put_cred(cred); > - return ret; > - > -error_keyring: > +put_task: > + put_task_struct(parent); > key_ref_put(keyring_r); > return ret; > } -- paul-moore.com