On Sun, May 17, 2020 at 02:30:57PM -0700, Kees Cook wrote: > On Sun, May 17, 2020 at 09:02:15AM -0600, Tycho Andersen wrote: > > I'm going read this thread more carefully tomorrow, but I just wanted to > mention that I'd *like* to extend seccomp_data for doing deep argument > inspection of the new syscalls. I think it's the least bad of many > designs, and I'll write that up in more detail. (I would *really* like > to avoid extending seccomp's BPF language, and instead allow probing > into the struct copied from userspace, etc.) > > Anyway, it's very related to this, so, yeah, probably we need a v2 of the > notif API, but I'll try to get all the ideas here collected in one place. I scratched together a proposal of what I think would make a not-terrible V2 API. I'm sure there's bugs in this code, but I think it's workable -- or at least a place to start. The biggest thing I think we should consider is unrolling seccomp_data if we don't intend to add new BPF-accessible fields. If also uses read(2), so we get to take advantage of read(2)'s ability to pass a size along with the read, as opposed to doing ioctl tricks. It also makes programming from against it slightly simpler. I can imagine that the send API could be similar, in that it could support write, and thus making it 100% usable from Go (and the like) without requiring a separate OS-thread be spun up to interact with the listener. diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h index c1735455bc53..db6e5335ec6a 100644 --- a/include/uapi/linux/seccomp.h +++ b/include/uapi/linux/seccomp.h @@ -113,6 +113,41 @@ struct seccomp_notif_resp { __u32 flags; }; +#define SECCOMP_DATA_SIZE_VER0 64 + +#define SECCOMP_NOTIF_CONFIG_SIZE_VER0 16 + +/* Optional seccomp return fields */ +/* __u32 for triggering pid */ +#define SECCOMP_NOTIF_FIELD_PID (1UL << 1) +/* __u32 for triggering thread group leader id */ +#define SECCOMP_NOTIF_FIELD_TGID (1UL << 1) + +struct seccomp_notif_config { + __u32 size; + /* + * The supported SECCOMP_DATA_SIZE to be returned. If the size passed in + * is unsupported (seccomp_data is too small or if it is larger than the + * currently supported size), EINVAL or E2BIG will be, respectively, + * returned, and this will be set to the latest supported size. + */ + __u32 seccomp_data_size; + /* + * This is an OR'd together list of SECCOMP_NOTIF_FIELD_*. If an + * unsupported field is requested, ENOTSUPP will be returned. + */ + __u64 optional_fields; +}; + +/* read(2) Notification format example: + * struct seccomp_notif_read { + * __u64 id; + * __u32 pid; if SECCOMP_NOTIF_FIELD_PID + * __u32 tgid; if SECCOMP_NOTIF_FIELD_TGID + * struct seccomp_data data; + * }; + */ + #define SECCOMP_IOC_MAGIC '!' #define SECCOMP_IO(nr) _IO(SECCOMP_IOC_MAGIC, nr) #define SECCOMP_IOR(nr, type) _IOR(SECCOMP_IOC_MAGIC, nr, type) @@ -124,4 +159,10 @@ struct seccomp_notif_resp { #define SECCOMP_IOCTL_NOTIF_SEND SECCOMP_IOWR(1, \ struct seccomp_notif_resp) #define SECCOMP_IOCTL_NOTIF_ID_VALID SECCOMP_IOR(2, __u64) +/* + * Configure the data structure to be returned by read(2). + * Returns the size of the data structure which will be returned. + */ +#define SECCOMP_IOCTL_NOTIF_CONFIG SECCOMP_IOR(3, \ + struct seccomp_notif_config) #endif /* _UAPI_LINUX_SECCOMP_H */ diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 55a6184f5990..4670cb03c390 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -95,12 +95,19 @@ struct seccomp_knotif { * @next_id: The id of the next request. * @notifications: A list of struct seccomp_knotif elements. * @wqh: A wait queue for poll. + * @read_size: The size of the expected read. If it is set to 0, it means that + * reading notifications via read(2) is not yet configured. Until + * then, EINVAL will be returned via read(2). + * @data_size: The supported size of seccomp_data. + * @optional_fields: The optional fields to return on read */ struct notification { struct semaphore request; u64 next_id; struct list_head notifications; wait_queue_head_t wqh; + u32 read_size; + u64 optional_fields; }; /** @@ -1021,79 +1028,160 @@ static int seccomp_notify_release(struct inode *inode, struct file *file) return 0; } -static long seccomp_notify_recv(struct seccomp_filter *filter, - void __user *buf) +static void seccomp_reset_notif(struct seccomp_filter *filter, + u64 id) { - struct seccomp_knotif *knotif = NULL, *cur; - struct seccomp_notif unotif; - ssize_t ret; - - /* Verify that we're not given garbage to keep struct extensible. */ - ret = check_zeroed_user(buf, sizeof(unotif)); - if (ret < 0) - return ret; - if (!ret) - return -EINVAL; + struct seccomp_knotif *cur; - memset(&unotif, 0, sizeof(unotif)); + /* + * Something went wrong. To make sure that we keep this + * notification alive, let's reset it back to INIT. It + * may have died when we released the lock, so we need to make + * sure it's still around. + */ + mutex_lock(&filter->notify_lock); + list_for_each_entry(cur, &filter->notif->notifications, list) { + if (cur->id == id) { + cur->state = SECCOMP_NOTIFY_INIT; + up(&filter->notif->request); + break; + } + } + mutex_unlock(&filter->notify_lock); +} +/* + * Returns the next knotif available. If the return is a non-error, it will + * be with notify_lock held. It is up to the caller to unlock it. + */ +static struct seccomp_knotif *seccomp_get_notif(struct seccomp_filter *filter) +{ + struct seccomp_knotif *cur; + int ret; ret = down_interruptible(&filter->notif->request); if (ret < 0) - return ret; + return ERR_PTR(ret); mutex_lock(&filter->notify_lock); list_for_each_entry(cur, &filter->notif->notifications, list) { if (cur->state == SECCOMP_NOTIFY_INIT) { - knotif = cur; - break; + cur->state = SECCOMP_NOTIFY_SENT; + wake_up_poll(&filter->notif->wqh, + EPOLLOUT | EPOLLWRNORM); + return cur; } } - + mutex_unlock(&filter->notify_lock); /* * If we didn't find a notification, it could be that the task was * interrupted by a fatal signal between the time we were woken and * when we were able to acquire the rw lock. */ - if (!knotif) { - ret = -ENOENT; - goto out; - } + return ERR_PTR(-ENOENT); +} + +static long seccomp_notify_recv(struct seccomp_filter *filter, + void __user *buf) +{ + struct seccomp_knotif *knotif; + struct seccomp_notif unotif; + ssize_t ret; + + /* Verify that we're not given garbage to keep struct extensible. */ + ret = check_zeroed_user(buf, sizeof(unotif)); + if (ret < 0) + return ret; + if (!ret) + return -EINVAL; + + memset(&unotif, 0, sizeof(unotif)); + + knotif = seccomp_get_notif(filter); + if (IS_ERR(knotif)) + return PTR_ERR(knotif); unotif.id = knotif->id; unotif.pid = task_pid_vnr(knotif->task); unotif.data = *(knotif->data); - knotif->state = SECCOMP_NOTIFY_SENT; - wake_up_poll(&filter->notif->wqh, EPOLLOUT | EPOLLWRNORM); - ret = 0; -out: mutex_unlock(&filter->notify_lock); - if (ret == 0 && copy_to_user(buf, &unotif, sizeof(unotif))) { - ret = -EFAULT; + if (!copy_to_user(buf, &unotif, sizeof(unotif))) + return 0; - /* - * Userspace screwed up. To make sure that we keep this - * notification alive, let's reset it back to INIT. It - * may have died when we released the lock, so we need to make - * sure it's still around. - */ - knotif = NULL; - mutex_lock(&filter->notify_lock); - list_for_each_entry(cur, &filter->notif->notifications, list) { - if (cur->id == unotif.id) { - knotif = cur; - break; - } - } + seccomp_reset_notif(filter, unotif.id); + return -EFAULT; +} - if (knotif) { - knotif->state = SECCOMP_NOTIFY_INIT; - up(&filter->notif->request); - } - mutex_unlock(&filter->notify_lock); +/* Append the src value to the dst, and return the new cursor */ +#define append(dst, src) ({ \ + typeof(src) _src = src; \ + memcpy(dst, &_src, sizeof(_src)); \ + dst + sizeof(_src); \ +}) + +/* + * Append the value pointer to by the pointer, src, to the dst + * and return the new cursor + */ +#define append_ptr(dst, src) ({ \ + typeof(src) _src = src; \ + memcpy(dst, _src, sizeof(*_src)); \ + dst + sizeof(*_src); \ +}) + +static ssize_t seccomp_notify_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + struct seccomp_filter *filter = file->private_data; + u32 optional_fields, read_size; + struct seccomp_knotif *knotif; + void *kbuf, *cursor; + int ret; + u64 id; + + ret = mutex_lock_interruptible(&filter->notify_lock); + if (ret) + return ret; + read_size = filter->notif->read_size; + optional_fields = filter->notif->optional_fields; + mutex_unlock(&filter->notify_lock); + + if (read_size == 0) + return -EINVAL; + if (count < read_size) + return -ENOSPC; + + knotif = seccomp_get_notif(filter); + if (IS_ERR(knotif)) + return PTR_ERR(knotif); + + id = knotif->id; + kbuf = kzalloc(read_size, GFP_KERNEL); + if (!kbuf) { + ret = -ENOMEM; + goto out; } + cursor = kbuf; + cursor = append(cursor, knotif->id); + if (filter->notif->optional_fields & SECCOMP_NOTIF_FIELD_PID) + cursor = append(cursor, task_pid_vnr(knotif->task)); + if (filter->notif->optional_fields & SECCOMP_NOTIF_FIELD_TGID) + cursor = append(cursor, task_tgid_vnr(knotif->task)); + cursor = append_ptr(cursor, knotif->data); + WARN_ON_ONCE(cursor != kbuf + read_size); + + ret = copy_to_user(buf, kbuf, read_size); + if (!ret) + ret = read_size; + + kfree(kbuf); +out: + mutex_unlock(&filter->notify_lock); + if (ret < 0) + seccomp_reset_notif(filter, id); + return ret; } @@ -1175,6 +1263,70 @@ static long seccomp_notify_id_valid(struct seccomp_filter *filter, return ret; } +static long seccomp_notif_config(struct seccomp_filter *filter, + struct seccomp_notif_config __user *uconfig) +{ + struct seccomp_notif_config config; + /* ret_size is a minimum of 8, given id */ + int ret, notification_size = 8; + u32 size; + + + ret = get_user(size, &uconfig->size); + if (ret) + return ret; + ret = copy_struct_from_user(&config, sizeof(config), uconfig, size); + if (ret) + return ret; + + /* + * This needs to be bumped every time we change seccomp_data's size + */ + BUILD_BUG_ON(sizeof(struct seccomp_data) != SECCOMP_DATA_SIZE_VER0); + if (config.seccomp_data_size < SECCOMP_DATA_SIZE_VER0) { + ret = -EINVAL; + goto invalid_seccomp_data_size; + } + if (config.seccomp_data_size > SECCOMP_DATA_SIZE_VER0) { + ret = -E2BIG; + goto invalid_seccomp_data_size; + } + notification_size += config.seccomp_data_size; + + if (config.optional_fields & ~(SECCOMP_NOTIF_FIELD_PID|SECCOMP_NOTIF_FIELD_TGID)) + return -ENOTSUPP; + + if (config.optional_fields & SECCOMP_NOTIF_FIELD_PID) + notification_size += 4; + if (config.optional_fields & SECCOMP_NOTIF_FIELD_TGID) + notification_size += 4; + + + ret = mutex_lock_interruptible(&filter->notify_lock); + if (ret < 0) + return ret; + + if (filter->notif->read_size) { + ret = -EBUSY; + goto out; + } + + ret = notification_size; + filter->notif->read_size = notification_size; + filter->notif->optional_fields = config.optional_fields; + +out: + mutex_unlock(&filter->notify_lock); + + return ret; + +invalid_seccomp_data_size: + if (put_user(SECCOMP_DATA_SIZE_VER0, &uconfig->seccomp_data_size)) + return -EFAULT; + + return ret; +} + static long seccomp_notify_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { @@ -1188,6 +1340,8 @@ static long seccomp_notify_ioctl(struct file *file, unsigned int cmd, return seccomp_notify_send(filter, buf); case SECCOMP_IOCTL_NOTIF_ID_VALID: return seccomp_notify_id_valid(filter, buf); + case SECCOMP_IOCTL_NOTIF_CONFIG: + return seccomp_notif_config(filter, buf); default: return -EINVAL; } @@ -1224,6 +1378,7 @@ static const struct file_operations seccomp_notify_ops = { .release = seccomp_notify_release, .unlocked_ioctl = seccomp_notify_ioctl, .compat_ioctl = seccomp_notify_ioctl, + .read = seccomp_notify_read, }; static struct file *init_listener(struct seccomp_filter *filter) > > -- > Kees Cook > _______________________________________________ > Containers mailing list > Containers@xxxxxxxxxxxxxxxxxxxxxxxxxx > https://lists.linuxfoundation.org/mailman/listinfo/containers _______________________________________________ Containers mailing list Containers@xxxxxxxxxxxxxxxxxxxxxxxxxx https://lists.linuxfoundation.org/mailman/listinfo/containers