This patch changes the BPF syscall to avoid returning file descriptor value zero. As mentioned in the cover letter, it is very impractical when extending the kABI that the file-descriptor value 'zero' is valid, as this requires that new fields be initialised to minus-1. The first step is to change the kernel such that the BPF syscall simply doesn't return value zero as an FD number. This patch achieves this with code similar to anon_inode_getfd(), with the exception of getting an unused FD starting from 1. The kernel already supports starting from a specific FD value, as this is used by f_dupfd(). It seems simpler to replicate part of the anon_inode_getfd() code and use this start-from-offset feature, instead of using f_dupfd() handling afterwards. Signed-off-by: Jesper Dangaard Brouer <brouer@xxxxxxxxxx> --- fs/file.c | 2 +- include/linux/file.h | 1 + kernel/bpf/syscall.c | 38 ++++++++++++++++++++++++++++++++------ 3 files changed, 34 insertions(+), 7 deletions(-) diff --git a/fs/file.c b/fs/file.c index abb8b7081d7a..122185cb7707 100644 --- a/fs/file.c +++ b/fs/file.c @@ -535,7 +535,7 @@ int __alloc_fd(struct files_struct *files, return error; } -static int alloc_fd(unsigned start, unsigned flags) +int alloc_fd(unsigned start, unsigned flags) { return __alloc_fd(current->files, start, rlimit(RLIMIT_NOFILE), flags); } diff --git a/include/linux/file.h b/include/linux/file.h index 122f80084a3e..927fb6c2582d 100644 --- a/include/linux/file.h +++ b/include/linux/file.h @@ -85,6 +85,7 @@ extern int f_dupfd(unsigned int from, struct file *file, unsigned flags); extern int replace_fd(unsigned fd, struct file *file, unsigned flags); extern void set_close_on_exec(unsigned int fd, int flag); extern bool get_close_on_exec(unsigned int fd); +extern int alloc_fd(unsigned start, unsigned flags); extern int __get_unused_fd_flags(unsigned flags, unsigned long nofile); extern int get_unused_fd_flags(unsigned flags); extern void put_unused_fd(unsigned int fd); diff --git a/kernel/bpf/syscall.c 
b/kernel/bpf/syscall.c index 4d530b1d5683..6eba236aacd1 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -688,6 +688,32 @@ const struct file_operations bpf_map_fops = { .poll = bpf_map_poll, }; +/* Code is similar to anon_inode_getfd(), except starts at FD 1 */ +int bpf_anon_inode_getfd(const char *name, const struct file_operations *fops, + void *priv, int flags) +{ + int error, fd; + struct file *file; + + error = alloc_fd(1, flags); + if (error < 0) + return error; + fd = error; + + file = anon_inode_getfile(name, fops, priv, flags); + if (IS_ERR(file)) { + error = PTR_ERR(file); + goto err_put_unused_fd; + } + fd_install(fd, file); + + return fd; + +err_put_unused_fd: + put_unused_fd(fd); + return error; +} + int bpf_map_new_fd(struct bpf_map *map, int flags) { int ret; @@ -696,8 +722,8 @@ int bpf_map_new_fd(struct bpf_map *map, int flags) if (ret < 0) return ret; - return anon_inode_getfd("bpf-map", &bpf_map_fops, map, - flags | O_CLOEXEC); + return bpf_anon_inode_getfd("bpf-map", &bpf_map_fops, map, + flags | O_CLOEXEC); } int bpf_get_file_flag(int flags) @@ -1840,8 +1866,8 @@ int bpf_prog_new_fd(struct bpf_prog *prog) if (ret < 0) return ret; - return anon_inode_getfd("bpf-prog", &bpf_prog_fops, prog, - O_RDWR | O_CLOEXEC); + return bpf_anon_inode_getfd("bpf-prog", &bpf_prog_fops, prog, + O_RDWR | O_CLOEXEC); } static struct bpf_prog *____bpf_prog_get(struct fd f) @@ -2471,7 +2497,7 @@ int bpf_link_settle(struct bpf_link_primer *primer) int bpf_link_new_fd(struct bpf_link *link) { - return anon_inode_getfd("bpf-link", &bpf_link_fops, link, O_CLOEXEC); + return bpf_anon_inode_getfd("bpf-link", &bpf_link_fops, link, O_CLOEXEC); } struct bpf_link *bpf_link_get_from_fd(u32 ufd) @@ -4024,7 +4050,7 @@ static int bpf_enable_runtime_stats(void) return -EBUSY; } - fd = anon_inode_getfd("bpf-stats", &bpf_stats_fops, NULL, O_CLOEXEC); + fd = bpf_anon_inode_getfd("bpf-stats", &bpf_stats_fops, NULL, O_CLOEXEC); if (fd >= 0) 
static_key_slow_inc(&bpf_stats_enabled_key.key);