From: YiFei Zhu <yifeifz2@xxxxxxxxxxxx> Currently the kernel does not provide an infrastructure to translate architecture numbers to a human-readable name. Translating syscall numbers to syscall names is possible through FTRACE_SYSCALL infrastructure but it does not provide support for compat syscalls. This will create a file for each PID as /proc/pid/seccomp_cache. The file will be empty when no seccomp filters are loaded, or be in the format of: <hex arch number> <decimal syscall number> <ALLOW | FILTER> where ALLOW means the cache is guaranteed to allow the syscall, and filter means the cache will pass the syscall to the BPF filter. For the docker default profile on x86_64 it looks like: c000003e 0 ALLOW c000003e 1 ALLOW c000003e 2 ALLOW c000003e 3 ALLOW [...] c000003e 132 ALLOW c000003e 133 ALLOW c000003e 134 FILTER c000003e 135 FILTER c000003e 136 FILTER c000003e 137 ALLOW c000003e 138 ALLOW c000003e 139 FILTER c000003e 140 ALLOW c000003e 141 ALLOW [...] This file is guarded by CONFIG_PROC_SECCOMP_CACHE with a default of N because I think certain users of seecomp might not want the application to know which syscalls are definitely usable. I'm not sure if adding all the "human readable names" is worthwhile, considering it can be easily done in userspace. Suggested-by: Jann Horn <jannh@xxxxxxxxxx> Link: https://lore.kernel.org/lkml/CAG48ez3Ofqp4crXGksLmZY6=fGrF_tWyUCg7PBkAetvbbOPeOA@xxxxxxxxxxxxxx/ Signed-off-by: YiFei Zhu <yifeifz2@xxxxxxxxxxxx> --- arch/Kconfig | 10 ++++++++++ fs/proc/base.c | 7 +++++-- include/linux/seccomp.h | 5 +++++ kernel/seccomp.c | 26 ++++++++++++++++++++++++++ 4 files changed, 46 insertions(+), 2 deletions(-) diff --git a/arch/Kconfig b/arch/Kconfig index 8cc3dc87f253..dbfd897e5dc0 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -514,6 +514,16 @@ config SECCOMP_CACHE_NR_ONLY endchoice +config PROC_SECCOMP_CACHE + bool "Show seccomp filter cache status in /proc/pid/seccomp_cache" + depends on SECCOMP_CACHE_NR_ONLY + depends on PROC_FS + help + This is enables /proc/pid/seccomp_cache interface to monitor + seccomp cache data. The file format is subject to change. + + If unsure, say N. + config HAVE_ARCH_STACKLEAK bool help diff --git a/fs/proc/base.c b/fs/proc/base.c index 617db4e0faa0..2af626f69fa1 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -2615,7 +2615,7 @@ static struct dentry *proc_pident_instantiate(struct dentry *dentry, return d_splice_alias(inode, dentry); } -static struct dentry *proc_pident_lookup(struct inode *dir, +static struct dentry *proc_pident_lookup(struct inode *dir, struct dentry *dentry, const struct pid_entry *p, const struct pid_entry *end) @@ -2815,7 +2815,7 @@ static const struct pid_entry attr_dir_stuff[] = { static int proc_attr_dir_readdir(struct file *file, struct dir_context *ctx) { - return proc_pident_readdir(file, ctx, + return proc_pident_readdir(file, ctx, attr_dir_stuff, ARRAY_SIZE(attr_dir_stuff)); } @@ -3258,6 +3258,9 @@ static const struct pid_entry tgid_base_stuff[] = { #ifdef CONFIG_PROC_PID_ARCH_STATUS ONE("arch_status", S_IRUGO, proc_pid_arch_status), #endif +#ifdef CONFIG_PROC_SECCOMP_CACHE + ONE("seccomp_cache", S_IRUSR, proc_pid_seccomp_cache), +#endif }; static int proc_tgid_base_readdir(struct file *file, struct dir_context *ctx) diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h index 02aef2844c38..3cedec824365 100644 --- a/include/linux/seccomp.h +++ b/include/linux/seccomp.h @@ -121,4 +121,9 @@ static inline long seccomp_get_metadata(struct task_struct *task, return -EINVAL; } #endif /* CONFIG_SECCOMP_FILTER && CONFIG_CHECKPOINT_RESTORE */ + +#ifdef CONFIG_PROC_SECCOMP_CACHE +int proc_pid_seccomp_cache(struct seq_file *m, struct pid_namespace *ns, + struct pid *pid, struct task_struct *task); +#endif #endif /* _LINUX_SECCOMP_H */ diff --git a/kernel/seccomp.c b/kernel/seccomp.c index ac0266b6d18a..d97ec1876b4e 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -2293,3 +2293,29 @@ static int __init seccomp_sysctl_init(void) device_initcall(seccomp_sysctl_init) #endif /* CONFIG_SYSCTL */ + +#ifdef CONFIG_PROC_SECCOMP_CACHE +/* Currently CONFIG_PROC_SECCOMP_CACHE implies CONFIG_SECCOMP_CACHE_NR_ONLY */ +int proc_pid_seccomp_cache(struct seq_file *m, struct pid_namespace *ns, + struct pid *pid, struct task_struct *task) +{ + struct seccomp_filter *f = READ_ONCE(task->seccomp.filter); + int arch, nr; + + if (!f) + return 0; + + for (arch = 0; arch < ARRAY_SIZE(syscall_arches); arch++) { + for (nr = 0; nr < NR_syscalls; nr++) { + bool cached = test_bit(nr, f->cache.syscall_ok[arch]); + char *status = cached ? "ALLOW" : "FILTER"; + + seq_printf(m, "%08x %d %s\n", syscall_arches[arch], + nr, status + ); + } + } + + return 0; +} +#endif /* CONFIG_PROC_SECCOMP_CACHE */ -- 2.28.0 _______________________________________________ Containers mailing list Containers@xxxxxxxxxxxxxxxxxxxxxxxxxx https://lists.linuxfoundation.org/mailman/listinfo/containers