Software such as QEMU and libvirt requires the raw content of some MSRs to calculate host CPU capabilities. This is currently done through /dev/cpu/*/msr, which is locked behind both CAP_SYS_RAWIO and file mode 0600, allowing only root to read and write MSRs. Expose some non-security-sensitive MSRs through sysctl (/proc/sys/vm/msr/) to allow access for unprivileged processes. This also helps other programs that are interested in IA32_EFER for x86-64-v1 detection. Signed-off-by: Tim Wiederhake <twiederh@xxxxxxxxxx> --- Changes to v1 (https://lkml.org/lkml/2023/5/23/1230): * removed patch to limit reads to /dev/cpu/*/msr to 8 bytes per read * removed CAP_SYS_RAWIO-less access to /dev/cpu/*/msr * introduced sysctl interface to MSRs With this sysctl-based, unrestricted read access to some select MSRs in place, a later patch could introduce checks for CAP_SYS_RAWIO for every access to /dev/cpu/*/msr as mentioned in the feedback to v1. --- arch/x86/kernel/msr.c | 45 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c index 7bb17d37db01..3c8354f3c2bd 100644 --- a/arch/x86/kernel/msr.c +++ b/arch/x86/kernel/msr.c @@ -50,6 +50,31 @@ enum allow_write_msrs { static enum allow_write_msrs allow_writes = MSR_WRITES_DEFAULT; +struct allow_read_msrs { + const char *procname; + u32 index; + u32 value[2]; +}; + +static struct allow_read_msrs allow_reads[] = { + { + .procname = "ia32_core_caps", + .index = MSR_IA32_CORE_CAPS, + }, + { + .procname = "ia32_arch_capabilities", + .index = MSR_IA32_ARCH_CAPABILITIES, + }, + { + .procname = "efer", + .index = MSR_EFER, + }, +}; + +static struct ctl_table msr_files[ARRAY_SIZE(allow_reads) + 1]; + +static struct ctl_table_header *msr_files_header; + static ssize_t msr_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { @@ -258,6 +283,25 @@ static char *msr_devnode(const struct device *dev, umode_t *mode) static int __init msr_init(void) { int err; + int i, j; + +
for (i = 0, j = 0; i < ARRAY_SIZE(allow_reads); ++i) { + err = rdmsr_safe_on_cpu(0, allow_reads[i].index, + &allow_reads[i].value[0], + &allow_reads[i].value[1]); + if (err) + continue; + msr_files[j].procname = allow_reads[i].procname; + msr_files[j].data = &allow_reads[i].value; + msr_files[j].maxlen = 2 * sizeof(u32); + msr_files[j].mode = 0444; + msr_files[j].proc_handler = proc_doulongvec_minmax; + ++j; + } + + msr_files_header = register_sysctl("vm/msr", msr_files); + if (!msr_files_header) + return -ENOMEM; if (__register_chrdev(MSR_MAJOR, 0, NR_CPUS, "cpu/msr", &msr_fops)) { pr_err("unable to get major %d for msr\n", MSR_MAJOR); @@ -287,6 +331,7 @@ module_init(msr_init); static void __exit msr_exit(void) { + unregister_sysctl_table(msr_files_header); cpuhp_remove_state(cpuhp_msr_state); class_destroy(msr_class); __unregister_chrdev(MSR_MAJOR, 0, NR_CPUS, "cpu/msr"); -- 2.39.2