On Mon, Mar 21, 2022 at 04:48:44PM -0700, Ben Gardon wrote: > Ensure that the userspace actor attempting to disable NX hugepages has > permission to reboot the system. Since disabling NX hugepages would > allow a guest to crash the system, it is similar to reboot permissions. > > This approach is the simplest permission gating, but passing a file > descriptor opened for write for the module parameter would also work > well and be more precise. > The latter approach was suggested by Sean Christopherson. > > Suggested-by: Jim Mattson <jmattson@xxxxxxxxxx> > Signed-off-by: Ben Gardon <bgardon@xxxxxxxxxx> > --- > arch/x86/kvm/x86.c | 18 ++++++- > .../selftests/kvm/include/kvm_util_base.h | 2 + > tools/testing/selftests/kvm/lib/kvm_util.c | 7 +++ > .../selftests/kvm/x86_64/nx_huge_pages_test.c | 49 ++++++++++++++----- > .../kvm/x86_64/nx_huge_pages_test.sh | 2 +- > 5 files changed, 65 insertions(+), 13 deletions(-) > > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c > index 74351cbb9b5b..995f30667619 100644 > --- a/arch/x86/kvm/x86.c > +++ b/arch/x86/kvm/x86.c > @@ -4256,7 +4256,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) > case KVM_CAP_SYS_ATTRIBUTES: > case KVM_CAP_VAPIC: > case KVM_CAP_ENABLE_CAP: > - case KVM_CAP_VM_DISABLE_NX_HUGE_PAGES: > r = 1; > break; > case KVM_CAP_EXIT_HYPERCALL: > @@ -4359,6 +4358,14 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) > case KVM_CAP_DISABLE_QUIRKS2: > r = KVM_X86_VALID_QUIRKS; > break; > + case KVM_CAP_VM_DISABLE_NX_HUGE_PAGES: > + /* > + * Since the risk of disabling NX hugepages is a guest crashing > + * the system, ensure the userspace process has permission to > + * reboot the system. > + */ > + r = capable(CAP_SYS_BOOT); Duplicating this check and comment isn't ideal. I think it would be fine to unconditionally return true here (KVM, after all, does support the capability) and only check for CAP_SYS_BOOT when userspace attempts to enable the capability. > + break; > default: > break; > } > @@ -6050,6 +6057,15 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm, > mutex_unlock(&kvm->lock); > break; > case KVM_CAP_VM_DISABLE_NX_HUGE_PAGES: > + /* > + * Since the risk of disabling NX hugepages is a guest crashing > + * the system, ensure the userspace process has permission to > + * reboot the system. > + */ > + if (!capable(CAP_SYS_BOOT)) { > + r = -EPERM; > + break; > + } > kvm->arch.disable_nx_huge_pages = true; > kvm_update_nx_huge_pages(kvm); > r = 0; > diff --git a/tools/testing/selftests/kvm/include/kvm_util_base.h b/tools/testing/selftests/kvm/include/kvm_util_base.h > index 72163ba2f878..4db8251c3ce5 100644 > --- a/tools/testing/selftests/kvm/include/kvm_util_base.h > +++ b/tools/testing/selftests/kvm/include/kvm_util_base.h Can you split out the selftests changes to a separate commit? I have a feeling you meant to :). > @@ -411,4 +411,6 @@ uint64_t vm_get_single_stat(struct kvm_vm *vm, const char *stat_name); > > uint32_t guest_get_vcpuid(void); > > +void vm_disable_nx_huge_pages(struct kvm_vm *vm); > + > #endif /* SELFTEST_KVM_UTIL_BASE_H */ > diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c > index 9d72d1bb34fa..46a7fa08d3e0 100644 > --- a/tools/testing/selftests/kvm/lib/kvm_util.c > +++ b/tools/testing/selftests/kvm/lib/kvm_util.c > @@ -2765,3 +2765,10 @@ uint64_t vm_get_single_stat(struct kvm_vm *vm, const char *stat_name) > return value; > } > > +void vm_disable_nx_huge_pages(struct kvm_vm *vm) > +{ > + struct kvm_enable_cap cap = { 0 }; > + > + cap.cap = KVM_CAP_VM_DISABLE_NX_HUGE_PAGES; > + vm_enable_cap(vm, &cap); > +} > diff --git a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c > index 2bcbe4efdc6a..5ce98f759bc8 100644 > --- a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c > +++ b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c Will you add a test to exercise the CAP_SYS_BOOT check? At minimum the selftest should check if it has CAP_SYS_BOOT and act accordingly (e.g. exiting with KSFT_SKIP). > @@ -57,13 +57,40 @@ static void check_split_count(struct kvm_vm *vm, int expected_splits) > expected_splits, actual_splits); > } > > +static void help(void) > +{ > + puts(""); > + printf("usage: nx_huge_pages_test.sh [-x]\n"); > + puts(""); > + printf(" -x: Allow executable huge pages on the VM.\n"); > + puts(""); > + exit(0); > +} > + > int main(int argc, char **argv) > { > struct kvm_vm *vm; > struct timespec ts; > + bool disable_nx = false; > + int opt; > + > + while ((opt = getopt(argc, argv, "x")) != -1) { > + switch (opt) { > + case 'x': > + disable_nx = true; > + break; > + case 'h': > + default: > + help(); > + break; > + } > + } > > vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES, O_RDWR); > > + if (disable_nx) > + vm_disable_nx_huge_pages(vm); > + > vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS_HUGETLB, > HPAGE_PADDR_START, HPAGE_SLOT, > HPAGE_SLOT_NPAGES, 0); > @@ -83,21 +110,21 @@ int main(int argc, char **argv) > * at 2M. > */ > run_guest_code(vm, guest_code0); > - check_2m_page_count(vm, 2); > - check_split_count(vm, 2); > + check_2m_page_count(vm, disable_nx ? 4 : 2); > + check_split_count(vm, disable_nx ? 0 : 2); > > /* > * guest_code1 is in the same huge page as data1, so it will cause > * that huge page to be remapped at 4k. > */ > run_guest_code(vm, guest_code1); > - check_2m_page_count(vm, 1); > - check_split_count(vm, 3); > + check_2m_page_count(vm, disable_nx ? 4 : 1); > + check_split_count(vm, disable_nx ? 0 : 3); > > /* Run guest_code0 again to check that is has no effect. */ > run_guest_code(vm, guest_code0); > - check_2m_page_count(vm, 1); > - check_split_count(vm, 3); > + check_2m_page_count(vm, disable_nx ? 4 : 1); > + check_split_count(vm, disable_nx ? 0 : 3); > > /* > * Give recovery thread time to run. The wrapper script sets > @@ -110,7 +137,7 @@ int main(int argc, char **argv) > /* > * Now that the reclaimer has run, all the split pages should be gone. > */ > - check_2m_page_count(vm, 1); > + check_2m_page_count(vm, disable_nx ? 4 : 1); > check_split_count(vm, 0); > > /* > @@ -118,13 +145,13 @@ int main(int argc, char **argv) > * again to check that pages are mapped at 2M again. > */ > run_guest_code(vm, guest_code0); > - check_2m_page_count(vm, 2); > - check_split_count(vm, 2); > + check_2m_page_count(vm, disable_nx ? 4 : 2); > + check_split_count(vm, disable_nx ? 0 : 2); > > /* Pages are once again split from running guest_code1. */ > run_guest_code(vm, guest_code1); > - check_2m_page_count(vm, 1); > - check_split_count(vm, 3); > + check_2m_page_count(vm, disable_nx ? 4 : 1); > + check_split_count(vm, disable_nx ? 0 : 3); > > kvm_vm_free(vm); > > diff --git a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.sh b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.sh > index 19fc95723fcb..29f999f48848 100755 > --- a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.sh > +++ b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.sh > @@ -14,7 +14,7 @@ echo 1 > /sys/module/kvm/parameters/nx_huge_pages_recovery_ratio > echo 100 > /sys/module/kvm/parameters/nx_huge_pages_recovery_period_ms > echo 200 > /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages > > -./nx_huge_pages_test > +./nx_huge_pages_test "${@}" > RET=$? > > echo $NX_HUGE_PAGES > /sys/module/kvm/parameters/nx_huge_pages > -- > 2.35.1.894.gb6a874cedc-goog >