From: Alexander Duyck <alexander.h.duyck@xxxxxxxxxxxxxxx> Add the host side of the KVM memory hinting support. With this we expose a feature bit indicating that the host will pass the messages along to the new madvise function. This functionality is mutually exclusive with device assignment. If a device is assigned we will disable the functionality as it could lead to a potential memory corruption if a device writes to a page after KVM has flagged it as not being used. The logic as it is currently defined limits the hint to only supporting a hugepage or larger notifications. This is meant to help prevent us from potentially breaking up huge pages by hinting that only a portion of the page is not needed. Signed-off-by: Alexander Duyck <alexander.h.duyck@xxxxxxxxxxxxxxx> --- Documentation/virtual/kvm/cpuid.txt | 4 +++ Documentation/virtual/kvm/hypercalls.txt | 14 ++++++++++++ arch/x86/include/uapi/asm/kvm_para.h | 3 +++ arch/x86/kvm/cpuid.c | 6 ++++- arch/x86/kvm/x86.c | 35 ++++++++++++++++++++++++++++++ include/uapi/linux/kvm_para.h | 1 + 6 files changed, 62 insertions(+), 1 deletion(-) diff --git a/Documentation/virtual/kvm/cpuid.txt b/Documentation/virtual/kvm/cpuid.txt index 97ca1940a0dc..fe3395a58b7e 100644 --- a/Documentation/virtual/kvm/cpuid.txt +++ b/Documentation/virtual/kvm/cpuid.txt @@ -66,6 +66,10 @@ KVM_FEATURE_PV_SEND_IPI || 11 || guest checks this feature bit || || before using paravirtualized || || send IPIs. ------------------------------------------------------------------------------ +KVM_FEATURE_PV_UNUSED_PAGE_HINT || 12 || guest checks this feature bit + || || before using paravirtualized + || || unused page hints. +------------------------------------------------------------------------------ KVM_FEATURE_CLOCKSOURCE_STABLE_BIT || 24 || host will warn if no guest-side || || per-cpu warps are expected in || || kvmclock. diff --git a/Documentation/virtual/kvm/hypercalls.txt b/Documentation/virtual/kvm/hypercalls.txt index da24c138c8d1..b374678ac1f9 100644 --- a/Documentation/virtual/kvm/hypercalls.txt +++ b/Documentation/virtual/kvm/hypercalls.txt @@ -141,3 +141,17 @@ a0 corresponds to the APIC ID in the third argument (a2), bit 1 corresponds to the APIC ID a2+1, and so on. Returns the number of CPUs to which the IPIs were delivered successfully. + +7. KVM_HC_UNUSED_PAGE_HINT +------------------------ +Architecture: x86 +Status: active +Purpose: Send unused page hint to host + +a0: physical address of region unused, page aligned +a1: size of unused region, page aligned + +The hypercall lets a guest send notifications to the host that it will no +longer be using a given page in memory. Multiple pages can be hinted at by +using the size field to hint that a higher order page is available by +specifying the higher order page size. diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h index 19980ec1a316..f066c23060df 100644 --- a/arch/x86/include/uapi/asm/kvm_para.h +++ b/arch/x86/include/uapi/asm/kvm_para.h @@ -29,6 +29,7 @@ #define KVM_FEATURE_PV_TLB_FLUSH 9 #define KVM_FEATURE_ASYNC_PF_VMEXIT 10 #define KVM_FEATURE_PV_SEND_IPI 11 +#define KVM_FEATURE_PV_UNUSED_PAGE_HINT 12 #define KVM_HINTS_REALTIME 0 @@ -119,4 +120,6 @@ struct kvm_vcpu_pv_apf_data { #define KVM_PV_EOI_ENABLED KVM_PV_EOI_MASK #define KVM_PV_EOI_DISABLED 0x0 +#define KVM_PV_UNUSED_PAGE_HINT_MIN_ORDER HUGETLB_PAGE_ORDER + #endif /* _UAPI_ASM_X86_KVM_PARA_H */ diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index bbffa6c54697..b82bcbfbc420 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -136,6 +136,9 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu) if (kvm_hlt_in_guest(vcpu->kvm) && best && (best->eax & (1 << KVM_FEATURE_PV_UNHALT))) best->eax &= ~(1 << KVM_FEATURE_PV_UNHALT); + if (kvm_arch_has_assigned_device(vcpu->kvm) && best && + (best->eax & KVM_FEATURE_PV_UNUSED_PAGE_HINT)) + best->eax &= ~(1 << KVM_FEATURE_PV_UNUSED_PAGE_HINT); /* Update physical-address width */ vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu); @@ -637,7 +640,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, (1 << KVM_FEATURE_PV_UNHALT) | (1 << KVM_FEATURE_PV_TLB_FLUSH) | (1 << KVM_FEATURE_ASYNC_PF_VMEXIT) | - (1 << KVM_FEATURE_PV_SEND_IPI); + (1 << KVM_FEATURE_PV_SEND_IPI) | + (1 << KVM_FEATURE_PV_UNUSED_PAGE_HINT); if (sched_info_on()) entry->eax |= (1 << KVM_FEATURE_STEAL_TIME); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 3d27206f6c01..3ec75ab849e2 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -55,6 +55,7 @@ #include <linux/irqbypass.h> #include <linux/sched/stat.h> #include <linux/mem_encrypt.h> +#include <linux/mm.h> #include <trace/events/kvm.h> @@ -7052,6 +7053,37 @@ void kvm_vcpu_deactivate_apicv(struct kvm_vcpu *vcpu) kvm_x86_ops->refresh_apicv_exec_ctrl(vcpu); } +static int kvm_pv_unused_page_hint_op(struct kvm *kvm, gpa_t gpa, size_t len) +{ + unsigned long start; + + /* + * Guarantee the following: + * len meets minimum size + * len is a power of 2 + * gpa is aligned to len + */ + if (len < (PAGE_SIZE << KVM_PV_UNUSED_PAGE_HINT_MIN_ORDER)) + return -KVM_EINVAL; + if (!is_power_of_2(len) || !IS_ALIGNED(gpa, len)) + return -KVM_EINVAL; + + /* + * If a device is assigned we cannot use use madvise as memory + * is shared with the device and could lead to memory corruption + * if the device writes to it after free. + */ + if (kvm_arch_has_assigned_device(kvm)) + return -KVM_EOPNOTSUPP; + + start = gfn_to_hva(kvm, gpa_to_gfn(gpa)); + + if (kvm_is_error_hva(start + len)) + return -KVM_EFAULT; + + return do_madvise_dontneed(start, len); +} + int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) { unsigned long nr, a0, a1, a2, a3, ret; @@ -7098,6 +7130,9 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) case KVM_HC_SEND_IPI: ret = kvm_pv_send_ipi(vcpu->kvm, a0, a1, a2, a3, op_64_bit); break; + case KVM_HC_UNUSED_PAGE_HINT: + ret = kvm_pv_unused_page_hint_op(vcpu->kvm, a0, a1); + break; default: ret = -KVM_ENOSYS; break; diff --git a/include/uapi/linux/kvm_para.h b/include/uapi/linux/kvm_para.h index 6c0ce49931e5..75643b862a4e 100644 --- a/include/uapi/linux/kvm_para.h +++ b/include/uapi/linux/kvm_para.h @@ -28,6 +28,7 @@ #define KVM_HC_MIPS_CONSOLE_OUTPUT 8 #define KVM_HC_CLOCK_PAIRING 9 #define KVM_HC_SEND_IPI 10 +#define KVM_HC_UNUSED_PAGE_HINT 11 /* * hypercalls use architecture specific