On 2/5/19 11:27 AM, Michael S. Tsirkin wrote: > On Tue, Feb 05, 2019 at 08:06:33AM -0500, Nitesh Narayan Lal wrote: >> On 2/4/19 11:14 PM, Michael S. Tsirkin wrote: >>> On Mon, Feb 04, 2019 at 03:18:48PM -0500, Nitesh Narayan Lal wrote: >>>> This patch includes the following: >>>> 1. Basic skeleton for the support >>>> 2. Enablement of x86 platform to use the same >>>> >>>> Signed-off-by: Nitesh Narayan Lal <nitesh@xxxxxxxxxx> >>>> --- >>>> arch/x86/Kbuild | 2 +- >>>> arch/x86/kvm/Kconfig | 8 ++++++++ >>>> arch/x86/kvm/Makefile | 2 ++ >>>> include/linux/gfp.h | 9 +++++++++ >>>> include/linux/page_hinting.h | 17 +++++++++++++++++ >>>> virt/kvm/page_hinting.c | 36 ++++++++++++++++++++++++++++++++++++ >>>> 6 files changed, 73 insertions(+), 1 deletion(-) >>>> create mode 100644 include/linux/page_hinting.h >>>> create mode 100644 virt/kvm/page_hinting.c >>>> >>>> diff --git a/arch/x86/Kbuild b/arch/x86/Kbuild >>>> index c625f57472f7..3244df4ee311 100644 >>>> --- a/arch/x86/Kbuild >>>> +++ b/arch/x86/Kbuild >>>> @@ -2,7 +2,7 @@ obj-y += entry/ >>>> >>>> obj-$(CONFIG_PERF_EVENTS) += events/ >>>> >>>> -obj-$(CONFIG_KVM) += kvm/ >>>> +obj-$(subst m,y,$(CONFIG_KVM)) += kvm/ >>>> >>>> # Xen paravirtualization support >>>> obj-$(CONFIG_XEN) += xen/ >>>> diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig >>>> index 72fa955f4a15..2fae31459706 100644 >>>> --- a/arch/x86/kvm/Kconfig >>>> +++ b/arch/x86/kvm/Kconfig >>>> @@ -96,6 +96,14 @@ config KVM_MMU_AUDIT >>>> This option adds a R/W kVM module parameter 'mmu_audit', which allows >>>> auditing of KVM MMU events at runtime. >>>> >>>> +# KVM_FREE_PAGE_HINTING will allow the guest to report the free pages to the >>>> +# host in regular interval of time. >>>> +config KVM_FREE_PAGE_HINTING >>>> + def_bool y >>>> + depends on KVM >>>> + select VIRTIO >>>> + select VIRTIO_BALLOON >>>> + >>>> # OK, it's a little counter-intuitive to do this, but it puts it neatly under >>>> # the virtualization menu. 
>>>> source "drivers/vhost/Kconfig" >>>> diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile >>>> index 69b3a7c30013..78640a80501e 100644 >>>> --- a/arch/x86/kvm/Makefile >>>> +++ b/arch/x86/kvm/Makefile >>>> @@ -16,6 +16,8 @@ kvm-y += x86.o mmu.o emulate.o i8259.o irq.o lapic.o \ >>>> i8254.o ioapic.o irq_comm.o cpuid.o pmu.o mtrr.o \ >>>> hyperv.o page_track.o debugfs.o >>>> >>>> +obj-$(CONFIG_KVM_FREE_PAGE_HINTING) += $(KVM)/page_hinting.o >>>> + >>>> kvm-intel-y += vmx/vmx.o vmx/vmenter.o vmx/pmu_intel.o vmx/vmcs12.o vmx/evmcs.o vmx/nested.o >>>> kvm-amd-y += svm.o pmu_amd.o >>>> >>>> diff --git a/include/linux/gfp.h b/include/linux/gfp.h >>>> index 5f5e25fd6149..e596527284ba 100644 >>>> --- a/include/linux/gfp.h >>>> +++ b/include/linux/gfp.h >>>> @@ -7,6 +7,7 @@ >>>> #include <linux/stddef.h> >>>> #include <linux/linkage.h> >>>> #include <linux/topology.h> >>>> +#include <linux/page_hinting.h> >>>> >>>> struct vm_area_struct; >>>> >>>> @@ -456,6 +457,14 @@ static inline struct zonelist *node_zonelist(int nid, gfp_t flags) >>>> return NODE_DATA(nid)->node_zonelists + gfp_zonelist(flags); >>>> } >>>> >>>> +#ifdef CONFIG_KVM_FREE_PAGE_HINTING >>>> +#define HAVE_ARCH_FREE_PAGE >>>> +static inline void arch_free_page(struct page *page, int order) >>>> +{ >>>> + guest_free_page(page, order); >>>> +} >>>> +#endif >>>> + >>>> #ifndef HAVE_ARCH_FREE_PAGE >>>> static inline void arch_free_page(struct page *page, int order) { } >>>> #endif >>> OK so arch_free_page hook is used to tie into mm code, >>> with follow-up patches the pages get queued in a list >>> and then sent to hypervisor so it can free them. >>> Fair enough but how do we know the page is >>> not reused by the time it's received by the hypervisor? >>> If it's reused then isn't it a problem that >>> hypervisor calls MADV_DONTNEED on them? >> Hi Michael, >> >> In order to ensure that the page is not reused, we remove it from the >> buddy free list by acquiring the zone lock. 
After the page is freed by >> the hypervisor it is returned to the buddy free list again. > Thanks that's good to know. Could you point me to code that does this? This is done in patch 0006-KVM-Enables-the-kernel-to-isolate-and-report-free-page: hinting_fn() is responsible for scanning the per-cpu array, acquiring the zone lock, isolating the page and invoking hyperlist_ready(). In hyperlist_ready(), the hypercall to report the free pages is made and, once it completes, that same function returns those pages to the buddy free list. > >>> >>>> diff --git a/include/linux/page_hinting.h b/include/linux/page_hinting.h >>>> new file mode 100644 >>>> index 000000000000..b54f7428f348 >>>> --- /dev/null >>>> +++ b/include/linux/page_hinting.h >>>> @@ -0,0 +1,17 @@ >>>> +/* >>>> + * Size of the array which is used to store the freed pages is defined by >>>> + * MAX_FGPT_ENTRIES. If possible, we have to find a better way using which >>>> + * we can get rid of the hardcoded array size. >>>> + */ >>>> +#define MAX_FGPT_ENTRIES 1000 >>>> +/* >>>> + * hypervisor_pages - It is a dummy structure passed with the hypercall. >>>> + * @pfn: page frame number for the page which needs to be sent to the host. >>>> + * @order: order of the page needs to be reported to the host. >>>> + */ >>>> +struct hypervisor_pages { >>>> + unsigned long pfn; >>>> + unsigned int order; >>>> +}; >>>> + >>>> +void guest_free_page(struct page *page, int order); >>>> diff --git a/virt/kvm/page_hinting.c b/virt/kvm/page_hinting.c >>>> new file mode 100644 >>>> index 000000000000..818bd6b84e0c >>>> --- /dev/null >>>> +++ b/virt/kvm/page_hinting.c >>>> @@ -0,0 +1,36 @@ >>>> +#include <linux/gfp.h> >>>> +#include <linux/mm.h> >>>> +#include <linux/kernel.h> >>>> + >>>> +/* >>>> + * struct kvm_free_pages - Tracks the pages which are freed by the guest. >>>> + * @pfn: page frame number for the page which is freed. >>>> + * @order: order corresponding to the page freed. 
>>>> + * @zonenum: zone number to which the freed page belongs. >>>> + */ >>>> +struct kvm_free_pages { >>>> + unsigned long pfn; >>>> + unsigned int order; >>>> + int zonenum; >>>> +}; >>>> + >>>> +/* >>>> + * struct page_hinting - holds array objects for the structures used to track >>>> + * guest free pages, along with an index variable for each of them. >>>> + * @kvm_pt: array object for the structure kvm_free_pages. >>>> + * @kvm_pt_idx: index for kvm_free_pages object. >>>> + * @hypervisor_pagelist: array object for the structure hypervisor_pages. >>>> + * @hyp_idx: index for hypervisor_pages object. >>>> + */ >>>> +struct page_hinting { >>>> + struct kvm_free_pages kvm_pt[MAX_FGPT_ENTRIES]; >>>> + int kvm_pt_idx; >>>> + struct hypervisor_pages hypervisor_pagelist[MAX_FGPT_ENTRIES]; >>>> + int hyp_idx; >>>> +}; >>>> + >>>> +DEFINE_PER_CPU(struct page_hinting, hinting_obj); >>>> + >>>> +void guest_free_page(struct page *page, int order) >>>> +{ >>>> +} >>>> -- >>>> 2.17.2 >> -- >> Regards >> Nitesh >> > > -- Regards Nitesh
Attachment:
signature.asc
Description: OpenPGP digital signature