Re: [PATCH v3 4/4] kvm: vmx: Add memcg accounting to KVM allocations

Paolo Bonzini <pbonzini@xxxxxxxxxx> · Tue, 12 Feb 2019 13:14:52 +0100

On 11/02/19 20:02, Ben Gardon wrote:
> There are many KVM kernel memory allocations which are tied to the life of
> the VM process and should be charged to the VM process's cgroup. If the
> allocations aren't tied to the process, the OOM killer will not know
> that killing the process will free the associated kernel memory.
> Add __GFP_ACCOUNT flags to many of the allocations which are not yet being
> charged to the VM process's cgroup.
> 
> Tested:
> 	Ran all kvm-unit-tests on a 64 bit Haswell machine, the patch
> 	introduced no new failures.
> 	Ran a kernel memory accounting test which creates a VM to touch
> 	memory and then checks that the kernel memory allocated for the
> 	process is within certain bounds.
> 	With this patch we account for much more of the vmalloc and slab memory
> 	allocated for the VM.
> 
> Signed-off-by: Ben Gardon <bgardon@xxxxxxxxxx>
> Reviewed-by: Shakeel Butt <shakeelb@xxxxxxxxxx>
> ---
>  arch/x86/kvm/vmx/nested.c |  9 +++++++--
>  arch/x86/kvm/vmx/vmx.c    | 27 ++++++++++++++++++---------
>  arch/x86/kvm/vmx/vmx.h    |  5 +++--
>  3 files changed, 28 insertions(+), 13 deletions(-)
> 
> diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
> index 3170e291215d0..88d20904b16e0 100644
> --- a/arch/x86/kvm/vmx/nested.c
> +++ b/arch/x86/kvm/vmx/nested.c
> @@ -4140,11 +4140,12 @@ static int enter_vmx_operation(struct kvm_vcpu *vcpu)
>  	if (r < 0)
>  		goto out_vmcs02;
>  
> -	vmx->nested.cached_vmcs12 = kmalloc(VMCS12_SIZE, GFP_KERNEL);
> +	vmx->nested.cached_vmcs12 = kmalloc(VMCS12_SIZE, GFP_KERNEL_ACCOUNT);
>  	if (!vmx->nested.cached_vmcs12)
>  		goto out_cached_vmcs12;
>  
> -	vmx->nested.cached_shadow_vmcs12 = kmalloc(VMCS12_SIZE, GFP_KERNEL);
> +	vmx->nested.cached_shadow_vmcs12 = kmalloc(VMCS12_SIZE,
> +						   GFP_KERNEL_ACCOUNT);
>  	if (!vmx->nested.cached_shadow_vmcs12)
>  		goto out_cached_shadow_vmcs12;
>  
> @@ -5686,6 +5687,10 @@ __init int nested_vmx_hardware_setup(int (*exit_handlers[])(struct kvm_vcpu *))
>  		enable_shadow_vmcs = 0;
>  	if (enable_shadow_vmcs) {
>  		for (i = 0; i < VMX_BITMAP_NR; i++) {
> +			/*
> +			 * The vmx_bitmap is not tied to a VM and so should
> +			 * not be charged to a memcg.
> +			 */
>  			vmx_bitmap[i] = (unsigned long *)
>  				__get_free_page(GFP_KERNEL);
>  			if (!vmx_bitmap[i]) {
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index 4d39f731bc332..9efa4ceb45c84 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -245,6 +245,10 @@ static int vmx_setup_l1d_flush(enum vmx_l1d_flush_state l1tf)
>  
>  	if (l1tf != VMENTER_L1D_FLUSH_NEVER && !vmx_l1d_flush_pages &&
>  	    !boot_cpu_has(X86_FEATURE_FLUSH_L1D)) {
> +		/*
> +		 * This allocation for vmx_l1d_flush_pages is not tied to a VM
> +		 * lifetime and so should not be charged to a memcg.
> +		 */
>  		page = alloc_pages(GFP_KERNEL, L1D_CACHE_ORDER);
>  		if (!page)
>  			return -ENOMEM;
> @@ -2389,13 +2393,13 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf,
>  	return 0;
>  }
>  
> -struct vmcs *alloc_vmcs_cpu(bool shadow, int cpu)
> +struct vmcs *alloc_vmcs_cpu(bool shadow, int cpu, gfp_t flags)
>  {
>  	int node = cpu_to_node(cpu);
>  	struct page *pages;
>  	struct vmcs *vmcs;
>  
> -	pages = __alloc_pages_node(node, GFP_KERNEL, vmcs_config.order);
> +	pages = __alloc_pages_node(node, flags, vmcs_config.order);
>  	if (!pages)
>  		return NULL;
>  	vmcs = page_address(pages);
> @@ -2442,7 +2446,8 @@ int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs)
>  	loaded_vmcs_init(loaded_vmcs);
>  
>  	if (cpu_has_vmx_msr_bitmap()) {
> -		loaded_vmcs->msr_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
> +		loaded_vmcs->msr_bitmap = (unsigned long *)
> +				__get_free_page(GFP_KERNEL_ACCOUNT);
>  		if (!loaded_vmcs->msr_bitmap)
>  			goto out_vmcs;
>  		memset(loaded_vmcs->msr_bitmap, 0xff, PAGE_SIZE);
> @@ -2483,7 +2488,7 @@ static __init int alloc_kvm_area(void)
>  	for_each_possible_cpu(cpu) {
>  		struct vmcs *vmcs;
>  
> -		vmcs = alloc_vmcs_cpu(false, cpu);
> +		vmcs = alloc_vmcs_cpu(false, cpu, GFP_KERNEL);
>  		if (!vmcs) {
>  			free_kvm_area();
>  			return -ENOMEM;
> @@ -6652,7 +6657,9 @@ STACK_FRAME_NON_STANDARD(vmx_vcpu_run);
>  
>  static struct kvm *vmx_vm_alloc(void)
>  {
> -	struct kvm_vmx *kvm_vmx = vzalloc(sizeof(struct kvm_vmx));
> +	struct kvm_vmx *kvm_vmx = __vmalloc(sizeof(struct kvm_vmx),
> +					    GFP_KERNEL_ACCOUNT | __GFP_ZERO,
> +					    PAGE_KERNEL);
>  	return &kvm_vmx->kvm;
>  }
>  
> @@ -6680,14 +6687,16 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
>  static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
>  {
>  	int err;
> -	struct vcpu_vmx *vmx = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
> +	struct vcpu_vmx *vmx;
>  	unsigned long *msr_bitmap;
>  	int cpu;
>  
> +	vmx = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL_ACCOUNT);
>  	if (!vmx)
>  		return ERR_PTR(-ENOMEM);
>  
> -	vmx->vcpu.arch.guest_fpu = kmem_cache_zalloc(x86_fpu_cache, GFP_KERNEL);
> +	vmx->vcpu.arch.guest_fpu = kmem_cache_zalloc(x86_fpu_cache,
> +			GFP_KERNEL_ACCOUNT);
>  	if (!vmx->vcpu.arch.guest_fpu) {
>  		printk(KERN_ERR "kvm: failed to allocate vcpu's fpu\n");
>  		err = -ENOMEM;
> @@ -6709,12 +6718,12 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
>  	 * for the guest, etc.
>  	 */
>  	if (enable_pml) {
> -		vmx->pml_pg = alloc_page(GFP_KERNEL | __GFP_ZERO);
> +		vmx->pml_pg = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
>  		if (!vmx->pml_pg)
>  			goto uninit_vcpu;
>  	}
>  
> -	vmx->guest_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL);
> +	vmx->guest_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL_ACCOUNT);
>  	BUILD_BUG_ON(ARRAY_SIZE(vmx_msr_index) * sizeof(vmx->guest_msrs[0])
>  		     > PAGE_SIZE);
>  
> diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
> index 99328954c2fc1..23aa757cecf19 100644
> --- a/arch/x86/kvm/vmx/vmx.h
> +++ b/arch/x86/kvm/vmx/vmx.h
> @@ -478,7 +478,7 @@ static inline struct pi_desc *vcpu_to_pi_desc(struct kvm_vcpu *vcpu)
>  	return &(to_vmx(vcpu)->pi_desc);
>  }
>  
> -struct vmcs *alloc_vmcs_cpu(bool shadow, int cpu);
> +struct vmcs *alloc_vmcs_cpu(bool shadow, int cpu, gfp_t flags);
>  void free_vmcs(struct vmcs *vmcs);
>  int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs);
>  void free_loaded_vmcs(struct loaded_vmcs *loaded_vmcs);
> @@ -487,7 +487,8 @@ void loaded_vmcs_clear(struct loaded_vmcs *loaded_vmcs);
>  
>  static inline struct vmcs *alloc_vmcs(bool shadow)
>  {
> -	return alloc_vmcs_cpu(shadow, raw_smp_processor_id());
> +	return alloc_vmcs_cpu(shadow, raw_smp_processor_id(),
> +			      GFP_KERNEL_ACCOUNT);
>  }
>  
>  u64 construct_eptp(struct kvm_vcpu *vcpu, unsigned long root_hpa);
> 

Queued all, thanks.

Paolo