On 11/02/19 20:02, Ben Gardon wrote: > There are many KVM kernel memory allocations which are tied to the life of > the VM process and should be charged to the VM process's cgroup. If the > allocations aren't tied to the process, the OOM killer will not know > that killing the process will free the associated kernel memory. > Add __GFP_ACCOUNT flags to many of the allocations which are not yet being > charged to the VM process's cgroup. > > Tested: > Ran all kvm-unit-tests on a 64 bit Haswell machine, the patch > introduced no new failures. > Ran a kernel memory accounting test which creates a VM to touch > memory and then checks that the kernel memory allocated for the > process is within certain bounds. > With this patch we account for much more of the vmalloc and slab memory > allocated for the VM. > > Signed-off-by: Ben Gardon <bgardon@xxxxxxxxxx> > Reviewed-by: Shakeel Butt <shakeelb@xxxxxxxxxx> > --- > arch/x86/kvm/vmx/nested.c | 9 +++++++-- > arch/x86/kvm/vmx/vmx.c | 27 ++++++++++++++++++--------- > arch/x86/kvm/vmx/vmx.h | 5 +++-- > 3 files changed, 28 insertions(+), 13 deletions(-) > > diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c > index 3170e291215d0..88d20904b16e0 100644 > --- a/arch/x86/kvm/vmx/nested.c > +++ b/arch/x86/kvm/vmx/nested.c > @@ -4140,11 +4140,12 @@ static int enter_vmx_operation(struct kvm_vcpu *vcpu) > if (r < 0) > goto out_vmcs02; > > - vmx->nested.cached_vmcs12 = kmalloc(VMCS12_SIZE, GFP_KERNEL); > + vmx->nested.cached_vmcs12 = kmalloc(VMCS12_SIZE, GFP_KERNEL_ACCOUNT); > if (!vmx->nested.cached_vmcs12) > goto out_cached_vmcs12; > > - vmx->nested.cached_shadow_vmcs12 = kmalloc(VMCS12_SIZE, GFP_KERNEL); > + vmx->nested.cached_shadow_vmcs12 = kmalloc(VMCS12_SIZE, > + GFP_KERNEL_ACCOUNT); > if (!vmx->nested.cached_shadow_vmcs12) > goto out_cached_shadow_vmcs12; > > @@ -5686,6 +5687,10 @@ __init int nested_vmx_hardware_setup(int (*exit_handlers[])(struct kvm_vcpu *)) > enable_shadow_vmcs = 0; > if (enable_shadow_vmcs) { > for (i = 0; i < VMX_BITMAP_NR; i++) { > + /* > + * The vmx_bitmap is not tied to a VM and so should > + * not be charged to a memcg. > + */ > vmx_bitmap[i] = (unsigned long *) > __get_free_page(GFP_KERNEL); > if (!vmx_bitmap[i]) { > diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c > index 4d39f731bc332..9efa4ceb45c84 100644 > --- a/arch/x86/kvm/vmx/vmx.c > +++ b/arch/x86/kvm/vmx/vmx.c > @@ -245,6 +245,10 @@ static int vmx_setup_l1d_flush(enum vmx_l1d_flush_state l1tf) > > if (l1tf != VMENTER_L1D_FLUSH_NEVER && !vmx_l1d_flush_pages && > !boot_cpu_has(X86_FEATURE_FLUSH_L1D)) { > + /* > + * This allocation for vmx_l1d_flush_pages is not tied to a VM > + * lifetime and so should not be charged to a memcg. > + */ > page = alloc_pages(GFP_KERNEL, L1D_CACHE_ORDER); > if (!page) > return -ENOMEM; > @@ -2389,13 +2393,13 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf, > return 0; > } > > -struct vmcs *alloc_vmcs_cpu(bool shadow, int cpu) > +struct vmcs *alloc_vmcs_cpu(bool shadow, int cpu, gfp_t flags) > { > int node = cpu_to_node(cpu); > struct page *pages; > struct vmcs *vmcs; > > - pages = __alloc_pages_node(node, GFP_KERNEL, vmcs_config.order); > + pages = __alloc_pages_node(node, flags, vmcs_config.order); > if (!pages) > return NULL; > vmcs = page_address(pages); > @@ -2442,7 +2446,8 @@ int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs) > loaded_vmcs_init(loaded_vmcs); > > if (cpu_has_vmx_msr_bitmap()) { > - loaded_vmcs->msr_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL); > + loaded_vmcs->msr_bitmap = (unsigned long *) > + __get_free_page(GFP_KERNEL_ACCOUNT); > if (!loaded_vmcs->msr_bitmap) > goto out_vmcs; > memset(loaded_vmcs->msr_bitmap, 0xff, PAGE_SIZE); > @@ -2483,7 +2488,7 @@ static __init int alloc_kvm_area(void) > for_each_possible_cpu(cpu) { > struct vmcs *vmcs; > > - vmcs = alloc_vmcs_cpu(false, cpu); > + vmcs = alloc_vmcs_cpu(false, cpu, GFP_KERNEL); > if (!vmcs) { > free_kvm_area(); > return -ENOMEM; > @@ -6652,7 +6657,9 @@ STACK_FRAME_NON_STANDARD(vmx_vcpu_run); > > static struct kvm *vmx_vm_alloc(void) > { > - struct kvm_vmx *kvm_vmx = vzalloc(sizeof(struct kvm_vmx)); > + struct kvm_vmx *kvm_vmx = __vmalloc(sizeof(struct kvm_vmx), > + GFP_KERNEL_ACCOUNT | __GFP_ZERO, > + PAGE_KERNEL); > return &kvm_vmx->kvm; > } > > @@ -6680,14 +6687,16 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu) > static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) > { > int err; > - struct vcpu_vmx *vmx = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); > + struct vcpu_vmx *vmx; > unsigned long *msr_bitmap; > int cpu; > > + vmx = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL_ACCOUNT); > if (!vmx) > return ERR_PTR(-ENOMEM); > > - vmx->vcpu.arch.guest_fpu = kmem_cache_zalloc(x86_fpu_cache, GFP_KERNEL); > + vmx->vcpu.arch.guest_fpu = kmem_cache_zalloc(x86_fpu_cache, > + GFP_KERNEL_ACCOUNT); > if (!vmx->vcpu.arch.guest_fpu) { > printk(KERN_ERR "kvm: failed to allocate vcpu's fpu\n"); > err = -ENOMEM; > @@ -6709,12 +6718,12 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) > * for the guest, etc. > */ > if (enable_pml) { > - vmx->pml_pg = alloc_page(GFP_KERNEL | __GFP_ZERO); > + vmx->pml_pg = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO); > if (!vmx->pml_pg) > goto uninit_vcpu; > } > > - vmx->guest_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL); > + vmx->guest_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL_ACCOUNT); > BUILD_BUG_ON(ARRAY_SIZE(vmx_msr_index) * sizeof(vmx->guest_msrs[0]) > > PAGE_SIZE); > > diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h > index 99328954c2fc1..23aa757cecf19 100644 > --- a/arch/x86/kvm/vmx/vmx.h > +++ b/arch/x86/kvm/vmx/vmx.h > @@ -478,7 +478,7 @@ static inline struct pi_desc *vcpu_to_pi_desc(struct kvm_vcpu *vcpu) > return &(to_vmx(vcpu)->pi_desc); > } > > -struct vmcs *alloc_vmcs_cpu(bool shadow, int cpu); > +struct vmcs *alloc_vmcs_cpu(bool shadow, int cpu, gfp_t flags); > void free_vmcs(struct vmcs *vmcs); > int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs); > void free_loaded_vmcs(struct loaded_vmcs *loaded_vmcs); > @@ -487,7 +487,8 @@ void loaded_vmcs_clear(struct loaded_vmcs *loaded_vmcs); > > static inline struct vmcs *alloc_vmcs(bool shadow) > { > - return alloc_vmcs_cpu(shadow, raw_smp_processor_id()); > + return alloc_vmcs_cpu(shadow, raw_smp_processor_id(), > + GFP_KERNEL_ACCOUNT); > } > > u64 construct_eptp(struct kvm_vcpu *vcpu, unsigned long root_hpa); > Queued all, thanks. Paolo