Re: [PATCH 1/1] KVM: s390: Add memcg accounting to KVM allocations

Christian Borntraeger <borntraeger@xxxxxxxxxx> · Thu, 31 Oct 2019 16:27:47 +0100

On 31.10.19 16:22, David Hildenbrand wrote:
> On 31.10.19 16:19, Christian Borntraeger wrote:
>> While I propared my KVM Forum talk about whats new in KVM including
>> memcg, I realized that the s390 code does not take care of memcg.
>>
>> As far as I can tell, almost all kvm allocations in the s390x KVM code
>> can be attributed to process that triggers the allocation (in other
>> words, no global allocation for other guests). This will help the memcg
>> controller to do the right decisions.
>>
>> Signed-off-by: Christian Borntraeger <borntraeger@xxxxxxxxxx>
>> ---
>>   arch/s390/kvm/guestdbg.c  |  8 ++++----
>>   arch/s390/kvm/intercept.c |  2 +-
>>   arch/s390/kvm/interrupt.c | 12 ++++++------
>>   arch/s390/kvm/kvm-s390.c  | 22 +++++++++++-----------
>>   arch/s390/kvm/priv.c      |  4 ++--
>>   arch/s390/kvm/vsie.c      |  4 ++--
>>   6 files changed, 26 insertions(+), 26 deletions(-)
>>
>> diff --git a/arch/s390/kvm/guestdbg.c b/arch/s390/kvm/guestdbg.c
>> index 394a5f53805b..3765c4223bf9 100644
>> --- a/arch/s390/kvm/guestdbg.c
>> +++ b/arch/s390/kvm/guestdbg.c
>> @@ -184,7 +184,7 @@ static int __import_wp_info(struct kvm_vcpu *vcpu,
>>       if (wp_info->len < 0 || wp_info->len > MAX_WP_SIZE)
>>           return -EINVAL;
>>   -    wp_info->old_data = kmalloc(bp_data->len, GFP_KERNEL);
>> +    wp_info->old_data = kmalloc(bp_data->len, GFP_KERNEL_ACCOUNT);
>>       if (!wp_info->old_data)
>>           return -ENOMEM;
>>       /* try to backup the original value */
>> @@ -234,7 +234,7 @@ int kvm_s390_import_bp_data(struct kvm_vcpu *vcpu,
>>       if (nr_wp > 0) {
>>           wp_info = kmalloc_array(nr_wp,
>>                       sizeof(*wp_info),
>> -                    GFP_KERNEL);
>> +                    GFP_KERNEL_ACCOUNT);
>>           if (!wp_info) {
>>               ret = -ENOMEM;
>>               goto error;
>> @@ -243,7 +243,7 @@ int kvm_s390_import_bp_data(struct kvm_vcpu *vcpu,
>>       if (nr_bp > 0) {
>>           bp_info = kmalloc_array(nr_bp,
>>                       sizeof(*bp_info),
>> -                    GFP_KERNEL);
>> +                    GFP_KERNEL_ACCOUNT);
>>           if (!bp_info) {
>>               ret = -ENOMEM;
>>               goto error;
>> @@ -349,7 +349,7 @@ static struct kvm_hw_wp_info_arch *any_wp_changed(struct kvm_vcpu *vcpu)
>>           if (!wp_info || !wp_info->old_data || wp_info->len <= 0)
>>               continue;
>>   -        temp = kmalloc(wp_info->len, GFP_KERNEL);
>> +        temp = kmalloc(wp_info->len, GFP_KERNEL_ACCOUNT);
>>           if (!temp)
>>               continue;
>>   diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
>> index a389fa85cca2..fb2daae88105 100644
>> --- a/arch/s390/kvm/intercept.c
>> +++ b/arch/s390/kvm/intercept.c
>> @@ -387,7 +387,7 @@ int handle_sthyi(struct kvm_vcpu *vcpu)
>>       if (addr & ~PAGE_MASK)
>>           return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
>>   -    sctns = (void *)get_zeroed_page(GFP_KERNEL);
>> +    sctns = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT);
>>       if (!sctns)
>>           return -ENOMEM;
>>   diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
>> index 165dea4c7f19..7fe8896a82dd 100644
>> --- a/arch/s390/kvm/interrupt.c
>> +++ b/arch/s390/kvm/interrupt.c
>> @@ -1668,7 +1668,7 @@ struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm,
>>           goto out;
>>       }
>>   gisa_out:
>> -    tmp_inti = kzalloc(sizeof(*inti), GFP_KERNEL);
>> +    tmp_inti = kzalloc(sizeof(*inti), GFP_KERNEL_ACCOUNT);
>>       if (tmp_inti) {
>>           tmp_inti->type = KVM_S390_INT_IO(1, 0, 0, 0);
>>           tmp_inti->io.io_int_word = isc_to_int_word(isc);
>> @@ -1881,7 +1881,7 @@ int kvm_s390_inject_vm(struct kvm *kvm,
>>       struct kvm_s390_interrupt_info *inti;
>>       int rc;
>>   -    inti = kzalloc(sizeof(*inti), GFP_KERNEL);
>> +    inti = kzalloc(sizeof(*inti), GFP_KERNEL_ACCOUNT);
>>       if (!inti)
>>           return -ENOMEM;
>>   @@ -2275,7 +2275,7 @@ static int enqueue_floating_irq(struct kvm_device *dev,
>>           return -EINVAL;
>>         while (len >= sizeof(struct kvm_s390_irq)) {
>> -        inti = kzalloc(sizeof(*inti), GFP_KERNEL);
>> +        inti = kzalloc(sizeof(*inti), GFP_KERNEL_ACCOUNT);
>>           if (!inti)
>>               return -ENOMEM;
>>   @@ -2323,7 +2323,7 @@ static int register_io_adapter(struct kvm_device *dev,
>>       if (dev->kvm->arch.adapters[adapter_info.id] != NULL)
>>           return -EINVAL;
>>   -    adapter = kzalloc(sizeof(*adapter), GFP_KERNEL);
>> +    adapter = kzalloc(sizeof(*adapter), GFP_KERNEL_ACCOUNT);
>>       if (!adapter)
>>           return -ENOMEM;
>>   @@ -2363,7 +2363,7 @@ static int kvm_s390_adapter_map(struct kvm *kvm, unsigned int id, __u64 addr)
>>       if (!adapter || !addr)
>>           return -EINVAL;
>>   -    map = kzalloc(sizeof(*map), GFP_KERNEL);
>> +    map = kzalloc(sizeof(*map), GFP_KERNEL_ACCOUNT);
>>       if (!map) {
>>           ret = -ENOMEM;
>>           goto out;
>> @@ -3223,7 +3223,7 @@ int kvm_s390_gib_init(u8 nisc)
>>           goto out;
>>       }
>>   -    gib = (struct kvm_s390_gib *)get_zeroed_page(GFP_KERNEL | GFP_DMA);
>> +    gib = (struct kvm_s390_gib *)get_zeroed_page(GFP_KERNEL_ACCOUNT | GFP_DMA);
>>       if (!gib) {
>>           rc = -ENOMEM;
>>           goto out;
>> diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
>> index d9e6bf3d54f0..373e182fd8e8 100644
>> --- a/arch/s390/kvm/kvm-s390.c
>> +++ b/arch/s390/kvm/kvm-s390.c
>> @@ -1243,7 +1243,7 @@ static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
>>           ret = -EBUSY;
>>           goto out;
>>       }
>> -    proc = kzalloc(sizeof(*proc), GFP_KERNEL);
>> +    proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
>>       if (!proc) {
>>           ret = -ENOMEM;
>>           goto out;
>> @@ -1405,7 +1405,7 @@ static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
>>       struct kvm_s390_vm_cpu_processor *proc;
>>       int ret = 0;
>>   -    proc = kzalloc(sizeof(*proc), GFP_KERNEL);
>> +    proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
>>       if (!proc) {
>>           ret = -ENOMEM;
>>           goto out;
>> @@ -1433,7 +1433,7 @@ static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
>>       struct kvm_s390_vm_cpu_machine *mach;
>>       int ret = 0;
>>   -    mach = kzalloc(sizeof(*mach), GFP_KERNEL);
>> +    mach = kzalloc(sizeof(*mach), GFP_KERNEL_ACCOUNT);
>>       if (!mach) {
>>           ret = -ENOMEM;
>>           goto out;
>> @@ -1801,7 +1801,7 @@ static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
>>       if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
>>           return -EINVAL;
>>   -    keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
>> +    keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
>>       if (!keys)
>>           return -ENOMEM;
>>   @@ -1846,7 +1846,7 @@ static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
>>       if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
>>           return -EINVAL;
>>   -    keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
>> +    keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
>>       if (!keys)
>>           return -ENOMEM;
>>   @@ -2393,7 +2393,7 @@ static void sca_dispose(struct kvm *kvm)
>>     int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
>>   {
>> -    gfp_t alloc_flags = GFP_KERNEL;
>> +    gfp_t alloc_flags = GFP_KERNEL_ACCOUNT;
>>       int i, rc;
>>       char debug_name[16];
>>       static unsigned long sca_offset;
>> @@ -2438,7 +2438,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
>>         BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
>>       kvm->arch.sie_page2 =
>> -         (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
>> +         (struct sie_page2 *) get_zeroed_page(GFP_KERNEL_ACCOUNT | GFP_DMA);
>>       if (!kvm->arch.sie_page2)
>>           goto out_err;
>>   @@ -2652,7 +2652,7 @@ static int sca_switch_to_extended(struct kvm *kvm)
>>       unsigned int vcpu_idx;
>>       u32 scaol, scaoh;
>>   -    new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
>> +    new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL_ACCOUNT | __GFP_ZERO);
>>       if (!new_sca)
>>           return -ENOMEM;
>>   @@ -2947,7 +2947,7 @@ void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
>>     int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
>>   {
>> -    vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
>> +    vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL_ACCOUNT);
>>       if (!vcpu->arch.sie_block->cbrlo)
>>           return -ENOMEM;
>>       return 0;
>> @@ -3047,12 +3047,12 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
>>         rc = -ENOMEM;
>>   -    vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
>> +    vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL_ACCOUNT);
>>       if (!vcpu)
>>           goto out;
>>         BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
>> -    sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
>> +    sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL_ACCOUNT);
>>       if (!sie_page)
>>           goto out_free_cpu;
>>   diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
>> index ed52ffa8d5d4..536fcd599665 100644
>> --- a/arch/s390/kvm/priv.c
>> +++ b/arch/s390/kvm/priv.c
>> @@ -878,7 +878,7 @@ static int handle_stsi(struct kvm_vcpu *vcpu)
>>       switch (fc) {
>>       case 1: /* same handling for 1 and 2 */
>>       case 2:
>> -        mem = get_zeroed_page(GFP_KERNEL);
>> +        mem = get_zeroed_page(GFP_KERNEL_ACCOUNT);
>>           if (!mem)
>>               goto out_no_data;
>>           if (stsi((void *) mem, fc, sel1, sel2))
>> @@ -887,7 +887,7 @@ static int handle_stsi(struct kvm_vcpu *vcpu)
>>       case 3:
>>           if (sel1 != 2 || sel2 != 2)
>>               goto out_no_data;
>> -        mem = get_zeroed_page(GFP_KERNEL);
>> +        mem = get_zeroed_page(GFP_KERNEL_ACCOUNT);
>>           if (!mem)
>>               goto out_no_data;
>>           handle_stsi_3_2_2(vcpu, (void *) mem);
>> diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
>> index 076090f9e666..f55fca8f94f8 100644
>> --- a/arch/s390/kvm/vsie.c
>> +++ b/arch/s390/kvm/vsie.c
>> @@ -1236,7 +1236,7 @@ static struct vsie_page *get_vsie_page(struct kvm *kvm, unsigned long addr)
>>         mutex_lock(&kvm->arch.vsie.mutex);
>>       if (kvm->arch.vsie.page_count < nr_vcpus) {
>> -        page = alloc_page(GFP_KERNEL | __GFP_ZERO | GFP_DMA);
>> +        page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO | GFP_DMA);
>>           if (!page) {
>>               mutex_unlock(&kvm->arch.vsie.mutex);
>>               return ERR_PTR(-ENOMEM);
>> @@ -1338,7 +1338,7 @@ int kvm_s390_handle_vsie(struct kvm_vcpu *vcpu)
>>   void kvm_s390_vsie_init(struct kvm *kvm)
>>   {
>>       mutex_init(&kvm->arch.vsie.mutex);
>> -    INIT_RADIX_TREE(&kvm->arch.vsie.addr_to_page, GFP_KERNEL);
>> +    INIT_RADIX_TREE(&kvm->arch.vsie.addr_to_page, GFP_KERNEL_ACCOUNT);
>>   }
>>     /* Destroy the vsie data structures. To be called when a vm is destroyed. */
>>
> 
> I was wondering about the gmap, especially also page tables for nested guests. Did you consider that already?

No not yet. gmap would be an extra patch. I then also have to be careful if there  are
some data structures that are shared between different guests. I think not, but I have 
not yet looked completely through that code.