Re: [PATCH v2 3/6] memory-attribute-manager: Introduce MemoryAttributeManager to manage RAMBLock with guest_memfd

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 




On 2/18/2025 5:19 PM, Alexey Kardashevskiy wrote:
> 
> 

[..]

>> diff --git a/include/system/memory-attribute-manager.h b/include/
>> system/memory-attribute-manager.h
>> new file mode 100644
>> index 0000000000..72adc0028e
>> --- /dev/null
>> +++ b/include/system/memory-attribute-manager.h
>> @@ -0,0 +1,42 @@
>> +/*
>> + * QEMU memory attribute manager
>> + *
>> + * Copyright Intel
>> + *
>> + * Author:
>> + *      Chenyi Qiang <chenyi.qiang@xxxxxxxxx>
>> + *
>> + * This work is licensed under the terms of the GNU GPL, version 2 or
>> later.
>> + * See the COPYING file in the top-level directory
>> + *
>> + */
>> +
>> +#ifndef SYSTEM_MEMORY_ATTRIBUTE_MANAGER_H
>> +#define SYSTEM_MEMORY_ATTRIBUTE_MANAGER_H
>> +
>> +#include "system/hostmem.h"
>> +
>> +#define TYPE_MEMORY_ATTRIBUTE_MANAGER "memory-attribute-manager"
>> +
>> +OBJECT_DECLARE_TYPE(MemoryAttributeManager,
>> MemoryAttributeManagerClass, MEMORY_ATTRIBUTE_MANAGER)
>> +
>> +struct MemoryAttributeManager {
>> +    Object parent;
>> +
>> +    MemoryRegion *mr;
>> +
>> +    /* 1-setting of the bit represents the memory is populated
>> (shared) */
>> +    int32_t bitmap_size;
> 
> unsigned.
> 
> Also, do either s/bitmap_size/shared_bitmap_size/ or
> s/shared_bitmap/bitmap/

Will change it. Thanks.

> 
> 
> 
>> +    unsigned long *shared_bitmap;
>> +
>> +    QLIST_HEAD(, RamDiscardListener) rdl_list;
>> +};
>> +
>> +struct MemoryAttributeManagerClass {
>> +    ObjectClass parent_class;
>> +};
>> +
>> +int memory_attribute_manager_realize(MemoryAttributeManager *mgr,
>> MemoryRegion *mr);
>> +void memory_attribute_manager_unrealize(MemoryAttributeManager *mgr);
>> +
>> +#endif
>> diff --git a/system/memory-attribute-manager.c b/system/memory-
>> attribute-manager.c
>> new file mode 100644
>> index 0000000000..ed97e43dd0
>> --- /dev/null
>> +++ b/system/memory-attribute-manager.c
>> @@ -0,0 +1,292 @@
>> +/*
>> + * QEMU memory attribute manager
>> + *
>> + * Copyright Intel
>> + *
>> + * Author:
>> + *      Chenyi Qiang <chenyi.qiang@xxxxxxxxx>
>> + *
>> + * This work is licensed under the terms of the GNU GPL, version 2 or
>> later.
>> + * See the COPYING file in the top-level directory
>> + *
>> + */
>> +
>> +#include "qemu/osdep.h"
>> +#include "qemu/error-report.h"
>> +#include "system/memory-attribute-manager.h"
>> +
>> +OBJECT_DEFINE_TYPE_WITH_INTERFACES(MemoryAttributeManager,
>> +                                   memory_attribute_manager,
>> +                                   MEMORY_ATTRIBUTE_MANAGER,
>> +                                   OBJECT,
>> +                                   { TYPE_RAM_DISCARD_MANAGER },
>> +                                   { })
>> +
>> +static int memory_attribute_manager_get_block_size(const
>> MemoryAttributeManager *mgr)
>> +{
>> +    /*
>> +     * Because page conversion could be manipulated in the size of at
>> least 4K or 4K aligned,
>> +     * Use the host page size as the granularity to track the memory
>> attribute.
>> +     * TODO: if necessary, switch to get the page_size from RAMBlock.
>> +     * i.e. mgr->mr->ram_block->page_size.
> 
> I'd assume it is rather necessary already.

OK, Will return the page_size of RAMBlock directly.

> 
>> +     */
>> +    return qemu_real_host_page_size();
>> +}
>> +
>> +
>> +static bool memory_attribute_rdm_is_populated(const RamDiscardManager
>> *rdm,
>> +                                              const
>> MemoryRegionSection *section)
>> +{
>> +    const MemoryAttributeManager *mgr = MEMORY_ATTRIBUTE_MANAGER(rdm);
>> +    int block_size = memory_attribute_manager_get_block_size(mgr);
>> +    uint64_t first_bit = section->offset_within_region / block_size;
>> +    uint64_t last_bit = first_bit + int128_get64(section->size) /
>> block_size - 1;
>> +    unsigned long first_discard_bit;
>> +
>> +    first_discard_bit = find_next_zero_bit(mgr->shared_bitmap,
>> last_bit + 1, first_bit);
>> +    return first_discard_bit > last_bit;
>> +}
>> +
>> +typedef int (*memory_attribute_section_cb)(MemoryRegionSection *s,
>> void *arg);
>> +
>> +static int memory_attribute_notify_populate_cb(MemoryRegionSection
>> *section, void *arg)
>> +{
>> +    RamDiscardListener *rdl = arg;
>> +
>> +    return rdl->notify_populate(rdl, section);
>> +}
>> +
>> +static int memory_attribute_notify_discard_cb(MemoryRegionSection
>> *section, void *arg)
>> +{
>> +    RamDiscardListener *rdl = arg;
>> +
>> +    rdl->notify_discard(rdl, section);
>> +
>> +    return 0;
>> +}
>> +
>> +static int memory_attribute_for_each_populated_section(const
>> MemoryAttributeManager *mgr,
>> +                                                      
>> MemoryRegionSection *section,
>> +                                                       void *arg,
>> +                                                      
>> memory_attribute_section_cb cb)
>> +{
>> +    unsigned long first_one_bit, last_one_bit;
>> +    uint64_t offset, size;
>> +    int block_size = memory_attribute_manager_get_block_size(mgr);
>> +    int ret = 0;
>> +
>> +    first_one_bit = section->offset_within_region / block_size;
>> +    first_one_bit = find_next_bit(mgr->shared_bitmap, mgr-
>> >bitmap_size, first_one_bit);
>> +
>> +    while (first_one_bit < mgr->bitmap_size) {
>> +        MemoryRegionSection tmp = *section;
>> +
>> +        offset = first_one_bit * block_size;
>> +        last_one_bit = find_next_zero_bit(mgr->shared_bitmap, mgr-
>> >bitmap_size,
>> +                                          first_one_bit + 1) - 1;
>> +        size = (last_one_bit - first_one_bit + 1) * block_size;
> 
> 
> What all this math is for if we stuck with VFIO doing 1 page at the
> time? (I think I commented on this)

Sorry, I missed your previous comment. IMHO, as we track the status in
bitmap and we want to call the cb() on the shared part within
MemoryRegionSection. Here we do the calculation to find the expected
sub-range.

> 
>> +
>> +        if (!memory_region_section_intersect_range(&tmp, offset,
>> size)) {
>> +            break;
>> +        }
>> +
>> +        ret = cb(&tmp, arg);
>> +        if (ret) {
>> +            error_report("%s: Failed to notify RAM discard listener:
>> %s", __func__,
>> +                         strerror(-ret));
>> +            break;
>> +        }
>> +
>> +        first_one_bit = find_next_bit(mgr->shared_bitmap, mgr-
>> >bitmap_size,
>> +                                      last_one_bit + 2);
>> +    }
>> +
>> +    return ret;
>> +}
>> +

[..]

>> +
>> +static void
>> memory_attribute_rdm_unregister_listener(RamDiscardManager *rdm,
>> +                                                    
>> RamDiscardListener *rdl)
>> +{
>> +    MemoryAttributeManager *mgr = MEMORY_ATTRIBUTE_MANAGER(rdm);
>> +    int ret;
>> +
>> +    g_assert(rdl->section);
>> +    g_assert(rdl->section->mr == mgr->mr);
>> +
>> +    ret = memory_attribute_for_each_populated_section(mgr, rdl-
>> >section, rdl,
>> +                                                     
>> memory_attribute_notify_discard_cb);
>> +    if (ret) {
>> +        error_report("%s: Failed to unregister RAM discard listener:
>> %s", __func__,
>> +                     strerror(-ret));
>> +    }
>> +
>> +    memory_region_section_free_copy(rdl->section);
>> +    rdl->section = NULL;
>> +    QLIST_REMOVE(rdl, next);
>> +
>> +}
>> +
>> +typedef struct MemoryAttributeReplayData {
>> +    void *fn;
> 
> ReplayRamDiscard *fn, not void*.

We could cast the void *fn either to ReplayRamPopulate or
ReplayRamDiscard (see below).

> 
>> +    void *opaque;
>> +} MemoryAttributeReplayData;
>> +
>> +static int
>> memory_attribute_rdm_replay_populated_cb(MemoryRegionSection *section,
>> void *arg)
>> +{
>> +    MemoryAttributeReplayData *data = arg;
>> +
>> +    return ((ReplayRamPopulate)data->fn)(section, data->opaque);
>> +}
>> +
>> +static int memory_attribute_rdm_replay_populated(const
>> RamDiscardManager *rdm,
>> +                                                 MemoryRegionSection
>> *section,
>> +                                                 ReplayRamPopulate
>> replay_fn,
>> +                                                 void *opaque)
>> +{
>> +    MemoryAttributeManager *mgr = MEMORY_ATTRIBUTE_MANAGER(rdm);
>> +    MemoryAttributeReplayData data = { .fn = replay_fn, .opaque =
>> opaque };
>> +
>> +    g_assert(section->mr == mgr->mr);
>> +    return memory_attribute_for_each_populated_section(mgr, section,
>> &data,
>> +                                                      
>> memory_attribute_rdm_replay_populated_cb);
>> +}
>> +
>> +static int
>> memory_attribute_rdm_replay_discarded_cb(MemoryRegionSection *section,
>> void *arg)
>> +{
>> +    MemoryAttributeReplayData *data = arg;
>> +
>> +    ((ReplayRamDiscard)data->fn)(section, data->opaque);
>> +    return 0;
>> +}
>> +
>> +static void memory_attribute_rdm_replay_discarded(const
>> RamDiscardManager *rdm,
>> +                                                  MemoryRegionSection
>> *section,
>> +                                                  ReplayRamDiscard
>> replay_fn,
>> +                                                  void *opaque)
>> +{
>> +    MemoryAttributeManager *mgr = MEMORY_ATTRIBUTE_MANAGER(rdm);
>> +    MemoryAttributeReplayData data = { .fn = replay_fn, .opaque =
>> opaque };
>> +
>> +    g_assert(section->mr == mgr->mr);
>> +    memory_attribute_for_each_discarded_section(mgr, section, &data,
>> +                                               
>> memory_attribute_rdm_replay_discarded_cb);
>> +}
>> +
>> +int memory_attribute_manager_realize(MemoryAttributeManager *mgr,
>> MemoryRegion *mr)
>> +{
>> +    uint64_t bitmap_size;
>> +    int block_size = memory_attribute_manager_get_block_size(mgr);
>> +    int ret;
>> +
>> +    bitmap_size = ROUND_UP(mr->size, block_size) / block_size;
>> +
>> +    mgr->mr = mr;
>> +    mgr->bitmap_size = bitmap_size;
>> +    mgr->shared_bitmap = bitmap_new(bitmap_size);
>> +
>> +    ret = memory_region_set_ram_discard_manager(mgr->mr,
>> RAM_DISCARD_MANAGER(mgr));
> 
> Move it 3 lines up and avoid stale data in mgr->mr/bitmap_size/
> shared_bitmap and avoid g_free below?

Make sense. I will move it up the same as patch 02 before bitmap_new().

> 
>> +    if (ret) {
>> +        g_free(mgr->shared_bitmap);
>> +    }
>> +
>> +    return ret;
>> +}
>> +
>> +void memory_attribute_manager_unrealize(MemoryAttributeManager *mgr)
>> +{
>> +    memory_region_set_ram_discard_manager(mgr->mr, NULL);
>> +
>> +    g_free(mgr->shared_bitmap);
>> +}
>> +
>> +static void memory_attribute_manager_init(Object *obj)
> 
> Not used.
> 
>> +{
>> +    MemoryAttributeManager *mgr = MEMORY_ATTRIBUTE_MANAGER(obj);
>> +
>> +    QLIST_INIT(&mgr->rdl_list);
>> +} > +
>> +static void memory_attribute_manager_finalize(Object *obj)
> 
> Not used either. Thanks,

I think it is OK to define it as a placeholder? Just some preference.

> 
>> +{
>> +}
>> +
>> +static void memory_attribute_manager_class_init(ObjectClass *oc, void
>> *data)
>> +{
>> +    RamDiscardManagerClass *rdmc = RAM_DISCARD_MANAGER_CLASS(oc);
>> +
>> +    rdmc->get_min_granularity =
>> memory_attribute_rdm_get_min_granularity;
>> +    rdmc->register_listener = memory_attribute_rdm_register_listener;
>> +    rdmc->unregister_listener =
>> memory_attribute_rdm_unregister_listener;
>> +    rdmc->is_populated = memory_attribute_rdm_is_populated;
>> +    rdmc->replay_populated = memory_attribute_rdm_replay_populated;
>> +    rdmc->replay_discarded = memory_attribute_rdm_replay_discarded;
>> +}
>> diff --git a/system/meson.build b/system/meson.build
>> index 4952f4b2c7..ab07ff1442 100644
>> --- a/system/meson.build
>> +++ b/system/meson.build
>> @@ -15,6 +15,7 @@ system_ss.add(files(
>>     'dirtylimit.c',
>>     'dma-helpers.c',
>>     'globals.c',
>> +  'memory-attribute-manager.c',
>>     'memory_mapping.c',
>>     'qdev-monitor.c',
>>     'qtest.c',
> 





[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux