[PATCH 09/15] memory: prepare flatview and radix-tree for rcu style access

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: Liu Ping Fan <pingfank@xxxxxxxxxxxxxxxxxx>

The flatview and the radix tree are both reached through a single pointer
(the current PhysMap), so that an update to them appears atomic to readers.

A MemoryRegion (mr) reached through a radix-tree leaf or a flatview is
reclaimed only after the previous PhysMap is no longer in use.

Signed-off-by: Liu Ping Fan <pingfank@xxxxxxxxxxxxxxxxxx>
---
 exec.c      |  303 +++++++++++++++++++++++++++++++++++++++-------------------
 hw/vhost.c  |    2 +-
 hw/xen_pt.c |    2 +-
 kvm-all.c   |    2 +-
 memory.c    |   92 ++++++++++++++-----
 memory.h    |    9 ++-
 vl.c        |    1 +
 xen-all.c   |    2 +-
 8 files changed, 286 insertions(+), 127 deletions(-)

diff --git a/exec.c b/exec.c
index 01b91b0..97addb9 100644
--- a/exec.c
+++ b/exec.c
@@ -24,6 +24,7 @@
 #include <sys/mman.h>
 #endif
 
+#include "qemu/atomic.h"
 #include "qemu-common.h"
 #include "cpu.h"
 #include "tcg.h"
@@ -35,6 +36,8 @@
 #include "qemu-timer.h"
 #include "memory.h"
 #include "exec-memory.h"
+#include "qemu-thread.h"
+#include "qemu/reclaimer.h"
 #if defined(CONFIG_USER_ONLY)
 #include <qemu.h>
 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
@@ -184,25 +187,17 @@ static void *l1_map[V_L1_SIZE];
 
 #if !defined(CONFIG_USER_ONLY)
 
-static MemoryRegionSection *phys_sections;
-static unsigned phys_sections_nb, phys_sections_nb_alloc;
 static uint16_t phys_section_unassigned;
 static uint16_t phys_section_notdirty;
 static uint16_t phys_section_rom;
 static uint16_t phys_section_watch;
 
-
-/* Simple allocator for PhysPageEntry nodes */
-static PhysPageEntry (*phys_map_nodes)[L2_SIZE];
-static unsigned phys_map_nodes_nb, phys_map_nodes_nb_alloc;
-
 #define PHYS_MAP_NODE_NIL (((uint16_t)~0) >> 1)
 
-/* This is a multi-level map on the physical address space.
-   The bottom level has pointers to MemoryRegionSections.  */
-static PhysPageEntry phys_map = { .ptr = PHYS_MAP_NODE_NIL, .is_leaf = 0 };
-
+static QemuMutex cur_map_lock;
+static PhysMap *cur_map;
 QemuMutex mem_map_lock;
+static PhysMap *next_map;
 
 static void io_mem_init(void);
 static void memory_map_init(void);
@@ -383,41 +378,38 @@ static inline PageDesc *page_find(tb_page_addr_t index)
 
 #if !defined(CONFIG_USER_ONLY)
 
-static void phys_map_node_reserve(unsigned nodes)
+static void phys_map_node_reserve(PhysMap *map, unsigned nodes)
 {
-    if (phys_map_nodes_nb + nodes > phys_map_nodes_nb_alloc) {
+    if (map->phys_map_nodes_nb + nodes > map->phys_map_nodes_nb_alloc) {
         typedef PhysPageEntry Node[L2_SIZE];
-        phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc * 2, 16);
-        phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc,
-                                      phys_map_nodes_nb + nodes);
-        phys_map_nodes = g_renew(Node, phys_map_nodes,
-                                 phys_map_nodes_nb_alloc);
+        map->phys_map_nodes_nb_alloc = MAX(map->phys_map_nodes_nb_alloc * 2,
+                                                                        16);
+        map->phys_map_nodes_nb_alloc = MAX(map->phys_map_nodes_nb_alloc,
+                                      map->phys_map_nodes_nb + nodes);
+        map->phys_map_nodes = g_renew(Node, map->phys_map_nodes,
+                                 map->phys_map_nodes_nb_alloc);
     }
 }
 
-static uint16_t phys_map_node_alloc(void)
+static uint16_t phys_map_node_alloc(PhysMap *map)
 {
     unsigned i;
     uint16_t ret;
 
-    ret = phys_map_nodes_nb++;
+    ret = map->phys_map_nodes_nb++;
     assert(ret != PHYS_MAP_NODE_NIL);
-    assert(ret != phys_map_nodes_nb_alloc);
+    assert(ret != map->phys_map_nodes_nb_alloc);
     for (i = 0; i < L2_SIZE; ++i) {
-        phys_map_nodes[ret][i].is_leaf = 0;
-        phys_map_nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
+        map->phys_map_nodes[ret][i].is_leaf = 0;
+        map->phys_map_nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
     }
     return ret;
 }
 
-static void phys_map_nodes_reset(void)
-{
-    phys_map_nodes_nb = 0;
-}
-
-
-static void phys_page_set_level(PhysPageEntry *lp, target_phys_addr_t *index,
-                                target_phys_addr_t *nb, uint16_t leaf,
+static void phys_page_set_level(PhysMap *map, PhysPageEntry *lp,
+                                target_phys_addr_t *index,
+                                target_phys_addr_t *nb,
+                                uint16_t leaf,
                                 int level)
 {
     PhysPageEntry *p;
@@ -425,8 +417,8 @@ static void phys_page_set_level(PhysPageEntry *lp, target_phys_addr_t *index,
     target_phys_addr_t step = (target_phys_addr_t)1 << (level * L2_BITS);
 
     if (!lp->is_leaf && lp->ptr == PHYS_MAP_NODE_NIL) {
-        lp->ptr = phys_map_node_alloc();
-        p = phys_map_nodes[lp->ptr];
+        lp->ptr = phys_map_node_alloc(map);
+        p = map->phys_map_nodes[lp->ptr];
         if (level == 0) {
             for (i = 0; i < L2_SIZE; i++) {
                 p[i].is_leaf = 1;
@@ -434,7 +426,7 @@ static void phys_page_set_level(PhysPageEntry *lp, target_phys_addr_t *index,
             }
         }
     } else {
-        p = phys_map_nodes[lp->ptr];
+        p = map->phys_map_nodes[lp->ptr];
     }
     lp = &p[(*index >> (level * L2_BITS)) & (L2_SIZE - 1)];
 
@@ -445,24 +437,27 @@ static void phys_page_set_level(PhysPageEntry *lp, target_phys_addr_t *index,
             *index += step;
             *nb -= step;
         } else {
-            phys_page_set_level(lp, index, nb, leaf, level - 1);
+            phys_page_set_level(map, lp, index, nb, leaf, level - 1);
         }
         ++lp;
     }
 }
 
-static void phys_page_set(target_phys_addr_t index, target_phys_addr_t nb,
-                          uint16_t leaf)
+static void phys_page_set(PhysMap *map, target_phys_addr_t index,
+                            target_phys_addr_t nb,
+                            uint16_t leaf)
 {
     /* Wildly overreserve - it doesn't matter much. */
-    phys_map_node_reserve(3 * P_L2_LEVELS);
+    phys_map_node_reserve(map, 3 * P_L2_LEVELS);
 
-    phys_page_set_level(&phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
+    /* update in the new tree */
+    phys_page_set_level(map, &map->root, &index, &nb, leaf, P_L2_LEVELS - 1);
 }
 
-MemoryRegionSection *phys_page_find(target_phys_addr_t index)
+static MemoryRegionSection *phys_page_find_internal(PhysMap *map,
+                           target_phys_addr_t index)
 {
-    PhysPageEntry lp = phys_map;
+    PhysPageEntry lp = map->root;
     PhysPageEntry *p;
     int i;
     uint16_t s_index = phys_section_unassigned;
@@ -471,13 +466,79 @@ MemoryRegionSection *phys_page_find(target_phys_addr_t index)
         if (lp.ptr == PHYS_MAP_NODE_NIL) {
             goto not_found;
         }
-        p = phys_map_nodes[lp.ptr];
+        p = map->phys_map_nodes[lp.ptr];
         lp = p[(index >> (i * L2_BITS)) & (L2_SIZE - 1)];
     }
 
     s_index = lp.ptr;
 not_found:
-    return &phys_sections[s_index];
+    return &map->phys_sections[s_index];
+}
+
+MemoryRegionSection *phys_page_find(target_phys_addr_t index)
+{
+    return phys_page_find_internal(cur_map, index);
+}
+
+void physmap_get(PhysMap *map)
+{
+    atomic_inc(&map->ref);
+}
+
+/* Defer this reclaim until the RCU read side has finished */
+static ChunkHead physmap_reclaimer_list = { .lh_first = NULL };
+void physmap_reclaimer_enqueue(void *opaque, ReleaseHandler *release)
+{
+    reclaimer_enqueue(&physmap_reclaimer_list, opaque, release);
+}
+
+static void destroy_all_mappings(PhysMap *map);
+static void phys_map_release(PhysMap *map)
+{
+    /* emulate an RCU reclaimer for mr */
+    reclaimer_worker(&physmap_reclaimer_list);
+
+    destroy_all_mappings(map);
+    g_free(map->phys_map_nodes);
+    g_free(map->phys_sections);
+    g_free(map->views[0].ranges);
+    g_free(map->views[1].ranges);
+    g_free(map);
+}
+
+void physmap_put(PhysMap *map)
+{
+    if (atomic_dec_and_test(&map->ref)) {
+        phys_map_release(map);
+    }
+}
+
+void cur_map_update(PhysMap *next)
+{
+    qemu_mutex_lock(&cur_map_lock);
+    physmap_put(cur_map);
+    cur_map = next;
+    smp_mb();
+    qemu_mutex_unlock(&cur_map_lock);
+}
+
+PhysMap *cur_map_get(void)
+{
+    PhysMap *ret;
+
+    qemu_mutex_lock(&cur_map_lock);
+    ret = cur_map;
+    physmap_get(ret);
+    smp_mb();
+    qemu_mutex_unlock(&cur_map_lock);
+    return ret;
+}
+
+PhysMap *alloc_next_map(void)
+{
+    PhysMap *next = g_malloc0(sizeof(PhysMap));
+    atomic_set(&next->ref, 1);
+    return next;
 }
 
 bool memory_region_is_unassigned(MemoryRegion *mr)
@@ -632,6 +693,7 @@ void cpu_exec_init_all(void)
     memory_map_init();
     io_mem_init();
     qemu_mutex_init(&mem_map_lock);
+    qemu_mutex_init(&cur_map_lock);
 #endif
 }
 
@@ -2161,17 +2223,18 @@ int page_unprotect(target_ulong address, uintptr_t pc, void *puc)
 
 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
 typedef struct subpage_t {
+    PhysMap *map;
     MemoryRegion iomem;
     target_phys_addr_t base;
     uint16_t sub_section[TARGET_PAGE_SIZE];
 } subpage_t;
 
-static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
-                             uint16_t section);
-static subpage_t *subpage_init(target_phys_addr_t base);
-static void destroy_page_desc(uint16_t section_index)
+static int subpage_register(PhysMap *map, subpage_t *mmio, uint32_t start,
+                            uint32_t end, uint16_t section);
+static subpage_t *subpage_init(PhysMap *map, target_phys_addr_t base);
+static void destroy_page_desc(PhysMap *map, uint16_t section_index)
 {
-    MemoryRegionSection *section = &phys_sections[section_index];
+    MemoryRegionSection *section = &map->phys_sections[section_index];
     MemoryRegion *mr = section->mr;
 
     if (mr->subpage) {
@@ -2181,7 +2244,7 @@ static void destroy_page_desc(uint16_t section_index)
     }
 }
 
-static void destroy_l2_mapping(PhysPageEntry *lp, unsigned level)
+static void destroy_l2_mapping(PhysMap *map, PhysPageEntry *lp, unsigned level)
 {
     unsigned i;
     PhysPageEntry *p;
@@ -2190,38 +2253,34 @@ static void destroy_l2_mapping(PhysPageEntry *lp, unsigned level)
         return;
     }
 
-    p = phys_map_nodes[lp->ptr];
+    p = map->phys_map_nodes[lp->ptr];
     for (i = 0; i < L2_SIZE; ++i) {
         if (!p[i].is_leaf) {
-            destroy_l2_mapping(&p[i], level - 1);
+            destroy_l2_mapping(map, &p[i], level - 1);
         } else {
-            destroy_page_desc(p[i].ptr);
+            destroy_page_desc(map, p[i].ptr);
         }
     }
     lp->is_leaf = 0;
     lp->ptr = PHYS_MAP_NODE_NIL;
 }
 
-static void destroy_all_mappings(void)
+static void destroy_all_mappings(PhysMap *map)
 {
-    destroy_l2_mapping(&phys_map, P_L2_LEVELS - 1);
-    phys_map_nodes_reset();
-}
+    PhysPageEntry *root = &map->root;
 
-static uint16_t phys_section_add(MemoryRegionSection *section)
-{
-    if (phys_sections_nb == phys_sections_nb_alloc) {
-        phys_sections_nb_alloc = MAX(phys_sections_nb_alloc * 2, 16);
-        phys_sections = g_renew(MemoryRegionSection, phys_sections,
-                                phys_sections_nb_alloc);
-    }
-    phys_sections[phys_sections_nb] = *section;
-    return phys_sections_nb++;
+    destroy_l2_mapping(map, root, P_L2_LEVELS - 1);
 }
 
-static void phys_sections_clear(void)
+static uint16_t phys_section_add(PhysMap *map, MemoryRegionSection *section)
 {
-    phys_sections_nb = 0;
+    if (map->phys_sections_nb == map->phys_sections_nb_alloc) {
+        map->phys_sections_nb_alloc = MAX(map->phys_sections_nb_alloc * 2, 16);
+        map->phys_sections = g_renew(MemoryRegionSection, map->phys_sections,
+                                map->phys_sections_nb_alloc);
+    }
+    map->phys_sections[map->phys_sections_nb] = *section;
+    return map->phys_sections_nb++;
 }
 
 /* register physical memory.
@@ -2232,12 +2291,13 @@ static void phys_sections_clear(void)
    start_addr and region_offset are rounded down to a page boundary
    before calculating this offset.  This should not be a problem unless
    the low bits of start_addr and region_offset differ.  */
-static void register_subpage(MemoryRegionSection *section)
+static void register_subpage(PhysMap *map, MemoryRegionSection *section)
 {
     subpage_t *subpage;
     target_phys_addr_t base = section->offset_within_address_space
         & TARGET_PAGE_MASK;
-    MemoryRegionSection *existing = phys_page_find(base >> TARGET_PAGE_BITS);
+    MemoryRegionSection *existing = phys_page_find_internal(map,
+                                            base >> TARGET_PAGE_BITS);
     MemoryRegionSection subsection = {
         .offset_within_address_space = base,
         .size = TARGET_PAGE_SIZE,
@@ -2247,30 +2307,30 @@ static void register_subpage(MemoryRegionSection *section)
     assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
 
     if (!(existing->mr->subpage)) {
-        subpage = subpage_init(base);
+        subpage = subpage_init(map, base);
         subsection.mr = &subpage->iomem;
-        phys_page_set(base >> TARGET_PAGE_BITS, 1,
-                      phys_section_add(&subsection));
+        phys_page_set(map, base >> TARGET_PAGE_BITS, 1,
+                      phys_section_add(map, &subsection));
     } else {
         subpage = container_of(existing->mr, subpage_t, iomem);
     }
     start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
     end = start + section->size;
-    subpage_register(subpage, start, end, phys_section_add(section));
+    subpage_register(map, subpage, start, end, phys_section_add(map, section));
 }
 
 
-static void register_multipage(MemoryRegionSection *section)
+static void register_multipage(PhysMap *map, MemoryRegionSection *section)
 {
     target_phys_addr_t start_addr = section->offset_within_address_space;
     ram_addr_t size = section->size;
     target_phys_addr_t addr;
-    uint16_t section_index = phys_section_add(section);
+    uint16_t section_index = phys_section_add(map, section);
 
     assert(size);
 
     addr = start_addr;
-    phys_page_set(addr >> TARGET_PAGE_BITS, size >> TARGET_PAGE_BITS,
+    phys_page_set(map, addr >> TARGET_PAGE_BITS, size >> TARGET_PAGE_BITS,
                   section_index);
 }
 
@@ -2278,13 +2338,14 @@ void cpu_register_physical_memory_log(MemoryRegionSection *section,
                                       bool readonly)
 {
     MemoryRegionSection now = *section, remain = *section;
+    PhysMap *map = next_map;
 
     if ((now.offset_within_address_space & ~TARGET_PAGE_MASK)
         || (now.size < TARGET_PAGE_SIZE)) {
         now.size = MIN(TARGET_PAGE_ALIGN(now.offset_within_address_space)
                        - now.offset_within_address_space,
                        now.size);
-        register_subpage(&now);
+        register_subpage(map, &now);
         remain.size -= now.size;
         remain.offset_within_address_space += now.size;
         remain.offset_within_region += now.size;
@@ -2292,14 +2353,14 @@ void cpu_register_physical_memory_log(MemoryRegionSection *section,
     now = remain;
     now.size &= TARGET_PAGE_MASK;
     if (now.size) {
-        register_multipage(&now);
+        register_multipage(map, &now);
         remain.size -= now.size;
         remain.offset_within_address_space += now.size;
         remain.offset_within_region += now.size;
     }
     now = remain;
     if (now.size) {
-        register_subpage(&now);
+        register_subpage(map, &now);
     }
 }
 
@@ -3001,7 +3062,7 @@ static uint64_t subpage_read(void *opaque, target_phys_addr_t addr,
            mmio, len, addr, idx);
 #endif
 
-    section = &phys_sections[mmio->sub_section[idx]];
+    section = &mmio->map->phys_sections[mmio->sub_section[idx]];
     addr += mmio->base;
     addr -= section->offset_within_address_space;
     addr += section->offset_within_region;
@@ -3020,7 +3081,7 @@ static void subpage_write(void *opaque, target_phys_addr_t addr,
            __func__, mmio, len, addr, idx, value);
 #endif
 
-    section = &phys_sections[mmio->sub_section[idx]];
+    section = &mmio->map->phys_sections[mmio->sub_section[idx]];
     addr += mmio->base;
     addr -= section->offset_within_address_space;
     addr += section->offset_within_region;
@@ -3065,8 +3126,8 @@ static const MemoryRegionOps subpage_ram_ops = {
     .endianness = DEVICE_NATIVE_ENDIAN,
 };
 
-static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
-                             uint16_t section)
+static int subpage_register(PhysMap *map, subpage_t *mmio, uint32_t start,
+                              uint32_t end, uint16_t section)
 {
     int idx, eidx;
 
@@ -3078,10 +3139,10 @@ static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
     printf("%s: %p start %08x end %08x idx %08x eidx %08x mem %ld\n", __func__,
            mmio, start, end, idx, eidx, memory);
 #endif
-    if (memory_region_is_ram(phys_sections[section].mr)) {
-        MemoryRegionSection new_section = phys_sections[section];
+    if (memory_region_is_ram(map->phys_sections[section].mr)) {
+        MemoryRegionSection new_section = map->phys_sections[section];
         new_section.mr = &io_mem_subpage_ram;
-        section = phys_section_add(&new_section);
+        section = phys_section_add(map, &new_section);
     }
     for (; idx <= eidx; idx++) {
         mmio->sub_section[idx] = section;
@@ -3090,12 +3151,13 @@ static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
     return 0;
 }
 
-static subpage_t *subpage_init(target_phys_addr_t base)
+static subpage_t *subpage_init(PhysMap *map, target_phys_addr_t base)
 {
     subpage_t *mmio;
 
     mmio = g_malloc0(sizeof(subpage_t));
 
+    mmio->map = map;
     mmio->base = base;
     memory_region_init_io(&mmio->iomem, &subpage_ops, mmio,
                           "subpage", TARGET_PAGE_SIZE);
@@ -3104,12 +3166,12 @@ static subpage_t *subpage_init(target_phys_addr_t base)
     printf("%s: %p base " TARGET_FMT_plx " len %08x %d\n", __func__,
            mmio, base, TARGET_PAGE_SIZE, subpage_memory);
 #endif
-    subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, phys_section_unassigned);
+    subpage_register(map, mmio, 0, TARGET_PAGE_SIZE-1, phys_section_unassigned);
 
     return mmio;
 }
 
-static uint16_t dummy_section(MemoryRegion *mr)
+static uint16_t dummy_section(PhysMap *map, MemoryRegion *mr)
 {
     MemoryRegionSection section = {
         .mr = mr,
@@ -3118,7 +3180,7 @@ static uint16_t dummy_section(MemoryRegion *mr)
         .size = UINT64_MAX,
     };
 
-    return phys_section_add(&section);
+    return phys_section_add(map, &section);
 }
 
 MemoryRegion *iotlb_to_region(target_phys_addr_t index)
@@ -3140,15 +3202,32 @@ static void io_mem_init(void)
                           "watch", UINT64_MAX);
 }
 
-static void core_begin(MemoryListener *listener)
+#if 0
+static void physmap_init(void)
+{
+    FlatView v = { .ranges = NULL,
+                             .nr = 0,
+                             .nr_allocated = 0,
+    };
+
+    init_map.views[0] = v;
+    init_map.views[1] = v;
+    cur_map =  &init_map;
+}
+#endif
+
+static void core_begin(MemoryListener *listener, PhysMap *new_map)
 {
-    destroy_all_mappings();
-    phys_sections_clear();
-    phys_map.ptr = PHYS_MAP_NODE_NIL;
-    phys_section_unassigned = dummy_section(&io_mem_unassigned);
-    phys_section_notdirty = dummy_section(&io_mem_notdirty);
-    phys_section_rom = dummy_section(&io_mem_rom);
-    phys_section_watch = dummy_section(&io_mem_watch);
+
+    new_map->root.ptr = PHYS_MAP_NODE_NIL;
+    new_map->root.is_leaf = 0;
+
+    /* In every map, these sections have the same index */
+    phys_section_unassigned = dummy_section(new_map, &io_mem_unassigned);
+    phys_section_notdirty = dummy_section(new_map, &io_mem_notdirty);
+    phys_section_rom = dummy_section(new_map, &io_mem_rom);
+    phys_section_watch = dummy_section(new_map, &io_mem_watch);
+    next_map = new_map;
 }
 
 static void core_commit(MemoryListener *listener)
@@ -3161,6 +3240,16 @@ static void core_commit(MemoryListener *listener)
     for(env = first_cpu; env != NULL; env = env->next_cpu) {
         tlb_flush(env, 1);
     }
+
+/* move into high layer
+    qemu_mutex_lock(&cur_map_lock);
+    if (cur_map != NULL) {
+        physmap_put(cur_map);
+    }
+    cur_map = next_map;
+    smp_mb();
+    qemu_mutex_unlock(&cur_map_lock);
+*/
 }
 
 static void core_region_add(MemoryListener *listener,
@@ -3217,7 +3306,7 @@ static void core_eventfd_del(MemoryListener *listener,
 {
 }
 
-static void io_begin(MemoryListener *listener)
+static void io_begin(MemoryListener *listener, PhysMap *next)
 {
 }
 
@@ -3329,6 +3418,20 @@ static void memory_map_init(void)
     memory_listener_register(&io_memory_listener, system_io);
 }
 
+void physmap_init(void)
+{
+    FlatView v = { .ranges = NULL, .nr = 0, .nr_allocated = 0,
+                           };
+    PhysMap *init_map = g_malloc0(sizeof(PhysMap));
+
+    atomic_set(&init_map->ref, 1);
+    init_map->root.ptr = PHYS_MAP_NODE_NIL;
+    init_map->root.is_leaf = 0;
+    init_map->views[0] = v;
+    init_map->views[1] = v;
+    cur_map = init_map;
+}
+
 MemoryRegion *get_system_memory(void)
 {
     return system_memory;
@@ -3391,6 +3494,7 @@ void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
     uint32_t val;
     target_phys_addr_t page;
     MemoryRegionSection *section;
+    PhysMap *cur = cur_map_get();
 
     while (len > 0) {
         page = addr & TARGET_PAGE_MASK;
@@ -3472,6 +3576,7 @@ void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
         buf += l;
         addr += l;
     }
+    physmap_put(cur);
 }
 
 /* used for ROM loading : can write in RAM and ROM */
diff --git a/hw/vhost.c b/hw/vhost.c
index 43664e7..df58345 100644
--- a/hw/vhost.c
+++ b/hw/vhost.c
@@ -438,7 +438,7 @@ static bool vhost_section(MemoryRegionSection *section)
         && memory_region_is_ram(section->mr);
 }
 
-static void vhost_begin(MemoryListener *listener)
+static void vhost_begin(MemoryListener *listener, PhysMap *next)
 {
 }
 
diff --git a/hw/xen_pt.c b/hw/xen_pt.c
index 3b6d186..fba8586 100644
--- a/hw/xen_pt.c
+++ b/hw/xen_pt.c
@@ -597,7 +597,7 @@ static void xen_pt_region_update(XenPCIPassthroughState *s,
     }
 }
 
-static void xen_pt_begin(MemoryListener *l)
+static void xen_pt_begin(MemoryListener *l, PhysMap *next)
 {
 }
 
diff --git a/kvm-all.c b/kvm-all.c
index f8e4328..bc42cab 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -693,7 +693,7 @@ static void kvm_set_phys_mem(MemoryRegionSection *section, bool add)
     }
 }
 
-static void kvm_begin(MemoryListener *listener)
+static void kvm_begin(MemoryListener *listener, PhysMap *next)
 {
 }
 
diff --git a/memory.c b/memory.c
index c7f2cfd..54cdc7f 100644
--- a/memory.c
+++ b/memory.c
@@ -20,6 +20,7 @@
 #include "kvm.h"
 #include <assert.h>
 #include "hw/qdev.h"
+#include "qemu-thread.h"
 
 #define WANT_EXEC_OBSOLETE
 #include "exec-obsolete.h"
@@ -192,7 +193,7 @@ typedef struct AddressSpaceOps AddressSpaceOps;
 /* A system address space - I/O, memory, etc. */
 struct AddressSpace {
     MemoryRegion *root;
-    FlatView current_map;
+    int view_id;
     int ioeventfd_nb;
     MemoryRegionIoeventfd *ioeventfds;
 };
@@ -232,11 +233,6 @@ static void flatview_insert(FlatView *view, unsigned pos, FlatRange *range)
     ++view->nr;
 }
 
-static void flatview_destroy(FlatView *view)
-{
-    g_free(view->ranges);
-}
-
 static bool can_merge(FlatRange *r1, FlatRange *r2)
 {
     return int128_eq(addrrange_end(r1->addr), r2->addr.start)
@@ -594,8 +590,10 @@ static void address_space_update_ioeventfds(AddressSpace *as)
     MemoryRegionIoeventfd *ioeventfds = NULL;
     AddrRange tmp;
     unsigned i;
+    PhysMap *map = cur_map_get();
+    FlatView *view = &map->views[as->view_id];
 
-    FOR_EACH_FLAT_RANGE(fr, &as->current_map) {
+    FOR_EACH_FLAT_RANGE(fr, view) {
         for (i = 0; i < fr->mr->ioeventfd_nb; ++i) {
             tmp = addrrange_shift(fr->mr->ioeventfds[i].addr,
                                   int128_sub(fr->addr.start,
@@ -616,6 +614,7 @@ static void address_space_update_ioeventfds(AddressSpace *as)
     g_free(as->ioeventfds);
     as->ioeventfds = ioeventfds;
     as->ioeventfd_nb = ioeventfd_nb;
+    physmap_put(map);
 }
 
 static void address_space_update_topology_pass(AddressSpace *as,
@@ -681,21 +680,23 @@ static void address_space_update_topology_pass(AddressSpace *as,
 }
 
 
-static void address_space_update_topology(AddressSpace *as)
+static void address_space_update_topology(AddressSpace *as, PhysMap *prev,
+                                            PhysMap *next)
 {
-    FlatView old_view = as->current_map;
+    FlatView old_view = prev->views[as->view_id];
     FlatView new_view = generate_memory_topology(as->root);
 
     address_space_update_topology_pass(as, old_view, new_view, false);
     address_space_update_topology_pass(as, old_view, new_view, true);
+    next->views[as->view_id] = new_view;
 
-    as->current_map = new_view;
-    flatview_destroy(&old_view);
     address_space_update_ioeventfds(as);
 }
 
 static void memory_region_update_topology(MemoryRegion *mr)
 {
+    PhysMap *prev, *next;
+
     if (memory_region_transaction_depth) {
         memory_region_update_pending |= !mr || mr->enabled;
         return;
@@ -705,16 +706,20 @@ static void memory_region_update_topology(MemoryRegion *mr)
         return;
     }
 
-    MEMORY_LISTENER_CALL_GLOBAL(begin, Forward);
+     prev = cur_map_get();
+    /* allocate PhysMap next here */
+    next = alloc_next_map();
+    MEMORY_LISTENER_CALL_GLOBAL(begin, Forward, next);
 
     if (address_space_memory.root) {
-        address_space_update_topology(&address_space_memory);
+        address_space_update_topology(&address_space_memory, prev, next);
     }
     if (address_space_io.root) {
-        address_space_update_topology(&address_space_io);
+        address_space_update_topology(&address_space_io, prev, next);
     }
 
     MEMORY_LISTENER_CALL_GLOBAL(commit, Forward);
+    cur_map_update(next);
 
     memory_region_update_pending = false;
 }
@@ -1071,7 +1076,7 @@ void memory_region_put(MemoryRegion *mr)
 
     if (atomic_dec_and_test(&mr->ref)) {
         /* to fix, using call_rcu( ,release) */
-        mr->life_ops->put(mr);
+        physmap_reclaimer_enqueue(mr, (ReleaseHandler *)mr->life_ops->put);
     }
 }
 
@@ -1147,13 +1152,18 @@ void memory_region_set_dirty(MemoryRegion *mr, target_phys_addr_t addr,
 void memory_region_sync_dirty_bitmap(MemoryRegion *mr)
 {
     FlatRange *fr;
+    FlatView *fview;
+    PhysMap *map;
 
-    FOR_EACH_FLAT_RANGE(fr, &address_space_memory.current_map) {
+    map = cur_map_get();
+    fview = &map->views[address_space_memory.view_id];
+    FOR_EACH_FLAT_RANGE(fr, fview) {
         if (fr->mr == mr) {
             MEMORY_LISTENER_UPDATE_REGION(fr, &address_space_memory,
                                           Forward, log_sync);
         }
     }
+    physmap_put(map);
 }
 
 void memory_region_set_readonly(MemoryRegion *mr, bool readonly)
@@ -1201,8 +1211,12 @@ static void memory_region_update_coalesced_range(MemoryRegion *mr)
     FlatRange *fr;
     CoalescedMemoryRange *cmr;
     AddrRange tmp;
+    FlatView *fview;
+    PhysMap *map;
 
-    FOR_EACH_FLAT_RANGE(fr, &address_space_memory.current_map) {
+    map = cur_map_get();
+    fview = &map->views[address_space_memory.view_id];
+    FOR_EACH_FLAT_RANGE(fr, fview) {
         if (fr->mr == mr) {
             qemu_unregister_coalesced_mmio(int128_get64(fr->addr.start),
                                            int128_get64(fr->addr.size));
@@ -1219,6 +1233,7 @@ static void memory_region_update_coalesced_range(MemoryRegion *mr)
             }
         }
     }
+    physmap_put(map);
 }
 
 void memory_region_set_coalescing(MemoryRegion *mr)
@@ -1458,29 +1473,49 @@ static int cmp_flatrange_addr(const void *addr_, const void *fr_)
     return 0;
 }
 
-static FlatRange *address_space_lookup(AddressSpace *as, AddrRange addr)
+static FlatRange *address_space_lookup(FlatView *view, AddrRange addr)
 {
-    return bsearch(&addr, as->current_map.ranges, as->current_map.nr,
+    return bsearch(&addr, view->ranges, view->nr,
                    sizeof(FlatRange), cmp_flatrange_addr);
 }
 
+/* Drop the reference that was taken by memory_region_find() */
+void memory_region_section_put(MemoryRegionSection *mrs)
+{
+    if (mrs->mr != NULL) {
+        memory_region_put(mrs->mr);
+    }
+}
+
+/* Takes a reference on the returned mr; the caller must drop it */
 MemoryRegionSection memory_region_find(MemoryRegion *address_space,
                                        target_phys_addr_t addr, uint64_t size)
 {
+    PhysMap *map;
     AddressSpace *as = memory_region_to_address_space(address_space);
     AddrRange range = addrrange_make(int128_make64(addr),
                                      int128_make64(size));
-    FlatRange *fr = address_space_lookup(as, range);
+    FlatView *fview;
+
+    map = cur_map_get();
+
+    fview = &map->views[as->view_id];
+    FlatRange *fr = address_space_lookup(fview, range);
     MemoryRegionSection ret = { .mr = NULL, .size = 0 };
 
     if (!fr) {
+        physmap_put(map);
         return ret;
     }
 
-    while (fr > as->current_map.ranges
+    while (fr > fview->ranges
            && addrrange_intersects(fr[-1].addr, range)) {
         --fr;
     }
+    /* To fix: the caller must be in RCU, or we must inc fr->mr->ref here
+     */
+    memory_region_get(fr->mr);
+    physmap_put(map);
 
     ret.mr = fr->mr;
     range = addrrange_intersection(range, fr->addr);
@@ -1497,10 +1532,13 @@ void memory_global_sync_dirty_bitmap(MemoryRegion *address_space)
 {
     AddressSpace *as = memory_region_to_address_space(address_space);
     FlatRange *fr;
+    PhysMap *map = cur_map_get();
+    FlatView *view = &map->views[as->view_id];
 
-    FOR_EACH_FLAT_RANGE(fr, &as->current_map) {
+    FOR_EACH_FLAT_RANGE(fr, view) {
         MEMORY_LISTENER_UPDATE_REGION(fr, as, Forward, log_sync);
     }
+    physmap_put(map);
 }
 
 void memory_global_dirty_log_start(void)
@@ -1519,6 +1557,8 @@ static void listener_add_address_space(MemoryListener *listener,
                                        AddressSpace *as)
 {
     FlatRange *fr;
+    PhysMap *map;
+    FlatView *view;
 
     if (listener->address_space_filter
         && listener->address_space_filter != as->root) {
@@ -1528,7 +1568,10 @@ static void listener_add_address_space(MemoryListener *listener,
     if (global_dirty_log) {
         listener->log_global_start(listener);
     }
-    FOR_EACH_FLAT_RANGE(fr, &as->current_map) {
+
+    map = cur_map_get();
+    view = &map->views[as->view_id];
+    FOR_EACH_FLAT_RANGE(fr, view) {
         MemoryRegionSection section = {
             .mr = fr->mr,
             .address_space = as->root,
@@ -1539,6 +1582,7 @@ static void listener_add_address_space(MemoryListener *listener,
         };
         listener->region_add(listener, &section);
     }
+    physmap_put(map);
 }
 
 void memory_listener_register(MemoryListener *listener, MemoryRegion *filter)
@@ -1570,12 +1614,14 @@ void memory_listener_unregister(MemoryListener *listener)
 void set_system_memory_map(MemoryRegion *mr)
 {
     address_space_memory.root = mr;
+    address_space_memory.view_id = 0;
     memory_region_update_topology(NULL);
 }
 
 void set_system_io_map(MemoryRegion *mr)
 {
     address_space_io.root = mr;
+    address_space_io.view_id = 1;
     memory_region_update_topology(NULL);
 }
 
diff --git a/memory.h b/memory.h
index 357edd8..18442d4 100644
--- a/memory.h
+++ b/memory.h
@@ -256,7 +256,7 @@ typedef struct MemoryListener MemoryListener;
  * Use with memory_listener_register() and memory_listener_unregister().
  */
 struct MemoryListener {
-    void (*begin)(MemoryListener *listener);
+    void (*begin)(MemoryListener *listener, PhysMap *next);
     void (*commit)(MemoryListener *listener);
     void (*region_add)(MemoryListener *listener, MemoryRegionSection *section);
     void (*region_del)(MemoryListener *listener, MemoryRegionSection *section);
@@ -829,6 +829,13 @@ void mtree_info(fprintf_function mon_printf, void *f);
 
 void memory_region_get(MemoryRegion *mr);
 void memory_region_put(MemoryRegion *mr);
+void physmap_reclaimer_enqueue(void *opaque, ReleaseHandler *release);
+void physmap_get(PhysMap *map);
+void physmap_put(PhysMap *map);
+PhysMap *cur_map_get(void);
+PhysMap *alloc_next_map(void);
+void cur_map_update(PhysMap *next);
+void physmap_init(void);
 #endif
 
 #endif
diff --git a/vl.c b/vl.c
index 1329c30..12af523 100644
--- a/vl.c
+++ b/vl.c
@@ -3346,6 +3346,7 @@ int main(int argc, char **argv, char **envp)
     if (ram_size == 0) {
         ram_size = DEFAULT_RAM_SIZE * 1024 * 1024;
     }
+    physmap_init();
 
     configure_accelerator();
 
diff --git a/xen-all.c b/xen-all.c
index 59f2323..41d82fd 100644
--- a/xen-all.c
+++ b/xen-all.c
@@ -452,7 +452,7 @@ static void xen_set_memory(struct MemoryListener *listener,
     }
 }
 
-static void xen_begin(MemoryListener *listener)
+static void xen_begin(MemoryListener *listener, PhysMap *next)
 {
 }
 
-- 
1.7.4.4

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]
  Powered by Linux