Re: [PATCH 09/15] memory: prepare flatview and radix-tree for rcu style access

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Wed, Aug 8, 2012 at 6:25 AM, Liu Ping Fan <qemulist@xxxxxxxxx> wrote:
> From: Liu Ping Fan <pingfank@xxxxxxxxxxxxxxxxxx>
>
> Flatview and radix view are all under the protection of pointer.
> And this make sure the change of them seem to be atomic!
>
> The mr accessed by radix-tree leaf or flatview will be reclaimed
> after the prev PhysMap not in use any longer
>
> Signed-off-by: Liu Ping Fan <pingfank@xxxxxxxxxxxxxxxxxx>
> ---
>  exec.c      |  303 +++++++++++++++++++++++++++++++++++++++-------------------
>  hw/vhost.c  |    2 +-
>  hw/xen_pt.c |    2 +-
>  kvm-all.c   |    2 +-
>  memory.c    |   92 ++++++++++++++-----
>  memory.h    |    9 ++-
>  vl.c        |    1 +
>  xen-all.c   |    2 +-
>  8 files changed, 286 insertions(+), 127 deletions(-)
>
> diff --git a/exec.c b/exec.c
> index 01b91b0..97addb9 100644
> --- a/exec.c
> +++ b/exec.c
> @@ -24,6 +24,7 @@
>  #include <sys/mman.h>
>  #endif
>
> +#include "qemu/atomic.h"
>  #include "qemu-common.h"
>  #include "cpu.h"
>  #include "tcg.h"
> @@ -35,6 +36,8 @@
>  #include "qemu-timer.h"
>  #include "memory.h"
>  #include "exec-memory.h"
> +#include "qemu-thread.h"
> +#include "qemu/reclaimer.h"
>  #if defined(CONFIG_USER_ONLY)
>  #include <qemu.h>
>  #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
> @@ -184,25 +187,17 @@ static void *l1_map[V_L1_SIZE];
>
>  #if !defined(CONFIG_USER_ONLY)
>
> -static MemoryRegionSection *phys_sections;
> -static unsigned phys_sections_nb, phys_sections_nb_alloc;
>  static uint16_t phys_section_unassigned;
>  static uint16_t phys_section_notdirty;
>  static uint16_t phys_section_rom;
>  static uint16_t phys_section_watch;
>
> -
> -/* Simple allocator for PhysPageEntry nodes */
> -static PhysPageEntry (*phys_map_nodes)[L2_SIZE];
> -static unsigned phys_map_nodes_nb, phys_map_nodes_nb_alloc;
> -
>  #define PHYS_MAP_NODE_NIL (((uint16_t)~0) >> 1)
>
> -/* This is a multi-level map on the physical address space.
> -   The bottom level has pointers to MemoryRegionSections.  */
> -static PhysPageEntry phys_map = { .ptr = PHYS_MAP_NODE_NIL, .is_leaf = 0 };
> -
> +static QemuMutex cur_map_lock;
> +static PhysMap *cur_map;
>  QemuMutex mem_map_lock;
> +static PhysMap *next_map;
>
>  static void io_mem_init(void);
>  static void memory_map_init(void);
> @@ -383,41 +378,38 @@ static inline PageDesc *page_find(tb_page_addr_t index)
>
>  #if !defined(CONFIG_USER_ONLY)
>
> -static void phys_map_node_reserve(unsigned nodes)
> +static void phys_map_node_reserve(PhysMap *map, unsigned nodes)
>  {
> -    if (phys_map_nodes_nb + nodes > phys_map_nodes_nb_alloc) {
> +    if (map->phys_map_nodes_nb + nodes > map->phys_map_nodes_nb_alloc) {
>          typedef PhysPageEntry Node[L2_SIZE];
> -        phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc * 2, 16);
> -        phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc,
> -                                      phys_map_nodes_nb + nodes);
> -        phys_map_nodes = g_renew(Node, phys_map_nodes,
> -                                 phys_map_nodes_nb_alloc);
> +        map->phys_map_nodes_nb_alloc = MAX(map->phys_map_nodes_nb_alloc * 2,
> +                                                                        16);
> +        map->phys_map_nodes_nb_alloc = MAX(map->phys_map_nodes_nb_alloc,
> +                                      map->phys_map_nodes_nb + nodes);
> +        map->phys_map_nodes = g_renew(Node, map->phys_map_nodes,
> +                                 map->phys_map_nodes_nb_alloc);
>      }
>  }
>
> -static uint16_t phys_map_node_alloc(void)
> +static uint16_t phys_map_node_alloc(PhysMap *map)
>  {
>      unsigned i;
>      uint16_t ret;
>
> -    ret = phys_map_nodes_nb++;
> +    ret = map->phys_map_nodes_nb++;
>      assert(ret != PHYS_MAP_NODE_NIL);
> -    assert(ret != phys_map_nodes_nb_alloc);
> +    assert(ret != map->phys_map_nodes_nb_alloc);
>      for (i = 0; i < L2_SIZE; ++i) {
> -        phys_map_nodes[ret][i].is_leaf = 0;
> -        phys_map_nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
> +        map->phys_map_nodes[ret][i].is_leaf = 0;
> +        map->phys_map_nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
>      }
>      return ret;
>  }
>
> -static void phys_map_nodes_reset(void)
> -{
> -    phys_map_nodes_nb = 0;
> -}
> -
> -
> -static void phys_page_set_level(PhysPageEntry *lp, target_phys_addr_t *index,
> -                                target_phys_addr_t *nb, uint16_t leaf,
> +static void phys_page_set_level(PhysMap *map, PhysPageEntry *lp,
> +                                target_phys_addr_t *index,
> +                                target_phys_addr_t *nb,
> +                                uint16_t leaf,
>                                  int level)
>  {
>      PhysPageEntry *p;
> @@ -425,8 +417,8 @@ static void phys_page_set_level(PhysPageEntry *lp, target_phys_addr_t *index,
>      target_phys_addr_t step = (target_phys_addr_t)1 << (level * L2_BITS);
>
>      if (!lp->is_leaf && lp->ptr == PHYS_MAP_NODE_NIL) {
> -        lp->ptr = phys_map_node_alloc();
> -        p = phys_map_nodes[lp->ptr];
> +        lp->ptr = phys_map_node_alloc(map);
> +        p = map->phys_map_nodes[lp->ptr];
>          if (level == 0) {
>              for (i = 0; i < L2_SIZE; i++) {
>                  p[i].is_leaf = 1;
> @@ -434,7 +426,7 @@ static void phys_page_set_level(PhysPageEntry *lp, target_phys_addr_t *index,
>              }
>          }
>      } else {
> -        p = phys_map_nodes[lp->ptr];
> +        p = map->phys_map_nodes[lp->ptr];
>      }
>      lp = &p[(*index >> (level * L2_BITS)) & (L2_SIZE - 1)];
>
> @@ -445,24 +437,27 @@ static void phys_page_set_level(PhysPageEntry *lp, target_phys_addr_t *index,
>              *index += step;
>              *nb -= step;
>          } else {
> -            phys_page_set_level(lp, index, nb, leaf, level - 1);
> +            phys_page_set_level(map, lp, index, nb, leaf, level - 1);
>          }
>          ++lp;
>      }
>  }
>
> -static void phys_page_set(target_phys_addr_t index, target_phys_addr_t nb,
> -                          uint16_t leaf)
> +static void phys_page_set(PhysMap *map, target_phys_addr_t index,
> +                            target_phys_addr_t nb,
> +                            uint16_t leaf)
>  {
>      /* Wildly overreserve - it doesn't matter much. */
> -    phys_map_node_reserve(3 * P_L2_LEVELS);
> +    phys_map_node_reserve(map, 3 * P_L2_LEVELS);
>
> -    phys_page_set_level(&phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
> +    /* update in new tree*/
> +    phys_page_set_level(map, &map->root, &index, &nb, leaf, P_L2_LEVELS - 1);
>  }
>
> -MemoryRegionSection *phys_page_find(target_phys_addr_t index)
> +static MemoryRegionSection *phys_page_find_internal(PhysMap *map,
> +                           target_phys_addr_t index)
>  {
> -    PhysPageEntry lp = phys_map;
> +    PhysPageEntry lp = map->root;
>      PhysPageEntry *p;
>      int i;
>      uint16_t s_index = phys_section_unassigned;
> @@ -471,13 +466,79 @@ MemoryRegionSection *phys_page_find(target_phys_addr_t index)
>          if (lp.ptr == PHYS_MAP_NODE_NIL) {
>              goto not_found;
>          }
> -        p = phys_map_nodes[lp.ptr];
> +        p = map->phys_map_nodes[lp.ptr];
>          lp = p[(index >> (i * L2_BITS)) & (L2_SIZE - 1)];
>      }
>
>      s_index = lp.ptr;
>  not_found:
> -    return &phys_sections[s_index];
> +    return &map->phys_sections[s_index];
> +}
> +
> +MemoryRegionSection *phys_page_find(target_phys_addr_t index)
> +{
> +    return phys_page_find_internal(cur_map, index);
> +}
> +
> +void physmap_get(PhysMap *map)
> +{
> +    atomic_inc(&map->ref);
> +}
> +
> +/* Untill rcu read side finished, do this reclaim */

Until

> +static ChunkHead physmap_reclaimer_list = { .lh_first = NULL };

Please insert a blank line here.

> +void physmap_reclaimer_enqueue(void *opaque, ReleaseHandler *release)
> +{
> +    reclaimer_enqueue(&physmap_reclaimer_list, opaque, release);
> +}
> +
> +static void destroy_all_mappings(PhysMap *map);

Prototypes belong at the top of the file.

> +static void phys_map_release(PhysMap *map)
> +{
> +    /* emulate for rcu reclaimer for mr */
> +    reclaimer_worker(&physmap_reclaimer_list);
> +
> +    destroy_all_mappings(map);
> +    g_free(map->phys_map_nodes);
> +    g_free(map->phys_sections);
> +    g_free(map->views[0].ranges);
> +    g_free(map->views[1].ranges);
> +    g_free(map);
> +}
> +
> +void physmap_put(PhysMap *map)
> +{
> +    if (atomic_dec_and_test(&map->ref)) {
> +        phys_map_release(map);
> +    }
> +}
> +
> +void cur_map_update(PhysMap *next)
> +{
> +    qemu_mutex_lock(&cur_map_lock);
> +    physmap_put(cur_map);
> +    cur_map = next;
> +    smp_mb();
> +    qemu_mutex_unlock(&cur_map_lock);
> +}
> +
> +PhysMap *cur_map_get(void)
> +{
> +    PhysMap *ret;
> +
> +    qemu_mutex_lock(&cur_map_lock);
> +    ret = cur_map;
> +    physmap_get(ret);
> +    smp_mb();
> +    qemu_mutex_unlock(&cur_map_lock);
> +    return ret;
> +}
> +
> +PhysMap *alloc_next_map(void)
> +{
> +    PhysMap *next = g_malloc0(sizeof(PhysMap));
> +    atomic_set(&next->ref, 1);
> +    return next;
>  }
>
>  bool memory_region_is_unassigned(MemoryRegion *mr)
> @@ -632,6 +693,7 @@ void cpu_exec_init_all(void)
>      memory_map_init();
>      io_mem_init();
>      qemu_mutex_init(&mem_map_lock);
> +    qemu_mutex_init(&cur_map_lock);
>  #endif
>  }
>
> @@ -2161,17 +2223,18 @@ int page_unprotect(target_ulong address, uintptr_t pc, void *puc)
>
>  #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
>  typedef struct subpage_t {
> +    PhysMap *map;
>      MemoryRegion iomem;
>      target_phys_addr_t base;
>      uint16_t sub_section[TARGET_PAGE_SIZE];
>  } subpage_t;
>
> -static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
> -                             uint16_t section);
> -static subpage_t *subpage_init(target_phys_addr_t base);
> -static void destroy_page_desc(uint16_t section_index)
> +static int subpage_register(PhysMap *map, subpage_t *mmio, uint32_t start,
> +                            uint32_t end, uint16_t section);
> +static subpage_t *subpage_init(PhysMap *map, target_phys_addr_t base);
> +static void destroy_page_desc(PhysMap *map, uint16_t section_index)
>  {
> -    MemoryRegionSection *section = &phys_sections[section_index];
> +    MemoryRegionSection *section = &map->phys_sections[section_index];
>      MemoryRegion *mr = section->mr;
>
>      if (mr->subpage) {
> @@ -2181,7 +2244,7 @@ static void destroy_page_desc(uint16_t section_index)
>      }
>  }
>
> -static void destroy_l2_mapping(PhysPageEntry *lp, unsigned level)
> +static void destroy_l2_mapping(PhysMap *map, PhysPageEntry *lp, unsigned level)
>  {
>      unsigned i;
>      PhysPageEntry *p;
> @@ -2190,38 +2253,34 @@ static void destroy_l2_mapping(PhysPageEntry *lp, unsigned level)
>          return;
>      }
>
> -    p = phys_map_nodes[lp->ptr];
> +    p = map->phys_map_nodes[lp->ptr];
>      for (i = 0; i < L2_SIZE; ++i) {
>          if (!p[i].is_leaf) {
> -            destroy_l2_mapping(&p[i], level - 1);
> +            destroy_l2_mapping(map, &p[i], level - 1);
>          } else {
> -            destroy_page_desc(p[i].ptr);
> +            destroy_page_desc(map, p[i].ptr);
>          }
>      }
>      lp->is_leaf = 0;
>      lp->ptr = PHYS_MAP_NODE_NIL;
>  }
>
> -static void destroy_all_mappings(void)
> +static void destroy_all_mappings(PhysMap *map)
>  {
> -    destroy_l2_mapping(&phys_map, P_L2_LEVELS - 1);
> -    phys_map_nodes_reset();
> -}
> +    PhysPageEntry *root = &map->root;
>
> -static uint16_t phys_section_add(MemoryRegionSection *section)
> -{
> -    if (phys_sections_nb == phys_sections_nb_alloc) {
> -        phys_sections_nb_alloc = MAX(phys_sections_nb_alloc * 2, 16);
> -        phys_sections = g_renew(MemoryRegionSection, phys_sections,
> -                                phys_sections_nb_alloc);
> -    }
> -    phys_sections[phys_sections_nb] = *section;
> -    return phys_sections_nb++;
> +    destroy_l2_mapping(map, root, P_L2_LEVELS - 1);
>  }
>
> -static void phys_sections_clear(void)
> +static uint16_t phys_section_add(PhysMap *map, MemoryRegionSection *section)
>  {
> -    phys_sections_nb = 0;
> +    if (map->phys_sections_nb == map->phys_sections_nb_alloc) {
> +        map->phys_sections_nb_alloc = MAX(map->phys_sections_nb_alloc * 2, 16);
> +        map->phys_sections = g_renew(MemoryRegionSection, map->phys_sections,
> +                                map->phys_sections_nb_alloc);
> +    }
> +    map->phys_sections[map->phys_sections_nb] = *section;
> +    return map->phys_sections_nb++;
>  }
>
>  /* register physical memory.
> @@ -2232,12 +2291,13 @@ static void phys_sections_clear(void)
>     start_addr and region_offset are rounded down to a page boundary
>     before calculating this offset.  This should not be a problem unless
>     the low bits of start_addr and region_offset differ.  */
> -static void register_subpage(MemoryRegionSection *section)
> +static void register_subpage(PhysMap *map, MemoryRegionSection *section)
>  {
>      subpage_t *subpage;
>      target_phys_addr_t base = section->offset_within_address_space
>          & TARGET_PAGE_MASK;
> -    MemoryRegionSection *existing = phys_page_find(base >> TARGET_PAGE_BITS);
> +    MemoryRegionSection *existing = phys_page_find_internal(map,
> +                                            base >> TARGET_PAGE_BITS);
>      MemoryRegionSection subsection = {
>          .offset_within_address_space = base,
>          .size = TARGET_PAGE_SIZE,
> @@ -2247,30 +2307,30 @@ static void register_subpage(MemoryRegionSection *section)
>      assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
>
>      if (!(existing->mr->subpage)) {
> -        subpage = subpage_init(base);
> +        subpage = subpage_init(map, base);
>          subsection.mr = &subpage->iomem;
> -        phys_page_set(base >> TARGET_PAGE_BITS, 1,
> -                      phys_section_add(&subsection));
> +        phys_page_set(map, base >> TARGET_PAGE_BITS, 1,
> +                      phys_section_add(map, &subsection));
>      } else {
>          subpage = container_of(existing->mr, subpage_t, iomem);
>      }
>      start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
>      end = start + section->size;
> -    subpage_register(subpage, start, end, phys_section_add(section));
> +    subpage_register(map, subpage, start, end, phys_section_add(map, section));
>  }
>
>
> -static void register_multipage(MemoryRegionSection *section)
> +static void register_multipage(PhysMap *map, MemoryRegionSection *section)
>  {
>      target_phys_addr_t start_addr = section->offset_within_address_space;
>      ram_addr_t size = section->size;
>      target_phys_addr_t addr;
> -    uint16_t section_index = phys_section_add(section);
> +    uint16_t section_index = phys_section_add(map, section);
>
>      assert(size);
>
>      addr = start_addr;
> -    phys_page_set(addr >> TARGET_PAGE_BITS, size >> TARGET_PAGE_BITS,
> +    phys_page_set(map, addr >> TARGET_PAGE_BITS, size >> TARGET_PAGE_BITS,
>                    section_index);
>  }
>
> @@ -2278,13 +2338,14 @@ void cpu_register_physical_memory_log(MemoryRegionSection *section,
>                                        bool readonly)
>  {
>      MemoryRegionSection now = *section, remain = *section;
> +    PhysMap *map = next_map;
>
>      if ((now.offset_within_address_space & ~TARGET_PAGE_MASK)
>          || (now.size < TARGET_PAGE_SIZE)) {
>          now.size = MIN(TARGET_PAGE_ALIGN(now.offset_within_address_space)
>                         - now.offset_within_address_space,
>                         now.size);
> -        register_subpage(&now);
> +        register_subpage(map, &now);
>          remain.size -= now.size;
>          remain.offset_within_address_space += now.size;
>          remain.offset_within_region += now.size;
> @@ -2292,14 +2353,14 @@ void cpu_register_physical_memory_log(MemoryRegionSection *section,
>      now = remain;
>      now.size &= TARGET_PAGE_MASK;
>      if (now.size) {
> -        register_multipage(&now);
> +        register_multipage(map, &now);
>          remain.size -= now.size;
>          remain.offset_within_address_space += now.size;
>          remain.offset_within_region += now.size;
>      }
>      now = remain;
>      if (now.size) {
> -        register_subpage(&now);
> +        register_subpage(map, &now);
>      }
>  }
>
> @@ -3001,7 +3062,7 @@ static uint64_t subpage_read(void *opaque, target_phys_addr_t addr,
>             mmio, len, addr, idx);
>  #endif
>
> -    section = &phys_sections[mmio->sub_section[idx]];
> +    section = &mmio->map->phys_sections[mmio->sub_section[idx]];
>      addr += mmio->base;
>      addr -= section->offset_within_address_space;
>      addr += section->offset_within_region;
> @@ -3020,7 +3081,7 @@ static void subpage_write(void *opaque, target_phys_addr_t addr,
>             __func__, mmio, len, addr, idx, value);
>  #endif
>
> -    section = &phys_sections[mmio->sub_section[idx]];
> +    section = &mmio->map->phys_sections[mmio->sub_section[idx]];
>      addr += mmio->base;
>      addr -= section->offset_within_address_space;
>      addr += section->offset_within_region;
> @@ -3065,8 +3126,8 @@ static const MemoryRegionOps subpage_ram_ops = {
>      .endianness = DEVICE_NATIVE_ENDIAN,
>  };
>
> -static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
> -                             uint16_t section)
> +static int subpage_register(PhysMap *map, subpage_t *mmio, uint32_t start,
> +                              uint32_t end, uint16_t section)
>  {
>      int idx, eidx;
>
> @@ -3078,10 +3139,10 @@ static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
>      printf("%s: %p start %08x end %08x idx %08x eidx %08x mem %ld\n", __func__,
>             mmio, start, end, idx, eidx, memory);
>  #endif
> -    if (memory_region_is_ram(phys_sections[section].mr)) {
> -        MemoryRegionSection new_section = phys_sections[section];
> +    if (memory_region_is_ram(map->phys_sections[section].mr)) {
> +        MemoryRegionSection new_section = map->phys_sections[section];
>          new_section.mr = &io_mem_subpage_ram;
> -        section = phys_section_add(&new_section);
> +        section = phys_section_add(map, &new_section);
>      }
>      for (; idx <= eidx; idx++) {
>          mmio->sub_section[idx] = section;
> @@ -3090,12 +3151,13 @@ static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
>      return 0;
>  }
>
> -static subpage_t *subpage_init(target_phys_addr_t base)
> +static subpage_t *subpage_init(PhysMap *map, target_phys_addr_t base)
>  {
>      subpage_t *mmio;
>
>      mmio = g_malloc0(sizeof(subpage_t));
>
> +    mmio->map = map;
>      mmio->base = base;
>      memory_region_init_io(&mmio->iomem, &subpage_ops, mmio,
>                            "subpage", TARGET_PAGE_SIZE);
> @@ -3104,12 +3166,12 @@ static subpage_t *subpage_init(target_phys_addr_t base)
>      printf("%s: %p base " TARGET_FMT_plx " len %08x %d\n", __func__,
>             mmio, base, TARGET_PAGE_SIZE, subpage_memory);
>  #endif
> -    subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, phys_section_unassigned);
> +    subpage_register(map, mmio, 0, TARGET_PAGE_SIZE-1, phys_section_unassigned);
>
>      return mmio;
>  }
>
> -static uint16_t dummy_section(MemoryRegion *mr)
> +static uint16_t dummy_section(PhysMap *map, MemoryRegion *mr)
>  {
>      MemoryRegionSection section = {
>          .mr = mr,
> @@ -3118,7 +3180,7 @@ static uint16_t dummy_section(MemoryRegion *mr)
>          .size = UINT64_MAX,
>      };
>
> -    return phys_section_add(&section);
> +    return phys_section_add(map, &section);
>  }
>
>  MemoryRegion *iotlb_to_region(target_phys_addr_t index)
> @@ -3140,15 +3202,32 @@ static void io_mem_init(void)
>                            "watch", UINT64_MAX);
>  }
>
> -static void core_begin(MemoryListener *listener)
> +#if 0
> +static void physmap_init(void)
> +{
> +    FlatView v = { .ranges = NULL,
> +                             .nr = 0,
> +                             .nr_allocated = 0,
> +    };
> +
> +    init_map.views[0] = v;
> +    init_map.views[1] = v;
> +    cur_map =  &init_map;
> +}
> +#endif

Please delete.

> +
> +static void core_begin(MemoryListener *listener, PhysMap *new_map)
>  {
> -    destroy_all_mappings();
> -    phys_sections_clear();
> -    phys_map.ptr = PHYS_MAP_NODE_NIL;
> -    phys_section_unassigned = dummy_section(&io_mem_unassigned);
> -    phys_section_notdirty = dummy_section(&io_mem_notdirty);
> -    phys_section_rom = dummy_section(&io_mem_rom);
> -    phys_section_watch = dummy_section(&io_mem_watch);
> +
> +    new_map->root.ptr = PHYS_MAP_NODE_NIL;
> +    new_map->root.is_leaf = 0;
> +
> +    /* In all the map, these sections have the same index */
> +    phys_section_unassigned = dummy_section(new_map, &io_mem_unassigned);
> +    phys_section_notdirty = dummy_section(new_map, &io_mem_notdirty);
> +    phys_section_rom = dummy_section(new_map, &io_mem_rom);
> +    phys_section_watch = dummy_section(new_map, &io_mem_watch);
> +    next_map = new_map;
>  }
>
>  static void core_commit(MemoryListener *listener)
> @@ -3161,6 +3240,16 @@ static void core_commit(MemoryListener *listener)
>      for(env = first_cpu; env != NULL; env = env->next_cpu) {
>          tlb_flush(env, 1);
>      }
> +
> +/* move into high layer
> +    qemu_mutex_lock(&cur_map_lock);
> +    if (cur_map != NULL) {
> +        physmap_put(cur_map);
> +    }
> +    cur_map = next_map;
> +    smp_mb();
> +    qemu_mutex_unlock(&cur_map_lock);
> +*/

Also, commented-out code should be deleted.

>  }
>
>  static void core_region_add(MemoryListener *listener,
> @@ -3217,7 +3306,7 @@ static void core_eventfd_del(MemoryListener *listener,
>  {
>  }
>
> -static void io_begin(MemoryListener *listener)
> +static void io_begin(MemoryListener *listener, PhysMap *next)
>  {
>  }
>
> @@ -3329,6 +3418,20 @@ static void memory_map_init(void)
>      memory_listener_register(&io_memory_listener, system_io);
>  }
>
> +void physmap_init(void)
> +{
> +    FlatView v = { .ranges = NULL, .nr = 0, .nr_allocated = 0,
> +                           };
> +    PhysMap *init_map = g_malloc0(sizeof(PhysMap));
> +
> +    atomic_set(&init_map->ref, 1);
> +    init_map->root.ptr = PHYS_MAP_NODE_NIL;
> +    init_map->root.is_leaf = 0;
> +    init_map->views[0] = v;
> +    init_map->views[1] = v;
> +    cur_map = init_map;
> +}
> +
>  MemoryRegion *get_system_memory(void)
>  {
>      return system_memory;
> @@ -3391,6 +3494,7 @@ void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
>      uint32_t val;
>      target_phys_addr_t page;
>      MemoryRegionSection *section;
> +    PhysMap *cur = cur_map_get();
>
>      while (len > 0) {
>          page = addr & TARGET_PAGE_MASK;
> @@ -3472,6 +3576,7 @@ void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
>          buf += l;
>          addr += l;
>      }
> +    physmap_put(cur);
>  }
>
>  /* used for ROM loading : can write in RAM and ROM */
> diff --git a/hw/vhost.c b/hw/vhost.c
> index 43664e7..df58345 100644
> --- a/hw/vhost.c
> +++ b/hw/vhost.c
> @@ -438,7 +438,7 @@ static bool vhost_section(MemoryRegionSection *section)
>          && memory_region_is_ram(section->mr);
>  }
>
> -static void vhost_begin(MemoryListener *listener)
> +static void vhost_begin(MemoryListener *listener, PhysMap *next)
>  {
>  }
>
> diff --git a/hw/xen_pt.c b/hw/xen_pt.c
> index 3b6d186..fba8586 100644
> --- a/hw/xen_pt.c
> +++ b/hw/xen_pt.c
> @@ -597,7 +597,7 @@ static void xen_pt_region_update(XenPCIPassthroughState *s,
>      }
>  }
>
> -static void xen_pt_begin(MemoryListener *l)
> +static void xen_pt_begin(MemoryListener *l, PhysMap *next)
>  {
>  }
>
> diff --git a/kvm-all.c b/kvm-all.c
> index f8e4328..bc42cab 100644
> --- a/kvm-all.c
> +++ b/kvm-all.c
> @@ -693,7 +693,7 @@ static void kvm_set_phys_mem(MemoryRegionSection *section, bool add)
>      }
>  }
>
> -static void kvm_begin(MemoryListener *listener)
> +static void kvm_begin(MemoryListener *listener, PhysMap *next)
>  {
>  }
>
> diff --git a/memory.c b/memory.c
> index c7f2cfd..54cdc7f 100644
> --- a/memory.c
> +++ b/memory.c
> @@ -20,6 +20,7 @@
>  #include "kvm.h"
>  #include <assert.h>
>  #include "hw/qdev.h"
> +#include "qemu-thread.h"
>
>  #define WANT_EXEC_OBSOLETE
>  #include "exec-obsolete.h"
> @@ -192,7 +193,7 @@ typedef struct AddressSpaceOps AddressSpaceOps;
>  /* A system address space - I/O, memory, etc. */
>  struct AddressSpace {
>      MemoryRegion *root;
> -    FlatView current_map;
> +    int view_id;
>      int ioeventfd_nb;
>      MemoryRegionIoeventfd *ioeventfds;
>  };
> @@ -232,11 +233,6 @@ static void flatview_insert(FlatView *view, unsigned pos, FlatRange *range)
>      ++view->nr;
>  }
>
> -static void flatview_destroy(FlatView *view)
> -{
> -    g_free(view->ranges);
> -}
> -
>  static bool can_merge(FlatRange *r1, FlatRange *r2)
>  {
>      return int128_eq(addrrange_end(r1->addr), r2->addr.start)
> @@ -594,8 +590,10 @@ static void address_space_update_ioeventfds(AddressSpace *as)
>      MemoryRegionIoeventfd *ioeventfds = NULL;
>      AddrRange tmp;
>      unsigned i;
> +    PhysMap *map = cur_map_get();
> +    FlatView *view = &map->views[as->view_id];
>
> -    FOR_EACH_FLAT_RANGE(fr, &as->current_map) {
> +    FOR_EACH_FLAT_RANGE(fr, view) {
>          for (i = 0; i < fr->mr->ioeventfd_nb; ++i) {
>              tmp = addrrange_shift(fr->mr->ioeventfds[i].addr,
>                                    int128_sub(fr->addr.start,
> @@ -616,6 +614,7 @@ static void address_space_update_ioeventfds(AddressSpace *as)
>      g_free(as->ioeventfds);
>      as->ioeventfds = ioeventfds;
>      as->ioeventfd_nb = ioeventfd_nb;
> +    physmap_put(map);
>  }
>
>  static void address_space_update_topology_pass(AddressSpace *as,
> @@ -681,21 +680,23 @@ static void address_space_update_topology_pass(AddressSpace *as,
>  }
>
>
> -static void address_space_update_topology(AddressSpace *as)
> +static void address_space_update_topology(AddressSpace *as, PhysMap *prev,
> +                                            PhysMap *next)
>  {
> -    FlatView old_view = as->current_map;
> +    FlatView old_view = prev->views[as->view_id];
>      FlatView new_view = generate_memory_topology(as->root);
>
>      address_space_update_topology_pass(as, old_view, new_view, false);
>      address_space_update_topology_pass(as, old_view, new_view, true);
> +    next->views[as->view_id] = new_view;
>
> -    as->current_map = new_view;
> -    flatview_destroy(&old_view);
>      address_space_update_ioeventfds(as);
>  }
>
>  static void memory_region_update_topology(MemoryRegion *mr)
>  {
> +    PhysMap *prev, *next;
> +
>      if (memory_region_transaction_depth) {
>          memory_region_update_pending |= !mr || mr->enabled;
>          return;
> @@ -705,16 +706,20 @@ static void memory_region_update_topology(MemoryRegion *mr)
>          return;
>      }
>
> -    MEMORY_LISTENER_CALL_GLOBAL(begin, Forward);
> +     prev = cur_map_get();
> +    /* allocate PhysMap next here */
> +    next = alloc_next_map();
> +    MEMORY_LISTENER_CALL_GLOBAL(begin, Forward, next);
>
>      if (address_space_memory.root) {
> -        address_space_update_topology(&address_space_memory);
> +        address_space_update_topology(&address_space_memory, prev, next);
>      }
>      if (address_space_io.root) {
> -        address_space_update_topology(&address_space_io);
> +        address_space_update_topology(&address_space_io, prev, next);
>      }
>
>      MEMORY_LISTENER_CALL_GLOBAL(commit, Forward);
> +    cur_map_update(next);
>
>      memory_region_update_pending = false;
>  }
> @@ -1071,7 +1076,7 @@ void memory_region_put(MemoryRegion *mr)
>
>      if (atomic_dec_and_test(&mr->ref)) {
>          /* to fix, using call_rcu( ,release) */
> -        mr->life_ops->put(mr);
> +        physmap_reclaimer_enqueue(mr, (ReleaseHandler *)mr->life_ops->put);
>      }
>  }
>
> @@ -1147,13 +1152,18 @@ void memory_region_set_dirty(MemoryRegion *mr, target_phys_addr_t addr,
>  void memory_region_sync_dirty_bitmap(MemoryRegion *mr)
>  {
>      FlatRange *fr;
> +    FlatView *fview;
> +    PhysMap *map;
>
> -    FOR_EACH_FLAT_RANGE(fr, &address_space_memory.current_map) {
> +    map = cur_map_get();
> +    fview = &map->views[address_space_memory.view_id];
> +    FOR_EACH_FLAT_RANGE(fr, fview) {
>          if (fr->mr == mr) {
>              MEMORY_LISTENER_UPDATE_REGION(fr, &address_space_memory,
>                                            Forward, log_sync);
>          }
>      }
> +    physmap_put(map);
>  }
>
>  void memory_region_set_readonly(MemoryRegion *mr, bool readonly)
> @@ -1201,8 +1211,12 @@ static void memory_region_update_coalesced_range(MemoryRegion *mr)
>      FlatRange *fr;
>      CoalescedMemoryRange *cmr;
>      AddrRange tmp;
> +    FlatView *fview;
> +    PhysMap *map;
>
> -    FOR_EACH_FLAT_RANGE(fr, &address_space_memory.current_map) {
> +    map = cur_map_get();
> +    fview = &map->views[address_space_memory.view_id];
> +    FOR_EACH_FLAT_RANGE(fr, fview) {
>          if (fr->mr == mr) {
>              qemu_unregister_coalesced_mmio(int128_get64(fr->addr.start),
>                                             int128_get64(fr->addr.size));
> @@ -1219,6 +1233,7 @@ static void memory_region_update_coalesced_range(MemoryRegion *mr)
>              }
>          }
>      }
> +    physmap_put(map);
>  }
>
>  void memory_region_set_coalescing(MemoryRegion *mr)
> @@ -1458,29 +1473,49 @@ static int cmp_flatrange_addr(const void *addr_, const void *fr_)
>      return 0;
>  }
>
> -static FlatRange *address_space_lookup(AddressSpace *as, AddrRange addr)
> +static FlatRange *address_space_lookup(FlatView *view, AddrRange addr)
>  {
> -    return bsearch(&addr, as->current_map.ranges, as->current_map.nr,
> +    return bsearch(&addr, view->ranges, view->nr,
>                     sizeof(FlatRange), cmp_flatrange_addr);
>  }
>
> +/* dec the ref, which inc by memory_region_find*/
> +void memory_region_section_put(MemoryRegionSection *mrs)
> +{
> +    if (mrs->mr != NULL) {
> +        memory_region_put(mrs->mr);
> +    }
> +}
> +
> +/* inc mr's ref. Caller need dec mr's ref */
>  MemoryRegionSection memory_region_find(MemoryRegion *address_space,
>                                         target_phys_addr_t addr, uint64_t size)
>  {
> +    PhysMap *map;
>      AddressSpace *as = memory_region_to_address_space(address_space);
>      AddrRange range = addrrange_make(int128_make64(addr),
>                                       int128_make64(size));
> -    FlatRange *fr = address_space_lookup(as, range);
> +    FlatView *fview;
> +
> +    map = cur_map_get();
> +
> +    fview = &map->views[as->view_id];
> +    FlatRange *fr = address_space_lookup(fview, range);
>      MemoryRegionSection ret = { .mr = NULL, .size = 0 };
>
>      if (!fr) {
> +        physmap_put(map);
>          return ret;
>      }
>
> -    while (fr > as->current_map.ranges
> +    while (fr > fview->ranges
>             && addrrange_intersects(fr[-1].addr, range)) {
>          --fr;
>      }
> +    /* To fix, the caller must in rcu, or we must inc fr->mr->ref here
> +     */
> +    memory_region_get(fr->mr);
> +    physmap_put(map);
>
>      ret.mr = fr->mr;
>      range = addrrange_intersection(range, fr->addr);
> @@ -1497,10 +1532,13 @@ void memory_global_sync_dirty_bitmap(MemoryRegion *address_space)
>  {
>      AddressSpace *as = memory_region_to_address_space(address_space);
>      FlatRange *fr;
> +    PhysMap *map = cur_map_get();
> +    FlatView *view = &map->views[as->view_id];
>
> -    FOR_EACH_FLAT_RANGE(fr, &as->current_map) {
> +    FOR_EACH_FLAT_RANGE(fr, view) {
>          MEMORY_LISTENER_UPDATE_REGION(fr, as, Forward, log_sync);
>      }
> +    physmap_put(map);
>  }
>
>  void memory_global_dirty_log_start(void)
> @@ -1519,6 +1557,8 @@ static void listener_add_address_space(MemoryListener *listener,
>                                         AddressSpace *as)
>  {
>      FlatRange *fr;
> +    PhysMap *map;
> +    FlatView *view;
>
>      if (listener->address_space_filter
>          && listener->address_space_filter != as->root) {
> @@ -1528,7 +1568,10 @@ static void listener_add_address_space(MemoryListener *listener,
>      if (global_dirty_log) {
>          listener->log_global_start(listener);
>      }
> -    FOR_EACH_FLAT_RANGE(fr, &as->current_map) {
> +
> +    map = cur_map_get();
> +    view = &map->views[as->view_id];
> +    FOR_EACH_FLAT_RANGE(fr, view) {
>          MemoryRegionSection section = {
>              .mr = fr->mr,
>              .address_space = as->root,
> @@ -1539,6 +1582,7 @@ static void listener_add_address_space(MemoryListener *listener,
>          };
>          listener->region_add(listener, &section);
>      }
> +    physmap_put(map);
>  }
>
>  void memory_listener_register(MemoryListener *listener, MemoryRegion *filter)
> @@ -1570,12 +1614,14 @@ void memory_listener_unregister(MemoryListener *listener)
>  void set_system_memory_map(MemoryRegion *mr)
>  {
>      address_space_memory.root = mr;
> +    address_space_memory.view_id = 0;
>      memory_region_update_topology(NULL);
>  }
>
>  void set_system_io_map(MemoryRegion *mr)
>  {
>      address_space_io.root = mr;
> +    address_space_io.view_id = 1;
>      memory_region_update_topology(NULL);
>  }
>
> diff --git a/memory.h b/memory.h
> index 357edd8..18442d4 100644
> --- a/memory.h
> +++ b/memory.h
> @@ -256,7 +256,7 @@ typedef struct MemoryListener MemoryListener;
>   * Use with memory_listener_register() and memory_listener_unregister().
>   */
>  struct MemoryListener {
> -    void (*begin)(MemoryListener *listener);
> +    void (*begin)(MemoryListener *listener, PhysMap *next);
>      void (*commit)(MemoryListener *listener);
>      void (*region_add)(MemoryListener *listener, MemoryRegionSection *section);
>      void (*region_del)(MemoryListener *listener, MemoryRegionSection *section);
> @@ -829,6 +829,13 @@ void mtree_info(fprintf_function mon_printf, void *f);
>
>  void memory_region_get(MemoryRegion *mr);
>  void memory_region_put(MemoryRegion *mr);
> +void physmap_reclaimer_enqueue(void *opaque, ReleaseHandler *release);
> +void physmap_get(PhysMap *map);
> +void physmap_put(PhysMap *map);
> +PhysMap *cur_map_get(void);
> +PhysMap *alloc_next_map(void);
> +void cur_map_update(PhysMap *next);
> +void physmap_init(void);
>  #endif
>
>  #endif
> diff --git a/vl.c b/vl.c
> index 1329c30..12af523 100644
> --- a/vl.c
> +++ b/vl.c
> @@ -3346,6 +3346,7 @@ int main(int argc, char **argv, char **envp)
>      if (ram_size == 0) {
>          ram_size = DEFAULT_RAM_SIZE * 1024 * 1024;
>      }
> +    physmap_init();
>
>      configure_accelerator();
>
> diff --git a/xen-all.c b/xen-all.c
> index 59f2323..41d82fd 100644
> --- a/xen-all.c
> +++ b/xen-all.c
> @@ -452,7 +452,7 @@ static void xen_set_memory(struct MemoryListener *listener,
>      }
>  }
>
> -static void xen_begin(MemoryListener *listener)
> +static void xen_begin(MemoryListener *listener, PhysMap *next)
>  {
>  }
>
> --
> 1.7.4.4
>
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]
  Powered by Linux