The ram cache is initially the same as the PVM's memory. At each checkpoint, we cache the dirty memory of the PVM into the ram cache (so that the ram cache is always the same as the PVM's memory at every checkpoint), then flush the cached memory to the SVM after we have received all of the PVM's dirty memory (only pages that were dirtied on the PVM or on the SVM since the last checkpoint need to be flushed). Signed-off-by: Yang Hongyang <yanghy@xxxxxxxxxxxxxx> --- arch_init.c | 154 ++++++++++++++++++++++++++++++++++++- include/exec/cpu-all.h | 1 + include/migration/migration-colo.h | 3 + migration-colo.c | 4 + 4 files changed, 159 insertions(+), 3 deletions(-) diff --git a/arch_init.c b/arch_init.c index c84e6c8..009bcb5 100644 --- a/arch_init.c +++ b/arch_init.c @@ -1013,6 +1013,7 @@ static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host) return 0; } +static void *memory_region_get_ram_cache_ptr(MemoryRegion *mr, RAMBlock *block); static inline void *host_from_stream_offset(QEMUFile *f, ram_addr_t offset, int flags) @@ -1027,7 +1028,12 @@ static inline void *host_from_stream_offset(QEMUFile *f, return NULL; } - return memory_region_get_ram_ptr(block->mr) + offset; + if (is_slave()) { + migration_bitmap_set_dirty(block->mr->ram_addr + offset); + return memory_region_get_ram_cache_ptr(block->mr, block) + offset; + } else { + return memory_region_get_ram_ptr(block->mr) + offset; + } } len = qemu_get_byte(f); @@ -1035,8 +1041,15 @@ static inline void *host_from_stream_offset(QEMUFile *f, id[len] = 0; QTAILQ_FOREACH(block, &ram_list.blocks, next) { - if (!strncmp(id, block->idstr, sizeof(id))) - return memory_region_get_ram_ptr(block->mr) + offset; + if (!strncmp(id, block->idstr, sizeof(id))) { + if (is_slave()) { + migration_bitmap_set_dirty(block->mr->ram_addr + offset); + return memory_region_get_ram_cache_ptr(block->mr, block) + + offset; + } else { + return memory_region_get_ram_ptr(block->mr) + offset; + } + } } error_report("Can't find block %s!", id); @@ -1054,11 +1067,13 @@ void ram_handle_compressed(void *host, 
uint8_t ch, uint64_t size) } } +static void ram_flush_cache(void); static int ram_load(QEMUFile *f, void *opaque, int version_id) { ram_addr_t addr; int flags, ret = 0; static uint64_t seq_iter; + bool need_flush = false; seq_iter++; @@ -1121,6 +1136,7 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id) break; } + need_flush = true; ch = qemu_get_byte(f); ram_handle_compressed(host, ch, TARGET_PAGE_SIZE); } else if (flags & RAM_SAVE_FLAG_PAGE) { @@ -1133,6 +1149,7 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id) break; } + need_flush = true; qemu_get_buffer(f, host, TARGET_PAGE_SIZE); } else if (flags & RAM_SAVE_FLAG_XBZRLE) { void *host = host_from_stream_offset(f, addr, flags); @@ -1148,6 +1165,7 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id) ret = -EINVAL; break; } + need_flush = true; } else if (flags & RAM_SAVE_FLAG_HOOK) { ram_control_load_hook(f, flags); } else if (flags & RAM_SAVE_FLAG_EOS) { @@ -1161,11 +1179,141 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id) ret = qemu_file_get_error(f); } + if (!ret && is_slave() && need_flush) { + ram_flush_cache(); + } + DPRINTF("Completed load of VM with exit code %d seq iteration " "%" PRIu64 "\n", ret, seq_iter); return ret; } +/* + * colo cache: this is for secondary VM, we cache the whole + * memory of the secondary VM. 
+ */ +void create_and_init_ram_cache(void) +{ + /* + * called after first migration + */ + RAMBlock *block; + int64_t ram_cache_pages = last_ram_offset() >> TARGET_PAGE_BITS; + + QTAILQ_FOREACH(block, &ram_list.blocks, next) { + block->host_cache = g_malloc(block->length); + memcpy(block->host_cache, block->host, block->length); + } + + migration_bitmap = bitmap_new(ram_cache_pages); + migration_dirty_pages = 0; + memory_global_dirty_log_start(); +} + +void release_ram_cache(void) +{ + RAMBlock *block; + + if (migration_bitmap) { + memory_global_dirty_log_stop(); + g_free(migration_bitmap); + migration_bitmap = NULL; + } + + QTAILQ_FOREACH(block, &ram_list.blocks, next) { + g_free(block->host_cache); + } +} + +static void *memory_region_get_ram_cache_ptr(MemoryRegion *mr, RAMBlock *block) +{ + if (mr->alias) { + return memory_region_get_ram_cache_ptr(mr->alias, block) + + mr->alias_offset; + } + + assert(mr->terminates); + + ram_addr_t addr = mr->ram_addr & TARGET_PAGE_MASK; + + assert(addr - block->offset < block->length); + + return block->host_cache + (addr - block->offset); +} + +static inline +ram_addr_t host_bitmap_find_and_reset_dirty(MemoryRegion *mr, + ram_addr_t start) +{ + unsigned long base = mr->ram_addr >> TARGET_PAGE_BITS; + unsigned long nr = base + (start >> TARGET_PAGE_BITS); + unsigned long size = base + (int128_get64(mr->size) >> TARGET_PAGE_BITS); + + unsigned long next; + + next = find_next_bit(ram_list.dirty_memory[DIRTY_MEMORY_MIGRATION], + size, nr); + if (next < size) { + clear_bit(next, ram_list.dirty_memory[DIRTY_MEMORY_MIGRATION]); + } + return (next - base) << TARGET_PAGE_BITS; +} + +static void ram_flush_cache(void) +{ + RAMBlock *block = NULL; + void *dst_host; + void *src_host; + ram_addr_t ca = 0, ha = 0; + bool got_ca = 0, got_ha = 0; + int64_t host_dirty = 0, both_dirty = 0; + + address_space_sync_dirty_bitmap(&address_space_memory); + + block = QTAILQ_FIRST(&ram_list.blocks); + while (true) { + if (ca < block->length && ca <= 
ha) { + ca = migration_bitmap_find_and_reset_dirty(block->mr, ca); + if (ca < block->length) { + got_ca = 1; + } + } + if (ha < block->length && ha <= ca) { + ha = host_bitmap_find_and_reset_dirty(block->mr, ha); + if (ha < block->length && ha != ca) { + got_ha = 1; + } + host_dirty += (ha < block->length ? 1 : 0); + both_dirty += (ha < block->length && ha == ca ? 1 : 0); + } + if (ca >= block->length && ha >= block->length) { + ca = 0; + ha = 0; + block = QTAILQ_NEXT(block, next); + if (!block) { + break; + } + } else { + if (got_ha) { + got_ha = 0; + dst_host = memory_region_get_ram_ptr(block->mr) + ha; + src_host = memory_region_get_ram_cache_ptr(block->mr, block) + + ha; + memcpy(dst_host, src_host, TARGET_PAGE_SIZE); + } + if (got_ca) { + got_ca = 0; + dst_host = memory_region_get_ram_ptr(block->mr) + ca; + src_host = memory_region_get_ram_cache_ptr(block->mr, block) + + ca; + memcpy(dst_host, src_host, TARGET_PAGE_SIZE); + } + } + } + + assert(migration_dirty_pages == 0); +} + static SaveVMHandlers savevm_ram_handlers = { .save_live_setup = ram_save_setup, .save_live_iterate = ram_save_iterate, diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h index f91581f..029c984 100644 --- a/include/exec/cpu-all.h +++ b/include/exec/cpu-all.h @@ -300,6 +300,7 @@ CPUArchState *cpu_copy(CPUArchState *env); typedef struct RAMBlock { struct MemoryRegion *mr; uint8_t *host; + uint8_t *host_cache; ram_addr_t offset; ram_addr_t length; uint32_t flags; diff --git a/include/migration/migration-colo.h b/include/migration/migration-colo.h index c286a60..52187dd 100644 --- a/include/migration/migration-colo.h +++ b/include/migration/migration-colo.h @@ -29,5 +29,8 @@ void restore_exit_colo(void); bool is_slave(void); void colo_process_incoming_checkpoints(QEMUFile *f); +/* ram cache */ +void create_and_init_ram_cache(void); +void release_ram_cache(void); #endif diff --git a/migration-colo.c b/migration-colo.c index 13a6a57..52156e7 100644 --- a/migration-colo.c +++ 
b/migration-colo.c @@ -554,6 +554,8 @@ void colo_process_incoming_checkpoints(QEMUFile *f) colo_buffer_init(); + create_and_init_ram_cache(); + ret = colo_ctl_put(ctl, COLO_READY); if (ret) { goto out; @@ -631,6 +633,8 @@ out: qemu_fclose(fb); } + release_ram_cache(); + if (ctl) { qemu_fclose(ctl); } -- 1.9.1 -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html