OHMURA Kei wrote: >>> Would be great if you could provide a version for upstream as well >>> because it will likely replace this qemu-kvm code one day. >> O.K. We'll prepare it. > > > We have implemented the version for upstream. Some source code is borrowed > from qemu-kvm.c. It is not fully tested yet, though. > > We also did a performance test against this patch. Test environment is the same > as the email I sent before. > > > Experimental results: > Test1: Guest OS read 3GB file, which is bigger than memory. > #called orig.(msec) patch(msec) ratio > 14 3.79 0.18 20.8 > 12 3.20 0.15 21.4 > 11 2.89 0.14 21.0 > > Test2: Guest OS read/write 3GB file, which is bigger than memory. > #called orig.(msec) patch(msec) ratio > 364 180 8.70 20.7 > 326 161 7.71 20.9 > 474 235 11.7 20.1 > Wow, so we were really inefficient here. Nice work! Once you are done with your tests, please post this against qemu-kvm.git's uq/master so that Avi or Marcelo can push it upstream. Minor remarks below. > > --- > kvm-all.c | 80 +++++++++++++++++++++++++++++++++++++++++++++++++----------- > 1 files changed, 65 insertions(+), 15 deletions(-) > > diff --git a/kvm-all.c b/kvm-all.c > index 15ec38e..9666843 100644 > --- a/kvm-all.c > +++ b/kvm-all.c > @@ -279,9 +279,69 @@ int kvm_set_migration_log(int enable) > return 0; > } > > -static int test_le_bit(unsigned long nr, unsigned char *addr) > +static inline void kvm_get_dirty_pages_log_range_by_byte(unsigned int start, I don't think inline is appropriate here. Smart compilers are able to do this on their own. And small code footprint actually contributes to speed as well. > + unsigned int end, > + unsigned char *bitmap, > + unsigned long offset) > { > - return (addr[nr >> 3] >> (nr & 7)) & 1; > + unsigned int i, j, n = 0; > + unsigned long page_number, addr, addr1; > + ram_addr_t ram_addr; > + unsigned char c; > + > + /* > + * bitmap-traveling is faster than memory-traveling (for addr...) > + * especially when most of the memory is not dirty. 
> + */ > + for (i = start; i < end; i++) { > + c = bitmap[i]; > + while (c > 0) { > + j = ffsl(c) - 1; > + c &= ~(1u << j); > + page_number = i * 8 + j; > + addr1 = page_number * TARGET_PAGE_SIZE; > + addr = offset + addr1; > + ram_addr = cpu_get_physical_page_desc(addr); > + cpu_physical_memory_set_dirty(ram_addr); > + n++; > + } > + } > +} > + > +static int kvm_get_dirty_pages_log_range_by_long(unsigned long start_addr, > + unsigned char *bitmap, > + unsigned long mem_size) > +{ > + unsigned int i; > + unsigned int len; > + unsigned long *bitmap_ul = (unsigned long *)bitmap; > + > + /* bitmap-traveling by long size is faster than by byte size > + * especially when most of memory is not dirty. > + * bitmap should be long-size aligned for traveling by long. > + */ > + if (((unsigned long)bitmap & (TARGET_LONG_SIZE - 1)) == 0) { > + len = ((mem_size / TARGET_PAGE_SIZE) + TARGET_LONG_BITS - 1) / > + TARGET_LONG_BITS; > + for (i = 0; i < len; i++) > + if (bitmap_ul[i] != 0) > + kvm_get_dirty_pages_log_range_by_byte(i * TARGET_LONG_SIZE, > + (i + 1) * TARGET_LONG_SIZE, bitmap, start_addr); Missing { }, 2x. > + /* > + * We will check the remaining dirty-bitmap, > + * when the mem_size is not a multiple of TARGET_LONG_SIZE. > + */ > + if ((mem_size & (TARGET_LONG_SIZE - 1)) != 0) { > + len = ((mem_size / TARGET_PAGE_SIZE) + 7) / 8; > + kvm_get_dirty_pages_log_range_by_byte(i * TARGET_LONG_SIZE, > + len, bitmap, start_addr); This line should be indented to the '('. > + } > + } else { /* slow path: traveling by byte. 
*/ > + len = ((mem_size / TARGET_PAGE_SIZE) + 7) / 8; > + kvm_get_dirty_pages_log_range_by_byte(0, len, bitmap, start_addr); > + } > + > + return 0; > } > > /** > @@ -297,8 +357,6 @@ int kvm_physical_sync_dirty_bitmap(target_phys_addr_t start_addr, > { > KVMState *s = kvm_state; > unsigned long size, allocated_size = 0; > - target_phys_addr_t phys_addr; > - ram_addr_t addr; > KVMDirtyLog d; > KVMSlot *mem; > int ret = 0; > @@ -327,17 +385,9 @@ int kvm_physical_sync_dirty_bitmap(target_phys_addr_t start_addr, > break; > } > > - for (phys_addr = mem->start_addr, addr = mem->phys_offset; > - phys_addr < mem->start_addr + mem->memory_size; > - phys_addr += TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) { > - unsigned char *bitmap = (unsigned char *)d.dirty_bitmap; > - unsigned nr = (phys_addr - mem->start_addr) >> TARGET_PAGE_BITS; > - > - if (test_le_bit(nr, bitmap)) { > - cpu_physical_memory_set_dirty(addr); > - } > - } > - start_addr = phys_addr; > + kvm_get_dirty_pages_log_range_by_long(mem->start_addr, > + d.dirty_bitmap, mem->memory_size); > + start_addr = mem->start_addr + mem->memory_size; > } > qemu_free(d.dirty_bitmap); > Thanks, Jan -- Siemens AG, Corporate Technology, CT T DE IT 1 Corporate Competence Center Embedded Linux -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html