--
Makefile.target | 1 +
arch_init.c | 351 ++++++++++++++++++++++++++++++++++++++++++++++------
block-migration.c | 3 +-
hash.h | 72 +++++++++++
hmp-commands.hx | 36 ++++--
hw/hw.h | 3 +-
lru.c | 142 +++++++++++++++++++++
lru.h | 13 ++
migration-exec.c | 6 +-
migration-fd.c | 6 +-
migration-tcp.c | 6 +-
migration-unix.c | 6 +-
migration.c | 119 +++++++++++++++++-
migration.h | 25 +++-
qmp-commands.hx | 43 ++++++-
savevm.c | 13 ++-
sysemu.h | 13 ++-
xbzrle.c | 126 +++++++++++++++++++
xbzrle.h | 12 ++
19 files changed, 917 insertions(+), 79 deletions(-)
diff --git a/Makefile.target b/Makefile.target
index 2800f47..b3215de 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -186,6 +186,7 @@ endif #CONFIG_BSD_USER
ifdef CONFIG_SOFTMMU
obj-y = arch_init.o cpus.o monitor.o machine.o gdbstub.o balloon.o
+obj-y += lru.o xbzrle.o
# virtio has to be here due to weird dependency between PCI and virtio-net.
# need to fix this properly
obj-y += virtio-blk.o virtio-balloon.o virtio-net.o virtio-serial-bus.o
diff --git a/arch_init.c b/arch_init.c
old mode 100644
new mode 100755
index 4486925..d67dc82
--- a/arch_init.c
+++ b/arch_init.c
@@ -40,6 +40,17 @@
#include "net.h"
#include "gdbstub.h"
#include "hw/smbios.h"
+#include "lru.h"
+#include "xbzrle.h"
+
+/* Uncomment the define below to make DPRINTF() print to stdout. */
+//#define DEBUG_ARCH_INIT
+#ifdef DEBUG_ARCH_INIT
+/* Debug build: prefix every message with "arch_init: ". */
+#define DPRINTF(fmt, ...) \
+ do { fprintf(stdout, "arch_init: " fmt, ## __VA_ARGS__); } while (0)
+#else
+/* Normal build: DPRINTF() expands to an empty statement. */
+#define DPRINTF(fmt, ...) \
+ do { } while (0)
+#endif
#ifdef TARGET_SPARC
int graphic_width = 1024;
@@ -88,6 +99,161 @@ const uint32_t arch_type = QEMU_ARCH;
#define RAM_SAVE_FLAG_PAGE 0x08
#define RAM_SAVE_FLAG_EOS 0x10
#define RAM_SAVE_FLAG_CONTINUE 0x20
+#define RAM_SAVE_FLAG_XBZRLE 0x40
+
+/***********************************************************/
+/* RAM Migration State */
+/* XBZRLE settings for RAM migration, filled in by arch_set_params(). */
+typedef struct ArchMigrationState {
+ /* non-zero selects the XBZRLE page cache / delta path */
+ int use_xbrle;
+ /* cache budget; divided by TARGET_PAGE_SIZE to get the item count */
+ int64_t xbrle_cache_size;
+} ArchMigrationState;
+
+/* Single file-wide instance read by the RAM save code. */
+static ArchMigrationState arch_mig_state;
+
+/*
+ * Remember the XBZRLE options for the RAM migration code.
+ *
+ * Only use_xbrle and xbrle_cache_size are stored; blk_enable,
+ * shared_base and opaque are accepted but not used here.
+ */
+void arch_set_params(int blk_enable, int shared_base, int use_xbrle,
+                     int64_t xbrle_cache_size, void *opaque)
+{
+    ArchMigrationState *state = &arch_mig_state;
+
+    state->xbrle_cache_size = xbrle_cache_size;
+    state->use_xbrle = use_xbrle;
+}
+
+#define BE16_MAGIC 0x0123
+
+/***********************************************************/
+/* XBZRLE (Xor Binary Zero Run-Length Encoding) */
+/*
+ * Header that precedes every XBZRLE-encoded page in the stream.
+ * The save and load code transfer it field by field, not as a raw struct.
+ */
+typedef struct XBZRLEHeader {
+ uint32_t xh_cksum; /* not used */
+ /* BE16_MAGIC written in sender byte order; the loader compares it */
+ uint16_t xh_magic;
+ /* number of encoded payload bytes that follow the header */
+ uint16_t xh_len;
+ /* encoding flags, e.g. ENCODING_FLAG_XBZRLE */
+ uint8_t xh_flags;
+} XBZRLEHeader;
+
+/* Static page-sized buffer cached in place of a copy for all-zero pages. */
+static uint8_t dup_buf[TARGET_PAGE_SIZE];
+
+/***********************************************************/
+/* accounting */
+/* Counters kept while saving RAM; reset by acct_clear(). */
+typedef struct AccountingInfo{
+ /* pages sent as a single fill byte */
+ uint64_t dup_pages;
+ /* pages sent uncompressed */
+ uint64_t norm_pages;
+ /* bytes accounted for XBZRLE-encoded pages */
+ uint64_t xbrle_bytes;
+ /* pages sent XBZRLE-encoded */
+ uint64_t xbrle_pages;
+ /* pages whose encoding reported a negative length and fell back */
+ uint64_t xbrle_overflow;
+ /* page cache lookups attempted */
+ uint64_t xbrle_cache_lookup;
+ /* page cache lookups that found an entry */
+ uint64_t xbrle_cache_hit;
+ /* calls to ram_save_block() from the rate-limited loop */
+ uint64_t iterations;
+} AccountingInfo;
+
+/* Single file-wide set of counters. */
+static AccountingInfo acct_info;
+
+/* Reset every migration accounting counter to zero. */
+static void acct_clear(void)
+{
+    acct_info = (AccountingInfo){0};
+}
+
+/*
+ * Bytes accounted for duplicate pages. Returns the page count because
+ * each duplicate page is accounted as one payload byte.
+ */
+uint64_t dup_mig_bytes_transferred(void)
+{
+ return acct_info.dup_pages;
+}
+
+/* Number of pages sent as a single fill byte. */
+uint64_t dup_mig_pages_transferred(void)
+{
+ return acct_info.dup_pages;
+}
+
+/* Bytes sent as uncompressed pages (page count times TARGET_PAGE_SIZE). */
+uint64_t norm_mig_bytes_transferred(void)
+{
+ return acct_info.norm_pages * TARGET_PAGE_SIZE;
+}
+
+/* Number of pages sent uncompressed. */
+uint64_t norm_mig_pages_transferred(void)
+{
+ return acct_info.norm_pages;
+}
+
+/* Bytes accounted for XBZRLE-encoded pages. */
+uint64_t xbrle_mig_bytes_transferred(void)
+{
+ return acct_info.xbrle_bytes;
+}
+
+/* Number of pages sent XBZRLE-encoded. */
+uint64_t xbrle_mig_pages_transferred(void)
+{
+ return acct_info.xbrle_pages;
+}
+
+/* Number of pages whose XBZRLE encoding overflowed. */
+uint64_t xbrle_mig_pages_overflow(void)
+{
+ return acct_info.xbrle_overflow;
+}
+
+/* Number of page cache lookups that found an entry. */
+uint64_t xbrle_mig_pages_cache_hit(void)
+{
+ return acct_info.xbrle_cache_hit;
+}
+
+/* Number of page cache lookups attempted. */
+uint64_t xbrle_mig_pages_cache_lookup(void)
+{
+ return acct_info.xbrle_cache_lookup;
+}
+
+/*
+ * Write the per-page header: a 64-bit word holding the offset OR-ed with
+ * the cont and flag bits. When cont is zero the block id follows as a
+ * length byte plus the id bytes (no terminator).
+ */
+static void save_block_hdr(QEMUFile *f, RAMBlock *block, ram_addr_t offset,
+                           int cont, int flag)
+{
+    size_t id_len;
+
+    qemu_put_be64(f, offset | cont | flag);
+    if (cont) {
+        /* same block as the previous page: the id is not repeated */
+        return;
+    }
+
+    id_len = strlen(block->idstr);
+    qemu_put_byte(f, id_len);
+    qemu_put_buffer(f, (uint8_t *)block->idstr, id_len);
+}
+
+#define ENCODING_FLAG_XBZRLE 0x1
+
+/*
+ * Try to send one page as an XBZRLE delta against its cached copy.
+ *
+ * f:            migration stream
+ * current_page: current contents of the page
+ * current_addr: key of the page in the page cache
+ * block/offset/cont: passed through to save_block_hdr()
+ *
+ * Returns the number of bytes accounted for the page, or 0 when nothing
+ * was written (page not cached, or the encoder reported an overflow) so
+ * that the caller sends the page uncompressed.
+ */
+static int save_xbrle_page(QEMUFile *f, uint8_t *current_page,
+        ram_addr_t current_addr, RAMBlock *block, ram_addr_t offset, int cont)
+{
+    int encoded_len = 0, bytes_sent = 0;
+    XBZRLEHeader hdr = {0, BE16_MAGIC};
+    uint8_t *encoded, *old_page;
+
+    /* abort if page not cached */
+    acct_info.xbrle_cache_lookup++;
+    old_page = lru_lookup(current_addr);
+    if (!old_page) {
+        /*
+         * Nothing has been allocated yet, so return directly instead of
+         * going through the cleanup label with an unset buffer pointer.
+         */
+        return 0;
+    }
+    acct_info.xbrle_cache_hit++;
+
+    /* XBZRLE (XOR+ZRLE) encoding */
+    encoded = (uint8_t *) qemu_malloc(TARGET_PAGE_SIZE);
+    encoded_len = xbzrle_encode(encoded, old_page, current_page,
+                                TARGET_PAGE_SIZE);
+
+    if (encoded_len < 0) {
+        DPRINTF("XBZRLE encoding overflow - sending uncompressed\n");
+        acct_info.xbrle_overflow++;
+        goto done;
+    }
+
+    hdr.xh_len = encoded_len;
+    hdr.xh_flags |= ENCODING_FLAG_XBZRLE;
+
+    /* Send XBZRLE compressed page */
+    save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_XBZRLE);
+
+    qemu_put_be32(f, hdr.xh_cksum);
+    /* magic goes out in host byte order so the loader can compare it */
+    qemu_put_buffer(f, (uint8_t *)&hdr.xh_magic, sizeof (hdr.xh_magic));
+    qemu_put_be16(f, hdr.xh_len);
+    qemu_put_byte(f, hdr.xh_flags);
+
+    qemu_put_buffer(f, encoded, encoded_len);
+    acct_info.xbrle_pages++;
+    bytes_sent = encoded_len + sizeof(hdr);
+    acct_info.xbrle_bytes += bytes_sent;
+
+done:
+    qemu_free(encoded);
+    return bytes_sent;
+}
static int is_dup_page(uint8_t *page, uint8_t ch)
{
@@ -107,7 +273,7 @@ static int is_dup_page(uint8_t *page, uint8_t ch)
static RAMBlock *last_block;
static ram_addr_t last_offset;
-static int ram_save_block(QEMUFile *f)
+static int ram_save_block(QEMUFile *f, int stage)
{
RAMBlock *block = last_block;
ram_addr_t offset = last_offset;
@@ -120,6 +286,7 @@ static int ram_save_block(QEMUFile *f)
current_addr = block->offset + offset;
do {
+ lru_free_cb_t free_cb = qemu_free;
if (cpu_physical_memory_get_dirty(current_addr, MIGRATION_DIRTY_FLAG)) {
uint8_t *p;
int cont = (block == last_block) ? RAM_SAVE_FLAG_CONTINUE : 0;
@@ -128,28 +295,35 @@ static int ram_save_block(QEMUFile *f)
current_addr + TARGET_PAGE_SIZE,
MIGRATION_DIRTY_FLAG);
- p = block->host + offset;
+ if (arch_mig_state.use_xbrle) {
+ p = qemu_malloc(TARGET_PAGE_SIZE);
+ memcpy(p, block->host + offset, TARGET_PAGE_SIZE);
+ } else {
+ p = block->host + offset;
+ }
if (is_dup_page(p, *p)) {
- qemu_put_be64(f, offset | cont | RAM_SAVE_FLAG_COMPRESS);
- if (!cont) {
- qemu_put_byte(f, strlen(block->idstr));
- qemu_put_buffer(f, (uint8_t *)block->idstr,
- strlen(block->idstr));
- }
+ save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_COMPRESS);
qemu_put_byte(f, *p);
bytes_sent = 1;
- } else {
- qemu_put_be64(f, offset | cont | RAM_SAVE_FLAG_PAGE);
- if (!cont) {
- qemu_put_byte(f, strlen(block->idstr));
- qemu_put_buffer(f, (uint8_t *)block->idstr,
- strlen(block->idstr));
+ acct_info.dup_pages++;
+ if (arch_mig_state.use_xbrle&& !*p) {
+ p = dup_buf;
+ free_cb = NULL;
}
+ } else if (stage == 2&& arch_mig_state.use_xbrle) {
+ bytes_sent = save_xbrle_page(f, p, current_addr, block,
+ offset, cont);
+ }
+ if (!bytes_sent) {
+ save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_PAGE);
qemu_put_buffer(f, p, TARGET_PAGE_SIZE);
bytes_sent = TARGET_PAGE_SIZE;
+ acct_info.norm_pages++;
+ }
+ if (arch_mig_state.use_xbrle) {
+ lru_insert(current_addr, p, free_cb);
}
-
break;
}
@@ -221,6 +395,9 @@ int ram_save_live(Monitor *mon, QEMUFile *f, int stage, void *opaque)
if (stage< 0) {
cpu_physical_memory_set_dirty_tracking(0);
+ if (arch_mig_state.use_xbrle) {
+ lru_fini();
+ }
return 0;
}
@@ -235,6 +412,11 @@ int ram_save_live(Monitor *mon, QEMUFile *f, int stage, void *opaque)
last_block = NULL;
last_offset = 0;
+ if (arch_mig_state.use_xbrle) {
+ lru_init(arch_mig_state.xbrle_cache_size/TARGET_PAGE_SIZE, 0);
+ acct_clear();
+ }
+
/* Make sure all dirty bits are set */
QLIST_FOREACH(block,&ram_list.blocks, next) {
for (addr = block->offset; addr< block->offset + block->length;
@@ -264,8 +446,9 @@ int ram_save_live(Monitor *mon, QEMUFile *f, int stage, void *opaque)
while (!qemu_file_rate_limit(f)) {
int bytes_sent;
- bytes_sent = ram_save_block(f);
+ bytes_sent = ram_save_block(f, stage);
bytes_transferred += bytes_sent;
+ acct_info.iterations++;
if (bytes_sent == 0) { /* no more blocks */
break;
}
@@ -285,19 +468,79 @@ int ram_save_live(Monitor *mon, QEMUFile *f, int stage, void *opaque)
int bytes_sent;
/* flush all remaining blocks regardless of rate limiting */
- while ((bytes_sent = ram_save_block(f)) != 0) {
+ while ((bytes_sent = ram_save_block(f, stage))) {
bytes_transferred += bytes_sent;
}
cpu_physical_memory_set_dirty_tracking(0);
+ if (arch_mig_state.use_xbrle) {
+ lru_fini();
+ }
}
qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
expected_time = ram_save_remaining() * TARGET_PAGE_SIZE / bwidth;
+ DPRINTF("ram_save_live: expected(%ld)<= max(%ld)?\n", expected_time,
+ migrate_max_downtime());
+
return (stage == 2)&& (expected_time<= migrate_max_downtime());
}
+/*
+ * Read one XBZRLE-encoded page from the stream and decode it in place
+ * over the page at host.
+ *
+ * f:    migration stream, positioned at the XBZRLE header
+ * addr: RAM address of the page (not used by this function)
+ * host: host pointer to the page; serves as both the previous contents
+ *       and the decode destination
+ *
+ * Returns 0 on success and -1 on a bad header, a decode error, or a
+ * decoded size other than TARGET_PAGE_SIZE.
+ */
+static int load_xbrle(QEMUFile *f, ram_addr_t addr, void *host)
+{
+    int len, rc = -1;
+    uint8_t *encoded;
+    XBZRLEHeader hdr = {0};
+
+    /* extract ZRLE header */
+    hdr.xh_cksum = qemu_get_be32(f);
+    qemu_get_buffer(f, (uint8_t *)&hdr.xh_magic, sizeof (hdr.xh_magic));
+    hdr.xh_len = qemu_get_be16(f);
+    hdr.xh_flags = qemu_get_byte(f);
+
+    /*
+     * The header checks run before the buffer is allocated, so they
+     * return directly rather than reaching the cleanup label with an
+     * unset pointer.
+     */
+    if (!(hdr.xh_flags & ENCODING_FLAG_XBZRLE)) {
+        fprintf(stderr, "Failed to load XZBRLE page - wrong compression!\n");
+        return -1;
+    }
+
+    if (hdr.xh_len > TARGET_PAGE_SIZE) {
+        fprintf(stderr, "Failed to load XZBRLE page - len overflow!\n");
+        return -1;
+    }
+
+    /* load data and decode */
+    encoded = (uint8_t *) qemu_malloc(hdr.xh_len);
+    qemu_get_buffer(f, encoded, hdr.xh_len);
+    /* convert endianness if magic indicates destination differs from source */
+    if (hdr.xh_magic != BE16_MAGIC) {
+        const uint64_t *end = (uint64_t *) encoded +
+            hdr.xh_len / sizeof (uint64_t);
+        uint64_t *p;
+        for (p = (uint64_t *) encoded; p < end; p++) {
+            bswap64s(p);
+        }
+    }
+
+    /* decode ZRLE */
+    len = xbzrle_decode(host, host, encoded, hdr.xh_len);
+    if (len == -1) {
+        fprintf(stderr, "Failed to load XBZRLE page - decode error!\n");
+        goto done;
+    }
+
+    if (len != TARGET_PAGE_SIZE) {
+        fprintf(stderr, "Failed to load XBZRLE page - size %d expected %d!\n",
+                len, TARGET_PAGE_SIZE);
+        goto done;
+    }
+
+    rc = 0;
+done:
+    qemu_free(encoded);
+    return rc;
+}
+
static inline void *host_from_stream_offset(QEMUFile *f,
ram_addr_t offset,
int flags)
@@ -328,16 +571,38 @@ static inline void *host_from_stream_offset(QEMUFile *f,
return NULL;
}
+/*
+ * Resolve a stream offset to a host pointer for the given stream version.
+ *
+ * Version 3 streams are resolved with qemu_get_ram_ptr(); any other
+ * version goes through host_from_stream_offset(). Never returns NULL:
+ * a failed lookup prints a message and aborts.
+ */
+static inline void *host_from_stream_offset_versioned(int version_id,
+        QEMUFile *f, ram_addr_t offset, int flags)
+{
+    void *host;
+    if (version_id == 3) {
+        host = qemu_get_ram_ptr(offset);
+    } else {
+        host = host_from_stream_offset(f, offset, flags);
+    }
+    if (!host) {
+        /* cast so the format matches whatever width ram_addr_t has */
+        fprintf(stderr, "Failed to convert RAM address to host"
+                " for offset 0x%llX!\n", (unsigned long long)offset);
+        abort();
+    }
+    return host;
+}
+
int ram_load(QEMUFile *f, void *opaque, int version_id)
{
ram_addr_t addr;
- int flags;
+ int flags, ret = 0;
+ static uint64_t seq_iter;
+
+ seq_iter++;
if (version_id< 3 || version_id> 4) {
- return -EINVAL;
+ ret = -EINVAL;
+ goto done;
}
do {
+ void *host;
addr = qemu_get_be64(f);
flags = addr& ~TARGET_PAGE_MASK;
@@ -346,7 +611,8 @@ int ram_load(QEMUFile *f, void *opaque, int version_id)
if (flags& RAM_SAVE_FLAG_MEM_SIZE) {
if (version_id == 3) {
if (addr != ram_bytes_total()) {
- return -EINVAL;
+ ret = -EINVAL;
+ goto done;
}
} else {
/* Synchronize RAM block list */
@@ -365,8 +631,10 @@ int ram_load(QEMUFile *f, void *opaque, int version_id)
QLIST_FOREACH(block,&ram_list.blocks, next) {
if (!strncmp(id, block->idstr, sizeof(id))) {
- if (block->length != length)
- return -EINVAL;
+ if (block->length != length) {
+ ret = -EINVAL;
+ goto done;
+ }
break;
}
}
@@ -374,7 +642,8 @@ int ram_load(QEMUFile *f, void *opaque, int version_id)
if (!block) {
fprintf(stderr, "Unknown ramblock \"%s\", cannot "
"accept migration\n", id);
- return -EINVAL;
+ ret = -EINVAL;
+ goto done;
}
total_ram_bytes -= length;
@@ -383,17 +652,10 @@ int ram_load(QEMUFile *f, void *opaque, int version_id)
}
if (flags& RAM_SAVE_FLAG_COMPRESS) {
- void *host;
uint8_t ch;
- if (version_id == 3)
- host = qemu_get_ram_ptr(addr);
- else
- host = host_from_stream_offset(f, addr, flags);
- if (!host) {
- return -EINVAL;
- }
-
+ host = host_from_stream_offset_versioned(version_id,
+ f, addr, flags);
ch = qemu_get_byte(f);
memset(host, ch, TARGET_PAGE_SIZE);
#ifndef _WIN32
@@ -403,21 +665,28 @@ int ram_load(QEMUFile *f, void *opaque, int version_id)
}
#endif
} else if (flags& RAM_SAVE_FLAG_PAGE) {
- void *host;
-
- if (version_id == 3)
- host = qemu_get_ram_ptr(addr);
- else
- host = host_from_stream_offset(f, addr, flags);
-
+ host = host_from_stream_offset_versioned(version_id,
+ f, addr, flags);
qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
+ } else if (flags& RAM_SAVE_FLAG_XBZRLE) {
+ host = host_from_stream_offset_versioned(version_id,
+ f, addr, flags);
+ if (load_xbrle(f, addr, host)< 0) {
+ ret = -EINVAL;
+ goto done;
+ }
}
+
if (qemu_file_has_error(f)) {
- return -EIO;
+ ret = -EIO;
+ goto done;
}
} while (!(flags& RAM_SAVE_FLAG_EOS));
- return 0;
+done:
+ DPRINTF("Completed load of VM with exit code %d seq iteration %ld\n",
+ ret, seq_iter);
+ return ret;
}
void qemu_service_io(void)
diff --git a/block-migration.c b/block-migration.c
index 3e66f49..504df70 100644
--- a/block-migration.c
+++ b/block-migration.c
@@ -689,7 +689,8 @@ static int block_load(QEMUFile *f, void *opaque, int version_id)
return 0;
}
-static void block_set_params(int blk_enable, int shared_base, void *opaque)
+static void block_set_params(int blk_enable, int shared_base,
+ int use_xbrle, int64_t xbrle_cache_size, void *opaque)
{
block_mig_state.blk_enable = blk_enable;
block_mig_state.shared_base = shared_base;
diff --git a/hash.h b/hash.h
new file mode 100644
index 0000000..7109905
--- /dev/null
+++ b/hash.h
@@ -0,0 +1,72 @@
+#ifndef _LINUX_HASH_H
+#define _LINUX_HASH_H
+/* Fast hashing routine for ints, longs and pointers.
+ (C) 2002 William Lee Irwin III, IBM */
+
+/*
+ * Knuth recommends primes in approximately golden ratio to the maximum
+ * integer representable by a machine word for multiplicative hashing.
+ * Chuck Lever verified the effectiveness of this technique:
+ * http://www.citi.umich.edu/techreports/reports/citi-tr-00-1.pdf
+ *
+ * These primes are chosen to be bit-sparse, that is operations on
+ * them can use shifts and additions instead of multiplications for
+ * machines where multiplications are slow.
+ */
+
+typedef uint64_t u64;
+typedef uint32_t u32;
+#define BITS_PER_LONG TARGET_LONG_BITS
+
+/* 2^31 + 2^29 - 2^25 + 2^22 - 2^19 - 2^16 + 1 */
+#define GOLDEN_RATIO_PRIME_32 0x9e370001UL
+/* 2^63 + 2^61 - 2^57 + 2^54 - 2^51 - 2^18 + 1 */
+#define GOLDEN_RATIO_PRIME_64 0x9e37fffffffc0001UL
+
+#if BITS_PER_LONG == 32
+#define GOLDEN_RATIO_PRIME GOLDEN_RATIO_PRIME_32
+#define hash_long(val, bits) hash_32(val, bits)
+#elif BITS_PER_LONG == 64
+#define hash_long(val, bits) hash_64(val, bits)
+#define GOLDEN_RATIO_PRIME GOLDEN_RATIO_PRIME_64
+#else
+#error Wordsize not 32 or 64
+#endif
+
+/*
+ * Hash a 64-bit value down to `bits` bits.
+ *
+ * The shift/add/subtract sequence below applies the power-of-two terms
+ * listed in the comment on GOLDEN_RATIO_PRIME_64, and the top `bits`
+ * bits of the result are returned. `bits` must be non-zero: the final
+ * shift is by (64 - bits).
+ */
+static inline u64 hash_64(u64 val, unsigned int bits)
+{
+ u64 hash = val;
+
+ /* Sigh, gcc can't optimise this alone like it does for 32 bits. */
+ u64 n = hash;
+ n<<= 18;
+ hash -= n;
+ n<<= 33;
+ hash -= n;
+ n<<= 3;
+ hash += n;
+ n<<= 3;
+ hash -= n;
+ n<<= 4;
+ hash += n;
+ n<<= 2;
+ hash += n;
+
+ /* High bits are more random, so use them. */
+ return hash>> (64 - bits);
+}
+
+/*
+ * Hash a 32-bit value down to `bits` bits: multiply by the 32-bit
+ * golden-ratio prime and keep the top `bits` bits of the product.
+ */
+static inline u32 hash_32(u32 val, unsigned int bits)
+{
+    const unsigned int drop = 32 - bits;
+    u32 product = val * GOLDEN_RATIO_PRIME_32;
+
+    return product >> drop;
+}
+
+/* Hash a pointer value down to `bits` bits via hash_long(). */
+static inline unsigned long hash_ptr(void *ptr, unsigned int bits)
+{
+    unsigned long key = (unsigned long)ptr;
+
+    return hash_long(key, bits);
+}
+#endif /* _LINUX_HASH_H */
diff --git a/hmp-commands.hx b/hmp-commands.hx
old mode 100644
new mode 100755
index e5585ba..e49d5be
--- a/hmp-commands.hx
+++ b/hmp-commands.hx
@@ -717,24 +717,27 @@ ETEXI
{
.name = "migrate",
- .args_type = "detach:-d,blk:-b,inc:-i,uri:s",
- .params = "[-d] [-b] [-i] uri",
- .help = "migrate to URI (using -d to not wait for completion)"
- "\n\t\t\t -b for migration without shared storage with"
- " full copy of disk\n\t\t\t -i for migration without "
- "shared storage with incremental copy of disk "
- "(base image shared between src and destination)",
+ .args_type = "detach:-d,blk:-b,inc:-i,xbrle:-x,uri:s",
+ .params = "[-d] [-b] [-i] [-x] uri",
+ .help = "migrate to URI"
+ "\n\t -d to not wait for completion"
+ "\n\t -b for migration without shared storage with"
+ " full copy of disk"
+ "\n\t -i for migration without"
+ " shared storage with incremental copy of disk"
+ " (base image shared between source and destination)"
+ "\n\t -x to use XBRLE page delta compression",
.user_print = monitor_user_noop,
.mhandler.cmd_new = do_migrate,
},
-
STEXI
-@item migrate [-d] [-b] [-i] @var{uri}
+@item migrate [-d] [-b] [-i] [-x] @var{uri}
@findex migrate
Migrate to @var{uri} (using -d to not wait for completion).
-b for migration with full copy of disk
-i for migration with incremental copy of disk (base image is shared)
+ -x to use XBRLE page delta compression
ETEXI
{
@@ -753,10 +756,23 @@ Cancel the current VM migration.
ETEXI
{
+ .name = "migrate_set_cachesize",
+ .args_type = "value:s",
+ .params = "value",
+ .help = "set cache size (in MB) for XBRLE migrations",
+ .mhandler.cmd = do_migrate_set_cachesize,
+ },
+
+STEXI
+@item migrate_set_cachesize @var{value}
+Set cache size (in MB) for xbrle migrations.
+ETEXI
+
+ {
.name = "migrate_set_speed",
.args_type = "value:o",
.params = "value",
- .help = "set maximum speed (in bytes) for migrations. "
+ .help = "set maximum XBRLE cache size (in bytes) for migrations. "
"Defaults to MB if no size suffix is specified, ie. B/K/M/G/T",
.user_print = monitor_user_noop,
.mhandler.cmd_new = do_migrate_set_speed,
diff --git a/hw/hw.h b/hw/hw.h
index 9d2cfc2..aa336ec 100644
--- a/hw/hw.h
+++ b/hw/hw.h
@@ -239,7 +239,8 @@ static inline void qemu_get_sbe64s(QEMUFile *f, int64_t *pv)
int64_t qemu_ftell(QEMUFile *f);
int64_t qemu_fseek(QEMUFile *f, int64_t pos, int whence);
-typedef void SaveSetParamsHandler(int blk_enable, int shared, void * opaque);
+typedef void SaveSetParamsHandler(int blk_enable, int shared,
+ int use_xbrle, int64_t xbrle_cache_size, void *opaque);
typedef void SaveStateHandler(QEMUFile *f, void *opaque);
typedef int SaveLiveStateHandler(Monitor *mon, QEMUFile *f, int stage,
void *opaque);
diff --git a/lru.c b/lru.c
new file mode 100644
index 0000000..e7230d0
--- /dev/null
+++ b/lru.c
@@ -0,0 +1,142 @@
+#include<assert.h>
+#include<math.h>
+#include "qemu-common.h"
+#include "qemu-queue.h"
+#include "host-utils.h"
+#include "lru.h"
+#include "hash.h"
+
+/* One cached page; linked into both the LRU list and one hash bucket. */
+typedef struct CacheItem {
+ /* RAM address used as the lookup key */
+ ram_addr_t it_addr;
+ /* cached page contents */
+ uint8_t *it_data;
+ /* called on it_data when the item is removed; may be NULL */
+ lru_free_cb_t it_free;
+ QCIRCLEQ_ENTRY(CacheItem) it_lru_next;
+ QCIRCLEQ_ENTRY(CacheItem) it_bucket_next;
+} CacheItem;
+
+typedef QCIRCLEQ_HEAD(, CacheItem) CacheBucket;
+/* bucket array allocated by lru_init() and freed by lru_fini() */
+static CacheBucket *page_hash;
+/* number of buckets in page_hash */
+static int64_t cache_table_size;
+/* item count at which lru_insert() evicts before adding */
+static uint64_t cache_max_items;
+/* items currently held */
+static int64_t cache_num_items;
+/* hash width passed to hash_long() when picking a bucket */
+static uint8_t cache_hash_bits;
+
+/* all items in insertion order; the head is evicted first */
+static QCIRCLEQ_HEAD(page_lru, CacheItem) page_lru;
+
+/*
+ * Round v up to a power of two (v itself when it already is one) by
+ * smearing the highest set bit of v - 1 into every lower position and
+ * then adding one.
+ */
+static uint64_t next_pow_of_2(uint64_t v)
+{
+    unsigned int shift;
+
+    v -= 1;
+    /* shifts of 1, 2, 4, 8, 16 and 32 */
+    for (shift = 1; shift < 64; shift *= 2) {
+        v |= v >> shift;
+    }
+    return v + 1;
+}
+
+/*
+ * Set up an empty page cache.
+ *
+ * max_items: number of items at which lru_insert() starts evicting;
+ *            values below 1 are treated as 1
+ * param:     not used
+ */
+void lru_init(int64_t max_items, void *param)
+{
+    int64_t i;
+
+    /* a cache needs room for at least one page */
+    if (max_items < 1) {
+        max_items = 1;
+    }
+
+    cache_num_items = 0;
+    cache_max_items = max_items;
+    /* add 20% to table size to reduce collisions */
+    cache_table_size = next_pow_of_2(1.2 * max_items);
+    /* keep at least two buckets so the hash width never drops to zero */
+    if (cache_table_size < 2) {
+        cache_table_size = 2;
+    }
+    /*
+     * The table size is a power of two, so its trailing-zero count is
+     * the number of hash bits needed to address every bucket.
+     */
+    cache_hash_bits = ctz64(cache_table_size);
+
+    QCIRCLEQ_INIT(&page_lru);
+
+    page_hash = qemu_mallocz(sizeof(CacheBucket) * cache_table_size);
+    assert(page_hash);
+    for (i = 0; i < cache_table_size; i++) {
+        QCIRCLEQ_INIT(&page_hash[i]);
+    }
+}
+
+/* Return the hash bucket that holds (or would hold) the given address. */
+static CacheBucket *page_bucket_list(ram_addr_t addr)
+{
+    CacheBucket *bucket = page_hash + hash_long(addr, cache_hash_bits);
+
+    return bucket;
+}
+
+/*
+ * Unlink an item from the LRU list and from its hash bucket, release
+ * its data through the item's free callback (if any), then free the
+ * item itself.
+ */
+static void do_lru_remove(CacheItem *it)
+{
+    CacheBucket *bucket;
+
+    assert(it);
+
+    bucket = page_bucket_list(it->it_addr);
+    QCIRCLEQ_REMOVE(&page_lru, it, it_lru_next);
+    QCIRCLEQ_REMOVE(bucket, it, it_bucket_next);
+
+    if (it->it_free != NULL) {
+        it->it_free(it->it_data);
+    }
+    qemu_free(it);
+    cache_num_items -= 1;
+}
+
+/*
+ * Remove the item at the head of the LRU list.
+ * Returns 0 when an item was removed and -1 when the list is empty.
+ */
+static int do_lru_remove_first(void)
+{
+    if (!QCIRCLEQ_EMPTY(&page_lru)) {
+        do_lru_remove(QCIRCLEQ_FIRST(&page_lru));
+        return 0;
+    }
+    return -1;
+}
+
+
+/*
+ * Release every cached item and the bucket table.
+ *
+ * Does nothing when no table is allocated, so it is safe to call before
+ * lru_init() and more than once.
+ */
+void lru_fini(void)
+{
+    /* nothing was set up, or it has already been torn down */
+    if (!page_hash) {
+        return;
+    }
+    while (!do_lru_remove_first()) {
+    }
+    qemu_free(page_hash);
+    /* forget the table so a repeated call cannot free it twice */
+    page_hash = NULL;
+}
+
+/* Find the cache item stored for addr, or NULL when there is none. */
+static CacheItem *do_lru_lookup(ram_addr_t addr)
+{
+    CacheBucket *bucket = page_bucket_list(addr);
+    CacheItem *candidate;
+    CacheItem *found = NULL;
+
+    if (!QCIRCLEQ_EMPTY(bucket)) {
+        QCIRCLEQ_FOREACH(candidate, bucket, it_bucket_next) {
+            if (candidate->it_addr == addr) {
+                found = candidate;
+                break;
+            }
+        }
+    }
+    return found;
+}
+
+/* Return the cached data for addr, or NULL when addr is not cached. */
+uint8_t *lru_lookup(ram_addr_t addr)
+{
+    CacheItem *item = do_lru_lookup(addr);
+
+    if (!item) {
+        return NULL;
+    }
+    return item->it_data;
+}
+
+/*
+ * Cache data under addr.
+ *
+ * An existing entry for addr is removed first. If the cache then holds
+ * exactly the configured maximum, the item at the head of the LRU list
+ * is evicted. free_cb (may be NULL) is stored with the entry and is
+ * called on data when the entry is later removed.
+ */
+void lru_insert(ram_addr_t addr, uint8_t *data, lru_free_cb_t free_cb)
+{
+    CacheItem *stale = do_lru_lookup(addr);
+    CacheItem *fresh;
+    CacheBucket *bucket;
+
+    /* drop the previous copy of this page, if any */
+    if (stale) {
+        do_lru_remove(stale);
+    }
+
+    /* make room when the cache is full */
+    if (cache_num_items == cache_max_items) {
+        do_lru_remove_first();
+    }
+
+    /* build the new entry and link it into its bucket and the LRU tail */
+    fresh = qemu_mallocz(sizeof(*fresh));
+    fresh->it_free = free_cb;
+    fresh->it_data = data;
+    fresh->it_addr = addr;
+
+    bucket = page_bucket_list(addr);
+    QCIRCLEQ_INSERT_HEAD(bucket, fresh, it_bucket_next);
+    QCIRCLEQ_INSERT_TAIL(&page_lru, fresh, it_lru_next);
+    cache_num_items += 1;
+}
+
diff --git a/lru.h b/lru.h
new file mode 100644
index 0000000..6c70095
--- /dev/null
+++ b/lru.h
@@ -0,0 +1,13 @@
+#ifndef _LRU_H_
+#define _LRU_H_
+
+#include<unistd.h>
+#include<stdint.h>
+#include "cpu-all.h"
+/* Callback used to release an item's data when the item is removed. */
+typedef void (*lru_free_cb_t)(void *);
+/*
+ * Set up an empty cache limited to num_items entries; param is unused.
+ * The parameter type matches the int64_t used by the definition.
+ */
+void lru_init(int64_t num_items, void *param);
+/* Release every cached item and the cache tables. */
+void lru_fini(void);
+/* Cache pdata under id; free_cb (may be NULL) releases pdata on removal. */
+void lru_insert(ram_addr_t id, uint8_t *pdata, lru_free_cb_t free_cb);
+/* Return the data cached under addr, or NULL when there is none. */
+uint8_t *lru_lookup(ram_addr_t addr);
+#endif
+
diff --git a/migration-exec.c b/migration-exec.c
index 14718dd..fe8254a 100644
--- a/migration-exec.c
+++ b/migration-exec.c
@@ -67,7 +67,9 @@ MigrationState *exec_start_outgoing_migration(Monitor *mon,
int64_t bandwidth_limit,
int detach,
int blk,
- int inc)
+ int inc,
+ int use_xbrle,
+ int64_t xbrle_cache_size)
{
FdMigrationState *s;
FILE *f;
@@ -99,6 +101,8 @@ MigrationState *exec_start_outgoing_migration(Monitor *mon,
s->mig_state.blk = blk;
s->mig_state.shared = inc;
+ s->mig_state.use_xbrle = use_xbrle;
+ s->mig_state.xbrle_cache_size = xbrle_cache_size;
s->state = MIG_STATE_ACTIVE;
s->mon = NULL;
diff --git a/migration-fd.c b/migration-fd.c
index 6d14505..4a1ddbd 100644
--- a/migration-fd.c
+++ b/migration-fd.c
@@ -56,7 +56,9 @@ MigrationState *fd_start_outgoing_migration(Monitor *mon,
int64_t bandwidth_limit,
int detach,
int blk,
- int inc)
+ int inc,
+ int use_xbrle,
+ int64_t xbrle_cache_size)
{
FdMigrationState *s;
@@ -82,6 +84,8 @@ MigrationState *fd_start_outgoing_migration(Monitor *mon,
s->mig_state.blk = blk;
s->mig_state.shared = inc;
+ s->mig_state.use_xbrle = use_xbrle;
+ s->mig_state.xbrle_cache_size = xbrle_cache_size;
s->state = MIG_STATE_ACTIVE;
s->mon = NULL;
diff --git a/migration-tcp.c b/migration-tcp.c
index b55f419..4ca5bf6 100644
--- a/migration-tcp.c
+++ b/migration-tcp.c
@@ -81,7 +81,9 @@ MigrationState *tcp_start_outgoing_migration(Monitor *mon,
int64_t bandwidth_limit,
int detach,
int blk,
- int inc)
+ int inc,
+ int use_xbrle,
+ int64_t xbrle_cache_size)
{
struct sockaddr_in addr;
FdMigrationState *s;
@@ -101,6 +103,8 @@ MigrationState *tcp_start_outgoing_migration(Monitor *mon,
s->mig_state.blk = blk;
s->mig_state.shared = inc;
+ s->mig_state.use_xbrle = use_xbrle;
+ s->mig_state.xbrle_cache_size = xbrle_cache_size;
s->state = MIG_STATE_ACTIVE;
s->mon = NULL;
diff --git a/migration-unix.c b/migration-unix.c
index 57232c0..0813902 100644
--- a/migration-unix.c
+++ b/migration-unix.c
@@ -80,7 +80,9 @@ MigrationState *unix_start_outgoing_migration(Monitor *mon,
int64_t bandwidth_limit,
int detach,
int blk,
- int inc)
+ int inc,
+ int use_xbrle,
+ int64_t xbrle_cache_size)
{
FdMigrationState *s;
struct sockaddr_un addr;
@@ -100,6 +102,8 @@ MigrationState *unix_start_outgoing_migration(Monitor *mon,
s->mig_state.blk = blk;
s->mig_state.shared = inc;
+ s->mig_state.use_xbrle = use_xbrle;
+ s->mig_state.xbrle_cache_size = xbrle_cache_size;
s->state = MIG_STATE_ACTIVE;
s->mon = NULL;
diff --git a/migration.c b/migration.c
old mode 100644
new mode 100755
index 9ee8b17..ccacf81
--- a/migration.c
+++ b/migration.c
@@ -34,6 +34,11 @@
/* Migration speed throttling */
static uint32_t max_throttle = (32<< 20);
+/* Migration XBRLE cache size */
+#define DEFAULT_MIGRATE_CACHE_SIZE (64 * 1024 * 1024)
+
+static int64_t migrate_cache_size = DEFAULT_MIGRATE_CACHE_SIZE;
+
static MigrationState *current_migration;
int qemu_start_incoming_migration(const char *uri)
@@ -80,6 +85,7 @@ int do_migrate(Monitor *mon, const QDict *qdict, QObject **ret_data)
int detach = qdict_get_try_bool(qdict, "detach", 0);
int blk = qdict_get_try_bool(qdict, "blk", 0);
int inc = qdict_get_try_bool(qdict, "inc", 0);
+ int use_xbrle = qdict_get_try_bool(qdict, "xbrle", 0);
const char *uri = qdict_get_str(qdict, "uri");
if (current_migration&&
@@ -90,17 +96,21 @@ int do_migrate(Monitor *mon, const QDict *qdict, QObject **ret_data)
if (strstart(uri, "tcp:",&p)) {
s = tcp_start_outgoing_migration(mon, p, max_throttle, detach,
- blk, inc);
+ blk, inc, use_xbrle,
+ migrate_cache_size);
#if !defined(WIN32)
} else if (strstart(uri, "exec:",&p)) {
s = exec_start_outgoing_migration(mon, p, max_throttle, detach,
- blk, inc);
+ blk, inc, use_xbrle,
+ migrate_cache_size);
} else if (strstart(uri, "unix:",&p)) {
s = unix_start_outgoing_migration(mon, p, max_throttle, detach,
- blk, inc);
+ blk, inc, use_xbrle,
+ migrate_cache_size);
} else if (strstart(uri, "fd:",&p)) {
s = fd_start_outgoing_migration(mon, p, max_throttle, detach,
- blk, inc);
+ blk, inc, use_xbrle,
+ migrate_cache_size);
#endif
} else {
monitor_printf(mon, "unknown migration protocol: %s\n", uri);
@@ -185,6 +195,36 @@ static void migrate_print_status(Monitor *mon, const char *name,
qdict_get_int(qdict, "total")>> 10);
}
+/*
+ * Print the RAM statistics stored under `name` in status_dict.
+ *
+ * The transferred byte (shown in kbytes) and page counts are always
+ * printed; overflow, cache-hit and cache-lookup are printed only when
+ * non-zero.
+ */
+static void migrate_print_ram_status(Monitor *mon, const char *name,
+                                     const QDict *status_dict)
+{
+    QDict *stats = qobject_to_qdict(qdict_get(status_dict, name));
+    uint64_t count;
+
+    monitor_printf(mon, "transferred %s: %" PRIu64 " kbytes\n", name,
+                   qdict_get_int(stats, "bytes") >> 10);
+    monitor_printf(mon, "transferred %s: %" PRIu64 " pages\n", name,
+                   qdict_get_int(stats, "pages"));
+
+    count = qdict_get_int(stats, "overflow");
+    if (count != 0) {
+        monitor_printf(mon, "overflow %s: %" PRIu64 " pages\n", name,
+                       count);
+    }
+
+    count = qdict_get_int(stats, "cache-hit");
+    if (count != 0) {
+        monitor_printf(mon, "cache-hit %s: %" PRIu64 " pages\n", name,
+                       count);
+    }
+
+    count = qdict_get_int(stats, "cache-lookup");
+    if (count != 0) {
+        monitor_printf(mon, "cache-lookup %s: %" PRIu64 " pages\n", name,
+                       count);
+    }
+}
+
void do_info_migrate_print(Monitor *mon, const QObject *data)
{
QDict *qdict;
@@ -198,6 +238,18 @@ void do_info_migrate_print(Monitor *mon, const QObject *data)
migrate_print_status(mon, "ram", qdict);
}
+ if (qdict_haskey(qdict, "ram-duplicate")) {
+ migrate_print_ram_status(mon, "ram-duplicate", qdict);
+ }
+
+ if (qdict_haskey(qdict, "ram-normal")) {
+ migrate_print_ram_status(mon, "ram-normal", qdict);
+ }
+
+ if (qdict_haskey(qdict, "ram-xbrle")) {
+ migrate_print_ram_status(mon, "ram-xbrle", qdict);
+ }
+
if (qdict_haskey(qdict, "disk")) {
migrate_print_status(mon, "disk", qdict);
}
@@ -214,6 +266,23 @@ static void migrate_put_status(QDict *qdict, const char *name,
qdict_put_obj(qdict, name, obj);
}
+/*
+ * Store one RAM statistics object under `name` in qdict, with the keys
+ * bytes, pages, overflow, cache-hit and cache-lookup.
+ */
+static void migrate_put_ram_status(QDict *qdict, const char *name,
+                                   uint64_t bytes, uint64_t pages,
+                                   uint64_t overflow, uint64_t cache_hit,
+                                   uint64_t cache_lookup)
+{
+    qdict_put_obj(qdict, name,
+                  qobject_from_jsonf("{ 'bytes': %" PRId64 ", "
+                                     "'pages': %" PRId64 ", "
+                                     "'overflow': %" PRId64 ", "
+                                     "'cache-hit': %" PRId64 ", "
+                                     "'cache-lookup': %" PRId64 " }",
+                                     bytes, pages, overflow, cache_hit,
+                                     cache_lookup));
+}
+
void do_info_migrate(Monitor *mon, QObject **ret_data)
{
QDict *qdict;
@@ -228,6 +297,21 @@ void do_info_migrate(Monitor *mon, QObject **ret_data)
migrate_put_status(qdict, "ram", ram_bytes_transferred(),
ram_bytes_remaining(), ram_bytes_total());
+ if (s->use_xbrle) {
+ migrate_put_ram_status(qdict, "ram-duplicate",
+ dup_mig_bytes_transferred(),
+ dup_mig_pages_transferred(), 0, 0, 0);
+ migrate_put_ram_status(qdict, "ram-normal",
+ norm_mig_bytes_transferred(),
+ norm_mig_pages_transferred(), 0, 0, 0);
+ migrate_put_ram_status(qdict, "ram-xbrle",
+ xbrle_mig_bytes_transferred(),
+ xbrle_mig_pages_transferred(),
+ xbrle_mig_pages_overflow(),
+ xbrle_mig_pages_cache_hit(),
+ xbrle_mig_pages_cache_lookup());
+ }
+
if (blk_mig_active()) {
migrate_put_status(qdict, "disk", blk_mig_bytes_transferred(),
blk_mig_bytes_remaining(),
@@ -341,7 +425,8 @@ void migrate_fd_connect(FdMigrationState *s)
DPRINTF("beginning savevm\n");
ret = qemu_savevm_state_begin(s->mon, s->file, s->mig_state.blk,
- s->mig_state.shared);
+ s->mig_state.shared, s->mig_state.use_xbrle,
+ s->mig_state.xbrle_cache_size);
if (ret< 0) {
DPRINTF("failed, %d\n", ret);
migrate_fd_error(s);
@@ -448,3 +533,27 @@ int migrate_fd_close(void *opaque)
qemu_set_fd_handler2(s->fd, NULL, NULL, NULL, NULL);
return s->close(s);
}
+
+/*
+ * Monitor handler: set the cache size used by later XBRLE migrations.
+ *
+ * The "value" string argument is parsed with strtosz(). A negative
+ * result is reported as an invalid size, and on 32-bit hosts anything
+ * above 2047 MB is rejected; in both cases the current setting is kept.
+ */
+void do_migrate_set_cachesize(Monitor *mon, const QDict *qdict)
+{
+    ssize_t bytes;
+    const char *value = qdict_get_str(qdict, "value");
+
+    bytes = strtosz(value, NULL);
+    if (bytes < 0) {
+        monitor_printf(mon, "invalid cache size: %s\n", value);
+        return;
+    }
+
+    /* On 32-bit hosts, QEMU is limited by virtual address space */
+    if (bytes > (2047 << 20) && HOST_LONG_BITS == 32) {
+        monitor_printf(mon, "cache can't exceed 2047 MB RAM limit on host\n");
+        return;
+    }
+    /* any non-negative ssize_t fits the int64_t setting: no further check */
+    migrate_cache_size = bytes;
+}
+
diff --git a/migration.h b/migration.h
index d13ed4f..6dc0543 100644
--- a/migration.h
+++ b/migration.h
@@ -32,6 +32,8 @@ struct MigrationState
void (*release)(MigrationState *s);
int blk;
int shared;
+ int use_xbrle;
+ int64_t xbrle_cache_size;
};
typedef struct FdMigrationState FdMigrationState;
@@ -76,7 +78,9 @@ MigrationState *exec_start_outgoing_migration(Monitor *mon,
int64_t bandwidth_limit,
int detach,
int blk,
- int inc);
+ int inc,
+ int use_xbrle,
+ int64_t xbrle_cache_size);
int tcp_start_incoming_migration(const char *host_port);
@@ -85,7 +89,9 @@ MigrationState *tcp_start_outgoing_migration(Monitor *mon,
int64_t bandwidth_limit,
int detach,
int blk,
- int inc);
+ int inc,
+ int use_xbrle,
+ int64_t xbrle_cache_size);
int unix_start_incoming_migration(const char *path);
@@ -94,7 +100,9 @@ MigrationState *unix_start_outgoing_migration(Monitor *mon,
int64_t bandwidth_limit,
int detach,
int blk,
- int inc);
+ int inc,
+ int use_xbrle,
+ int64_t xbrle_cache_size);
int fd_start_incoming_migration(const char *path);
@@ -103,7 +111,9 @@ MigrationState *fd_start_outgoing_migration(Monitor *mon,
int64_t bandwidth_limit,
int detach,
int blk,
- int inc);
+ int inc,
+ int use_xbrle,
+ int64_t xbrle_cache_size);
void migrate_fd_monitor_suspend(FdMigrationState *s, Monitor *mon);
@@ -134,4 +144,11 @@ static inline FdMigrationState *migrate_to_fms(MigrationState *mig_state)
return container_of(mig_state, FdMigrationState, mig_state);
}
+/* Monitor handler: set the XBRLE cache size from the "value" argument. */
+void do_migrate_set_cachesize(Monitor *mon, const QDict *qdict);
+
+/* Store use_xbrle and xbrle_cache_size for the RAM migration code. */
+void arch_set_params(int blk_enable, int shared_base,
+ int use_xbrle, int64_t xbrle_cache_size, void *opaque);
+
+/* NOTE(review): no definition is visible in this part of the patch - confirm one exists. */
+int xbrle_mig_active(void);
+
#endif
diff --git a/qmp-commands.hx b/qmp-commands.hx
index 793cf1c..8fbe64b 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -431,13 +431,16 @@ EQMP
{
.name = "migrate",
- .args_type = "detach:-d,blk:-b,inc:-i,uri:s",
- .params = "[-d] [-b] [-i] uri",
- .help = "migrate to URI (using -d to not wait for completion)"
- "\n\t\t\t -b for migration without shared storage with"
- " full copy of disk\n\t\t\t -i for migration without "
- "shared storage with incremental copy of disk "
- "(base image shared between src and destination)",
+ .args_type = "detach:-d,blk:-b,inc:-i,xbrle:-x,uri:s",
+ .params = "[-d] [-b] [-i] [-x] uri",
+ .help = "migrate to URI"
+ "\n\t -d to not wait for completion"
+ "\n\t -b for migration without shared storage with"
+ " full copy of disk"
+ "\n\t -i for migration without"
+ " shared storage with incremental copy of disk"
+ " (base image shared between source and destination)"
+ "\n\t -x to use XBRLE page delta compression",
.user_print = monitor_user_noop,
.mhandler.cmd_new = do_migrate,
},
@@ -453,6 +456,7 @@ Arguments:
- "blk": block migration, full disk copy (json-bool, optional)
- "inc": incremental disk copy (json-bool, optional)
- "uri": Destination URI (json-string)
+- "xbrle": use XBRLE page delta compression (json-bool, optional)
Example:
@@ -494,6 +498,31 @@ Example:
EQMP
{
+ .name = "migrate_set_cachesize",
+ .args_type = "value:s",
+ .params = "value",
+ .help = "set cache size (in MB) for xbrle migrations",
+ .mhandler.cmd = do_migrate_set_cachesize,
+ },
+
+SQMP
+migrate_set_cachesize
+---------------------
+
+Set cache size to be used by XBRLE migration
+
+Arguments:
+
+- "value": cache size in bytes (json-string)
+
+Example:
+
+-> { "execute": "migrate_set_cachesize", "arguments": { "value": "500M" } }
+<- { "return": {} }
+
+EQMP
+
+ {
.name = "migrate_set_speed",
.args_type = "value:f",
.params = "value",
diff --git a/savevm.c b/savevm.c
index 4e49765..93b512b 100644
--- a/savevm.c
+++ b/savevm.c
@@ -1141,7 +1141,8 @@ int register_savevm(DeviceState *dev,
void *opaque)
{
return register_savevm_live(dev, idstr, instance_id, version_id,
- NULL, NULL, save_state, load_state, opaque);
+ arch_set_params, NULL, save_state,
+ load_state, opaque);
}
void unregister_savevm(DeviceState *dev, const char *idstr, void *opaque)
@@ -1428,15 +1429,17 @@ static int vmstate_save(QEMUFile *f, SaveStateEntry *se)
#define QEMU_VM_SUBSECTION 0x05
int qemu_savevm_state_begin(Monitor *mon, QEMUFile *f, int blk_enable,
- int shared)
+ int shared, int use_xbrle,
+ int64_t xbrle_cache_size)
{
SaveStateEntry *se;
QTAILQ_FOREACH(se,&savevm_handlers, entry) {
if(se->set_params == NULL) {
continue;
- }
- se->set_params(blk_enable, shared, se->opaque);
+ }
+ se->set_params(blk_enable, shared, use_xbrle, xbrle_cache_size,
+ se->opaque);
}
qemu_put_be32(f, QEMU_VM_FILE_MAGIC);
@@ -1577,7 +1580,7 @@ static int qemu_savevm_state(Monitor *mon, QEMUFile *f)
bdrv_flush_all();
- ret = qemu_savevm_state_begin(mon, f, 0, 0);
+ ret = qemu_savevm_state_begin(mon, f, 0, 0, 0, 0);
if (ret< 0)
goto out;
diff --git a/sysemu.h b/sysemu.h
index b81a70e..eb53bf7 100644
--- a/sysemu.h
+++ b/sysemu.h
@@ -44,6 +44,16 @@ uint64_t ram_bytes_remaining(void);
uint64_t ram_bytes_transferred(void);
uint64_t ram_bytes_total(void);
+uint64_t dup_mig_bytes_transferred(void);
+uint64_t dup_mig_pages_transferred(void);
+uint64_t norm_mig_bytes_transferred(void);
+uint64_t norm_mig_pages_transferred(void);
+uint64_t xbrle_mig_bytes_transferred(void);
+uint64_t xbrle_mig_pages_transferred(void);
+uint64_t xbrle_mig_pages_overflow(void);
+uint64_t xbrle_mig_pages_cache_lookup(void);
+uint64_t xbrle_mig_pages_cache_hit(void);
+
int64_t cpu_get_ticks(void);
void cpu_enable_ticks(void);
void cpu_disable_ticks(void);
@@ -74,7 +84,8 @@ void qemu_announce_self(void);
void main_loop_wait(int nonblocking);
int qemu_savevm_state_begin(Monitor *mon, QEMUFile *f, int blk_enable,
- int shared);
+ int shared, int use_xbrle,
+ int64_t xbrle_cache_size);
int qemu_savevm_state_iterate(Monitor *mon, QEMUFile *f);
int qemu_savevm_state_complete(Monitor *mon, QEMUFile *f);
void qemu_savevm_state_cancel(Monitor *mon, QEMUFile *f);
diff --git a/xbzrle.c b/xbzrle.c
new file mode 100644
index 0000000..e9285e0
--- /dev/null
+++ b/xbzrle.c
@@ -0,0 +1,126 @@
+#include<stdint.h>
+#include<string.h>
+#include<assert.h>
+#include "cpu-all.h"
+#include "xbzrle.h"
+
+typedef struct { /* RLE record for a run of zero words */
+ uint64_t c; /* written as 0; rle_decode treats a zero first word as a run */
+ uint64_t num; /* number of zero words the run expands to */
+} zero_encoding_t;
+
+typedef struct { /* RLE record for a single literal word */
+ uint64_t c; /* the word itself; non-zero whenever rle_encode emits it */
+} char_encoding_t;
+
+/* RLE-encode slen 64-bit words from in into out, which holds dlen bytes.
+ * A run of zero words becomes one zero_encoding_t {0, count}; each non-zero
+ * word becomes one char_encoding_t. If the input starts with a non-zero word
+ * an empty {0, 0} run is emitted first. Records are stored with memcpy since
+ * out need not be 8-byte aligned.
+ * Returns the number of bytes written, or -1 if slen <= 0 or out is too small.
+ */
+static int rle_encode(uint64_t *in, int slen, uint8_t *out, const int dlen)
+{
+    zero_encoding_t rec; /* staging record; a literal uses only rec.c */
+    uint64_t cp = 0, c, run_len = 0;
+    int dl = 0, rec_len;
+    unsigned i, n = slen; /* unsigned so that i <= n cannot overflow */
+
+    if (slen <= 0) {
+        return -1;
+    }
+
+    /* The extra pass (i == n) flushes the record of the last word. */
+    for (i = 0; i <= n; i++) {
+        c = (i < n) ? in[i] : 0;
+        if (i < n && !(cp || c)) {
+            run_len++; /* still inside a run of zeros */
+            continue;
+        }
+
+        rec.c = cp;
+        rec.num = run_len;
+        rec_len = cp ? (int)sizeof(char_encoding_t) : (int)sizeof(rec);
+        if (dlen - dl < rec_len) {
+            return -1; /* the record would overrun out */
+        }
+        memcpy(out + dl, &rec, rec_len);
+        dl += rec_len;
+        if (!cp) {
+            run_len = 1; /* pre-counts the zero that will start the next run */
+        }
+        cp = c;
+    }
+    return dl;
+}
+
+/* Expand slen bytes of RLE records (a zero first word marks a run) into out,
+ * at most dlen words. Returns words written, or -1 on truncation/overflow. */
+static int rle_decode(const uint8_t *in, int slen, uint64_t *out, int dlen)
+{
+    const int lit = sizeof(char_encoding_t), run = sizeof(zero_encoding_t);
+    zero_encoding_t rec; /* filled via memcpy: in need not be 8-byte aligned */
+    int tb = 0, rec_len;
+
+    for (; slen > 0; slen -= rec_len, in += rec_len) {
+        rec = (zero_encoding_t){ .c = 0, .num = 1 }; /* literal = one word */
+        rec_len = (slen >= lit && memcmp(in, &rec.c, lit)) ? lit : run;
+        if (slen < rec_len) {
+            return -1; /* record cut short */
+        }
+        memcpy(&rec, in, rec_len);
+        if (dlen < tb || rec.num > (uint64_t)(dlen - tb)) {
+            return -1; /* would write past the end of out */
+        }
+        for (; rec.num > 0; rec.num--) {
+            out[tb++] = rec.c;
+        }
+    }
+    return tb;
+}
+
+/* dst = src1 ^ src2 for one target page; safe for unaligned buffers. */
+static void xor_encode_word(uint8_t *dst, const uint8_t *src1,
+                            const uint8_t *src2)
+{
+    uint64_t a, b, off;
+    for (off = 0; off < TARGET_PAGE_SIZE; off += sizeof(a)) {
+        memcpy(&a, src1 + off, sizeof(a));
+        memcpy(&b, src2 + off, sizeof(b));
+        a ^= b;
+        memcpy(dst + off, &a, sizeof(a));
+    }
+}
+
+/* Encode page curr as an XOR delta against old, RLE-compressed into xbzrle.
+ * Returns the encoded size; -1 on failure or if above max_compressed_len. */
+int xbzrle_encode(uint8_t *xbzrle, const uint8_t *old, const uint8_t *curr,
+                  const size_t max_compressed_len)
+{
+    uint64_t xor_buf[TARGET_PAGE_SIZE / sizeof(uint64_t)]; /* word-aligned */
+    uint64_t work_buf[2 * TARGET_PAGE_SIZE / sizeof(uint64_t)]; /* <= 150% */
+    int compressed_len;
+    xor_encode_word((uint8_t *)xor_buf, old, curr);
+    compressed_len = rle_encode(xor_buf, sizeof(xor_buf) / sizeof(xor_buf[0]),
+                                (uint8_t *)work_buf, sizeof(work_buf));
+    if (compressed_len < 0 || (size_t)compressed_len > max_compressed_len) {
+        return -1;
+    }
+    memcpy(xbzrle, work_buf, compressed_len);
+    return compressed_len;
+}
+
+/* Rebuild curr from old plus the delta xbrle; returns bytes decoded or < 0. */
+int xbzrle_decode(uint8_t *curr, const uint8_t *old, const uint8_t *xbrle,
+                  const size_t compressed_len)
+{
+    /* word-aligned; zeroed so words the stream omits are copied from old */
+    uint64_t xor_buf[TARGET_PAGE_SIZE / sizeof(uint64_t)] = { 0 };
+    int len = rle_decode(xbrle, compressed_len, xor_buf,
+                         sizeof(xor_buf) / sizeof(xor_buf[0]));
+    if (len >= 0) {
+        xor_encode_word(curr, old, (const uint8_t *)xor_buf);
+    }
+    return len < 0 ? len : (int)(len * sizeof(uint64_t));
+}
diff --git a/xbzrle.h b/xbzrle.h
new file mode 100644
index 0000000..5d625a0
--- /dev/null
+++ b/xbzrle.h
@@ -0,0 +1,12 @@
+#ifndef XBZRLE_H
+#define XBZRLE_H
+
+#include <stdint.h> /* uint8_t */
+#include <stdio.h>  /* size_t */
+
+/* XOR-delta + RLE page codec; each returns a byte count, or < 0 on error. */
+int xbzrle_encode(uint8_t *xbrle, const uint8_t *old, const uint8_t *curr,
+                  const size_t len);
+int xbzrle_decode(uint8_t *curr, const uint8_t *old, const uint8_t *xbrle,
+                  const size_t len);
+#endif