Re: [RFC PATCH v3 3/4] lock to protect memslots

On 08/15/2011 12:26 AM, Marcelo Tosatti wrote:
> Actually the previous patchset does not traverse the ramlist without
> qemu_mutex locked, which is safe versus the most-recently-used-block
> optimization.

Actually it does:

     bytes_transferred_last = bytes_transferred;
     bwidth = qemu_get_clock_ns(rt_clock);
 
+    if (stage != 3) {
+        qemu_mutex_lock_ramlist();
+        qemu_mutex_unlock_iothread();
+    }
+
     while (!qemu_file_rate_limit(f)) {
         int bytes_sent;
 
         /* ram_save_block does traverse memory.  */
         bytes_sent = ram_save_block(f);
         bytes_transferred += bytes_sent;
         if (bytes_sent == 0) { /* no more blocks */
             break;
         }
     }
 
+    if (stage != 3) {
+        qemu_mutex_lock_iothread();
+        qemu_mutex_unlock_ramlist();
+    }
+
     bwidth = qemu_get_clock_ns(rt_clock) - bwidth;
     bwidth = (bytes_transferred - bytes_transferred_last) / bwidth;
 

What Umesh is doing is taking "either the ramlist mutex or the iothread mutex" when
reading the ramlist, and "both" when writing it; similar to how an rwlock can be
built out of one regular mutex per CPU---clever!  So this:

+                qemu_mutex_lock_ramlist();
                 QLIST_REMOVE(block, next);
                 QLIST_INSERT_HEAD(&ram_list.blocks, block, next);
+                qemu_mutex_unlock_ramlist();

is effectively upgrading the lock from read-side to write-side, assuming that
qemu_get_ram_ptr is never called from the migration thread (which is true).
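
As an illustration, here is a minimal, untested sketch of that "either to
read, both to write" discipline in plain pthreads.  The standalone mutexes
and function names below are hypothetical stand-ins, not QEMU's actual
locking API:

#include <pthread.h>

/* Hypothetical stand-ins for the iothread mutex and the new ramlist
 * mutex (in QEMU these are taken via qemu_mutex_lock_iothread() and
 * qemu_mutex_lock_ramlist(), not raw pthread calls). */
static pthread_mutex_t iothread_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t ramlist_mutex  = PTHREAD_MUTEX_INITIALIZER;

/* Reader on the migration thread: holding EITHER of the two locks is
 * enough, because a writer must hold BOTH and is therefore excluded. */
static void migration_read_ramlist(void)
{
    pthread_mutex_lock(&ramlist_mutex);
    /* ... traverse the block list, e.g. ram_save_block() ... */
    pthread_mutex_unlock(&ramlist_mutex);
}

/* Writer on the I/O thread: the iothread mutex is already held, so
 * taking the ramlist mutex as well "upgrades" read access to write
 * access, excluding readers that hold only one of the two locks. */
static void iothread_write_ramlist(void)
{
    /* assumes iothread_mutex is already held here */
    pthread_mutex_lock(&ramlist_mutex);
    /* ... QLIST_REMOVE() / QLIST_INSERT_HEAD() ... */
    pthread_mutex_unlock(&ramlist_mutex);
}

The scheme stays correct only as long as every reader holds at least one
of the two mutexes across its whole traversal.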

However, I propose that you put the MRU order in a separate list.  You would still
need two locks: the iothread lock to protect the new MRU list, and a new lock to
protect the other fields of the ram_list.  For simplicity you may skip the new
lock, because the migration and I/O threads in fact never modify the block list
concurrently.  More importantly, the MRU code and the migration code then cannot
affect each other at all, since the migration thread does no MRU accesses.
See the attachment for an untested patch.
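
To illustrate the effect (this sketch is hypothetical and not part of the
attached patch), the migration side would keep walking the plain list,
which the MRU reordering in qemu_get_ram_ptr never touches:

/* Hypothetical helper: look up a block the way the migration thread
 * would, on ram_list.blocks only.  qemu_get_ram_ptr reorders
 * ram_list.blocks_mru, so the two threads cannot disturb each other. */
static RAMBlock *migration_find_block(ram_addr_t addr)
{
    RAMBlock *block;

    QLIST_FOREACH(block, &ram_list.blocks, next) {
        if (addr - block->offset < block->length) {
            return block;   /* no move-to-front on this list */
        }
    }
    return NULL;
}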

Paolo

From 8579b821a2c7c4da55a4208c5df3c86e8ce2cc87 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@xxxxxxxxxx>
Date: Fri, 12 Aug 2011 13:08:04 +0200
Subject: [PATCH] split MRU ram list

Outside the execution threads, the normal (non-MRU) order of
the RAM blocks should always be enough.  So maintain two separate
lists, which can then have separate locking rules.

Signed-off-by: Paolo Bonzini <pbonzini@xxxxxxxxxx>
---
 cpu-all.h |    4 +++-
 exec.c    |   16 +++++++++++-----
 2 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/cpu-all.h b/cpu-all.h
index f5c82cd..083d9e6 100644
--- a/cpu-all.h
+++ b/cpu-all.h
@@ -479,8 +479,9 @@ typedef struct RAMBlock {
     ram_addr_t offset;
     ram_addr_t length;
     uint32_t flags;
-    char idstr[256];
     QLIST_ENTRY(RAMBlock) next;
+    QLIST_ENTRY(RAMBlock) next_mru;
+    char idstr[256];
 #if defined(__linux__) && !defined(TARGET_S390X)
     int fd;
 #endif
@@ -489,6 +490,7 @@ typedef struct RAMBlock {
 typedef struct RAMList {
     uint8_t *phys_dirty;
     QLIST_HEAD(, RAMBlock) blocks;
+    QLIST_HEAD(, RAMBlock) blocks_mru;
 } RAMList;
 extern RAMList ram_list;
 
diff --git a/exec.c b/exec.c
index 253f42c..be0f37e 100644
--- a/exec.c
+++ b/exec.c
@@ -110,7 +110,10 @@ static uint8_t *code_gen_ptr;
 int phys_ram_fd;
 static int in_migration;
 
-RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
+RAMList ram_list = {
+    .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks),
+    .blocks_mru = QLIST_HEAD_INITIALIZER(ram_list.blocks_mru)
+};
 
 static MemoryRegion *system_memory;
 
@@ -2972,6 +2975,7 @@ ram_addr_t qemu_ram_alloc_from_ptr(DeviceState *dev, const char *name,
     new_block->length = size;
 
     QLIST_INSERT_HEAD(&ram_list.blocks, new_block, next);
+    QLIST_INSERT_HEAD(&ram_list.blocks_mru, new_block, next_mru);
 
     ram_list.phys_dirty = qemu_realloc(ram_list.phys_dirty,
                                        last_ram_offset() >> TARGET_PAGE_BITS);
@@ -2996,6 +3000,7 @@ void qemu_ram_free_from_ptr(ram_addr_t addr)
     QLIST_FOREACH(block, &ram_list.blocks, next) {
         if (addr == block->offset) {
             QLIST_REMOVE(block, next);
+            QLIST_REMOVE(block, next_mru);
             qemu_free(block);
             return;
         }
@@ -3009,6 +3014,7 @@ void qemu_ram_free(ram_addr_t addr)
     QLIST_FOREACH(block, &ram_list.blocks, next) {
         if (addr == block->offset) {
             QLIST_REMOVE(block, next);
+            QLIST_REMOVE(block, next_mru);
             if (block->flags & RAM_PREALLOC_MASK) {
                 ;
             } else if (mem_path) {
@@ -3113,12 +3119,12 @@ void *qemu_get_ram_ptr(ram_addr_t addr)
 {
     RAMBlock *block;
 
-    QLIST_FOREACH(block, &ram_list.blocks, next) {
+    QLIST_FOREACH(block, &ram_list.blocks_mru, next_mru) {
         if (addr - block->offset < block->length) {
-            /* Move this entry to to start of the list.  */
-            if (block != QLIST_FIRST(&ram_list.blocks)) {
-                QLIST_REMOVE(block, next);
-                QLIST_INSERT_HEAD(&ram_list.blocks, block, next);
+            /* Move this entry to the start of the MRU list.  */
+            if (block != QLIST_FIRST(&ram_list.blocks_mru)) {
+                QLIST_REMOVE(block, next_mru);
+                QLIST_INSERT_HEAD(&ram_list.blocks_mru, block, next_mru);
             }
             if (xen_enabled()) {
                 /* We need to check if the requested address is in the RAM
@@ -3213,7 +3219,7 @@ int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
         return 0;
     }
 
-    QLIST_FOREACH(block, &ram_list.blocks, next) {
-        /* This case append when the block is not mapped. */
+    QLIST_FOREACH(block, &ram_list.blocks_mru, next_mru) {
+        /* This case happens when the block is not mapped. */
         if (block->host == NULL) {
             continue;
-- 
1.7.6

