[PATCH v2 35/41] postcopy: introduce helper functions for postcopy

This patch introduces helper functions for postcopy that access the umem
character device and handle the communication between the incoming QEMU
and umemd.

Signed-off-by: Isaku Yamahata <yamahata@xxxxxxxxxxxxx>
---
changes v1 -> v2:
- code simplification
- make fault trigger more robust
- introduce struct umem_pages
---
 umem.c |  364 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 umem.h |  101 ++++++++++++++++++
 2 files changed, 465 insertions(+), 0 deletions(-)
 create mode 100644 umem.c
 create mode 100644 umem.h
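
Not part of the patch itself, but as an illustration of how these helpers
are meant to fit together, here is a minimal sketch of a umem daemon loop
built only on the functions added by this patch.  It assumes the daemon
itself creates the UMem for a RAM block and owns the pipes to the incoming
QEMU (to_qemu_fd/from_qemu_fd); the actual split between the incoming QEMU
and umemd elsewhere in this series may differ, and fetch_page_from_source()
is a hypothetical placeholder for pulling page contents off the migration
stream.

#include "umem.h"

#define UMEMD_MAX_REQUESTS 512

/* hypothetical: copy one guest page into the shmem backing store */
extern void fetch_page_from_source(void *shmem, uint64_t pgoff);

void umemd_serve(void *hostp, size_t size,
                 int to_qemu_fd, int from_qemu_fd)
{
    UMem *umem = umem_new(hostp, size);      /* open /dev/umem, mmap at hostp */
    void *shmem = umem_map_shmem(umem);      /* daemon-side view of the pages */
    struct umem_pages *req = g_malloc(umem_pages_size(UMEMD_MAX_REQUESTS));
    uint64_t i;

    umem_daemon_ready(to_qemu_fd);           /* send UMEM_DAEMON_READY */
    umem_daemon_wait_for_qemu(from_qemu_fd); /* wait for UMEM_QEMU_READY */

    for (;;) {
        /* nr is the capacity on input, the number of faulting pages on output */
        req->nr = UMEMD_MAX_REQUESTS;
        umem_get_page_request(umem, req);

        for (i = 0; i < req->nr; i++) {
            fetch_page_from_source(shmem, req->pgoffs[i]);
        }

        /* tell the umem driver those pages are now available */
        umem_mark_page_cached(umem, req);
    }
}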

diff --git a/umem.c b/umem.c
new file mode 100644
index 0000000..64eaab5
--- /dev/null
+++ b/umem.c
@@ -0,0 +1,364 @@
+/*
+ * umem.c: user process backed memory module for postcopy livemigration
+ *
+ * Copyright (c) 2011
+ * National Institute of Advanced Industrial Science and Technology
+ *
+ * https://sites.google.com/site/grivonhome/quick-kvm-migration
+ * Author: Isaku Yamahata <yamahata at valinux co jp>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+
+#include <linux/umem.h>
+
+#include "bitops.h"
+#include "sysemu.h"
+#include "hw/hw.h"
+#include "umem.h"
+
+//#define DEBUG_UMEM
+#ifdef DEBUG_UMEM
+#include <sys/syscall.h>
+#define DPRINTF(format, ...)                                            \
+    do {                                                                \
+        printf("%d:%ld %s:%d "format, getpid(), syscall(SYS_gettid),    \
+               __func__, __LINE__, ## __VA_ARGS__);                     \
+    } while (0)
+#else
+#define DPRINTF(format, ...)    do { } while (0)
+#endif
+
+#define DEV_UMEM        "/dev/umem"
+
+UMem *umem_new(void *hostp, size_t size)
+{
+    struct umem_init uinit = {
+        .size = size,
+    };
+    UMem *umem;
+
+    assert((size % getpagesize()) == 0);
+    umem = g_new(UMem, 1);
+    umem->fd = open(DEV_UMEM, O_RDWR);
+    if (umem->fd < 0) {
+        perror("can't open "DEV_UMEM);
+        abort();
+    }
+
+    if (ioctl(umem->fd, UMEM_INIT, &uinit) < 0) {
+        perror("UMEM_INIT");
+        abort();
+    }
+    if (ftruncate(uinit.shmem_fd, uinit.size) < 0) {
+        perror("truncate(\"shmem_fd\")");
+        abort();
+    }
+
+    umem->nbits = 0;
+    umem->nsets = 0;
+    umem->faulted = NULL;
+    umem->page_shift = ffs(getpagesize()) - 1;
+    umem->shmem_fd = uinit.shmem_fd;
+    umem->size = uinit.size;
+    umem->umem = mmap(hostp, size, PROT_EXEC | PROT_READ | PROT_WRITE,
+                      MAP_PRIVATE | MAP_FIXED, umem->fd, 0);
+    if (umem->umem == MAP_FAILED) {
+        perror("mmap(UMem) failed");
+        abort();
+    }
+    return umem;
+}
+
+void umem_destroy(UMem *umem)
+{
+    if (umem->fd != -1) {
+        close(umem->fd);
+    }
+    if (umem->shmem_fd != -1) {
+        close(umem->shmem_fd);
+    }
+    g_free(umem->faulted);
+    g_free(umem);
+}
+
+void umem_get_page_request(UMem *umem, struct umem_pages *page_request)
+{
+    ssize_t ret = read(umem->fd, page_request->pgoffs,
+                       page_request->nr * sizeof(page_request->pgoffs[0]));
+    if (ret < 0) {
+        perror("daemon: umem read");
+        abort();
+    }
+    page_request->nr = ret / sizeof(page_request->pgoffs[0]);
+}
+
+void umem_mark_page_cached(UMem *umem, struct umem_pages *page_cached)
+{
+    const void *buf = page_cached->pgoffs;
+    ssize_t left = page_cached->nr * sizeof(page_cached->pgoffs[0]);
+
+    while (left > 0) {
+        ssize_t ret = write(umem->fd, buf, left);
+        if (ret == -1) {
+            if (errno == EINTR) {
+                continue;
+            }
+            perror("daemon: umem write");
+            abort();
+        }
+
+        left -= ret;
+        buf += ret;
+    }
+}
+
+void umem_unmap(UMem *umem)
+{
+    munmap(umem->umem, umem->size);
+    umem->umem = NULL;
+}
+
+void umem_close(UMem *umem)
+{
+    close(umem->fd);
+    umem->fd = -1;
+}
+
+void *umem_map_shmem(UMem *umem)
+{
+    umem->nbits = umem->size >> umem->page_shift;
+    umem->nsets = 0;
+    umem->faulted = g_new0(unsigned long, BITS_TO_LONGS(umem->nbits));
+
+    umem->shmem = mmap(NULL, umem->size, PROT_READ | PROT_WRITE, MAP_SHARED,
+                       umem->shmem_fd, 0);
+    if (umem->shmem == MAP_FAILED) {
+        perror("daemon: mmap(\"shmem\")");
+        abort();
+    }
+    return umem->shmem;
+}
+
+void umem_unmap_shmem(UMem *umem)
+{
+    munmap(umem->shmem, umem->size);
+    umem->shmem = NULL;
+}
+
+void umem_remove_shmem(UMem *umem, size_t offset, size_t size)
+{
+    int s = offset >> umem->page_shift;
+    int e = (offset + size) >> umem->page_shift;
+    int i;
+
+    for (i = s; i < e; i++) {
+        if (!test_and_set_bit(i, umem->faulted)) {
+            umem->nsets++;
+#if defined(CONFIG_MADVISE) && defined(MADV_REMOVE)
+            madvise(umem->shmem + offset, size, MADV_REMOVE);
+#endif
+        }
+    }
+}
+
+void umem_close_shmem(UMem *umem)
+{
+    close(umem->shmem_fd);
+    umem->shmem_fd = -1;
+}
+
+/***************************************************************************/
+/* qemu <-> umem daemon communication */
+
+size_t umem_pages_size(uint64_t nr)
+{
+    return sizeof(struct umem_pages) + nr * sizeof(uint64_t);
+}
+
+static void umem_write_cmd(int fd, uint8_t cmd)
+{
+    DPRINTF("write cmd %c\n", cmd);
+
+    for (;;) {
+        ssize_t ret = write(fd, &cmd, 1);
+        if (ret == -1) {
+            if (errno == EINTR) {
+                continue;
+            } else if (errno == EPIPE) {
+                perror("pipe");
+                DPRINTF("write cmd %c %zd %d: pipe is closed\n",
+                        cmd, ret, errno);
+                break;
+            }
+
+            perror("pipe");
+            DPRINTF("write cmd %c %zd %d\n", cmd, ret, errno);
+            abort();
+        }
+
+        break;
+    }
+}
+
+static void umem_read_cmd(int fd, uint8_t expect)
+{
+    uint8_t cmd;
+    for (;;) {
+        ssize_t ret = read(fd, &cmd, 1);
+        if (ret == -1) {
+            if (errno == EINTR) {
+                continue;
+            }
+            perror("pipe");
+            DPRINTF("read error cmd %c %zd %d\n", cmd, ret, errno);
+            abort();
+        }
+
+        if (ret == 0) {
+            DPRINTF("read cmd %c %zd: pipe is closed\n", cmd, ret);
+            abort();
+        }
+
+        break;
+    }
+
+    DPRINTF("read cmd %c\n", cmd);
+    if (cmd != expect) {
+        DPRINTF("cmd %c expect %d\n", cmd, expect);
+        abort();
+    }
+}
+
+struct umem_pages *umem_recv_pages(QEMUFile *f, int *offset)
+{
+    int ret;
+    uint64_t nr;
+    size_t size;
+    struct umem_pages *pages;
+
+    ret = qemu_peek_buffer(f, (uint8_t*)&nr, sizeof(nr), *offset);
+    *offset += sizeof(nr);
+    DPRINTF("ret %d nr %ld\n", ret, nr);
+    if (ret != sizeof(nr) || nr == 0) {
+        return NULL;
+    }
+
+    size = umem_pages_size(nr);
+    pages = g_malloc(size);
+    pages->nr = nr;
+    size -= sizeof(pages->nr);
+
+    ret = qemu_peek_buffer(f, (uint8_t*)pages->pgoffs, size, *offset);
+    *offset += size;
+    if (ret != size) {
+        g_free(pages);
+        return NULL;
+    }
+    return pages;
+}
+
+static void umem_send_pages(QEMUFile *f, const struct umem_pages *pages)
+{
+    size_t len = umem_pages_size(pages->nr);
+    qemu_put_buffer(f, (const uint8_t*)pages, len);
+}
+
+/* umem daemon -> qemu */
+void umem_daemon_ready(int to_qemu_fd)
+{
+    umem_write_cmd(to_qemu_fd, UMEM_DAEMON_READY);
+}
+
+void umem_daemon_quit(QEMUFile *to_qemu)
+{
+    qemu_put_byte(to_qemu, UMEM_DAEMON_QUIT);
+}
+
+void umem_daemon_send_pages_present(QEMUFile *to_qemu,
+                                    struct umem_pages *pages)
+{
+    qemu_put_byte(to_qemu, UMEM_DAEMON_TRIGGER_PAGE_FAULT);
+    umem_send_pages(to_qemu, pages);
+}
+
+void umem_daemon_wait_for_qemu(int from_qemu_fd)
+{
+    umem_read_cmd(from_qemu_fd, UMEM_QEMU_READY);
+}
+
+/* qemu -> umem daemon */
+void umem_qemu_wait_for_daemon(int from_umemd_fd)
+{
+    umem_read_cmd(from_umemd_fd, UMEM_DAEMON_READY);
+}
+
+void umem_qemu_ready(int to_umemd_fd)
+{
+    umem_write_cmd(to_umemd_fd, UMEM_QEMU_READY);
+}
+
+void umem_qemu_quit(QEMUFile *to_umemd)
+{
+    qemu_put_byte(to_umemd, UMEM_QEMU_QUIT);
+}
+
+/* qemu side handler */
+struct umem_pages *umem_qemu_trigger_page_fault(QEMUFile *from_umemd,
+                                                int *offset)
+{
+    uint64_t i;
+    int page_shift = ffs(getpagesize()) - 1;
+    struct umem_pages *pages = umem_recv_pages(from_umemd, offset);
+    if (pages == NULL) {
+        return NULL;
+    }
+
+    for (i = 0; i < pages->nr; i++) {
+        ram_addr_t addr = pages->pgoffs[i] << page_shift;
+
+        /* make pages present by forcibly triggering page fault. */
+        volatile uint8_t *ram = qemu_get_ram_ptr(addr);
+        uint8_t dummy_read = ram[0];
+        (void)dummy_read;   /* suppress unused variable warning */
+    }
+
+    /*
+     * This is very Linux-implementation specific.
+     * Make sure that no other thread still faults on the virtual
+     * addresses touched above (more precisely, that no other thread is
+     * still inside the fault handler for those offsets).
+     * The fault handler runs with mmap_sem held for reading, while
+     * madvise() takes mmap_sem for writing, so this call waits for them.
+     */
+    qemu_madvise(NULL, 0, MADV_NORMAL);
+
+    return pages;
+}
+
+void umem_qemu_send_pages_present(QEMUFile *to_umemd,
+                                  const struct umem_pages *pages)
+{
+    qemu_put_byte(to_umemd, UMEM_QEMU_PAGE_FAULTED);
+    umem_send_pages(to_umemd, pages);
+}
+
+void umem_qemu_send_pages_unmapped(QEMUFile *to_umemd,
+                                   const struct umem_pages *pages)
+{
+    qemu_put_byte(to_umemd, UMEM_QEMU_PAGE_UNMAPPED);
+    umem_send_pages(to_umemd, pages);
+}
diff --git a/umem.h b/umem.h
new file mode 100644
index 0000000..058cac6
--- /dev/null
+++ b/umem.h
@@ -0,0 +1,101 @@
+/*
+ * umem.h: user process backed memory module for postcopy livemigration
+ *
+ * Copyright (c) 2011
+ * National Institute of Advanced Industrial Science and Technology
+ *
+ * https://sites.google.com/site/grivonhome/quick-kvm-migration
+ * Author: Isaku Yamahata <yamahata at valinux co jp>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef QEMU_UMEM_H
+#define QEMU_UMEM_H
+
+#include <linux/umem.h>
+
+#include "qemu-common.h"
+
+typedef struct UMemDev UMemDev;
+
+struct UMem {
+    void *umem;
+    int fd;
+    void *shmem;
+    int shmem_fd;
+    uint64_t size;
+
+    /* bitmap of already-faulted pages, one bit per host page */
+    int page_shift;
+    int nbits;
+    int nsets;
+    unsigned long *faulted;
+};
+
+struct umem_pages {
+    uint64_t nr;
+    uint64_t pgoffs[0];
+};
+
+UMem *umem_new(void *hostp, size_t size);
+void umem_destroy(UMem *umem);
+
+/* umem device operations */
+void umem_get_page_request(UMem *umem, struct umem_pages *page_request);
+void umem_mark_page_cached(UMem *umem, struct umem_pages *page_cached);
+void umem_unmap(UMem *umem);
+void umem_close(UMem *umem);
+
+/* umem shmem operations */
+void *umem_map_shmem(UMem *umem);
+void umem_unmap_shmem(UMem *umem);
+void umem_remove_shmem(UMem *umem, size_t offset, size_t size);
+void umem_close_shmem(UMem *umem);
+
+/* qemu <-> umem daemon communication */
+
+/* daemon -> qemu */
+#define UMEM_DAEMON_READY               'R'
+#define UMEM_DAEMON_QUIT                'Q'
+#define UMEM_DAEMON_TRIGGER_PAGE_FAULT  'T'
+#define UMEM_DAEMON_ERROR               'E'
+
+/* qemu -> daemon */
+#define UMEM_QEMU_READY                 'r'
+#define UMEM_QEMU_QUIT                  'q'
+#define UMEM_QEMU_PAGE_FAULTED          't'
+#define UMEM_QEMU_PAGE_UNMAPPED         'u'
+
+struct umem_pages *umem_recv_pages(QEMUFile *f, int *offset);
+size_t umem_pages_size(uint64_t nr);
+
+/* for umem daemon */
+void umem_daemon_ready(int to_qemu_fd);
+void umem_daemon_wait_for_qemu(int from_qemu_fd);
+void umem_daemon_quit(QEMUFile *to_qemu);
+void umem_daemon_send_pages_present(QEMUFile *to_qemu,
+                                    struct umem_pages *pages);
+
+/* for qemu */
+void umem_qemu_wait_for_daemon(int from_umemd_fd);
+void umem_qemu_ready(int to_umemd_fd);
+void umem_qemu_quit(QEMUFile *to_umemd);
+struct umem_pages *umem_qemu_trigger_page_fault(QEMUFile *from_umemd,
+                                                int *offset);
+void umem_qemu_send_pages_present(QEMUFile *to_umemd,
+                                  const struct umem_pages *pages);
+void umem_qemu_send_pages_unmapped(QEMUFile *to_umemd,
+                                   const struct umem_pages *pages);
+
+#endif /* QEMU_UMEM_H */
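
(Illustrative note, not part of the patch.)  On the incoming-QEMU side the
command bytes defined above pair up roughly as "wait for UMEM_DAEMON_READY,
answer with UMEM_QEMU_READY, then service daemon requests".  A minimal
sketch under those assumptions; from_umemd/to_umemd and the dispatch of the
UMEM_DAEMON_TRIGGER_PAGE_FAULT byte are hypothetical glue that lives
elsewhere in the series:

#include "umem.h"

void incoming_qemu_handshake(int from_umemd_fd, int to_umemd_fd)
{
    umem_qemu_wait_for_daemon(from_umemd_fd);  /* expect UMEM_DAEMON_READY */
    umem_qemu_ready(to_umemd_fd);              /* reply with UMEM_QEMU_READY */
}

/* called after a UMEM_DAEMON_TRIGGER_PAGE_FAULT byte has been peeked;
 * *offset tracks how far the caller has peeked into the buffered input */
void incoming_qemu_handle_trigger(QEMUFile *from_umemd, QEMUFile *to_umemd,
                                  int *offset)
{
    struct umem_pages *pages = umem_qemu_trigger_page_fault(from_umemd, offset);
    if (pages != NULL) {
        /* one plausible reply: report the pages as faulted-in */
        umem_qemu_send_pages_present(to_umemd, pages);
        g_free(pages);
    }
}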
-- 
1.7.1.1
