[libvirt PATCH v2 1/3] qemu: Add support for /dev/userfaultfd

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



/dev/userfaultfd device is preferred over userfaultfd syscall for
post-copy migrations. Unless qemu driver is configured to disable mount
namespace or to forbid access to /dev/userfaultfd in cgroup_device_acl,
we will copy it to the limited /dev filesystem QEMU will have access to
and label it appropriately. So in the default configuration post-copy
migration will be allowed even without enabling
vm.unprivileged_userfaultfd sysctl.

Signed-off-by: Jiri Denemark <jdenemar@xxxxxxxxxx>
---
 src/qemu/qemu.conf.in              |  3 +-
 src/qemu/qemu_cgroup.c             |  1 +
 src/qemu/qemu_process.c            | 38 +++++++++++++++++++++++++
 src/qemu/qemu_security.c           | 45 ++++++++++++++++++++++++++++++
 src/qemu/qemu_security.h           |  5 ++++
 src/qemu/test_libvirtd_qemu.aug.in |  1 +
 6 files changed, 92 insertions(+), 1 deletion(-)

diff --git a/src/qemu/qemu.conf.in b/src/qemu/qemu.conf.in
index 34025a02ef..f406df8749 100644
--- a/src/qemu/qemu.conf.in
+++ b/src/qemu/qemu.conf.in
@@ -565,7 +565,8 @@
 #cgroup_device_acl = [
 #    "/dev/null", "/dev/full", "/dev/zero",
 #    "/dev/random", "/dev/urandom",
-#    "/dev/ptmx", "/dev/kvm"
+#    "/dev/ptmx", "/dev/kvm",
+#    "/dev/userfaultfd"
 #]
 #
 # RDMA migration requires the following extra files to be added to the list:
diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c
index 47402b3750..5a5ba763a0 100644
--- a/src/qemu/qemu_cgroup.c
+++ b/src/qemu/qemu_cgroup.c
@@ -41,6 +41,7 @@ const char *const defaultDeviceACL[] = {
     "/dev/null", "/dev/full", "/dev/zero",
     "/dev/random", "/dev/urandom",
     "/dev/ptmx", "/dev/kvm",
+    "/dev/userfaultfd",
     NULL,
 };
 #define DEVICE_PTY_MAJOR 136
diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
index 0a6c18a671..6e51d6586b 100644
--- a/src/qemu/qemu_process.c
+++ b/src/qemu/qemu_process.c
@@ -2882,6 +2882,40 @@ qemuProcessStartManagedPRDaemon(virDomainObj *vm)
 }
 
 
+static int
+qemuProcessAllowPostCopyMigration(virDomainObj *vm)
+{
+    qemuDomainObjPrivate *priv = vm->privateData;
+    virQEMUDriver *driver = priv->driver;
+    g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(driver);
+    const char *const *devices = (const char *const *) cfg->cgroupDeviceACL;
+    const char *uffd = "/dev/userfaultfd";
+    int rc;
+
+    if (!virFileExists(uffd)) {
+        VIR_DEBUG("%s is not supported by the host", uffd);
+        return 0;
+    }
+
+    if (!devices)
+        devices = defaultDeviceACL;
+
+    if (!g_strv_contains(devices, uffd)) {
+        VIR_DEBUG("%s is not allowed by device ACL", uffd);
+        return 0;
+    }
+
+    VIR_DEBUG("Labeling %s in mount namespace", uffd);
+    if ((rc = qemuSecurityDomainSetMountNSPathLabel(driver, vm, uffd)) < 0)
+        return -1;
+
+    if (rc == 1)
+        VIR_DEBUG("Mount namespace is not enabled, leaving %s as is", uffd);
+
+    return 0;
+}
+
+
 static int
 qemuProcessInitPasswords(virQEMUDriver *driver,
                          virDomainObj *vm,
@@ -7802,6 +7836,10 @@ qemuProcessLaunch(virConnectPtr conn,
         qemuProcessStartManagedPRDaemon(vm) < 0)
         goto cleanup;
 
+    VIR_DEBUG("Setting up permissions to allow post-copy migration");
+    if (qemuProcessAllowPostCopyMigration(vm) < 0)
+        goto cleanup;
+
     VIR_DEBUG("Setting domain security labels");
     if (qemuSecuritySetAllLabel(driver,
                                 vm,
diff --git a/src/qemu/qemu_security.c b/src/qemu/qemu_security.c
index 8bcef14d08..4aaa863ae9 100644
--- a/src/qemu/qemu_security.c
+++ b/src/qemu/qemu_security.c
@@ -615,6 +615,51 @@ qemuSecurityDomainRestorePathLabel(virQEMUDriver *driver,
 }
 
 
+/**
+ * qemuSecurityDomainSetMountNSPathLabel:
+ *
+ * Label given path in mount namespace. If mount namespace is not enabled,
+ * nothing is labeled at all.
+ *
+ * Because the label is only applied in mount namespace, there's no need to
+ * restore it.
+ *
+ * Returns 0 on success,
+ *         1 when mount namespace is not enabled,
+ *        -1 on error.
+ */
+int
+qemuSecurityDomainSetMountNSPathLabel(virQEMUDriver *driver,
+                                      virDomainObj *vm,
+                                      const char *path)
+{
+    int ret = -1;
+
+    if (!qemuDomainNamespaceEnabled(vm, QEMU_DOMAIN_NS_MOUNT)) {
+        VIR_DEBUG("Not labeling '%s': mount namespace disabled for domain '%s'",
+                  path, vm->def->name);
+        return 1;
+    }
+
+    if (virSecurityManagerTransactionStart(driver->securityManager) < 0)
+        goto cleanup;
+
+    if (virSecurityManagerDomainSetPathLabel(driver->securityManager,
+                                             vm->def, path, false) < 0)
+        goto cleanup;
+
+    if (virSecurityManagerTransactionCommit(driver->securityManager,
+                                            vm->pid, false) < 0)
+        goto cleanup;
+
+    ret = 0;
+
+ cleanup:
+    virSecurityManagerTransactionAbort(driver->securityManager);
+    return ret;
+}
+
+
 /**
  * qemuSecurityCommandRun:
  * @driver: the QEMU driver
diff --git a/src/qemu/qemu_security.h b/src/qemu/qemu_security.h
index 10f11771b4..41da33debc 100644
--- a/src/qemu/qemu_security.h
+++ b/src/qemu/qemu_security.h
@@ -110,6 +110,11 @@ int qemuSecurityDomainRestorePathLabel(virQEMUDriver *driver,
                                        virDomainObj *vm,
                                        const char *path);
 
+int
+qemuSecurityDomainSetMountNSPathLabel(virQEMUDriver *driver,
+                                      virDomainObj *vm,
+                                      const char *path);
+
 int qemuSecurityCommandRun(virQEMUDriver *driver,
                            virDomainObj *vm,
                            virCommand *cmd,
diff --git a/src/qemu/test_libvirtd_qemu.aug.in b/src/qemu/test_libvirtd_qemu.aug.in
index e4cfde6cc7..b97e6de11e 100644
--- a/src/qemu/test_libvirtd_qemu.aug.in
+++ b/src/qemu/test_libvirtd_qemu.aug.in
@@ -67,6 +67,7 @@ module Test_libvirtd_qemu =
     { "5" = "/dev/urandom" }
     { "6" = "/dev/ptmx" }
     { "7" = "/dev/kvm" }
+    { "8" = "/dev/userfaultfd" }
 }
 { "save_image_format" = "raw" }
 { "dump_image_format" = "raw" }
-- 
2.43.0
_______________________________________________
Devel mailing list -- devel@xxxxxxxxxxxxxxxxx
To unsubscribe send an email to devel-leave@xxxxxxxxxxxxxxxxx




[Index of Archives]     [Virt Tools]     [Libvirt Users]     [Lib OS Info]     [Fedora Users]     [Fedora Desktop]     [Fedora SELinux]     [Big List of Linux Books]     [Yosemite News]     [KDE Users]     [Fedora Tools]

  Powered by Linux