Re: [PATCH v3 2/2] qemu: implement memory failure event

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 10/14/20 12:37 PM, zhenwei pi wrote:
Since QEMU 5.2 (commit-77b285f7f6), QEMU supports 'memory failure'
event, posts event to monitor if hitting a hardware memory error.
Fully support this feature for QEMU.

Test with commit 'libvirt: support memory failure event', build a
little complex environment(nested KVM):
1, install newly built libvirt in L1, and start a L2 vm. run command
in L1:
  ~# virsh event l2 --event memory-failure

2, run command in L0 to inject MCE to L1:
  ~# virsh qemu-monitor-command l1 --hmp mce 0 9 0xbd000000000000c0 0xd 0x62000000 0x8c

Test result in l1(recipient hypervisor case):
event 'memory-failure' for domain l2:
recipient: hypervisor
action: ignore
flags:
         action required: 0
         recursive: 0

Test result in l1(recipient guest case):
event 'memory-failure' for domain l2:
recipient: guest
action: inject
flags:
         action required: 0
         recursive: 0

Signed-off-by: zhenwei pi <pizhenwei@xxxxxxxxxxxxx>
---
  src/qemu/qemu_monitor.c      | 21 +++++++++++++++-
  src/qemu/qemu_monitor.h      | 39 +++++++++++++++++++++++++++++
  src/qemu/qemu_monitor_json.c | 49 ++++++++++++++++++++++++++++++++++++
  src/qemu/qemu_process.c      | 59 ++++++++++++++++++++++++++++++++++++++++++++
  4 files changed, 167 insertions(+), 1 deletion(-)

diff --git a/src/qemu/qemu_monitor.c b/src/qemu/qemu_monitor.c
index 8c991fefbb..189b789bb8 100644
--- a/src/qemu/qemu_monitor.c
+++ b/src/qemu/qemu_monitor.c
@@ -159,7 +159,6 @@ static int qemuMonitorOnceInit(void)
VIR_ONCE_GLOBAL_INIT(qemuMonitor); -
  VIR_ENUM_IMPL(qemuMonitorMigrationStatus,
                QEMU_MONITOR_MIGRATION_STATUS_LAST,
                "inactive", "setup",
@@ -197,6 +196,14 @@ VIR_ENUM_IMPL(qemuMonitorDumpStatus,
                "none", "active", "completed", "failed",
  );
+VIR_ENUM_IMPL(qemuMonitorMemoryFailureRecipient,
+              QEMU_MONITOR_MEMORY_FAILURE_RECIPIENT_LAST,
+              "hypervisor", "guest");
+
+VIR_ENUM_IMPL(qemuMonitorMemoryFailureAction,
+              QEMU_MONITOR_MEMORY_FAILURE_ACTION_LAST,
+              "ignore", "inject",
+              "fatal", "reset");
#if DEBUG_RAW_IO
  static char *
@@ -1428,6 +1435,18 @@ qemuMonitorEmitSpiceMigrated(qemuMonitorPtr mon)
int
+qemuMonitorEmitMemoryFailure(qemuMonitorPtr mon,
+                             qemuMonitorEventMemoryFailurePtr mfp)
+{
+    int ret = -1;
+
+    QEMU_MONITOR_CALLBACK(mon, ret, domainMemoryFailure, mon->vm, mfp);
+
+    return ret;
+}
+
+
+int
  qemuMonitorEmitMigrationStatus(qemuMonitorPtr mon,
                                 int status)
  {
diff --git a/src/qemu/qemu_monitor.h b/src/qemu/qemu_monitor.h
index a744c8975b..17ba006a2f 100644
--- a/src/qemu/qemu_monitor.h
+++ b/src/qemu/qemu_monitor.h
@@ -340,6 +340,40 @@ typedef int (*qemuMonitorDomainGuestCrashloadedCallback)(qemuMonitorPtr mon,
                                                           virDomainObjPtr vm,
                                                           void *opaque);
+typedef enum {
+    QEMU_MONITOR_MEMORY_FAILURE_RECIPIENT_HYPERVISOR,
+    QEMU_MONITOR_MEMORY_FAILURE_RECIPIENT_GUEST,
+
+    QEMU_MONITOR_MEMORY_FAILURE_RECIPIENT_LAST
+} qemuMonitorMemoryFailureRecipient;
+
+VIR_ENUM_DECL(qemuMonitorMemoryFailureRecipient);
+
+typedef enum {
+    QEMU_MONITOR_MEMORY_FAILURE_ACTION_IGNORE,
+    QEMU_MONITOR_MEMORY_FAILURE_ACTION_INJECT,
+    QEMU_MONITOR_MEMORY_FAILURE_ACTION_FATAL,
+    QEMU_MONITOR_MEMORY_FAILURE_ACTION_RESET,
+
+    QEMU_MONITOR_MEMORY_FAILURE_ACTION_LAST
+} qemuMonitorMemoryFailureAction;
+
+VIR_ENUM_DECL(qemuMonitorMemoryFailureAction);
+
+typedef struct _qemuMonitorEventMemoryFailure qemuMonitorEventMemoryFailure;
+typedef qemuMonitorEventMemoryFailure *qemuMonitorEventMemoryFailurePtr;
+struct _qemuMonitorEventMemoryFailure {
+    qemuMonitorMemoryFailureRecipient recipient;
+    qemuMonitorMemoryFailureAction action;
+    bool action_required;
+    bool recursive;
+};
+
+typedef int (*qemuMonitorDomainMemoryFailureCallback)(qemuMonitorPtr mon,
+                                                      virDomainObjPtr vm,
+                                                      qemuMonitorEventMemoryFailurePtr mfp,
+                                                      void *opaque);
+
  typedef struct _qemuMonitorCallbacks qemuMonitorCallbacks;
  typedef qemuMonitorCallbacks *qemuMonitorCallbacksPtr;
  struct _qemuMonitorCallbacks {
@@ -376,6 +410,7 @@ struct _qemuMonitorCallbacks {
      qemuMonitorDomainPRManagerStatusChangedCallback domainPRManagerStatusChanged;
      qemuMonitorDomainRdmaGidStatusChangedCallback domainRdmaGidStatusChanged;
      qemuMonitorDomainGuestCrashloadedCallback domainGuestCrashloaded;
+    qemuMonitorDomainMemoryFailureCallback domainMemoryFailure;
  };
qemuMonitorPtr qemuMonitorOpen(virDomainObjPtr vm,
@@ -475,6 +510,10 @@ int qemuMonitorEmitSerialChange(qemuMonitorPtr mon,
                                  const char *devAlias,
                                  bool connected);
  int qemuMonitorEmitSpiceMigrated(qemuMonitorPtr mon);
+
+int qemuMonitorEmitMemoryFailure(qemuMonitorPtr mon,
+                                 qemuMonitorEventMemoryFailurePtr mfp);
+
  int qemuMonitorEmitMigrationStatus(qemuMonitorPtr mon,
                                     int status);
  int qemuMonitorEmitMigrationPass(qemuMonitorPtr mon,
diff --git a/src/qemu/qemu_monitor_json.c b/src/qemu/qemu_monitor_json.c
index 26ac499fc5..aa256727d6 100644
--- a/src/qemu/qemu_monitor_json.c
+++ b/src/qemu/qemu_monitor_json.c
@@ -112,6 +112,7 @@ static void qemuMonitorJSONHandleBlockThreshold(qemuMonitorPtr mon, virJSONValue
  static void qemuMonitorJSONHandleDumpCompleted(qemuMonitorPtr mon, virJSONValuePtr data);
  static void qemuMonitorJSONHandlePRManagerStatusChanged(qemuMonitorPtr mon, virJSONValuePtr data);
  static void qemuMonitorJSONHandleRdmaGidStatusChanged(qemuMonitorPtr mon, virJSONValuePtr data);
+static void qemuMonitorJSONHandleMemoryFailure(qemuMonitorPtr mon, virJSONValuePtr data);
typedef struct {
      const char *type;
@@ -132,6 +133,7 @@ static qemuEventHandler eventHandlers[] = {
      { "GUEST_CRASHLOADED", qemuMonitorJSONHandleGuestCrashloaded, },
      { "GUEST_PANICKED", qemuMonitorJSONHandleGuestPanic, },
      { "JOB_STATUS_CHANGE", qemuMonitorJSONHandleJobStatusChange, },
+    { "MEMORY_FAILURE", qemuMonitorJSONHandleMemoryFailure, },
      { "MIGRATION", qemuMonitorJSONHandleMigrationStatus, },
      { "MIGRATION_PASS", qemuMonitorJSONHandleMigrationPass, },
      { "NIC_RX_FILTER_CHANGED", qemuMonitorJSONHandleNicRxFilterChanged, },
@@ -1336,6 +1338,53 @@ qemuMonitorJSONHandleSpiceMigrated(qemuMonitorPtr mon,
static void
+qemuMonitorJSONHandleMemoryFailure(qemuMonitorPtr mon,
+                                   virJSONValuePtr data)
+{
+    virJSONValuePtr flagsjson = virJSONValueObjectGetObject(data, "flags");
+    const char *str;
+    int recipient;
+    int action;
+    bool ar = false;
+    bool recursive = false;
+    qemuMonitorEventMemoryFailure mf = {0};
+
+    if (!(str = virJSONValueObjectGetString(data, "recipient"))) {
+        VIR_WARN("missing recipient in memory failure event");
+        return;
+    }
+
+    recipient = qemuMonitorMemoryFailureRecipientTypeFromString(str);
+    if (recipient == -1) {

Here ^^

+        VIR_WARN("unknown recipient '%s' in memory_failure event", str);
+        return;
+    }
+
+    if (!(str = virJSONValueObjectGetString(data, "action"))) {
+        VIR_WARN("missing action in memory failure event");
+        return;
+    }
+
+    action = qemuMonitorMemoryFailureActionTypeFromString(str);
+    if (action == -1) {

and here - I'd prefer if this was more robust check: < 0 because there's this common practice in libvirt: negative values represent an error state, positive and zero represent success. Now, it will probably never happen but if happened for other functions. If we need to introduce new error state, the function can return -2 to represent this new state. Now, all places that check for a negative value can stay as they are. Places which would check for exactly -1 need to be updated. This would produce unnecessary big change. Therefore I suggest the following change (again, no need to resend, I can do the change locally before pushing, I just want your confirmation):

diff --git a/src/qemu/qemu_monitor_json.c b/src/qemu/qemu_monitor_json.c
index f4ea9ea782..cba9ec7b19 100644
--- a/src/qemu/qemu_monitor_json.c
+++ b/src/qemu/qemu_monitor_json.c
@@ -1355,7 +1355,7 @@ qemuMonitorJSONHandleMemoryFailure(qemuMonitorPtr mon,
     }

     recipient = qemuMonitorMemoryFailureRecipientTypeFromString(str);
-    if (recipient == -1) {
+    if (recipient < 0) {
         VIR_WARN("unknown recipient '%s' in memory_failure event", str);
         return;
     }
@@ -1366,7 +1366,7 @@ qemuMonitorJSONHandleMemoryFailure(qemuMonitorPtr mon,
     }

     action = qemuMonitorMemoryFailureActionTypeFromString(str);
-    if (action == -1) {
+    if (action < 0) {
         VIR_WARN("unknown action '%s' in memory_failure event", str);
         return;
     }


Michal




[Index of Archives]     [Virt Tools]     [Libvirt Users]     [Lib OS Info]     [Fedora Users]     [Fedora Desktop]     [Fedora SELinux]     [Big List of Linux Books]     [Yosemite News]     [KDE Users]     [Fedora Tools]

  Powered by Linux