Re: [PATCH v3 3/3] spapr: nvdimm: Enable sync-dax device property for nvdimm

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 3/24/21 8:39 AM, David Gibson wrote:
On Tue, Mar 23, 2021 at 09:47:55AM -0400, Shivaprasad G Bhat wrote:
The patch adds the 'sync-dax' property to the nvdimm device.

When the sync-dax is 'off', the device tree property
"hcall-flush-required" is added to the nvdimm node which makes the
guest to issue H_SCM_FLUSH hcalls to request for flushes explicitly.
This would be the default behaviour without sync-dax property set
for the nvdimm device.

The sync-dax="on" would mean the guest need not make flush requests
to the qemu. On previous machine versions the sync-dax is set to be
"on" by default using the hw_compat magic.

Signed-off-by: Shivaprasad G Bhat <sbhat@xxxxxxxxxxxxx>
---
  hw/core/machine.c       |    1 +
  hw/mem/nvdimm.c         |    1 +
  hw/ppc/spapr_nvdimm.c   |   17 +++++++++++++++++
  include/hw/mem/nvdimm.h |   10 ++++++++++
  include/hw/ppc/spapr.h  |    1 +
  5 files changed, 30 insertions(+)

diff --git a/hw/core/machine.c b/hw/core/machine.c
index 257a664ea2..f843643574 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -41,6 +41,7 @@ GlobalProperty hw_compat_5_2[] = {
      { "PIIX4_PM", "smm-compat", "on"},
      { "virtio-blk-device", "report-discard-granularity", "off" },
      { "virtio-net-pci", "vectors", "3"},
+    { "nvdimm", "sync-dax", "on" },
  };
  const size_t hw_compat_5_2_len = G_N_ELEMENTS(hw_compat_5_2);
diff --git a/hw/mem/nvdimm.c b/hw/mem/nvdimm.c
index 7397b67156..8f0e29b191 100644
--- a/hw/mem/nvdimm.c
+++ b/hw/mem/nvdimm.c
@@ -229,6 +229,7 @@ static void nvdimm_write_label_data(NVDIMMDevice *nvdimm, const void *buf,
static Property nvdimm_properties[] = {
      DEFINE_PROP_BOOL(NVDIMM_UNARMED_PROP, NVDIMMDevice, unarmed, false),
+    DEFINE_PROP_BOOL(NVDIMM_SYNC_DAX_PROP, NVDIMMDevice, sync_dax, false),

I'm a bit uncomfortable adding this base NVDIMM property without at
least some logic about how it's handled on non-PAPR platforms.

yes these should be specific to PAPR. These are there to handle migration. with older guest. We can use the backing file to determine synchronous dax support. if it is a file backed nvdimm on a fsdax mount point, we can do synchronous dax. If it is one on a non dax file system synchronous dax can be disabled.


      DEFINE_PROP_END_OF_LIST(),
  };
diff --git a/hw/ppc/spapr_nvdimm.c b/hw/ppc/spapr_nvdimm.c
index 883317c1ed..dd1c90251b 100644
--- a/hw/ppc/spapr_nvdimm.c
+++ b/hw/ppc/spapr_nvdimm.c
@@ -125,6 +125,9 @@ static int spapr_dt_nvdimm(SpaprMachineState *spapr, void *fdt,
      uint64_t lsize = nvdimm->label_size;
      uint64_t size = object_property_get_int(OBJECT(nvdimm), PC_DIMM_SIZE_PROP,
                                              NULL);
+    bool sync_dax = object_property_get_bool(OBJECT(nvdimm),
+                                             NVDIMM_SYNC_DAX_PROP,
+                                             &error_abort);
drc = spapr_drc_by_id(TYPE_SPAPR_DRC_PMEM, slot);
      g_assert(drc);
@@ -159,6 +162,11 @@ static int spapr_dt_nvdimm(SpaprMachineState *spapr, void *fdt,
                               "operating-system")));
      _FDT(fdt_setprop(fdt, child_offset, "ibm,cache-flush-required", NULL, 0));
+ if (!sync_dax) {
+        _FDT(fdt_setprop(fdt, child_offset, "ibm,hcall-flush-required",
+                         NULL, 0));
+    }
+
      return child_offset;
  }
@@ -567,10 +575,12 @@ static target_ulong h_scm_flush(PowerPCCPU *cpu, SpaprMachineState *spapr,
                                        target_ulong opcode, target_ulong *args)
  {
      int ret;
+    bool sync_dax;
      uint32_t drc_index = args[0];
      uint64_t continue_token = args[1];
      SpaprDrc *drc = spapr_drc_by_index(drc_index);
      PCDIMMDevice *dimm;
+    NVDIMMDevice *nvdimm;
      HostMemoryBackend *backend = NULL;
      SpaprNVDIMMDeviceFlushState *state;
      ThreadPool *pool = aio_get_thread_pool(qemu_get_aio_context());
@@ -580,6 +590,13 @@ static target_ulong h_scm_flush(PowerPCCPU *cpu, SpaprMachineState *spapr,
          return H_PARAMETER;
      }
+ nvdimm = NVDIMM(drc->dev);
+    sync_dax = object_property_get_bool(OBJECT(nvdimm), NVDIMM_SYNC_DAX_PROP,
+                                        &error_abort);
+    if (sync_dax) {
+        return H_UNSUPPORTED;

Do you want to return UNSUPPORTED here, or just H_SUCCESS, since the
flush should be a no-op in this case.


The reason to handle this as error is to indicate the OS that it is using a wrong mechanism to flush.


+    }
+
      if (continue_token != 0) {
          ret = spapr_nvdimm_get_flush_status(continue_token);
          if (H_IS_LONG_BUSY(ret)) {
diff --git a/include/hw/mem/nvdimm.h b/include/hw/mem/nvdimm.h
index bcf62f825c..f82979cf2f 100644
--- a/include/hw/mem/nvdimm.h
+++ b/include/hw/mem/nvdimm.h
@@ -51,6 +51,7 @@ OBJECT_DECLARE_TYPE(NVDIMMDevice, NVDIMMClass, NVDIMM)
  #define NVDIMM_LABEL_SIZE_PROP "label-size"
  #define NVDIMM_UUID_PROP       "uuid"
  #define NVDIMM_UNARMED_PROP    "unarmed"
+#define NVDIMM_SYNC_DAX_PROP   "sync-dax"
struct NVDIMMDevice {
      /* private */
@@ -85,6 +86,15 @@ struct NVDIMMDevice {
       */
      bool unarmed;
+ /*
+     * On PPC64,
+     * The 'off' value results in the hcall-flush-required property set
+     * in the device tree for pseries machines. When 'off', the guest
+     * initiates explicit flush requests to the backend device ensuring
+     * write persistence.
+     */
+    bool sync_dax;
+
      /*
       * The PPC64 - spapr requires each nvdimm device have a uuid.
       */
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index 7c27fb3e2d..51c35488a4 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -333,6 +333,7 @@ struct SpaprMachineState {
  #define H_P7              -60
  #define H_P8              -61
  #define H_P9              -62
+#define H_UNSUPPORTED     -67
  #define H_OVERLAP         -68
  #define H_UNSUPPORTED_FLAG -256
  #define H_MULTI_THREADS_ACTIVE -9005







[Index of Archives]     [KVM Development]     [KVM ARM]     [KVM ia64]     [Linux Virtualization]     [Linux USB Devel]     [Linux Video]     [Linux Audio Users]     [Linux Kernel]     [Linux SCSI]     [Big List of Linux Books]

  Powered by Linux