On 10/12/2015 07:27 PM, Michael S. Tsirkin wrote:
On Sun, Oct 11, 2015 at 11:52:55AM +0800, Xiao Guangrong wrote:
NFIT is defined in ACPI 6.0: 5.2.25 NVDIMM Firmware Interface Table (NFIT)
Currently, we only support PMEM mode. Each device has 3 structures:
- SPA structure, defines the PMEM region info
- MEM DEV structure, it has the @handle which is used to associate specified
ACPI NVDIMM device we will introduce in later patch.
Also we can happily ignored the memory device's interleave, the real
nvdimm hardware access is hidden behind host
- DCR structure, it defines vendor ID used to associate specified vendor
nvdimm driver. Since we only implement PMEM mode this time, Command
window and Data window are not needed
Signed-off-by: Xiao Guangrong <guangrong.xiao@xxxxxxxxxxxxxxx>
---
hw/i386/acpi-build.c | 4 +
hw/mem/nvdimm/acpi.c | 209 ++++++++++++++++++++++++++++++++++++++++++++++-
hw/mem/nvdimm/internal.h | 13 +++
hw/mem/nvdimm/nvdimm.c | 25 ++++++
include/hw/mem/nvdimm.h | 2 +
5 files changed, 252 insertions(+), 1 deletion(-)
diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index 95e0c65..c637dc8 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -1661,6 +1661,7 @@ static bool acpi_has_iommu(void)
static
void acpi_build(PcGuestInfo *guest_info, AcpiBuildTables *tables)
{
+ PCMachineState *pcms = PC_MACHINE(qdev_get_machine());
I don't like more code poking at machine directly.
I know srat does it, and I don't like it. Any chance you can add
acpi_get_nvdumm_info to get all you need from nvdimm state?
Do you mean introduce a wrapper to do this,like:
struct nvdimm_state *acpi_get_nvdumm_info(void)
{
return &PC_MACHINE(qdev_get_machine())->nvdimm_memory;
}
Or should we maintain nvdimm state in other place (for example, a global
value in nvdimm.c)?
GArray *table_offsets;
unsigned facs, ssdt, dsdt, rsdt;
AcpiCpuInfo cpu;
@@ -1742,6 +1743,9 @@ void acpi_build(PcGuestInfo *guest_info, AcpiBuildTables *tables)
build_dmar_q35(tables_blob, tables->linker);
}
+ nvdimm_build_acpi_table(&pcms->nvdimm_memory, table_offsets, tables_blob,
+ tables->linker);
+
/* Add tables supplied by user (if any) */
for (u = acpi_table_first(); u; u = acpi_table_next(u)) {
unsigned len = acpi_table_len(u);
diff --git a/hw/mem/nvdimm/acpi.c b/hw/mem/nvdimm/acpi.c
index b640874..62b1e02 100644
--- a/hw/mem/nvdimm/acpi.c
+++ b/hw/mem/nvdimm/acpi.c
@@ -32,6 +32,46 @@
#include "hw/mem/nvdimm.h"
#include "internal.h"
+static void nfit_spa_uuid_pm(uuid_le *uuid)
+{
+ uuid_le uuid_pm = UUID_LE(0x66f0d379, 0xb4f3, 0x4074, 0xac, 0x43, 0x0d,
+ 0x33, 0x18, 0xb7, 0x8c, 0xdb);
+ memcpy(uuid, &uuid_pm, sizeof(uuid_pm));
+}
+
Just add a static constant:
const uint8_t nfit_spa_uuid[] = {0x79, 0xd3, ..... }
then memcpy instead of a wrapper.
Okay, good to me.
+enum {
+ NFIT_STRUCTURE_SPA = 0,
+ NFIT_STRUCTURE_MEMDEV = 1,
+ NFIT_STRUCTURE_IDT = 2,
+ NFIT_STRUCTURE_SMBIOS = 3,
+ NFIT_STRUCTURE_DCR = 4,
+ NFIT_STRUCTURE_BDW = 5,
+ NFIT_STRUCTURE_FLUSH = 6,
+};
+
+enum {
+ EFI_MEMORY_UC = 0x1ULL,
+ EFI_MEMORY_WC = 0x2ULL,
+ EFI_MEMORY_WT = 0x4ULL,
+ EFI_MEMORY_WB = 0x8ULL,
+ EFI_MEMORY_UCE = 0x10ULL,
+ EFI_MEMORY_WP = 0x1000ULL,
+ EFI_MEMORY_RP = 0x2000ULL,
+ EFI_MEMORY_XP = 0x4000ULL,
+ EFI_MEMORY_NV = 0x8000ULL,
+ EFI_MEMORY_MORE_RELIABLE = 0x10000ULL,
+};
+
+/*
+ * NVDIMM Firmware Interface Table
+ * @signature: "NFIT"
+ */
+struct nfit {
+ ACPI_TABLE_HEADER_DEF
+ uint32_t reserved;
+} QEMU_PACKED;
+typedef struct nfit nfit;
+
/* System Physical Address Range Structure */
struct nfit_spa {
uint16_t type;
@@ -40,13 +80,21 @@ struct nfit_spa {
uint16_t flags;
uint32_t reserved;
uint32_t proximity_domain;
- uint8_t type_guid[16];
+ uuid_le type_guid;
uint64_t spa_base;
uint64_t spa_length;
uint64_t mem_attr;
} QEMU_PACKED;
typedef struct nfit_spa nfit_spa;
+/*
+ * Control region is strictly for management during hot add/online
+ * operation.
+ */
+#define SPA_FLAGS_ADD_ONLINE_ONLY (1)
unused
Indeed, currently vNVDIMM did not use this flag, it just introduces
the definition following the spec.
I do not see the hurt of these macros, it is really unacceptable?
Or just the programming style in QEMU?
+/* Data in Proximity Domain field is valid. */
+#define SPA_FLAGS_PROXIMITY_VALID (1 << 1)
+
/* Memory Device to System Physical Address Range Mapping Structure */
struct nfit_memdev {
uint16_t type;
@@ -91,12 +139,20 @@ struct nfit_dcr {
} QEMU_PACKED;
typedef struct nfit_dcr nfit_dcr;
+#define REVSISON_ID 1
+#define NFIT_FIC1 0x201
+
static uint64_t nvdimm_device_structure_size(uint64_t slots)
{
/* each nvdimm has three structures. */
return slots * (sizeof(nfit_spa) + sizeof(nfit_memdev) + sizeof(nfit_dcr));
}
+static uint64_t get_nfit_total_size(uint64_t slots)
+{
+ return sizeof(struct nfit) + nvdimm_device_structure_size(slots);
+}
+
static uint64_t nvdimm_acpi_memory_size(uint64_t slots, uint64_t page_size)
{
uint64_t size = nvdimm_device_structure_size(slots);
@@ -118,3 +174,154 @@ void nvdimm_init_memory_state(NVDIMMState *state, MemoryRegion*system_memory,
NVDIMM_ACPI_MEM_SIZE);
memory_region_add_subregion(system_memory, state->base, &state->mr);
}
+
+static uint32_t nvdimm_slot_to_sn(int slot)
+{
+ return 0x123456 + slot;
+}
+
+static uint32_t nvdimm_slot_to_handle(int slot)
+{
+ return slot + 1;
+}
+
+static uint16_t nvdimm_slot_to_spa_index(int slot)
+{
+ return (slot + 1) << 1;
+}
+
+static uint32_t nvdimm_slot_to_dcr_index(int slot)
+{
+ return nvdimm_slot_to_spa_index(slot) + 1;
+}
+
There are lots of magic numbers here with no comments.
Pls explain the logic in code comments.
Okay, will comment these carefully in the next version.
+static int build_structure_spa(void *buf, NVDIMMDevice *nvdimm)
Pls document the specific chapter that this implements.
same everywhere else.
Good style indeed, will do.
+{
+ nfit_spa *nfit_spa;
+ uint64_t addr = object_property_get_int(OBJECT(nvdimm), DIMM_ADDR_PROP,
+ NULL);
+ uint64_t size = object_property_get_int(OBJECT(nvdimm), DIMM_SIZE_PROP,
+ NULL);
+ uint32_t node = object_property_get_int(OBJECT(nvdimm), DIMM_NODE_PROP,
+ NULL);
+ int slot = object_property_get_int(OBJECT(nvdimm), DIMM_SLOT_PROP,
+ NULL);
+
+ nfit_spa = buf;
+
+ nfit_spa->type = cpu_to_le16(NFIT_STRUCTURE_SPA);
Don't do these 1-time enums. They are hard to match against spec.
nfit_spa->type = cpu_to_le16(0 /* System Physical Address Range Structure */);
same everywhere else.
Yeah, learn it.
+ nfit_spa->length = cpu_to_le16(sizeof(*nfit_spa));
+ nfit_spa->spa_index = cpu_to_le16(nvdimm_slot_to_spa_index(slot));
+ nfit_spa->flags = cpu_to_le16(SPA_FLAGS_PROXIMITY_VALID);
+ nfit_spa->proximity_domain = cpu_to_le32(node);
+ nfit_spa_uuid_pm(&nfit_spa->type_guid);
+ nfit_spa->spa_base = cpu_to_le64(addr);
+ nfit_spa->spa_length = cpu_to_le64(size);
+ nfit_spa->mem_attr = cpu_to_le64(EFI_MEMORY_WB | EFI_MEMORY_NV);
+
+ return sizeof(*nfit_spa);
+}
+
+static int build_structure_memdev(void *buf, NVDIMMDevice *nvdimm)
+{
+ nfit_memdev *nfit_memdev;
+ uint64_t addr = object_property_get_int(OBJECT(nvdimm), DIMM_ADDR_PROP,
+ NULL);
+ uint64_t size = object_property_get_int(OBJECT(nvdimm), DIMM_SIZE_PROP,
+ NULL);
+ int slot = object_property_get_int(OBJECT(nvdimm), DIMM_SLOT_PROP,
+ NULL);
+ uint32_t handle = nvdimm_slot_to_handle(slot);
+
+ nfit_memdev = buf;
+ nfit_memdev->type = cpu_to_le16(NFIT_STRUCTURE_MEMDEV);
+ nfit_memdev->length = cpu_to_le16(sizeof(*nfit_memdev));
+ nfit_memdev->nfit_handle = cpu_to_le32(handle);
+ /* point to nfit_spa. */
+ nfit_memdev->spa_index = cpu_to_le16(nvdimm_slot_to_spa_index(slot));
+ /* point to nfit_dcr. */
+ nfit_memdev->dcr_index = cpu_to_le16(nvdimm_slot_to_dcr_index(slot));
+ nfit_memdev->region_len = cpu_to_le64(size);
+ nfit_memdev->region_dpa = cpu_to_le64(addr);
+ /* Only one interleave for pmem. */
+ nfit_memdev->interleave_ways = cpu_to_le16(1);
+
+ return sizeof(*nfit_memdev);
+}
+
+static int build_structure_dcr(void *buf, NVDIMMDevice *nvdimm)
+{
+ nfit_dcr *nfit_dcr;
+ int slot = object_property_get_int(OBJECT(nvdimm), DIMM_SLOT_PROP,
+ NULL);
+ uint32_t sn = nvdimm_slot_to_sn(slot);
+
+ nfit_dcr = buf;
+ nfit_dcr->type = cpu_to_le16(NFIT_STRUCTURE_DCR);
+ nfit_dcr->length = cpu_to_le16(sizeof(*nfit_dcr));
+ nfit_dcr->dcr_index = cpu_to_le16(nvdimm_slot_to_dcr_index(slot));
+ nfit_dcr->vendor_id = cpu_to_le16(0x8086);
+ nfit_dcr->device_id = cpu_to_le16(1);
+ nfit_dcr->revision_id = cpu_to_le16(REVSISON_ID);
+ nfit_dcr->serial_number = cpu_to_le32(sn);
+ nfit_dcr->fic = cpu_to_le16(NFIT_FIC1);
+
+ return sizeof(*nfit_dcr);
+}
+
+static void build_device_structure(GSList *device_list, char *buf)
+{
+ buf += sizeof(nfit);
+
+ for (; device_list; device_list = device_list->next) {
+ NVDIMMDevice *nvdimm = device_list->data;
+
+ /* build System Physical Address Range Description Table. */
+ buf += build_structure_spa(buf, nvdimm);
+
+ /*
+ * build Memory Device to System Physical Address Range Mapping
+ * Table.
+ */
+ buf += build_structure_memdev(buf, nvdimm);
+
+ /* build Control Region Descriptor Table. */
+ buf += build_structure_dcr(buf, nvdimm);
+ }
+}
+
+static void build_nfit(GSList *device_list, GArray *table_offsets,
+ GArray *table_data, GArray *linker)
+{
+ size_t total;
+ char *buf;
+ int nfit_start, nr;
+
+ nr = g_slist_length(device_list);
+ total = get_nfit_total_size(nr);
+
+ nfit_start = table_data->len;
+ acpi_add_table(table_offsets, table_data);
+
+ buf = acpi_data_push(table_data, total);
+ build_device_structure(device_list, buf);
This seems fragile. Should build_device_structure overflow
a buffer we'll corrupt memory.
Current code does use acpi_data_push but only for trivial
things like fixed size headers.
Can't you use glib to dynamically append things to table
as they are generated?
Okay, good point, will adjust it in the next version.
+
+ build_header(linker, table_data, (void *)(table_data->data + nfit_start),
+ "NFIT", table_data->len - nfit_start, 1);
+}
+
+void nvdimm_build_acpi_table(NVDIMMState *state, GArray *table_offsets,
+ GArray *table_data, GArray *linker)
+{
+ GSList *device_list = nvdimm_get_built_list();
+
+ if (!memory_region_size(&state->mr)) {
+ assert(!device_list);
+ return;
+ }
+
+ if (device_list) {
+ build_nfit(device_list, table_offsets, table_data, linker);
+ g_slist_free(device_list);
+ }
+}
diff --git a/hw/mem/nvdimm/internal.h b/hw/mem/nvdimm/internal.h
index c4ba750..5551448 100644
--- a/hw/mem/nvdimm/internal.h
+++ b/hw/mem/nvdimm/internal.h
@@ -14,4 +14,17 @@
#define NVDIMM_INTERNAL_H
#define MIN_NAMESPACE_LABEL_SIZE (128UL << 10)
+
+struct uuid_le {
+ uint8_t b[16];
+};
+typedef struct uuid_le uuid_le;
+
+#define UUID_LE(a, b, c, d0, d1, d2, d3, d4, d5, d6, d7) \
+((uuid_le) \
+{ { (a) & 0xff, ((a) >> 8) & 0xff, ((a) >> 16) & 0xff, ((a) >> 24) & 0xff, \
+ (b) & 0xff, ((b) >> 8) & 0xff, (c) & 0xff, ((c) >> 8) & 0xff, \
+ (d0), (d1), (d2), (d3), (d4), (d5), (d6), (d7) } })
+
Please avoid polluting the global namespace.
Prefix everything with NVDIMM.
Hmm... this include-file, "internal.h", locates at hw/mem/nvdimm/ which
is only used in NVDIMM internal. But your point is good to me, i will carefully
name the stuff defined in a include-file.
+GSList *nvdimm_get_built_list(void);
You are adding an extern function with no comment
about it's purpose anywhere. Pls fix this.
The name isn't pretty. What does "built" mean?
List of what? Is this a device list?
I used the sytle in pc-dimm.c, pc_dimm_built_list(), i will
rename it to nvdimm_device_list() for better match its doing.
#endif
This header is too small to be worth it.
nvdimm_get_built_list seems to be the only interface -
just stick it in the header you have under include.
Other functions are introudced and included into it in later patches,
it includes the internal things shared between nvdimm device, nvdimm ACPI,
nvdimm namespace.
Furthermore, this is a internal include file, it is not bad i think.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html