NFIT is defined in ACPI 6.0: 5.2.25 NVDIMM Firmware Interface Table (NFIT). Currently, we only support PMEM mode. Each device has 3 structures: - SPA structure, which defines the PMEM region info - MEM DEV structure, which has the @handle used to associate it with the specified ACPI NVDIMM device that we will introduce in a later patch. Also, we can happily ignore the memory device's interleave, since the real nvdimm hardware access is hidden behind the host - DCR structure, which defines the vendor ID used to associate it with the specified vendor's nvdimm driver. Since we only implement PMEM mode this time, the Command window and Data window are not needed. Signed-off-by: Xiao Guangrong <guangrong.xiao@xxxxxxxxxxxxxxx> --- hw/acpi/nvdimm.c | 286 ++++++++++++++++++++++++++++++++++++++++++++++++ hw/i386/acpi-build.c | 10 ++ hw/mem/nvdimm.c | 24 ++++ include/hw/mem/nvdimm.h | 13 +++ 4 files changed, 333 insertions(+) diff --git a/hw/acpi/nvdimm.c b/hw/acpi/nvdimm.c index fd70de2..8d8376c 100644 --- a/hw/acpi/nvdimm.c +++ b/hw/acpi/nvdimm.c @@ -31,6 +31,72 @@ #include "hw/acpi/aml-build.h" #include "hw/mem/nvdimm.h" +#define NVDIMM_UUID_LE(a, b, c, d0, d1, d2, d3, d4, d5, d6, d7) \ + { (a) & 0xff, ((a) >> 8) & 0xff, ((a) >> 16) & 0xff, ((a) >> 24) & 0xff, \ + (b) & 0xff, ((b) >> 8) & 0xff, (c) & 0xff, ((c) >> 8) & 0xff, \ + (d0), (d1), (d2), (d3), (d4), (d5), (d6), (d7) } + +/* + * This GUID defines a Byte Addressable Persistent Memory (PM) Region. + * Please refer to ACPI 6.0: 5.2.25.1 System Physical Address Range + * Structure. + */ +static const uint8_t nfit_spa_uuid_pm[] = NVDIMM_UUID_LE(0x66f0d379, 0xb4f3, + 0x4074, 0xac, 0x43, 0x0d, 0x33, 0x18, 0xb7, 0x8c, 0xdb); + +/* NFIT Structure Types. 
*/ +enum { + NFIT_STRUCTURE_SPA = 0, + NFIT_STRUCTURE_MEMDEV = 1, + NFIT_STRUCTURE_IDT = 2, + NFIT_STRUCTURE_SMBIOS = 3, + NFIT_STRUCTURE_DCR = 4, + NFIT_STRUCTURE_BDW = 5, + NFIT_STRUCTURE_FLUSH = 6, +}; + +/* + * NVDIMM Firmware Interface Table + * @signature: "NFIT" + * + * It provides information that allows OSPM to enumerate NVDIMM present in + * the platform and associate system physical address ranges created by the + * NVDIMMs. + * + * Detailed info please refer to ACPI 6.0: 5.2.25 NVDIMM Firmware Interface + * Table (NFIT) + */ +struct nfit { + ACPI_TABLE_HEADER_DEF + uint32_t reserved; +} QEMU_PACKED; +typedef struct nfit nfit; + +/* + * Memory mapping attributes for the address range described in system + * physical address range structure. + */ +enum { + EFI_MEMORY_UC = 0x1ULL, + EFI_MEMORY_WC = 0x2ULL, + EFI_MEMORY_WT = 0x4ULL, + EFI_MEMORY_WB = 0x8ULL, + EFI_MEMORY_UCE = 0x10ULL, + EFI_MEMORY_WP = 0x1000ULL, + EFI_MEMORY_RP = 0x2000ULL, + EFI_MEMORY_XP = 0x4000ULL, + EFI_MEMORY_NV = 0x8000ULL, + EFI_MEMORY_MORE_RELIABLE = 0x10000ULL, +}; + +/* + * Control region is strictly for management during hot add/online + * operation. + */ +#define SPA_FLAGS_ADD_ONLINE_ONLY (1) +/* Data in Proximity Domain field is valid. */ +#define SPA_FLAGS_PROXIMITY_VALID (1 << 1) + /* * System Physical Address Range Structure * @@ -76,6 +142,14 @@ struct nfit_memdev { typedef struct nfit_memdev nfit_memdev; /* + * please refer to DSM specification, Chapter 2 NVDIMM Device Specific + * Method (DSM). + */ +#define REVSISON_ID 1 +/* the format interface code supported by DSM specification. */ +#define NFIT_FIC1 0x201 + +/* * NVDIMM Control Region Structure * * It describes the NVDIMM and if applicable, Block Control Window. 
@@ -141,3 +215,215 @@ void nvdimm_init_memory_state(NVDIMMState *state, MemoryRegion *system_memory, NVDIMM_ACPI_MEM_SIZE); memory_region_add_subregion(system_memory, state->base, &state->mr); } + +/* + * Module serial number is a unique number for each device. We use the + * slot id of NVDIMM device to generate this number so that each device + * associates with a different number. + * + * 0x123456 is a magic number we arbitrarily chose. + */ +static uint32_t nvdimm_slot_to_sn(int slot) +{ + return 0x123456 + slot; +} + +/* + * handle is used to uniquely associate nfit_memdev structure with NVDIMM + * ACPI device - nfit_memdev.nfit_handle matches with the value returned + * by ACPI device _ADR method. + * + * We generate the handle with the slot id of NVDIMM device and reserve + * 0 for NVDIMM root device. + */ +static uint32_t nvdimm_slot_to_handle(int slot) +{ + return slot + 1; +} + +/* + * index uniquely identifies the structure, 0 is reserved which indicates + * that the structure is not valid or the associated structure is not + * present. + * + * Each NVDIMM device needs two indexes, one for nfit_spa and another for + * nfit_dcr, both of which are generated from the slot id of NVDIMM device. + */ +static uint16_t nvdimm_slot_to_spa_index(int slot) +{ + return (slot + 1) << 1; +} + +/* See the comment of nvdimm_slot_to_spa_index(). 
*/ +static uint32_t nvdimm_slot_to_dcr_index(int slot) +{ + return nvdimm_slot_to_spa_index(slot) + 1; +} + +/* + * Please refer to ACPI 6.0: 5.2.25.1 System Physical Address Range + * Structure + */ +static void +nvdimm_build_structure_spa(GArray *structures, NVDIMMDevice *nvdimm) +{ + nfit_spa *nfit_spa; + uint64_t addr = object_property_get_int(OBJECT(nvdimm), DIMM_ADDR_PROP, + NULL); + uint64_t size = object_property_get_int(OBJECT(nvdimm), DIMM_SIZE_PROP, + NULL); + uint32_t node = object_property_get_int(OBJECT(nvdimm), DIMM_NODE_PROP, + NULL); + int slot = object_property_get_int(OBJECT(nvdimm), DIMM_SLOT_PROP, + NULL); + + nfit_spa = acpi_data_push(structures, sizeof(*nfit_spa)); + + /* System Physical Address Range Structure */ + nfit_spa->type = cpu_to_le16(NFIT_STRUCTURE_SPA); + nfit_spa->length = cpu_to_le16(sizeof(*nfit_spa)); + nfit_spa->spa_index = cpu_to_le16(nvdimm_slot_to_spa_index(slot)); + + /* + * - Proximity Domain field is valid as NUMA node is valid. + * - Control region is strictly for management during hot add, as all + * the device info, such as SN and index, is associated with slot id. + */ + nfit_spa->flags = cpu_to_le16(SPA_FLAGS_PROXIMITY_VALID | + SPA_FLAGS_ADD_ONLINE_ONLY); + + /* NUMA node. */ + nfit_spa->proximity_domain = cpu_to_le32(node); + /* the region reported as PMEM. */ + memcpy(nfit_spa->type_guid, nfit_spa_uuid_pm, sizeof(nfit_spa_uuid_pm)); + + nfit_spa->spa_base = cpu_to_le64(addr); + nfit_spa->spa_length = cpu_to_le64(size); + + /* It is the PMEM and can be cached as writeback. 
*/ + nfit_spa->mem_attr = cpu_to_le64(EFI_MEMORY_WB | EFI_MEMORY_NV); +} + +/* + * Please refer to ACPI 6.0: 5.2.25.2 Memory Device to System Physical + * Address Range Mapping Structure + */ +static void +nvdimm_build_structure_memdev(GArray *structures, NVDIMMDevice *nvdimm) +{ + nfit_memdev *nfit_memdev; + uint64_t addr = object_property_get_int(OBJECT(nvdimm), DIMM_ADDR_PROP, + NULL); + uint64_t size = object_property_get_int(OBJECT(nvdimm), DIMM_SIZE_PROP, + NULL); + int slot = object_property_get_int(OBJECT(nvdimm), DIMM_SLOT_PROP, + NULL); + uint32_t handle = nvdimm_slot_to_handle(slot); + + nfit_memdev = acpi_data_push(structures, sizeof(*nfit_memdev)); + + /* Memory Device to System Address Range Map Structure */ + nfit_memdev->type = cpu_to_le16(NFIT_STRUCTURE_MEMDEV); + nfit_memdev->length = cpu_to_le16(sizeof(*nfit_memdev)); + nfit_memdev->nfit_handle = cpu_to_le32(handle); + + /* + * associate memory device with System Physical Address Range + * Structure. + */ + nfit_memdev->spa_index = cpu_to_le16(nvdimm_slot_to_spa_index(slot)); + /* associate memory device with Control Region Structure. */ + nfit_memdev->dcr_index = cpu_to_le16(nvdimm_slot_to_dcr_index(slot)); + + /* The memory region on the device. */ + nfit_memdev->region_len = cpu_to_le64(size); + nfit_memdev->region_dpa = cpu_to_le64(addr); + + /* Only one interleave for PMEM. 
*/ + nfit_memdev->interleave_ways = cpu_to_le16(1); +} + +/* Please refer to ACPI 6.0: 5.2.25.5 NVDIMM Control Region Structure */ +static void nvdimm_build_structure_dcr(GArray *structures, NVDIMMDevice *nvdimm) +{ + nfit_dcr *nfit_dcr; + int slot = object_property_get_int(OBJECT(nvdimm), DIMM_SLOT_PROP, + NULL); + uint32_t sn = nvdimm_slot_to_sn(slot); + + nfit_dcr = acpi_data_push(structures, sizeof(*nfit_dcr)); + + /* NVDIMM Control Region Structure */ + nfit_dcr->type = cpu_to_le16(NFIT_STRUCTURE_DCR); + nfit_dcr->length = cpu_to_le16(sizeof(*nfit_dcr)); + nfit_dcr->dcr_index = cpu_to_le16(nvdimm_slot_to_dcr_index(slot)); + + /* vendor: Intel. */ + nfit_dcr->vendor_id = cpu_to_le16(0x8086); + nfit_dcr->device_id = cpu_to_le16(1); + + /* The _DSM method is following Intel's DSM specification. */ + nfit_dcr->revision_id = cpu_to_le16(REVSISON_ID); + nfit_dcr->serial_number = cpu_to_le32(sn); + nfit_dcr->fic = cpu_to_le16(NFIT_FIC1); +} + +static GArray *nvdimm_build_device_structure(GSList *device_list) +{ + GArray *structures = g_array_new(false, true /* clear */, 1); + + for (; device_list; device_list = device_list->next) { + NVDIMMDevice *nvdimm = device_list->data; + + /* build System Physical Address Range Structure. */ + nvdimm_build_structure_spa(structures, nvdimm); + + /* + * build Memory Device to System Physical Address Range Mapping + * Structure. + */ + nvdimm_build_structure_memdev(structures, nvdimm); + + /* build NVDIMM Control Region Structure. */ + nvdimm_build_structure_dcr(structures, nvdimm); + } + + return structures; +} + +static void nvdimm_build_nfit(GSList *device_list, GArray *table_offsets, + GArray *table_data, GArray *linker) +{ + GArray *structures = nvdimm_build_device_structure(device_list); + void *header; + + acpi_add_table(table_offsets, table_data); + + /* NFIT header. */ + header = acpi_data_push(table_data, sizeof(nfit)); + + /* NVDIMM device structures. 
*/ + g_array_append_vals(table_data, structures->data, structures->len); + + build_header(linker, table_data, header, "NFIT", + sizeof(nfit) + structures->len, 1); + g_array_free(structures, true); +} + +void nvdimm_build_acpi(NVDIMMState *state, GArray *table_offsets, + GArray *table_data, GArray *linker) +{ + GSList *device_list = nvdimm_get_plugged_device_list(); + + if (!memory_region_size(&state->mr)) { + assert(!device_list); + return; + } + + if (!device_list) { + return; + } + + nvdimm_build_nfit(device_list, table_offsets, table_data, linker); + g_slist_free(device_list); +} diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index 95e0c65..7531537 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -1658,6 +1658,13 @@ static bool acpi_has_iommu(void) return intel_iommu && !ambiguous; } +static NVDIMMState *acpi_get_nvdimm_state(void) +{ + PCMachineState *pcms = PC_MACHINE(qdev_get_machine()); + + return &pcms->nvdimm_memory; +} + static void acpi_build(PcGuestInfo *guest_info, AcpiBuildTables *tables) { @@ -1742,6 +1749,9 @@ void acpi_build(PcGuestInfo *guest_info, AcpiBuildTables *tables) build_dmar_q35(tables_blob, tables->linker); } + nvdimm_build_acpi(acpi_get_nvdimm_state(), table_offsets, tables_blob, + tables->linker); + /* Add tables supplied by user (if any) */ for (u = acpi_table_first(); u; u = acpi_table_next(u)) { unsigned len = acpi_table_len(u); diff --git a/hw/mem/nvdimm.c b/hw/mem/nvdimm.c index 51494b6..2d121f6 100644 --- a/hw/mem/nvdimm.c +++ b/hw/mem/nvdimm.c @@ -25,6 +25,30 @@ #include "qapi/visitor.h" #include "hw/mem/nvdimm.h" +static int nvdimm_plugged_device_list(Object *obj, void *opaque) +{ + GSList **list = opaque; + + if (object_dynamic_cast(obj, TYPE_NVDIMM)) { + NVDIMMDevice *nvdimm = NVDIMM(obj); + + if (memory_region_is_mapped(&nvdimm->nvdimm_mr)) { + *list = g_slist_append(*list, DEVICE(obj)); + } + } + + object_child_foreach(obj, nvdimm_plugged_device_list, opaque); + return 0; +} + +GSList 
*nvdimm_get_plugged_device_list(void) +{ + GSList *list = NULL; + + object_child_foreach(qdev_get_machine(), nvdimm_plugged_device_list, &list); + return list; +} + static MemoryRegion *nvdimm_get_memory_region(DIMMDevice *dimm) { NVDIMMDevice *nvdimm = NVDIMM(dimm); diff --git a/include/hw/mem/nvdimm.h b/include/hw/mem/nvdimm.h index 8e31983..dc77a1f 100644 --- a/include/hw/mem/nvdimm.h +++ b/include/hw/mem/nvdimm.h @@ -68,6 +68,15 @@ struct NVDIMMDevice { typedef struct NVDIMMDevice NVDIMMDevice; /* + * inquire plugged NVDIMM devices and link them into the list which is + * returned to the caller. + * + * Note: it is the caller's responsibility to free the list to avoid + * memory leak. + */ +GSList *nvdimm_get_plugged_device_list(void); + +/* * NVDIMMState: * @base: address in guest address space where NVDIMM ACPI memory begins. * @mr: NVDIMM ACPI memory address space container. @@ -81,4 +90,8 @@ typedef struct NVDIMMState NVDIMMState; /* Initialize the memory region needed by NVDIMM ACPI.*/ void nvdimm_init_memory_state(NVDIMMState *state, MemoryRegion *system_memory, MachineState *machine); + +/* Build NVDIMM ACPI including NFIT, NVDIMM devices, etc. */ +void nvdimm_build_acpi(NVDIMMState *state, GArray *table_offsets, + GArray *table_data, GArray *linker); #endif -- 1.8.3.1 -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html