On Sun, Oct 11, 2015 at 11:52:55AM +0800, Xiao Guangrong wrote: > NFIT is defined in ACPI 6.0: 5.2.25 NVDIMM Firmware Interface Table (NFIT) > > Currently, we only support PMEM mode. Each device has 3 structures: > - SPA structure, defines the PMEM region info > > - MEM DEV structure, it has the @handle which is used to associate specified > ACPI NVDIMM device we will introduce in later patch. > Also we can happily ignored the memory device's interleave, the real > nvdimm hardware access is hidden behind host > > - DCR structure, it defines vendor ID used to associate specified vendor > nvdimm driver. Since we only implement PMEM mode this time, Command > window and Data window are not needed > > Signed-off-by: Xiao Guangrong <guangrong.xiao@xxxxxxxxxxxxxxx> > --- > hw/i386/acpi-build.c | 4 + > hw/mem/nvdimm/acpi.c | 209 ++++++++++++++++++++++++++++++++++++++++++++++- > hw/mem/nvdimm/internal.h | 13 +++ > hw/mem/nvdimm/nvdimm.c | 25 ++++++ > include/hw/mem/nvdimm.h | 2 + > 5 files changed, 252 insertions(+), 1 deletion(-) > > diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c > index 95e0c65..c637dc8 100644 > --- a/hw/i386/acpi-build.c > +++ b/hw/i386/acpi-build.c > @@ -1661,6 +1661,7 @@ static bool acpi_has_iommu(void) > static > void acpi_build(PcGuestInfo *guest_info, AcpiBuildTables *tables) > { > + PCMachineState *pcms = PC_MACHINE(qdev_get_machine()); I don't like more code poking at machine directly. I know srat does it, and I don't like it. Any chance you can add acpi_get_nvdumm_info to get all you need from nvdimm state? > GArray *table_offsets; > unsigned facs, ssdt, dsdt, rsdt; > AcpiCpuInfo cpu; > @@ -1742,6 +1743,9 @@ void acpi_build(PcGuestInfo *guest_info, AcpiBuildTables *tables) > build_dmar_q35(tables_blob, tables->linker); > } > > + nvdimm_build_acpi_table(&pcms->nvdimm_memory, table_offsets, tables_blob, > + tables->linker); > + > /* Add tables supplied by user (if any) */ > for (u = acpi_table_first(); u; u = acpi_table_next(u)) { > unsigned len = acpi_table_len(u); > diff --git a/hw/mem/nvdimm/acpi.c b/hw/mem/nvdimm/acpi.c > index b640874..62b1e02 100644 > --- a/hw/mem/nvdimm/acpi.c > +++ b/hw/mem/nvdimm/acpi.c > @@ -32,6 +32,46 @@ > #include "hw/mem/nvdimm.h" > #include "internal.h" > > +static void nfit_spa_uuid_pm(uuid_le *uuid) > +{ > + uuid_le uuid_pm = UUID_LE(0x66f0d379, 0xb4f3, 0x4074, 0xac, 0x43, 0x0d, > + 0x33, 0x18, 0xb7, 0x8c, 0xdb); > + memcpy(uuid, &uuid_pm, sizeof(uuid_pm)); > +} > + Just add a static constant: const uint8_t nfit_spa_uuid[] = {0x79, 0xd3, ..... } then memcpy instead of a wrapper. > +enum { > + NFIT_STRUCTURE_SPA = 0, > + NFIT_STRUCTURE_MEMDEV = 1, > + NFIT_STRUCTURE_IDT = 2, > + NFIT_STRUCTURE_SMBIOS = 3, > + NFIT_STRUCTURE_DCR = 4, > + NFIT_STRUCTURE_BDW = 5, > + NFIT_STRUCTURE_FLUSH = 6, > +}; > + > +enum { > + EFI_MEMORY_UC = 0x1ULL, > + EFI_MEMORY_WC = 0x2ULL, > + EFI_MEMORY_WT = 0x4ULL, > + EFI_MEMORY_WB = 0x8ULL, > + EFI_MEMORY_UCE = 0x10ULL, > + EFI_MEMORY_WP = 0x1000ULL, > + EFI_MEMORY_RP = 0x2000ULL, > + EFI_MEMORY_XP = 0x4000ULL, > + EFI_MEMORY_NV = 0x8000ULL, > + EFI_MEMORY_MORE_RELIABLE = 0x10000ULL, > +}; > + > +/* > + * NVDIMM Firmware Interface Table > + * @signature: "NFIT" > + */ > +struct nfit { > + ACPI_TABLE_HEADER_DEF > + uint32_t reserved; > +} QEMU_PACKED; > +typedef struct nfit nfit; > + > /* System Physical Address Range Structure */ > struct nfit_spa { > uint16_t type; > @@ -40,13 +80,21 @@ struct nfit_spa { > uint16_t flags; > uint32_t reserved; > uint32_t proximity_domain; > - uint8_t type_guid[16]; > + uuid_le type_guid; > uint64_t spa_base; > uint64_t spa_length; > uint64_t mem_attr; > } QEMU_PACKED; > typedef struct nfit_spa nfit_spa; > > +/* > + * Control region is strictly for management during hot add/online > + * operation. > + */ > +#define SPA_FLAGS_ADD_ONLINE_ONLY (1) unused > +/* Data in Proximity Domain field is valid. */ > +#define SPA_FLAGS_PROXIMITY_VALID (1 << 1) > + > /* Memory Device to System Physical Address Range Mapping Structure */ > struct nfit_memdev { > uint16_t type; > @@ -91,12 +139,20 @@ struct nfit_dcr { > } QEMU_PACKED; > typedef struct nfit_dcr nfit_dcr; > > +#define REVSISON_ID 1 > +#define NFIT_FIC1 0x201 > + > static uint64_t nvdimm_device_structure_size(uint64_t slots) > { > /* each nvdimm has three structures. */ > return slots * (sizeof(nfit_spa) + sizeof(nfit_memdev) + sizeof(nfit_dcr)); > } > > +static uint64_t get_nfit_total_size(uint64_t slots) > +{ > + return sizeof(struct nfit) + nvdimm_device_structure_size(slots); > +} > + > static uint64_t nvdimm_acpi_memory_size(uint64_t slots, uint64_t page_size) > { > uint64_t size = nvdimm_device_structure_size(slots); > @@ -118,3 +174,154 @@ void nvdimm_init_memory_state(NVDIMMState *state, MemoryRegion*system_memory, > NVDIMM_ACPI_MEM_SIZE); > memory_region_add_subregion(system_memory, state->base, &state->mr); > } > + > +static uint32_t nvdimm_slot_to_sn(int slot) > +{ > + return 0x123456 + slot; > +} > + > +static uint32_t nvdimm_slot_to_handle(int slot) > +{ > + return slot + 1; > +} > + > +static uint16_t nvdimm_slot_to_spa_index(int slot) > +{ > + return (slot + 1) << 1; > +} > + > +static uint32_t nvdimm_slot_to_dcr_index(int slot) > +{ > + return nvdimm_slot_to_spa_index(slot) + 1; > +} > + There are lots of magic numbers here with no comments. Pls explain the logic in code comments. > +static int build_structure_spa(void *buf, NVDIMMDevice *nvdimm) Pls document the specific chapter that this implements. same everywhere else. > +{ > + nfit_spa *nfit_spa; > + uint64_t addr = object_property_get_int(OBJECT(nvdimm), DIMM_ADDR_PROP, > + NULL); > + uint64_t size = object_property_get_int(OBJECT(nvdimm), DIMM_SIZE_PROP, > + NULL); > + uint32_t node = object_property_get_int(OBJECT(nvdimm), DIMM_NODE_PROP, > + NULL); > + int slot = object_property_get_int(OBJECT(nvdimm), DIMM_SLOT_PROP, > + NULL); > + > + nfit_spa = buf; > + > + nfit_spa->type = cpu_to_le16(NFIT_STRUCTURE_SPA); Don't do these 1-time enums. They are hard to match against spec. nfit_spa->type = cpu_to_le16(0 /* System Physical Address Range Structure */); same everywhere else. > + nfit_spa->length = cpu_to_le16(sizeof(*nfit_spa)); > + nfit_spa->spa_index = cpu_to_le16(nvdimm_slot_to_spa_index(slot)); > + nfit_spa->flags = cpu_to_le16(SPA_FLAGS_PROXIMITY_VALID); > + nfit_spa->proximity_domain = cpu_to_le32(node); > + nfit_spa_uuid_pm(&nfit_spa->type_guid); > + nfit_spa->spa_base = cpu_to_le64(addr); > + nfit_spa->spa_length = cpu_to_le64(size); > + nfit_spa->mem_attr = cpu_to_le64(EFI_MEMORY_WB | EFI_MEMORY_NV); > + > + return sizeof(*nfit_spa); > +} > + > +static int build_structure_memdev(void *buf, NVDIMMDevice *nvdimm) > +{ > + nfit_memdev *nfit_memdev; > + uint64_t addr = object_property_get_int(OBJECT(nvdimm), DIMM_ADDR_PROP, > + NULL); > + uint64_t size = object_property_get_int(OBJECT(nvdimm), DIMM_SIZE_PROP, > + NULL); > + int slot = object_property_get_int(OBJECT(nvdimm), DIMM_SLOT_PROP, > + NULL); > + uint32_t handle = nvdimm_slot_to_handle(slot); > + > + nfit_memdev = buf; > + nfit_memdev->type = cpu_to_le16(NFIT_STRUCTURE_MEMDEV); > + nfit_memdev->length = cpu_to_le16(sizeof(*nfit_memdev)); > + nfit_memdev->nfit_handle = cpu_to_le32(handle); > + /* point to nfit_spa. */ > + nfit_memdev->spa_index = cpu_to_le16(nvdimm_slot_to_spa_index(slot)); > + /* point to nfit_dcr. */ > + nfit_memdev->dcr_index = cpu_to_le16(nvdimm_slot_to_dcr_index(slot)); > + nfit_memdev->region_len = cpu_to_le64(size); > + nfit_memdev->region_dpa = cpu_to_le64(addr); > + /* Only one interleave for pmem. */ > + nfit_memdev->interleave_ways = cpu_to_le16(1); > + > + return sizeof(*nfit_memdev); > +} > + > +static int build_structure_dcr(void *buf, NVDIMMDevice *nvdimm) > +{ > + nfit_dcr *nfit_dcr; > + int slot = object_property_get_int(OBJECT(nvdimm), DIMM_SLOT_PROP, > + NULL); > + uint32_t sn = nvdimm_slot_to_sn(slot); > + > + nfit_dcr = buf; > + nfit_dcr->type = cpu_to_le16(NFIT_STRUCTURE_DCR); > + nfit_dcr->length = cpu_to_le16(sizeof(*nfit_dcr)); > + nfit_dcr->dcr_index = cpu_to_le16(nvdimm_slot_to_dcr_index(slot)); > + nfit_dcr->vendor_id = cpu_to_le16(0x8086); > + nfit_dcr->device_id = cpu_to_le16(1); > + nfit_dcr->revision_id = cpu_to_le16(REVSISON_ID); > + nfit_dcr->serial_number = cpu_to_le32(sn); > + nfit_dcr->fic = cpu_to_le16(NFIT_FIC1); > + > + return sizeof(*nfit_dcr); > +} > + > +static void build_device_structure(GSList *device_list, char *buf) > +{ > + buf += sizeof(nfit); > + > + for (; device_list; device_list = device_list->next) { > + NVDIMMDevice *nvdimm = device_list->data; > + > + /* build System Physical Address Range Description Table. */ > + buf += build_structure_spa(buf, nvdimm); > + > + /* > + * build Memory Device to System Physical Address Range Mapping > + * Table. > + */ > + buf += build_structure_memdev(buf, nvdimm); > + > + /* build Control Region Descriptor Table. */ > + buf += build_structure_dcr(buf, nvdimm); > + } > +} > + > +static void build_nfit(GSList *device_list, GArray *table_offsets, > + GArray *table_data, GArray *linker) > +{ > + size_t total; > + char *buf; > + int nfit_start, nr; > + > + nr = g_slist_length(device_list); > + total = get_nfit_total_size(nr); > + > + nfit_start = table_data->len; > + acpi_add_table(table_offsets, table_data); > + > + buf = acpi_data_push(table_data, total); > + build_device_structure(device_list, buf); This seems fragile. Should build_device_structure overflow a buffer we'll corrupt memory. Current code does use acpi_data_push but only for trivial things like fixed size headers. Can't you use glib to dynamically append things to table as they are generated? > + > + build_header(linker, table_data, (void *)(table_data->data + nfit_start), > + "NFIT", table_data->len - nfit_start, 1); > +} > + > +void nvdimm_build_acpi_table(NVDIMMState *state, GArray *table_offsets, > + GArray *table_data, GArray *linker) > +{ > + GSList *device_list = nvdimm_get_built_list(); > + > + if (!memory_region_size(&state->mr)) { > + assert(!device_list); > + return; > + } > + > + if (device_list) { > + build_nfit(device_list, table_offsets, table_data, linker); > + g_slist_free(device_list); > + } > +} > diff --git a/hw/mem/nvdimm/internal.h b/hw/mem/nvdimm/internal.h > index c4ba750..5551448 100644 > --- a/hw/mem/nvdimm/internal.h > +++ b/hw/mem/nvdimm/internal.h > @@ -14,4 +14,17 @@ > #define NVDIMM_INTERNAL_H > > #define MIN_NAMESPACE_LABEL_SIZE (128UL << 10) > + > +struct uuid_le { > + uint8_t b[16]; > +}; > +typedef struct uuid_le uuid_le; > + > +#define UUID_LE(a, b, c, d0, d1, d2, d3, d4, d5, d6, d7) \ > +((uuid_le) \ > +{ { (a) & 0xff, ((a) >> 8) & 0xff, ((a) >> 16) & 0xff, ((a) >> 24) & 0xff, \ > + (b) & 0xff, ((b) >> 8) & 0xff, (c) & 0xff, ((c) >> 8) & 0xff, \ > + (d0), (d1), (d2), (d3), (d4), (d5), (d6), (d7) } }) > + Please avoid polluting the global namespace. Prefix everything with NVDIMM. > +GSList *nvdimm_get_built_list(void); You are adding an extern function with no comment about it's purpose anywhere. Pls fix this. The name isn't pretty. What does "built" mean? List of what? Is this a device list? > #endif This header is too small to be worth it. nvdimm_get_built_list seems to be the only interface - just stick it in the header you have under include. > diff --git a/hw/mem/nvdimm/nvdimm.c b/hw/mem/nvdimm/nvdimm.c > index 0850e82..bc8c577 100644 > --- a/hw/mem/nvdimm/nvdimm.c > +++ b/hw/mem/nvdimm/nvdimm.c > @@ -26,6 +26,31 @@ > #include "hw/mem/nvdimm.h" > #include "internal.h" > > +static int nvdimm_built_list(Object *obj, void *opaque) > +{ > + GSList **list = opaque; > + > + if (object_dynamic_cast(obj, TYPE_NVDIMM)) { > + DeviceState *dev = DEVICE(obj); > + > + /* only realized NVDIMMs matter */ > + if (dev->realized) { > + *list = g_slist_append(*list, dev); > + } > + } > + > + object_child_foreach(obj, nvdimm_built_list, opaque); > + return 0; > +} > + > +GSList *nvdimm_get_built_list(void) > +{ > + GSList *list = NULL; > + > + object_child_foreach(qdev_get_machine(), nvdimm_built_list, &list); > + return list; > +} > + > static MemoryRegion *nvdimm_get_memory_region(DIMMDevice *dimm) > { > NVDIMMDevice *nvdimm = NVDIMM(dimm); > diff --git a/include/hw/mem/nvdimm.h b/include/hw/mem/nvdimm.h > index aa95961..0a6bda4 100644 > --- a/include/hw/mem/nvdimm.h > +++ b/include/hw/mem/nvdimm.h > @@ -49,4 +49,6 @@ typedef struct NVDIMMState NVDIMMState; > > void nvdimm_init_memory_state(NVDIMMState *state, MemoryRegion*system_memory, > MachineState *machine , uint64_t page_size); > +void nvdimm_build_acpi_table(NVDIMMState *state, GArray *table_offsets, > + GArray *table_data, GArray *linker); > #endif > -- > 1.8.3.1 -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html