On Tue, 2019-12-03 at 14:46 +1100, Alastair D'Silva wrote: > From: Alastair D'Silva <alastair@xxxxxxxxxxx> > > This driver exposes LPC memory on OpenCAPI SCM cards > as an NVDIMM, allowing the existing nvram infrastructure > to be used. > > Namespace metadata is stored on the media itself, so > scm_reserve_metadata() maps 1 section's worth of PMEM storage > at the start to hold this. The rest of the PMEM range is registered > with libnvdimm as an nvdimm. scm_ndctl_config_read/write/size() > provide > callbacks to libnvdimm to access the metadata. > > Signed-off-by: Alastair D'Silva <alastair@xxxxxxxxxxx> > --- > drivers/nvdimm/Kconfig | 2 + > drivers/nvdimm/Makefile | 2 +- > drivers/nvdimm/ocxl/Kconfig | 15 + > drivers/nvdimm/ocxl/Makefile | 7 + > drivers/nvdimm/ocxl/scm.c | 519 > +++++++++++++++++++++++++++++ > drivers/nvdimm/ocxl/scm_internal.h | 28 ++ > 6 files changed, 572 insertions(+), 1 deletion(-) > create mode 100644 drivers/nvdimm/ocxl/Kconfig > create mode 100644 drivers/nvdimm/ocxl/Makefile > create mode 100644 drivers/nvdimm/ocxl/scm.c > create mode 100644 drivers/nvdimm/ocxl/scm_internal.h > > diff --git a/drivers/nvdimm/Kconfig b/drivers/nvdimm/Kconfig > index 36af7af6b7cf..d1bab36da61c 100644 > --- a/drivers/nvdimm/Kconfig > +++ b/drivers/nvdimm/Kconfig > @@ -130,4 +130,6 @@ config NVDIMM_TEST_BUILD > core devm_memremap_pages() implementation and other > infrastructure. 
> > +source "drivers/nvdimm/ocxl/Kconfig" > + > endif > diff --git a/drivers/nvdimm/Makefile b/drivers/nvdimm/Makefile > index 29203f3d3069..e33492128042 100644 > --- a/drivers/nvdimm/Makefile > +++ b/drivers/nvdimm/Makefile > @@ -1,5 +1,5 @@ > # SPDX-License-Identifier: GPL-2.0 > -obj-$(CONFIG_LIBNVDIMM) += libnvdimm.o > +obj-$(CONFIG_LIBNVDIMM) += libnvdimm.o ocxl/ > obj-$(CONFIG_BLK_DEV_PMEM) += nd_pmem.o > obj-$(CONFIG_ND_BTT) += nd_btt.o > obj-$(CONFIG_ND_BLK) += nd_blk.o > diff --git a/drivers/nvdimm/ocxl/Kconfig > b/drivers/nvdimm/ocxl/Kconfig > new file mode 100644 > index 000000000000..24099b300f5e > --- /dev/null > +++ b/drivers/nvdimm/ocxl/Kconfig > @@ -0,0 +1,15 @@ > +# SPDX-License-Identifier: GPL-2.0-only > +if LIBNVDIMM > + > +config OCXL_SCM > + tristate "OpenCAPI Storage Class Memory" > + depends on LIBNVDIMM && PPC_POWERNV && PCI && EEH > + select ZONE_DEVICE > + select OCXL > + help > + Exposes devices that implement the OpenCAPI Storage Class > Memory > + specification as persistent memory regions. > + > + Select N if unsure. > + > +endif > diff --git a/drivers/nvdimm/ocxl/Makefile > b/drivers/nvdimm/ocxl/Makefile > new file mode 100644 > index 000000000000..74a1bd98848e > --- /dev/null > +++ b/drivers/nvdimm/ocxl/Makefile > @@ -0,0 +1,7 @@ > +# SPDX-License-Identifier: GPL-2.0 > + > +ccflags-$(CONFIG_PPC_WERROR) += -Werror > + > +obj-$(CONFIG_OCXL_SCM) += ocxlscm.o > + > +ocxlscm-y := scm.o > diff --git a/drivers/nvdimm/ocxl/scm.c b/drivers/nvdimm/ocxl/scm.c > new file mode 100644 > index 000000000000..571058a9e7b8 > --- /dev/null > +++ b/drivers/nvdimm/ocxl/scm.c > @@ -0,0 +1,519 @@ > +// SPDX-License-Identifier: GPL-2.0+ > +// Copyright 2019 IBM Corp. 
> + > +/* > + * A driver for Storage Class Memory, connected via OpenCAPI > + */ > + > +#include <linux/module.h> > +#include <misc/ocxl.h> > +#include <linux/ndctl.h> > +#include <linux/mm_types.h> > +#include <linux/memory_hotplug.h> > +#include "scm_internal.h" > + > + > +static const struct pci_device_id scm_pci_tbl[] = { > + { PCI_DEVICE(PCI_VENDOR_ID_IBM, 0x0625), }, > + { } > +}; > + > +MODULE_DEVICE_TABLE(pci, scm_pci_tbl); > + > +#define SCM_NUM_MINORS 256 // Total to reserve > + > +static dev_t scm_dev; > +static struct class *scm_class; > +static struct mutex minors_idr_lock; > +static struct idr minors_idr; > + > +static const struct attribute_group *scm_pmem_attribute_groups[] = { > + &nvdimm_bus_attribute_group, > + NULL, > +}; > + > +static const struct attribute_group > *scm_pmem_region_attribute_groups[] = { > + &nd_region_attribute_group, > + &nd_device_attribute_group, > + &nd_mapping_attribute_group, > + &nd_numa_attribute_group, > + NULL, > +}; > + > +/** > + * scm_ndctl_config_write() - Handle a ND_CMD_SET_CONFIG_DATA > command from ndctl > + * @scm_data: the SCM metadata > + * @command: the incoming data to write > + * Return: 0 on success, negative on failure > + */ > +static int scm_ndctl_config_write(struct scm_data *scm_data, > + struct nd_cmd_set_config_hdr > *command) > +{ > + if (command->in_offset + command->in_length > > SCM_LABEL_AREA_SIZE) > + return -EINVAL; > + > + memcpy_flushcache(scm_data->metadata_addr + command->in_offset, > command->in_buf, > + command->in_length); > + > + return 0; > +} > + > +/** > + * scm_ndctl_config_read() - Handle a ND_CMD_GET_CONFIG_DATA command > from ndctl > + * @scm_data: the SCM metadata > + * @command: the read request > + * Return: 0 on success, negative on failure > + */ > +static int scm_ndctl_config_read(struct scm_data *scm_data, > + struct nd_cmd_get_config_data_hdr > *command) > +{ > + if (command->in_offset + command->in_length > > SCM_LABEL_AREA_SIZE) > + return -EINVAL; > + > + 
memcpy_mcsafe(command->out_buf, scm_data->metadata_addr + > command->in_offset, > + command->in_length); > + > + return 0; > +} > + > +/** > + * scm_ndctl_config_size() - Handle a ND_CMD_GET_CONFIG_SIZE command > from ndctl > + * @scm_data: the SCM metadata > + * @command: the read request > + * Return: 0 on success, negative on failure > + */ > +static int scm_ndctl_config_size(struct nd_cmd_get_config_size > *command) > +{ > + command->status = 0; > + command->config_size = SCM_LABEL_AREA_SIZE; > + command->max_xfer = PAGE_SIZE; > + > + return 0; > +} > + > +static int scm_ndctl(struct nvdimm_bus_descriptor *nd_desc, > + struct nvdimm *nvdimm, > + unsigned int cmd, void *buf, unsigned int buf_len, > int *cmd_rc) > +{ > + struct scm_data *scm_data = container_of(nd_desc, struct > scm_data, bus_desc); > + > + switch (cmd) { > + case ND_CMD_GET_CONFIG_SIZE: > + *cmd_rc = scm_ndctl_config_size(buf); > + return 0; > + > + case ND_CMD_GET_CONFIG_DATA: > + *cmd_rc = scm_ndctl_config_read(scm_data, buf); > + return 0; > + > + case ND_CMD_SET_CONFIG_DATA: > + *cmd_rc = scm_ndctl_config_write(scm_data, buf); > + return 0; > + > + default: > + return -ENOTTY; > + } > +} > + > +static ssize_t serial_show(struct device *dev, > + struct device_attribute *attr, char *buf) > +{ > + struct nvdimm *nvdimm = to_nvdimm(dev); > + struct scm_data *scm_data = nvdimm_provider_data(nvdimm); > + const struct ocxl_fn_config *config = > ocxl_function_config(scm_data->ocxl_fn); > + > + return sprintf(buf, "0x%llx\n", config->serial); > +} > +static DEVICE_ATTR_RO(serial); > + > +static struct attribute *scm_dimm_attributes[] = { > + &dev_attr_serial.attr, > + NULL, > +}; > + > +static umode_t scm_dimm_attr_visible(struct kobject *kobj, > + struct attribute *a, int n) > +{ > + return a->mode; > +} > + > +static const struct attribute_group scm_dimm_attribute_group = { > + .name = "ocxl", > + .attrs = scm_dimm_attributes, > + .is_visible = scm_dimm_attr_visible, > +}; > + > +static const 
struct attribute_group *scm_dimm_attribute_groups[] = { > + &nvdimm_attribute_group, > + &nd_device_attribute_group, > + &scm_dimm_attribute_group, > + NULL, > +}; As Aneesh Kumar has pointed out, these attribute groups should be dropped when the following patch is accepted: https://patchwork.kernel.org/patch/11248491/ > + > +/** > + * scm_reserve_metadata() - Reserve space for nvdimm metadata > + * @scm_data: The SCM device data > + * @lpc_mem: The resource representing the LPC memory of the SCM > device > + */ > +static int scm_reserve_metadata(struct scm_data *scm_data, > + struct resource *lpc_mem) > +{ > + scm_data->metadata_addr = devm_memremap(&scm_data->dev, > lpc_mem->start, > + SCM_LABEL_AREA_SIZE, > MEMREMAP_WB); > + if (IS_ERR(scm_data->metadata_addr)) > + return PTR_ERR(scm_data->metadata_addr); > + > + return 0; > +} > + > +/** > + * scm_register_lpc_mem() - Discover persistent memory on a device > and register it with the NVDIMM subsystem > + * @scm_data: The SCM device data > + * Return: 0 on success > + */ > +static int scm_register_lpc_mem(struct scm_data *scm_data) > +{ > + struct nd_region_desc region_desc; > + struct nd_mapping_desc nd_mapping_desc; > + struct resource *lpc_mem; > + const struct ocxl_afu_config *config; > + const struct ocxl_fn_config *fn_config; > + int rc; > + unsigned long nvdimm_cmd_mask = 0; > + unsigned long nvdimm_flags = 0; > + int target_node; > + char serial[16+1]; > + > + // Set up the reserved metadata area > + rc = ocxl_afu_map_lpc_mem(scm_data->ocxl_afu); > + if (rc < 0) > + return rc; > + > + lpc_mem = ocxl_afu_lpc_mem(scm_data->ocxl_afu); > + if (lpc_mem == NULL || lpc_mem->start == 0) > + return -EINVAL; > + > + config = ocxl_afu_config(scm_data->ocxl_afu); > + fn_config = ocxl_function_config(scm_data->ocxl_fn); > + > + rc = scm_reserve_metadata(scm_data, lpc_mem); > + if (rc) > + return rc; > + > + scm_data->bus_desc.attr_groups = scm_pmem_attribute_groups; > + scm_data->bus_desc.provider_name = "ocxl-scm"; > 
+ scm_data->bus_desc.ndctl = scm_ndctl; > + scm_data->bus_desc.module = THIS_MODULE; > + > + scm_data->nvdimm_bus = nvdimm_bus_register(&scm_data->dev, > + &scm_data->bus_desc); > + if (!scm_data->nvdimm_bus) > + return -EINVAL; > + > + scm_data->scm_res.start = (u64)lpc_mem->start + > SCM_LABEL_AREA_SIZE; > + scm_data->scm_res.end = (u64)lpc_mem->start + config- > >lpc_mem_size - 1; > + scm_data->scm_res.name = "SCM persistent memory"; > + > + set_bit(ND_CMD_GET_CONFIG_SIZE, &nvdimm_cmd_mask); > + set_bit(ND_CMD_GET_CONFIG_DATA, &nvdimm_cmd_mask); > + set_bit(ND_CMD_SET_CONFIG_DATA, &nvdimm_cmd_mask); > + > + set_bit(NDD_ALIASING, &nvdimm_flags); > + > + snprintf(serial, sizeof(serial), "%llx", fn_config->serial); > + nd_mapping_desc.nvdimm = nvdimm_create(scm_data->nvdimm_bus, > scm_data, > + scm_dimm_attribute_groups, > + nvdimm_flags, nvdimm_cmd_mask, > + 0, NULL); > + if (!nd_mapping_desc.nvdimm) > + return -ENOMEM; > + > + if (nvdimm_bus_check_dimm_count(scm_data->nvdimm_bus, 1)) > + return -EINVAL; > + > + nd_mapping_desc.start = scm_data->scm_res.start; > + nd_mapping_desc.size = resource_size(&scm_data->scm_res); > + nd_mapping_desc.position = 0; > + > + scm_data->nd_set.cookie1 = fn_config->serial + 1; // allow for > empty serial > + scm_data->nd_set.cookie2 = fn_config->serial + 1; > + > + target_node = of_node_to_nid(scm_data->pdev->dev.of_node); > + > + memset(&region_desc, 0, sizeof(region_desc)); > + region_desc.res = &scm_data->scm_res; > + region_desc.attr_groups = scm_pmem_region_attribute_groups; > + region_desc.numa_node = NUMA_NO_NODE; > + region_desc.target_node = target_node; > + region_desc.num_mappings = 1; > + region_desc.mapping = &nd_mapping_desc; > + region_desc.nd_set = &scm_data->nd_set; > + > + set_bit(ND_REGION_PAGEMAP, &region_desc.flags); > + /* > + * NB: libnvdimm copies the data from ndr_desc into its own > + * structures so passing a stack pointer is fine. 
> + */ > + scm_data->nd_region = nvdimm_pmem_region_create(scm_data- > >nvdimm_bus, > + &region_desc); > + if (!scm_data->nd_region) > + return -EINVAL; > + > + dev_info(&scm_data->dev, > + "Onlining %lluMB of persistent memory\n", > + nd_mapping_desc.size / SZ_1M); > + > + return 0; > +} > + > +/** > + * allocate_scm_minor() - Allocate a minor number to use for an SCM > device > + * @scm_data: The SCM device to associate the minor with > + * Return: the allocated minor number > + */ > +static int allocate_scm_minor(struct scm_data *scm_data) > +{ > + int minor; > + > + mutex_lock(&minors_idr_lock); > + minor = idr_alloc(&minors_idr, scm_data, 0, SCM_NUM_MINORS, > GFP_KERNEL); > + mutex_unlock(&minors_idr_lock); > + return minor; > +} > + > +static void free_scm_minor(struct scm_data *scm_data) > +{ > + mutex_lock(&minors_idr_lock); > + idr_remove(&minors_idr, MINOR(scm_data->dev.devt)); > + mutex_unlock(&minors_idr_lock); > +} > + > +/** > + * free_scm() - Free all members of an SCM struct > + * @scm_data: the SCM metadata to clear > + */ > +static void free_scm(struct scm_data *scm_data) > +{ > + int rc; > + > + if (scm_data->nvdimm_bus) > + nvdimm_bus_unregister(scm_data->nvdimm_bus); > + > + free_scm_minor(scm_data); > + > + if (scm_data->metadata_addr) > + devm_memunmap(&scm_data->dev, scm_data->metadata_addr); > + > + if (scm_data->ocxl_context) { > + rc = ocxl_context_detach(scm_data->ocxl_context); > + if (rc == -EBUSY) > + dev_warn(&scm_data->dev, "Timeout detaching > ocxl context\n"); > + else > + ocxl_context_free(scm_data->ocxl_context); > + > + } > + > + if (scm_data->ocxl_afu) > + ocxl_afu_put(scm_data->ocxl_afu); > + > + if (scm_data->ocxl_fn) > + ocxl_function_close(scm_data->ocxl_fn); > + > + kfree(scm_data); > +} > + > +/** > + * free_scm_dev - Free an SCM device > + * @dev: The device struct > + */ > +static void free_scm_dev(struct device *dev) > +{ > + struct scm_data *scm_data = container_of(dev, struct scm_data, > dev); > + > + 
free_scm(scm_data); > +} > + > +/** > + * scm_register - Register an SCM device with the kernel > + * @scm_data: the SCM metadata > + * Return: 0 on success, negative on failure > + */ > +static int scm_register(struct scm_data *scm_data) > +{ > + int rc; > + int minor = allocate_scm_minor(scm_data); > + > + if (minor < 0) > + return minor; > + > + scm_data->dev.release = free_scm_dev; > + rc = dev_set_name(&scm_data->dev, "ocxl-scm%d", minor); > + if (rc < 0) > + return rc; > + > + scm_data->dev.devt = MKDEV(MAJOR(scm_dev), minor); > + scm_data->dev.class = scm_class; > + scm_data->dev.parent = &scm_data->pdev->dev; > + > + rc = device_register(&scm_data->dev); > + return rc; > +} > + > +/** > + * scm_remove() - Free an OpenCAPI Storage Class Memory device > + * @pdev: the PCI device information struct > + */ > +static void scm_remove(struct pci_dev *pdev) > +{ > + if (PCI_FUNC(pdev->devfn) == 0) { > + struct scm_function_0 *scm_func_0 = > pci_get_drvdata(pdev); > + > + if (scm_func_0) { > + ocxl_function_close(scm_func_0->ocxl_fn); > + scm_func_0->ocxl_fn = NULL; > + } > + } else { > + struct scm_data *scm_data = pci_get_drvdata(pdev); > + > + if (scm_data) > + device_unregister(&scm_data->dev); > + } > +} > + > +/** > + * scm_probe_function_0 - Set up function 0 for an OpenCAPI Storage > Class Memory device > + * This is important as it enables templates higher than 0 across > all other functions, > + * which in turn enables higher bandwidth accesses > + * @pdev: the PCI device information struct > + * Return: 0 on success, negative on failure > + */ > +static int scm_probe_function_0(struct pci_dev *pdev) > +{ > + struct scm_function_0 *scm_func_0 = NULL; > + struct ocxl_fn *fn; > + > + scm_func_0 = kzalloc(sizeof(*scm_func_0), GFP_KERNEL); > + if (!scm_func_0) > + return -ENOMEM; > + > + scm_func_0->pdev = pdev; > + fn = ocxl_function_open(pdev); > + if (IS_ERR(fn)) { > + kfree(scm_func_0); > + 
dev_err(&pdev->dev, "failed to open OCXL function\n"); > + return PTR_ERR(fn); > + } > + scm_func_0->ocxl_fn = fn; > + > + pci_set_drvdata(pdev, scm_func_0); > + > + return 0; > +} > + > +/** > + * scm_probe - Init an OpenCAPI Storage Class Memory device > + * @pdev: the PCI device information struct > + * @ent: The entry from scm_pci_tbl > + * Return: 0 on success, negative on failure > + */ > +static int scm_probe(struct pci_dev *pdev, const struct > pci_device_id *ent) > +{ > + struct scm_data *scm_data = NULL; > + > + if (PCI_FUNC(pdev->devfn) == 0) > + return scm_probe_function_0(pdev); > + else if (PCI_FUNC(pdev->devfn) != 1) > + return 0; > + > + scm_data = kzalloc(sizeof(*scm_data), GFP_KERNEL); > + if (!scm_data) { > + dev_err(&pdev->dev, "Could not allocate SCM > metadata\n"); > + goto err; > + } > + scm_data->pdev = pdev; > + > + pci_set_drvdata(pdev, scm_data); > + > + scm_data->ocxl_fn = ocxl_function_open(pdev); > + if (IS_ERR(scm_data->ocxl_fn)) { > + kfree(scm_data); > + scm_data = NULL; > + pci_set_drvdata(pdev, NULL); > + dev_err(&pdev->dev, "failed to open OCXL function\n"); > + goto err; > + } > + > + scm_data->ocxl_afu = ocxl_function_fetch_afu(scm_data->ocxl_fn, > 0); > + if (scm_data->ocxl_afu == NULL) { > + dev_err(&pdev->dev, "Could not get OCXL AFU from > function\n"); > + goto err; > + } > + > + ocxl_afu_get(scm_data->ocxl_afu); > + > + if (scm_register(scm_data) < 0) { > + dev_err(&pdev->dev, "Could not register SCM device with > the kernel\n"); > + goto err; > + } > + > + // Resources allocated below here are cleaned up in the release > handler > + > + if (ocxl_context_alloc(&scm_data->ocxl_context, scm_data- > >ocxl_afu, NULL)) { > + dev_err(&pdev->dev, "Could not allocate OCXL > context\n"); > + goto err; > + } > + > + if (ocxl_context_attach(scm_data->ocxl_context, 0, NULL)) { > + dev_err(&pdev->dev, "Could not attach ocxl context\n"); > + goto err; > + } > + > + if (scm_register_lpc_mem(scm_data)) { > + dev_err(&pdev->dev, "Could 
not register OCXL SCM memory > with libnvdimm\n"); > + goto err; > + } > + > + return 0; > + > +err: > + /* > + * Further cleanup is done in the release handler via > free_scm() > + * This allows us to keep the character device live to handle > IOCTLs to > + * investigate issues if the card has an error > + */ > + > + dev_err(&pdev->dev, > + "Error detected, will not register storage class > memory\n"); > + return -ENXIO; > +} > + > +static struct pci_driver scm_pci_driver = { > + .name = "ocxl-scm", > + .id_table = scm_pci_tbl, > + .probe = scm_probe, > + .remove = scm_remove, > + .shutdown = scm_remove, > +}; > + > +static int __init scm_init(void) > +{ > + int rc = 0; > + > + rc = pci_register_driver(&scm_pci_driver); > + if (rc) > + return rc; > + > + return 0; > +} > + > +static void scm_exit(void) > +{ > + pci_unregister_driver(&scm_pci_driver); > +} > + > +module_init(scm_init); > +module_exit(scm_exit); > + > +MODULE_DESCRIPTION("Storage Class Memory"); > +MODULE_LICENSE("GPL"); > diff --git a/drivers/nvdimm/ocxl/scm_internal.h > b/drivers/nvdimm/ocxl/scm_internal.h > new file mode 100644 > index 000000000000..6340012e0f8a > --- /dev/null > +++ b/drivers/nvdimm/ocxl/scm_internal.h > @@ -0,0 +1,28 @@ > +// SPDX-License-Identifier: GPL-2.0+ > +// Copyright 2019 IBM Corp. 
> + > +#include <linux/pci.h> > +#include <misc/ocxl.h> > +#include <linux/libnvdimm.h> > +#include <linux/mm.h> > + > +#define SCM_LABEL_AREA_SIZE (1UL << PA_SECTION_SHIFT) > + > +struct scm_function_0 { > + struct pci_dev *pdev; > + struct ocxl_fn *ocxl_fn; > +}; > + > +struct scm_data { > + struct device dev; > + struct pci_dev *pdev; > + struct ocxl_fn *ocxl_fn; > + struct nd_interleave_set nd_set; > + struct nvdimm_bus_descriptor bus_desc; > + struct nvdimm_bus *nvdimm_bus; > + struct ocxl_afu *ocxl_afu; > + struct ocxl_context *ocxl_context; > + void *metadata_addr; > + struct resource scm_res; > + struct nd_region *nd_region; > +}; -- Alastair D'Silva Open Source Developer Linux Technology Centre, IBM Australia mob: 0423 762 819