Jonathan Cameron wrote: > On Thu, 23 Jun 2022 21:19:50 -0700 > Dan Williams <dan.j.williams@xxxxxxxxx> wrote: > > > The LIBNVDIMM subsystem is a platform agnostic representation of system > > NVDIMM / persistent memory resources. To date, the CXL subsystem's > > interaction with LIBNVDIMM has been to register an nvdimm-bridge device > > and cxl_nvdimm objects to proxy CXL capabilities into existing LIBNVDIMM > > subsystem mechanics. > > > > With regions the approach is the same. Create a new cxl_pmem_region > > object to proxy CXL region details into a LIBNVDIMM definition. With > > this enabling LIBNVDIMM can partition CXL persistent memory regions with > > legacy namespace labels. A follow-on patch will add CXL region label and > > CXL namespace label support to persist region configurations across > > driver reload / system-reset events. > ah. Now I see why we share ID space with NVDIMMs. Fair enough, I should > have read to the end ;) > > > > > Co-developed-by: Ben Widawsky <bwidawsk@xxxxxxxxxx> > > Signed-off-by: Ben Widawsky <bwidawsk@xxxxxxxxxx> > > Signed-off-by: Dan Williams <dan.j.williams@xxxxxxxxx> > > End of day, so a fairly superficial review on this and I'll hopefully > take a second look at one or two of the earlier patches when time allows. > > Jonathan > > ... > > > +static struct cxl_pmem_region *cxl_pmem_region_alloc(struct cxl_region *cxlr) > > +{ > > + struct cxl_pmem_region *cxlr_pmem = ERR_PTR(-ENXIO); > > Rarely used, so better to set it where it is. Ok. 
> > > + struct cxl_region_params *p = &cxlr->params; > > + struct device *dev; > > + int i; > > + > > + down_read(&cxl_region_rwsem); > > + if (p->state != CXL_CONFIG_COMMIT) > > + goto out; > > + cxlr_pmem = kzalloc(struct_size(cxlr_pmem, mapping, p->nr_targets), > > + GFP_KERNEL); > > + if (!cxlr_pmem) { > > + cxlr_pmem = ERR_PTR(-ENOMEM); > > + goto out; > > + } > > + > > + cxlr_pmem->hpa_range.start = p->res->start; > > + cxlr_pmem->hpa_range.end = p->res->end; > > + > > + /* Snapshot the region configuration underneath the cxl_region_rwsem */ > > + cxlr_pmem->nr_mappings = p->nr_targets; > > + for (i = 0; i < p->nr_targets; i++) { > > + struct cxl_endpoint_decoder *cxled = p->targets[i]; > > + struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); > > + struct cxl_pmem_region_mapping *m = &cxlr_pmem->mapping[i]; > > + > > + m->cxlmd = cxlmd; > > + get_device(&cxlmd->dev); > > + m->start = cxled->dpa_res->start; > > + m->size = resource_size(cxled->dpa_res); > > + m->position = i; > > + } > > + > > + dev = &cxlr_pmem->dev; > > + cxlr_pmem->cxlr = cxlr; > > + device_initialize(dev); > > + lockdep_set_class(&dev->mutex, &cxl_pmem_region_key); > > + device_set_pm_not_required(dev); > > + dev->parent = &cxlr->dev; > > + dev->bus = &cxl_bus_type; > > + dev->type = &cxl_pmem_region_type; > > +out: > > + up_read(&cxl_region_rwsem); > > + > > + return cxlr_pmem; > > +} > > + > > +static void cxlr_pmem_unregister(void *dev) > > +{ > > + device_unregister(dev); > > +} > > + > > +/** > > + * devm_cxl_add_pmem_region() - add a cxl_region to nd_region bridge > > + * @host: same host as @cxlmd > > Run kernel-doc over these and clean all the warnings up. > Parameter is cxlr not host Fixed. > > > > + * > > + * Return: 0 on success negative error code on failure. 
> > + */ > > > > /* > > * Unit test builds overrides this to __weak, find the 'strong' version > > diff --git a/drivers/cxl/pmem.c b/drivers/cxl/pmem.c > > index b271f6e90b91..4ba7248275ac 100644 > > --- a/drivers/cxl/pmem.c > > +++ b/drivers/cxl/pmem.c > > @@ -7,6 +7,7 @@ > > > > > > > +static int match_cxl_nvdimm(struct device *dev, void *data) > > +{ > > + return is_cxl_nvdimm(dev); > > +} > > + > > +static void unregister_region(void *nd_region) > > Better to give this a more specific name as we have several > unregister_region() functions in CXL now. Ok, unregister_nvdimm_region() it is. > > > +{ > > + struct cxl_nvdimm_bridge *cxl_nvb; > > + struct cxl_pmem_region *cxlr_pmem; > > + int i; > > + > > + cxlr_pmem = nd_region_provider_data(nd_region); > > + cxl_nvb = cxlr_pmem->bridge; > > + device_lock(&cxl_nvb->dev); > > + for (i = 0; i < cxlr_pmem->nr_mappings; i++) { > > + struct cxl_pmem_region_mapping *m = &cxlr_pmem->mapping[i]; > > + struct cxl_nvdimm *cxl_nvd = m->cxl_nvd; > > + > > + if (cxl_nvd->region) { > > + put_device(&cxlr_pmem->dev); > > + cxl_nvd->region = NULL; > > + } > > + } > > + device_unlock(&cxl_nvb->dev); > > + > > + nvdimm_region_delete(nd_region); > > +} > > + > > > + > > +static int cxl_pmem_region_probe(struct device *dev) > > +{ > > + struct nd_mapping_desc mappings[CXL_DECODER_MAX_INTERLEAVE]; > > + struct cxl_pmem_region *cxlr_pmem = to_cxl_pmem_region(dev); > > + struct cxl_region *cxlr = cxlr_pmem->cxlr; > > + struct cxl_pmem_region_info *info = NULL; > > + struct cxl_nvdimm_bridge *cxl_nvb; > > + struct nd_interleave_set *nd_set; > > + struct nd_region_desc ndr_desc; > > + struct cxl_nvdimm *cxl_nvd; > > + struct nvdimm *nvdimm; > > + struct resource *res; > > + int rc = 0, i; > > + > > + cxl_nvb = cxl_find_nvdimm_bridge(&cxlr_pmem->mapping[0].cxlmd->dev); > > + if (!cxl_nvb) { > > + dev_dbg(dev, "bridge not found\n"); > > + return -ENXIO; > > + } > > + cxlr_pmem->bridge = cxl_nvb; > > + > > + device_lock(&cxl_nvb->dev); > > + 
if (!cxl_nvb->nvdimm_bus) { > > + dev_dbg(dev, "nvdimm bus not found\n"); > > + rc = -ENXIO; > > + goto out; > > + } > > + > > + memset(&mappings, 0, sizeof(mappings)); > > + memset(&ndr_desc, 0, sizeof(ndr_desc)); > > + > > + res = devm_kzalloc(dev, sizeof(*res), GFP_KERNEL); > > + if (!res) { > > + rc = -ENOMEM; > > + goto out; > > + } > > + > > + res->name = "Persistent Memory"; > > + res->start = cxlr_pmem->hpa_range.start; > > + res->end = cxlr_pmem->hpa_range.end; > > + res->flags = IORESOURCE_MEM; > > + res->desc = IORES_DESC_PERSISTENT_MEMORY; > > + > > + rc = insert_resource(&iomem_resource, res); > > + if (rc) > > + goto out; > > + > > + rc = devm_add_action_or_reset(dev, cxlr_pmem_remove_resource, res); > > + if (rc) > > + goto out; > > + > > + ndr_desc.res = res; > > + ndr_desc.provider_data = cxlr_pmem; > > + > > + ndr_desc.numa_node = memory_add_physaddr_to_nid(res->start); > > + ndr_desc.target_node = phys_to_target_node(res->start); > > + if (ndr_desc.target_node == NUMA_NO_NODE) { > > + ndr_desc.target_node = ndr_desc.numa_node; > > + dev_dbg(&cxlr->dev, "changing target node from %d to %d", > > + NUMA_NO_NODE, ndr_desc.target_node); > > + } > > + > > + nd_set = devm_kzalloc(dev, sizeof(*nd_set), GFP_KERNEL); > > + if (!nd_set) { > > + rc = -ENOMEM; > > + goto out; > > + } > > + > > + ndr_desc.memregion = cxlr->id; > > + set_bit(ND_REGION_CXL, &ndr_desc.flags); > > + set_bit(ND_REGION_PERSIST_MEMCTRL, &ndr_desc.flags); > > + > > + info = kmalloc_array(cxlr_pmem->nr_mappings, sizeof(*info), GFP_KERNEL); > > + if (!info) > > + goto out; > > + > > + rc = -ENODEV; > > Personal taste, but I'd much rather see that set in the error handlers > so I can quickly see where it applies. Ok. 
> > > + for (i = 0; i < cxlr_pmem->nr_mappings; i++) { > > + struct cxl_pmem_region_mapping *m = &cxlr_pmem->mapping[i]; > > + struct cxl_memdev *cxlmd = m->cxlmd; > > + struct cxl_dev_state *cxlds = cxlmd->cxlds; > > + struct device *d; > > + > > + d = device_find_child(&cxlmd->dev, NULL, match_cxl_nvdimm); > > + if (!d) { > > + dev_dbg(dev, "[%d]: %s: no cxl_nvdimm found\n", i, > > + dev_name(&cxlmd->dev)); > > + goto err; > > + } > > + > > + /* safe to drop ref now with bridge lock held */ > > + put_device(d); > > + > > + cxl_nvd = to_cxl_nvdimm(d); > > + nvdimm = dev_get_drvdata(&cxl_nvd->dev); > > + if (!nvdimm) { > > + dev_dbg(dev, "[%d]: %s: no nvdimm found\n", i, > > + dev_name(&cxlmd->dev)); > > + goto err; > > + } > > + cxl_nvd->region = cxlr_pmem; > > + get_device(&cxlr_pmem->dev); > > + m->cxl_nvd = cxl_nvd; > > + mappings[i] = (struct nd_mapping_desc) { > > + .nvdimm = nvdimm, > > + .start = m->start, > > + .size = m->size, > > + .position = i, > > + }; > > + info[i].offset = m->start; > > + info[i].serial = cxlds->serial; > > + } > > + ndr_desc.num_mappings = cxlr_pmem->nr_mappings; > > + ndr_desc.mapping = mappings; > > + > > + /* > > + * TODO enable CXL labels which skip the need for 'interleave-set cookie' > > + */ > > + nd_set->cookie1 = > > + nd_fletcher64(info, sizeof(*info) * cxlr_pmem->nr_mappings, 0); > > + nd_set->cookie2 = nd_set->cookie1; > > + ndr_desc.nd_set = nd_set; > > + > > + cxlr_pmem->nd_region = > > + nvdimm_pmem_region_create(cxl_nvb->nvdimm_bus, &ndr_desc); > > + if (IS_ERR(cxlr_pmem->nd_region)) { > > + rc = PTR_ERR(cxlr_pmem->nd_region); > > + goto err; > > + } else > > no need for else as other branch has gone flying off down to > err. Yup. > > > + rc = devm_add_action_or_reset(dev, unregister_region, > > + cxlr_pmem->nd_region); > > +out: > > Having labels out: and err: where both are used for errors is pretty > confusing naming... Perhaps you are better off just not sharing the > good exit path with any of the error paths. 
> Ok. > > > + device_unlock(&cxl_nvb->dev); > > + put_device(&cxl_nvb->dev); > > + kfree(info); > > Ok, so safe to do this here, but would be nice to do this > in reverse order of setup with multiple labels so we can avoid > paths that free things that were never created. Doesn't look > like it would hurt much to move kfree(info) above the device_unlock() > and only do that if we have allocated info. Ok, but no need for more labels: unconditionally freeing info and trying to unwind the mapping references can proceed if @info is initialized to NULL and @i is initialized to 0.