Jonathan Cameron wrote: > On Sun, 05 Feb 2023 17:03:29 -0800 > Dan Williams <dan.j.williams@xxxxxxxxx> wrote: > > > Region autodiscovery is an asynchrounous state machine advanced by > > cxl_port_probe(). After the decoders on an endpoint port are enumerated > > they are scanned for actively enabled instances. Each active decoder is > > flagged for auto-assembly CXL_DECODER_F_AUTO and attached to a region. > > If a region does not already exist for the address range setting of the > > decoder one is created. That creation process may race with other > > decoders of the same region being discovered since cxl_port_probe() is > > asynchronous. A new 'struct cxl_root_decoder' lock, @range_lock, is > > introduced to mitigate that race. > > > > Once all decoders have arrived, "p->nr_targets == p->interleave_ways", > > they are sorted by their relative decode position. The sort algorithm > > involves finding the point in the cxl_port topology where one leg of the > > decode leads to deviceA and the other deviceB. At that point in the > > topology the target order in the 'struct cxl_switch_decoder' indicates > > the relative position of those endpoint decoders in the region. > > > > >From that point the region goes through the same setup and validation > > steps as user-created regions, but instead of programming the decoders > > it validates that driver would have written the same values to the > > decoders as were already present. > > > > Signed-off-by: Dan Williams <dan.j.williams@xxxxxxxxx> > A few comments inline, > > Thanks, > > Jonathan > > > --- > > drivers/cxl/core/hdm.c | 5 > > drivers/cxl/core/port.c | 2 > > drivers/cxl/core/region.c | 496 ++++++++++++++++++++++++++++++++++++++++++++- > > drivers/cxl/cxl.h | 16 + > > drivers/cxl/port.c | 26 ++ > > 5 files changed, 531 insertions(+), 14 deletions(-) > > > > diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c > > index dcc16d7cb8f3..174cddfec6e8 100644 > > --- a/drivers/cxl/core/hdm.c > > +++ b/drivers/cxl/core/hdm.c > > @@ -674,7 +674,7 @@ static int cxl_decoder_reset(struct cxl_decoder *cxld) > > up_read(&cxl_dpa_rwsem); > > > > port->commit_end--; > > - cxld->flags &= ~CXL_DECODER_F_ENABLE; > > + cxld->flags &= ~(CXL_DECODER_F_ENABLE | CXL_DECODER_F_AUTO); > > > > return 0; > > } > > @@ -719,7 +719,7 @@ static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld, > > > > /* decoders are enabled if committed */ > > if (committed) { > > - cxld->flags |= CXL_DECODER_F_ENABLE; > > + cxld->flags |= CXL_DECODER_F_ENABLE | CXL_DECODER_F_AUTO; > > if (ctrl & CXL_HDM_DECODER0_CTRL_LOCK) > > cxld->flags |= CXL_DECODER_F_LOCK; > > if (FIELD_GET(CXL_HDM_DECODER0_CTRL_TYPE, ctrl)) > > @@ -783,6 +783,7 @@ static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld, > > return rc; > > } > > *dpa_base += dpa_size + skip; > > + > > :) No comment. Spurious newlines... my old friend. > > > return 0; > > } > > > > > ... > > > > +static int cmp_decode_pos(const void *a, const void *b) > > +{ > > + struct cxl_endpoint_decoder *cxled_a = *(typeof(cxled_a) *)a; > > + struct cxl_endpoint_decoder *cxled_b = *(typeof(cxled_b) *)b; > > + struct cxl_memdev *cxlmd_a = cxled_to_memdev(cxled_a); > > + struct cxl_memdev *cxlmd_b = cxled_to_memdev(cxled_b); > > + struct cxl_port *port_a = cxled_to_port(cxled_a); > > + struct cxl_port *port_b = cxled_to_port(cxled_b); > > + struct cxl_port *iter_a, *iter_b; > > + > > + /* Exit early if any prior sorting failed */ > > + if (cxled_a->pos < 0 || cxled_b->pos < 0) > > + return 0; > > + > > + /* > > + * Walk up the hierarchy to find a shared port, find the decoder > > + * that maps the range, compare the relative position of those > > + * dport mappings. > > + */ > > + for (iter_a = port_a; iter_a; iter_a = next_port(iter_a)) { > > + struct cxl_port *next_a, *next_b, *port; > > + struct cxl_switch_decoder *cxlsd; > > + > > + next_a = next_port(iter_a); > > + for (iter_b = port_b; iter_b; iter_b = next_port(iter_b)) { > > + int a_pos, b_pos, result; > > + struct device *dev; > > + unsigned int seq; > > + > > + next_b = next_port(iter_b); > > + if (next_a != next_b) > > + continue; > > + if (!next_a) > > + goto out; > Perhaps treat this as a 'find the match' then carry on, so as to avoid having the > guts of the handling nested so deep. So something like. I always like a good out-indent, will give it a look. > > struct cxl_switch_decoder *cxlsd; > struct cxl_port *next_a, *next_b, *port; > bool matched = false; // May be simpler test - I'm just lazy > > // possibly pull this out as a utility function so that > // error handling can be tidied up by simply returning > // rather than gotos. > for (iter_a = port_a; iter_a; iter_a = next_port(iter_a)) { > next_a = next_port(iter_a); > for (iter_b = port_b; iter_b; iter_b = next_port(iter_b)) { > int a_pos, b_pos, result; > struct device *dev; > unsigned int seq; > > next_b = next_port(iter_b); > if (next_a != next_b) > continue; > if (!next_a) > goto out; > port = next_a; > found = true; @found can just be @port. > break; > } > if (found) > break; > } > if (!found) > goto out; > > dev = device_find_child(&port->dev, cxled_a, > decoder_match_range); > if (!dev) { > struct range *range = &cxled_a->cxld.hpa_range; > dev_err(port->uport, > "failed to find decoder that maps %#llx-:%#llx\n", > range->start, range->end); > cxled_a->pos = -1; > return 0; > } > > cxlsd = to_cxl_switch_decoder(dev); > do { > seq = read_seqbegin(&cxlsd->target_lock); > find_positions(cxlsd, iter_a, iter_b, &a_pos, &b_pos); > } while (read_seqretry(&cxlsd->target_lock, seq)); > > if (a_pos < 0 || b_pos < 0) { > dev_err(port->uport, > "failed to find shared decoder for %s and %s\n", > dev_name(cxlmd_a->dev.parent), > dev_name(cxlmd_b->dev.parent)); > cxled_a->pos = -1; > result = 0; > //if factored as suggested above then a goto here and drop > // the else would be neater again. Perhaps, I'll take a look. > > } else { > result = a_pos - b_pos; > dev_dbg(port->uport, "%s: %s comes %s %s\n", > dev_name(&cxlsd->cxld.dev), > dev_name(cxlmd_a->dev.parent), > result < 0 ? "before" : "after", > dev_name(cxlmd_b->dev.parent)); > } > put_device(dev); > > return result; > out: > dev_err(cxlmd_a->dev.parent, "failed to find shared port with %s\n", > dev_name(cxlmd_b->dev.parent)); > cxled_a->pos = -1; > return 0; > } > > > > + dev = device_find_child(&port->dev, cxled_a, > > + decoder_match_range); > > + if (!dev) { > > + struct range *range = &cxled_a->cxld.hpa_range; > > + > > + dev_err(port->uport, > > + "failed to find decoder that maps %#llx-:%#llx\n", > > + range->start, range->end); > > + cxled_a->pos = -1; > > + return 0; > > + } > > + > > + cxlsd = to_cxl_switch_decoder(dev); > > + do { > > + seq = read_seqbegin(&cxlsd->target_lock); > > + find_positions(cxlsd, iter_a, iter_b, &a_pos, > > + &b_pos); > > + } while (read_seqretry(&cxlsd->target_lock, seq)); > > + > > + if (a_pos < 0 || b_pos < 0) { > > + dev_err(port->uport, > > + "failed to find shared decoder for %s and %s\n", > > + dev_name(cxlmd_a->dev.parent), > > + dev_name(cxlmd_b->dev.parent)); > > + cxled_a->pos = -1; > > + result = 0; > > + } else { > > + result = a_pos - b_pos; > > + dev_dbg(port->uport, "%s: %s comes %s %s\n", > > + dev_name(&cxlsd->cxld.dev), > > + dev_name(cxlmd_a->dev.parent), > > + result < 0 ? "before" : "after", > > + dev_name(cxlmd_b->dev.parent)); > > + } > > + > > + put_device(dev); > > + > > + return result; > > + } > > + } > > +out: > > + dev_err(cxlmd_a->dev.parent, "failed to find shared port with %s\n", > > + dev_name(cxlmd_b->dev.parent)); > > + cxled_a->pos = -1; > > + return 0; > > +} > > + > > +static int cxl_region_sort_targets(struct cxl_region *cxlr) > > +{ > > + struct cxl_region_params *p = &cxlr->params; > > + int i, rc = 0; > > + > > + sort(p->targets, p->nr_targets, sizeof(p->targets[0]), cmp_decode_pos, > > + NULL); > > + > > + for (i = 0; i < p->nr_targets; i++) { > > + struct cxl_endpoint_decoder *cxled = p->targets[i]; > > + > > + if (cxled->pos < 0) > > + rc = -ENXIO; > > + cxled->pos = i; > > + } > > + > > + dev_dbg(&cxlr->dev, "region sort %s\n", rc ? "failed" : "successful"); > > + return rc; > > +} > > + > > static int cxl_region_attach(struct cxl_region *cxlr, > > struct cxl_endpoint_decoder *cxled, int pos) > > { > > @@ -1350,6 +1572,50 @@ static int cxl_region_attach(struct cxl_region *cxlr, > > return -EINVAL; > > } > > > > + if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags)) { > > + int i; > > + > > + rc = cxl_region_attach_auto(cxlr, cxled, pos); > > + if (rc) > > + return rc; > > + > > + /* await more targets to arrive... */ > > + if (p->nr_targets < p->interleave_ways) > > + return 0; > > + > > + /* > > + * All targets are here, which implies all PCI enumeration that > > + * affects this region has been completed. Walk the topology to > > + * sort the devices into their relative region decode position. > > + */ > > + rc = cxl_region_sort_targets(cxlr); > > + if (rc) > > + return rc; > > + > > + for (i = 0; i < p->nr_targets; i++) { > > + cxled = p->targets[i]; > > + ep_port = cxled_to_port(cxled); > > + dport = cxl_find_dport_by_dev(root_port, > > + ep_port->host_bridge); > > + rc = cxl_region_attach_position(cxlr, cxlrd, cxled, > > + dport, i); > > + if (rc) > > + return rc; > > + } > > + > > + rc = cxl_region_setup_targets(cxlr); > > + if (rc) > > + return rc; > > + > > + /* > > + * If target setup succeeds in the autodiscovery case > > + * then the region is already committed. > > + */ > > + p->state = CXL_CONFIG_COMMIT; > > + > > + return 0; > > + } > > + > > rc = cxl_region_validate_position(cxlr, cxled, pos); > > if (rc) > > return rc; > > @@ -1500,8 +1766,8 @@ static int detach_target(struct cxl_region *cxlr, int pos) > > return rc; > > } > > > > -static size_t store_targetN(struct cxl_region *cxlr, const char *buf, int pos, > > - size_t len) > > +static ssize_t store_targetN(struct cxl_region *cxlr, const char *buf, int pos, > > + size_t len) > > ? > > > > ... > > > > + > > +void cxl_add_to_region(struct cxl_endpoint_decoder *cxled) > > +{ > > + struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); > > + struct range *hpa = &cxled->cxld.hpa_range; > > + struct cxl_decoder *cxld = &cxled->cxld; > > + struct cxl_root_decoder *cxlrd; > > + struct cxl_region_params *p; > > + struct cxl_region *cxlr; > > + struct cxl_port *root; > > + bool attach = false; > > + struct device *dev; > > + > > + root = find_cxl_root(&cxlmd->dev); > > + if (!root) { > > + dev_err(cxlmd->dev.parent, "%s: failed to map CXL root\n", > > + dev_name(&cxlmd->dev)); > > + return; > > I'm not a fan of muddling on from errors at lower layers of code. > I'd push that decision up to the caller - so return an error code from here. > If caller just wants to ignore it (with appropriate comment on why) then > that is fine by me. Yeah, in fact the caller should already know about the root at this point since this is called from cxl_port_probe() that did that port establishment. Perhaps this can just take @root as an argument. Note though that all errors here are ignored by the caller because there is simply no way to know ahead of time whether this region will succeed in receiving all its members. So the failure scenario is missing regions, not anything that would make the port driver fail to load. > > > > + } > > + > > + dev = device_find_child(&root->dev, &cxld->hpa_range, > > + match_decoder_by_range); > > device_find_child is acquiring a reference to the child I think. > > > + if (!dev) { > > + dev_err(cxlmd->dev.parent, > > + "%s:%s no CXL window for range %#llx:%#llx\n", > > + dev_name(&cxlmd->dev), dev_name(&cxld->dev), > > + cxld->hpa_range.start, cxld->hpa_range.end); > > + goto out; > > + } > > + cxlrd = to_cxl_root_decoder(dev); > Here we've stashed the dev that we have a reference to in cxlrd. > > + > > + mutex_lock(&cxlrd->range_lock); > > + dev = device_find_child(&cxlrd->cxlsd.cxld.dev, hpa, > > + match_region_by_range); > > + if (!dev) > > + cxlr = construct_region(cxlrd, cxled); > > + else > > + cxlr = to_cxl_region(dev); > > + mutex_unlock(&cxlrd->range_lock); > > + > > + if (IS_ERR(cxlr)) > > In this path, do we drop the reference to cxlrd->dev? No, good eye. The reference can be dropped immediately since the root decoder will not be unregistered until the root port is removed and by this point this port has registered itself in the device_unregister() chain when its ancestors are taken out. > > > + goto out; > > + > > + attach_target(cxlr, cxled, -1, TASK_UNINTERRUPTIBLE); > > + > > + down_read(&cxl_region_rwsem); > > + p = &cxlr->params; > > + attach = p->state >= CXL_CONFIG_COMMIT; > > Test is a bit non obvious. In the tree I'm looking at > CXL_CONFIG_COMMIT is last value in the enum. Can switch to "==" > > > > + up_read(&cxl_region_rwsem); > > + > > + if (attach) { > > + int rc = device_attach(&cxlr->dev); > > + > > + /* > > + * If device_attach() fails the range may still be > > + * active via the platform-firmware memory map > > + */ > > + if (rc < 0) > > We also want to know if the no matching driver found case happened so perhaps > if (rc != 1) That can't happen as the only possible driver, cxl_region_driver, is in this same file. I'll add a comment to that effect as its a small layering violation to know that. > > > + dev_err(&cxlr->dev, "failed to enable, range: %pr\n", > > + p->res); > > + } > > + > > + put_device(&cxlr->dev); > > +out: > > + put_device(&root->dev); > > +} > > +EXPORT_SYMBOL_NS_GPL(cxl_add_to_region, CXL); > > + > > static int cxl_region_invalidate_memregion(struct cxl_region *cxlr) > > { > > if (!test_bit(CXL_REGION_F_INCOHERENT, &cxlr->flags)) > > @@ -2103,6 +2554,15 @@ static int cxl_region_invalidate_memregion(struct cxl_region *cxlr) > > return 0; > > } > > > diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h > > index ca76879af1de..9b3765c5c81a 100644 > > --- a/drivers/cxl/cxl.h > > +++ b/drivers/cxl/cxl.h > > @@ -261,6 +261,8 @@ resource_size_t cxl_rcrb_to_component(struct device *dev, > > * cxl_decoder flags that define the type of memory / devices this > > * decoder supports as well as configuration lock status See "CXL 2.0 > > * 8.2.5.12.7 CXL HDM Decoder 0 Control Register" for details. > > + * Additionally indicate whether decoder settings were autodetected, > > + * user customized. > > */ > > #define CXL_DECODER_F_RAM BIT(0) > > #define CXL_DECODER_F_PMEM BIT(1) > > @@ -268,6 +270,7 @@ resource_size_t cxl_rcrb_to_component(struct device *dev, > > #define CXL_DECODER_F_TYPE3 BIT(3) > > #define CXL_DECODER_F_LOCK BIT(4) > > #define CXL_DECODER_F_ENABLE BIT(5) > > +#define CXL_DECODER_F_AUTO BIT(6) > > That's a bit nasty and unexpected - can we rename to make it really clear > it's not a hardware flag at the point of usage. > Or just put it in a a new structure field rather than pushing it in here. I'll move it to an endpoint-flag value since it is only relevant for those. > > #define CXL_DECODER_F_MASK GENMASK(5, 0) > > Hmm. This mask isn't currently used and now has > an odd meaning because of hiding a flag in bit(6). True, it can be deleted by a separate cleanup. > > > ... > > > > > /* > > diff --git a/drivers/cxl/port.c b/drivers/cxl/port.c > > index 5453771bf330..012a0c6f8476 100644 > > --- a/drivers/cxl/port.c > > +++ b/drivers/cxl/port.c > > @@ -30,6 +30,23 @@ static void schedule_detach(void *cxlmd) > > schedule_cxl_memdev_detach(cxlmd); > > } > > > > +static int discover_region(struct device *dev, void *data) > > +{ > > + const unsigned long flags = CXL_DECODER_F_ENABLE | CXL_DECODER_F_AUTO; > > + struct cxl_endpoint_decoder *cxled; > > + > > + if (!is_endpoint_decoder(dev)) > > + return 0; > > + > > + cxled = to_cxl_endpoint_decoder(dev); > > + if ((cxled->cxld.flags & flags) != flags) > > + return 0; > > + > > + cxl_add_to_region(cxled); > > + > > + return 0; > > +} > > + > > static int cxl_port_probe(struct device *dev) > > { > > struct cxl_port *port = to_cxl_port(dev); > > @@ -78,6 +95,15 @@ static int cxl_port_probe(struct device *dev) > > return rc; > > } > > > > + if (!is_cxl_endpoint(port)) > > + return 0; > > + > > + /* > > + * Now that all endpoint decoders are successfully enumerated, > > + * try to assemble regions from committed decoders > > + */ > > + device_for_each_child(dev, NULL, discover_region); > > + > > There is a steady reduction in common code vs specific code for the two > cases in here. Maybe it's time just to split the probe into > > static int cxl_port_probe(struct device *dev) > { > if (is_cxl_endpoint(port)) > cxl_port_probe_endpoint(); > else > cxl_port_probe_not endpoint(); //with better naming. > } > > Shared code is about 7 lines vs 40+ unshared. Works for me.