Re: [PATCH 11/18] cxl/region: Add region autodiscovery

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Sun, 05 Feb 2023 17:03:29 -0800
Dan Williams <dan.j.williams@xxxxxxxxx> wrote:

> Region autodiscovery is an asynchrounous state machine advanced by
> cxl_port_probe(). After the decoders on an endpoint port are enumerated
> they are scanned for actively enabled instances. Each active decoder is
> flagged for auto-assembly CXL_DECODER_F_AUTO and attached to a region.
> If a region does not already exist for the address range setting of the
> decoder one is created. That creation process may race with other
> decoders of the same region being discovered since cxl_port_probe() is
> asynchronous. A new 'struct cxl_root_decoder' lock, @range_lock, is
> introduced to mitigate that race.
> 
> Once all decoders have arrived, "p->nr_targets == p->interleave_ways",
> they are sorted by their relative decode position. The sort algorithm
> involves finding the point in the cxl_port topology where one leg of the
> decode leads to deviceA and the other deviceB. At that point in the
> topology the target order in the 'struct cxl_switch_decoder' indicates
> the relative position of those endpoint decoders in the region.
> 
> >From that point the region goes through the same setup and validation  
> steps as user-created regions, but instead of programming the decoders
> it validates that driver would have written the same values to the
> decoders as were already present.
> 
> Signed-off-by: Dan Williams <dan.j.williams@xxxxxxxxx>
A few comments inline,

Thanks,

Jonathan

> ---
>  drivers/cxl/core/hdm.c    |    5 
>  drivers/cxl/core/port.c   |    2 
>  drivers/cxl/core/region.c |  496 ++++++++++++++++++++++++++++++++++++++++++++-
>  drivers/cxl/cxl.h         |   16 +
>  drivers/cxl/port.c        |   26 ++
>  5 files changed, 531 insertions(+), 14 deletions(-)
> 
> diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c
> index dcc16d7cb8f3..174cddfec6e8 100644
> --- a/drivers/cxl/core/hdm.c
> +++ b/drivers/cxl/core/hdm.c
> @@ -674,7 +674,7 @@ static int cxl_decoder_reset(struct cxl_decoder *cxld)
>  	up_read(&cxl_dpa_rwsem);
>  
>  	port->commit_end--;
> -	cxld->flags &= ~CXL_DECODER_F_ENABLE;
> +	cxld->flags &= ~(CXL_DECODER_F_ENABLE | CXL_DECODER_F_AUTO);
>  
>  	return 0;
>  }
> @@ -719,7 +719,7 @@ static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld,
>  
>  	/* decoders are enabled if committed */
>  	if (committed) {
> -		cxld->flags |= CXL_DECODER_F_ENABLE;
> +		cxld->flags |= CXL_DECODER_F_ENABLE | CXL_DECODER_F_AUTO;
>  		if (ctrl & CXL_HDM_DECODER0_CTRL_LOCK)
>  			cxld->flags |= CXL_DECODER_F_LOCK;
>  		if (FIELD_GET(CXL_HDM_DECODER0_CTRL_TYPE, ctrl))
> @@ -783,6 +783,7 @@ static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld,
>  		return rc;
>  	}
>  	*dpa_base += dpa_size + skip;
> +

:)  No comment.

>  	return 0;
>  }
>  


...


> +static int cmp_decode_pos(const void *a, const void *b)
> +{
> +	struct cxl_endpoint_decoder *cxled_a = *(typeof(cxled_a) *)a;
> +	struct cxl_endpoint_decoder *cxled_b = *(typeof(cxled_b) *)b;
> +	struct cxl_memdev *cxlmd_a = cxled_to_memdev(cxled_a);
> +	struct cxl_memdev *cxlmd_b = cxled_to_memdev(cxled_b);
> +	struct cxl_port *port_a = cxled_to_port(cxled_a);
> +	struct cxl_port *port_b = cxled_to_port(cxled_b);
> +	struct cxl_port *iter_a, *iter_b;
> +
> +	/* Exit early if any prior sorting failed */
> +	if (cxled_a->pos < 0 || cxled_b->pos < 0)
> +		return 0;
> +
> +	/*
> +	 * Walk up the hierarchy to find a shared port, find the decoder
> +	 * that maps the range, compare the relative position of those
> +	 * dport mappings.
> +	 */
> +	for (iter_a = port_a; iter_a; iter_a = next_port(iter_a)) {
> +		struct cxl_port *next_a, *next_b, *port;
> +		struct cxl_switch_decoder *cxlsd;
> +
> +		next_a = next_port(iter_a);
> +		for (iter_b = port_b; iter_b; iter_b = next_port(iter_b)) {
> +			int a_pos, b_pos, result;
> +			struct device *dev;
> +			unsigned int seq;
> +
> +			next_b = next_port(iter_b);
> +			if (next_a != next_b)
> +				continue;
> +			if (!next_a)
> +				goto out;
Perhaps treat this as a 'find the match' then carry on, so as to avoid having the
guts of the handling nested so deep.  So something like.

	struct cxl_switch_decoder *cxlsd;
	struct cxl_port *next_a, *next_b, *port;
	bool matched = false; // May be simpler test - I'm just lazy

	// possibly pull this out as a utility function so that
	// error handling can be tidied up by simply returning 
	// rather than gotos.
	for (iter_a = port_a; iter_a; iter_a = next_port(iter_a)) {
		next_a = next_port(iter_a);
		for (iter_b = port_b; iter_b; iter_b = next_port(iter_b)) {
			int a_pos, b_pos, result;
			struct device *dev;
			unsigned int seq;

			next_b = next_port(iter_b);
			if (next_a != next_b)
				continue;
			if (!next_a)
				goto out;
			port = next_a;
			found = true;
			break;
		}
		if (found)
			break;
	}
	if (!found)
		goto out;

	dev = device_find_child(&port->dev, cxled_a,
				decoder_match_range);
	if (!dev) {
		struct range *range = &cxled_a->cxld.hpa_range;
		dev_err(port->uport,
			"failed to find decoder that maps %#llx-:%#llx\n",
		range->start, range->end);
		cxled_a->pos = -1;
		return 0;
	}

	cxlsd = to_cxl_switch_decoder(dev);
	do {
		seq = read_seqbegin(&cxlsd->target_lock);
		find_positions(cxlsd, iter_a, iter_b, &a_pos, &b_pos);
	} while (read_seqretry(&cxlsd->target_lock, seq));

	if (a_pos < 0 || b_pos < 0) {
		dev_err(port->uport,
			"failed to find shared decoder for %s and %s\n",
			dev_name(cxlmd_a->dev.parent),
			dev_name(cxlmd_b->dev.parent));
		cxled_a->pos = -1;
		result = 0;
//if factored as suggested above then a goto here and drop
// the else would be neater again.

	} else {
		result = a_pos - b_pos;
		dev_dbg(port->uport, "%s: %s comes %s %s\n",
			dev_name(&cxlsd->cxld.dev),
			dev_name(cxlmd_a->dev.parent),
			result < 0 ? "before" : "after",
			dev_name(cxlmd_b->dev.parent));
		}
	put_device(dev);

	return result;
out:
	dev_err(cxlmd_a->dev.parent, "failed to find shared port with %s\n",
		dev_name(cxlmd_b->dev.parent));
	cxled_a->pos = -1;
	return 0;
}


> +			dev = device_find_child(&port->dev, cxled_a,
> +						decoder_match_range);
> +			if (!dev) {
> +				struct range *range = &cxled_a->cxld.hpa_range;
> +
> +				dev_err(port->uport,
> +					"failed to find decoder that maps %#llx-:%#llx\n",
> +					range->start, range->end);
> +				cxled_a->pos = -1;
> +				return 0;
> +			}
> +
> +			cxlsd = to_cxl_switch_decoder(dev);
> +			do {
> +				seq = read_seqbegin(&cxlsd->target_lock);
> +				find_positions(cxlsd, iter_a, iter_b, &a_pos,
> +					       &b_pos);
> +			} while (read_seqretry(&cxlsd->target_lock, seq));
> +
> +			if (a_pos < 0 || b_pos < 0) {
> +				dev_err(port->uport,
> +					"failed to find shared decoder for %s and %s\n",
> +					dev_name(cxlmd_a->dev.parent),
> +					dev_name(cxlmd_b->dev.parent));
> +				cxled_a->pos = -1;
> +				result = 0;
> +			} else {
> +				result = a_pos - b_pos;
> +				dev_dbg(port->uport, "%s: %s comes %s %s\n",
> +					dev_name(&cxlsd->cxld.dev),
> +					dev_name(cxlmd_a->dev.parent),
> +					result < 0 ? "before" : "after",
> +					dev_name(cxlmd_b->dev.parent));
> +			}
> +
> +			put_device(dev);
> +
> +			return result;
> +		}
> +	}
> +out:
> +	dev_err(cxlmd_a->dev.parent, "failed to find shared port with %s\n",
> +		dev_name(cxlmd_b->dev.parent));
> +	cxled_a->pos = -1;
> +	return 0;
> +}
> +
> +static int cxl_region_sort_targets(struct cxl_region *cxlr)
> +{
> +	struct cxl_region_params *p = &cxlr->params;
> +	int i, rc = 0;
> +
> +	sort(p->targets, p->nr_targets, sizeof(p->targets[0]), cmp_decode_pos,
> +	     NULL);
> +
> +	for (i = 0; i < p->nr_targets; i++) {
> +		struct cxl_endpoint_decoder *cxled = p->targets[i];
> +
> +		if (cxled->pos < 0)
> +			rc = -ENXIO;
> +		cxled->pos = i;
> +	}
> +
> +	dev_dbg(&cxlr->dev, "region sort %s\n", rc ? "failed" : "successful");
> +	return rc;
> +}
> +
>  static int cxl_region_attach(struct cxl_region *cxlr,
>  			     struct cxl_endpoint_decoder *cxled, int pos)
>  {
> @@ -1350,6 +1572,50 @@ static int cxl_region_attach(struct cxl_region *cxlr,
>  		return -EINVAL;
>  	}
>  
> +	if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags)) {
> +		int i;
> +
> +		rc = cxl_region_attach_auto(cxlr, cxled, pos);
> +		if (rc)
> +			return rc;
> +
> +		/* await more targets to arrive... */
> +		if (p->nr_targets < p->interleave_ways)
> +			return 0;
> +
> +		/*
> +		 * All targets are here, which implies all PCI enumeration that
> +		 * affects this region has been completed. Walk the topology to
> +		 * sort the devices into their relative region decode position.
> +		 */
> +		rc = cxl_region_sort_targets(cxlr);
> +		if (rc)
> +			return rc;
> +
> +		for (i = 0; i < p->nr_targets; i++) {
> +			cxled = p->targets[i];
> +			ep_port = cxled_to_port(cxled);
> +			dport = cxl_find_dport_by_dev(root_port,
> +						      ep_port->host_bridge);
> +			rc = cxl_region_attach_position(cxlr, cxlrd, cxled,
> +							dport, i);
> +			if (rc)
> +				return rc;
> +		}
> +
> +		rc = cxl_region_setup_targets(cxlr);
> +		if (rc)
> +			return rc;
> +
> +		/*
> +		 * If target setup succeeds in the autodiscovery case
> +		 * then the region is already committed.
> +		 */
> +		p->state = CXL_CONFIG_COMMIT;
> +
> +		return 0;
> +	}
> +
>  	rc = cxl_region_validate_position(cxlr, cxled, pos);
>  	if (rc)
>  		return rc;
> @@ -1500,8 +1766,8 @@ static int detach_target(struct cxl_region *cxlr, int pos)
>  	return rc;
>  }
>  
> -static size_t store_targetN(struct cxl_region *cxlr, const char *buf, int pos,
> -			    size_t len)
> +static ssize_t store_targetN(struct cxl_region *cxlr, const char *buf, int pos,
> +			     size_t len)

?



...


> +
> +void cxl_add_to_region(struct cxl_endpoint_decoder *cxled)
> +{
> +	struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
> +	struct range *hpa = &cxled->cxld.hpa_range;
> +	struct cxl_decoder *cxld = &cxled->cxld;
> +	struct cxl_root_decoder *cxlrd;
> +	struct cxl_region_params *p;
> +	struct cxl_region *cxlr;
> +	struct cxl_port *root;
> +	bool attach = false;
> +	struct device *dev;
> +
> +	root = find_cxl_root(&cxlmd->dev);
> +	if (!root) {
> +		dev_err(cxlmd->dev.parent, "%s: failed to map CXL root\n",
> +			dev_name(&cxlmd->dev));
> +		return;

I'm not a fan of muddling on from errors at lower layers of code.
I'd push that decision up to the caller - so return an error code from here.
If caller just wants to ignore it (with appropriate comment on why) then
that is fine by me.


> +	}
> +
> +	dev = device_find_child(&root->dev, &cxld->hpa_range,
> +				  match_decoder_by_range);

device_find_child is acquiring a reference to the child I think.

> +	if (!dev) {
> +		dev_err(cxlmd->dev.parent,
> +			"%s:%s no CXL window for range %#llx:%#llx\n",
> +			dev_name(&cxlmd->dev), dev_name(&cxld->dev),
> +			cxld->hpa_range.start, cxld->hpa_range.end);
> +		goto out;
> +	}
> +	cxlrd = to_cxl_root_decoder(dev);
Here we've stashed the dev that we have a reference to in cxlrd.
> +
> +	mutex_lock(&cxlrd->range_lock);
> +	dev = device_find_child(&cxlrd->cxlsd.cxld.dev, hpa,
> +				match_region_by_range);
> +	if (!dev)
> +		cxlr = construct_region(cxlrd, cxled);
> +	else
> +		cxlr = to_cxl_region(dev);
> +	mutex_unlock(&cxlrd->range_lock);
> +
> +	if (IS_ERR(cxlr))

In this path, do we drop the reference to cxlrd->dev?

> +		goto out;
> +
> +	attach_target(cxlr, cxled, -1, TASK_UNINTERRUPTIBLE);
> +
> +	down_read(&cxl_region_rwsem);
> +	p = &cxlr->params;
> +	attach = p->state >= CXL_CONFIG_COMMIT;

Test is a bit non obvious.  In the tree I'm looking at
CXL_CONFIG_COMMIT is last value in the enum.


> +	up_read(&cxl_region_rwsem);
> +
> +	if (attach) {
> +		int rc = device_attach(&cxlr->dev);
> +
> +		/*
> +		 * If device_attach() fails the range may still be
> +		 * active via the platform-firmware memory map
> +		 */
> +		if (rc < 0)

We also want to know if the no matching driver found case happened so perhaps
		if (rc != 1)

> +			dev_err(&cxlr->dev, "failed to enable, range: %pr\n",
> +				p->res);
> +	}
> +
> +	put_device(&cxlr->dev);
> +out:
> +	put_device(&root->dev);
> +}
> +EXPORT_SYMBOL_NS_GPL(cxl_add_to_region, CXL);
> +
>  static int cxl_region_invalidate_memregion(struct cxl_region *cxlr)
>  {
>  	if (!test_bit(CXL_REGION_F_INCOHERENT, &cxlr->flags))
> @@ -2103,6 +2554,15 @@ static int cxl_region_invalidate_memregion(struct cxl_region *cxlr)
>  	return 0;
>  }

> diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
> index ca76879af1de..9b3765c5c81a 100644
> --- a/drivers/cxl/cxl.h
> +++ b/drivers/cxl/cxl.h
> @@ -261,6 +261,8 @@ resource_size_t cxl_rcrb_to_component(struct device *dev,
>   * cxl_decoder flags that define the type of memory / devices this
>   * decoder supports as well as configuration lock status See "CXL 2.0
>   * 8.2.5.12.7 CXL HDM Decoder 0 Control Register" for details.
> + * Additionally indicate whether decoder settings were autodetected,
> + * user customized.
>   */
>  #define CXL_DECODER_F_RAM   BIT(0)
>  #define CXL_DECODER_F_PMEM  BIT(1)
> @@ -268,6 +270,7 @@ resource_size_t cxl_rcrb_to_component(struct device *dev,
>  #define CXL_DECODER_F_TYPE3 BIT(3)
>  #define CXL_DECODER_F_LOCK  BIT(4)
>  #define CXL_DECODER_F_ENABLE    BIT(5)
> +#define CXL_DECODER_F_AUTO  BIT(6)

That's a bit nasty and unexpected - can we rename to make it really clear
it's not a hardware flag at the point of usage.
Or just put it in a a new structure field rather than pushing it in here.

>  #define CXL_DECODER_F_MASK  GENMASK(5, 0)

Hmm. This mask isn't currently used and now has
an odd meaning because of hiding a flag in bit(6).


...

>  
>  /*
> diff --git a/drivers/cxl/port.c b/drivers/cxl/port.c
> index 5453771bf330..012a0c6f8476 100644
> --- a/drivers/cxl/port.c
> +++ b/drivers/cxl/port.c
> @@ -30,6 +30,23 @@ static void schedule_detach(void *cxlmd)
>  	schedule_cxl_memdev_detach(cxlmd);
>  }
>  
> +static int discover_region(struct device *dev, void *data)
> +{
> +	const unsigned long flags = CXL_DECODER_F_ENABLE | CXL_DECODER_F_AUTO;
> +	struct cxl_endpoint_decoder *cxled;
> +
> +	if (!is_endpoint_decoder(dev))
> +		return 0;
> +
> +	cxled = to_cxl_endpoint_decoder(dev);
> +	if ((cxled->cxld.flags & flags) != flags)
> +		return 0;
> +
> +	cxl_add_to_region(cxled);
> +
> +	return 0;
> +}
> +
>  static int cxl_port_probe(struct device *dev)
>  {
>  	struct cxl_port *port = to_cxl_port(dev);
> @@ -78,6 +95,15 @@ static int cxl_port_probe(struct device *dev)
>  		return rc;
>  	}
>  
> +	if (!is_cxl_endpoint(port))
> +		return 0;
> +
> +	/*
> +	 * Now that all endpoint decoders are successfully enumerated,
> +	 * try to assemble regions from committed decoders
> +	 */
> +	device_for_each_child(dev, NULL, discover_region);
> +

There is a steady reduction in common code vs specific code for the two
cases in here.  Maybe it's time just to split the probe into

static int cxl_port_probe(struct device *dev)
{
	if (is_cxl_endpoint(port))
		cxl_port_probe_endpoint();
	else
		cxl_port_probe_not endpoint(); //with better naming.
}

Shared code is about 7 lines vs 40+ unshared.



>  	return 0;
>  }
>  
> 
> 





[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Annouce]     [Bugtraq]     [Linux OMAP]     [Linux MIPS]     [eCos]     [Asterisk Internet PBX]     [Linux API]

  Powered by Linux