Re: [RFC PATCH 3/6] acpi/hmat / cxl: Add extended linear cache support for CXL

Jonathan Cameron <Jonathan.Cameron@xxxxxxxxxx> · Thu, 17 Oct 2024 17:20:59 +0100

On Fri, 27 Sep 2024 07:16:55 -0700
Dave Jiang <dave.jiang@xxxxxxxxx> wrote:

> The current cxl region size only indicates the size of the CXL memory region
> without accounting for the extended linear cache size. Retrieve the cache
> size from HMAT and append that to the cxl region size for the cxl region
> range that matches the SRAT range that has extended linear cache enabled.
> 
> The SRAT defines the whole memory range that inclues the extended linear

includes

> cache and the CXL memory region. The new HMAT update to the Memory Side

ECN/ECR, not update.  After all, "update" might mean _HMA.

> Cache Information Structure defines the size of the extended linear cache
> size and matches to the SRAT Memory Affinity Structure by the memory proxmity
> domain. Add a helper to match the cxl range to the SRAT memory range in order
> to retrieve the cache size.
> 
> There are several places that checks the cxl region range against the
> decoder range. Use new helper to check between the two ranges and address
> the new cache size.
> 
> Signed-off-by: Dave Jiang <dave.jiang@xxxxxxxxx>
Various comments inline.
> ---
>  drivers/acpi/numa/hmat.c  | 44 +++++++++++++++++++++++++++++++++
>  drivers/cxl/core/Makefile |  1 +
>  drivers/cxl/core/acpi.c   | 11 +++++++++
>  drivers/cxl/core/core.h   |  3 +++
>  drivers/cxl/core/region.c | 51 ++++++++++++++++++++++++++++++++++++---
>  drivers/cxl/cxl.h         |  2 ++
>  include/linux/acpi.h      |  8 ++++++
>  tools/testing/cxl/Kbuild  |  1 +
>  8 files changed, 117 insertions(+), 4 deletions(-)
>  create mode 100644 drivers/cxl/core/acpi.c
> 
> diff --git a/drivers/acpi/numa/hmat.c b/drivers/acpi/numa/hmat.c
> index 39524f36be5b..d299f8d7af8c 100644
> --- a/drivers/acpi/numa/hmat.c
> +++ b/drivers/acpi/numa/hmat.c
> @@ -108,6 +108,50 @@ static struct memory_target *find_mem_target(unsigned int mem_pxm)
>  	return NULL;
>  }
>  
> +/**
> + * hmat_get_extended_linear_cache_size - Retrieve the extended linear cache size
> + * @backing_res: resource from the backing media
> + * @nid: node id for the memory region
> + * @cache_size: (Output) size of extended linear cache.
> + *
> + * Return: 0 on success. Errno on failure.
> + *
> + */
> +int hmat_get_extended_linear_cache_size(struct resource *backing_res, int nid,
> +					resource_size_t *cache_size)
> +{
> +	unsigned int pxm = node_to_pxm(nid);
> +	struct memory_target *target;
> +	struct target_cache *tcache;
> +	bool cache_found = false;
> +	struct resource *res;
> +
> +	target = find_mem_target(pxm);
> +	if (!target)
> +		return -ENOENT;
> +
> +	list_for_each_entry(tcache, &target->caches, node) {
> +		if (tcache->cache_attrs.mode == NODE_CACHE_MODE_EXTENDED_LINEAR) {

Why is finding the first one appropriate?  Maybe you have more than one?
I'd move the code below up here, then carry on to see if there is another
entry if resource_contains() fails.

> +			cache_found = true;
> +			break;
> +		}
> +	}
> +
> +	if (!cache_found) {
> +		*cache_size = 0;
> +		return 0;
> +	}
> +
> +	res = &target->memregions;
> +	if (!resource_contains(res, backing_res))
> +		return -ENOENT;
> +
> +	*cache_size = tcache->cache_attrs.size;
> +
> +	return 0;
> +}
> +EXPORT_SYMBOL_NS_GPL(hmat_get_extended_linear_cache_size, CXL);
> +
>  static struct memory_target *acpi_find_genport_target(u32 uid)
>  {
>  	struct memory_target *target;

> diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
> index 21ad5f242875..ddfb1e1a8909 100644
> --- a/drivers/cxl/core/region.c
> +++ b/drivers/cxl/core/region.c
> @@ -816,6 +816,17 @@ static int match_free_decoder(struct device *dev, void *data)
>  	return 0;
>  }
>  
> +static bool region_res_match_range(struct cxl_region_params *p,

This is a little odd, so a comment on what it is doing is needed.
I think it is patching the CXL backed bit of the region
by offsetting the start back to where it was before you
subtracted the dram cache size.

> +				   struct range *range)
> +{
> +	if (p->res &&
I'd break the
	if (!p->res)
		return false;
off then
	return p->res->start + p->cache_size == range->start &&
	       p->res->end == range->end;

> +	    p->res->start + p->cache_size == range->start &&
> +	    p->res->end == range->end)
> +		return true;
> +
> +	return false;
> +}
Reasonable to factor this out first.
> +
>  static int match_auto_decoder(struct device *dev, void *data)
>  {
>  	struct cxl_region_params *p = data;
> @@ -828,7 +839,7 @@ static int match_auto_decoder(struct device *dev, void *data)
>  	cxld = to_cxl_decoder(dev);
>  	r = &cxld->hpa_range;
>  
> -	if (p->res && p->res->start == r->start && p->res->end == r->end)
> +	if (region_res_match_range(p, r))
>  		return 1;
>  
>  	return 0;
> @@ -1406,8 +1417,7 @@ static int cxl_port_setup_targets(struct cxl_port *port,
>  	if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags)) {
>  		if (cxld->interleave_ways != iw ||
>  		    cxld->interleave_granularity != ig ||
> -		    cxld->hpa_range.start != p->res->start ||
> -		    cxld->hpa_range.end != p->res->end ||
> +		    !region_res_match_range(p, &cxld->hpa_range) ||
>  		    ((cxld->flags & CXL_DECODER_F_ENABLE) == 0)) {
>  			dev_err(&cxlr->dev,
>  				"%s:%s %s expected iw: %d ig: %d %pr\n",
> @@ -1931,7 +1941,7 @@ static int cxl_region_attach(struct cxl_region *cxlr,
>  		return -ENXIO;
>  	}
>  
> -	if (resource_size(cxled->dpa_res) * p->interleave_ways !=
> +	if (resource_size(cxled->dpa_res) * p->interleave_ways + p->cache_size !=
>  	    resource_size(p->res)) {
>  		dev_dbg(&cxlr->dev,
>  			"%s:%s: decoder-size-%#llx * ways-%d != region-size-%#llx\n",
> @@ -3226,6 +3236,34 @@ static int match_region_by_range(struct device *dev, void *data)
>  	return rc;
>  }
>  
> +static int cxl_extended_linear_cache_resize(struct cxl_region_params *p,
> +					    struct resource *res)
> +{
> +	int nid = phys_to_target_node(res->start);
> +	resource_size_t size, cache_size;
> +	int rc;
> +
> +	size = resource_size(res);
> +	if (!size)
> +		return -EINVAL;
> +
> +	rc = cxl_acpi_get_extended_linear_cache_size(res, nid, &cache_size);
> +	if (rc)
> +		return rc;
> +
> +	if (!cache_size)
> +		return 0;
> +
> +	if (size != cache_size)
> +		return -EINVAL;
> +
> +	res->start -= cache_size;

I don't recall the ECN saying which way round they were (and it didn't
occur to me at the time) i.e. local dram first or CXL dram first.
Did I miss that?  I was kind of thinking extra capacity at higher
addresses but for no particular reason...

> +	p->cache_size = cache_size;
> +
> +	return 0;
> +}
> +
Trivial, but 1 blank line is probably appropriate.
> +
>  /* Establish an empty region covering the given HPA range */