On 11/26/24 9:23 AM, Jonathan Cameron wrote:
> On Tue, 12 Nov 2024 15:12:34 -0700
> Dave Jiang <dave.jiang@xxxxxxxxx> wrote:
>
>> The current cxl region size only indicates the size of the CXL memory
>> region without accounting for the extended linear cache size. Retrieve the
>> cache size from HMAT and append that to the cxl region size for the cxl
>> region range that matches the SRAT range that has extended linear cache
>> enabled.
>>
>> The SRAT defines the whole memory range that includes the extended linear
>> cache and the CXL memory region. The new HMAT ECN/ECR to the Memory Side
>> Cache Information Structure defines the size of the extended linear cache
>> size and matches to the SRAT Memory Affinity Structure by the memory
>> proxmity domain. Add a helper to match the cxl range to the SRAT memory
>> range in order to retrieve the cache size.
>>
>> There are several places that checks the cxl region range against the
>> decoder range. Use new helper to check between the two ranges and address
>> the new cache size.
>>
>> Signed-off-by: Dave Jiang <dave.jiang@xxxxxxxxx>
> Hi Dave,
>
> A few minor comments inline.
>
> Thanks,
>
> Jonathan
>
>> ---
>>  drivers/acpi/numa/hmat.c  | 44 ++++++++++++++++++++++++
>>  drivers/cxl/core/Makefile |  1 +
>>  drivers/cxl/core/acpi.c   | 11 ++++++
>>  drivers/cxl/core/core.h   |  3 ++
>>  drivers/cxl/core/region.c | 70 ++++++++++++++++++++++++++++++++++++---
>>  drivers/cxl/cxl.h         |  2 ++
>>  include/linux/acpi.h      | 19 +++++++++++
>>  tools/testing/cxl/Kbuild  |  1 +
>>  8 files changed, 147 insertions(+), 4 deletions(-)
>>  create mode 100644 drivers/cxl/core/acpi.c
>>
>> diff --git a/drivers/acpi/numa/hmat.c b/drivers/acpi/numa/hmat.c
>> index 39524f36be5b..92b818b72ecc 100644
>> --- a/drivers/acpi/numa/hmat.c
>> +++ b/drivers/acpi/numa/hmat.c
>> @@ -108,6 +108,50 @@ static struct memory_target *find_mem_target(unsigned int mem_pxm)
>>  	return NULL;
>>  }
>>
>> +/**
>> + * hmat_get_extended_linear_cache_size - Retrieve the extended linear cache size
>> + * @backing_res: resource from the backing media
>> + * @nid: node id for the memory region
>> + * @cache_size: (Output) size of extended linear cache.
>> + *
>> + * Return: 0 on success. Errno on failure.
>> + *
>> + */
>> +int hmat_get_extended_linear_cache_size(struct resource *backing_res, int nid,
>> +					resource_size_t *cache_size)
>> +{
>> +	unsigned int pxm = node_to_pxm(nid);
>> +	struct memory_target *target;
>> +	struct target_cache *tcache;
>> +	bool cache_found = false;
>> +	struct resource *res;
>> +
>> +	target = find_mem_target(pxm);
>> +	if (!target)
>> +		return -ENOENT;
>> +
>> +	list_for_each_entry(tcache, &target->caches, node) {
>> +		if (tcache->cache_attrs.mode == NODE_CACHE_MODE_EXTENDED_LINEAR) {
>
> I'd flip this for slightly better readability.

ok

> 	if (tcache->cache_attrs.mode != NODE_CACHE_MODE_EXTENDED_LINEAR)
> 		continue;
>
> 	res = ...
>
>
>> +			res = &target->memregions;
>> +			if (!resource_contains(res, backing_res))
>> +				continue;
>> +
>> +			cache_found = true;
>> +			break;
>> +		}
>> +	}
>> +
>> +	if (!cache_found) {
>> +		*cache_size = 0;
>> +		return 0;
>> +	}
>> +
>> +	*cache_size = tcache->cache_attrs.size;
>
> Why not set this and return in the loop?
> That way no need to have a local variable.

ok

>
>> +
>> +	return 0;
>> +}
>> +EXPORT_SYMBOL_NS_GPL(hmat_get_extended_linear_cache_size, CXL);
>
>> diff --git a/drivers/cxl/core/acpi.c b/drivers/cxl/core/acpi.c
>> new file mode 100644
>> index 000000000000..f13b4dae6ac5
>> --- /dev/null
>> +++ b/drivers/cxl/core/acpi.c
>> @@ -0,0 +1,11 @@
>> +// SPDX-License-Identifier: GPL-2.0-only
>> +/* Copyright(c) 2024 Intel Corporation. All rights reserved. */
>> +#include <linux/acpi.h>
>> +#include "cxl.h"
>> +#include "core.h"
>
> Why do you need the cxl headers? Maybe a forwards def of
> struct resource, but I'm not seeing anything else being needed.

The prototype is declared in core.h, and it seems core.h needs cxl.h. I wonder if core.h should just include cxl.h.
>
>
>> +
>> +int cxl_acpi_get_extended_linear_cache_size(struct resource *backing_res,
>> +					    int nid, resource_size_t *size)
>> +{
>> +	return hmat_get_extended_linear_cache_size(backing_res, nid, size);
>> +}
>
>
>> @@ -3215,6 +3229,42 @@ static int match_region_by_range(struct device *dev, void *data)
>>  	return rc;
>>  }
>>
>> +static int cxl_extended_linear_cache_resize(struct cxl_region *cxlr,
>> +					    struct resource *res)
>> +{
>> +	struct cxl_region_params *p = &cxlr->params;
>> +	int nid = phys_to_target_node(res->start);
>> +	resource_size_t size, cache_size;
>> +	int rc;
>> +
>> +	size = resource_size(res);
>> +	if (!size)
>> +		return -EINVAL;
>> +
>> +	rc = cxl_acpi_get_extended_linear_cache_size(res, nid, &cache_size);
>> +	if (rc)
>> +		return rc;
>> +
>> +	if (!cache_size)
>> +		return 0;
>> +
>> +	if (size != cache_size) {
>> +		dev_warn(&cxlr->dev, "Extended Linear Cache is not 1:1, unsupported!");
>> +		return -EOPNOTSUPP;
>> +	}
>> +
>> +	/*
>> +	 * Move the start of the range to where the cache range starts. The
>> +	 * implementation assumes that the cache range is in front of the
>> +	 * CXL range. This is not dictated by the HMAT spec but is how the
>> +	 * currently known implementation configured.
>
> is configured

will fix

>
>> +	 */
>> +	res->start -= cache_size;
>> +	p->cache_size = cache_size;
>> +
>> +	return 0;
>> +}
>
>