Re: [PATCH] cxl/acpi: Fix load failures due to single window creation failure

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Sat, 17 Feb 2024 12:29:32 -0800
Dan Williams <dan.j.williams@xxxxxxxxx> wrote:

> The expectation is that cxl_parse_cfmws() continues in the face of
> failure as evidenced by code like:
> 
>     cxlrd = cxl_root_decoder_alloc(root_port, ways, cxl_calc_hb);
>     if (IS_ERR(cxlrd))
>     	return 0;
> 
> There are other error paths in that function which mistakenly follow
> idiomatic expectations and return an error when they should not. Most of
> those mistakes are innocuous checks that hardly ever fail in practice.
> However, a recent change succeeded in making the implementation more
> fragile by applying an idiomatic, but still wrong "fix" [1]. In this
> failure case the kernel reports:
> 
>     cxl root0: Failed to populate active decoder targets
>     cxl_acpi ACPI0017:00: Failed to add decode range: [mem 0x00000000-0x7fffffff flags 0x200]
> 
> ...which is a real issue with that one window (to be fixed separately),
> but ends up failing the entirety of cxl_acpi_probe().
> 
> Undo that recent breakage while also removing the confusion about
> ignoring errors. Update all exit paths to return an error per typical
> expectations and let an outer wrapper function handle dropping the
> error.
> 
> Fixes: 91019b5bc7c2 ("cxl/acpi: Return 'rc' instead of '0' in cxl_parse_cfmws()") [1]
> Cc: <stable@xxxxxxxxxxxxxxx>
> Cc: Breno Leitao <leitao@xxxxxxxxxx>
> Cc: Alison Schofield <alison.schofield@xxxxxxxxx>
> Cc: Vishal Verma <vishal.l.verma@xxxxxxxxx>
> Signed-off-by: Dan Williams <dan.j.williams@xxxxxxxxx>

General idea makes a lot of sense to me.

A few comments on specific implementation inline

> ---
>  drivers/cxl/acpi.c |   45 +++++++++++++++++++++++++++------------------
>  1 file changed, 27 insertions(+), 18 deletions(-)
> 
> diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c
> index dcf2b39e1048..53d2dff0c7a3 100644
> --- a/drivers/cxl/acpi.c
> +++ b/drivers/cxl/acpi.c
> @@ -316,31 +316,27 @@ static const struct cxl_root_ops acpi_root_ops = {
>  	.qos_class = cxl_acpi_qos_class,
>  };
>  
> -static int cxl_parse_cfmws(union acpi_subtable_headers *header, void *arg,
> -			   const unsigned long end)
> +static int __cxl_parse_cfmws(struct acpi_cedt_cfmws *cfmws,
> +			     struct cxl_cfmws_context *ctx)
>  {
>  	int target_map[CXL_DECODER_MAX_INTERLEAVE];
> -	struct cxl_cfmws_context *ctx = arg;
>  	struct cxl_port *root_port = ctx->root_port;
>  	struct resource *cxl_res = ctx->cxl_res;
>  	struct cxl_cxims_context cxims_ctx;
>  	struct cxl_root_decoder *cxlrd;
>  	struct device *dev = ctx->dev;
> -	struct acpi_cedt_cfmws *cfmws;
>  	cxl_calc_hb_fn cxl_calc_hb;
>  	struct cxl_decoder *cxld;
>  	unsigned int ways, i, ig;
>  	struct resource *res;
>  	int rc;
>  
> -	cfmws = (struct acpi_cedt_cfmws *) header;
> -
>  	rc = cxl_acpi_cfmws_verify(dev, cfmws);
>  	if (rc) {
>  		dev_err(dev, "CFMWS range %#llx-%#llx not registered\n",
>  			cfmws->base_hpa,
>  			cfmws->base_hpa + cfmws->window_size - 1);

Why keep this error print?

> -		return 0;
> +		return rc;
>  	}
>  
>  	rc = eiw_to_ways(cfmws->interleave_ways, &ways);
> @@ -376,7 +372,7 @@ static int cxl_parse_cfmws(union acpi_subtable_headers *header, void *arg,
>  
>  	cxlrd = cxl_root_decoder_alloc(root_port, ways, cxl_calc_hb);
>  	if (IS_ERR(cxlrd))
> -		return 0;
> +		return PTR_ERR(cxlrd);
>  
>  	cxld = &cxlrd->cxlsd.cxld;
>  	cxld->flags = cfmws_to_decoder_flags(cfmws->restrictions);
> @@ -420,16 +416,7 @@ static int cxl_parse_cfmws(union acpi_subtable_headers *header, void *arg,
>  		put_device(&cxld->dev);
>  	else
>  		rc = cxl_decoder_autoremove(dev, cxld);
> -	if (rc) {
> -		dev_err(dev, "Failed to add decode range: %pr", res);
> -		return rc;

As this message is no longer shared, it might be neater to have this lot as
	rc = cxl_decoder_add(cxld, target_map);
err_xormap:
	if (rc) {
		put_device(&cxld->dev);
		return rc;
	}

	return cxl_decoder_autoremove(dev, cxld);

or a second error exit path.

	rc = cxl_decoder_add(cxld, target_map);
	if (rc)
		goto err_put_device;

	return cxl_decoder_autoremove(dev, cxld);

err_put_device:
	put_device(&cxld->dev);
	return rc;

err_insert:
	kfree(res->name); ...


> -	}
> -	dev_dbg(dev, "add: %s node: %d range [%#llx - %#llx]\n",
> -		dev_name(&cxld->dev),
> -		phys_to_target_node(cxld->hpa_range.start),
> -		cxld->hpa_range.start, cxld->hpa_range.end);
> -
> -	return 0;
> +	return rc;
>  
>  err_insert:
>  	kfree(res->name);
> @@ -438,6 +425,28 @@ static int cxl_parse_cfmws(union acpi_subtable_headers *header, void *arg,
>  	return -ENOMEM;
>  }
>  
> +static int cxl_parse_cfmws(union acpi_subtable_headers *header, void *arg,
> +			   const unsigned long end)
> +{
> +	struct acpi_cedt_cfmws *cfmws = (struct acpi_cedt_cfmws *)header;
> +	struct cxl_cfmws_context *ctx = arg;
> +	struct device *dev = ctx->dev;
> +	int rc;
> +
> +	dev_dbg(dev, "decode range: node: %d range [%#llx - %#llx]\n",
> +		phys_to_target_node(cfmws->base_hpa), cfmws->base_hpa,
> +		cfmws->base_hpa + cfmws->window_size - 1);

Could maybe put this in an else below?

> +	rc = __cxl_parse_cfmws(cfmws, ctx);
> +	if (rc)
> +		dev_err(dev,
> +			"Failed to add decode range: [%#llx - %#llx] (%d)\n",
> +			cfmws->base_hpa,
> +			cfmws->base_hpa + cfmws->window_size - 1, rc);
> +
else 
	dev_dbg();

so we only give the dbg version on success?

> +	/* never fail cxl_acpi load for a single window failure */
> +	return 0;
> +}
> +
>  __mock struct acpi_device *to_cxl_host_bridge(struct device *host,
>  					      struct device *dev)
>  {
> 
> 





[Index of Archives]     [Linux Kernel]     [Kernel Development Newbies]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite Hiking]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux