Jonathan Cameron wrote: > On Wed, 21 Feb 2024 09:31:10 -0800 > Dan Williams <dan.j.williams@xxxxxxxxx> wrote: > > > Jonathan Cameron wrote: > > > On Sat, 17 Feb 2024 12:29:32 -0800 > > > Dan Williams <dan.j.williams@xxxxxxxxx> wrote: > > > > > > > The expectation is that cxl_parse_cfwms() continues in the face the of > > > > failure as evidenced by code like: > > > > > > > > cxlrd = cxl_root_decoder_alloc(root_port, ways, cxl_calc_hb); > > > > if (IS_ERR(cxlrd)) > > > > return 0; > > > > > > > > There are other error paths in that function which mistakenly follow > > > > idiomatic expectations and return an error when they should not. Most of > > > > those mistakes are innocuous checks that hardly ever fail in practice. > > > > However, a recent change succeed in making the implementation more > > > > fragile by applying an idiomatic, but still wrong "fix" [1]. In this > > > > failure case the kernel reports: > > > > > > > > cxl root0: Failed to populate active decoder targets > > > > cxl_acpi ACPI0017:00: Failed to add decode range: [mem 0x00000000-0x7fffffff flags 0x200] > > > > > > > > ...which is a real issue with that one window (to be fixed separately), > > > > but ends up failing the entirety of cxl_acpi_probe(). > > > > > > > > Undo that recent breakage while also removing the confusion about > > > > ignoring errors. Update all exits paths to return an error per typical > > > > expectations and let an outer wrapper function handle dropping the > > > > error. > > > > > > > > Fixes: 91019b5bc7c2 ("cxl/acpi: Return 'rc' instead of '0' in cxl_parse_cfmws()") [1] > > > > Cc: <stable@xxxxxxxxxxxxxxx> > > > > Cc: Breno Leitao <leitao@xxxxxxxxxx> > > > > Cc: Alison Schofield <alison.schofield@xxxxxxxxx> > > > > Cc: Vishal Verma <vishal.l.verma@xxxxxxxxx> > > > > Signed-off-by: Dan Williams <dan.j.williams@xxxxxxxxx> > > > > > > General idea makes a lot of sense to me. > > > > > > A few comments on specific implementation inline > > > > > > > --- > > > > drivers/cxl/acpi.c | 45 +++++++++++++++++++++++++++------------------ > > > > 1 file changed, 27 insertions(+), 18 deletions(-) > > > > > > > > diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c > > > > index dcf2b39e1048..53d2dff0c7a3 100644 > > > > --- a/drivers/cxl/acpi.c > > > > +++ b/drivers/cxl/acpi.c > > > > @@ -316,31 +316,27 @@ static const struct cxl_root_ops acpi_root_ops = { > > > > .qos_class = cxl_acpi_qos_class, > > > > }; > > > > > > > > -static int cxl_parse_cfmws(union acpi_subtable_headers *header, void *arg, > > > > - const unsigned long end) > > > > +static int __cxl_parse_cfmws(struct acpi_cedt_cfmws *cfmws, > > > > + struct cxl_cfmws_context *ctx) > > > > { > > > > int target_map[CXL_DECODER_MAX_INTERLEAVE]; > > > > - struct cxl_cfmws_context *ctx = arg; > > > > struct cxl_port *root_port = ctx->root_port; > > > > struct resource *cxl_res = ctx->cxl_res; > > > > struct cxl_cxims_context cxims_ctx; > > > > struct cxl_root_decoder *cxlrd; > > > > struct device *dev = ctx->dev; > > > > - struct acpi_cedt_cfmws *cfmws; > > > > cxl_calc_hb_fn cxl_calc_hb; > > > > struct cxl_decoder *cxld; > > > > unsigned int ways, i, ig; > > > > struct resource *res; > > > > int rc; > > > > > > > > - cfmws = (struct acpi_cedt_cfmws *) header; > > > > - > > > > rc = cxl_acpi_cfmws_verify(dev, cfmws); > > > > if (rc) { > > > > dev_err(dev, "CFMWS range %#llx-%#llx not registered\n", > > > > cfmws->base_hpa, > > > > cfmws->base_hpa + cfmws->window_size - 1); > > > > > > Why keep this error print? > > > > True, that can go. > > > > > > - return 0; > > > > + return rc; > > > > } > > > > > > > > rc = eiw_to_ways(cfmws->interleave_ways, &ways); > > > > @@ -376,7 +372,7 @@ static int cxl_parse_cfmws(union acpi_subtable_headers *header, void *arg, > > > > > > > > cxlrd = cxl_root_decoder_alloc(root_port, ways, cxl_calc_hb); > > > > if (IS_ERR(cxlrd)) > > > > - return 0; > > > > + return PTR_ERR(cxlrd); > > > > > > > > cxld = &cxlrd->cxlsd.cxld; > > > > cxld->flags = cfmws_to_decoder_flags(cfmws->restrictions); > > > > @@ -420,16 +416,7 @@ static int cxl_parse_cfmws(union acpi_subtable_headers *header, void *arg, > > > > put_device(&cxld->dev); > > > > else > > > > rc = cxl_decoder_autoremove(dev, cxld); > > > > - if (rc) { > > > > - dev_err(dev, "Failed to add decode range: %pr", res); > > > > - return rc; > > > > > > As no longer sharing this message. Might be neater to have this lot as > > > rc = cxl_decoder_add(cxld, target_map); > > > err_xormap: > > > if (rc) { > > > put_device(&cxld->dev); > > > return rc; > > > } > > > > > > return cxl_decoder_autoremove(dev, cxld); > > > > > > or a second error exit path. > > > > > > rc = cxl_decoder_add(cxld, target_map); > > > if (rc) > > > goto err_put_devie; > > > > > > return cxl_decoder_autoremove(dev, cxld; > > > > > > err_put_device; > > > put_device(&cxld->dev); > > > return rc; > > > > > > err_insert: > > > kfree(res->name); ... > > > > True, there's enough here to do an even deeper cleanup... below. > > > > > > > > > > > > - } > > > > - dev_dbg(dev, "add: %s node: %d range [%#llx - %#llx]\n", > > > > - dev_name(&cxld->dev), > > > > - phys_to_target_node(cxld->hpa_range.start), > > > > - cxld->hpa_range.start, cxld->hpa_range.end); > > > > - > > > > - return 0; > > > > + return rc; > > > > > > > > err_insert: > > > > kfree(res->name); > > > > @@ -438,6 +425,28 @@ static int cxl_parse_cfmws(union acpi_subtable_headers *header, void *arg, > > > > return -ENOMEM; > > > > } > > > > > > > > +static int cxl_parse_cfmws(union acpi_subtable_headers *header, void *arg, > > > > + const unsigned long end) > > > > +{ > > > > + struct acpi_cedt_cfmws *cfmws = (struct acpi_cedt_cfmws *)header; > > > > + struct cxl_cfmws_context *ctx = arg; > > > > + struct device *dev = ctx->dev; > > > > + int rc; > > > > + > > > > + dev_dbg(dev, "decode range: node: %d range [%#llx - %#llx]\n", > > > > + phys_to_target_node(cfmws->base_hpa), cfmws->base_hpa, > > > > + cfmws->base_hpa + cfmws->window_size - 1); > > > > > > Could maybe put this in an else below? > > > > > > > + rc = __cxl_parse_cfmws(cfmws, ctx); > > > > + if (rc) > > > > + dev_err(dev, > > > > + "Failed to add decode range: [%#llx - %#llx] (%d)\n", > > > > + cfmws->base_hpa, > > > > + cfmws->base_hpa + cfmws->window_size - 1, rc); > > > > + > > > else > > > dev_dbg(); > > > > > > so we only give the dbg version on success? > > > > Yeah, I will switch to this since the previous state was also skipping > > the debug messages on success. > > > > Follow-on cleanup: > > > > -- 8< -- > > From e30c267c0b69d5e4531d8f65ec86e4fa32d72340 Mon Sep 17 00:00:00 2001 > > From: Dan Williams <dan.j.williams@xxxxxxxxx> > > Date: Tue, 20 Feb 2024 22:44:34 -0800 > > Subject: [PATCH] cxl/acpi: Cleanup __cxl_parse_cfmws() error exits > > > > As a follow on to the recent rework of __cxl_parse_cfmws() to always > > return errors, use cleanup.h helpers to remove goto and other cleanups > > now that logging is moved to the cxl_parse_cfmws() wrapper. > > This runs into the question of where the declarations should be for > cleanup.h changes. I can dig out the Linus comment on this but > I'm feeling lazy ;) > > In general I like this stuff, but in this case I think it's ended > up harder to read than the original code. > > > > > Reported-by: Jonathan Cameron <Jonathan.Cameron@xxxxxxxxxx> > > Closes: http://lore.kernel.org/r/20240219124041.00002bda@xxxxxxxxxx > > Signed-off-by: Dan Williams <dan.j.williams@xxxxxxxxx> > > --- > > drivers/cxl/acpi.c | 45 +++++++++++++++++++-------------------------- > > 1 file changed, 19 insertions(+), 26 deletions(-) > > > > diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c > > index 1a3e6aafbdcc..b1ea2d152c65 100644 > > --- a/drivers/cxl/acpi.c > > +++ b/drivers/cxl/acpi.c > > @@ -319,25 +319,23 @@ static const struct cxl_root_ops acpi_root_ops = { > > static int __cxl_parse_cfmws(struct acpi_cedt_cfmws *cfmws, > > struct cxl_cfmws_context *ctx) > > { > > + struct device *cxld_dev __free(put_device) = NULL; > > int target_map[CXL_DECODER_MAX_INTERLEAVE]; > > struct cxl_port *root_port = ctx->root_port; > > + struct resource *res __free(kfree) = NULL; > > struct resource *cxl_res = ctx->cxl_res; > > + const char *name __free(kfree) = NULL; > > Linus has expressed that he prefers these done inline > so the allocation and clearing are obviously associated - there > is an ordering related factor as well as they will unwind > in the reverse of declaration order, not allocation order. I considered that, yeah I came to the wrong conclusion, will move the declarations down. > > > > struct cxl_cxims_context cxims_ctx; > > struct cxl_root_decoder *cxlrd; > > struct device *dev = ctx->dev; > > cxl_calc_hb_fn cxl_calc_hb; > > struct cxl_decoder *cxld; > > unsigned int ways, i, ig; > > - struct resource *res; > > int rc; > > > > rc = cxl_acpi_cfmws_verify(dev, cfmws); > > - if (rc) { > > - dev_err(dev, "CFMWS range %#llx-%#llx not registered\n", > > - cfmws->base_hpa, > > - cfmws->base_hpa + cfmws->window_size - 1); > > + if (rc) > > return rc; > > - } > > > > rc = eiw_to_ways(cfmws->interleave_ways, &ways); > > if (rc) > > @@ -352,10 +350,11 @@ static int __cxl_parse_cfmws(struct acpi_cedt_cfmws *cfmws, > > if (!res) > > return -ENOMEM; > > > > - res->name = kasprintf(GFP_KERNEL, "CXL Window %d", ctx->id++); > > - if (!res->name) > > - goto err_name; > > const char *name __free(kfree) = kasprintf(...); > > same for the others. > > + name = kasprintf(GFP_KERNEL, "CXL Window %d", ctx->id++); > > + if (!name) > > + return -ENOMEM; > > > > + res->name = name; > > res->start = cfmws->base_hpa; > > res->end = cfmws->base_hpa + cfmws->window_size - 1; > > res->flags = IORESOURCE_MEM; > > @@ -363,7 +362,9 @@ static int __cxl_parse_cfmws(struct acpi_cedt_cfmws *cfmws, > > /* add to the local resource tracking to establish a sort order */ > > rc = insert_resource(cxl_res, res); > > if (rc) > > - goto err_insert; > > + return rc; > > + name = NULL; > > I guess we'll get used to this. Kind of annoying that no_free_ptr() is > defined as __must_check. Otherwise would be good to use that to document > the 'why' of these are being set to NULL. This is the main part I am unhappy about as well. What I would love is some way to declare that if a given statement is successfull then "list of variables" cleanup-scope has ended. Something like: cond_no_free_ptr(condition, fail_statement, ...); ...to be able to write: cond_no_free_ptr(rc == 0, return rc, name, res); ...where it handles setting all passed VA_ARG pointers to NULL. > > > + res = NULL; > > > > if (cfmws->interleave_arithmetic == ACPI_CEDT_CFMWS_ARITHMETIC_MODULO) > > cxl_calc_hb = cxl_hb_modulo; > > @@ -375,11 +376,12 @@ static int __cxl_parse_cfmws(struct acpi_cedt_cfmws *cfmws, > > return PTR_ERR(cxlrd); > > > > cxld = &cxlrd->cxlsd.cxld; > > + cxld_dev = &cxld->dev; > > This I find odd, there is no allocation as such in here so the matching of > the unwind and the allocation isn't clear. Yes, another moment of weakness, every time I write this pattern it really indicates the need for a new DEFINE_FREE(). I will add one for cxl_root_decoder_alloc(). > > cxld->flags = cfmws_to_decoder_flags(cfmws->restrictions); > > cxld->target_type = CXL_DECODER_HOSTONLYMEM; > > cxld->hpa_range = (struct range) { > > - .start = res->start, > > - .end = res->end, > > + .start = cfmws->base_hpa, > > + .end = cfmws->base_hpa + cfmws->window_size - 1, > > }; > > cxld->interleave_ways = ways; > > /* > > @@ -399,11 +401,10 @@ static int __cxl_parse_cfmws(struct acpi_cedt_cfmws *cfmws, > > rc = acpi_table_parse_cedt(ACPI_CEDT_TYPE_CXIMS, > > cxl_parse_cxims, &cxims_ctx); > > if (rc < 0) > > - goto err_xormap; > > + return rc; > > if (!cxlrd->platform_data) { > > dev_err(dev, "No CXIMS for HBIG %u\n", ig); > > - rc = -EINVAL; > > - goto err_xormap; > > + return -EINVAL; > > } > > } > > } > > @@ -411,18 +412,10 @@ static int __cxl_parse_cfmws(struct acpi_cedt_cfmws *cfmws, > > cxlrd->qos_class = cfmws->qtg_id; > > > > rc = cxl_decoder_add(cxld, target_map); > > -err_xormap: > > if (rc) > > - put_device(&cxld->dev); > > - else > > - rc = cxl_decoder_autoremove(dev, cxld); > > - return rc; > > - > > -err_insert: > > - kfree(res->name); > > -err_name: > > - kfree(res); > > - return -ENOMEM; > > + return rc; > > + cxld_dev = NULL; > > This is definitely not nice to read. We are randomly setting an > apparently unrelated pointer to NULL. At very least the __free > should operating on cxld then we can use > > return cxl_decoder_autoremove(dev, no_free_ptr(cxld)); That looks nice, and would look even nicer as: return cxl_root_decoder_autoremove(dev, no_free_ptr(cxlrd)); ...i.e. paired with the new DEFINE_FREE() for cxl_root_decoder_alloc().