Re: [PATCH] acpi, nfit: Fix Address Range Scrub completion tracking

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 




On 10/13/2018 09:23 PM, Dan Williams wrote:
> The Address Range Scrub implementation tried to skip running scrubs
> against ranges that were already scrubbed by the BIOS. Unfortunately
> that support also resulted in early scrub completions as evidenced by
> this debug output from nfit_test:
> 
>     nd_region region9: ARS: range 1 short complete
>     nd_region region3: ARS: range 1 short complete
>     nd_region region4: ARS: range 2 ARS start (0)
>     nd_region region4: ARS: range 2 short complete
> 
> ...i.e. completions without any indications that the scrub was started.
> 
> This state of affairs was hard to see in the code due to the
> proliferation of state bits and mistakenly trying to track done state
> per-range when the completion is a global property of the bus.
> 
> So, kill the four ARS state bits (ARS_REQ, ARS_REQ_REDO, ARS_DONE, and
> ARS_SHORT), and replace them with just 2 request flags ARS_REQ_SHORT and
> ARS_REQ_LONG. The implementation will still complete and reap the
> results of BIOS initiated ARS, but it will not attempt to use that
> information to affect the completion status of scrubbing the ranges from
> a Linux perspective.
> 
> Instead, try to synchronously run a short ARS per range at init time and
> schedule a long scrub in the background. If ARS is busy with an ARS
> request schedule both a short and a long scrub for when ARS returns to
> idle. This logic also satisfies the intent of what ARS_REQ_REDO was
> trying to achieve. The new rule is that the REQ flag stays set until the
> next successful ars_start() for that range.
> 
> With the new policy that the REQ flags are not cleared until the next
> start, the implementation no longer loses requests as can be seen from
> the following log:
> 
>     nd_region region3: ARS: range 1 ARS start short (0)
>     nd_region region9: ARS: range 1 ARS start short (0)
>     nd_region region3: ARS: range 1 complete
>     nd_region region4: ARS: range 2 ARS start short (0)
>     nd_region region9: ARS: range 1 complete
>     nd_region region9: ARS: range 1 ARS start long (0)
>     nd_region region4: ARS: range 2 complete
>     nd_region region3: ARS: range 1 ARS start long (0)
>     nd_region region9: ARS: range 1 complete
>     nd_region region3: ARS: range 1 complete
>     nd_region region4: ARS: range 2 ARS start long (0)
>     nd_region region4: ARS: range 2 complete
> 
> ...note that the nfit_test emulated driver provides 2 buses, that is why
> some of the range indices are duplicated. Notice that each range
> now successfully completes a short and long scrub.
> 
> Cc: <stable@xxxxxxxxxxxxxxx>
> Fixes: 14c73f997a5e ("nfit, address-range-scrub: introduce nfit_spa->ars_state")
> Fixes: cc3d3458d46f ("acpi/nfit: queue issuing of ars when an uc error...")
> Reported-by: Jacek Zloch <jacek.zloch@xxxxxxxxx>
> Reported-by: Krzysztof Rusocki <krzysztof.rusocki@xxxxxxxxx>
> Signed-off-by: Dan Williams <dan.j.williams@xxxxxxxxx>

Reviewed-by: Dave Jiang <dave.jiang@xxxxxxxxx>

> ---
>  drivers/acpi/nfit/core.c |  169 ++++++++++++++++++++++++++--------------------
>  drivers/acpi/nfit/nfit.h |   10 +--
>  2 files changed, 101 insertions(+), 78 deletions(-)
> 
> diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c
> index a0dfbcf8220d..f7efcd9843e0 100644
> --- a/drivers/acpi/nfit/core.c
> +++ b/drivers/acpi/nfit/core.c
> @@ -2572,7 +2572,8 @@ static int ars_get_cap(struct acpi_nfit_desc *acpi_desc,
>  	return cmd_rc;
>  }
>  
> -static int ars_start(struct acpi_nfit_desc *acpi_desc, struct nfit_spa *nfit_spa)
> +static int ars_start(struct acpi_nfit_desc *acpi_desc,
> +		struct nfit_spa *nfit_spa, enum nfit_ars_state req_type)
>  {
>  	int rc;
>  	int cmd_rc;
> @@ -2583,7 +2584,7 @@ static int ars_start(struct acpi_nfit_desc *acpi_desc, struct nfit_spa *nfit_spa
>  	memset(&ars_start, 0, sizeof(ars_start));
>  	ars_start.address = spa->address;
>  	ars_start.length = spa->length;
> -	if (test_bit(ARS_SHORT, &nfit_spa->ars_state))
> +	if (req_type == ARS_REQ_SHORT)
>  		ars_start.flags = ND_ARS_RETURN_PREV_DATA;
>  	if (nfit_spa_type(spa) == NFIT_SPA_PM)
>  		ars_start.type = ND_ARS_PERSISTENT;
> @@ -2640,6 +2641,15 @@ static void ars_complete(struct acpi_nfit_desc *acpi_desc,
>  	struct nd_region *nd_region = nfit_spa->nd_region;
>  	struct device *dev;
>  
> +	lockdep_assert_held(&acpi_desc->init_mutex);
> +	/*
> +	 * Only advance the ARS state for ARS runs initiated by the
> +	 * kernel, ignore ARS results from BIOS initiated runs for scrub
> +	 * completion tracking.
> +	 */
> +	if (acpi_desc->scrub_spa != nfit_spa)
> +		return;
> +
>  	if ((ars_status->address >= spa->address && ars_status->address
>  				< spa->address + spa->length)
>  			|| (ars_status->address < spa->address)) {
> @@ -2659,28 +2669,13 @@ static void ars_complete(struct acpi_nfit_desc *acpi_desc,
>  	} else
>  		return;
>  
> -	if (test_bit(ARS_DONE, &nfit_spa->ars_state))
> -		return;
> -
> -	if (!test_and_clear_bit(ARS_REQ, &nfit_spa->ars_state))
> -		return;
> -
> +	acpi_desc->scrub_spa = NULL;
>  	if (nd_region) {
>  		dev = nd_region_dev(nd_region);
>  		nvdimm_region_notify(nd_region, NVDIMM_REVALIDATE_POISON);
>  	} else
>  		dev = acpi_desc->dev;
> -
> -	dev_dbg(dev, "ARS: range %d %s complete\n", spa->range_index,
> -			test_bit(ARS_SHORT, &nfit_spa->ars_state)
> -			? "short" : "long");
> -	clear_bit(ARS_SHORT, &nfit_spa->ars_state);
> -	if (test_and_clear_bit(ARS_REQ_REDO, &nfit_spa->ars_state)) {
> -		set_bit(ARS_SHORT, &nfit_spa->ars_state);
> -		set_bit(ARS_REQ, &nfit_spa->ars_state);
> -		dev_dbg(dev, "ARS: processing scrub request received while in progress\n");
> -	} else
> -		set_bit(ARS_DONE, &nfit_spa->ars_state);
> +	dev_dbg(dev, "ARS: range %d complete\n", spa->range_index);
>  }
>  
>  static int ars_status_process_records(struct acpi_nfit_desc *acpi_desc)
> @@ -2961,46 +2956,55 @@ static int acpi_nfit_query_poison(struct acpi_nfit_desc *acpi_desc)
>  	return 0;
>  }
>  
> -static int ars_register(struct acpi_nfit_desc *acpi_desc, struct nfit_spa *nfit_spa,
> -		int *query_rc)
> +static int ars_register(struct acpi_nfit_desc *acpi_desc,
> +		struct nfit_spa *nfit_spa)
>  {
> -	int rc = *query_rc;
> +	int rc;
>  
> -	if (no_init_ars)
> +	if (no_init_ars || test_bit(ARS_FAILED, &nfit_spa->ars_state))
>  		return acpi_nfit_register_region(acpi_desc, nfit_spa);
>  
> -	set_bit(ARS_REQ, &nfit_spa->ars_state);
> -	set_bit(ARS_SHORT, &nfit_spa->ars_state);
> +	set_bit(ARS_REQ_SHORT, &nfit_spa->ars_state);
> +	set_bit(ARS_REQ_LONG, &nfit_spa->ars_state);
>  
> -	switch (rc) {
> +	switch (acpi_nfit_query_poison(acpi_desc)) {
>  	case 0:
>  	case -EAGAIN:
> -		rc = ars_start(acpi_desc, nfit_spa);
> -		if (rc == -EBUSY) {
> -			*query_rc = rc;
> +		rc = ars_start(acpi_desc, nfit_spa, ARS_REQ_SHORT);
> +		/* shouldn't happen, try again later */
> +		if (rc == -EBUSY)
>  			break;
> -		} else if (rc == 0) {
> -			rc = acpi_nfit_query_poison(acpi_desc);
> -		} else {
> +		if (rc) {
>  			set_bit(ARS_FAILED, &nfit_spa->ars_state);
>  			break;
>  		}
> -		if (rc == -EAGAIN)
> -			clear_bit(ARS_SHORT, &nfit_spa->ars_state);
> -		else if (rc == 0)
> -			ars_complete(acpi_desc, nfit_spa);
> +		clear_bit(ARS_REQ_SHORT, &nfit_spa->ars_state);
> +		rc = acpi_nfit_query_poison(acpi_desc);
> +		if (rc)
> +			break;
> +		acpi_desc->scrub_spa = nfit_spa;
> +		ars_complete(acpi_desc, nfit_spa);
> +		/*
> +		 * If ars_complete() says we didn't complete the
> +		 * short scrub, we'll try again with a long
> +		 * request.
> +		 */
> +		acpi_desc->scrub_spa = NULL;
>  		break;
>  	case -EBUSY:
> +	case -ENOMEM:
>  	case -ENOSPC:
> +		/*
> +		 * BIOS was using ARS, wait for it to complete (or
> +		 * resources to become available) and then perform our
> +		 * own scrubs.
> +		 */
>  		break;
>  	default:
>  		set_bit(ARS_FAILED, &nfit_spa->ars_state);
>  		break;
>  	}
>  
> -	if (test_and_clear_bit(ARS_DONE, &nfit_spa->ars_state))
> -		set_bit(ARS_REQ, &nfit_spa->ars_state);
> -
>  	return acpi_nfit_register_region(acpi_desc, nfit_spa);
>  }
>  
> @@ -3022,6 +3026,8 @@ static unsigned int __acpi_nfit_scrub(struct acpi_nfit_desc *acpi_desc,
>  	struct device *dev = acpi_desc->dev;
>  	struct nfit_spa *nfit_spa;
>  
> +	lockdep_assert_held(&acpi_desc->init_mutex);
> +
>  	if (acpi_desc->cancel)
>  		return 0;
>  
> @@ -3045,21 +3051,49 @@ static unsigned int __acpi_nfit_scrub(struct acpi_nfit_desc *acpi_desc,
>  
>  	ars_complete_all(acpi_desc);
>  	list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
> +		enum nfit_ars_state req_type;
> +		int rc;
> +
>  		if (test_bit(ARS_FAILED, &nfit_spa->ars_state))
>  			continue;
> -		if (test_bit(ARS_REQ, &nfit_spa->ars_state)) {
> -			int rc = ars_start(acpi_desc, nfit_spa);
> -
> -			clear_bit(ARS_DONE, &nfit_spa->ars_state);
> -			dev = nd_region_dev(nfit_spa->nd_region);
> -			dev_dbg(dev, "ARS: range %d ARS start (%d)\n",
> -					nfit_spa->spa->range_index, rc);
> -			if (rc == 0 || rc == -EBUSY)
> -				return 1;
> -			dev_err(dev, "ARS: range %d ARS failed (%d)\n",
> -					nfit_spa->spa->range_index, rc);
> -			set_bit(ARS_FAILED, &nfit_spa->ars_state);
> +
> +		/* prefer short ARS requests first */
> +		if (test_bit(ARS_REQ_SHORT, &nfit_spa->ars_state))
> +			req_type = ARS_REQ_SHORT;
> +		else if (test_bit(ARS_REQ_LONG, &nfit_spa->ars_state))
> +			req_type = ARS_REQ_LONG;
> +		else
> +			continue;
> +		rc = ars_start(acpi_desc, nfit_spa, req_type);
> +
> +		dev = nd_region_dev(nfit_spa->nd_region);
> +		dev_dbg(dev, "ARS: range %d ARS start %s (%d)\n",
> +				nfit_spa->spa->range_index,
> +				req_type == ARS_REQ_SHORT ? "short" : "long",
> +				rc);
> +		/*
> +		 * Hmm, we raced someone else starting ARS? Try again in
> +		 * a bit.
> +		 */
> +		if (rc == -EBUSY)
> +			return 1;
> +		if (rc == 0) {
> +			dev_WARN_ONCE(dev, acpi_desc->scrub_spa,
> +					"scrub start while range %d active\n",
> +					acpi_desc->scrub_spa->spa->range_index);
> +			clear_bit(req_type, &nfit_spa->ars_state);
> +			acpi_desc->scrub_spa = nfit_spa;
> +			/*
> +			 * Consider this spa last for future scrub
> +			 * requests
> +			 */
> +			list_move_tail(&nfit_spa->list, &acpi_desc->spas);
> +			return 1;
>  		}
> +
> +		dev_err(dev, "ARS: range %d ARS failed (%d)\n",
> +				nfit_spa->spa->range_index, rc);
> +		set_bit(ARS_FAILED, &nfit_spa->ars_state);
>  	}
>  	return 0;
>  }
> @@ -3115,6 +3149,7 @@ static void acpi_nfit_init_ars(struct acpi_nfit_desc *acpi_desc,
>  	struct nd_cmd_ars_cap ars_cap;
>  	int rc;
>  
> +	set_bit(ARS_FAILED, &nfit_spa->ars_state);
>  	memset(&ars_cap, 0, sizeof(ars_cap));
>  	rc = ars_get_cap(acpi_desc, &ars_cap, nfit_spa);
>  	if (rc < 0)
> @@ -3131,16 +3166,14 @@ static void acpi_nfit_init_ars(struct acpi_nfit_desc *acpi_desc,
>  	nfit_spa->clear_err_unit = ars_cap.clear_err_unit;
>  	acpi_desc->max_ars = max(nfit_spa->max_ars, acpi_desc->max_ars);
>  	clear_bit(ARS_FAILED, &nfit_spa->ars_state);
> -	set_bit(ARS_REQ, &nfit_spa->ars_state);
>  }
>  
>  static int acpi_nfit_register_regions(struct acpi_nfit_desc *acpi_desc)
>  {
>  	struct nfit_spa *nfit_spa;
> -	int rc, query_rc;
> +	int rc;
>  
>  	list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
> -		set_bit(ARS_FAILED, &nfit_spa->ars_state);
>  		switch (nfit_spa_type(nfit_spa->spa)) {
>  		case NFIT_SPA_VOLATILE:
>  		case NFIT_SPA_PM:
> @@ -3149,20 +3182,12 @@ static int acpi_nfit_register_regions(struct acpi_nfit_desc *acpi_desc)
>  		}
>  	}
>  
> -	/*
> -	 * Reap any results that might be pending before starting new
> -	 * short requests.
> -	 */
> -	query_rc = acpi_nfit_query_poison(acpi_desc);
> -	if (query_rc == 0)
> -		ars_complete_all(acpi_desc);
> -
>  	list_for_each_entry(nfit_spa, &acpi_desc->spas, list)
>  		switch (nfit_spa_type(nfit_spa->spa)) {
>  		case NFIT_SPA_VOLATILE:
>  		case NFIT_SPA_PM:
>  			/* register regions and kick off initial ARS run */
> -			rc = ars_register(acpi_desc, nfit_spa, &query_rc);
> +			rc = ars_register(acpi_desc, nfit_spa);
>  			if (rc)
>  				return rc;
>  			break;
> @@ -3374,7 +3399,8 @@ static int acpi_nfit_clear_to_send(struct nvdimm_bus_descriptor *nd_desc,
>  	return __acpi_nfit_clear_to_send(nd_desc, nvdimm, cmd);
>  }
>  
> -int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc, unsigned long flags)
> +int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc,
> +		enum nfit_ars_state req_type)
>  {
>  	struct device *dev = acpi_desc->dev;
>  	int scheduled = 0, busy = 0;
> @@ -3394,14 +3420,10 @@ int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc, unsigned long flags)
>  		if (test_bit(ARS_FAILED, &nfit_spa->ars_state))
>  			continue;
>  
> -		if (test_and_set_bit(ARS_REQ, &nfit_spa->ars_state)) {
> +		if (test_and_set_bit(req_type, &nfit_spa->ars_state))
>  			busy++;
> -			set_bit(ARS_REQ_REDO, &nfit_spa->ars_state);
> -		} else {
> -			if (test_bit(ARS_SHORT, &flags))
> -				set_bit(ARS_SHORT, &nfit_spa->ars_state);
> +		else
>  			scheduled++;
> -		}
>  	}
>  	if (scheduled) {
>  		sched_ars(acpi_desc);
> @@ -3587,10 +3609,11 @@ static void acpi_nfit_update_notify(struct device *dev, acpi_handle handle)
>  static void acpi_nfit_uc_error_notify(struct device *dev, acpi_handle handle)
>  {
>  	struct acpi_nfit_desc *acpi_desc = dev_get_drvdata(dev);
> -	unsigned long flags = (acpi_desc->scrub_mode == HW_ERROR_SCRUB_ON) ?
> -			0 : 1 << ARS_SHORT;
>  
> -	acpi_nfit_ars_rescan(acpi_desc, flags);
> +	if (acpi_desc->scrub_mode == HW_ERROR_SCRUB_ON)
> +		acpi_nfit_ars_rescan(acpi_desc, ARS_REQ_LONG);
> +	else
> +		acpi_nfit_ars_rescan(acpi_desc, ARS_REQ_SHORT);
>  }
>  
>  void __acpi_nfit_notify(struct device *dev, acpi_handle handle, u32 event)
> diff --git a/drivers/acpi/nfit/nfit.h b/drivers/acpi/nfit/nfit.h
> index 8c1af38a5dee..a7d95b427efd 100644
> --- a/drivers/acpi/nfit/nfit.h
> +++ b/drivers/acpi/nfit/nfit.h
> @@ -133,10 +133,8 @@ enum nfit_dimm_notifiers {
>  };
>  
>  enum nfit_ars_state {
> -	ARS_REQ,
> -	ARS_REQ_REDO,
> -	ARS_DONE,
> -	ARS_SHORT,
> +	ARS_REQ_SHORT,
> +	ARS_REQ_LONG,
>  	ARS_FAILED,
>  };
>  
> @@ -223,6 +221,7 @@ struct acpi_nfit_desc {
>  	struct device *dev;
>  	u8 ars_start_flags;
>  	struct nd_cmd_ars_status *ars_status;
> +	struct nfit_spa *scrub_spa;
>  	struct delayed_work dwork;
>  	struct list_head list;
>  	struct kernfs_node *scrub_count_state;
> @@ -277,7 +276,8 @@ struct nfit_blk {
>  
>  extern struct list_head acpi_descs;
>  extern struct mutex acpi_desc_lock;
> -int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc, unsigned long flags);
> +int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc,
> +		enum nfit_ars_state req_type);
>  
>  #ifdef CONFIG_X86_MCE
>  void nfit_mce_register(void);
> 



[Index of Archives]     [Linux IBM ACPI]     [Linux Power Management]     [Linux Kernel]     [Linux Laptop]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Samba]     [Video 4 Linux]     [Device Mapper]     [Linux Resources]

  Powered by Linux