Re: [PATCH v20 1/2] scsi: ufs: Enable power management for wlun

Adrian Hunter <adrian.hunter@xxxxxxxxx> · Mon, 19 Apr 2021 21:37:27 +0300

On 16/04/21 10:49 pm, Asutosh Das wrote:
> During runtime-suspend of ufs host, the scsi devices are
> already suspended and so are the queues associated with them.
> But the ufs host sends SSU (START_STOP_UNIT) to wlun
> during its runtime-suspend.
> During the process blk_queue_enter checks if the queue is not in
> suspended state. If so, it waits for the queue to resume, and never
> comes out of it.
> The commit
> (d55d15a33: scsi: block: Do not accept any requests while suspended)
> adds the check if the queue is in suspended state in blk_queue_enter().
> 
> Call trace:
>  __switch_to+0x174/0x2c4
>  __schedule+0x478/0x764
>  schedule+0x9c/0xe0
>  blk_queue_enter+0x158/0x228
>  blk_mq_alloc_request+0x40/0xa4
>  blk_get_request+0x2c/0x70
>  __scsi_execute+0x60/0x1c4
>  ufshcd_set_dev_pwr_mode+0x124/0x1e4
>  ufshcd_suspend+0x208/0x83c
>  ufshcd_runtime_suspend+0x40/0x154
>  ufshcd_pltfrm_runtime_suspend+0x14/0x20
>  pm_generic_runtime_suspend+0x28/0x3c
>  __rpm_callback+0x80/0x2a4
>  rpm_suspend+0x308/0x614
>  rpm_idle+0x158/0x228
>  pm_runtime_work+0x84/0xac
>  process_one_work+0x1f0/0x470
>  worker_thread+0x26c/0x4c8
>  kthread+0x13c/0x320
>  ret_from_fork+0x10/0x18
> 
> Fix this by registering ufs device wlun as a scsi driver and
> registering it for block runtime-pm. Also make this as a
> supplier for all other luns. That way, this device wlun
> suspends after all the consumers and resumes after
> hba resumes. This also registers a new scsi driver for rpmb wlun.
> This new driver is mostly used to clear rpmb uac.
> 
> Fixed smatch warnings:
> Reported-by: kernel test robot <lkp@xxxxxxxxx>
> Reported-by: Dan Carpenter <dan.carpenter@xxxxxxxxxx>
> 
> Co-developed-by: Can Guo <cang@xxxxxxxxxxxxxx>
> Signed-off-by: Can Guo <cang@xxxxxxxxxxxxxx>
> Signed-off-by: Asutosh Das <asutoshd@xxxxxxxxxxxxxx>
> ---

I came across 3 issues while testing.  See comments below.

<SNIP>

> @@ -5753,12 +5797,13 @@ static void ufshcd_clk_scaling_suspend(struct ufs_hba *hba, bool suspend)
>  
>  static void ufshcd_err_handling_prepare(struct ufs_hba *hba)
>  {
> -	pm_runtime_get_sync(hba->dev);
> -	if (pm_runtime_status_suspended(hba->dev) || hba->is_sys_suspended) {
> +	ufshcd_rpm_get_sync(hba);

hba->sdev_ufs_device could be NULL.
Need to add a check for that in ufshcd_err_handling_should_stop()

> +	if (pm_runtime_status_suspended(&hba->sdev_ufs_device->sdev_gendev) ||
> +	    hba->is_sys_suspended) {
>  		enum ufs_pm_op pm_op;
>  
>  		/*
> -		 * Don't assume anything of pm_runtime_get_sync(), if
> +		 * Don't assume anything of resume, if
>  		 * resume fails, irq and clocks can be OFF, and powers
>  		 * can be OFF or in LPM.
>  		 */
> @@ -5794,7 +5839,7 @@ static void ufshcd_err_handling_unprepare(struct ufs_hba *hba)
>  	if (ufshcd_is_clkscaling_supported(hba))
>  		ufshcd_clk_scaling_suspend(hba, false);
>  	ufshcd_clear_ua_wluns(hba);

ufshcd_clear_ua_wluns() deadlocks trying to clear UFS_UPIU_RPMB_WLUN
if sdev_rpmb is suspended and sdev_ufs_device is suspending.
e.g. ufshcd_wl_suspend() is waiting on host_sem while ufshcd_err_handler()
is running, at which point sdev_rpmb has already suspended.

> -	pm_runtime_put(hba->dev);
> +	ufshcd_rpm_put(hba);
>  }

<SNIP>

> +void ufshcd_resume_complete(struct device *dev)
> +{
> +	struct ufs_hba *hba = dev_get_drvdata(dev);
> +
> +	ufshcd_rpm_put(hba);
> +}
> +EXPORT_SYMBOL_GPL(ufshcd_resume_complete);
> +
> +int ufshcd_suspend_prepare(struct device *dev)
> +{
> +	struct ufs_hba *hba = dev_get_drvdata(dev);
> +	struct device *ufs_dev = &hba->sdev_ufs_device->sdev_gendev;
> +	enum ufs_dev_pwr_mode spm_pwr_mode;
> +	enum uic_link_state spm_link_state;
> +	unsigned long flags;
> +	bool rpm_state_ok;
> +
> +	/*
> +	 * SCSI assumes that runtime-pm and system-pm for scsi drivers
> +	 * are same. And it doesn't wake up the device for system-suspend
> +	 * if it's runtime suspended. But ufs doesn't follow that.
> +	 * The rpm-lvl and spm-lvl can be different in ufs.
> +	 * However, if the current_{pwr_mode, link_state} is same as the
> +	 * desired_{pwr_mode, link_state}, there's no need to rpm resume
> +	 * the device.
> +	 * Refer ufshcd_resume_complete()
> +	 */
> +	pm_runtime_get_noresume(ufs_dev);
> +
> +	spin_lock_irqsave(&ufs_dev->power.lock, flags);
> +
> +	spm_pwr_mode = ufs_get_pm_lvl_to_dev_pwr_mode(hba->spm_lvl);
> +	spm_link_state = ufs_get_pm_lvl_to_link_pwr_state(hba->spm_lvl);
> +
> +	rpm_state_ok = pm_runtime_suspended(ufs_dev) &&
> +		hba->curr_dev_pwr_mode == spm_pwr_mode &&
> +		hba->uic_link_state == spm_link_state &&
> +		!hba->dev_info.b_rpm_dev_flush_capable;
> +
> +	spin_unlock_irqrestore(&ufs_dev->power.lock, flags);
> +
> +	if (!rpm_state_ok) {
> +		int ret = pm_runtime_resume(ufs_dev);
> +
> +		if (ret < 0 && ret != -EACCES) {
> +			pm_runtime_put(ufs_dev);
> +			return ret;
> +		}
> +	}
> +	return 0;
> +}

Unfortunately this does not work because SCSI PM forcibly sets
the sdevs to runtime active after system resume.  Really we should
change SCSI PM to call the driver's .prepare / .complete then we could
use direct complete, but let's leave that for now and go back to
before, but allowing for errors and !hba->sdev_ufs_device. e.g.

void ufshcd_resume_complete(struct device *dev)
{
	struct ufs_hba *hba = dev_get_drvdata(dev);

	if (hba->complete_put) {
		hba->complete_put = false;
		ufshcd_rpm_put(hba);
	}
}
EXPORT_SYMBOL_GPL(ufshcd_resume_complete);

int ufshcd_suspend_prepare(struct device *dev)
{
	struct ufs_hba *hba = dev_get_drvdata(dev);
	int ret;

	if (!hba->sdev_ufs_device)
		return 0;

	ret = ufshcd_rpm_get_sync(hba);
	if (ret < 0 && ret != -EACCES) {
		ufshcd_rpm_put(hba);
		return ret;
	}
	hba->complete_put = true;
	return 0;
}
EXPORT_SYMBOL_GPL(ufshcd_suspend_prepare);