Re: [PATCH] scsi: sd: Revert "Rework asynchronous resume support"

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 8/17/22 22:06, Vlastimil Babka wrote:
> On 8/16/22 19:26, Bart Van Assche wrote:
>> Although patch "Rework asynchronous resume support" eliminates the delay
>> for some ATA disks after resume, it causes resume of ATA disks to fail
>> on other setups. See also:
>> * "Resume process hangs for 5-6 seconds starting sometime in 5.16"
>>   (https://bugzilla.kernel.org/show_bug.cgi?id=215880).
>> * Geert's regression report
>>   (https://lore.kernel.org/linux-scsi/alpine.DEB.2.22.394.2207191125130.1006766@xxxxxxxxxxxxxx/).
>>
>> This is what I understand about this issue:
>> * During resume, ata_port_pm_resume() starts the SCSI error handler.
>>   This changes the SCSI host state into SHOST_RECOVERY and causes
>>   scsi_queue_rq() to return BLK_STS_RESOURCE.
>> * sd_resume() calls sd_start_stop_device() for ATA devices. That
>>   function in turn calls sd_submit_start() which tries to submit a START
>>   STOP UNIT command. That command can only be submitted after the SCSI
>>   error handler has changed the SCSI host state back to SHOST_RUNNING.
>> * The SCSI error handler runs on its own thread and calls
>>   schedule_work(&(ap->scsi_rescan_task)). That causes
>>   ata_scsi_dev_rescan() to be called from the context of a kernel
>>   workqueue. That call hangs in blk_mq_get_tag(). I'm not sure why -
>>   maybe because all available tags have been allocated by
>>   sd_submit_start() calls (this is a guess).
>>
>> Cc: Damien Le Moal <damien.lemoal@xxxxxxxxxxxxxxxxxx>
>> Cc: Hannes Reinecke <hare@xxxxxxx>
>> Cc: Geert Uytterhoeven <geert@xxxxxxxxxxxxxx>
>> Cc: gzhqyz@xxxxxxxxx
>> Reported-by: Geert Uytterhoeven <geert@xxxxxxxxxxxxxx>
>> Reported-by: gzhqyz@xxxxxxxxx
>> Fixes: 88f1669019bd ("scsi: sd: Rework asynchronous resume support"; v6.0-rc1~114^2~68)
>> Signed-off-by: Bart Van Assche <bvanassche@xxxxxxx>
> 
> Reported-and-tested-by: Vlastimil Babka <vbabka@xxxxxxx>

What's the hold up on this? Didn't make it to rc2 and the number of
people who independently spent time bisecting this seems to be only
increasing.

Thanks, Vlastimil

> 
>> ---
>>  drivers/scsi/sd.c | 84 ++++++++++-------------------------------------
>>  drivers/scsi/sd.h |  5 ---
>>  2 files changed, 18 insertions(+), 71 deletions(-)
>>
>> diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
>> index 8f79fa6318fe..eb76ba055021 100644
>> --- a/drivers/scsi/sd.c
>> +++ b/drivers/scsi/sd.c
>> @@ -103,7 +103,6 @@ static void sd_config_discard(struct scsi_disk *, unsigned int);
>>  static void sd_config_write_same(struct scsi_disk *);
>>  static int  sd_revalidate_disk(struct gendisk *);
>>  static void sd_unlock_native_capacity(struct gendisk *disk);
>> -static void sd_start_done_work(struct work_struct *work);
>>  static int  sd_probe(struct device *);
>>  static int  sd_remove(struct device *);
>>  static void sd_shutdown(struct device *);
>> @@ -3471,7 +3470,6 @@ static int sd_probe(struct device *dev)
>>  	sdkp->max_retries = SD_MAX_RETRIES;
>>  	atomic_set(&sdkp->openers, 0);
>>  	atomic_set(&sdkp->device->ioerr_cnt, 0);
>> -	INIT_WORK(&sdkp->start_done_work, sd_start_done_work);
>>  
>>  	if (!sdp->request_queue->rq_timeout) {
>>  		if (sdp->type != TYPE_MOD)
>> @@ -3594,69 +3592,12 @@ static void scsi_disk_release(struct device *dev)
>>  	kfree(sdkp);
>>  }
>>  
>> -/* Process sense data after a START command finished. */
>> -static void sd_start_done_work(struct work_struct *work)
>> -{
>> -	struct scsi_disk *sdkp = container_of(work, typeof(*sdkp),
>> -					      start_done_work);
>> -	struct scsi_sense_hdr sshdr;
>> -	int res = sdkp->start_result;
>> -
>> -	if (res == 0)
>> -		return;
>> -
>> -	sd_print_result(sdkp, "Start/Stop Unit failed", res);
>> -
>> -	if (res < 0)
>> -		return;
>> -
>> -	if (scsi_normalize_sense(sdkp->start_sense_buffer,
>> -				 sdkp->start_sense_len, &sshdr))
>> -		sd_print_sense_hdr(sdkp, &sshdr);
>> -}
>> -
>> -/* A START command finished. May be called from interrupt context. */
>> -static void sd_start_done(struct request *req, blk_status_t status)
>> -{
>> -	const struct scsi_cmnd *scmd = blk_mq_rq_to_pdu(req);
>> -	struct scsi_disk *sdkp = scsi_disk(req->q->disk);
>> -
>> -	sdkp->start_result = scmd->result;
>> -	WARN_ON_ONCE(scmd->sense_len > SCSI_SENSE_BUFFERSIZE);
>> -	sdkp->start_sense_len = scmd->sense_len;
>> -	memcpy(sdkp->start_sense_buffer, scmd->sense_buffer,
>> -	       ARRAY_SIZE(sdkp->start_sense_buffer));
>> -	WARN_ON_ONCE(!schedule_work(&sdkp->start_done_work));
>> -}
>> -
>> -/* Submit a START command asynchronously. */
>> -static int sd_submit_start(struct scsi_disk *sdkp, u8 cmd[], u8 cmd_len)
>> -{
>> -	struct scsi_device *sdev = sdkp->device;
>> -	struct request_queue *q = sdev->request_queue;
>> -	struct request *req;
>> -	struct scsi_cmnd *scmd;
>> -
>> -	req = scsi_alloc_request(q, REQ_OP_DRV_IN, BLK_MQ_REQ_PM);
>> -	if (IS_ERR(req))
>> -		return PTR_ERR(req);
>> -
>> -	scmd = blk_mq_rq_to_pdu(req);
>> -	scmd->cmd_len = cmd_len;
>> -	memcpy(scmd->cmnd, cmd, cmd_len);
>> -	scmd->allowed = sdkp->max_retries;
>> -	req->timeout = SD_TIMEOUT;
>> -	req->rq_flags |= RQF_PM | RQF_QUIET;
>> -	req->end_io = sd_start_done;
>> -	blk_execute_rq_nowait(req, /*at_head=*/true);
>> -
>> -	return 0;
>> -}
>> -
>>  static int sd_start_stop_device(struct scsi_disk *sdkp, int start)
>>  {
>>  	unsigned char cmd[6] = { START_STOP };	/* START_VALID */
>> +	struct scsi_sense_hdr sshdr;
>>  	struct scsi_device *sdp = sdkp->device;
>> +	int res;
>>  
>>  	if (start)
>>  		cmd[4] |= 1;	/* START */
>> @@ -3667,10 +3608,23 @@ static int sd_start_stop_device(struct scsi_disk *sdkp, int start)
>>  	if (!scsi_device_online(sdp))
>>  		return -ENODEV;
>>  
>> -	/* Wait until processing of sense data has finished. */
>> -	flush_work(&sdkp->start_done_work);
>> +	res = scsi_execute(sdp, cmd, DMA_NONE, NULL, 0, NULL, &sshdr,
>> +			SD_TIMEOUT, sdkp->max_retries, 0, RQF_PM, NULL);
>> +	if (res) {
>> +		sd_print_result(sdkp, "Start/Stop Unit failed", res);
>> +		if (res > 0 && scsi_sense_valid(&sshdr)) {
>> +			sd_print_sense_hdr(sdkp, &sshdr);
>> +			/* 0x3a is medium not present */
>> +			if (sshdr.asc == 0x3a)
>> +				res = 0;
>> +		}
>> +	}
>>  
>> -	return sd_submit_start(sdkp, cmd, sizeof(cmd));
>> +	/* SCSI error codes must not go to the generic layer */
>> +	if (res)
>> +		return -EIO;
>> +
>> +	return 0;
>>  }
>>  
>>  /*
>> @@ -3697,8 +3651,6 @@ static void sd_shutdown(struct device *dev)
>>  		sd_printk(KERN_NOTICE, sdkp, "Stopping disk\n");
>>  		sd_start_stop_device(sdkp, 0);
>>  	}
>> -
>> -	flush_work(&sdkp->start_done_work);
>>  }
>>  
>>  static int sd_suspend_common(struct device *dev, bool ignore_stop_errors)
>> diff --git a/drivers/scsi/sd.h b/drivers/scsi/sd.h
>> index b89187761d61..5eea762f84d1 100644
>> --- a/drivers/scsi/sd.h
>> +++ b/drivers/scsi/sd.h
>> @@ -150,11 +150,6 @@ struct scsi_disk {
>>  	unsigned	urswrz : 1;
>>  	unsigned	security : 1;
>>  	unsigned	ignore_medium_access_errors : 1;
>> -
>> -	int		start_result;
>> -	u32		start_sense_len;
>> -	u8		start_sense_buffer[SCSI_SENSE_BUFFERSIZE];
>> -	struct work_struct start_done_work;
>>  };
>>  #define to_scsi_disk(obj) container_of(obj, struct scsi_disk, disk_dev)
>>  
> 




[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Index of Archives]     [SCSI Target Devel]     [Linux SCSI Target Infrastructure]     [Kernel Newbies]     [IDE]     [Security]     [Git]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux ATA RAID]     [Linux IIO]     [Samba]     [Device Mapper]

  Powered by Linux