Re: [PATCH 13/16] fnic: allocate device reset command on the fly

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Adding the fnic maintainers as they are probably most qualified to
review and test this.

On Mon, Oct 23, 2023 at 11:15:04AM +0200, Hannes Reinecke wrote:
> Allocate a reset command on the fly instead of relying
> on using the command which triggered the device failure.
> This might fail if all available tags are busy, but in
> that case it'll be safer to fall back to host reset anyway.
> 
> Signed-off-by: Hannes Reinecke <hare@xxxxxxx>
> ---
>  drivers/scsi/fnic/fnic.h      |   1 -
>  drivers/scsi/fnic/fnic_scsi.c | 113 ++++++++++++++++------------------
>  drivers/scsi/snic/snic_scsi.c |   5 +-
>  3 files changed, 55 insertions(+), 64 deletions(-)
> 
> diff --git a/drivers/scsi/fnic/fnic.h b/drivers/scsi/fnic/fnic.h
> index 93c68931a593..8ffcafb4687f 100644
> --- a/drivers/scsi/fnic/fnic.h
> +++ b/drivers/scsi/fnic/fnic.h
> @@ -236,7 +236,6 @@ struct fnic {
>  	unsigned int wq_count;
>  	unsigned int cq_count;
>  
> -	struct mutex sgreset_mutex;
>  	struct dentry *fnic_stats_debugfs_host;
>  	struct dentry *fnic_stats_debugfs_file;
>  	struct dentry *fnic_reset_debugfs_file;
> diff --git a/drivers/scsi/fnic/fnic_scsi.c b/drivers/scsi/fnic/fnic_scsi.c
> index 9761b2c9db48..7a8b6285a096 100644
> --- a/drivers/scsi/fnic/fnic_scsi.c
> +++ b/drivers/scsi/fnic/fnic_scsi.c
> @@ -1984,7 +1984,6 @@ static inline int fnic_queue_dr_io_req(struct fnic *fnic,
>  
>  struct fnic_pending_aborts_iter_data {
>  	struct fnic *fnic;
> -	struct scsi_cmnd *lr_sc;
>  	struct scsi_device *lun_dev;
>  	int ret;
>  };
> @@ -2002,7 +2001,7 @@ static bool fnic_pending_aborts_iter(struct scsi_cmnd *sc, void *data)
>  	DECLARE_COMPLETION_ONSTACK(tm_done);
>  	enum fnic_ioreq_state old_ioreq_state;
>  
> -	if (sc == iter_data->lr_sc || sc->device != lun_dev)
> +	if (sc->device != lun_dev)
>  		return true;
>  
>  	io_lock = fnic_io_lock_tag(fnic, abt_tag);
> @@ -2105,17 +2104,11 @@ static bool fnic_pending_aborts_iter(struct scsi_cmnd *sc, void *data)
>  		return false;
>  	}
>  	fnic_priv(sc)->state = FNIC_IOREQ_ABTS_COMPLETE;
> -
> -	/* original sc used for lr is handled by dev reset code */
> -	if (sc != iter_data->lr_sc)
> -		fnic_priv(sc)->io_req = NULL;
> +	fnic_priv(sc)->io_req = NULL;
>  	spin_unlock_irqrestore(io_lock, flags);
>  
> -	/* original sc used for lr is handled by dev reset code */
> -	if (sc != iter_data->lr_sc) {
> -		fnic_release_ioreq_buf(fnic, io_req, sc);
> -		mempool_free(io_req, fnic->io_req_pool);
> -	}
> +	fnic_release_ioreq_buf(fnic, io_req, sc);
> +	mempool_free(io_req, fnic->io_req_pool);
>  
>  	/*
>  	 * Any IO is returned during reset, it needs to call scsi_done
> @@ -2135,8 +2128,7 @@ static bool fnic_pending_aborts_iter(struct scsi_cmnd *sc, void *data)
>   * successfully aborted, 1 otherwise
>   */
>  static int fnic_clean_pending_aborts(struct fnic *fnic,
> -				     struct scsi_cmnd *lr_sc,
> -				     bool new_sc)
> +				     struct scsi_cmnd *lr_sc)
>  
>  {
>  	int ret = 0;
> @@ -2146,9 +2138,6 @@ static int fnic_clean_pending_aborts(struct fnic *fnic,
>  		.ret = SUCCESS,
>  	};
>  
> -	if (new_sc)
> -		iter_data.lr_sc = lr_sc;
> -
>  	scsi_host_busy_iter(fnic->lport->host,
>  			    fnic_pending_aborts_iter, &iter_data);
>  	if (iter_data.ret == FAILED) {
> @@ -2174,7 +2163,8 @@ static int fnic_clean_pending_aborts(struct fnic *fnic,
>   */
>  int fnic_device_reset(struct scsi_cmnd *sc)
>  {
> -	struct request *rq = scsi_cmd_to_rq(sc);
> +	struct scsi_device *sdev = sc->device;
> +	struct request *req;
>  	struct fc_lport *lp;
>  	struct fnic *fnic;
>  	struct fnic_io_req *io_req = NULL;
> @@ -2187,15 +2177,17 @@ int fnic_device_reset(struct scsi_cmnd *sc)
>  	struct scsi_lun fc_lun;
>  	struct fnic_stats *fnic_stats;
>  	struct reset_stats *reset_stats;
> -	int tag = rq->tag;
> +	int tag;
>  	DECLARE_COMPLETION_ONSTACK(tm_done);
> -	bool new_sc = 0;
>  
>  	/* Wait for rport to unblock */
> -	fc_block_scsi_eh(sc);
> +	rport = starget_to_rport(scsi_target(sdev));
> +	ret = fc_block_rport(rport);
> +	if (ret)
> +		return ret;
>  
>  	/* Get local-port, check ready and link up */
> -	lp = shost_priv(sc->device->host);
> +	lp = shost_priv(sdev->host);
>  
>  	fnic = lport_priv(lp);
>  	fnic_stats = &fnic->fnic_stats;
> @@ -2203,53 +2195,55 @@ int fnic_device_reset(struct scsi_cmnd *sc)
>  
>  	atomic64_inc(&reset_stats->device_resets);
>  
> -	rport = starget_to_rport(scsi_target(sc->device));
>  	FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
> -		      "Device reset called FCID 0x%x, LUN 0x%llx sc 0x%p\n",
> -		      rport->port_id, sc->device->lun, sc);
> +		      "Device reset called FCID 0x%x, LUN 0x%llx\n",
> +		      rport->port_id, sdev->lun);
>  
>  	if (lp->state != LPORT_ST_READY || !(lp->link_up))
> -		goto fnic_device_reset_end;
> +		return ret;
>  
>  	/* Check if remote port up */
>  	if (fc_remote_port_chkready(rport)) {
>  		atomic64_inc(&fnic_stats->misc_stats.rport_not_ready);
> -		goto fnic_device_reset_end;
> +		return ret;
>  	}
>  
> -	fnic_priv(sc)->flags = FNIC_DEVICE_RESET;
> -
> -	if (unlikely(tag < 0)) {
> +	req = scsi_alloc_request(sdev->request_queue, REQ_OP_DRV_IN,
> +				 BLK_MQ_REQ_NOWAIT);
> +	if (!req) {
>  		/*
> -		 * For device reset issued through sg3utils, we let
> -		 * only one LUN_RESET to go through and use a special
> -		 * tag equal to max_tag_id so that we don't have to allocate
> -		 * or free it. It won't interact with tags
> -		 * allocated by mid layer.
> +		 * Request allocation might fail, indicating that
> +		 * all tags are busy.
> +		 * But device reset will be called only from within
> +		 * SCSI EH, at which time all I/O is stopped. So the
> +		 * only active tags would be for failed I/O, but
> +		 * when all I/O is failed it'll be better to escalate
> +		 * to host reset anyway.
>  		 */
> -		mutex_lock(&fnic->sgreset_mutex);
> -		tag = fnic->fnic_max_tag_id;
> -		new_sc = 1;
> +		FNIC_SCSI_DBG(KERN_ERR, fnic->lport->host,
> +			      "Device reset allocation failed, all tags busy\n");
> +		return ret;
>  	}
> +	sc = blk_mq_rq_to_pdu(req);
> +
> +	tag = req->tag;
>  	io_lock = fnic_io_lock_hash(fnic, sc);
>  	spin_lock_irqsave(io_lock, flags);
>  	io_req = fnic_priv(sc)->io_req;
> +	if (io_req)
> +		goto fnic_device_reset_end;
>  
> -	/*
> -	 * If there is a io_req attached to this command, then use it,
> -	 * else allocate a new one.
> -	 */
> +	io_req = mempool_alloc(fnic->io_req_pool, GFP_ATOMIC);
>  	if (!io_req) {
> -		io_req = mempool_alloc(fnic->io_req_pool, GFP_ATOMIC);
> -		if (!io_req) {
> -			spin_unlock_irqrestore(io_lock, flags);
> -			goto fnic_device_reset_end;
> -		}
> -		memset(io_req, 0, sizeof(*io_req));
> -		io_req->port_id = rport->port_id;
> -		fnic_priv(sc)->io_req = io_req;
> +		spin_unlock_irqrestore(io_lock, flags);
> +		goto fnic_device_reset_end;
>  	}
> +	memset(io_req, 0, sizeof(*io_req));
> +	io_req->port_id = rport->port_id;
> +	fnic_priv(sc)->io_req = io_req;
> +
>  	io_req->dr_done = &tm_done;
> +	fnic_priv(sc)->flags = FNIC_DEVICE_RESET;
>  	fnic_priv(sc)->state = FNIC_IOREQ_CMD_PENDING;
>  	fnic_priv(sc)->lr_status = FCPIO_INVALID_CODE;
>  	spin_unlock_irqrestore(io_lock, flags);
> @@ -2260,11 +2254,13 @@ int fnic_device_reset(struct scsi_cmnd *sc)
>  	 * issue the device reset, if enqueue failed, clean up the ioreq
>  	 * and break assoc with scsi cmd
>  	 */
> +	WRITE_ONCE(req->state, MQ_RQ_IN_FLIGHT);
>  	if (fnic_queue_dr_io_req(fnic, sc, io_req)) {
>  		spin_lock_irqsave(io_lock, flags);
>  		io_req = fnic_priv(sc)->io_req;
>  		if (io_req)
>  			io_req->dr_done = NULL;
> +		WRITE_ONCE(req->state, MQ_RQ_IDLE);
>  		goto fnic_device_reset_clean;
>  	}
>  	spin_lock_irqsave(io_lock, flags);
> @@ -2279,11 +2275,12 @@ int fnic_device_reset(struct scsi_cmnd *sc)
>  				    msecs_to_jiffies(FNIC_LUN_RESET_TIMEOUT));
>  
>  	spin_lock_irqsave(io_lock, flags);
> +	WRITE_ONCE(req->state, MQ_RQ_IDLE);
>  	io_req = fnic_priv(sc)->io_req;
>  	if (!io_req) {
>  		spin_unlock_irqrestore(io_lock, flags);
>  		FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
> -				"io_req is null tag 0x%x sc 0x%p\n", tag, sc);
> +				"io_req is null tag 0x%x\n", tag);
>  		goto fnic_device_reset_end;
>  	}
>  	io_req->dr_done = NULL;
> @@ -2326,7 +2323,7 @@ int fnic_device_reset(struct scsi_cmnd *sc)
>  				spin_unlock_irqrestore(io_lock, flags);
>  				FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
>  				"Abort and terminate issued on Device reset "
> -				"tag 0x%x sc 0x%p\n", tag, sc);
> +				"tag 0x%x\n", tag);
>  				break;
>  			}
>  		}
> @@ -2364,7 +2361,7 @@ int fnic_device_reset(struct scsi_cmnd *sc)
>  	 * the lun reset cmd. If all cmds get cleaned, the lun reset
>  	 * succeeds
>  	 */
> -	if (fnic_clean_pending_aborts(fnic, sc, new_sc)) {
> +	if (fnic_clean_pending_aborts(fnic, sc)) {
>  		spin_lock_irqsave(io_lock, flags);
>  		io_req = fnic_priv(sc)->io_req;
>  		FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
> @@ -2393,15 +2390,15 @@ int fnic_device_reset(struct scsi_cmnd *sc)
>  	}
>  
>  fnic_device_reset_end:
> -	FNIC_TRACE(fnic_device_reset, sc->device->host->host_no, rq->tag, sc,
> +	FNIC_TRACE(fnic_device_reset, sc->device->host->host_no,
> +		  scsi_cmd_to_rq(sc)->tag, sc,
>  		  jiffies_to_msecs(jiffies - start_time),
>  		  0, ((u64)sc->cmnd[0] << 32 |
>  		  (u64)sc->cmnd[2] << 24 | (u64)sc->cmnd[3] << 16 |
>  		  (u64)sc->cmnd[4] << 8 | sc->cmnd[5]),
>  		  fnic_flags_and_state(sc));
>  
> -	if (new_sc)
> -		mutex_unlock(&fnic->sgreset_mutex);
> +	blk_mq_free_request(req);
>  
>  	FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
>  		      "Returning from device reset %s\n",
> @@ -2633,8 +2630,6 @@ static bool fnic_abts_pending_iter(struct scsi_cmnd *sc, void *data)
>  	 * ignore this lun reset cmd or cmds that do not belong to
>  	 * this lun
>  	 */
> -	if (iter_data->lr_sc && sc == iter_data->lr_sc)
> -		return true;
>  	if (iter_data->lun_dev && sc->device != iter_data->lun_dev)
>  		return true;
>  
> @@ -2677,10 +2672,8 @@ int fnic_is_abts_pending(struct fnic *fnic, struct scsi_cmnd *lr_sc)
>  		.ret = 0,
>  	};
>  
> -	if (lr_sc) {
> +	if (lr_sc)
>  		iter_data.lun_dev = lr_sc->device;
> -		iter_data.lr_sc = lr_sc;
> -	}
>  
>  	/* walk again to check, if IOs are still pending in fw */
>  	scsi_host_busy_iter(fnic->lport->host,
> diff --git a/drivers/scsi/snic/snic_scsi.c b/drivers/scsi/snic/snic_scsi.c
> index f123531e7dd1..c38f648da3d7 100644
> --- a/drivers/scsi/snic/snic_scsi.c
> +++ b/drivers/scsi/snic/snic_scsi.c
> @@ -2367,7 +2367,7 @@ snic_cmpl_pending_tmreq(struct snic *snic, struct scsi_cmnd *sc)
>  }
>  
>  static bool
> -snic_scsi_cleanup_iter(struct scsi_cmnd *sc, void *data, bool reserved)
> +snic_scsi_cleanup_iter(struct scsi_cmnd *sc, void *data)
>  {
>  	struct snic *snic = data;
>  	struct snic_req_info *rqi = NULL;
> @@ -2541,8 +2541,7 @@ struct snic_tgt_scsi_abort_io_data {
>  	int abt_cnt;
>  };
>  
> -static bool snic_tgt_scsi_abort_io_iter(struct scsi_cmnd *sc, void *data,
> -					bool reserved)
> +static bool snic_tgt_scsi_abort_io_iter(struct scsi_cmnd *sc, void *data)
>  {
>  	struct snic_tgt_scsi_abort_io_data *iter_data = data;
>  	struct snic *snic = iter_data->snic;
> -- 
> 2.35.3
---end quoted text---



[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Index of Archives]     [SCSI Target Devel]     [Linux SCSI Target Infrastructure]     [Kernel Newbies]     [IDE]     [Security]     [Git]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux ATA RAID]     [Linux IIO]     [Samba]     [Device Mapper]

  Powered by Linux