> -----Original Message----- > From: Stephen Hemminger [mailto:stephen@xxxxxxxxxxxxxxxxxx] > Sent: Friday, March 3, 2017 4:50 PM > To: James Bottomley <James.Bottomley@xxxxxxxxxxxxxxxxxxxxx> > Cc: Hannes Reinecke <hare@xxxxxxx>; Christoph Hellwig <hch@xxxxxx>; > James Bottomley <jejb@xxxxxxxxxxxxxxxxxx>; Jens Axboe > <axboe@xxxxxxxxx>; Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>; > Martin K. Petersen <martin.petersen@xxxxxxxxxx>; KY Srinivasan > <kys@xxxxxxxxxxxxx>; Dexuan Cui <decui@xxxxxxxxxxxxx>; Long Li > <longli@xxxxxxxxxxxxx>; Josh Poulson <jopoulso@xxxxxxxxxxxxx>; Adrian > Suhov (Cloudbase Solutions SRL) <v-adsuho@xxxxxxxxxxxxx>; linux- > scsi@xxxxxxxxxxxxxxx; Haiyang Zhang <haiyangz@xxxxxxxxxxxxx> > Subject: [RFC] hv_storvsc: error handling. > > Needs more testing but this does fix the observed problem. > > From: Stephen Hemminger <sthemmin@xxxxxxxxxxxxx> > > Subject: [PATCH] hv_storvsc: fix error handling > > The Hyper-V storvsc SCSI driver was hiding all errors in INQUIRY and > MODE_SENSE commands. This caused the scan process to incorrectly think > devices were present and online. Also invalid LUN errors were not > being handled correctly. > > This fixes problems booting a GEN2 VM on Hyper-V. 
It effectively > reverts commit 4ed51a21c0f69 ("Staging: hv: storvsc: Fixup > srb and scsi status for INQUIRY and MODE_SENSE") > > Signed-off-by: Stephen Hemminger <sthemmin@xxxxxxxxxxxxx> > --- > drivers/scsi/storvsc_drv.c | 48 ++++------------------------------------------ > 1 file changed, 4 insertions(+), 44 deletions(-) > > diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c > index 638e5f427c90..8cc241fc54b8 100644 > --- a/drivers/scsi/storvsc_drv.c > +++ b/drivers/scsi/storvsc_drv.c > @@ -543,28 +543,6 @@ static void storvsc_host_scan(struct work_struct > *work) > kfree(wrk); > } > > -static void storvsc_remove_lun(struct work_struct *work) > -{ > - struct storvsc_scan_work *wrk; > - struct scsi_device *sdev; > - > - wrk = container_of(work, struct storvsc_scan_work, work); > - if (!scsi_host_get(wrk->host)) > - goto done; > - > - sdev = scsi_device_lookup(wrk->host, 0, wrk->tgt_id, wrk->lun); > - > - if (sdev) { > - scsi_remove_device(sdev); > - scsi_device_put(sdev); > - } > - scsi_host_put(wrk->host); > - > -done: > - kfree(wrk); > -} > - > - > /* > * We can get incoming messages from the host that are not in response to > * messages that we have sent out. An example of this would be messages > @@ -955,8 +933,7 @@ static void storvsc_handle_error(struct > vmscsi_request *vm_srb, > } > break; > case SRB_STATUS_INVALID_LUN: > - do_work = true; > - process_err_fn = storvsc_remove_lun; > + set_host_byte(scmnd, DID_NO_CONNECT); > break; > case SRB_STATUS_ABORTED: > if (vm_srb->srb_status & SRB_STATUS_AUTOSENSE_VALID > && > @@ -1050,32 +1027,15 @@ static void storvsc_on_io_completion(struct > storvsc_device *stor_device, > > stor_pkt = &request->vstor_packet; > > - /* > - * The current SCSI handling on the host side does > - * not correctly handle: > - * INQUIRY command with page code parameter set to 0x80 > - * MODE_SENSE command with cmd[2] == 0x1c > - * > - * Setup srb and scsi status so this won't be fatal. 
> - * We do this so we can distinguish truly fatal failues > - * (srb status == 0x4) and off-line the device in that case. > - */ > - > - if ((stor_pkt->vm_srb.cdb[0] == INQUIRY) || > - (stor_pkt->vm_srb.cdb[0] == MODE_SENSE)) { > - vstor_packet->vm_srb.scsi_status = 0; > - vstor_packet->vm_srb.srb_status = SRB_STATUS_SUCCESS; > - } > - > - > /* Copy over the status...etc */ > stor_pkt->vm_srb.scsi_status = vstor_packet->vm_srb.scsi_status; > stor_pkt->vm_srb.srb_status = vstor_packet->vm_srb.srb_status; > stor_pkt->vm_srb.sense_info_length = > vstor_packet->vm_srb.sense_info_length; > > - if (vstor_packet->vm_srb.scsi_status != 0 || > - vstor_packet->vm_srb.srb_status != SRB_STATUS_SUCCESS) > + if (stor_pkt->vm_srb.cdb[0] != INQUIRY && > + (vstor_packet->vm_srb.scsi_status != 0 || > + vstor_packet->vm_srb.srb_status != SRB_STATUS_SUCCESS)) > storvsc_log(device, STORVSC_LOGGING_WARN, > "cmd 0x%x scsi status 0x%x srb status 0x%x\n", > stor_pkt->vm_srb.cdb[0], > -- This patch gets rid of the ability to "hot remove" LUNs. I don't think that can be part of any solution. The INQUIRY hack I put in a long time ago was to deal with host bugs on prior versions of Windows Server. WS2016 should not be triggering this code. Stephen, could you please test this patch - a quick hack: >From b97f24f224a71a6e745c42e5640045a553eb407c Mon Sep 17 00:00:00 2001 From: K. Y. Srinivasan <kys@xxxxxxxxxxxxx> Date: Sat, 4 Mar 2017 14:00:46 -0700 Subject: [PATCH 1/1] scsi: storvsc: Fix a bug in LUN removal code Reply-To: kys@xxxxxxxxxxxxx Signed-off-by: K. Y. 
Srinivasan <kys@xxxxxxxxxxxxx> --- drivers/scsi/storvsc_drv.c | 13 +++++++++++++ 1 files changed, 13 insertions(+), 0 deletions(-) diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c index 05526b7..27eb682 100644 --- a/drivers/scsi/storvsc_drv.c +++ b/drivers/scsi/storvsc_drv.c @@ -885,6 +885,7 @@ static void storvsc_handle_error(struct vmscsi_request *vm_srb, struct storvsc_scan_work *wrk; void (*process_err_fn)(struct work_struct *work); bool do_work = false; + struct scsi_device *sdev; switch (SRB_STATUS(vm_srb->srb_status)) { case SRB_STATUS_ERROR: @@ -911,6 +912,18 @@ static void storvsc_handle_error(struct vmscsi_request *vm_srb, } break; case SRB_STATUS_INVALID_LUN: + if (!scsi_host_get(host)) { + set_host_byte(scmnd, DID_NO_CONNECT); + break; + } + + sdev = scsi_device_lookup(wrk->host, 0, wrk->tgt_id, wrk->lun); + + if (!sdev) { + set_host_byte(scmnd, DID_NO_CONNECT); + break; + } + do_work = true; process_err_fn = storvsc_remove_lun; break; -- 1.7.1 > 2.11.0 > >