On Fri, Jan 29, 2016 at 9:23 PM, Tejun Heo <tj@xxxxxxxxxx> wrote:
> Hello,
>
> On Fri, Jan 29, 2016 at 02:40:33PM +0100, Dmitry Vyukov wrote:
>> It now deadlocks at this stack. It is probably not OK to call
>> ata_sff_hsm_move holding the spinlock.
>
> Yeah, the locking is pretty messed up with the polling path. Can you
> please try the following?

No crashes over the weekend. We can consider this fixed. Thanks!

> diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c
> index cdf6215..7dbba38 100644
> --- a/drivers/ata/libata-sff.c
> +++ b/drivers/ata/libata-sff.c
> @@ -997,12 +997,9 @@ static inline int ata_hsm_ok_in_wq(struct ata_port *ap,
>  static void ata_hsm_qc_complete(struct ata_queued_cmd *qc, int in_wq)
>  {
>  	struct ata_port *ap = qc->ap;
> -	unsigned long flags;
>
>  	if (ap->ops->error_handler) {
>  		if (in_wq) {
> -			spin_lock_irqsave(ap->lock, flags);
> -
>  			/* EH might have kicked in while host lock is
>  			 * released.
>  			 */
> @@ -1014,8 +1011,6 @@ static void ata_hsm_qc_complete(struct ata_queued_cmd *qc, int in_wq)
>  				} else
>  					ata_port_freeze(ap);
>  			}
> -
> -			spin_unlock_irqrestore(ap->lock, flags);
>  		} else {
>  			if (likely(!(qc->err_mask & AC_ERR_HSM)))
>  				ata_qc_complete(qc);
> @@ -1024,10 +1019,8 @@ static void ata_hsm_qc_complete(struct ata_queued_cmd *qc, int in_wq)
>  		}
>  	} else {
>  		if (in_wq) {
> -			spin_lock_irqsave(ap->lock, flags);
>  			ata_sff_irq_on(ap);
>  			ata_qc_complete(qc);
> -			spin_unlock_irqrestore(ap->lock, flags);
>  		} else
>  			ata_qc_complete(qc);
>  	}
> @@ -1048,9 +1041,10 @@ int ata_sff_hsm_move(struct ata_port *ap, struct ata_queued_cmd *qc,
>  {
>  	struct ata_link *link = qc->dev->link;
>  	struct ata_eh_info *ehi = &link->eh_info;
> -	unsigned long flags = 0;
>  	int poll_next;
>
> +	lockdep_assert_held(ap->lock);
> +
>  	WARN_ON_ONCE((qc->flags & ATA_QCFLAG_ACTIVE) == 0);
>
>  	/* Make sure ata_sff_qc_issue() does not throw things
> @@ -1112,14 +1106,6 @@ int ata_sff_hsm_move(struct ata_port *ap, struct ata_queued_cmd *qc,
>  			}
>  		}
>
> -		/* Send the CDB (atapi) or the first data block (ata pio out).
> -		 * During the state transition, interrupt handler shouldn't
> -		 * be invoked before the data transfer is complete and
> -		 * hsm_task_state is changed. Hence, the following locking.
> -		 */
> -		if (in_wq)
> -			spin_lock_irqsave(ap->lock, flags);
> -
>  		if (qc->tf.protocol == ATA_PROT_PIO) {
>  			/* PIO data out protocol.
>  			 * send first data block.
> @@ -1135,9 +1121,6 @@ int ata_sff_hsm_move(struct ata_port *ap, struct ata_queued_cmd *qc,
>  			/* send CDB */
>  			atapi_send_cdb(ap, qc);
>
> -		if (in_wq)
> -			spin_unlock_irqrestore(ap->lock, flags);
> -
>  		/* if polling, ata_sff_pio_task() handles the rest.
>  		 * otherwise, interrupt handler takes over from here.
>  		 */
> @@ -1361,12 +1344,14 @@ static void ata_sff_pio_task(struct work_struct *work)
>  	u8 status;
>  	int poll_next;
>
> +	spin_lock_irq(ap->lock);
> +
>  	BUG_ON(ap->sff_pio_task_link == NULL);
>  	/* qc can be NULL if timeout occurred */
>  	qc = ata_qc_from_tag(ap, link->active_tag);
>  	if (!qc) {
>  		ap->sff_pio_task_link = NULL;
> -		return;
> +		goto out_unlock;
>  	}
>
>  fsm_start:
> @@ -1381,11 +1366,14 @@ static void ata_sff_pio_task(struct work_struct *work)
>  	 */
>  	status = ata_sff_busy_wait(ap, ATA_BUSY, 5);
>  	if (status & ATA_BUSY) {
> +		spin_unlock_irq(ap->lock);
>  		ata_msleep(ap, 2);
> +		spin_lock_irq(ap->lock);
> +
>  		status = ata_sff_busy_wait(ap, ATA_BUSY, 10);
>  		if (status & ATA_BUSY) {
>  			ata_sff_queue_pio_task(link, ATA_SHORT_PAUSE);
> -			return;
> +			goto out_unlock;
>  		}
>  	}
>
> @@ -1402,6 +1390,8 @@ static void ata_sff_pio_task(struct work_struct *work)
>  	 */
>  	if (poll_next)
>  		goto fsm_start;
> +out_unlock:
> +	spin_unlock_irq(ap->lock);
>  }
>
>  /**
--
To unsubscribe from this list: send the line "unsubscribe linux-ide" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html