This issue is seen on Marvell controllers with device IDs 0x9215 and 0x9235. It is reproduced within a minute with a block size of 64K, 100 threads, 100 jobs and an iodepth of 512 on an AMD platform. With a decreased workload it takes 8-9 hours to reproduce. Sequence leading to the issue, as per the PCIe trace: - PxSACT is read with slots 7 and 24 being cleared. - The host starts processing these commands while the data is not in system memory yet. - The last packet of 512B is sent to the host. - SDB.FIS is copied, telling the host that command slot 24 is done. Reading SDB.FIS confirms that the transfer is complete. Cc: Soochon Radee <soochon@xxxxxxxxxx> Tested-by: Manoj Phadtare <mphadtare@xxxxxxxxxxx> Signed-off-by: Saurav Kashyap <skashyap@xxxxxxxxxxx> --- v1->v2: Added workload and platform related details in the description. drivers/ata/libata-sata.c | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/drivers/ata/libata-sata.c b/drivers/ata/libata-sata.c index 0fb1934875f2..7cdeb0a38c5b 100644 --- a/drivers/ata/libata-sata.c +++ b/drivers/ata/libata-sata.c @@ -14,9 +14,11 @@ #include <scsi/scsi_eh.h> #include <linux/libata.h> #include <asm/unaligned.h> +#include <linux/pci.h> #include "libata.h" #include "libata-transport.h" +#include "ahci.h" /* debounce timing parameters in msecs { interval, duration, timeout } */ const unsigned int sata_deb_timing_normal[] = { 5, 100, 2000 }; @@ -649,6 +651,7 @@ EXPORT_SYMBOL_GPL(sata_link_hardreset); int ata_qc_complete_multiple(struct ata_port *ap, u64 qc_active) { u64 done_mask, ap_qc_active = ap->qc_active; + struct pci_dev *pdev = to_pci_dev(ap->host->dev); int nr_done = 0; /* @@ -677,7 +680,30 @@ int ata_qc_complete_multiple(struct ata_port *ap, u64 qc_active) unsigned int tag = __ffs64(done_mask); qc = ata_qc_from_tag(ap, tag); - if (qc) { + if (pdev->vendor == PCI_VENDOR_ID_MARVELL_EXT && + (pdev->device == 0x9215 || pdev->device == 0x9235)) { + struct ahci_port_priv *pp = ap->private_data; + u8 *rx_fis = pp->rx_fis; + + if (pp->fbs_enabled) 
+ rx_fis += ap->link.pmp * AHCI_RX_FIS_SZ; + + if (!qc) + continue; + + if (ata_is_ncq(qc->tf.protocol)) { + u32 *fis = (u32 *)(rx_fis + RX_FIS_SDB); + u32 fis_active = fis[1]; + + if ((fis_active & (1 << tag))) { + ata_qc_complete(qc); + nr_done++; + } + } else { + ata_qc_complete(qc); + nr_done++; + } + } else if (qc) { ata_qc_complete(qc); nr_done++; } -- 2.23.1