The patch titled IB/ipath: fix lost interrupts on HT-400 has been added to the -mm tree. Its filename is ib-ipath-fix-lost-interrupts-on-ht-400.patch See http://www.zip.com.au/~akpm/linux/patches/stuff/added-to-mm.txt to find out what to do about this ------------------------------------------------------ Subject: IB/ipath: fix lost interrupts on HT-400 From: "Bryan O'Sullivan" <bos@xxxxxxxxxxxxx> Do an extra check to see if in-memory tail changed while processing packets, and if so, go back through the loop again (but only once per call to ipath_kreceive()). In practice, this seems to be enough to guarantee that if we crossed the clearing of an interrupt at start of ipath_intr with a scheduled tail register update, we'll process the "extra" packet that lost the interrupt because we cleared it just as it was about to arrive. Signed-off-by: Dave Olson <dave.olson@xxxxxxxxxx> Signed-off-by: Bryan O'Sullivan <bryan.osullivan@xxxxxxxxxx> Cc: "Michael S. Tsirkin" <mst@xxxxxxxxxxxxxx> Cc: Roland Dreier <rolandd@xxxxxxxxx> Signed-off-by: Andrew Morton <akpm@xxxxxxxx> --- drivers/infiniband/hw/ipath/ipath_driver.c | 25 ++++++++++++++-- drivers/infiniband/hw/ipath/ipath_intr.c | 28 +++++++++---------- 2 files changed, 36 insertions(+), 17 deletions(-) diff -puN drivers/infiniband/hw/ipath/ipath_driver.c~ib-ipath-fix-lost-interrupts-on-ht-400 drivers/infiniband/hw/ipath/ipath_driver.c --- a/drivers/infiniband/hw/ipath/ipath_driver.c~ib-ipath-fix-lost-interrupts-on-ht-400 +++ a/drivers/infiniband/hw/ipath/ipath_driver.c @@ -870,7 +870,7 @@ void ipath_kreceive(struct ipath_devdata const u32 maxcnt = dd->ipath_rcvhdrcnt * rsize; /* words */ u32 etail = -1, l, hdrqtail; struct ips_message_header *hdr; - u32 eflags, i, etype, tlen, pkttot = 0, updegr=0; + u32 eflags, i, etype, tlen, pkttot = 0, updegr=0, reloop=0; static u64 totcalls; /* stats, may eventually remove */ char emsg[128]; @@ -885,9 +885,11 @@ void ipath_kreceive(struct ipath_devdata goto bail; l = 
dd->ipath_port0head; - if (l == (u32)le64_to_cpu(*dd->ipath_hdrqtailptr)) + hdrqtail = (u32) le64_to_cpu(*dd->ipath_hdrqtailptr); + if (l == hdrqtail) goto done; +reloop: /* read only once at start for performance */ hdrqtail = (u32)le64_to_cpu(*dd->ipath_hdrqtailptr); @@ -1011,7 +1013,7 @@ void ipath_kreceive(struct ipath_devdata */ if (l == hdrqtail || (i && !(i&0xf))) { u64 lval; - if (l == hdrqtail) /* want interrupt only on last */ + if (l == hdrqtail) /* PE-800 interrupt only on last */ lval = dd->ipath_rhdrhead_intr_off | l; else lval = l; @@ -1024,6 +1026,23 @@ void ipath_kreceive(struct ipath_devdata } } + if (!dd->ipath_rhdrhead_intr_off && !reloop) { + /* HT-400 workaround; we can have a race clearing chip + * interrupt with another interrupt about to be delivered, + * and can clear it before it is delivered on the GPIO + * workaround. By doing the extra check here for the + * in-memory tail register updating while we were doing + * earlier packets, we "almost" guarantee we have covered + * that case. + */ + u32 hqtail = (u32)le64_to_cpu(*dd->ipath_hdrqtailptr); + if (hqtail != hdrqtail) { + hdrqtail = hqtail; + reloop = 1; /* loop 1 extra time at most */ + goto reloop; + } + } + pkttot += i; dd->ipath_port0head = l; diff -puN drivers/infiniband/hw/ipath/ipath_intr.c~ib-ipath-fix-lost-interrupts-on-ht-400 drivers/infiniband/hw/ipath/ipath_intr.c --- a/drivers/infiniband/hw/ipath/ipath_intr.c~ib-ipath-fix-lost-interrupts-on-ht-400 +++ a/drivers/infiniband/hw/ipath/ipath_intr.c @@ -766,7 +766,7 @@ irqreturn_t ipath_intr(int irq, void *da u32 istat, chk0rcv = 0; ipath_err_t estat = 0; irqreturn_t ret; - u32 p0bits, oldhead; + u32 oldhead, curtail; static unsigned unexpected = 0; static const u32 port0rbits = (1U<<INFINIPATH_I_RCVAVAIL_SHIFT) | (1U<<INFINIPATH_I_RCVURG_SHIFT); @@ -809,15 +809,16 @@ irqreturn_t ipath_intr(int irq, void *da * lose intr for later packets that arrive while we are processing. 
*/ oldhead = dd->ipath_port0head; - if (oldhead != (u32) le64_to_cpu(*dd->ipath_hdrqtailptr)) { + curtail = (u32)le64_to_cpu(*dd->ipath_hdrqtailptr); + if (oldhead != curtail) { if (dd->ipath_flags & IPATH_GPIO_INTR) { ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_clear, (u64) (1 << 2)); - p0bits = port0rbits | INFINIPATH_I_GPIO; + istat = port0rbits | INFINIPATH_I_GPIO; } else - p0bits = port0rbits; - ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, p0bits); + istat = port0rbits; + ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, istat); ipath_kreceive(dd); if (oldhead != dd->ipath_port0head) { ipath_stats.sps_fastrcvint++; @@ -827,7 +828,6 @@ irqreturn_t ipath_intr(int irq, void *da } istat = ipath_read_kreg32(dd, dd->ipath_kregs->kr_intstatus); - p0bits = port0rbits; if (unlikely(!istat)) { ipath_stats.sps_nullintr++; @@ -890,19 +890,19 @@ irqreturn_t ipath_intr(int irq, void *da else { /* Clear GPIO status bit 2 */ ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_clear, - (u64) (1 << 2)); - p0bits |= INFINIPATH_I_GPIO; + (u64) (1 << 2)); chk0rcv = 1; } } - chk0rcv |= istat & p0bits; + chk0rcv |= istat & port0rbits; /* - * clear the ones we will deal with on this round - * We clear it early, mostly for receive interrupts, so we - * know the chip will have seen this by the time we process - * the queue, and will re-interrupt if necessary. The processor - * itself won't take the interrupt again until we return. + * Clear the interrupt bits we found set, unless they are receive + * related, in which case we already cleared them above, and don't + * want to clear them again, because we might lose an interrupt. + * Clear it early, so we "know" know the chip will have seen this by + * the time we process the queue, and will re-interrupt if necessary. + * The processor itself won't take the interrupt again until we return. 
*/ ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, istat); _ Patches currently in -mm which might be from bos@xxxxxxxxxxxxx are ib-ipath-name-zero-counter-offsets-so-its-clear.patch ib-ipath-update-copyrights-and-other-strings-to.patch ib-ipath-share-more-common-code-between-rc-and-uc.patch ib-ipath-fix-an-indenting-problem.patch ib-ipath-fix-shared-receive-queues-for-rc.patch ib-ipath-allow-diags-on-any-unit.patch ib-ipath-update-some-comments-and-fix-typos.patch ib-ipath-remove-some-duplicate-code.patch ib-ipath-dont-allow-resources-to-be-created-with.patch ib-ipath-fix-some-memory-leaks-on-failure-paths.patch ib-ipath-return-an-error-for-unknown-multicast-gid.patch ib-ipath-report-correct-device-identification.patch ib-ipath-enforce-device-resource-limits.patch ib-ipath-removed-unused-field-ipath_kregvirt-from.patch ib-ipath-print-better-debug-info-when-handling.patch ib-ipath-enable-freeze-mode-when-shutting-down.patch ib-ipath-use-more-appropriate-gfp-flags.patch ib-ipath-use-vmalloc-to-allocate-struct.patch ib-ipath-memory-management-cleanups.patch ib-ipath-reduce-overhead-on-receive-interrupts.patch ib-ipath-fixed-bug-9776.patch ib-ipath-fix-lost-interrupts-on-ht-400.patch ib-ipath-disallow-send-of-invalid-packet-sizes.patch ib-ipath-dont-confuse-the-max-message-size-with.patch ib-ipath-removed-redundant-statements.patch ib-ipath-check-for-valid-lid-and-multicast-lids.patch ib-ipath-fixes-to-performance-get-counters-for-ib.patch ib-ipath-fixes-a-bug-where-our-delay-for-eeprom-no.patch ib-ipath-rc-receive-interrupt-performance-changes.patch ib-ipath-purge-sps_lid-and-sps_mlid-arrays.patch ib-ipath-drop-the-stats-sysfs-attribute-group.patch ib-ipath-support-more-models-of-infinipath-hardware.patch ib-ipath-read-write-correct-sizes-through-diag.patch ib-ipath-fix-a-bug-that-results-in-addresses-near.patch ib-ipath-remove-some-if-0-code-related-to.patch ib-ipath-ignore-receive-queue-size-if-srq-is.patch ib-ipath-namespace-cleanup-replace-ips-with-ipath.patch 
- To unsubscribe from this list: send the line "unsubscribe mm-commits" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html