The patch titled IB/ipath: reduce overhead on receive interrupts has been added to the -mm tree. Its filename is ib-ipath-reduce-overhead-on-receive-interrupts.patch See http://www.zip.com.au/~akpm/linux/patches/stuff/added-to-mm.txt to find out what to do about this ------------------------------------------------------ Subject: IB/ipath: reduce overhead on receive interrupts From: "Bryan O'Sullivan" <bos@xxxxxxxxxxxxx> Also count the number of interrupts where that works (fastrcvint). On any interrupt where the port0 head and tail registers are not equal, just call the ipath_kreceive code without reading the interrupt status, thus saving the approximately 0.25usec processor stall waiting for the read to return. If any other interrupt bits are set, or head==tail, take the normal path, but that has been reordered to handle read ahead of pioavail. Also no longer call ipath_kreceive() from ipath_qcheck(), because that just seems to make things worse, and isn't really buying us anything, these days. Also no longer loop in ipath_kreceive(); better to not hold things off too long (I saw many cases where we would loop 4-8 times, and handle thousands (up to 3500) in a single call). Signed-off-by: Dave Olson <dave.olson@xxxxxxxxxx> Signed-off-by: Bryan O'Sullivan <bryan.osullivan@xxxxxxxxxx> Cc: "Michael S. Tsirkin" <mst@xxxxxxxxxxxxxx> Cc: Roland Dreier <rolandd@xxxxxxxxx> Signed-off-by: Andrew Morton <akpm@xxxxxxxx> --- drivers/infiniband/hw/ipath/ipath_common.h | 4 drivers/infiniband/hw/ipath/ipath_driver.c | 11 - drivers/infiniband/hw/ipath/ipath_intr.c | 162 +++++++++++-------- drivers/infiniband/hw/ipath/ipath_stats.c | 1 4 files changed, 105 insertions(+), 73 deletions(-) diff -puN drivers/infiniband/hw/ipath/ipath_common.h~ib-ipath-reduce-overhead-on-receive-interrupts drivers/infiniband/hw/ipath/ipath_common.h --- a/drivers/infiniband/hw/ipath/ipath_common.h~ib-ipath-reduce-overhead-on-receive-interrupts +++ a/drivers/infiniband/hw/ipath/ipath_common.h @@ -97,8 +97,8 @@ struct infinipath_stats { __u64 sps_hwerrs; /* number of times IB link changed state unexpectedly */ __u64 sps_iblink; - /* no longer used; left for compatibility */ - __u64 sps_unused3; + /* kernel receive interrupts that didn't read intstat */ + __u64 sps_fastrcvint; /* number of kernel (port0) packets received */ __u64 sps_port0pkts; /* number of "ethernet" packets sent by driver */ diff -puN drivers/infiniband/hw/ipath/ipath_driver.c~ib-ipath-reduce-overhead-on-receive-interrupts drivers/infiniband/hw/ipath/ipath_driver.c --- a/drivers/infiniband/hw/ipath/ipath_driver.c~ib-ipath-reduce-overhead-on-receive-interrupts +++ a/drivers/infiniband/hw/ipath/ipath_driver.c @@ -888,12 +888,7 @@ void ipath_kreceive(struct ipath_devdata (u32)le64_to_cpu(*dd->ipath_hdrqtailptr)) goto done; -gotmore: - /* - * read only once at start. If in flood situation, this helps - * performance slightly. If more arrive while we are processing, - * we'll come back here and do them - */ + /* read only once at start for performance */ hdrqtail = (u32)le64_to_cpu(*dd->ipath_hdrqtailptr); for (i = 0, l = dd->ipath_port0head; l != hdrqtail; i++) { @@ -1022,10 +1017,6 @@ gotmore: dd->ipath_port0head = l; - if (hdrqtail != (u32)le64_to_cpu(*dd->ipath_hdrqtailptr)) - /* more arrived while we handled first batch */ - goto gotmore; - if (pkttot > ipath_stats.sps_maxpkts_call) ipath_stats.sps_maxpkts_call = pkttot; ipath_stats.sps_port0pkts += pkttot; diff -puN drivers/infiniband/hw/ipath/ipath_intr.c~ib-ipath-reduce-overhead-on-receive-interrupts drivers/infiniband/hw/ipath/ipath_intr.c --- a/drivers/infiniband/hw/ipath/ipath_intr.c~ib-ipath-reduce-overhead-on-receive-interrupts +++ a/drivers/infiniband/hw/ipath/ipath_intr.c @@ -539,10 +539,10 @@ static void handle_errors(struct ipath_d continue; if (hd == (tl + 1) || (!hd && tl == dd->ipath_hdrqlast)) { - dd->ipath_lastrcvhdrqtails[i] = tl; - pd->port_hdrqfull++; if (i == 0) chkerrpkts = 1; + dd->ipath_lastrcvhdrqtails[i] = tl; + pd->port_hdrqfull++; } } } @@ -724,7 +724,12 @@ set: dd->ipath_sendctrl); } -static void handle_rcv(struct ipath_devdata *dd, u32 istat) +/* + * Handle receive interrupts for user ports; this means a user + * process was waiting for a packet to arrive, and didn't want + * to poll + */ +static void handle_urcv(struct ipath_devdata *dd, u32 istat) { u64 portr; int i; @@ -734,22 +739,17 @@ static void handle_rcv(struct ipath_devd infinipath_i_rcvavail_mask) | ((istat >> INFINIPATH_I_RCVURG_SHIFT) & infinipath_i_rcvurg_mask); - for (i = 0; i < dd->ipath_cfgports; i++) { + for (i = 1; i < dd->ipath_cfgports; i++) { struct ipath_portdata *pd = dd->ipath_pd[i]; - if (portr & (1 << i) && pd && - pd->port_cnt) { - if (i == 0) - ipath_kreceive(dd); - else if (test_bit(IPATH_PORT_WAITING_RCV, - &pd->port_flag)) { - int rcbit; - clear_bit(IPATH_PORT_WAITING_RCV, - &pd->port_flag); - rcbit = i + INFINIPATH_R_INTRAVAIL_SHIFT; - clear_bit(1UL << rcbit, &dd->ipath_rcvctrl); - wake_up_interruptible(&pd->port_wait); - rcvdint = 1; - } + if (portr & (1 << i) && pd && pd->port_cnt && + test_bit(IPATH_PORT_WAITING_RCV, &pd->port_flag)) { + int rcbit; + clear_bit(IPATH_PORT_WAITING_RCV, + &pd->port_flag); + rcbit = i + INFINIPATH_R_INTRAVAIL_SHIFT; + clear_bit(1UL << rcbit, &dd->ipath_rcvctrl); + wake_up_interruptible(&pd->port_wait); + rcvdint = 1; } } if (rcvdint) { @@ -767,14 +767,17 @@ irqreturn_t ipath_intr(int irq, void *da struct ipath_devdata *dd = data; u32 istat; ipath_err_t estat = 0; - static unsigned unexpected = 0; irqreturn_t ret; + u32 p0bits; + static unsigned unexpected = 0; + static const u32 port0rbits = (1U<<INFINIPATH_I_RCVAVAIL_SHIFT) | + (1U<<INFINIPATH_I_RCVURG_SHIFT); - if(!(dd->ipath_flags & IPATH_PRESENT)) { - /* this is mostly so we don't try to touch the chip while - * it is being reset */ + ipath_stats.sps_ints++; + + if (!(dd->ipath_flags & IPATH_PRESENT)) { /* - * This return value is perhaps odd, but we do not want the + * This return value is not great, but we do not want the * interrupt core code to remove our interrupt handler * because we don't appear to be handling an interrupt * during a chip reset. @@ -782,6 +785,50 @@ irqreturn_t ipath_intr(int irq, void *da return IRQ_HANDLED; } + /* + * this needs to be flags&initted, not statusp, so we keep + * taking interrupts even after link goes down, etc. + * Also, we *must* clear the interrupt at some point, or we won't + * take it again, which can be real bad for errors, etc... + */ + + if (!(dd->ipath_flags & IPATH_INITTED)) { + ipath_bad_intr(dd, &unexpected); + ret = IRQ_NONE; + goto bail; + } + + /* + * We try to avoid reading the interrupt status register, since + * that's a PIO read, and stalls the processor for up to about + * ~0.25 usec. The idea is that if we processed a port0 packet, + * we blindly clear the port 0 receive interrupt bits, and nothing + * else, then return. If other interrupts are pending, the chip + * will re-interrupt us as soon as we write the intclear register. + * We then won't process any more kernel packets (if not the 2nd + * time, then the 3rd or 4th) and we'll then handle the other + * interrupts. We clear the interrupts first so that we don't + * lose intr for later packets that arrive while we are processing. + */ + if (dd->ipath_port0head != + (u32)le64_to_cpu(*dd->ipath_hdrqtailptr)) { + u32 oldhead = dd->ipath_port0head; + if (dd->ipath_flags & IPATH_GPIO_INTR) { + ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_clear, + (u64) (1 << 2)); + p0bits = port0rbits | INFINIPATH_I_GPIO; + } + else + p0bits = port0rbits; + ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, p0bits); + ipath_kreceive(dd); + if (oldhead != dd->ipath_port0head) { + ipath_stats.sps_fastrcvint++; + goto done; + } + istat = ipath_read_kreg32(dd, dd->ipath_kregs->kr_intstatus); + } + istat = ipath_read_kreg32(dd, dd->ipath_kregs->kr_intstatus); if (unlikely(!istat)) { ipath_stats.sps_nullintr++; @@ -795,31 +842,17 @@ irqreturn_t ipath_intr(int irq, void *da goto bail; } - ipath_stats.sps_ints++; - - /* - * this needs to be flags&initted, not statusp, so we keep - * taking interrupts even after link goes down, etc. - * Also, we *must* clear the interrupt at some point, or we won't - * take it again, which can be real bad for errors, etc... - */ - - if (!(dd->ipath_flags & IPATH_INITTED)) { - ipath_bad_intr(dd, &unexpected); - ret = IRQ_NONE; - goto bail; - } if (unexpected) unexpected = 0; - ipath_cdbg(VERBOSE, "intr stat=0x%x\n", istat); - - if (istat & ~infinipath_i_bitsextant) + if (unlikely(istat & ~infinipath_i_bitsextant)) ipath_dev_err(dd, "interrupt with unknown interrupts %x set\n", istat & (u32) ~ infinipath_i_bitsextant); + else + ipath_cdbg(VERBOSE, "intr stat=0x%x\n", istat); - if (istat & INFINIPATH_I_ERROR) { + if (unlikely(istat & INFINIPATH_I_ERROR)) { ipath_stats.sps_errints++; estat = ipath_read_kreg64(dd, dd->ipath_kregs->kr_errorstatus); @@ -837,7 +870,14 @@ irqreturn_t ipath_intr(int irq, void *da handle_errors(dd, estat); } + p0bits = port0rbits; if (istat & INFINIPATH_I_GPIO) { + /* + * Packets are available in the port 0 rcv queue. + * Eventually this needs to be generalized to check + * IPATH_GPIO_INTR, and the specific GPIO bit, if + * GPIO interrupts are used for anything else. + */ if (unlikely(!(dd->ipath_flags & IPATH_GPIO_INTR))) { u32 gpiostatus; gpiostatus = ipath_read_kreg32( @@ -851,14 +891,7 @@ irqreturn_t ipath_intr(int irq, void *da /* Clear GPIO status bit 2 */ ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_clear, (u64) (1 << 2)); - - /* - * Packets are available in the port 0 rcv queue. - * Eventually this needs to be generalized to check - * IPATH_GPIO_INTR, and the specific GPIO bit, if - * GPIO interrupts are used for anything else. - */ - ipath_kreceive(dd); + p0bits |= INFINIPATH_I_GPIO; } } @@ -871,6 +904,25 @@ irqreturn_t ipath_intr(int irq, void *da */ ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, istat); + /* + * we check for both transition from empty to non-empty, and urgent + * packets (those with the interrupt bit set in the header), and + * if enabled, the GPIO bit 2 interrupt used for port0 on some + * HT-400 boards. + * Do this before checking for pio buffers available, since + * receives can overflow; piobuf waiters can afford a few + * extra cycles, since they were waiting anyway. + */ + if (istat & p0bits) { + ipath_kreceive(dd); + istat &= ~port0rbits; + } + if (istat & ((infinipath_i_rcvavail_mask << + INFINIPATH_I_RCVAVAIL_SHIFT) + | (infinipath_i_rcvurg_mask << + INFINIPATH_I_RCVURG_SHIFT))) + handle_urcv(dd, istat); + if (istat & INFINIPATH_I_SPIOBUFAVAIL) { clear_bit(IPATH_S_PIOINTBUFAVAIL, &dd->ipath_sendctrl); ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, @@ -882,17 +934,7 @@ irqreturn_t ipath_intr(int irq, void *da handle_layer_pioavail(dd); } - /* - * we check for both transition from empty to non-empty, and urgent - * packets (those with the interrupt bit set in the header) - */ - - if (istat & ((infinipath_i_rcvavail_mask << - INFINIPATH_I_RCVAVAIL_SHIFT) - | (infinipath_i_rcvurg_mask << - INFINIPATH_I_RCVURG_SHIFT))) - handle_rcv(dd, istat); - +done: ret = IRQ_HANDLED; bail: diff -puN drivers/infiniband/hw/ipath/ipath_stats.c~ib-ipath-reduce-overhead-on-receive-interrupts drivers/infiniband/hw/ipath/ipath_stats.c --- a/drivers/infiniband/hw/ipath/ipath_stats.c~ib-ipath-reduce-overhead-on-receive-interrupts +++ a/drivers/infiniband/hw/ipath/ipath_stats.c @@ -186,7 +186,6 @@ static void ipath_qcheck(struct ipath_de dd->ipath_port0head, (unsigned long long) ipath_stats.sps_port0pkts); - ipath_kreceive(dd); } dd->ipath_lastport0rcv_cnt = ipath_stats.sps_port0pkts; } _ Patches currently in -mm which might be from bos@xxxxxxxxxxxxx are ib-ipath-name-zero-counter-offsets-so-its-clear.patch ib-ipath-update-copyrights-and-other-strings-to.patch ib-ipath-share-more-common-code-between-rc-and-uc.patch ib-ipath-fix-an-indenting-problem.patch ib-ipath-fix-shared-receive-queues-for-rc.patch ib-ipath-allow-diags-on-any-unit.patch ib-ipath-update-some-comments-and-fix-typos.patch ib-ipath-remove-some-duplicate-code.patch ib-ipath-dont-allow-resources-to-be-created-with.patch ib-ipath-fix-some-memory-leaks-on-failure-paths.patch ib-ipath-return-an-error-for-unknown-multicast-gid.patch ib-ipath-report-correct-device-identification.patch ib-ipath-enforce-device-resource-limits.patch ib-ipath-removed-unused-field-ipath_kregvirt-from.patch ib-ipath-print-better-debug-info-when-handling.patch ib-ipath-enable-freeze-mode-when-shutting-down.patch ib-ipath-use-more-appropriate-gfp-flags.patch ib-ipath-use-vmalloc-to-allocate-struct.patch ib-ipath-memory-management-cleanups.patch ib-ipath-reduce-overhead-on-receive-interrupts.patch ib-ipath-fixed-bug-9776.patch ib-ipath-fix-lost-interrupts-on-ht-400.patch ib-ipath-disallow-send-of-invalid-packet-sizes.patch ib-ipath-dont-confuse-the-max-message-size-with.patch ib-ipath-removed-redundant-statements.patch ib-ipath-check-for-valid-lid-and-multicast-lids.patch ib-ipath-fixes-to-performance-get-counters-for-ib.patch ib-ipath-fixes-a-bug-where-our-delay-for-eeprom-no.patch ib-ipath-rc-receive-interrupt-performance-changes.patch ib-ipath-purge-sps_lid-and-sps_mlid-arrays.patch ib-ipath-drop-the-stats-sysfs-attribute-group.patch ib-ipath-support-more-models-of-infinipath-hardware.patch ib-ipath-read-write-correct-sizes-through-diag.patch ib-ipath-fix-a-bug-that-results-in-addresses-near.patch ib-ipath-remove-some-if-0-code-related-to.patch ib-ipath-ignore-receive-queue-size-if-srq-is.patch ib-ipath-namespace-cleanup-replace-ips-with-ipath.patch - To unsubscribe from this list: send the line "unsubscribe mm-commits" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html