On Sat, 2014-10-11 at 15:16 +0800, Jason Wang wrote: > We free transmitted packets in ndo_start_xmit() in the past to get better > performance in the past. One side effect is that skb_orphan() needs to be > called in ndo_start_xmit() which makes sk_wmem_alloc not accurate in > fact. For TCP protocol, this means several optimization could not work well > such as TCP small queue and auto corking. This can lead extra low > throughput of small packets stream. > > Thanks to the urgent descriptor support. This patch tries to solve this > issue by enable the tx interrupt selectively for stream packets. This means > we don't need to orphan TCP stream packets in ndo_start_xmit() but enable > tx interrupt for those packets. After we get tx interrupt, a tx napi was > scheduled to free those packets. > > With this method, sk_wmem_alloc of TCP socket were more accurate than in > the past which let TCP can batch more through TSQ and auto corking. > > Signed-off-by: Jason Wang <jasowang@xxxxxxxxxx> > --- > drivers/net/virtio_net.c | 164 ++++++++++++++++++++++++++++++++++++----------- > 1 file changed, 128 insertions(+), 36 deletions(-) > > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c > index 5810841..b450fc4 100644 > --- a/drivers/net/virtio_net.c > +++ b/drivers/net/virtio_net.c > @@ -72,6 +72,8 @@ struct send_queue { > > /* Name of the send queue: output.$index */ > char name[40]; > + > + struct napi_struct napi; > }; > > /* Internal representation of a receive virtqueue */ > @@ -217,15 +219,40 @@ static struct page *get_a_page(struct receive_queue *rq, gfp_t gfp_mask) > return p; > } > > +static int free_old_xmit_skbs(struct send_queue *sq, int budget) > +{ > + struct sk_buff *skb; > + unsigned int len; > + struct virtnet_info *vi = sq->vq->vdev->priv; > + struct virtnet_stats *stats = this_cpu_ptr(vi->stats); > + int sent = 0; > + > + while (sent < budget && > + (skb = virtqueue_get_buf(sq->vq, &len)) != NULL) { > + pr_debug("Sent skb %p\n", skb); > + > + u64_stats_update_begin(&stats->tx_syncp); > + stats->tx_bytes += skb->len; > + stats->tx_packets++; > + u64_stats_update_end(&stats->tx_syncp); > + > + dev_kfree_skb_any(skb); > + sent++; > + } > + You could accumulate skb->len in a totlen var, and perform a single u64_stats_update_begin(&stats->tx_syncp); stats->tx_bytes += totlen; stats->tx_packets += sent; u64_stats_update_end(&stats->tx_syncp); after the loop. > + return sent; > +} > + ... > + > +static bool virtnet_skb_needs_intr(struct sk_buff *skb) > +{ > + union { > + unsigned char *network; > + struct iphdr *ipv4; > + struct ipv6hdr *ipv6; > + } hdr; > + struct tcphdr *th = tcp_hdr(skb); > + u16 payload_len; > + > + hdr.network = skb_network_header(skb); > + > + /* Only IPv4/IPv6 with TCP is supported */ Oh well, yet another packet flow dissector :) If most packets were caught by your implementation, you could use it for fast patj and fallback to skb_flow_dissect() for encapsulated traffic. struct flow_keys keys; if (!skb_flow_dissect(skb, &keys)) return false; if (keys.ip_proto != IPPROTO_TCP) return false; then check __skb_get_poff() how to get th, and check if there is some payload... > + if ((skb->protocol == htons(ETH_P_IP)) && > + hdr.ipv4->protocol == IPPROTO_TCP) { > + payload_len = ntohs(hdr.ipv4->tot_len) - hdr.ipv4->ihl * 4 - > + th->doff * 4; > + } else if ((skb->protocol == htons(ETH_P_IPV6) || > + hdr.ipv6->nexthdr == IPPROTO_TCP)) { > + payload_len = ntohs(hdr.ipv6->payload_len) - th->doff * 4; > + } else { > + return false; > + } > + > + /* We don't want to dealy packet with PUSH bit and pure ACK packet */ > + if (!th->psh && payload_len) > + return true; > + > + return false; > } -- To unsubscribe from this list: send the line "unsubscribe linux-api" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html