I was working on the spurios interrupt problem and I noticed something weird. static int virtnet_poll_tx(struct napi_struct *napi, int budget) { struct send_queue *sq = container_of(napi, struct send_queue, napi); struct virtnet_info *vi = sq->vq->vdev->priv; unsigned int index = vq2txq(sq->vq); struct netdev_queue *txq; if (unlikely(is_xdp_raw_buffer_queue(vi, index))) { /* We don't need to enable cb for XDP */ napi_complete_done(napi, 0); return 0; } txq = netdev_get_tx_queue(vi->dev, index); __netif_tx_lock(txq, raw_smp_processor_id()); free_old_xmit_skbs(sq, true); __netif_tx_unlock(txq); virtqueue_napi_complete(napi, sq->vq, 0); if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS) netif_tx_wake_queue(txq); return 0; } So virtqueue_napi_complete is called when txq is not locked, thinkably start_xmit can happen right? Now virtqueue_napi_complete static void virtqueue_napi_complete(struct napi_struct *napi, struct virtqueue *vq, int processed) { int opaque; opaque = virtqueue_enable_cb_prepare(vq); if (napi_complete_done(napi, processed)) { if (unlikely(virtqueue_poll(vq, opaque))) virtqueue_napi_schedule(napi, vq); } else { virtqueue_disable_cb(vq); } } So it is calling virtqueue_enable_cb_prepare but tx queue could be running and can process things in parallel ... What gives? I suspect this corrupts the ring, and explains why we are seeing weird hangs with vhost packed ring ... Jason? -- MST _______________________________________________ Virtualization mailing list Virtualization@xxxxxxxxxxxxxxxxxxxxxxxxxx https://lists.linuxfoundation.org/mailman/listinfo/virtualization