Hi Michael,

Excuse me for disturbing you with this letter!

I'm using the Linux 4.7.2 virtio driver + x86-64 + a smart NIC to validate PF and VF virtio performance. The VF's performance is >1 Mpps, but the PF's performance is only 500 Kpps even when using 8 virtio queues. So I reviewed the virtio IOMMU patch in detail; I worry that the DMA map/unmap calls cost many cycles. Do you have any suggestions? Thanks a lot!

Today I also reviewed the Intel 40G driver in Linux 4.7.2 and compared it with the virtio-net driver. I focused on buffer management and list the differences below.
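To make the concern concrete, this is roughly what the IOMMU patch does for every sg element on every virtqueue_add()/detach_buf() pair, as I read it (a simplified sketch paraphrased from drivers/virtio/virtio_ring.c, not a verbatim copy of the 4.7.2 source):

static dma_addr_t vring_map_one_sg(const struct vring_virtqueue *vq,
                                   struct scatterlist *sg,
                                   enum dma_data_direction direction)
{
        /* Legacy path: no DMA API / IOMMU in use, the device just gets
         * the physical address, which is essentially free. */
        if (!vring_use_dma_api(vq->vq.vdev))
                return (dma_addr_t)sg_phys(sg);

        /* DMA API path: one dma_map_page() per sg element on every
         * virtqueue_add(), matched by an unmap in detach_buf(). This is
         * the per-packet cost I am worried about. */
        return dma_map_page(vring_dma_dev(vq),
                            sg_page(sg), sg->offset, sg->length,
                            direction);
}

So with the DMA API in use, every TX and RX buffer pays a map on add and an unmap on completion.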
Look forward to your reply.

Thanks,
Jason

BTW:

1. Intel Ethernet Controller XL710 (40G) driver:

TX:
i40e_lan_xmit_frame()
  ->i40e_xmit_frame_ring()
    ->i40e_tx_map()
      {
        ......
        dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);
        ......
        dma = skb_frag_dma_map(tx_ring->dev, frag, 0, size, DMA_TO_DEVICE);
        ......
      }

i40e_clean_tx_irq()
{
    ......
    /* unmap skb header data */
    dma_unmap_single(tx_ring->dev,
                     dma_unmap_addr(tx_buf, dma),
                     dma_unmap_len(tx_buf, len),
                     DMA_TO_DEVICE);

    /* clear tx_buffer data */
    tx_buf->skb = NULL;
    dma_unmap_len_set(tx_buf, len, 0);
    ......
    /* unmap remaining buffers */
    while (tx_desc != eop_desc) {
        /* unmap any remaining paged data */
        if (dma_unmap_len(tx_buf, len)) {
            dma_unmap_page(tx_ring->dev,
                           dma_unmap_addr(tx_buf, dma),
                           dma_unmap_len(tx_buf, len),
                           DMA_TO_DEVICE);
            dma_unmap_len_set(tx_buf, len, 0);
        }
        ......
    }
    ......
}

i40e_vsi_free_tx_resources()
  ->i40e_free_tx_resources()
    ->i40e_clean_tx_ring()
      ->i40e_unmap_and_free_tx_resource()
        {
            if (tx_buffer->skb) {
                dev_kfree_skb_any(tx_buffer->skb);
                if (dma_unmap_len(tx_buffer, len))
                    dma_unmap_single(ring->dev,
                                     dma_unmap_addr(tx_buffer, dma),
                                     dma_unmap_len(tx_buffer, len),
                                     DMA_TO_DEVICE);
            } else if (dma_unmap_len(tx_buffer, len)) {
                dma_unmap_page(ring->dev,
                               dma_unmap_addr(tx_buffer, dma),
                               dma_unmap_len(tx_buffer, len),
                               DMA_TO_DEVICE);
            }
            ......
        }

RX:
i40e_vsi_configure()
  ->i40e_vsi_configure_rx()
    {
      ......
      /* set up individual rings */
      for (i = 0; i < vsi->num_queue_pairs && !err; i++)
          err = i40e_configure_rx_ring(vsi->rx_rings[i]);
      ......
    }
  ->i40e_configure_rx_ring()
    ->i40e_alloc_rx_buffers()
      {
        ......
        do {
            if (!i40e_alloc_mapped_page(rx_ring, bi))
                goto no_buffers;
            ......
            cleaned_count--;
        } while (cleaned_count);
        ......
      }

i40e_clean_rx_irq()
{
    ......
    if (cleaned_count >= I40E_RX_BUFFER_WRITE) {
        failure = failure ||
                  i40e_alloc_rx_buffers(rx_ring, cleaned_count);
        cleaned_count = 0;
    }
    ......
    /* we are reusing so sync this buffer for CPU use */
    dma_sync_single_range_for_cpu(rx_ring->dev,
                                  rx_buffer->dma,
                                  rx_buffer->page_offset,
                                  I40E_RXBUFFER_2048,
                                  DMA_FROM_DEVICE);
    ......
}

i40e_vsi_free_rx_resources()
  ->i40e_free_rx_resources()
    ->i40e_clean_rx_ring()
      {
          ......
          dma_unmap_page(dev, rx_bi->dma, PAGE_SIZE, DMA_FROM_DEVICE);
          ......
      }

2. virtio net driver:

init_vqs()
  ->virtnet_alloc_queues()
  ->virtnet_find_vqs()
    ->vp_find_vqs()
      ->vp_try_to_find_vqs()
        ->vp_setup_vq()
          ->setup_vq()

TX:
start_xmit()
  ->free_old_xmit_skbs()
    {
      ......
      virtqueue_get_buf()
        ->detach_buf()
          {
            ......
            vring_unmap_one(vq, &vq->vring.desc[i]);
            ......
          }
      ......
    }
  ->xmit_skb()
    ->virtqueue_add_outbuf()
      ->virtqueue_add()
        {
          ......
          dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_TO_DEVICE);
          ......
        }

RX:
virtnet_receive()
  ->virtqueue_get_buf()
    ->detach_buf()
      {
        ......
        vring_unmap_one(vq, &vq->vring.desc[i]);
        ......
      }
  ->receive_buf()
  ->try_fill_recv()
    ->add_recvbuf_small()
      ->virtqueue_add_inbuf()
        ->virtqueue_add()
          {
            ......
            /* in_sgs for the RX refill are mapped DMA_FROM_DEVICE */
            dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_FROM_DEVICE);
            ......
          }
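The main difference I notice is on the RX side: i40e maps each page once in i40e_alloc_mapped_page(), reuses it and only dma_sync's it per packet, and unmaps it only in i40e_clean_rx_ring(), while virtio maps and unmaps every buffer on every virtqueue_add()/detach_buf(). Below is a minimal sketch of the i40e-style pattern, with hypothetical names (my_rx_buffer, my_alloc_mapped_page); it is paraphrased, not copied from the driver:

/* Map a page once when it enters the RX ring and keep it mapped; per
 * packet the hot path only needs dma_sync_single_range_for_cpu(), and
 * the matching dma_unmap_page() happens at ring teardown. */
struct my_rx_buffer {
        struct page *page;
        dma_addr_t dma;
        unsigned int page_offset;
};

static bool my_alloc_mapped_page(struct device *dev, struct my_rx_buffer *bi)
{
        struct page *page;
        dma_addr_t dma;

        /* Reuse: the buffer may still hold a mapped page from a
         * previous receive, so there is nothing to map at all. */
        if (likely(bi->page))
                return true;

        page = dev_alloc_page();
        if (unlikely(!page))
                return false;

        /* Map once, for the lifetime of the page in the RX ring. */
        dma = dma_map_page(dev, page, 0, PAGE_SIZE, DMA_FROM_DEVICE);
        if (dma_mapping_error(dev, dma)) {
                __free_pages(page, 0);
                return false;
        }

        bi->page = page;
        bi->dma = dma;
        bi->page_offset = 0;
        return true;
}

As far as I can tell from the traces above, on TX both drivers map and unmap per packet, so the RX buffer reuse looks like the interesting gap to me.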