Re: [PATCH 4/5] virtio-net: Introduce a new bottom half packet TX

"Michael S. Tsirkin" <mst@xxxxxxxxxx> · Tue, 31 Aug 2010 23:14:33 +0300

On Fri, Aug 27, 2010 at 04:37:36PM -0600, Alex Williamson wrote:
> Based on a patch from Mark McLoughlin, this patch introduces a new
> bottom half packet transmitter that avoids the latency imposed by
> the tx_timer approach.  Rather than scheduling a timer when a TX
> packet comes in, schedule a bottom half to be run from the iothread.
> The bottom half handler first attempts to flush the queue with
> notification disabled (this is where we could race with a guest
> without txburst).  If we flush a full burst, reschedule immediately.
> If we send short of a full burst, try to re-enable notification.
> To avoid a race with TXs that may have occurred, we must then
> flush again.  If we find some packets to send, the guest it probably
> active, so we can reschedule again.
> 
> tx_timer and tx_bh are mutually exclusive, so we can re-use the
> tx_waiting flag to indicate one or the other needs to be setup.
> This allows us to seamlessly migrate between timer and bh TX
> handling.
> 
> Signed-off-by: Alex Williamson <alex.williamson@xxxxxxxxxx>
> ---
> 
>  hw/virtio-net.c |   81 ++++++++++++++++++++++++++++++++++++++++++++++---------
>  1 files changed, 68 insertions(+), 13 deletions(-)
> 
> diff --git a/hw/virtio-net.c b/hw/virtio-net.c
> index 8b652f2..3288c77 100644
> --- a/hw/virtio-net.c
> +++ b/hw/virtio-net.c
> @@ -36,6 +36,7 @@ typedef struct VirtIONet
>      VirtQueue *ctrl_vq;
>      NICState *nic;
>      QEMUTimer *tx_timer;
> +    QEMUBH *tx_bh;
>      uint32_t tx_timeout;
>      int32_t tx_burst;
>      int tx_waiting;
> @@ -704,16 +705,25 @@ static void virtio_net_handle_tx(VirtIODevice *vdev, VirtQueue *vq)
>  {
>      VirtIONet *n = to_virtio_net(vdev);
>  
> -    if (n->tx_waiting) {
> -        virtio_queue_set_notification(vq, 1);
> -        qemu_del_timer(n->tx_timer);
> -        n->tx_waiting = 0;
> -        virtio_net_flush_tx(n, vq);
> +    if (n->tx_timer) {
> +        if (n->tx_waiting) {
> +            virtio_queue_set_notification(vq, 1);
> +            qemu_del_timer(n->tx_timer);
> +            n->tx_waiting = 0;
> +            virtio_net_flush_tx(n, vq);
> +        } else {
> +            qemu_mod_timer(n->tx_timer,
> +                           qemu_get_clock(vm_clock) + n->tx_timeout);
> +            n->tx_waiting = 1;
> +            virtio_queue_set_notification(vq, 0);
> +        }
>      } else {
> -        qemu_mod_timer(n->tx_timer,
> -                       qemu_get_clock(vm_clock) + n->tx_timeout);
> +        if (unlikely(n->tx_waiting)) {
> +            return;
> +        }
> +        virtio_queue_set_notification(n->tx_vq, 0);
> +        qemu_bh_schedule(n->tx_bh);
>          n->tx_waiting = 1;
> -        virtio_queue_set_notification(vq, 0);
>      }
>  }
>  
> @@ -731,6 +741,41 @@ static void virtio_net_tx_timer(void *opaque)
>      virtio_net_flush_tx(n, n->tx_vq);
>  }
>  
> +static void virtio_net_tx_bh(void *opaque)
> +{
> +    VirtIONet *n = opaque;
> +    int32_t ret;
> +
> +    n->tx_waiting = 0;
> +
> +    /* Just in case the driver is not ready on more */
> +    if (unlikely(!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK)))
> +        return;
> +
> +    ret = virtio_net_flush_tx(n, n->tx_vq);
> +    if (ret == -EBUSY) {
> +        return; /* Notification re-enable handled by tx_complete */
> +    }
> +
> +    /* If we flush a full burst of packets, assume there are
> +     * more coming and immediately reschedule */
> +    if (ret >= n->tx_burst) {
> +        qemu_bh_schedule(n->tx_bh);
> +        n->tx_waiting = 1;
> +        return;
> +    }
> +
> +    /* If less than a full burst, re-enable notification and flush
> +     * anything that may have come in while we weren't looking.  If
> +     * we find something, assume the guest is still active and reschedule */
> +    virtio_queue_set_notification(n->tx_vq, 1);
> +    if (virtio_net_flush_tx(n, n->tx_vq) > 0) {

Shouldn't this be virtio_net_flush_tx(n, n->tx_vq) >= n->tx_burst?
If we get less than tx_burst, the ring is empty now so no need to
reschedule.
Right?

> +        virtio_queue_set_notification(n->tx_vq, 0);
> +        qemu_bh_schedule(n->tx_bh);
> +        n->tx_waiting = 1;
> +    }
> +}
> +
>  static void virtio_net_save(QEMUFile *f, void *opaque)
>  {
>      VirtIONet *n = opaque;
> @@ -850,8 +895,12 @@ static int virtio_net_load(QEMUFile *f, void *opaque, int version_id)
>      n->mac_table.first_multi = i;
>  
>      if (n->tx_waiting) {
> -        qemu_mod_timer(n->tx_timer,
> -                       qemu_get_clock(vm_clock) + n->tx_timeout);
> +        if (n->tx_timer) {
> +            qemu_mod_timer(n->tx_timer,
> +                           qemu_get_clock(vm_clock) + n->tx_timeout);
> +        } else {
> +            qemu_bh_schedule(n->tx_bh);
> +        }
>      }
>      return 0;
>  }
> @@ -939,9 +988,9 @@ VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf,
>  
>      qemu_format_nic_info_str(&n->nic->nc, conf->macaddr.a);
>  
> -    n->tx_timer = qemu_new_timer(vm_clock, virtio_net_tx_timer, n);
>      n->tx_waiting = 0;
>      if (txtimer) {
> +        n->tx_timer = qemu_new_timer(vm_clock, virtio_net_tx_timer, n);
>          if (txtimer == 1) {
>              /* For convenience, 1 = "on" = predefined default, anything else
>               * specifies and actual timeout value */
> @@ -949,6 +998,8 @@ VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf,
>          } else {
>              n->tx_timeout = txtimer;
>          }
> +    } else {
> +        n->tx_bh = qemu_bh_new(virtio_net_tx_bh, n);
>      }
>      n->tx_burst = txburst;
>      n->mergeable_rx_bufs = 0;
> @@ -982,8 +1033,12 @@ void virtio_net_exit(VirtIODevice *vdev)
>      qemu_free(n->mac_table.macs);
>      qemu_free(n->vlans);
>  
> -    qemu_del_timer(n->tx_timer);
> -    qemu_free_timer(n->tx_timer);
> +    if (n->tx_timer) {
> +        qemu_del_timer(n->tx_timer);
> +        qemu_free_timer(n->tx_timer);
> +    } else {
> +        qemu_bh_delete(n->tx_bh);
> +    }
>  
>      virtio_cleanup(&n->vdev);
>      qemu_del_vlan_client(&n->nic->nc);
> 
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@xxxxxxxxxxxxxxx
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html