On Tue, Aug 31, 2010 at 02:33:46PM -0600, Alex Williamson wrote:
> On Tue, 2010-08-31 at 23:14 +0300, Michael S. Tsirkin wrote:
> > On Fri, Aug 27, 2010 at 04:37:36PM -0600, Alex Williamson wrote:
> > > Based on a patch from Mark McLoughlin, this patch introduces a new
> > > bottom half packet transmitter that avoids the latency imposed by
> > > the tx_timer approach.  Rather than scheduling a timer when a TX
> > > packet comes in, schedule a bottom half to be run from the iothread.
> > > The bottom half handler first attempts to flush the queue with
> > > notification disabled (this is where we could race with a guest
> > > without txburst).  If we flush a full burst, reschedule immediately.
> > > If we send short of a full burst, try to re-enable notification.
> > > To avoid a race with TXs that may have occurred, we must then
> > > flush again.  If we find some packets to send, the guest is probably
> > > active, so we can reschedule again.
> > >
> > > tx_timer and tx_bh are mutually exclusive, so we can re-use the
> > > tx_waiting flag to indicate that one or the other needs to be set up.
> > > This allows us to seamlessly migrate between timer and bh TX
> > > handling.
> > >
> > > Signed-off-by: Alex Williamson <alex.williamson@xxxxxxxxxx>
> > > ---
> > >
> > >  hw/virtio-net.c |   81 ++++++++++++++++++++++++++++++++++++++++++++++---------
> > >  1 files changed, 68 insertions(+), 13 deletions(-)
> > >
> > > diff --git a/hw/virtio-net.c b/hw/virtio-net.c
> > > index 8b652f2..3288c77 100644
> > > --- a/hw/virtio-net.c
> > > +++ b/hw/virtio-net.c
> > > @@ -36,6 +36,7 @@ typedef struct VirtIONet
> > >      VirtQueue *ctrl_vq;
> > >      NICState *nic;
> > >      QEMUTimer *tx_timer;
> > > +    QEMUBH *tx_bh;
> > >      uint32_t tx_timeout;
> > >      int32_t tx_burst;
> > >      int tx_waiting;
> > > @@ -704,16 +705,25 @@ static void virtio_net_handle_tx(VirtIODevice *vdev, VirtQueue *vq)
> > >  {
> > >      VirtIONet *n = to_virtio_net(vdev);
> > >
> > > -    if (n->tx_waiting) {
> > > -        virtio_queue_set_notification(vq, 1);
> > > -        qemu_del_timer(n->tx_timer);
> > > -        n->tx_waiting = 0;
> > > -        virtio_net_flush_tx(n, vq);
> > > +    if (n->tx_timer) {
> > > +        if (n->tx_waiting) {
> > > +            virtio_queue_set_notification(vq, 1);
> > > +            qemu_del_timer(n->tx_timer);
> > > +            n->tx_waiting = 0;
> > > +            virtio_net_flush_tx(n, vq);
> > > +        } else {
> > > +            qemu_mod_timer(n->tx_timer,
> > > +                           qemu_get_clock(vm_clock) + n->tx_timeout);
> > > +            n->tx_waiting = 1;
> > > +            virtio_queue_set_notification(vq, 0);
> > > +        }
> > >      } else {
> > > -        qemu_mod_timer(n->tx_timer,
> > > -                       qemu_get_clock(vm_clock) + n->tx_timeout);
> > > +        if (unlikely(n->tx_waiting)) {
> > > +            return;
> > > +        }
> > > +        virtio_queue_set_notification(n->tx_vq, 0);
> > > +        qemu_bh_schedule(n->tx_bh);
> > >          n->tx_waiting = 1;
> > > -        virtio_queue_set_notification(vq, 0);
> > >      }
> > >  }
> > >
> > > @@ -731,6 +741,41 @@ static void virtio_net_tx_timer(void *opaque)
> > >      virtio_net_flush_tx(n, n->tx_vq);
> > >  }
> > >
> > > +static void virtio_net_tx_bh(void *opaque)
> > > +{
> > > +    VirtIONet *n = opaque;
> > > +    int32_t ret;
> > > +
> > > +    n->tx_waiting = 0;
> > > +
> > > +    /* Just in case the driver is not ready any more */
> > > +    if (unlikely(!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK)))
> > > +        return;
> > > +
> > > +    ret = virtio_net_flush_tx(n, n->tx_vq);
> > > +    if (ret == -EBUSY) {
> > > +        return; /* Notification re-enable handled by tx_complete */
> > > +    }
> > > +
> > > +    /* If we flush a full burst of packets, assume there are
> > > +     * more coming and immediately reschedule */
> > > +    if (ret >= n->tx_burst) {
> > > +        qemu_bh_schedule(n->tx_bh);
> > > +        n->tx_waiting = 1;
> > > +        return;
> > > +    }
> > > +
> > > +    /* If less than a full burst, re-enable notification and flush
> > > +     * anything that may have come in while we weren't looking.  If
> > > +     * we find something, assume the guest is still active and reschedule */
> > > +    virtio_queue_set_notification(n->tx_vq, 1);
> > > +    if (virtio_net_flush_tx(n, n->tx_vq) > 0) {
> >
> > Shouldn't this be virtio_net_flush_tx(n, n->tx_vq) >= n->tx_burst?
> > If we get less than tx_burst, the ring is empty now, so there is no
> > need to reschedule.
> > Right?
>
> I suppose it depends on how aggressive we want to be.  If the guest put
> something on the queue between the first flush and this one, then it
> might be actively transmitting, and if we want to optimize latency, we
> anticipate that it might continue to transmit and re-schedule.  This is
> taken straight from markmc's rhel5 patch.  I wouldn't argue that it's
> wrong to not reschedule here, but it's clearly less aggressive.  Thanks,
>
> Alex

I'm a bit concerned that we are aggressive, but not consistently
aggressive.  For example, if the guest adds a packet before we disable
notification, we do not reschedule the bh, but if it adds a packet after
that, we do.  If we get 255 packets and then another 255 packets, we
poll without rescheduling an extra bh; if we get 255*2 packets in one
go, we reschedule.  I think it might cause jitter in performance, where
e.g. slowing the guest down a bit suddenly speeds up networking.

It might be better to be consistent: always poll at most 256 entries.
If we get all of them, reschedule.  If we get x < 256, enable
notification and poll again; if we then get 256 - x entries, reschedule,
and if we get fewer, stop polling.
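To illustrate, here is a rough, untested sketch of the bh handler under
that scheme.  It assumes virtio_net_flush_tx() grew an extra 'budget'
argument capping how many packets a single call may send (returning the
number actually sent); the function in this patch has no such argument,
so this is only a sketch of the idea, not a drop-in replacement:

static void virtio_net_tx_bh(void *opaque)
{
    VirtIONet *n = opaque;
    int32_t budget = n->tx_burst;  /* e.g. 256 */
    int32_t sent;

    n->tx_waiting = 0;

    /* Just in case the driver is not ready any more */
    if (unlikely(!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK)))
        return;

    /* First pass: poll with notification still disabled. */
    sent = virtio_net_flush_tx(n, n->tx_vq, budget);
    if (sent == -EBUSY) {
        return; /* Notification re-enable handled by tx_complete */
    }
    if (sent >= budget) {
        /* Whole budget used: assume more is coming, poll again. */
        qemu_bh_schedule(n->tx_bh);
        n->tx_waiting = 1;
        return;
    }

    /* Budget not exhausted: re-enable notification, then poll once more
     * with the remainder of the budget to close the race with new TXs. */
    virtio_queue_set_notification(n->tx_vq, 1);
    if (virtio_net_flush_tx(n, n->tx_vq, budget - sent) >= budget - sent) {
        /* The second pass used up the rest of the budget: keep polling. */
        virtio_queue_set_notification(n->tx_vq, 0);
        qemu_bh_schedule(n->tx_bh);
        n->tx_waiting = 1;
    }
    /* Otherwise we sent fewer than 'budget' packets in total: stop and
     * wait for the next guest kick. */
}

That way the amount of work done per bh invocation is capped at tx_burst
no matter how the packets arrive, so the decision to reschedule does not
depend on timing.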
> > > +        virtio_queue_set_notification(n->tx_vq, 0);
> > > +        qemu_bh_schedule(n->tx_bh);
> > > +        n->tx_waiting = 1;
> > > +    }
> > > +}
> > > +
> > >  static void virtio_net_save(QEMUFile *f, void *opaque)
> > >  {
> > >      VirtIONet *n = opaque;
> > > @@ -850,8 +895,12 @@ static int virtio_net_load(QEMUFile *f, void *opaque, int version_id)
> > >      n->mac_table.first_multi = i;
> > >
> > >      if (n->tx_waiting) {
> > > -        qemu_mod_timer(n->tx_timer,
> > > -                       qemu_get_clock(vm_clock) + n->tx_timeout);
> > > +        if (n->tx_timer) {
> > > +            qemu_mod_timer(n->tx_timer,
> > > +                           qemu_get_clock(vm_clock) + n->tx_timeout);
> > > +        } else {
> > > +            qemu_bh_schedule(n->tx_bh);
> > > +        }
> > >      }
> > >      return 0;
> > >  }
> > > @@ -939,9 +988,9 @@ VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf,
> > >
> > >      qemu_format_nic_info_str(&n->nic->nc, conf->macaddr.a);
> > >
> > > -    n->tx_timer = qemu_new_timer(vm_clock, virtio_net_tx_timer, n);
> > >      n->tx_waiting = 0;
> > >      if (txtimer) {
> > > +        n->tx_timer = qemu_new_timer(vm_clock, virtio_net_tx_timer, n);
> > >          if (txtimer == 1) {
> > >              /* For convenience, 1 = "on" = predefined default, anything else
> > >               * specifies an actual timeout value */
> > > @@ -949,6 +998,8 @@ VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf,
> > >          } else {
> > >              n->tx_timeout = txtimer;
> > >          }
> > > +    } else {
> > > +        n->tx_bh = qemu_bh_new(virtio_net_tx_bh, n);
> > >      }
> > >      n->tx_burst = txburst;
> > >      n->mergeable_rx_bufs = 0;
> > > @@ -982,8 +1033,12 @@ void virtio_net_exit(VirtIODevice *vdev)
> > >      qemu_free(n->mac_table.macs);
> > >      qemu_free(n->vlans);
> > >
> > > -    qemu_del_timer(n->tx_timer);
> > > -    qemu_free_timer(n->tx_timer);
> > > +    if (n->tx_timer) {
> > > +        qemu_del_timer(n->tx_timer);
> > > +        qemu_free_timer(n->tx_timer);
> > > +    } else {
> > > +        qemu_bh_delete(n->tx_bh);
> > > +    }
> > >
> > >      virtio_cleanup(&n->vdev);
> > >      qemu_del_vlan_client(&n->nic->nc);

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html