On Fri, Dec 24, 2010 at 10:14:50PM +0900, Yoshiaki Tamura wrote: > 2010/12/24 Michael S. Tsirkin <mst@xxxxxxxxxx>: > > On Fri, Dec 24, 2010 at 08:22:00PM +0900, Yoshiaki Tamura wrote: > >> 2010/12/24 Michael S. Tsirkin <mst@xxxxxxxxxx>: > >> > On Fri, Dec 24, 2010 at 12:18:15PM +0900, Yoshiaki Tamura wrote: > >> >> virtio save/load is currently sending last_avail_idx, but inuse isn't. > >> >> This causes inconsistent state when using Kemari which replays > >> >> outstanding requests on the secondary. By letting last_avail_idx to > >> >> be updated after inuse is decreased, it would be possible to replay > >> >> the outstanding requests. Note that live migration shouldn't be > >> >> affected because it waits until flushing all requests. Also in > >> >> conjunction with event-tap, requests inversion should be avoided. > >> >> > >> >> Signed-off-by: Yoshiaki Tamura <tamura.yoshiaki@xxxxxxxxxxxxx> > >> > > >> > I think I understood the request inversion. My question now is, > >> > event-tap transfers inuse events as well, won't the same > >> > request be repeated twice? 
> >> > > >> >> --- > >> >> hw/virtio.c | 8 +++++++- > >> >> 1 files changed, 7 insertions(+), 1 deletions(-) > >> >> > >> >> diff --git a/hw/virtio.c b/hw/virtio.c > >> >> index 07dbf86..f915c46 100644 > >> >> --- a/hw/virtio.c > >> >> +++ b/hw/virtio.c > >> >> @@ -72,7 +72,7 @@ struct VirtQueue > >> >> VRing vring; > >> >> target_phys_addr_t pa; > >> >> uint16_t last_avail_idx; > >> >> - int inuse; > >> >> + uint16_t inuse; > >> >> uint16_t vector; > >> >> void (*handle_output)(VirtIODevice *vdev, VirtQueue *vq); > >> >> VirtIODevice *vdev; > >> >> @@ -671,6 +671,7 @@ void virtio_save(VirtIODevice *vdev, QEMUFile *f) > >> >> qemu_put_be32(f, vdev->vq[i].vring.num); > >> >> qemu_put_be64(f, vdev->vq[i].pa); > >> >> qemu_put_be16s(f, &vdev->vq[i].last_avail_idx); > >> >> + qemu_put_be16s(f, &vdev->vq[i].inuse); > >> >> if (vdev->binding->save_queue) > >> >> vdev->binding->save_queue(vdev->binding_opaque, i, f); > >> >> } > >> >> @@ -710,6 +711,11 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f) > >> >> vdev->vq[i].vring.num = qemu_get_be32(f); > >> >> vdev->vq[i].pa = qemu_get_be64(f); > >> >> qemu_get_be16s(f, &vdev->vq[i].last_avail_idx); > >> >> + qemu_get_be16s(f, &vdev->vq[i].inuse); > >> >> + > >> >> + /* revert last_avail_idx if there are outstanding emulation. */ > >> > > >> > if there are outstanding emulation -> if requests > >> > are outstanding in event-tap? > >> > > >> >> + vdev->vq[i].last_avail_idx -= vdev->vq[i].inuse; > >> >> + vdev->vq[i].inuse = 0; > >> >> > >> > > >> > I don't understand it, if this is all we do we can equivalently > >> > decrement on the sender side and avoid breaking migration compatibility? > >> > >> It seems I sent the old patch... I'm really sorry. Currently > >> I'm taking the approach to update last_avail_idx later. > >> Decreasing looks scary to me if the guest already knows about it. > > > > It seems exactly the same functionally. > > If it is the same I'm fine to go with the decreasing approach. 
> Is it fine for the guest? Is last_avail_idx irrelevant to the > guest's behavior? > > Yoshi At least at the moment, yes. > >> commit 8ac6ba51cc558b3bfcac7a5814d92f275ee874e9 > >> Author: Yoshiaki Tamura <tamura.yoshiaki@xxxxxxxxxxxxx> > >> Date: Mon May 17 10:36:14 2010 +0900 > >> > >> virtio: update last_avail_idx when inuse is decreased. > >> > >> virtio save/load is currently sending last_avail_idx, but inuse isn't. > >> This causes inconsistent state when using Kemari which replays > >> outstanding requests on the secondary. By letting last_avail_idx to > >> be updated after inuse is decreased, it would be possible to replay > >> the outstanding requests. Note that live migration shouldn't be > >> affected because it waits until flushing all requests. Also in > >> conjunction with event-tap, requests inversion should be avoided. > >> > >> Signed-off-by: Yoshiaki Tamura <tamura.yoshiaki@xxxxxxxxxxxxx> > >> > >> diff --git a/hw/virtio.c b/hw/virtio.c > >> index 07dbf86..b1586da 100644 > >> --- a/hw/virtio.c > >> +++ b/hw/virtio.c > >> @@ -198,7 +198,7 @@ int virtio_queue_ready(VirtQueue *vq) > >> > >> int virtio_queue_empty(VirtQueue *vq) > >> { > >> - return vring_avail_idx(vq) == vq->last_avail_idx; > >> + return vring_avail_idx(vq) == vq->last_avail_idx + vq->inuse; > >> } > >> > >> void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem, > >> @@ -238,6 +238,7 @@ void virtqueue_flush(VirtQueue *vq, unsigned int count) > >> wmb(); > >> trace_virtqueue_flush(vq, count); > >> vring_used_idx_increment(vq, count); > >> + vq->last_avail_idx += count; > >> vq->inuse -= count; > >> } > >> > >> @@ -306,7 +307,7 @@ int virtqueue_avail_bytes(VirtQueue *vq, int in_bytes, int o > >> unsigned int idx; > >> int total_bufs, in_total, out_total; > >> > >> - idx = vq->last_avail_idx; > >> + idx = vq->last_avail_idx + vq->inuse; > >> > >> total_bufs = in_total = out_total = 0; > >> while (virtqueue_num_heads(vq, idx)) { > >> @@ -386,7 +387,7 @@ int 
virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem) > >> unsigned int i, head, max; > >> target_phys_addr_t desc_pa = vq->vring.desc; > >> > >> - if (!virtqueue_num_heads(vq, vq->last_avail_idx)) > >> + if (!virtqueue_num_heads(vq, vq->last_avail_idx + vq->inuse)) > >> return 0; > >> > >> /* When we start there are none of either input nor output. */ > >> @@ -394,7 +395,7 @@ int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem) > >> > >> max = vq->vring.num; > >> > >> - i = head = virtqueue_get_head(vq, vq->last_avail_idx++); > >> + i = head = virtqueue_get_head(vq, vq->last_avail_idx + vq->inuse); > >> > >> if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_INDIRECT) { > >> if (vring_desc_len(desc_pa, i) % sizeof(VRingDesc)) { > >> @@ -626,7 +627,7 @@ void virtio_notify(VirtIODevice *vdev, VirtQueue *vq) > >> /* Always notify when queue is empty (when feature acknowledge) */ > >> if ((vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT) && > >> (!(vdev->guest_features & (1 << VIRTIO_F_NOTIFY_ON_EMPTY)) || > >> - (vq->inuse || vring_avail_idx(vq) != vq->last_avail_idx))) > >> + (vq->inuse || vring_avail_idx(vq) != vq->last_avail_idx + vq->inuse))) > >> return; > >> > >> trace_virtio_notify(vdev, vq); > >> > >> > >> > > >> >> if (vdev->vq[i].pa) { > >> >> uint16_t nheads; > >> >> -- > >> >> 1.7.1.2 > >> > -- > >> > To unsubscribe from this list: send the line "unsubscribe kvm" in > >> > the body of a message to majordomo@xxxxxxxxxxxxxxx > >> > More majordomo info at http://vger.kernel.org/majordomo-info.html > >> > > > -- > > To unsubscribe from this list: send the line "unsubscribe kvm" in > > the body of a message to majordomo@xxxxxxxxxxxxxxx > > More majordomo info at http://vger.kernel.org/majordomo-info.html > > -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html