2010/12/24 Michael S. Tsirkin <mst@xxxxxxxxxx>: > On Fri, Dec 24, 2010 at 10:14:50PM +0900, Yoshiaki Tamura wrote: >> 2010/12/24 Michael S. Tsirkin <mst@xxxxxxxxxx>: >> > On Fri, Dec 24, 2010 at 08:22:00PM +0900, Yoshiaki Tamura wrote: >> >> 2010/12/24 Michael S. Tsirkin <mst@xxxxxxxxxx>: >> >> > On Fri, Dec 24, 2010 at 12:18:15PM +0900, Yoshiaki Tamura wrote: >> >> >> virtio save/load is currently sending last_avail_idx, but inuse isn't. >> >> >> This causes inconsistent state when using Kemari which replays >> >> >> outstanding requests on the secondary. By letting last_avail_idx to >> >> >> be updated after inuse is decreased, it would be possible to replay >> >> >> the outstanding requests. Note that live migration shouldn't be >> >> >> affected because it waits until flushing all requests. Also in >> >> >> conjunction with event-tap, requests inversion should be avoided. >> >> >> >> >> >> Signed-off-by: Yoshiaki Tamura <tamura.yoshiaki@xxxxxxxxxxxxx> >> >> > >> >> > I think I understood the request inversion. My question now is, >> >> > event-tap transfers inuse events as well, won't the same >> >> > request be repeated twice? 
>> >> > >> >> >> --- >> >> >> hw/virtio.c | 8 +++++++- >> >> >> 1 files changed, 7 insertions(+), 1 deletions(-) >> >> >> >> >> >> diff --git a/hw/virtio.c b/hw/virtio.c >> >> >> index 07dbf86..f915c46 100644 >> >> >> --- a/hw/virtio.c >> >> >> +++ b/hw/virtio.c >> >> >> @@ -72,7 +72,7 @@ struct VirtQueue >> >> >> VRing vring; >> >> >> target_phys_addr_t pa; >> >> >> uint16_t last_avail_idx; >> >> >> - int inuse; >> >> >> + uint16_t inuse; >> >> >> uint16_t vector; >> >> >> void (*handle_output)(VirtIODevice *vdev, VirtQueue *vq); >> >> >> VirtIODevice *vdev; >> >> >> @@ -671,6 +671,7 @@ void virtio_save(VirtIODevice *vdev, QEMUFile *f) >> >> >> qemu_put_be32(f, vdev->vq[i].vring.num); >> >> >> qemu_put_be64(f, vdev->vq[i].pa); >> >> >> qemu_put_be16s(f, &vdev->vq[i].last_avail_idx); >> >> >> + qemu_put_be16s(f, &vdev->vq[i].inuse); >> >> >> if (vdev->binding->save_queue) >> >> >> vdev->binding->save_queue(vdev->binding_opaque, i, f); >> >> >> } >> >> >> @@ -710,6 +711,11 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f) >> >> >> vdev->vq[i].vring.num = qemu_get_be32(f); >> >> >> vdev->vq[i].pa = qemu_get_be64(f); >> >> >> qemu_get_be16s(f, &vdev->vq[i].last_avail_idx); >> >> >> + qemu_get_be16s(f, &vdev->vq[i].inuse); >> >> >> + >> >> >> + /* revert last_avail_idx if there are outstanding emulation. */ >> >> > >> >> > if there are outstanding emulation -> if requests >> >> > are outstanding in event-tap? >> >> > >> >> >> + vdev->vq[i].last_avail_idx -= vdev->vq[i].inuse; >> >> >> + vdev->vq[i].inuse = 0; >> >> >> >> >> > >> >> > I don't understand it, if this is all we do we can equivalently >> >> > decrement on the sender side and avoid breaking migration compatibility? >> >> >> >> It seems I sent the old patch... I'm really sorry. Currently >> >> I'm taking the approach to update last_avail_idx later. >> >> Decreasing looks scary to me if the guest already knows about it. >> > >> > It seems exactly the same functionally. 
>> >> If it is the same I'm fine to go with the decreasing approach. >> Is it fine for the guest? Is last_avail_idx irrelevant to the >> guest's behavior? >> >> Yoshi > > At least at the moment, yes. OK. I'll put it in the next spin. Thanks for your advice! Yoshi > >> >> commit 8ac6ba51cc558b3bfcac7a5814d92f275ee874e9 >> >> Author: Yoshiaki Tamura <tamura.yoshiaki@xxxxxxxxxxxxx> >> >> Date: Mon May 17 10:36:14 2010 +0900 >> >> >> >> virtio: update last_avail_idx when inuse is decreased. >> >> >> >> virtio save/load is currently sending last_avail_idx, but inuse isn't. >> >> This causes inconsistent state when using Kemari which replays >> >> outstanding requests on the secondary. By letting last_avail_idx to >> >> be updated after inuse is decreased, it would be possible to replay >> >> the outstanding requests. Note that live migration shouldn't be >> >> affected because it waits until flushing all requests. Also in >> >> conjunction with event-tap, requests inversion should be avoided. 
>> >> >> >> Signed-off-by: Yoshiaki Tamura <tamura.yoshiaki@xxxxxxxxxxxxx> >> >> >> >> diff --git a/hw/virtio.c b/hw/virtio.c >> >> index 07dbf86..b1586da 100644 >> >> --- a/hw/virtio.c >> >> +++ b/hw/virtio.c >> >> @@ -198,7 +198,7 @@ int virtio_queue_ready(VirtQueue *vq) >> >> >> >> int virtio_queue_empty(VirtQueue *vq) >> >> { >> >> - return vring_avail_idx(vq) == vq->last_avail_idx; >> >> + return vring_avail_idx(vq) == vq->last_avail_idx + vq->inuse; >> >> } >> >> >> >> void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem, >> >> @@ -238,6 +238,7 @@ void virtqueue_flush(VirtQueue *vq, unsigned int count) >> >> wmb(); >> >> trace_virtqueue_flush(vq, count); >> >> vring_used_idx_increment(vq, count); >> >> + vq->last_avail_idx += count; >> >> vq->inuse -= count; >> >> } >> >> >> >> @@ -306,7 +307,7 @@ int virtqueue_avail_bytes(VirtQueue *vq, int in_bytes, int o >> >> unsigned int idx; >> >> int total_bufs, in_total, out_total; >> >> >> >> - idx = vq->last_avail_idx; >> >> + idx = vq->last_avail_idx + vq->inuse; >> >> >> >> total_bufs = in_total = out_total = 0; >> >> while (virtqueue_num_heads(vq, idx)) { >> >> @@ -386,7 +387,7 @@ int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem) >> >> unsigned int i, head, max; >> >> target_phys_addr_t desc_pa = vq->vring.desc; >> >> >> >> - if (!virtqueue_num_heads(vq, vq->last_avail_idx)) >> >> + if (!virtqueue_num_heads(vq, vq->last_avail_idx + vq->inuse)) >> >> return 0; >> >> >> >> /* When we start there are none of either input nor output. 
*/ >> >> @@ -394,7 +395,7 @@ int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem) >> >> >> >> max = vq->vring.num; >> >> >> >> - i = head = virtqueue_get_head(vq, vq->last_avail_idx++); >> >> + i = head = virtqueue_get_head(vq, vq->last_avail_idx + vq->inuse); >> >> >> >> if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_INDIRECT) { >> >> if (vring_desc_len(desc_pa, i) % sizeof(VRingDesc)) { >> >> @@ -626,7 +627,7 @@ void virtio_notify(VirtIODevice *vdev, VirtQueue *vq) >> >> /* Always notify when queue is empty (when feature acknowledge) */ >> >> if ((vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT) && >> >> (!(vdev->guest_features & (1 << VIRTIO_F_NOTIFY_ON_EMPTY)) || >> >> - (vq->inuse || vring_avail_idx(vq) != vq->last_avail_idx))) >> >> + (vq->inuse || vring_avail_idx(vq) != vq->last_avail_idx + vq->inuse))) >> >> return; >> >> >> >> trace_virtio_notify(vdev, vq); >> >> >> >> >> >> > >> >> >> if (vdev->vq[i].pa) { >> >> >> uint16_t nheads; >> >> >> -- >> >> >> 1.7.1.2 >> >> > -- >> >> > To unsubscribe from this list: send the line "unsubscribe kvm" in >> >> > the body of a message to majordomo@xxxxxxxxxxxxxxx >> >> > More majordomo info at http://vger.kernel.org/majordomo-info.html >> >> > >> > -- >> > To unsubscribe from this list: send the line "unsubscribe kvm" in >> > the body of a message to majordomo@xxxxxxxxxxxxxxx >> > More majordomo info at http://vger.kernel.org/majordomo-info.html >> > > > -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html