2010/12/24 Michael S. Tsirkin <mst@xxxxxxxxxx>: > On Fri, Dec 24, 2010 at 12:18:15PM +0900, Yoshiaki Tamura wrote: >> virtio save/load is currently sending last_avail_idx, but inuse isn't. >> This causes inconsistent state when using Kemari which replays >> outstanding requests on the secondary. By letting last_avail_idx to >> be updated after inuse is decreased, it would be possible to replay >> the outstanding requests. Note that live migration shouldn't be >> affected because it waits until flushing all requests. Also in >> conjunction with event-tap, requests inversion should be avoided. >> >> Signed-off-by: Yoshiaki Tamura <tamura.yoshiaki@xxxxxxxxxxxxx> > > I think I understood the request inversion. My question now is, > event-tap transfers inuse events as well, won't the same > request be repeated twice? > >> --- >> hw/virtio.c | 8 +++++++- >> 1 files changed, 7 insertions(+), 1 deletions(-) >> >> diff --git a/hw/virtio.c b/hw/virtio.c >> index 07dbf86..f915c46 100644 >> --- a/hw/virtio.c >> +++ b/hw/virtio.c >> @@ -72,7 +72,7 @@ struct VirtQueue >> VRing vring; >> target_phys_addr_t pa; >> uint16_t last_avail_idx; >> - int inuse; >> + uint16_t inuse; >> uint16_t vector; >> void (*handle_output)(VirtIODevice *vdev, VirtQueue *vq); >> VirtIODevice *vdev; >> @@ -671,6 +671,7 @@ void virtio_save(VirtIODevice *vdev, QEMUFile *f) >> qemu_put_be32(f, vdev->vq[i].vring.num); >> qemu_put_be64(f, vdev->vq[i].pa); >> qemu_put_be16s(f, &vdev->vq[i].last_avail_idx); >> + qemu_put_be16s(f, &vdev->vq[i].inuse); >> if (vdev->binding->save_queue) >> vdev->binding->save_queue(vdev->binding_opaque, i, f); >> } >> @@ -710,6 +711,11 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f) >> vdev->vq[i].vring.num = qemu_get_be32(f); >> vdev->vq[i].pa = qemu_get_be64(f); >> qemu_get_be16s(f, &vdev->vq[i].last_avail_idx); >> + qemu_get_be16s(f, &vdev->vq[i].inuse); >> + >> + /* revert last_avail_idx if there are outstanding emulation. 
*/ > > if there are outstanding emulation -> if requests > are outstanding in event-tap? > >> + vdev->vq[i].last_avail_idx -= vdev->vq[i].inuse; >> + vdev->vq[i].inuse = 0; >> > > I don't understand it, if this is all we do we can equivalently > decrement on the sender side and avoid breaking migration compatibility? It seems I sent the old patch... I'm really sorry. Currently I'm taking the approach of updating last_avail_idx later. Decreasing looks scary to me if the guest already knows about it. commit 8ac6ba51cc558b3bfcac7a5814d92f275ee874e9 Author: Yoshiaki Tamura <tamura.yoshiaki@xxxxxxxxxxxxx> Date: Mon May 17 10:36:14 2010 +0900 virtio: update last_avail_idx when inuse is decreased. virtio save/load is currently sending last_avail_idx, but inuse isn't. This causes inconsistent state when using Kemari which replays outstanding requests on the secondary. By letting last_avail_idx be updated after inuse is decreased, it would be possible to replay the outstanding requests. Note that live migration shouldn't be affected because it waits until flushing all requests. Also in conjunction with event-tap, requests inversion should be avoided. 
Signed-off-by: Yoshiaki Tamura <tamura.yoshiaki@xxxxxxxxxxxxx> diff --git a/hw/virtio.c b/hw/virtio.c index 07dbf86..b1586da 100644 --- a/hw/virtio.c +++ b/hw/virtio.c @@ -198,7 +198,7 @@ int virtio_queue_ready(VirtQueue *vq) int virtio_queue_empty(VirtQueue *vq) { - return vring_avail_idx(vq) == vq->last_avail_idx; + return vring_avail_idx(vq) == vq->last_avail_idx + vq->inuse; } void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem, @@ -238,6 +238,7 @@ void virtqueue_flush(VirtQueue *vq, unsigned int count) wmb(); trace_virtqueue_flush(vq, count); vring_used_idx_increment(vq, count); + vq->last_avail_idx += count; vq->inuse -= count; } @@ -306,7 +307,7 @@ int virtqueue_avail_bytes(VirtQueue *vq, int in_bytes, int o unsigned int idx; int total_bufs, in_total, out_total; - idx = vq->last_avail_idx; + idx = vq->last_avail_idx + vq->inuse; total_bufs = in_total = out_total = 0; while (virtqueue_num_heads(vq, idx)) { @@ -386,7 +387,7 @@ int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem) unsigned int i, head, max; target_phys_addr_t desc_pa = vq->vring.desc; - if (!virtqueue_num_heads(vq, vq->last_avail_idx)) + if (!virtqueue_num_heads(vq, vq->last_avail_idx + vq->inuse)) return 0; /* When we start there are none of either input nor output. 
*/ @@ -394,7 +395,7 @@ int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem) max = vq->vring.num; - i = head = virtqueue_get_head(vq, vq->last_avail_idx++); + i = head = virtqueue_get_head(vq, vq->last_avail_idx + vq->inuse); if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_INDIRECT) { if (vring_desc_len(desc_pa, i) % sizeof(VRingDesc)) { @@ -626,7 +627,7 @@ void virtio_notify(VirtIODevice *vdev, VirtQueue *vq) /* Always notify when queue is empty (when feature acknowledge) */ if ((vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT) && (!(vdev->guest_features & (1 << VIRTIO_F_NOTIFY_ON_EMPTY)) || - (vq->inuse || vring_avail_idx(vq) != vq->last_avail_idx))) + (vq->inuse || vring_avail_idx(vq) != vq->last_avail_idx + vq->inuse))) return; trace_virtio_notify(vdev, vq); > >> if (vdev->vq[i].pa) { >> uint16_t nheads; >> -- >> 1.7.1.2 > -- > To unsubscribe from this list: send the line "unsubscribe kvm" in > the body of a message to majordomo@xxxxxxxxxxxxxxx > More majordomo info at http://vger.kernel.org/majordomo-info.html > -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html