On 27.03.20 12:08, Eugenio Pérez wrote: > Hi Christian. > > Sorry for the late response. Could we try this one over eccb852f1fe6bede630e2e4f1a121a81e34354ab, and see if you still > can reproduce the bug? Too much time has passed and too many things have changed on that system. I have trouble reproducing this with either eccb852f1fe6bede630e2e4f1a121a81e34354ab or 52c36ce7f334. I will try to reproduce this again :-/ > > Apart from that, could you print me the backtrace when qemu calls vhost_kernel_set_vring_base and > vhost_kernel_get_vring_base functions? > > Thank you very much! > > diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c > index e158159671fa..a1a4239512bb 100644 > --- a/drivers/vhost/net.c > +++ b/drivers/vhost/net.c > @@ -1505,10 +1505,13 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd) > > mutex_lock(&n->dev.mutex); > r = vhost_dev_check_owner(&n->dev); > - if (r) > + if (r) { > + pr_debug("vhost_dev_check_owner index=%u fd=%d rc r=%d", index, fd, r); > goto err; > + } > > if (index >= VHOST_NET_VQ_MAX) { > + pr_debug("vhost_dev_check_owner index=%u fd=%d MAX=%d", index, fd, VHOST_NET_VQ_MAX); > r = -ENOBUFS; > goto err; > } > @@ -1518,22 +1521,26 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd) > > /* Verify that ring has been setup correctly. 
*/ > if (!vhost_vq_access_ok(vq)) { > + pr_debug("vhost_net_set_backend index=%u fd=%d !vhost_vq_access_ok", index, fd); > r = -EFAULT; > goto err_vq; > } > sock = get_socket(fd); > if (IS_ERR(sock)) { > r = PTR_ERR(sock); > + pr_debug("vhost_net_set_backend index=%u fd=%d get_socket err r=%d", index, fd, r); > goto err_vq; > } > > /* start polling new socket */ > oldsock = vq->private_data; > if (sock != oldsock) { > + pr_debug("sock=%p != oldsock=%p index=%u fd=%d vq=%p", sock, oldsock, index, fd, vq); > ubufs = vhost_net_ubuf_alloc(vq, > sock && vhost_sock_zcopy(sock)); > if (IS_ERR(ubufs)) { > r = PTR_ERR(ubufs); > + pr_debug("ubufs index=%u fd=%d err r=%d vq=%p", index, fd, r, vq); > goto err_ubufs; > } > > @@ -1541,11 +1548,15 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd) > vq->private_data = sock; > vhost_net_buf_unproduce(nvq); > r = vhost_vq_init_access(vq); > - if (r) > + if (r) { > + pr_debug("init_access index=%u fd=%d r=%d vq=%p", index, fd, r, vq); > goto err_used; > + } > r = vhost_net_enable_vq(n, vq); > - if (r) > + if (r) { > + pr_debug("enable_vq index=%u fd=%d r=%d vq=%p", index, fd, r, vq); > goto err_used; > + } > if (index == VHOST_NET_VQ_RX) > nvq->rx_ring = get_tap_ptr_ring(fd); > > @@ -1559,6 +1570,8 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd) > > mutex_unlock(&vq->mutex); > > + pr_debug("sock=%p", sock); > + > if (oldubufs) { > vhost_net_ubuf_put_wait_and_free(oldubufs); > mutex_lock(&vq->mutex); > @@ -1712,6 +1725,9 @@ static long vhost_net_ioctl(struct file *f, unsigned int ioctl, > case VHOST_NET_SET_BACKEND: > if (copy_from_user(&backend, argp, sizeof backend)) > return -EFAULT; > + pr_debug("VHOST_NET_SET_BACKEND [b.index=%u][b.fd=%d]", > + backend.index, backend.fd); > + dump_stack(); > return vhost_net_set_backend(n, backend.index, backend.fd); > case VHOST_GET_FEATURES: > features = VHOST_NET_FEATURES; > diff --git a/drivers/vhost/vhost.c 
b/drivers/vhost/vhost.c > index b5a51b1f2e79..9dd0bcae0b22 100644 > --- a/drivers/vhost/vhost.c > +++ b/drivers/vhost/vhost.c > @@ -372,6 +372,11 @@ static int vhost_worker(void *data) > return 0; > } > > +static int vhost_vq_num_batch_descs(struct vhost_virtqueue *vq) > +{ > + return vq->max_descs - UIO_MAXIOV; > +} > + > static void vhost_vq_free_iovecs(struct vhost_virtqueue *vq) > { > kfree(vq->descs); > @@ -394,7 +399,9 @@ static long vhost_dev_alloc_iovecs(struct vhost_dev *dev) > for (i = 0; i < dev->nvqs; ++i) { > vq = dev->vqs[i]; > vq->max_descs = dev->iov_limit; > - vq->batch_descs = dev->iov_limit - UIO_MAXIOV; > + if (vhost_vq_num_batch_descs(vq) < 0) { > + return -EINVAL; > + } > vq->descs = kmalloc_array(vq->max_descs, > sizeof(*vq->descs), > GFP_KERNEL); > @@ -1642,15 +1649,27 @@ long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *arg > r = -EINVAL; > break; > } > + > + pr_debug( > + "VHOST_SET_VRING_BASE [vq=%p][s.index=%u][s.num=%u][vq->avail_idx=%d][vq->last_avail_idx=%d][vq- >> ndescs=%d][vq->first_desc=%d]", > + vq, s.index, s.num, vq->avail_idx, vq->last_avail_idx, > + vq->ndescs, vq->first_desc); > + dump_stack(); > vq->last_avail_idx = s.num; > /* Forget the cached index value. 
*/ > vq->avail_idx = vq->last_avail_idx; > + vq->ndescs = vq->first_desc = 0; > break; > case VHOST_GET_VRING_BASE: > s.index = idx; > s.num = vq->last_avail_idx; > if (copy_to_user(argp, &s, sizeof s)) > r = -EFAULT; > + pr_debug( > + "VHOST_GET_VRING_BASE [vq=%p][s.index=%u][s.num=%u][vq->avail_idx=%d][vq->last_avail_idx=%d][vq- >> ndescs=%d][vq->first_desc=%d]", > + vq, s.index, s.num, vq->avail_idx, vq->last_avail_idx, > + vq->ndescs, vq->first_desc); > + dump_stack(); > break; > case VHOST_SET_VRING_KICK: > if (copy_from_user(&f, argp, sizeof f)) { > @@ -2239,8 +2258,8 @@ static int fetch_buf(struct vhost_virtqueue *vq) > vq->avail_idx = vhost16_to_cpu(vq, avail_idx); > > if (unlikely((u16)(vq->avail_idx - last_avail_idx) > vq->num)) { > - vq_err(vq, "Guest moved used index from %u to %u", > - last_avail_idx, vq->avail_idx); > + vq_err(vq, "Guest moved vq %p used index from %u to %u", > + vq, last_avail_idx, vq->avail_idx); > return -EFAULT; > } > > @@ -2316,6 +2335,9 @@ static int fetch_buf(struct vhost_virtqueue *vq) > BUG_ON(!(vq->used_flags & VRING_USED_F_NO_NOTIFY)); > > /* On success, increment avail index. */ > + pr_debug( > + "[vq=%p][vq->last_avail_idx=%u][vq->avail_idx=%u][vq->ndescs=%d][vq->first_desc=%d]", > + vq, vq->last_avail_idx, vq->avail_idx, vq->ndescs, vq->first_desc); > vq->last_avail_idx++; > > return 0; > @@ -2333,7 +2355,7 @@ static int fetch_descs(struct vhost_virtqueue *vq) > if (vq->ndescs) > return 0; > > - while (!ret && vq->ndescs <= vq->batch_descs) > + while (!ret && vq->ndescs <= vhost_vq_num_batch_descs(vq)) > ret = fetch_buf(vq); > > return vq->ndescs ? 0 : ret; > @@ -2432,6 +2454,9 @@ EXPORT_SYMBOL_GPL(vhost_get_vq_desc); > /* Reverse the effect of vhost_get_vq_desc. Useful for error handling. 
*/ > void vhost_discard_vq_desc(struct vhost_virtqueue *vq, int n) > { > + pr_debug( > + "DISCARD [vq=%p][vq->last_avail_idx=%u][vq->avail_idx=%u][n=%d]", > + vq, vq->last_avail_idx, vq->avail_idx, n); > vq->last_avail_idx -= n; > } > EXPORT_SYMBOL_GPL(vhost_discard_vq_desc); > diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h > index 661088ae6dc7..e648b9b997d4 100644 > --- a/drivers/vhost/vhost.h > +++ b/drivers/vhost/vhost.h > @@ -102,7 +102,6 @@ struct vhost_virtqueue { > int ndescs; > int first_desc; > int max_descs; > - int batch_descs; > > const struct vhost_umem_node *meta_iotlb[VHOST_NUM_ADDRS]; > struct file *kick; >