On Wed, Aug 03, 2022 at 01:25:15PM +0100, Vladimir Murzin wrote: > On 8/3/22 12:39, Michael S. Tsirkin wrote: > > On Wed, Aug 03, 2022 at 09:57:35AM +0100, Vladimir Murzin wrote: > >> On 8/2/22 13:49, Vladimir Murzin wrote: > >>> Hi Laurent, > >>> > >>> On 10/28/21 11:11, Laurent Vivier wrote: > >>>> If we ensure we have already some data available by enqueuing > >>>> again the buffer once data are exhausted, we can return what we > >>>> have without waiting for the device answer. > >>>> > >>>> Signed-off-by: Laurent Vivier <lvivier@xxxxxxxxxx> > >>>> --- > >>>> drivers/char/hw_random/virtio-rng.c | 26 ++++++++++++-------------- > >>>> 1 file changed, 12 insertions(+), 14 deletions(-) > >>>> > >>>> diff --git a/drivers/char/hw_random/virtio-rng.c b/drivers/char/hw_random/virtio-rng.c > >>>> index 8ba97cf4ca8f..0a7dde135db1 100644 > >>>> --- a/drivers/char/hw_random/virtio-rng.c > >>>> +++ b/drivers/char/hw_random/virtio-rng.c > >>>> @@ -20,7 +20,6 @@ struct virtrng_info { > >>>> struct virtqueue *vq; > >>>> char name[25]; > >>>> int index; > >>>> - bool busy; > >>>> bool hwrng_register_done; > >>>> bool hwrng_removed; > >>>> /* data transfer */ > >>>> @@ -44,16 +43,18 @@ static void random_recv_done(struct virtqueue *vq) > >>>> return; > >>>> > >>>> vi->data_idx = 0; > >>>> - vi->busy = false; > >>>> > >>>> complete(&vi->have_data); > >>>> } > >>>> > >>>> -/* The host will fill any buffer we give it with sweet, sweet randomness. */ > >>>> -static void register_buffer(struct virtrng_info *vi) > >>>> +static void request_entropy(struct virtrng_info *vi) > >>>> { > >>>> struct scatterlist sg; > >>>> > >>>> + reinit_completion(&vi->have_data); > >>>> + vi->data_avail = 0; > >>>> + vi->data_idx = 0; > >>>> + > >>>> sg_init_one(&sg, vi->data, sizeof(vi->data)); > >>>> > >>>> /* There should always be room for one buffer. */ > >>>> @@ -69,6 +70,8 @@ static unsigned int copy_data(struct virtrng_info *vi, void *buf, > >>>> memcpy(buf, vi->data + vi->data_idx, size); > >>>> vi->data_idx += size; > >>>> vi->data_avail -= size; > >>>> + if (vi->data_avail == 0) > >>>> + request_entropy(vi); > >>>> return size; > >>>> } > >>>> > >>>> @@ -98,13 +101,7 @@ static int virtio_read(struct hwrng *rng, void *buf, size_t size, bool wait) > >>>> * so either size is 0 or data_avail is 0 > >>>> */ > >>>> while (size != 0) { > >>>> - /* data_avail is 0 */ > >>>> - if (!vi->busy) { > >>>> - /* no pending request, ask for more */ > >>>> - vi->busy = true; > >>>> - reinit_completion(&vi->have_data); > >>>> - register_buffer(vi); > >>>> - } > >>>> + /* data_avail is 0 but a request is pending */ > >>>> ret = wait_for_completion_killable(&vi->have_data); > >>>> if (ret < 0) > >>>> return ret; > >>>> @@ -126,8 +123,7 @@ static void virtio_cleanup(struct hwrng *rng) > >>>> { > >>>> struct virtrng_info *vi = (struct virtrng_info *)rng->priv; > >>>> > >>>> - if (vi->busy) > >>>> - complete(&vi->have_data); > >>>> + complete(&vi->have_data); > >>>> } > >>>> > >>>> static int probe_common(struct virtio_device *vdev) > >>>> @@ -163,6 +159,9 @@ static int probe_common(struct virtio_device *vdev) > >>>> goto err_find; > >>>> } > >>>> > >>>> + /* we always have a pending entropy request */ > >>>> + request_entropy(vi); > >>>> + > >>>> return 0; > >>>> > >>>> err_find: > >>>> @@ -181,7 +180,6 @@ static void remove_common(struct virtio_device *vdev) > >>>> vi->data_idx = 0; > >>>> complete(&vi->have_data); > >>>> vdev->config->reset(vdev); > >>>> - vi->busy = false; > >>>> if (vi->hwrng_register_done) > >>>> hwrng_unregister(&vi->hwrng); > >>>> vdev->config->del_vqs(vdev); > >>> > >>> We observed that after this commit virtio-rng implementation in FVP doesn't > >>> work > >>> > >>> INFO: bp.virtio_rng: Selected Random Generator Device: XORSHIFT DEVICE > >>> INFO: bp.virtio_rng: Using seed value: 0x5674bba8 > >>> Error: FVP_Base_AEMvA: bp.virtio_rng: <vq0-requestq> Found invalid descriptor index > >>> In file: (unknown):0 > >>> In process: FVP_Base_AEMvA.thread_p_12 @ 935500020 ns > >>> Info: FVP_Base_AEMvA: bp.virtio_rng: Could not extract buffer > >>> > >>> while basic baremetal test works as expected > >>> > >>> INFO: bp.virtio_rng: Selected Random Generator Device: XORSHIFT DEVICE > >>> INFO: bp.virtio_rng: Using seed value: 0x541c142e > >>> Info: FVP_Base_AEMv8A: bp.virtio_rng: Generated Number: 0x4b098991ceb377e6 > >>> Info: FVP_Base_AEMv8A: bp.virtio_rng: Generated Number: 0xbdcbe3f765ba62f7 > >>> > >>> We are trying to get an idea what is missing and where, yet none of us familiar > >>> with the driver :( > >>> > >>> I'm looping Kevin who originally reported that and Mauricio who is looking form > >>> the FVP side. > >> > >> With the following diff FVP works agin > >> > >> diff --git a/drivers/char/hw_random/virtio-rng.c b/drivers/char/hw_random/virtio-rng.c > >> index a6f3a8a2ac..042503ad6c 100644 > >> --- a/drivers/char/hw_random/virtio-rng.c > >> +++ b/drivers/char/hw_random/virtio-rng.c > >> @@ -54,6 +54,7 @@ static void request_entropy(struct virtrng_info *vi) > >> reinit_completion(&vi->have_data); > >> vi->data_avail = 0; > >> vi->data_idx = 0; > >> + smp_mb(); > >> > >> sg_init_one(&sg, vi->data, sizeof(vi->data)); > >> > >> > >> What do you reckon? > >> > >> Cheers > >> Vladimir > > > > Thanks for debugging this! > > > > OK, interesting. > > > > data_idx and data_avail are accessed from virtio_read. > > > > Which as far as I can tell is invoked just with reading_mutex. > > > > > > But, request_entropy is called from probe when device is registered > > this time without locks > > so it can trigger while another thread is calling virtio_read. > > > > Second request_entropy is called from a callback random_recv_done > > also without locks. > > > > So it's great that smp_mb helped here but I suspect in fact we > > need locking. Laurent? > > > > I'm sorry for the noise, but it looks like I'm seeing issue for some different reasons. > I manage to reproduce issue even with smb_mb() in place. The reason I though it helped > is because I changed both environment and added smb_mb(). > > Anyway, thank you for your time! > > Cheers > Vladimir Well we at least have a race condition found by code review here. Here's a quick hack attempting to fix it. I don't like it much since it adds buffers with GFP_ATOMIC and kicks under a spinlock, but for now we can at least test it. I did a quick build but that's all, a bit rushed now sorry. Would appreciate knowing whether this addresses the issue for you. diff --git a/drivers/char/hw_random/virtio-rng.c b/drivers/char/hw_random/virtio-rng.c index a6f3a8a2aca6..36121c8d0315 100644 --- a/drivers/char/hw_random/virtio-rng.c +++ b/drivers/char/hw_random/virtio-rng.c @@ -23,6 +23,7 @@ struct virtrng_info { bool hwrng_register_done; bool hwrng_removed; /* data transfer */ + spinlock_t lock; struct completion have_data; unsigned int data_avail; unsigned int data_idx; @@ -37,6 +38,9 @@ struct virtrng_info { static void random_recv_done(struct virtqueue *vq) { struct virtrng_info *vi = vq->vdev->priv; + unsigned long flags; + + spin_lock_irqsave(&vi->lock, flags); /* We can get spurious callbacks, e.g. shared IRQs + virtio_pci. */ if (!virtqueue_get_buf(vi->vq, &vi->data_avail)) @@ -45,20 +49,20 @@ static void random_recv_done(struct virtqueue *vq) vi->data_idx = 0; complete(&vi->have_data); + spin_unlock_irqrestore(&vi->lock, flags); } static void request_entropy(struct virtrng_info *vi) { struct scatterlist sg; - reinit_completion(&vi->have_data); - vi->data_avail = 0; + BUG_ON(vi->data_avail != 0); vi->data_idx = 0; sg_init_one(&sg, vi->data, sizeof(vi->data)); /* There should always be room for one buffer. */ - virtqueue_add_inbuf(vi->vq, &sg, 1, vi->data, GFP_KERNEL); + virtqueue_add_inbuf(vi->vq, &sg, 1, vi->data, GFP_ATOMIC); virtqueue_kick(vi->vq); } @@ -70,8 +74,10 @@ static unsigned int copy_data(struct virtrng_info *vi, void *buf, memcpy(buf, vi->data + vi->data_idx, size); vi->data_idx += size; vi->data_avail -= size; - if (vi->data_avail == 0) + if (vi->data_avail == 0) { + reinit_completion(&vi->have_data); request_entropy(vi); + } return size; } @@ -81,18 +87,21 @@ static int virtio_read(struct hwrng *rng, void *buf, size_t size, bool wait) struct virtrng_info *vi = (struct virtrng_info *)rng->priv; unsigned int chunk; size_t read; + unsigned long flags; if (vi->hwrng_removed) return -ENODEV; read = 0; + spin_lock_irqsave(&vi->lock, flags); /* copy available data */ if (vi->data_avail) { chunk = copy_data(vi, buf, size); size -= chunk; read += chunk; } + spin_unlock_irqrestore(&vi->lock, flags); if (!wait) return read; @@ -108,12 +117,14 @@ static int virtio_read(struct hwrng *rng, void *buf, size_t size, bool wait) /* if vi->data_avail is 0, we have been interrupted * by a cleanup, but buffer stays in the queue */ + spin_lock_irqsave(&vi->lock, flags); if (vi->data_avail == 0) return read; chunk = copy_data(vi, buf + read, size); size -= chunk; read += chunk; + spin_unlock_irqrestore(&vi->lock, flags); } return read; @@ -122,19 +133,25 @@ static int virtio_read(struct hwrng *rng, void *buf, size_t size, bool wait) static void virtio_cleanup(struct hwrng *rng) { struct virtrng_info *vi = (struct virtrng_info *)rng->priv; + unsigned long flags; + spin_lock_irqsave(&vi->lock, flags); complete(&vi->have_data); + spin_unlock_irqrestore(&vi->lock, flags); } static int probe_common(struct virtio_device *vdev) { int err, index; struct virtrng_info *vi = NULL; + unsigned long flags; vi = kzalloc(sizeof(struct virtrng_info), GFP_KERNEL); if (!vi) return -ENOMEM; + spin_lock_init(&vi->lock); + vi->index = index = ida_simple_get(&rng_index_ida, 0, 0, GFP_KERNEL); if (index < 0) { err = index; @@ -162,7 +179,10 @@ static int probe_common(struct virtio_device *vdev) virtio_device_ready(vdev); /* we always have a pending entropy request */ - request_entropy(vi); + spin_lock_irqsave(&vi->lock, flags); + if (vi->data_avail == 0) + request_entropy(vi); + spin_unlock_irqrestore(&vi->lock, flags); return 0;