On 8/9/22 21:16, Michael S. Tsirkin wrote: > On Wed, Aug 03, 2022 at 02:12:25PM +0100, Vladimir Murzin wrote: >> On 8/3/22 13:55, Michael S. Tsirkin wrote: >>> On Wed, Aug 03, 2022 at 01:25:15PM +0100, Vladimir Murzin wrote: >>>> On 8/3/22 12:39, Michael S. Tsirkin wrote: >>>>> On Wed, Aug 03, 2022 at 09:57:35AM +0100, Vladimir Murzin wrote: >>>>>> On 8/2/22 13:49, Vladimir Murzin wrote: >>>>>>> Hi Laurent, >>>>>>> >>>>>>> On 10/28/21 11:11, Laurent Vivier wrote: >>>>>>>> If we ensure we have already some data available by enqueuing >>>>>>>> again the buffer once data are exhausted, we can return what we >>>>>>>> have without waiting for the device answer. >>>>>>>> >>>>>>>> Signed-off-by: Laurent Vivier <lvivier@xxxxxxxxxx> >>>>>>>> --- >>>>>>>> drivers/char/hw_random/virtio-rng.c | 26 ++++++++++++-------------- >>>>>>>> 1 file changed, 12 insertions(+), 14 deletions(-) >>>>>>>> >>>>>>>> diff --git a/drivers/char/hw_random/virtio-rng.c b/drivers/char/hw_random/virtio-rng.c >>>>>>>> index 8ba97cf4ca8f..0a7dde135db1 100644 >>>>>>>> --- a/drivers/char/hw_random/virtio-rng.c >>>>>>>> +++ b/drivers/char/hw_random/virtio-rng.c >>>>>>>> @@ -20,7 +20,6 @@ struct virtrng_info { >>>>>>>> struct virtqueue *vq; >>>>>>>> char name[25]; >>>>>>>> int index; >>>>>>>> - bool busy; >>>>>>>> bool hwrng_register_done; >>>>>>>> bool hwrng_removed; >>>>>>>> /* data transfer */ >>>>>>>> @@ -44,16 +43,18 @@ static void random_recv_done(struct virtqueue *vq) >>>>>>>> return; >>>>>>>> >>>>>>>> vi->data_idx = 0; >>>>>>>> - vi->busy = false; >>>>>>>> >>>>>>>> complete(&vi->have_data); >>>>>>>> } >>>>>>>> >>>>>>>> -/* The host will fill any buffer we give it with sweet, sweet randomness. */ >>>>>>>> -static void register_buffer(struct virtrng_info *vi) >>>>>>>> +static void request_entropy(struct virtrng_info *vi) >>>>>>>> { >>>>>>>> struct scatterlist sg; >>>>>>>> >>>>>>>> + reinit_completion(&vi->have_data); >>>>>>>> + vi->data_avail = 0; >>>>>>>> + vi->data_idx = 0; >>>>>>>> + >>>>>>>> sg_init_one(&sg, vi->data, sizeof(vi->data)); >>>>>>>> >>>>>>>> /* There should always be room for one buffer. */ >>>>>>>> @@ -69,6 +70,8 @@ static unsigned int copy_data(struct virtrng_info *vi, void *buf, >>>>>>>> memcpy(buf, vi->data + vi->data_idx, size); >>>>>>>> vi->data_idx += size; >>>>>>>> vi->data_avail -= size; >>>>>>>> + if (vi->data_avail == 0) >>>>>>>> + request_entropy(vi); >>>>>>>> return size; >>>>>>>> } >>>>>>>> >>>>>>>> @@ -98,13 +101,7 @@ static int virtio_read(struct hwrng *rng, void *buf, size_t size, bool wait) >>>>>>>> * so either size is 0 or data_avail is 0 >>>>>>>> */ >>>>>>>> while (size != 0) { >>>>>>>> - /* data_avail is 0 */ >>>>>>>> - if (!vi->busy) { >>>>>>>> - /* no pending request, ask for more */ >>>>>>>> - vi->busy = true; >>>>>>>> - reinit_completion(&vi->have_data); >>>>>>>> - register_buffer(vi); >>>>>>>> - } >>>>>>>> + /* data_avail is 0 but a request is pending */ >>>>>>>> ret = wait_for_completion_killable(&vi->have_data); >>>>>>>> if (ret < 0) >>>>>>>> return ret; >>>>>>>> @@ -126,8 +123,7 @@ static void virtio_cleanup(struct hwrng *rng) >>>>>>>> { >>>>>>>> struct virtrng_info *vi = (struct virtrng_info *)rng->priv; >>>>>>>> >>>>>>>> - if (vi->busy) >>>>>>>> - complete(&vi->have_data); >>>>>>>> + complete(&vi->have_data); >>>>>>>> } >>>>>>>> >>>>>>>> static int probe_common(struct virtio_device *vdev) >>>>>>>> @@ -163,6 +159,9 @@ static int probe_common(struct virtio_device *vdev) >>>>>>>> goto err_find; >>>>>>>> } >>>>>>>> >>>>>>>> + /* we always have a pending entropy request */ >>>>>>>> + request_entropy(vi); >>>>>>>> + >>>>>>>> return 0; >>>>>>>> >>>>>>>> err_find: >>>>>>>> @@ -181,7 +180,6 @@ static void remove_common(struct virtio_device *vdev) >>>>>>>> vi->data_idx = 0; >>>>>>>> complete(&vi->have_data); >>>>>>>> vdev->config->reset(vdev); >>>>>>>> - vi->busy = false; >>>>>>>> if (vi->hwrng_register_done) >>>>>>>> hwrng_unregister(&vi->hwrng); >>>>>>>> vdev->config->del_vqs(vdev); >>>>>>> >>>>>>> We observed that after this commit virtio-rng implementation in FVP doesn't >>>>>>> work >>>>>>> >>>>>>> INFO: bp.virtio_rng: Selected Random Generator Device: XORSHIFT DEVICE >>>>>>> INFO: bp.virtio_rng: Using seed value: 0x5674bba8 >>>>>>> Error: FVP_Base_AEMvA: bp.virtio_rng: <vq0-requestq> Found invalid descriptor index >>>>>>> In file: (unknown):0 >>>>>>> In process: FVP_Base_AEMvA.thread_p_12 @ 935500020 ns >>>>>>> Info: FVP_Base_AEMvA: bp.virtio_rng: Could not extract buffer >>>>>>> >>>>>>> while basic baremetal test works as expected >>>>>>> >>>>>>> INFO: bp.virtio_rng: Selected Random Generator Device: XORSHIFT DEVICE >>>>>>> INFO: bp.virtio_rng: Using seed value: 0x541c142e >>>>>>> Info: FVP_Base_AEMv8A: bp.virtio_rng: Generated Number: 0x4b098991ceb377e6 >>>>>>> Info: FVP_Base_AEMv8A: bp.virtio_rng: Generated Number: 0xbdcbe3f765ba62f7 >>>>>>> >>>>>>> We are trying to get an idea what is missing and where, yet none of us familiar >>>>>>> with the driver :( >>>>>>> >>>>>>> I'm looping Kevin who originally reported that and Mauricio who is looking form >>>>>>> the FVP side. >>>>>> >>>>>> With the following diff FVP works agin >>>>>> >>>>>> diff --git a/drivers/char/hw_random/virtio-rng.c b/drivers/char/hw_random/virtio-rng.c >>>>>> index a6f3a8a2ac..042503ad6c 100644 >>>>>> --- a/drivers/char/hw_random/virtio-rng.c >>>>>> +++ b/drivers/char/hw_random/virtio-rng.c >>>>>> @@ -54,6 +54,7 @@ static void request_entropy(struct virtrng_info *vi) >>>>>> reinit_completion(&vi->have_data); >>>>>> vi->data_avail = 0; >>>>>> vi->data_idx = 0; >>>>>> + smp_mb(); >>>>>> >>>>>> sg_init_one(&sg, vi->data, sizeof(vi->data)); >>>>>> >>>>>> >>>>>> What do you reckon? >>>>>> >>>>>> Cheers >>>>>> Vladimir >>>>> >>>>> Thanks for debugging this! >>>>> >>>>> OK, interesting. >>>>> >>>>> data_idx and data_avail are accessed from virtio_read. >>>>> >>>>> Which as far as I can tell is invoked just with reading_mutex. >>>>> >>>>> >>>>> But, request_entropy is called from probe when device is registered >>>>> this time without locks >>>>> so it can trigger while another thread is calling virtio_read. >>>>> >>>>> Second request_entropy is called from a callback random_recv_done >>>>> also without locks. >>>>> >>>>> So it's great that smp_mb helped here but I suspect in fact we >>>>> need locking. Laurent? >>>>> >>>> >>>> I'm sorry for the noise, but it looks like I'm seeing issue for some different reasons. >>>> I manage to reproduce issue even with smb_mb() in place. The reason I though it helped >>>> is because I changed both environment and added smb_mb(). >>>> >>>> Anyway, thank you for your time! >>>> >>>> Cheers >>>> Vladimir >>> >>> Well we at least have a race condition found by code review here. Here's >>> a quick hack attempting to fix it. I don't like it much since >>> it adds buffers with GFP_ATOMIC and kicks under a spinlock, but >>> for now we can at least test it. I did a quick build but that's >>> all, a bit rushed now sorry. Would appreciate knowing whether >>> this addresses the issue for you. >>> >>> >>> diff --git a/drivers/char/hw_random/virtio-rng.c b/drivers/char/hw_random/virtio-rng.c >>> index a6f3a8a2aca6..36121c8d0315 100644 >>> --- a/drivers/char/hw_random/virtio-rng.c >>> +++ b/drivers/char/hw_random/virtio-rng.c >>> @@ -23,6 +23,7 @@ struct virtrng_info { >>> bool hwrng_register_done; >>> bool hwrng_removed; >>> /* data transfer */ >>> + spinlock_t lock; >>> struct completion have_data; >>> unsigned int data_avail; >>> unsigned int data_idx; >>> @@ -37,6 +38,9 @@ struct virtrng_info { >>> static void random_recv_done(struct virtqueue *vq) >>> { >>> struct virtrng_info *vi = vq->vdev->priv; >>> + unsigned long flags; >>> + >>> + spin_lock_irqsave(&vi->lock, flags); >>> >>> /* We can get spurious callbacks, e.g. shared IRQs + virtio_pci. */ >>> if (!virtqueue_get_buf(vi->vq, &vi->data_avail)) >>> @@ -45,20 +49,20 @@ static void random_recv_done(struct virtqueue *vq) >>> vi->data_idx = 0; >>> >>> complete(&vi->have_data); >>> + spin_unlock_irqrestore(&vi->lock, flags); >>> } >>> >>> static void request_entropy(struct virtrng_info *vi) >>> { >>> struct scatterlist sg; >>> >>> - reinit_completion(&vi->have_data); >>> - vi->data_avail = 0; >>> + BUG_ON(vi->data_avail != 0); >>> vi->data_idx = 0; >>> >>> sg_init_one(&sg, vi->data, sizeof(vi->data)); >>> >>> /* There should always be room for one buffer. */ >>> - virtqueue_add_inbuf(vi->vq, &sg, 1, vi->data, GFP_KERNEL); >>> + virtqueue_add_inbuf(vi->vq, &sg, 1, vi->data, GFP_ATOMIC); >>> >>> virtqueue_kick(vi->vq); >>> } >>> @@ -70,8 +74,10 @@ static unsigned int copy_data(struct virtrng_info *vi, void *buf, >>> memcpy(buf, vi->data + vi->data_idx, size); >>> vi->data_idx += size; >>> vi->data_avail -= size; >>> - if (vi->data_avail == 0) >>> + if (vi->data_avail == 0) { >>> + reinit_completion(&vi->have_data); >>> request_entropy(vi); >>> + } >>> return size; >>> } >>> >>> @@ -81,18 +87,21 @@ static int virtio_read(struct hwrng *rng, void *buf, size_t size, bool wait) >>> struct virtrng_info *vi = (struct virtrng_info *)rng->priv; >>> unsigned int chunk; >>> size_t read; >>> + unsigned long flags; >>> >>> if (vi->hwrng_removed) >>> return -ENODEV; >>> >>> read = 0; >>> >>> + spin_lock_irqsave(&vi->lock, flags); >>> /* copy available data */ >>> if (vi->data_avail) { >>> chunk = copy_data(vi, buf, size); >>> size -= chunk; >>> read += chunk; >>> } >>> + spin_unlock_irqrestore(&vi->lock, flags); >>> >>> if (!wait) >>> return read; >>> @@ -108,12 +117,14 @@ static int virtio_read(struct hwrng *rng, void *buf, size_t size, bool wait) >>> /* if vi->data_avail is 0, we have been interrupted >>> * by a cleanup, but buffer stays in the queue >>> */ >>> + spin_lock_irqsave(&vi->lock, flags); >>> if (vi->data_avail == 0) >>> return read; >>> >>> chunk = copy_data(vi, buf + read, size); >>> size -= chunk; >>> read += chunk; >>> + spin_unlock_irqrestore(&vi->lock, flags); >>> } >>> >>> return read; >>> @@ -122,19 +133,25 @@ static int virtio_read(struct hwrng *rng, void *buf, size_t size, bool wait) >>> static void virtio_cleanup(struct hwrng *rng) >>> { >>> struct virtrng_info *vi = (struct virtrng_info *)rng->priv; >>> + unsigned long flags; >>> >>> + spin_lock_irqsave(&vi->lock, flags); >>> complete(&vi->have_data); >>> + spin_unlock_irqrestore(&vi->lock, flags); >>> } >>> >>> static int probe_common(struct virtio_device *vdev) >>> { >>> int err, index; >>> struct virtrng_info *vi = NULL; >>> + unsigned long flags; >>> >>> vi = kzalloc(sizeof(struct virtrng_info), GFP_KERNEL); >>> if (!vi) >>> return -ENOMEM; >>> >>> + spin_lock_init(&vi->lock); >>> + >>> vi->index = index = ida_simple_get(&rng_index_ida, 0, 0, GFP_KERNEL); >>> if (index < 0) { >>> err = index; >>> @@ -162,7 +179,10 @@ static int probe_common(struct virtio_device *vdev) >>> virtio_device_ready(vdev); >>> >>> /* we always have a pending entropy request */ >>> - request_entropy(vi); >>> + spin_lock_irqsave(&vi->lock, flags); >>> + if (vi->data_avail == 0) >>> + request_entropy(vi); >>> + spin_unlock_irqrestore(&vi->lock, flags); >>> >>> return 0; >>> >>> >> >> Thanks a lot! I gave it a go and it did not help. I think I need to find out how exactly >> my environment affected... it not necessary need to be kernel related. > > Okay ... I'll wait to hear your report then. You are saying maybe > there's no bug in kernel, something else changed in your environment? > Yes, I noticed that if I swap bootloader then problem goes away, so unlikely kernel issue. I passed reproducer to people working on a model, so they can investigate from the other side. So far feel free to ignore my report. Cheers Vladimir