On Tue, 2022-11-29 at 23:05 -0800, vdasa@xxxxxxxxxx wrote: > From: Vishnu Dasa <vdasa@xxxxxxxxxx> > > The vmci_dispatch_dgs() tasklet function calls vmci_read_data() > which uses wait_event() resulting in invalid sleep in an atomic > context (and therefore potentially in a deadlock). > > Use threaded irqs to fix this issue and completely remove usage > of tasklets. > > [ 20.264639] BUG: sleeping function called from invalid context at > drivers/misc/vmw_vmci/vmci_guest.c:145 > [ 20.264643] in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 762, name: > vmtoolsd > [ 20.264645] preempt_count: 101, expected: 0 > [ 20.264646] RCU nest depth: 0, expected: 0 > [ 20.264647] 1 lock held by vmtoolsd/762: > [ 20.264648] #0: ffff0000874ae440 (sk_lock-AF_VSOCK){+.+.}-{0:0}, at: > vsock_connect+0x60/0x330 [vsock] > [ 20.264658] Preemption disabled at: > [ 20.264659] [<ffff80000151d7d8>] vmci_send_datagram+0x44/0xa0 [vmw_vmci] > [ 20.264665] CPU: 0 PID: 762 Comm: vmtoolsd Not tainted 5.19.0- > 0.rc8.20220727git39c3c396f813.60.fc37.aarch64 #1 > [ 20.264667] Hardware name: VMware, Inc. VBSA/VBSA, BIOS VEFI 12/31/2020 > [ 20.264668] Call trace: > [ 20.264669] dump_backtrace+0xc4/0x130 > [ 20.264672] show_stack+0x24/0x80 > [ 20.264673] dump_stack_lvl+0x88/0xb4 > [ 20.264676] dump_stack+0x18/0x34 > [ 20.264677] __might_resched+0x1a0/0x280 > [ 20.264679] __might_sleep+0x58/0x90 > [ 20.264681] vmci_read_data+0x74/0x120 [vmw_vmci] > [ 20.264683] vmci_dispatch_dgs+0x64/0x204 [vmw_vmci] > [ 20.264686] tasklet_action_common.constprop.0+0x13c/0x150 > [ 20.264688] tasklet_action+0x40/0x50 > [ 20.264689] __do_softirq+0x23c/0x6b4 > [ 20.264690] __irq_exit_rcu+0x104/0x214 > [ 20.264691] irq_exit_rcu+0x1c/0x50 > [ 20.264693] el1_interrupt+0x38/0x6c > [ 20.264695] el1h_64_irq_handler+0x18/0x24 > [ 20.264696] el1h_64_irq+0x68/0x6c > [ 20.264697] preempt_count_sub+0xa4/0xe0 > [ 20.264698] _raw_spin_unlock_irqrestore+0x64/0xb0 > [ 20.264701] vmci_send_datagram+0x7c/0xa0 [vmw_vmci] > [ 20.264703] vmci_datagram_dispatch+0x84/0x100 [vmw_vmci] > [ 20.264706] vmci_datagram_send+0x2c/0x40 [vmw_vmci] > [ 20.264709] vmci_transport_send_control_pkt+0xb8/0x120 > [vmw_vsock_vmci_transport] > [ 20.264711] vmci_transport_connect+0x40/0x7c [vmw_vsock_vmci_transport] > [ 20.264713] vsock_connect+0x278/0x330 [vsock] > [ 20.264715] __sys_connect_file+0x8c/0xc0 > [ 20.264718] __sys_connect+0x84/0xb4 > [ 20.264720] __arm64_sys_connect+0x2c/0x3c > [ 20.264721] invoke_syscall+0x78/0x100 > [ 20.264723] el0_svc_common.constprop.0+0x68/0x124 > [ 20.264724] do_el0_svc+0x38/0x4c > [ 20.264725] el0_svc+0x60/0x180 > [ 20.264726] el0t_64_sync_handler+0x11c/0x150 > [ 20.264728] el0t_64_sync+0x190/0x194 > > Signed-off-by: Vishnu Dasa <vdasa@xxxxxxxxxx> > Suggested-by: Zack Rusin <zackr@xxxxxxxxxx> > Reported-by: Nadav Amit <namit@xxxxxxxxxx> > Reported-by: Nathan Chancellor <nathan@xxxxxxxxxx> > Tested-by: Nathan Chancellor <nathan@xxxxxxxxxx> > Fixes: 463713eb6164 ("VMCI: dma dg: add support for DMA datagrams receive") > Cc: <stable@xxxxxxxxxxxxxxx> # v5.18+ > Cc: VMware PV-Drivers Reviewers <pv-drivers@xxxxxxxxxx> > Cc: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx> > Cc: Bryan Tan <bryantan@xxxxxxxxxx> > --- > drivers/misc/vmw_vmci/vmci_guest.c | 49 ++++++++++++------------------ > 1 file changed, 19 insertions(+), 30 deletions(-) > > diff --git a/drivers/misc/vmw_vmci/vmci_guest.c > b/drivers/misc/vmw_vmci/vmci_guest.c > index aa7b05de97dd..6ab717b4a5db 100644 > --- a/drivers/misc/vmw_vmci/vmci_guest.c > +++ b/drivers/misc/vmw_vmci/vmci_guest.c > @@ -56,8 +56,6 @@ struct vmci_guest_device { > > bool exclusive_vectors; > > - struct tasklet_struct datagram_tasklet; > - struct tasklet_struct bm_tasklet; > struct wait_queue_head inout_wq; > > void *data_buffer; > @@ -304,9 +302,8 @@ static int vmci_check_host_caps(struct pci_dev *pdev) > * This function assumes that it has exclusive access to the data > * in register(s) for the duration of the call. > */ > -static void vmci_dispatch_dgs(unsigned long data) > +static void vmci_dispatch_dgs(struct vmci_guest_device *vmci_dev) > { > - struct vmci_guest_device *vmci_dev = (struct vmci_guest_device *)data; > u8 *dg_in_buffer = vmci_dev->data_buffer; > struct vmci_datagram *dg; > size_t dg_in_buffer_size = VMCI_MAX_DG_SIZE; > @@ -465,10 +462,8 @@ static void vmci_dispatch_dgs(unsigned long data) > * Scans the notification bitmap for raised flags, clears them > * and handles the notifications. > */ > -static void vmci_process_bitmap(unsigned long data) > +static void vmci_process_bitmap(struct vmci_guest_device *dev) > { > - struct vmci_guest_device *dev = (struct vmci_guest_device *)data; > - > if (!dev->notification_bitmap) { > dev_dbg(dev->dev, "No bitmap present in %s\n", __func__); > return; > @@ -486,13 +481,13 @@ static irqreturn_t vmci_interrupt(int irq, void *_dev) > struct vmci_guest_device *dev = _dev; > > /* > - * If we are using MSI-X with exclusive vectors then we simply schedule > - * the datagram tasklet, since we know the interrupt was meant for us. > + * If we are using MSI-X with exclusive vectors then we simply call > + * vmci_dispatch_dgs(), since we know the interrupt was meant for us. > * Otherwise we must read the ICR to determine what to do. > */ > > if (dev->exclusive_vectors) { > - tasklet_schedule(&dev->datagram_tasklet); > + vmci_dispatch_dgs(dev); > } else { > unsigned int icr; > > @@ -502,12 +497,12 @@ static irqreturn_t vmci_interrupt(int irq, void *_dev) > return IRQ_NONE; > > if (icr & VMCI_ICR_DATAGRAM) { > - tasklet_schedule(&dev->datagram_tasklet); > + vmci_dispatch_dgs(dev); > icr &= ~VMCI_ICR_DATAGRAM; > } > > if (icr & VMCI_ICR_NOTIFICATION) { > - tasklet_schedule(&dev->bm_tasklet); > + vmci_process_bitmap(dev); > icr &= ~VMCI_ICR_NOTIFICATION; > } > > @@ -536,7 +531,7 @@ static irqreturn_t vmci_interrupt_bm(int irq, void *_dev) > struct vmci_guest_device *dev = _dev; > > /* For MSI-X we can just assume it was meant for us. */ > - tasklet_schedule(&dev->bm_tasklet); > + vmci_process_bitmap(dev); > > return IRQ_HANDLED; > } > @@ -638,10 +633,6 @@ static int vmci_guest_probe_device(struct pci_dev *pdev, > vmci_dev->iobase = iobase; > vmci_dev->mmio_base = mmio_base; > > - tasklet_init(&vmci_dev->datagram_tasklet, > - vmci_dispatch_dgs, (unsigned long)vmci_dev); > - tasklet_init(&vmci_dev->bm_tasklet, > - vmci_process_bitmap, (unsigned long)vmci_dev); > init_waitqueue_head(&vmci_dev->inout_wq); > > if (mmio_base != NULL) { > @@ -808,8 +799,9 @@ static int vmci_guest_probe_device(struct pci_dev *pdev, > * Request IRQ for legacy or MSI interrupts, or for first > * MSI-X vector. > */ > - error = request_irq(pci_irq_vector(pdev, 0), vmci_interrupt, > - IRQF_SHARED, KBUILD_MODNAME, vmci_dev); > + error = request_threaded_irq(pci_irq_vector(pdev, 0), NULL, > + vmci_interrupt, IRQF_SHARED, > + KBUILD_MODNAME, vmci_dev); > if (error) { > dev_err(&pdev->dev, "Irq %u in use: %d\n", > pci_irq_vector(pdev, 0), error); > @@ -823,9 +815,9 @@ static int vmci_guest_probe_device(struct pci_dev *pdev, > * between the vectors. > */ > if (vmci_dev->exclusive_vectors) { > - error = request_irq(pci_irq_vector(pdev, 1), > - vmci_interrupt_bm, 0, KBUILD_MODNAME, > - vmci_dev); > + error = request_threaded_irq(pci_irq_vector(pdev, 1), NULL, > + vmci_interrupt_bm, 0, > + KBUILD_MODNAME, vmci_dev); > if (error) { > dev_err(&pdev->dev, > "Failed to allocate irq %u: %d\n", > @@ -833,9 +825,11 @@ static int vmci_guest_probe_device(struct pci_dev *pdev, > goto err_free_irq; > } > if (caps_in_use & VMCI_CAPS_DMA_DATAGRAM) { > - error = request_irq(pci_irq_vector(pdev, 2), > - vmci_interrupt_dma_datagram, > - 0, KBUILD_MODNAME, vmci_dev); > + error = request_threaded_irq(pci_irq_vector(pdev, 2), > + NULL, > + vmci_interrupt_dma_datagram, > + 0, KBUILD_MODNAME, > + vmci_dev); > if (error) { > dev_err(&pdev->dev, > "Failed to allocate irq %u: %d\n", > @@ -871,8 +865,6 @@ static int vmci_guest_probe_device(struct pci_dev *pdev, > > err_free_irq: > free_irq(pci_irq_vector(pdev, 0), vmci_dev); > - tasklet_kill(&vmci_dev->datagram_tasklet); > - tasklet_kill(&vmci_dev->bm_tasklet); > > err_disable_msi: > pci_free_irq_vectors(pdev); > @@ -943,9 +935,6 @@ static void vmci_guest_remove_device(struct pci_dev *pdev) > free_irq(pci_irq_vector(pdev, 0), vmci_dev); > pci_free_irq_vectors(pdev); > > - tasklet_kill(&vmci_dev->datagram_tasklet); > - tasklet_kill(&vmci_dev->bm_tasklet); > - > if (vmci_dev->notification_bitmap) { > /* > * The device reset above cleared the bitmap state of the I'm happy to see this fixed! Reviewed-by: Zack Rusin <zackr@xxxxxxxxxx> z