On Wed, Mar 08, 2023 at 06:03:01PM +0900, Damien Le Moal wrote: > pci_epf_test_data_transfer() and pci_epf_test_dma_callback() are not > handling DMA transfer completion correctly, leading to completion > notifications to the RP side that are too early. This problem can be Can you say RC which stands for Root Complex instead of RP? > detected when the RP side is running an IOMMU with messages such as: > > pci-endpoint-test 0000:0b:00.0: AMD-Vi: Event logged [IO_PAGE_FAULT > domain=0x001c address=0xfff00000 flags=0x0000] > > When running the pcitest.sh tests: the address used for a previous > test transfer generates the above error while the next test transfer is > running. > > Fix this by testing the dma transfer status in > pci_epf_test_dma_callback() and notifying the completion only when the > transfer status is DMA_COMPLETE or DMA_ERROR. Furthermore, in > pci_epf_test_data_transfer(), be paranoid and check again the transfer > status and always call dmaengine_terminate_sync() before returning. > > Fixes: 8353813c88ef ("PCI: endpoint: Enable DMA tests for endpoints with DMA capabilities") > Signed-off-by: Damien Le Moal <damien.lemoal@xxxxxxxxxxxxxxxxxx> > --- > drivers/pci/endpoint/functions/pci-epf-test.c | 40 ++++++++++++++----- > 1 file changed, 29 insertions(+), 11 deletions(-) > > diff --git a/drivers/pci/endpoint/functions/pci-epf-test.c b/drivers/pci/endpoint/functions/pci-epf-test.c > index d65419735d2e..557fbb91c729 100644 > --- a/drivers/pci/endpoint/functions/pci-epf-test.c > +++ b/drivers/pci/endpoint/functions/pci-epf-test.c > @@ -54,6 +54,9 @@ struct pci_epf_test { > struct delayed_work cmd_handler; > struct dma_chan *dma_chan_tx; > struct dma_chan *dma_chan_rx; > + struct dma_chan *transfer_chan; > + dma_cookie_t transfer_cookie; > + enum dma_status transfer_status; > struct completion transfer_complete; > bool dma_supported; > bool dma_private; > @@ -85,8 +88,14 @@ static size_t bar_size[] = { 512, 512, 1024, 16384, 131072, 1048576 }; > static void pci_epf_test_dma_callback(void *param) > { > struct pci_epf_test *epf_test = param; > - > - complete(&epf_test->transfer_complete); > + struct dma_tx_state state; > + > + epf_test->transfer_status = > + dmaengine_tx_status(epf_test->transfer_chan, > + epf_test->transfer_cookie, &state); > + if (epf_test->transfer_status == DMA_COMPLETE || > + epf_test->transfer_status == DMA_ERROR) > + complete(&epf_test->transfer_complete); > } > > /** > @@ -120,7 +129,6 @@ static int pci_epf_test_data_transfer(struct pci_epf_test *epf_test, > struct dma_async_tx_descriptor *tx; > struct dma_slave_config sconf = {}; > struct device *dev = &epf->dev; > - dma_cookie_t cookie; > int ret; > > if (IS_ERR_OR_NULL(chan)) { > @@ -152,25 +160,35 @@ static int pci_epf_test_data_transfer(struct pci_epf_test *epf_test, > } > > reinit_completion(&epf_test->transfer_complete); > + epf_test->transfer_chan = chan; > tx->callback = pci_epf_test_dma_callback; > tx->callback_param = epf_test; > - cookie = tx->tx_submit(tx); > + epf_test->transfer_cookie = tx->tx_submit(tx); > > - ret = dma_submit_error(cookie); > + ret = dma_submit_error(epf_test->transfer_cookie); > if (ret) { > - dev_err(dev, "Failed to do DMA tx_submit %d\n", cookie); > - return -EIO; > + dev_err(dev, "Failed to do DMA tx_submit %d\n", ret); > + goto terminate; > } > > dma_async_issue_pending(chan); > ret = wait_for_completion_interruptible(&epf_test->transfer_complete); > if (ret < 0) { > - dmaengine_terminate_sync(chan); > - dev_err(dev, "DMA wait_for_completion_timeout\n"); > - return -ETIMEDOUT; > + dev_err(dev, "DMA wait_for_completion interrupted\n"); > + goto terminate; > } > > - return 0; > + if (epf_test->transfer_status == DMA_ERROR) { > + dev_err(dev, "DMA transfer failed\n"); > + ret = -EIO; > + } > + > + WARN_ON(epf_test->transfer_status != DMA_COMPLETE); Why do you need this check? Even if required, WARN_ON is superfluous here. Thanks, Mani > + > +terminate: > + dmaengine_terminate_sync(chan); > + > + return ret; > } > > struct epf_dma_filter { > -- > 2.39.2 > -- மணிவண்ணன் சதாசிவம்