Re: [PATCH 6/7] spi: bcm2835: Overcome sglist entry length limitation

On Thu, Nov 08, 2018 at 08:06:10AM +0100, Lukas Wunner wrote:
> When in DMA mode, the BCM2835 SPI controller requires that the FIFO is
> accessed in 4 byte chunks.  This rule is not fulfilled if a transfer
> consists of multiple sglist entries, one per page, and the first entry
> starts in the middle of a page with an offset not a multiple of 4.
> 
> The driver currently falls back to programmed I/O for such transfers,
> incurring a significant performance penalty.
> 
> Overcome this hardware limitation by transferring the first few bytes of
> a transfer without DMA such that the remainder of the first sglist entry
> becomes a multiple of 4.

FWIW the patch below is what I used to test and debug this.

It copies the transfer buffers to a vmalloc'ed area to force the SPI core
to create one sglist entry per page.  The offset within the first page
can be set with the BCM2835_SPI_FIRST_TX_LEN and BCM2835_SPI_FIRST_RX_LEN
macros to test various pathological alignments or force spillover of the
TX prologue to the second sglist entry.
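
To make the resulting alignment concrete:  the SPI core maps a vmalloc'ed
buffer page by page, so the first sglist entry runs from the buffer start
to the end of its page.  A rough model (first_sg_entry_len() is just for
illustration, assuming 4 KiB pages and no merging of entries by an IOMMU):

#include <linux/kernel.h>
#include <linux/mm.h>

/*
 * Illustration only:  length of the first sglist entry for a vmalloc'ed
 * buffer, i.e. from the buffer start to its page boundary, capped at the
 * transfer length.
 */
static unsigned int first_sg_entry_len(const void *buf, unsigned int len)
{
	unsigned int first = PAGE_SIZE - offset_in_page(buf);

	return min(first, len);
}

With the values hardcoded below, a full-page transfer thus gets a 2040 byte
first TX entry (a multiple of 4) and a 43 byte first RX entry (not a
multiple of 4), i.e. only the RX side is misaligned.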

I'm just leaving this here in case anyone else wants to perform further
in-depth testing.

-- >8 --

diff --git a/drivers/spi/spi-bcm2835.c b/drivers/spi/spi-bcm2835.c
index fd9a73963f8c..0f87bf0b18b2 100644
--- a/drivers/spi/spi-bcm2835.c
+++ b/drivers/spi/spi-bcm2835.c
@@ -112,6 +112,7 @@ struct bcm2835_spi {
 	int rx_prologue;
 	bool tx_spillover;
 	bool dma_pending;
+	void *vm, *tx_buf_saved, *rx_buf_saved;
 };
 
 static inline u32 bcm2835_rd(struct bcm2835_spi *bs, unsigned reg)
@@ -346,6 +347,22 @@ static int bcm2835_spi_transfer_one_irq(struct spi_master *master,
  * list making it slightly unpractical...
  */
 
+static void dump_sg(struct spi_transfer *tfr)
+{
+	struct scatterlist *sgl;
+	int i;
+
+	for_each_sg(tfr->tx_sg.sgl, sgl, tfr->tx_sg.nents, i)
+		pr_info("tx sgl %d: addr=%#x, len=%u, multiple of 4? %d\n", i,
+			sg_dma_address(sgl), sg_dma_len(sgl),
+			!(sg_dma_len(sgl) % 4));
+
+	for_each_sg(tfr->rx_sg.sgl, sgl, tfr->rx_sg.nents, i)
+		pr_info("rx sgl %d: addr=%#x, len=%u, multiple of 4? %d\n", i,
+			sg_dma_address(sgl), sg_dma_len(sgl),
+			!(sg_dma_len(sgl) % 4));
+}
+
 /**
  * bcm2835_spi_transfer_prologue() - transfer first few bytes without DMA
  * @master: SPI master
@@ -425,6 +442,12 @@ static void bcm2835_spi_transfer_prologue(struct spi_master *master,
 	if (!bs->tx_prologue)
 		return;
 
+
+	pr_info("tx_prologue=%d, rx_prologue=%d, tx_spillover=%d\n",
+		bs->tx_prologue, bs->rx_prologue, bs->tx_spillover);
+	// pr_info("before:\n");
+	// dump_sg(tfr);
+
 	/* Write and read RX prologue.  Adjust first entry in RX sglist. */
 	if (bs->rx_prologue) {
 		bcm2835_wr(bs, BCM2835_SPI_DLEN, bs->rx_prologue);
@@ -494,6 +517,10 @@ static void bcm2835_spi_undo_prologue(struct bcm2835_spi *bs)
 		tfr->tx_sg.sgl[1].dma_address -= 4;
 		tfr->tx_sg.sgl[1].length      += 4;
 	}
+
+	// pr_info("after:\n");
+	// dump_sg(tfr);
+
 }
 
 static void bcm2835_spi_dma_done(void *data)
@@ -514,6 +541,18 @@ static void bcm2835_spi_dma_done(void *data)
 		bcm2835_spi_undo_prologue(bs);
 	}
 
+	/* if (bs->tx_prologue) {
+		struct spi_transfer *tfr = bs->tfr;
+		struct scatterlist *sgl;
+		int i;
+
+		pr_info("rx dump:\n");
+		for_each_sg(tfr->rx_sg.sgl, sgl, tfr->rx_sg.nents, i) {
+			pr_info("rx sgl %d: paddr=%#x addr=%p len=%u\n", i, sg_dma_address(sgl), sg_virt(sgl), sg_dma_len(sgl));
+			print_hex_dump(KERN_INFO, "rx ", DUMP_PREFIX_OFFSET, 16, 1, sg_virt(sgl), sg_dma_len(sgl), true);
+		}
+	} */
+
 	/* and mark as completed */;
 	complete(&master->xfer_completion);
 }
@@ -842,6 +881,8 @@ static int bcm2835_spi_prepare_message(struct spi_master *master,
 	struct spi_device *spi = msg->spi;
 	struct bcm2835_spi *bs = spi_master_get_devdata(master);
 	u32 cs = bcm2835_rd(bs, BCM2835_SPI_CS);
+	struct spi_transfer *xfer;
+	void *ptr;
 
 	cs &= ~(BCM2835_SPI_CS_CPOL | BCM2835_SPI_CS_CPHA);
 
@@ -852,6 +893,54 @@ static int bcm2835_spi_prepare_message(struct spi_master *master,
 
 	bcm2835_wr(bs, BCM2835_SPI_CS, cs);
 
+#define BCM2835_SPI_FIRST_TX_LEN 2040
+#define BCM2835_SPI_FIRST_RX_LEN 43
+
+	list_for_each_entry(xfer, &msg->transfers, transfer_list) {
+		if (xfer->len < BCM2835_SPI_DMA_MIN_LENGTH ||
+		    xfer->len > PAGE_SIZE)
+			continue;
+
+		ptr = bs->vm +      PAGE_SIZE - BCM2835_SPI_FIRST_TX_LEN;
+		if (xfer->tx_buf)
+			memcpy(ptr, xfer->tx_buf, xfer->len);
+		else
+			memset(ptr, 0x00, xfer->len);
+		bs->tx_buf_saved = (void *)xfer->tx_buf;
+		xfer->tx_buf = ptr;
+
+		ptr = bs->vm + (3 * PAGE_SIZE) - BCM2835_SPI_FIRST_RX_LEN;
+		memset(ptr, 0xf4, xfer->len);
+		bs->rx_buf_saved = xfer->rx_buf;
+		xfer->rx_buf = ptr;
+		// pr_info("copied to vmalloc area\n");
+		break;
+	}
+
+	return 0;
+}
+
+static int bcm2835_spi_unprepare_message(struct spi_master *master,
+					 struct spi_message *msg)
+{
+	struct bcm2835_spi *bs = spi_master_get_devdata(master);
+	struct spi_transfer *xfer;
+
+	list_for_each_entry(xfer, &msg->transfers, transfer_list) {
+		if (xfer->len < BCM2835_SPI_DMA_MIN_LENGTH ||
+		    xfer->len > PAGE_SIZE)
+			continue;
+
+		xfer->tx_buf = bs->tx_buf_saved;
+
+		// pr_info("%s:memcpy(%#x, %#x, %u)\n", __func__, bs->rx_buf_saved, xfer->rx_buf, xfer->len);
+		if (bs->rx_buf_saved)
+			memcpy(bs->rx_buf_saved, xfer->rx_buf, xfer->len);
+		xfer->rx_buf = bs->rx_buf_saved;
+		// pr_info("copied from vmalloc area\n");
+		break;
+	}
+
 	return 0;
 }
 
@@ -944,29 +1033,36 @@ static int bcm2835_spi_probe(struct platform_device *pdev)
 	master->transfer_one = bcm2835_spi_transfer_one;
 	master->handle_err = bcm2835_spi_handle_err;
 	master->prepare_message = bcm2835_spi_prepare_message;
+	master->unprepare_message = bcm2835_spi_unprepare_message;
 	master->dev.of_node = pdev->dev.of_node;
 
 	bs = spi_master_get_devdata(master);
 
+	bs->vm = vmalloc(4 * PAGE_SIZE);
+	if (!bs->vm) {
+		err = -ENOMEM;
+		goto out_master_put;
+	}
+
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	bs->regs = devm_ioremap_resource(&pdev->dev, res);
 	if (IS_ERR(bs->regs)) {
 		err = PTR_ERR(bs->regs);
-		goto out_master_put;
+		goto out_free_vm;
 	}
 
 	bs->clk = devm_clk_get(&pdev->dev, NULL);
 	if (IS_ERR(bs->clk)) {
 		err = PTR_ERR(bs->clk);
 		dev_err(&pdev->dev, "could not get clk: %d\n", err);
-		goto out_master_put;
+		goto out_free_vm;
 	}
 
 	bs->irq = platform_get_irq(pdev, 0);
 	if (bs->irq <= 0) {
 		dev_err(&pdev->dev, "could not get IRQ: %d\n", bs->irq);
 		err = bs->irq ? bs->irq : -ENODEV;
-		goto out_master_put;
+		goto out_free_vm;
 	}
 
 	clk_prepare_enable(bs->clk);
@@ -994,6 +1090,8 @@ static int bcm2835_spi_probe(struct platform_device *pdev)
 
 out_clk_disable:
 	clk_disable_unprepare(bs->clk);
+out_free_vm:
+	vfree(bs->vm);
 out_master_put:
 	spi_master_put(master);
 	return err;
@@ -1012,6 +1110,8 @@ static int bcm2835_spi_remove(struct platform_device *pdev)
 
 	bcm2835_dma_release(master);
 
+	vfree(bs->vm);
+
 	return 0;
 }
 


