In Raspberry-pi CM4 devices with BCM2711 processor, the documentation points to a limitation with 64-bit accesses. Using memcpy_fromio and memcpy_toio for each 64-bit SPI read/write causes the first 4 bytes to be repeated. To address the limitation, each read/write is limited to 4 bytes in case of BCM2711 processors. On x64 systems, using memcpy_toio and memcpy_fromio results in 4-byte TLP writes instead of 8-byte. Add the custom IO write and read for enabling 64-bit access by default. Tested and verified performance improvement on x64 devices while transferring 1024 bytes for 20000 iterations at 25 MHz clock frequency: Test 1: With memcpy_fromio and memcpy_toio spi mode: 0x0 bits per word: 8 max speed: 25000000 Hz (25000 kHz) rate: tx 6232.5kbps, rx 6232.5kbps rate: tx 6889.5kbps, rx 6889.5kbps rate: tx 6765.0kbps, rx 6765.0kbps rate: tx 6873.1kbps, rx 6873.1kbps total: tx 20000.0KB, rx 20000.0KB Test 2: With the custom IO write and read spi mode: 0x0 bits per word: 8 max speed: 25000000 Hz (25000 kHz) rate: tx 9774.7kbps, rx 9774.7kbps rate: tx 10985.5kbps, rx 10985.5kbps rate: tx 10749.5kbps, rx 10749.5kbps total: tx 20000.0KB, rx 20000.0KB Signed-off-by: Rengarajan S <rengarajan.s@xxxxxxxxxxxxx> --- drivers/spi/spi-pci1xxxx.c | 95 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 91 insertions(+), 4 deletions(-) diff --git a/drivers/spi/spi-pci1xxxx.c b/drivers/spi/spi-pci1xxxx.c index fc98979eba48..ae1d76f03268 100644 --- a/drivers/spi/spi-pci1xxxx.c +++ b/drivers/spi/spi-pci1xxxx.c @@ -12,6 +12,7 @@ #include <linux/irq.h> #include <linux/module.h> #include <linux/msi.h> +#include <linux/of.h> #include <linux/pci_regs.h> #include <linux/pci.h> #include <linux/spinlock.h> @@ -407,6 +408,68 @@ static void pci1xxxx_start_spi_xfer(struct pci1xxxx_spi_internal *p, u8 hw_inst) writel(regval, p->parent->reg_base + SPI_MST_CTL_REG_OFFSET(hw_inst)); } +static void pci1xxxx_spi_write_to_io(void __iomem *to, const void *from, + size_t count, size_t size) +{ + while (count) { + if (size == 8 && (IS_ALIGNED((unsigned long)to, 8)) && + count >= 8) { + __raw_writeq(*(u64 *)from, to); + from += 8; + to += 8; + count -= 8; + } else if (size >= 4 && (IS_ALIGNED((unsigned long)to, 4)) && + count >= 4) { + __raw_writel(*(u32 *)from, to); + from += 4; + to += 4; + count -= 4; + } else if (size >= 2 && (IS_ALIGNED((unsigned long)to, 2)) && + count >= 2) { + __raw_writew(*(u16 *)from, to); + from += 2; + to += 2; + count -= 2; + } else { + __raw_writeb(*(u8 *)from, to); + from += 1; + to += 1; + count -= 1; + } + } +} + +static void pci1xxxx_spi_read_from_io(void *to, const void __iomem *from, + size_t count, size_t size) +{ + while (count) { + if (size == 8 && (IS_ALIGNED((unsigned long)from, 8)) && + count >= 8) { + *(u64 *)to = __raw_readq(from); + from += 8; + to += 8; + count -= 8; + } else if (size >= 4 && (IS_ALIGNED((unsigned long)from, 4)) && + count >= 4) { + *(u32 *)to = __raw_readl(from); + from += 4; + to += 4; + count -= 4; + } else if (size >= 2 && (IS_ALIGNED((unsigned long)from, 2)) && + count >= 2) { + *(u16 *)to = __raw_readw(from); + from += 2; + to += 2; + count -= 2; + } else { + *(u8 *)to = __raw_readb(from); + from += 1; + to += 1; + count -= 1; + } + } +} + static int pci1xxxx_spi_transfer_with_io(struct spi_controller *spi_ctlr, struct spi_device *spi, struct spi_transfer *xfer) { @@ -444,8 +507,23 @@ static int pci1xxxx_spi_transfer_with_io(struct spi_controller *spi_ctlr, len = transfer_len % SPI_MAX_DATA_LEN; reinit_completion(&p->spi_xfer_done); - memcpy_toio(par->reg_base + SPI_MST_CMD_BUF_OFFSET(p->hw_inst), - &tx_buf[bytes_transfered], len); + /* + * Raspberry Pi CM4 BCM2711 doesn't support 64-bit + * accesses. + */ + if (of_machine_is_compatible("brcm,bcm2711")) { + pci1xxxx_spi_write_to_io(par->reg_base + + SPI_MST_CMD_BUF_OFFSET + (p->hw_inst), + &tx_buf[bytes_transfered], + len, 4); + } else { + pci1xxxx_spi_write_to_io(par->reg_base + + SPI_MST_CMD_BUF_OFFSET + (p->hw_inst), + &tx_buf[bytes_transfered], + len, 8); + } bytes_transfered += len; pci1xxxx_spi_setup(par, p->hw_inst, spi->mode, clkdiv, len); pci1xxxx_start_spi_xfer(p, p->hw_inst); @@ -457,8 +535,17 @@ static int pci1xxxx_spi_transfer_with_io(struct spi_controller *spi_ctlr, return -ETIMEDOUT; if (rx_buf) { - memcpy_fromio(&rx_buf[bytes_recvd], par->reg_base + - SPI_MST_RSP_BUF_OFFSET(p->hw_inst), len); + if (of_machine_is_compatible("brcm,bcm2711")) { + pci1xxxx_spi_read_from_io(&rx_buf[bytes_recvd], + par->reg_base + + SPI_MST_RSP_BUF_OFFSET + (p->hw_inst), len, 4); + } else { + pci1xxxx_spi_read_from_io(&rx_buf[bytes_recvd], + par->reg_base + + SPI_MST_RSP_BUF_OFFSET + (p->hw_inst), len, 8); + } bytes_recvd += len; } } -- 2.25.1