On Fri, May 15, 2020 at 01:47:41PM +0300, Serge Semin wrote: > Since DMA transfers are performed asynchronously with actual SPI > transaction, then even if DMA transfers are finished it doesn't mean > all data is actually pushed to the SPI bus. Some data might still be > in the controller FIFO. This is specifically true for Tx-only > transfers. In this case if the next SPI transfer is recharged while > a tail of the previous one is still in FIFO, we'll loose that tail > data. In order to fix this lets add the wait procedure of the Tx/Rx > SPI transfers completion after the corresponding DMA transactions > are finished. General question: doesn't the SPI core provide us with some helpers like spi_delay_exec()? > Co-developed-by: Georgy Vlasov <Georgy.Vlasov@xxxxxxxxxxxxxxxxxxxx> > Signed-off-by: Georgy Vlasov <Georgy.Vlasov@xxxxxxxxxxxxxxxxxxxx> > Signed-off-by: Serge Semin <Sergey.Semin@xxxxxxxxxxxxxxxxxxxx> > Cc: Ramil Zaripov <Ramil.Zaripov@xxxxxxxxxxxxxxxxxxxx> > Cc: Alexey Malahov <Alexey.Malahov@xxxxxxxxxxxxxxxxxxxx> > Cc: Thomas Bogendoerfer <tsbogend@xxxxxxxxxxxxxxxx> > Cc: Paul Burton <paulburton@xxxxxxxxxx> > Cc: Ralf Baechle <ralf@xxxxxxxxxxxxxx> > Cc: Arnd Bergmann <arnd@xxxxxxxx> > Cc: Allison Randal <allison@xxxxxxxxxxx> > Cc: Andy Shevchenko <andriy.shevchenko@xxxxxxxxxxxxxxx> > Cc: Gareth Williams <gareth.williams.jx@xxxxxxxxxxx> > Cc: Rob Herring <robh+dt@xxxxxxxxxx> > Cc: linux-mips@xxxxxxxxxxxxxxx > Cc: devicetree@xxxxxxxxxxxxxxx > > --- > > Changelog v2: > - Use conditional statement instead of the ternary operator in the ref > clock getter. > - Move the patch to the head of the series so one could be picked up to > the stable kernels as a fix. You forgot a Fixes tag. 
> --- > drivers/spi/spi-dw-mid.c | 50 ++++++++++++++++++++++++++++++++++++++++ > drivers/spi/spi-dw.h | 10 ++++++++ > 2 files changed, 60 insertions(+) > > diff --git a/drivers/spi/spi-dw-mid.c b/drivers/spi/spi-dw-mid.c > index 177e1f5ec62b..7a5ae1506365 100644 > --- a/drivers/spi/spi-dw-mid.c > +++ b/drivers/spi/spi-dw-mid.c > @@ -16,7 +16,9 @@ > #include <linux/irqreturn.h> > #include <linux/pci.h> > #include <linux/platform_data/dma-dw.h> > +#include <linux/delay.h> Keep the includes in order. > > +#define WAIT_RETRIES 5 > #define RX_BUSY 0 > #define TX_BUSY 1 > > @@ -141,6 +143,28 @@ static enum dma_slave_buswidth convert_dma_width(u32 dma_width) { > return DMA_SLAVE_BUSWIDTH_UNDEFINED; > } > > +static inline bool dw_spi_dma_tx_busy(struct dw_spi *dws) > +{ > + return !(dw_readl(dws, DW_SPI_SR) & SR_TF_EMPT); > +} > + > +static void dw_spi_dma_wait_tx_done(struct dw_spi *dws) > +{ > + int retry = WAIT_RETRIES; > + unsigned long ns; > + > + ns = (NSEC_PER_SEC / spi_get_clk(dws)) * dws->n_bytes * BITS_PER_BYTE; > + ns *= dw_readl(dws, DW_SPI_TXFLR); > + > + while (dw_spi_dma_tx_busy(dws) && retry--) > + ndelay(ns); This misses power management for the CPU, and do you really need this to be atomic? In the end, why not use readx_poll_timeout()? > + if (retry < 0) { Usually we do unsigned int retries = NNNN; do { ... } while (--retries); if (!retries) ... But in any case, see above. > + dev_err(&dws->master->dev, "Tx hanged up\n"); > + dws->master->cur_msg->status = -EIO; > + } > +} The same comments apply to the Rx part. > + > /* > * dws->dma_chan_busy is set before the dma transfer starts, callback for tx > * channel will clear a corresponding bit. 
> @@ -149,6 +173,8 @@ static void dw_spi_dma_tx_done(void *arg) > { > struct dw_spi *dws = arg; > > + dw_spi_dma_wait_tx_done(dws); > + > clear_bit(TX_BUSY, &dws->dma_chan_busy); > if (test_bit(RX_BUSY, &dws->dma_chan_busy)) > return; > @@ -188,6 +214,28 @@ static struct dma_async_tx_descriptor *dw_spi_dma_prepare_tx(struct dw_spi *dws, > return txdesc; > } > > +static inline bool dw_spi_dma_rx_busy(struct dw_spi *dws) > +{ > + return !!(dw_readl(dws, DW_SPI_SR) & SR_RF_NOT_EMPT); > +} > + > +static void dw_spi_dma_wait_rx_done(struct dw_spi *dws) > +{ > + int retry = WAIT_RETRIES; > + unsigned long ns; > + > + ns = (NSEC_PER_SEC / spi_get_clk(dws)) * dws->n_bytes * BITS_PER_BYTE; > + ns *= dw_readl(dws, DW_SPI_RXFLR); > + > + while (dw_spi_dma_rx_busy(dws) && retry--) > + ndelay(ns); > + > + if (retry < 0) { > + dev_err(&dws->master->dev, "Rx hanged up\n"); > + dws->master->cur_msg->status = -EIO; > + } > +} > + > /* > * dws->dma_chan_busy is set before the dma transfer starts, callback for rx > * channel will clear a corresponding bit. > @@ -196,6 +244,8 @@ static void dw_spi_dma_rx_done(void *arg) > { > struct dw_spi *dws = arg; > > + dw_spi_dma_wait_rx_done(dws); > + > clear_bit(RX_BUSY, &dws->dma_chan_busy); > if (test_bit(TX_BUSY, &dws->dma_chan_busy)) > return; > diff --git a/drivers/spi/spi-dw.h b/drivers/spi/spi-dw.h > index e92d43b9a9e6..81364f501b7e 100644 > --- a/drivers/spi/spi-dw.h > +++ b/drivers/spi/spi-dw.h > @@ -210,6 +210,16 @@ static inline void spi_set_clk(struct dw_spi *dws, u16 div) > dw_writel(dws, DW_SPI_BAUDR, div); > } > > +static inline u32 spi_get_clk(struct dw_spi *dws) > +{ > + u32 div = dw_readl(dws, DW_SPI_BAUDR); > + > + if (!div) > + return 0; > + > + return dws->max_freq / div; > +} > + > /* Disable IRQ bits */ > static inline void spi_mask_intr(struct dw_spi *dws, u32 mask) > { > -- > 2.25.1 > -- With Best Regards, Andy Shevchenko