On 31.10.2018 19:03, Ben Dooks wrote: > The tx_status callback does not report the state of the transfer > beyond complete segments. This causes problems with users such as > ALSA when applications want to know accurately how much data has > been moved. > > This patch addes a function tegra_dma_update_residual() to query > the hardware and modify the residual information accordinly. It > takes into account any hardware issues when trying to read the > state, such as delays between finishing a buffer and signalling > the interrupt. > > Signed-off-by: Ben Dooks <ben.dooks@xxxxxxxxxxxxxxx> Hello Ben, Thank you very much for the patch! It fixes the "farting sound" for chromium-browser and applications that use chromium-engine (tested on Tegra20) because apparently it tries to use low latency for everything and the audio buffer is constantly underflowing without more detailed DMA-progress reporting. See a couple more comments below. > --- > drivers/dma/tegra20-apb-dma.c | 94 ++++++++++++++++++++++++++++++++--- > 1 file changed, 87 insertions(+), 7 deletions(-) > > diff --git a/drivers/dma/tegra20-apb-dma.c b/drivers/dma/tegra20-apb-dma.c > index 4f7d1e576d03..3fa3a1ac4f57 100644 > --- a/drivers/dma/tegra20-apb-dma.c > +++ b/drivers/dma/tegra20-apb-dma.c > @@ -802,12 +802,96 @@ static int tegra_dma_terminate_all(struct dma_chan *dc) > return 0; > } > > +static unsigned int tegra_dma_update_residual(struct tegra_dma_channel *tdc, > + struct tegra_dma_sg_req *sg_req, > + struct tegra_dma_desc *dma_desc, > + unsigned int residual) > +{ > + unsigned long status = 0x0; There is no need to initialize the "status" variable. 
> + unsigned long wcount; > + unsigned long ahbptr; > + unsigned long tmp = 0x0; > + unsigned int result; > + int retries = TEGRA_APBDMA_BURST_COMPLETE_TIME * 10; > + int done; > + > + /* if we're not the current request, then don't alter the residual */ > + if (sg_req != list_first_entry(&tdc->pending_sg_req, > + struct tegra_dma_sg_req, node)) { > + result = residual; > + ahbptr = 0xffffffff; > + goto done; > + } > + > + /* loop until we have a reliable result for residual */ > + do { > + ahbptr = tdc_read(tdc, TEGRA_APBDMA_CHAN_AHBPTR); > + status = tdc_read(tdc, TEGRA_APBDMA_CHAN_STATUS); > + tmp = tdc_read(tdc, 0x08); /* total count for debug */ Register 0x08 (DMA_BYTE_STA) isn't present on Tegra20 and "tmp" isn't used anywhere in the code. Please remove it entirely. > + > + /* check status, if channel isn't busy then skip */ > + if (!(status & TEGRA_APBDMA_STATUS_BUSY)) { > + result = residual; > + break; > + } If "BUSY" is unset, doesn't this mean that the transaction could be completed already? I don't quite understand why you want to skip here. > + > + /* if we've got an interrupt pending on the channel, don't > + * try and deal with the residue as the hardware has likely > + * moved on to the next buffer. return all data moved. > + */ > + if (status & TEGRA_APBDMA_STATUS_ISE_EOC) { > + result = residual - sg_req->req_len; > + break; > + } > + > + if (tdc->tdma->chip_data->support_separate_wcount_reg) > + wcount = tdc_read(tdc, TEGRA_APBDMA_CHAN_WORD_TRANSFER); > + else > + wcount = status; > + > + /* If the request is at the full point, then there is a > + * chance that we have read the status register in the > + * middle of the hardware reloading the next buffer. > + * > + * The sequence seems to be at the end of the buffer, to > + * load the new word count before raising the EOC flag (or > + * changing the ping-pong flag which could have also been > + * used to determine a new buffer). 
This means there is a two whitespaces here-----| > + * small window where we cannot determine zero-done for the > + * current buffer, or moved to next buffer. > + * > + * If done shows 0, then retry the load, as it may hit the > + * above hardware race. We will either get a new value which > + * is from the first buffer, or we get an EOC (new buffer) > + * or both a new value and an EOC... I think we just need to wait 20usec after reading out the "words count" and then re-check the interrupt status, so the transfer is done if the interrupt is set, and otherwise the "words count" value is current and reliable. > + */ > + done = get_current_xferred_count(tdc, sg_req, wcount); > + if (done != 0) { > + result = residual - done; > + break; > + } > + > + ndelay(100); There is no ndelay() on ARM, hence your 20usec timeout is 200usec. Please use udelay(). > + } while (--retries > 0); > + > + if (retries <= 0) { > + dev_err(tdc2dev(tdc), "timeout waiting for dma load\n"); > + result = residual; > + } > + > +done: Please rename the goto label, as it duplicates a local variable name. > + dev_dbg(tdc2dev(tdc), "residual: req %08lx, ahb@%08lx, wcount %08lx, done %d\n", > + sg_req->ch_regs.ahb_ptr, ahbptr, wcount, done); The whitespace just after the tab is not needed. 
> + > + return result; > +} > + > static enum dma_status tegra_dma_tx_status(struct dma_chan *dc, > dma_cookie_t cookie, struct dma_tx_state *txstate) > { > struct tegra_dma_channel *tdc = to_tegra_dma_chan(dc); > struct tegra_dma_desc *dma_desc; > - struct tegra_dma_sg_req *sg_req; > + struct tegra_dma_sg_req *sg_req = NULL; > enum dma_status ret; > unsigned long flags; > unsigned int residual; > @@ -843,6 +927,7 @@ static enum dma_status tegra_dma_tx_status(struct dma_chan *dc, > residual = dma_desc->bytes_requested - > (dma_desc->bytes_transferred % > dma_desc->bytes_requested); > + residual = tegra_dma_update_residual(tdc, sg_req, dma_desc, residual); > dma_set_residue(txstate, residual); > } > > @@ -1436,12 +1521,7 @@ static int tegra_dma_probe(struct platform_device *pdev) > BIT(DMA_SLAVE_BUSWIDTH_4_BYTES) | > BIT(DMA_SLAVE_BUSWIDTH_8_BYTES); > tdma->dma_dev.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV); > - /* > - * XXX The hardware appears to support > - * DMA_RESIDUE_GRANULARITY_BURST-level reporting, but it's > - * only used by this driver during tegra_dma_terminate_all() > - */ > - tdma->dma_dev.residue_granularity = DMA_RESIDUE_GRANULARITY_SEGMENT; > + tdma->dma_dev.residue_granularity = DMA_RESIDUE_GRANULARITY_BURST; > tdma->dma_dev.device_config = tegra_dma_slave_config; > tdma->dma_dev.device_terminate_all = tegra_dma_terminate_all; > tdma->dma_dev.device_tx_status = tegra_dma_tx_status; > Summarizing all of the comments above, the patch may look like this: diff --git a/drivers/dma/tegra20-apb-dma.c b/drivers/dma/tegra20-apb-dma.c index 9a558e30c461..956e8130c059 100644 --- a/drivers/dma/tegra20-apb-dma.c +++ b/drivers/dma/tegra20-apb-dma.c @@ -799,12 +799,73 @@ static int tegra_dma_terminate_all(struct dma_chan *dc) return 0; } +static unsigned int tegra_dma_update_residual(struct tegra_dma_channel *tdc, + struct tegra_dma_sg_req *sg_req, + struct tegra_dma_desc *dma_desc, + unsigned int residual) +{ + unsigned long status; + unsigned int 
result; + int done; + + /* if we're not the current request, then don't alter the residual */ + if (sg_req != list_first_entry(&tdc->pending_sg_req, + struct tegra_dma_sg_req, node)) + return residual; + + status = tdc_read(tdc, TEGRA_APBDMA_CHAN_STATUS); + + /* if we've got an interrupt pending on the channel, don't + * try and deal with the residue as the hardware has likely + * moved on to the next buffer. return all data moved. + */ + if (status & TEGRA_APBDMA_STATUS_ISE_EOC) { + result = residual - sg_req->req_len; + goto out; + } + + if (tdc->tdma->chip_data->support_separate_wcount_reg) + status = tdc_read(tdc, TEGRA_APBDMA_CHAN_WORD_TRANSFER); + + /* + * If the request is at the full point, then there is a + * chance that we have read the status register in the + * middle of the hardware reloading the next buffer. + * + * The sequence seems to be at the end of the buffer, to + * load the new word count before raising the EOC flag (or + * changing the ping-pong flag which could have also been + * used to determine a new buffer). This means there is a + * small window where we cannot determine zero-done for the + * current buffer, or moved to next buffer. 
+ */ + done = get_current_xferred_count(tdc, sg_req, status); + + udelay(TEGRA_APBDMA_BURST_COMPLETE_TIME); + + status = tdc_read(tdc, TEGRA_APBDMA_CHAN_STATUS); + + if (status & TEGRA_APBDMA_STATUS_ISE_EOC) + result = residual - sg_req->req_len; + else + result = residual - done; +out: +#ifdef DEBUG + ahbptr = tdc_read(tdc, TEGRA_APBDMA_CHAN_AHBPTR); + + dev_dbg(tdc2dev(tdc), "residual: req %08lx, ahb@%08lx, wcount %08lx, done %d\n", + sg_req->ch_regs.ahb_ptr, ahbptr, wcount, done); +#endif + + return result; +} + static enum dma_status tegra_dma_tx_status(struct dma_chan *dc, dma_cookie_t cookie, struct dma_tx_state *txstate) { struct tegra_dma_channel *tdc = to_tegra_dma_chan(dc); struct tegra_dma_desc *dma_desc; - struct tegra_dma_sg_req *sg_req; + struct tegra_dma_sg_req *sg_req = NULL; enum dma_status ret; unsigned long flags; unsigned int residual; @@ -840,6 +901,7 @@ static enum dma_status tegra_dma_tx_status(struct dma_chan *dc, residual = dma_desc->bytes_requested - (dma_desc->bytes_transferred % dma_desc->bytes_requested); + residual = tegra_dma_update_residual(tdc, sg_req, dma_desc, residual); dma_set_residue(txstate, residual); } @@ -1433,12 +1495,7 @@ static int tegra_dma_probe(struct platform_device *pdev) BIT(DMA_SLAVE_BUSWIDTH_4_BYTES) | BIT(DMA_SLAVE_BUSWIDTH_8_BYTES); tdma->dma_dev.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV); - /* - * XXX The hardware appears to support - * DMA_RESIDUE_GRANULARITY_BURST-level reporting, but it's - * only used by this driver during tegra_dma_terminate_all() - */ - tdma->dma_dev.residue_granularity = DMA_RESIDUE_GRANULARITY_SEGMENT; + tdma->dma_dev.residue_granularity = DMA_RESIDUE_GRANULARITY_BURST; tdma->dma_dev.device_config = tegra_dma_slave_config; tdma->dma_dev.device_terminate_all = tegra_dma_terminate_all; tdma->dma_dev.device_tx_status = tegra_dma_tx_status; --