On Mon, Feb 11, 2019 at 06:34:30PM +0100, Gustavo Pimentel wrote:
> Add Synopsys PCIe Endpoint eDMA IP core driver to kernel.
>
> This IP is generally distributed with Synopsys PCIe Endpoint IP (depends
> of the use and licensing agreement).
>
> This core driver, initializes and configures the eDMA IP using vma-helpers
> functions and dma-engine subsystem.
>
> This driver can be compile as built-in or external module in kernel.
>
> To enable this driver just select DW_EDMA option in kernel configuration,
> however it requires and selects automatically DMA_ENGINE and
> DMA_VIRTUAL_CHANNELS option too.

First comment here: try to shrink your code base by, let's say, 15%.
I believe something here is done in a suboptimal way or doesn't take into
consideration existing facilities in the kernel.

> +static void dw_edma_free_chunk(struct dw_edma_desc *desc)
> +{
> +	struct dw_edma_chan *chan = desc->chan;
> +	struct dw_edma_chunk *child, *_next;
> +
> +	if (!desc->chunk)
> +		return;

Is this check necessary? Why?

> +
> +	/* Remove all the list elements */
> +	list_for_each_entry_safe(child, _next, &desc->chunk->list, list) {
> +		dw_edma_free_burst(child);
> +		if (child->bursts_alloc)
> +			dev_dbg(chan2dev(chan), "%u bursts still allocated\n",
> +				child->bursts_alloc);
> +		list_del(&child->list);
> +		kfree(child);
> +		desc->chunks_alloc--;
> +	}
> +
> +	/* Remove the list head */
> +	kfree(child);
> +	desc->chunk = NULL;
> +}

> +static int dw_edma_device_config(struct dma_chan *dchan,
> +				 struct dma_slave_config *config)
> +{
> +	struct dw_edma_chan *chan = dchan2dw_edma_chan(dchan);
> +	unsigned long flags;
> +	int err = 0;
> +
> +	spin_lock_irqsave(&chan->vc.lock, flags);
> +
> +	if (!config) {
> +		err = -EINVAL;
> +		goto err_config;
> +	}

Is this check necessary? Why? And why is it done under the spin lock?

> +	dev_dbg(chan2dev(chan), "addr(physical) src=%pa, dst=%pa\n",
> +		&config->src_addr, &config->dst_addr);

And this. What do you protect by locks? Check all your locking carefully.

> +
> +	chan->src_addr = config->src_addr;
> +	chan->dst_addr = config->dst_addr;
> +
> +	chan->configured = true;
> +	dev_dbg(chan2dev(chan), "channel configured\n");
> +
> +err_config:
> +	spin_unlock_irqrestore(&chan->vc.lock, flags);
> +	return err;
> +}
> +
> +static int dw_edma_device_pause(struct dma_chan *dchan)
> +{
> +	struct dw_edma_chan *chan = dchan2dw_edma_chan(dchan);
> +	unsigned long flags;
> +	int err = 0;
> +
> +	spin_lock_irqsave(&chan->vc.lock, flags);
> +
> +	if (!chan->configured) {
> +		dev_err(chan2dev(chan), "(pause) channel not configured\n");

Why is this under the spinlock?

> +		err = -EPERM;
> +		goto err_pause;
> +	}
> +
> +	if (chan->status != EDMA_ST_BUSY) {
> +		err = -EPERM;
> +		goto err_pause;
> +	}
> +
> +	if (chan->request != EDMA_REQ_NONE) {
> +		err = -EPERM;
> +		goto err_pause;
> +	}
> +
> +	chan->request = EDMA_REQ_PAUSE;
> +	dev_dbg(chan2dev(chan), "pause requested\n");

Ditto. Moreover, check what the function tracer can provide you for debugging.

> +
> +err_pause:
> +	spin_unlock_irqrestore(&chan->vc.lock, flags);
> +	return err;
> +}

> +{
> +	struct dw_edma_chan *chan = dchan2dw_edma_chan(dchan);
> +	struct dw_edma_desc *desc;
> +	struct virt_dma_desc *vd;
> +	unsigned long flags;
> +	enum dma_status ret;
> +	u32 residue = 0;
> +
> +	ret = dma_cookie_status(dchan, cookie, txstate);
> +	if (ret == DMA_COMPLETE)
> +		return ret;
> +
> +	if (!txstate)
> +		return ret;

So, if there is no txstate, you can't return the PAUSED state? Why?
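To illustrate what I mean: determine the PAUSED state first and make only the
residue lookup conditional on txstate. A rough, untested sketch on top of your
code (note that dma_set_residue() already copes with a NULL state):

	ret = dma_cookie_status(dchan, cookie, txstate);
	if (ret == DMA_COMPLETE)
		return ret;

	spin_lock_irqsave(&chan->vc.lock, flags);

	/* Report PAUSED even when the caller passes no txstate */
	if (ret == DMA_IN_PROGRESS && chan->status == EDMA_ST_PAUSE)
		ret = DMA_PAUSED;

	if (txstate) {
		vd = vchan_find_desc(&chan->vc, cookie);
		if (vd) {
			desc = vd2dw_edma_desc(vd);
			residue = desc->alloc_sz - desc->xfer_sz;
		}
	}

	spin_unlock_irqrestore(&chan->vc.lock, flags);

	dma_set_residue(txstate, residue);

	return ret;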
> +
> +	spin_lock_irqsave(&chan->vc.lock, flags);
> +
> +	if (ret == DMA_IN_PROGRESS && chan->status == EDMA_ST_PAUSE)
> +		ret = DMA_PAUSED;
> +
> +	vd = vchan_find_desc(&chan->vc, cookie);
> +	if (!vd)
> +		goto ret_status;
> +
> +	desc = vd2dw_edma_desc(vd);
> +	if (desc)
> +		residue = desc->alloc_sz - desc->xfer_sz;
> +
> +ret_status:
> +	spin_unlock_irqrestore(&chan->vc.lock, flags);
> +	dma_set_residue(txstate, residue);
> +
> +	return ret;
> +}

> +	for (i = 0; i < cnt; i++) {
> +		if (!xfer->cyclic && !sg)
> +			break;
> +
> +		if (chunk->bursts_alloc == chan->ll_max) {
> +			chunk = dw_edma_alloc_chunk(desc);
> +			if (unlikely(!chunk))
> +				goto err_alloc;
> +		}
> +
> +		burst = dw_edma_alloc_burst(chunk);
> +
> +		if (unlikely(!burst))
> +			goto err_alloc;
> +
> +		if (xfer->cyclic)
> +			burst->sz = xfer->xfer.cyclic.len;
> +		else
> +			burst->sz = sg_dma_len(sg);
> +
> +		chunk->ll_region.sz += burst->sz;
> +		desc->alloc_sz += burst->sz;
> +
> +		if (direction == DMA_DEV_TO_MEM) {
> +			burst->sar = src_addr;
> +			if (xfer->cyclic) {
> +				burst->dar = xfer->xfer.cyclic.paddr;
> +			} else {
> +				burst->dar = sg_dma_address(sg);
> +				src_addr += sg_dma_len(sg);
> +			}
> +		} else {
> +			burst->dar = dst_addr;
> +			if (xfer->cyclic) {
> +				burst->sar = xfer->xfer.cyclic.paddr;
> +			} else {
> +				burst->sar = sg_dma_address(sg);
> +				dst_addr += sg_dma_len(sg);
> +			}
> +		}
> +
> +		dev_dbg(chan2dev(chan), "lli %u/%u, sar=0x%.8llx, dar=0x%.8llx, size=%u bytes\n",
> +			i + 1, cnt, burst->sar, burst->dar, burst->sz);
> +
> +		if (!xfer->cyclic)
> +			sg = sg_next(sg);

Shouldn't you rather use for_each_sg()?

> +	}

> +	spin_lock_irqsave(&chan->vc.lock, flags);
> +	vd = vchan_next_desc(&chan->vc);
> +	switch (chan->request) {
> +	case EDMA_REQ_NONE:
> +		if (!vd)
> +			break;

Shouldn't this check be outside of the switch?

> +
> +		desc = vd2dw_edma_desc(vd);
> +		if (desc->chunks_alloc) {
> +			dev_dbg(chan2dev(chan), "sub-transfer complete\n");
> +			chan->status = EDMA_ST_BUSY;
> +			dev_dbg(chan2dev(chan), "transferred %u bytes\n",
> +				desc->xfer_sz);
> +			dw_edma_start_transfer(chan);
> +		} else {
> +			list_del(&vd->node);
> +			vchan_cookie_complete(vd);
> +			chan->status = EDMA_ST_IDLE;
> +			dev_dbg(chan2dev(chan), "transfer complete\n");
> +		}
> +		break;
> +	case EDMA_REQ_STOP:
> +		if (!vd)
> +			break;
> +
> +		list_del(&vd->node);
> +		vchan_cookie_complete(vd);
> +		chan->request = EDMA_REQ_NONE;
> +		chan->status = EDMA_ST_IDLE;
> +		dev_dbg(chan2dev(chan), "transfer stop\n");
> +		break;
> +	case EDMA_REQ_PAUSE:
> +		chan->request = EDMA_REQ_NONE;
> +		chan->status = EDMA_ST_PAUSE;
> +		break;
> +	default:
> +		dev_err(chan2dev(chan), "invalid status state\n");
> +		break;
> +	}
> +	spin_unlock_irqrestore(&chan->vc.lock, flags);
> +}

> +static irqreturn_t dw_edma_interrupt(int irq, void *data, bool write)
> +{
> +	struct dw_edma_irq *dw_irq = data;
> +	struct dw_edma *dw = dw_irq->dw;
> +	unsigned long total, pos, val;
> +	unsigned long off;
> +	u32 mask;
> +
> +	if (write) {
> +		total = dw->wr_ch_cnt;
> +		off = 0;
> +		mask = dw_irq->wr_mask;
> +	} else {
> +		total = dw->rd_ch_cnt;
> +		off = dw->wr_ch_cnt;
> +		mask = dw_irq->rd_mask;
> +	}
> +
> +	pos = 0;
> +	val = dw_edma_v0_core_status_done_int(dw, write ?
> +					      EDMA_DIR_WRITE :
> +					      EDMA_DIR_READ);
> +	val &= mask;
> +	while ((pos = find_next_bit(&val, total, pos)) != total) {

Is this a funny version of for_each_set_bit()?

> +		struct dw_edma_chan *chan = &dw->chan[pos + off];
> +
> +		dw_edma_done_interrupt(chan);
> +		pos++;
> +	}
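With for_each_set_bit() the open-coded loop above (and the abort one below)
would collapse to something like this, reusing your existing variables
(untested):

	for_each_set_bit(pos, &val, total)
		dw_edma_done_interrupt(&dw->chan[pos + off]);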
> +
> +	pos = 0;
> +	val = dw_edma_v0_core_status_abort_int(dw, write ?
> +					       EDMA_DIR_WRITE :
> +					       EDMA_DIR_READ);
> +	val &= mask;
> +	while ((pos = find_next_bit(&val, total, pos)) != total) {

Ditto.

> +		struct dw_edma_chan *chan = &dw->chan[pos + off];
> +
> +		dw_edma_abort_interrupt(chan);
> +		pos++;
> +	}
> +
> +	return IRQ_HANDLED;
> +}

> +	do {
> +		ret = dw_edma_device_terminate_all(dchan);
> +		if (!ret)
> +			break;
> +
> +		if (time_after_eq(jiffies, timeout)) {
> +			dev_err(chan2dev(chan), "free timeout\n");
> +			return;
> +		}
> +
> +		cpu_relax();
> +	} while (1);

So, what prevents you from doing while (time_before(...)); here?

> +
> +	dev_dbg(dchan2dev(dchan), "channel freed\n");
> +
> +	pm_runtime_put(chan->chip->dev);
> +}

> +static int dw_edma_channel_setup(struct dw_edma_chip *chip, bool write,
> +				 u32 wr_alloc, u32 rd_alloc)
> +{
> +	struct dw_edma_region *dt_region;
> +	struct device *dev = chip->dev;
> +	struct dw_edma *dw = chip->dw;
> +	struct dw_edma_chan *chan;
> +	size_t ll_chunk, dt_chunk;
> +	struct dw_edma_irq *irq;
> +	struct dma_device *dma;
> +	u32 i, j, cnt, ch_cnt;
> +	u32 alloc, off_alloc;
> +	int err = 0;
> +	u32 pos;
> +
> +	ch_cnt = dw->wr_ch_cnt + dw->rd_ch_cnt;
> +	ll_chunk = dw->ll_region.sz;
> +	dt_chunk = dw->dt_region.sz;
> +
> +	/* Calculate linked list chunk for each channel */
> +	ll_chunk /= roundup_pow_of_two(ch_cnt);
> +
> +	/* Calculate linked list chunk for each channel */
> +	dt_chunk /= roundup_pow_of_two(ch_cnt);

> +	INIT_LIST_HEAD(&dma->channels);
> +
> +	for (j = 0; (alloc || dw->nr_irqs == 1) && j < cnt; j++, i++) {
> +		chan = &dw->chan[i];
> +
> +		dt_region = devm_kzalloc(dev, sizeof(*dt_region), GFP_KERNEL);
> +		if (!dt_region)
> +			return -ENOMEM;
> +
> +		chan->vc.chan.private = dt_region;
> +
> +		chan->chip = chip;
> +		chan->id = j;
> +		chan->dir = write ? EDMA_DIR_WRITE : EDMA_DIR_READ;
> +		chan->configured = false;
> +		chan->request = EDMA_REQ_NONE;
> +		chan->status = EDMA_ST_IDLE;
> +
> +		chan->ll_off = (ll_chunk * i);
> +		chan->ll_max = (ll_chunk / EDMA_LL_SZ) - 1;
> +
> +		chan->dt_off = (dt_chunk * i);
> +
> +		dev_dbg(dev, "L. List:\tChannel %s[%u] off=0x%.8lx, max_cnt=%u\n",
> +			write ? "write" : "read", j,
> +			chan->ll_off, chan->ll_max);
> +
> +		if (dw->nr_irqs == 1)
> +			pos = 0;
> +		else
> +			pos = off_alloc + (j % alloc);
> +
> +		irq = &dw->irq[pos];
> +
> +		if (write)
> +			irq->wr_mask |= BIT(j);
> +		else
> +			irq->rd_mask |= BIT(j);
> +
> +		irq->dw = dw;
> +		memcpy(&chan->msi, &irq->msi, sizeof(chan->msi));
> +
> +		dev_dbg(dev, "MSI:\t\tChannel %s[%u] addr=0x%.8x%.8x, data=0x%.8x\n",
> +			write ? "write" : "read", j,
> +			chan->msi.address_hi, chan->msi.address_lo,
> +			chan->msi.data);
> +
> +		chan->vc.desc_free = vchan_free_desc;
> +		vchan_init(&chan->vc, dma);
> +
> +		dt_region->paddr = dw->dt_region.paddr + chan->dt_off;
> +		dt_region->vaddr = dw->dt_region.vaddr + chan->dt_off;
> +		dt_region->sz = dt_chunk;
> +
> +		dev_dbg(dev, "Data:\tChannel %s[%u] off=0x%.8lx\n",
> +			write ? "write" : "read", j, chan->dt_off);
> +
> +		dw_edma_v0_core_device_config(chan);
> +	}
> +
> +	/* Set DMA channel capabilities */
> +	dma_cap_zero(dma->cap_mask);
> +	dma_cap_set(DMA_SLAVE, dma->cap_mask);
> +	dma_cap_set(DMA_CYCLIC, dma->cap_mask);

Isn't it used for slave transfers? DMA_PRIVATE is good to be set for this.

> +	return err;
> +}
> +
> +static inline void dw_edma_dec_irq_alloc(int *nr_irqs, u32 *alloc, u16 cnt)
> +{
> +	if (*nr_irqs && *alloc < cnt) {
> +		(*alloc)++;
> +		(*nr_irqs)--;
> +	}

I don't see any allocation here.
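As far as I can tell this only moves a counter around while there are vectors
left. If the intent of it (together with the while loop in
dw_edma_irq_request() below) is simply to split the available vectors between
write and read channels in proportion to the channel counts, that could be
computed directly, along these lines (hypothetical, untested sketch using
your variables):

	/* Proportional split, capped by the respective channel counts */
	*wr_alloc = min_t(u32, dw->wr_ch_cnt,
			  DIV_ROUND_UP(dw->nr_irqs * dw->wr_ch_cnt, ch_cnt));
	*rd_alloc = min_t(u32, dw->rd_ch_cnt, dw->nr_irqs - *wr_alloc);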
> +}
> +
> +static inline void dw_edma_add_irq_mask(u32 *mask, u32 alloc, u16 cnt)
> +{
> +	while (*mask * alloc < cnt)
> +		(*mask)++;

Do you really need a loop here?

> +}
> +
> +static int dw_edma_irq_request(struct dw_edma_chip *chip,
> +			       u32 *wr_alloc, u32 *rd_alloc)
> +{
> +	struct device *dev = chip->dev;
> +	struct dw_edma *dw = chip->dw;
> +	u32 wr_mask = 1;
> +	u32 rd_mask = 1;
> +	int i, err = 0;
> +	u32 ch_cnt;
> +
> +	ch_cnt = dw->wr_ch_cnt + dw->rd_ch_cnt;
> +
> +	if (dw->nr_irqs < 1) {
> +		dev_err(dev, "invalid number of irqs (%u)\n", dw->nr_irqs);
> +		return -EINVAL;
> +	}
> +
> +	if (dw->nr_irqs == 1) {
> +		/* Common IRQ shared among all channels */
> +		err = request_irq(pci_irq_vector(to_pci_dev(dev), 0),
> +				  dw_edma_interrupt_common,
> +				  IRQF_SHARED, dw->name, &dw->irq[0]);
> +		if (err) {
> +			dw->nr_irqs = 0;
> +			return err;
> +		}
> +
> +		get_cached_msi_msg(pci_irq_vector(to_pci_dev(dev), 0),
> +				   &dw->irq[0].msi);

Are you going to call pci_irq_vector() each time? Btw, am I missing
pci_irq_alloc()?

> +	} else {
> +		/* Distribute IRQs equally among all channels */
> +		int tmp = dw->nr_irqs;

Is it always achievable?

> +
> +		while (tmp && (*wr_alloc + *rd_alloc) < ch_cnt) {
> +			dw_edma_dec_irq_alloc(&tmp, wr_alloc, dw->wr_ch_cnt);
> +			dw_edma_dec_irq_alloc(&tmp, rd_alloc, dw->rd_ch_cnt);
> +		}
> +
> +		dw_edma_add_irq_mask(&wr_mask, *wr_alloc, dw->wr_ch_cnt);
> +		dw_edma_add_irq_mask(&rd_mask, *rd_alloc, dw->rd_ch_cnt);
> +
> +		for (i = 0; i < (*wr_alloc + *rd_alloc); i++) {
> +			err = request_irq(pci_irq_vector(to_pci_dev(dev), i),
> +					  i < *wr_alloc ?
> +						dw_edma_interrupt_write :
> +						dw_edma_interrupt_read,
> +					  IRQF_SHARED, dw->name,
> +					  &dw->irq[i]);
> +			if (err) {
> +				dw->nr_irqs = i;
> +				return err;
> +			}
> +
> +			get_cached_msi_msg(pci_irq_vector(to_pci_dev(dev), i),
> +					   &dw->irq[i].msi);
> +		}
> +
> +		dw->nr_irqs = i;
> +	}
> +
> +	return err;
> +}
> +
> +int dw_edma_probe(struct dw_edma_chip *chip)
> +{
> +	struct device *dev = chip->dev;
> +	struct dw_edma *dw = chip->dw;
> +	u32 wr_alloc = 0;
> +	u32 rd_alloc = 0;
> +	int i, err;
> +
> +	raw_spin_lock_init(&dw->lock);
> +
> +	/* Find out how many write channels are supported by hardware */
> +	dw->wr_ch_cnt = dw_edma_v0_core_ch_count(dw, EDMA_DIR_WRITE);
> +	if (!dw->wr_ch_cnt) {
> +		dev_err(dev, "invalid number of write channels(0)\n");
> +		return -EINVAL;
> +	}
> +
> +	/* Find out how many read channels are supported by hardware */
> +	dw->rd_ch_cnt = dw_edma_v0_core_ch_count(dw, EDMA_DIR_READ);
> +	if (!dw->rd_ch_cnt) {
> +		dev_err(dev, "invalid number of read channels(0)\n");
> +		return -EINVAL;
> +	}
> +
> +	dev_dbg(dev, "Channels:\twrite=%d, read=%d\n",
> +		dw->wr_ch_cnt, dw->rd_ch_cnt);
> +
> +	/* Allocate channels */
> +	dw->chan = devm_kcalloc(dev, dw->wr_ch_cnt + dw->rd_ch_cnt,
> +				sizeof(*dw->chan), GFP_KERNEL);
> +	if (!dw->chan)
> +		return -ENOMEM;
> +
> +	snprintf(dw->name, sizeof(dw->name), "dw-edma-core:%d", chip->id);
> +
> +	/* Disable eDMA, only to establish the ideal initial conditions */
> +	dw_edma_v0_core_off(dw);
> +
> +	/* Request IRQs */
> +	err = dw_edma_irq_request(chip, &wr_alloc, &rd_alloc);
> +	if (err)
> +		return err;
> +
> +	/* Setup write channels */
> +	err = dw_edma_channel_setup(chip, true, wr_alloc, rd_alloc);
> +	if (err)
> +		goto err_irq_free;
> +
> +	/* Setup read channels */
> +	err = dw_edma_channel_setup(chip, false, wr_alloc, rd_alloc);
> +	if (err)
> +		goto err_irq_free;

I think you may look into the ep93xx driver to see how different types of
channels are allocated and set up.
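Also, coming back to pci_irq_vector() above: I would call it once per vector
and keep the result around instead of calling it twice per iteration. Roughly
like this (untested sketch based on your loop):

	for (i = 0; i < (*wr_alloc + *rd_alloc); i++) {
		int irq = pci_irq_vector(to_pci_dev(dev), i);

		err = request_irq(irq,
				  i < *wr_alloc ?
					dw_edma_interrupt_write :
					dw_edma_interrupt_read,
				  IRQF_SHARED, dw->name, &dw->irq[i]);
		if (err) {
			dw->nr_irqs = i;
			return err;
		}

		get_cached_msi_msg(irq, &dw->irq[i].msi);
	}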
> +
> +	/* Power management */
> +	pm_runtime_enable(dev);
> +
> +	/* Turn debugfs on */
> +	err = dw_edma_v0_core_debugfs_on(chip);
> +	if (err) {
> +		dev_err(dev, "unable to create debugfs structure\n");
> +		goto err_pm_disable;
> +	}
> +
> +	return 0;
> +
> +err_pm_disable:
> +	pm_runtime_disable(dev);
> +err_irq_free:
> +	for (i = (dw->nr_irqs - 1); i >= 0; i--)
> +		free_irq(pci_irq_vector(to_pci_dev(dev), i), &dw->irq[i]);
> +
> +	dw->nr_irqs = 0;
> +
> +	return err;
> +}
> +EXPORT_SYMBOL_GPL(dw_edma_probe);

> +/**
> + * struct dw_edma_chip - representation of DesignWare eDMA controller hardware
> + * @dev:	struct device of the eDMA controller
> + * @id:		instance ID
> + * @irq:	irq line
> + * @dw:		struct dw_edma that is filed by dw_edma_probe()
> + */
> +struct dw_edma_chip {
> +	struct device *dev;
> +	int id;
> +	int irq;
> +	struct dw_edma *dw;
> +};
> +
> +/* Export to the platform drivers */
> +#if IS_ENABLED(CONFIG_DW_EDMA)
> +int dw_edma_probe(struct dw_edma_chip *chip);
> +int dw_edma_remove(struct dw_edma_chip *chip);
> +#else
> +static inline int dw_edma_probe(struct dw_edma_chip *chip)
> +{
> +	return -ENODEV;
> +}
> +
> +static inline int dw_edma_remove(struct dw_edma_chip *chip)
> +{
> +	return 0;
> +}
> +#endif /* CONFIG_DW_EDMA */

Is it going to be used as a library?

-- 
With Best Regards,
Andy Shevchenko