On 01/10/19 11:46 AM, Peter Ujfalusi wrote: > From: Grygorii Strashko <grygorii.strashko@xxxxxx> > > The Ring Accelerator (RINGACC or RA) provides hardware acceleration to > enable straightforward passing of work between a producer and a consumer. > There is one RINGACC module per NAVSS on TI AM65x SoCs. > > The RINGACC converts constant-address read and write accesses to equivalent > read or write accesses to a circular data structure in memory. The RINGACC > eliminates the need for each DMA controller which needs to access ring > elements from having to know the current state of the ring (base address, > current offset). The DMA controller performs a read or write access to a > specific address range (which maps to the source interface on the RINGACC) > and the RINGACC replaces the address for the transaction with a new address > which corresponds to the head or tail element of the ring (head for reads, > tail for writes). Since the RINGACC maintains the state, multiple DMA > controllers or channels are allowed to coherently share the same rings as > applicable. The RINGACC is able to place data which is destined towards > software into cached memory directly. > > Supported ring modes: > - Ring Mode > - Messaging Mode > - Credentials Mode > - Queue Manager Mode > > TI-SCI integration: > > Texas Instrument's System Control Interface (TI-SCI) Message Protocol now > has control over Ringacc module resources management (RM) and Rings > configuration. > > The corresponding support of TI-SCI Ringacc module RM protocol > introduced as option through DT parameters: > - ti,sci: phandle on TI-SCI firmware controller DT node > - ti,sci-dev-id: TI-SCI device identifier as per TI-SCI firmware spec > > if both parameters present - Ringacc driver will configure/free/reset Rings > using TI-SCI Message Ringacc RM Protocol. > > The Ringacc driver manages Rings allocation by itself now and requests > TI-SCI firmware to allocate and configure specific Rings only. 
It's done > this way because the Linux driver implements two-stage Rings allocation and > configuration (allocate ring and configure ring) while TI-SCI Message > Protocol supports only one combined operation (allocate+configure). > > Signed-off-by: Grygorii Strashko <grygorii.strashko@xxxxxx> > Signed-off-by: Peter Ujfalusi <peter.ujfalusi@xxxxxx> > --- > drivers/soc/ti/Kconfig | 12 + > drivers/soc/ti/Makefile | 1 + > drivers/soc/ti/k3-ringacc.c | 1165 +++++++++++++++++++++++++++++ > include/linux/soc/ti/k3-ringacc.h | 245 ++++++ > 4 files changed, 1423 insertions(+) > create mode 100644 drivers/soc/ti/k3-ringacc.c > create mode 100644 include/linux/soc/ti/k3-ringacc.h > > diff --git a/drivers/soc/ti/Kconfig b/drivers/soc/ti/Kconfig > index cf545f428d03..87722d33333a 100644 > --- a/drivers/soc/ti/Kconfig > +++ b/drivers/soc/ti/Kconfig > @@ -80,6 +80,18 @@ config TI_SCI_PM_DOMAINS > called ti_sci_pm_domains. Note this is needed early in boot before > rootfs may be available. > > +config TI_K3_RINGACC > + tristate "K3 Ring accelerator Sub System" > + depends on ARCH_K3 || COMPILE_TEST > + depends on TI_SCI_INTA_IRQCHIP > + default y > + help > + Say y here to support the K3 Ring accelerator module. > + The Ring Accelerator (RINGACC or RA) provides hardware acceleration > + to enable straightforward passing of work between a producer > + and a consumer. There is one RINGACC module per NAVSS on TI AM65x SoCs > + If unsure, say N. 
> + > endif # SOC_TI > > config TI_SCI_INTA_MSI_DOMAIN > diff --git a/drivers/soc/ti/Makefile b/drivers/soc/ti/Makefile > index b3868d392d4f..cc4bc8b08bf5 100644 > --- a/drivers/soc/ti/Makefile > +++ b/drivers/soc/ti/Makefile > @@ -9,3 +9,4 @@ obj-$(CONFIG_AMX3_PM) += pm33xx.o > obj-$(CONFIG_WKUP_M3_IPC) += wkup_m3_ipc.o > obj-$(CONFIG_TI_SCI_PM_DOMAINS) += ti_sci_pm_domains.o > obj-$(CONFIG_TI_SCI_INTA_MSI_DOMAIN) += ti_sci_inta_msi.o > +obj-$(CONFIG_TI_K3_RINGACC) += k3-ringacc.o > diff --git a/drivers/soc/ti/k3-ringacc.c b/drivers/soc/ti/k3-ringacc.c > new file mode 100644 > index 000000000000..4728a79fd2c0 > --- /dev/null > +++ b/drivers/soc/ti/k3-ringacc.c > @@ -0,0 +1,1165 @@ > +// SPDX-License-Identifier: GPL-2.0 > +/* > + * TI K3 NAVSS Ring Accelerator subsystem driver > + * > + * Copyright (C) 2019 Texas Instruments Incorporated - http://www.ti.com > + */ > + > +#include <linux/dma-mapping.h> > +#include <linux/io.h> > +#include <linux/module.h> > +#include <linux/of.h> > +#include <linux/platform_device.h> > +#include <linux/pm_runtime.h> > +#include <linux/soc/ti/k3-ringacc.h> > +#include <linux/soc/ti/ti_sci_protocol.h> > +#include <linux/soc/ti/ti_sci_inta_msi.h> > +#include <linux/of_irq.h> > +#include <linux/irqdomain.h> > + > +static LIST_HEAD(k3_ringacc_list); > +static DEFINE_MUTEX(k3_ringacc_list_lock); > + > +#define K3_RINGACC_CFG_RING_SIZE_ELCNT_MASK GENMASK(19, 0) > + > +/** > + * struct k3_ring_rt_regs - The RA Control/Status Registers region > + */ > +struct k3_ring_rt_regs { > + u32 resv_16[4]; > + u32 db; /* RT Ring N Doorbell Register */ > + u32 resv_4[1]; > + u32 occ; /* RT Ring N Occupancy Register */ > + u32 indx; /* RT Ring N Current Index Register */ > + u32 hwocc; /* RT Ring N Hardware Occupancy Register */ > + u32 hwindx; /* RT Ring N Current Index Register */ > +}; > + > +#define K3_RINGACC_RT_REGS_STEP 0x1000 > + > +/** > + * struct k3_ring_fifo_regs - The Ring Accelerator Queues Registers region > + */ > +struct 
k3_ring_fifo_regs { > + u32 head_data[128]; /* Ring Head Entry Data Registers */ > + u32 tail_data[128]; /* Ring Tail Entry Data Registers */ > + u32 peek_head_data[128]; /* Ring Peek Head Entry Data Regs */ > + u32 peek_tail_data[128]; /* Ring Peek Tail Entry Data Regs */ > +}; > + > +/** > + * struct k3_ringacc_proxy_gcfg_regs - RA Proxy Global Config MMIO Region > + */ > +struct k3_ringacc_proxy_gcfg_regs { > + u32 revision; /* Revision Register */ > + u32 config; /* Config Register */ > +}; > + > +#define K3_RINGACC_PROXY_CFG_THREADS_MASK GENMASK(15, 0) > + > +/** > + * struct k3_ringacc_proxy_target_regs - Proxy Datapath MMIO Region > + */ > +struct k3_ringacc_proxy_target_regs { > + u32 control; /* Proxy Control Register */ > + u32 status; /* Proxy Status Register */ > + u8 resv_512[504]; > + u32 data[128]; /* Proxy Data Register */ > +}; > + > +#define K3_RINGACC_PROXY_TARGET_STEP 0x1000 > +#define K3_RINGACC_PROXY_NOT_USED (-1) > + > +enum k3_ringacc_proxy_access_mode { > + PROXY_ACCESS_MODE_HEAD = 0, > + PROXY_ACCESS_MODE_TAIL = 1, > + PROXY_ACCESS_MODE_PEEK_HEAD = 2, > + PROXY_ACCESS_MODE_PEEK_TAIL = 3, > +}; > + > +#define K3_RINGACC_FIFO_WINDOW_SIZE_BYTES (512U) > +#define K3_RINGACC_FIFO_REGS_STEP 0x1000 > +#define K3_RINGACC_MAX_DB_RING_CNT (127U) > + > +/** > + * struct k3_ring_ops - Ring operations > + */ > +struct k3_ring_ops { > + int (*push_tail)(struct k3_ring *ring, void *elm); > + int (*push_head)(struct k3_ring *ring, void *elm); > + int (*pop_tail)(struct k3_ring *ring, void *elm); > + int (*pop_head)(struct k3_ring *ring, void *elm); > +}; > + > +/** > + * struct k3_ring - RA Ring descriptor > + * > + * @rt - Ring control/status registers > + * @fifos - Ring queues registers > + * @proxy - Ring Proxy Datapath registers > + * @ring_mem_dma - Ring buffer dma address > + * @ring_mem_virt - Ring buffer virt address > + * @ops - Ring operations > + * @size - Ring size in elements > + * @elm_size - Size of the ring element > + * @mode - Ring mode 
> + * @flags - flags > + * @free - Number of free elements > + * @occ - Ring occupancy > + * @windex - Write index (only for @K3_RINGACC_RING_MODE_RING) > + * @rindex - Read index (only for @K3_RINGACC_RING_MODE_RING) > + * @ring_id - Ring Id > + * @parent - Pointer on struct @k3_ringacc > + * @use_count - Use count for shared rings > + * @proxy_id - RA Ring Proxy Id (only if @K3_RINGACC_RING_USE_PROXY) > + */ > +struct k3_ring { > + struct k3_ring_rt_regs __iomem *rt; > + struct k3_ring_fifo_regs __iomem *fifos; > + struct k3_ringacc_proxy_target_regs __iomem *proxy; > + dma_addr_t ring_mem_dma; > + void *ring_mem_virt; > + struct k3_ring_ops *ops; > + u32 size; > + enum k3_ring_size elm_size; > + enum k3_ring_mode mode; > + u32 flags; > +#define K3_RING_FLAG_BUSY BIT(1) > +#define K3_RING_FLAG_SHARED BIT(2) > + u32 free; > + u32 occ; > + u32 windex; > + u32 rindex; > + u32 ring_id; > + struct k3_ringacc *parent; > + u32 use_count; > + int proxy_id; > +}; > + > +/** > + * struct k3_ringacc - Rings accelerator descriptor > + * > + * @dev - pointer on RA device > + * @proxy_gcfg - RA proxy global config registers > + * @proxy_target_base - RA proxy datapath region > + * @num_rings - number of ring in RA > + * @rings_inuse - bitfield for ring usage tracking > + * @rm_gp_range - general purpose rings range from tisci > + * @dma_ring_reset_quirk - DMA reset w/a enable > + * @num_proxies - number of RA proxies > + * @proxy_inuse - bitfield for proxy usage tracking > + * @rings - array of rings descriptors (struct @k3_ring) > + * @list - list of RAs in the system > + * @tisci - pointer ti-sci handle > + * @tisci_ring_ops - ti-sci rings ops > + * @tisci_dev_id - ti-sci device id > + */ > +struct k3_ringacc { > + struct device *dev; > + struct k3_ringacc_proxy_gcfg_regs __iomem *proxy_gcfg; > + void __iomem *proxy_target_base; > + u32 num_rings; /* number of rings in Ringacc module */ > + unsigned long *rings_inuse; > + struct ti_sci_resource *rm_gp_range; > + > + bool 
dma_ring_reset_quirk; > + u32 num_proxies; > + unsigned long *proxy_inuse; > + > + struct k3_ring *rings; > + struct list_head list; > + struct mutex req_lock; /* protect rings allocation */ > + > + const struct ti_sci_handle *tisci; > + const struct ti_sci_rm_ringacc_ops *tisci_ring_ops; > + u32 tisci_dev_id; This can be dropped no? pdev->id has it already. > +}; > + > +static long k3_ringacc_ring_get_fifo_pos(struct k3_ring *ring) > +{ > + return K3_RINGACC_FIFO_WINDOW_SIZE_BYTES - > + (4 << ring->elm_size); > +} > + > +static void *k3_ringacc_get_elm_addr(struct k3_ring *ring, u32 idx) > +{ > + return (ring->ring_mem_virt + idx * (4 << ring->elm_size)); > +} > + > +static int k3_ringacc_ring_push_mem(struct k3_ring *ring, void *elem); > +static int k3_ringacc_ring_pop_mem(struct k3_ring *ring, void *elem); > + > +static struct k3_ring_ops k3_ring_mode_ring_ops = { > + .push_tail = k3_ringacc_ring_push_mem, > + .pop_head = k3_ringacc_ring_pop_mem, > +}; > + > +static int k3_ringacc_ring_push_io(struct k3_ring *ring, void *elem); > +static int k3_ringacc_ring_pop_io(struct k3_ring *ring, void *elem); > +static int k3_ringacc_ring_push_head_io(struct k3_ring *ring, void *elem); > +static int k3_ringacc_ring_pop_tail_io(struct k3_ring *ring, void *elem); > + > +static struct k3_ring_ops k3_ring_mode_msg_ops = { > + .push_tail = k3_ringacc_ring_push_io, > + .push_head = k3_ringacc_ring_push_head_io, > + .pop_tail = k3_ringacc_ring_pop_tail_io, > + .pop_head = k3_ringacc_ring_pop_io, > +}; > + > +static int k3_ringacc_ring_push_head_proxy(struct k3_ring *ring, void *elem); > +static int k3_ringacc_ring_push_tail_proxy(struct k3_ring *ring, void *elem); > +static int k3_ringacc_ring_pop_head_proxy(struct k3_ring *ring, void *elem); > +static int k3_ringacc_ring_pop_tail_proxy(struct k3_ring *ring, void *elem); > + > +static struct k3_ring_ops k3_ring_mode_proxy_ops = { > + .push_tail = k3_ringacc_ring_push_tail_proxy, > + .push_head = k3_ringacc_ring_push_head_proxy, > 
+ .pop_tail = k3_ringacc_ring_pop_tail_proxy, > + .pop_head = k3_ringacc_ring_pop_head_proxy, > +}; > + > +static void k3_ringacc_ring_dump(struct k3_ring *ring) > +{ > + struct device *dev = ring->parent->dev; > + > + dev_dbg(dev, "dump ring: %d\n", ring->ring_id); > + dev_dbg(dev, "dump mem virt %p, dma %pad\n", ring->ring_mem_virt, > + &ring->ring_mem_dma); > + dev_dbg(dev, "dump elmsize %d, size %d, mode %d, proxy_id %d\n", > + ring->elm_size, ring->size, ring->mode, ring->proxy_id); > + > + dev_dbg(dev, "dump ring_rt_regs: db%08x\n", readl(&ring->rt->db)); > + dev_dbg(dev, "dump occ%08x\n", readl(&ring->rt->occ)); > + dev_dbg(dev, "dump indx%08x\n", readl(&ring->rt->indx)); > + dev_dbg(dev, "dump hwocc%08x\n", readl(&ring->rt->hwocc)); > + dev_dbg(dev, "dump hwindx%08x\n", readl(&ring->rt->hwindx)); > + > + if (ring->ring_mem_virt) > + print_hex_dump_debug("dump ring_mem_virt ", DUMP_PREFIX_NONE, > + 16, 1, ring->ring_mem_virt, 16 * 8, false); > +} > + > +struct k3_ring *k3_ringacc_request_ring(struct k3_ringacc *ringacc, > + int id, u32 flags) > +{ > + int proxy_id = K3_RINGACC_PROXY_NOT_USED; > + > + mutex_lock(&ringacc->req_lock); > + > + if (id == K3_RINGACC_RING_ID_ANY) { > + /* Request for any general purpose ring */ > + struct ti_sci_resource_desc *gp_rings = > + &ringacc->rm_gp_range->desc[0]; > + unsigned long size; > + > + size = gp_rings->start + gp_rings->num; > + id = find_next_zero_bit(ringacc->rings_inuse, size, > + gp_rings->start); ti_sci_get_free_resource() can be used here, no? If a specific id is passed, that bit alone can be set. 
> + if (id == size) > + goto error; > + } else if (id < 0) { > + goto error; > + } > + > + if (test_bit(id, ringacc->rings_inuse) && > + !(ringacc->rings[id].flags & K3_RING_FLAG_SHARED)) > + goto error; > + else if (ringacc->rings[id].flags & K3_RING_FLAG_SHARED) > + goto out; > + > + if (flags & K3_RINGACC_RING_USE_PROXY) { > + proxy_id = find_next_zero_bit(ringacc->proxy_inuse, > + ringacc->num_proxies, 0); Maybe a dumb question, but how do we make sure that these proxies are not used by other hosts? > + if (proxy_id == ringacc->num_proxies) > + goto error; > + } > + > + if (!try_module_get(ringacc->dev->driver->owner)) > + goto error; > + > + if (proxy_id != K3_RINGACC_PROXY_NOT_USED) { > + set_bit(proxy_id, ringacc->proxy_inuse); > + ringacc->rings[id].proxy_id = proxy_id; > + dev_dbg(ringacc->dev, "Giving ring#%d proxy#%d\n", id, > + proxy_id); > + } else { > + dev_dbg(ringacc->dev, "Giving ring#%d\n", id); > + } > + > + set_bit(id, ringacc->rings_inuse); > +out: > + ringacc->rings[id].use_count++; > + mutex_unlock(&ringacc->req_lock); > + return &ringacc->rings[id]; > + > +error: > + mutex_unlock(&ringacc->req_lock); > + return NULL; > +} > +EXPORT_SYMBOL_GPL(k3_ringacc_request_ring); > + [..snip..] 
> + > +static int k3_ringacc_probe(struct platform_device *pdev) > +{ > + struct k3_ringacc *ringacc; > + void __iomem *base_fifo, *base_rt; > + struct device *dev = &pdev->dev; > + struct resource *res; > + int ret, i; > + > + ringacc = devm_kzalloc(dev, sizeof(*ringacc), GFP_KERNEL); > + if (!ringacc) > + return -ENOMEM; > + > + ringacc->dev = dev; > + mutex_init(&ringacc->req_lock); > + > + dev->msi_domain = of_msi_get_domain(dev, dev->of_node, > + DOMAIN_BUS_TI_SCI_INTA_MSI); > + if (!dev->msi_domain) { > + dev_err(dev, "Failed to get MSI domain\n"); > + return -EPROBE_DEFER; > + } > + > + ret = k3_ringacc_probe_dt(ringacc); > + if (ret) > + return ret; > + > + res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "rt"); > + base_rt = devm_ioremap_resource(dev, res); > + if (IS_ERR(base_rt)) > + return PTR_ERR(base_rt); > + > + res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "fifos"); > + base_fifo = devm_ioremap_resource(dev, res); > + if (IS_ERR(base_fifo)) > + return PTR_ERR(base_fifo); > + > + res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "proxy_gcfg"); > + ringacc->proxy_gcfg = devm_ioremap_resource(dev, res); > + if (IS_ERR(ringacc->proxy_gcfg)) > + return PTR_ERR(ringacc->proxy_gcfg); > + > + res = platform_get_resource_byname(pdev, IORESOURCE_MEM, > + "proxy_target"); > + ringacc->proxy_target_base = devm_ioremap_resource(dev, res); > + if (IS_ERR(ringacc->proxy_target_base)) > + return PTR_ERR(ringacc->proxy_target_base); > + > + ringacc->num_proxies = readl(&ringacc->proxy_gcfg->config) & > + K3_RINGACC_PROXY_CFG_THREADS_MASK; > + > + ringacc->rings = devm_kzalloc(dev, > + sizeof(*ringacc->rings) * > + ringacc->num_rings, > + GFP_KERNEL); > + ringacc->rings_inuse = devm_kcalloc(dev, > + BITS_TO_LONGS(ringacc->num_rings), > + sizeof(unsigned long), GFP_KERNEL); > + ringacc->proxy_inuse = devm_kcalloc(dev, > + BITS_TO_LONGS(ringacc->num_proxies), > + sizeof(unsigned long), GFP_KERNEL); > + > + if (!ringacc->rings || 
!ringacc->rings_inuse || !ringacc->proxy_inuse) > + return -ENOMEM; > + > + for (i = 0; i < ringacc->num_rings; i++) { > + ringacc->rings[i].rt = base_rt + > + K3_RINGACC_RT_REGS_STEP * i; > + ringacc->rings[i].fifos = base_fifo + > + K3_RINGACC_FIFO_REGS_STEP * i; > + ringacc->rings[i].parent = ringacc; > + ringacc->rings[i].ring_id = i; > + ringacc->rings[i].proxy_id = K3_RINGACC_PROXY_NOT_USED; > + } > + dev_set_drvdata(dev, ringacc); > + > + ringacc->tisci_ring_ops = &ringacc->tisci->ops.rm_ring_ops; > + > + pm_runtime_enable(dev); > + ret = pm_runtime_get_sync(dev); > + if (ret < 0) { > + pm_runtime_put_noidle(dev); > + dev_err(dev, "Failed to enable pm %d\n", ret); > + goto err; > + } Don't you need power-domains property in DT so that pm is actually working? If that is populated, dev-id can be derived from power-domains rather than a separate dt property. [...snip..] > diff --git a/include/linux/soc/ti/k3-ringacc.h b/include/linux/soc/ti/k3-ringacc.h > new file mode 100644 > index 000000000000..526b2e38fcce > --- /dev/null > +++ b/include/linux/soc/ti/k3-ringacc.h > @@ -0,0 +1,245 @@ > +/* SPDX-License-Identifier: GPL-2.0 */ > +/* > + * K3 Ring Accelerator (RA) subsystem interface > + * > + * Copyright (C) 2019 Texas Instruments Incorporated - http://www.ti.com > + */ > + > +#ifndef __SOC_TI_K3_RINGACC_API_H_ > +#define __SOC_TI_K3_RINGACC_API_H_ > + > +#include <linux/types.h> > + > +struct device_node; > + [...snip..] > + > +/** > + * k3_ringacc_ring_reset - ring reset > + * @ring: pointer on Ring > + * > + * Resets ring internal state ((hw)occ, (hw)idx). > + * TODO_GS: ? Ring can be reused without reconfiguration TODO_GS? Thanks and regards, Lokesh