On Wed, Feb 22, 2017 at 4:31 AM, Sricharan <sricharan@xxxxxxxxxxxxxx> wrote: > Hi Rob, > >>diff --git a/Documentation/devicetree/bindings/iommu/qcom,iommu.txt >>b/Documentation/devicetree/bindings/iommu/qcom,iommu.txt >>new file mode 100644 >>index 0000000..78a8d65 >>--- /dev/null >>+++ b/Documentation/devicetree/bindings/iommu/qcom,iommu.txt >>@@ -0,0 +1,45 @@ >>+* QCOM IOMMU Implementation >>+ >>+Qualcomm "B" family devices which are not compatible with arm-smmu have >>+a similar looking IOMMU but without access to the global register space. >>+This is modelled as separate IOMMU devices which have just a single >>+master. >>+ >>+** Required properties: >>+ >>+- compatible : Should be one of: >>+ >>+ "qcom,msm8916-iommu-context-bank" >>+ >>+ depending on the particular implementation and/or the >>+ version of the architecture implemented. >>+ >>+- reg : Base address and size of the SMMU. And optionally, >>+ if present, the "smmu_local_base" >>+ >>+- interrupts : The context fault irq. >>+ >>+- #iommu-cells : Must be 0 >>+ >>+- qcom,iommu-ctx-asid : context ASID >>+ >>+- qcom,iommu-secure-id : secure-id >>+ >>+- clocks : The interface clock (iface_clk) and bus clock (bus_clk) >>+ >>+** Examples: >>+ >>+ mdp_iommu: iommu-context-bank@1e24000 { >>+ compatible = "qcom,msm8916-iommu-context-bank"; >>+ reg = <0x1e24000 0x1000 >>+ 0x1ef0000 0x3000>; >>+ reg-names = "iommu_base", "smmu_local_base"; >>+ interrupts = <GIC_SPI 70 0>; >>+ qcom,iommu-ctx-asid = <4>; >>+ qcom,iommu-secure-id = <17>; > > This is not an per context bank property and can be programmed for an > given iommu only once. So we call qcom_iommu_sec_init for > each context bank once, which does not look correct. Similarly for > smmu_local_base as well. So should this be handled using an global > once for all contexts ? yeah, smmu_local_base and secure-id would be duplicate for all context banks that are part of the same actual iommu. (But it was Robin's suggestion to just model this as separate context-bank devices, since we cannot touch the global space). Did I misunderstand the downstream driver code? It looked like qcom_scm_restore_sec_cfg() was called once on first attach per context-bank, not globally for the entire iommu, which is what I'm doing with this driver. But I haven't yet tried to enable other context-banks in the apps iommu yet. >>+ #iommu-cells = <0>; >>+ clocks = <&gcc GCC_SMMU_CFG_CLK>, >>+ <&gcc GCC_APSS_TCU_CLK>; >>+ clock-names = "iface_clk", "bus_clk"; > > I am trying to generalize the clock bindings for MMU-500 and one more > qcom specific. Anyways this can follow that. no problem to adapt to what you come up with for arm-smmu, it is basically the same requirements. >>+ status = "okay"; > > <..> > >>+#define pr_fmt(fmt) "qcom-iommu: " fmt >>+ >>+#include <linux/atomic.h> >>+#include <linux/clk.h> >>+#include <linux/delay.h> >>+#include <linux/dma-iommu.h> >>+#include <linux/dma-mapping.h> >>+#include <linux/err.h> >>+#include <linux/interrupt.h> >>+#include <linux/io.h> >>+#include <linux/io-64-nonatomic-hi-lo.h> >>+#include <linux/iommu.h> >>+#include <linux/iopoll.h> >>+#include <linux/module.h> >>+#include <linux/of.h> >>+#include <linux/of_address.h> >>+#include <linux/of_device.h> >>+#include <linux/of_iommu.h> >>+#include <linux/platform_device.h> >>+#include <linux/pm_runtime.h> >>+#include <linux/qcom_scm.h> >>+#include <linux/slab.h> >>+#include <linux/spinlock.h> >>+ >>+#include "io-pgtable.h" >>+#include "arm-smmu-regs.h" >>+ >>+// TODO are these qcom specific, or just something no one bothered to add to arm-smmu >>+#define SMMU_CB_TLBSYNC 0x7f0 >>+#define SMMU_CB_TLBSTATUS 0x7f4 > > I think the reason was in arm-smmu, we are using the global TLBSYNC/STATUS bits, as its > used in both global device reset and flush path. Otherwise here, its correct to add this. ok, that is what I suspected.. in next version I'll add these two to the shared header instead >>+#define SMMU_INTR_SEL_NS 0x2000 >>+ >>+ >>+struct qcom_iommu_device { >>+ struct device *dev; >>+ >>+ void __iomem *base; >>+ void __iomem *local_base; >>+ unsigned int irq; >>+ struct clk *iface_clk; >>+ struct clk *bus_clk; >>+ >>+ bool secure_init; >>+ u32 asid; /* asid and ctx bank # are 1:1 */ >>+ u32 sec_id; >>+ >>+ /* single group per device: */ >>+ struct iommu_group *group; >>+}; >>+ >>+struct qcom_iommu_domain { >>+ struct qcom_iommu_device *iommu; >>+ struct io_pgtable_ops *pgtbl_ops; >>+ spinlock_t pgtbl_lock; >>+ struct mutex init_mutex; /* Protects iommu pointer */ >>+ struct iommu_domain domain; >>+}; >>+ >>+static struct qcom_iommu_domain *to_qcom_iommu_domain(struct iommu_domain *dom) >>+{ >>+ return container_of(dom, struct qcom_iommu_domain, domain); >>+} >>+ >>+static const struct iommu_ops qcom_iommu_ops; >>+static struct platform_driver qcom_iommu_driver; >>+ >>+static struct qcom_iommu_device * dev_to_iommu(struct device *dev) >>+{ >>+ struct iommu_fwspec *fwspec = dev->iommu_fwspec; >>+ if (WARN_ON(!fwspec || fwspec->ops != &qcom_iommu_ops)) >>+ return NULL; >>+ return fwspec->iommu_priv; >>+} >>+ >>+static inline void >>+iommu_writel(struct qcom_iommu_device *qcom_iommu, unsigned reg, u32 val) >>+{ >>+ writel_relaxed(val, qcom_iommu->base + reg); >>+} >>+ >>+static inline void >>+iommu_writeq(struct qcom_iommu_device *qcom_iommu, unsigned reg, u64 val) >>+{ >>+ writeq_relaxed(val, qcom_iommu->base + reg); >>+} >>+ >>+static inline u32 >>+iommu_readl(struct qcom_iommu_device *qcom_iommu, unsigned reg) >>+{ >>+ return readl_relaxed(qcom_iommu->base + reg); >>+} >>+ >>+static inline u32 >>+iommu_readq(struct qcom_iommu_device *qcom_iommu, unsigned reg) >>+{ >>+ return readq_relaxed(qcom_iommu->base + reg); >>+} >>+ >>+static void __sync_tlb(struct qcom_iommu_device *qcom_iommu) >>+{ >>+ unsigned int val; >>+ unsigned int ret; >>+ >>+ iommu_writel(qcom_iommu, SMMU_CB_TLBSYNC, 0); >>+ >>+ ret = readl_poll_timeout(qcom_iommu->base + SMMU_CB_TLBSTATUS, val, >>+ (val & 0x1) == 0, 0, 5000000); >>+ if (ret) >>+ dev_err(qcom_iommu->dev, "timeout waiting for TLB SYNC\n"); >>+} >>+ >>+ >>+static void qcom_iommu_tlb_sync (void *cookie) >>+{ >>+ struct qcom_iommu_device *qcom_iommu = cookie; >>+ __sync_tlb(qcom_iommu); >>+} >>+ >>+static void qcom_iommu_tlb_inv_context(void *cookie) >>+{ >>+ struct qcom_iommu_device *qcom_iommu = cookie; >>+ >>+ iommu_writel(qcom_iommu, ARM_SMMU_CB_S1_TLBIASID, qcom_iommu->asid); >>+ __sync_tlb(qcom_iommu); >>+} >>+ >>+static void qcom_iommu_tlb_inv_range_nosync(unsigned long iova, size_t size, >>+ size_t granule, bool leaf, void *cookie) >>+{ >>+ struct qcom_iommu_device *qcom_iommu = cookie; >>+ unsigned reg; >>+ >>+ reg = leaf ? ARM_SMMU_CB_S1_TLBIVAL : ARM_SMMU_CB_S1_TLBIVA; >>+ >>+ /* TODO do we need to support aarch64 fmt too? */ >>+ >>+ iova >>= 12; >>+ iova |= (u64)qcom_iommu->asid << 48; >>+ do { >>+ iommu_writeq(qcom_iommu, reg, iova); >>+ iova += granule >> 12; >>+ } while (size -= granule); > > Is this not for ARCH64 format ?, i see that the arm-smmu does this when the > format is ARCH64. This is what you mentioned as fixed in V2, otherwise. yup :-) >>+} >>+ >>+static const struct iommu_gather_ops qcom_gather_ops = { >>+ .tlb_flush_all = qcom_iommu_tlb_inv_context, >>+ .tlb_add_flush = qcom_iommu_tlb_inv_range_nosync, >>+ .tlb_sync = qcom_iommu_tlb_sync, >>+}; >>+ >>+static irqreturn_t qcom_iommu_fault(int irq, void *dev) >>+{ >>+ struct qcom_iommu_device *qcom_iommu = dev; >>+ u32 fsr, fsynr; >>+ unsigned long iova; >>+ >>+ fsr = iommu_readl(qcom_iommu, ARM_SMMU_CB_FSR); >>+ >>+ if (!(fsr & FSR_FAULT)) >>+ return IRQ_NONE; >>+ >>+ fsynr = iommu_readl(qcom_iommu, ARM_SMMU_CB_FSYNR0); >>+ iova = iommu_readq(qcom_iommu, ARM_SMMU_CB_FAR); >>+ >>+ dev_err_ratelimited(qcom_iommu->dev, >>+ "Unhandled context fault: fsr=0x%x, " >>+ "iova=0x%08lx, fsynr=0x%x, cb=%d\n", >>+ fsr, iova, fsynr, qcom_iommu->asid); >>+ >>+ iommu_writel(qcom_iommu, ARM_SMMU_CB_FSR, fsr); >>+ >>+ return IRQ_HANDLED; >>+} >>+ >>+static int qcom_iommu_sec_init(struct qcom_iommu_device *qcom_iommu) >>+{ >>+ if (qcom_iommu->local_base) { >>+ writel_relaxed(0xffffffff, qcom_iommu->local_base + SMMU_INTR_SEL_NS); >>+ mb(); >>+ } >>+ >>+ return qcom_scm_restore_sec_cfg(qcom_iommu->sec_id, qcom_iommu->asid); >>+} >>+ >>+ >>+static int qcom_iommu_init_domain_context(struct iommu_domain *domain, >>+ struct qcom_iommu_device *qcom_iommu) >>+{ >>+ struct qcom_iommu_domain *qcom_domain = to_qcom_iommu_domain(domain); >>+ struct io_pgtable_ops *pgtbl_ops; >>+ struct io_pgtable_cfg pgtbl_cfg; >>+ int ret = 0; >>+ u32 reg; >>+ >>+ mutex_lock(&qcom_domain->init_mutex); >>+ if (qcom_domain->iommu) >>+ goto out_unlock; >>+ >>+ /* >>+ * TODO do we need to make the pagetable format configurable to >>+ * support other devices? Is deciding based on compat string >>+ * sufficient? >>+ */ > > The problem in choosing the pagetable format is, the firmware has set the > format for CBA2R to ARM_32_LPAE_S1 as default. So that register has to be changed > using some scm api to choose 64bit format, if we have to support some device. > But also, there is no way for an device to pass in this option either. Downstream driver > was never enabling 64bit format for any devices. So i feel, we can > introduce the support for 64bit format additionally if required. ok, if firmware is using ARM_32_LPAE_S1 for everything (and I guess that makes sense since *most* of the devices that would use this are armv7) then I'll just leave it as-is. Otherwise I think we'd need a dt property to know how firmware was configured, or pick from compat string. >>+ >>+ pgtbl_cfg = (struct io_pgtable_cfg) { >>+ .pgsize_bitmap = qcom_iommu_ops.pgsize_bitmap, >>+ .ias = 32, >>+ .oas = 40, >>+ .tlb = &qcom_gather_ops, >>+ .iommu_dev = qcom_iommu->dev, >>+ }; >>+ >>+ qcom_domain->iommu = qcom_iommu; >>+ pgtbl_ops = alloc_io_pgtable_ops(ARM_32_LPAE_S1, &pgtbl_cfg, qcom_iommu); >>+ if (!pgtbl_ops) { >>+ dev_err(qcom_iommu->dev, "failed to allocate pagetable ops\n"); >>+ ret = -ENOMEM; >>+ goto out_clear_iommu; >>+ } >>+ >>+ /* Update the domain's page sizes to reflect the page table format */ >>+ domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap; >>+ domain->geometry.aperture_end = (1UL << 48) - 1; >>+ domain->geometry.force_aperture = true; >>+ >>+ if (!qcom_iommu->secure_init) { >>+ ret = qcom_iommu_sec_init(qcom_iommu); >>+ if (ret) { >>+ dev_err(qcom_iommu->dev, "secure init failed: %d\n", ret); >>+ goto out_clear_iommu; >>+ } >>+ qcom_iommu->secure_init = true; >>+ } >>+ >>+ /* TTBRs */ >>+ iommu_writeq(qcom_iommu, ARM_SMMU_CB_TTBR0, >>+ pgtbl_cfg.arm_lpae_s1_cfg.ttbr[0] | >>+ ((u64)qcom_iommu->asid << TTBRn_ASID_SHIFT)); >>+ iommu_writeq(qcom_iommu, ARM_SMMU_CB_TTBR1, >>+ pgtbl_cfg.arm_lpae_s1_cfg.ttbr[1] | >>+ ((u64)qcom_iommu->asid << TTBRn_ASID_SHIFT)); >>+ >>+ /* TTBCR */ >>+ iommu_writel(qcom_iommu, ARM_SMMU_CB_TTBCR2, >>+ (pgtbl_cfg.arm_lpae_s1_cfg.tcr >> 32) | >>+ TTBCR2_SEP_UPSTREAM); >>+ iommu_writel(qcom_iommu, ARM_SMMU_CB_TTBCR, >>+ pgtbl_cfg.arm_lpae_s1_cfg.tcr); >>+ >>+ /* MAIRs (stage-1 only) */ >>+ iommu_writel(qcom_iommu, ARM_SMMU_CB_S1_MAIR0, >>+ pgtbl_cfg.arm_lpae_s1_cfg.mair[0]); >>+ iommu_writel(qcom_iommu, ARM_SMMU_CB_S1_MAIR1, >>+ pgtbl_cfg.arm_lpae_s1_cfg.mair[1]); >>+ >>+ /* SCTLR */ >>+ reg = SCTLR_CFIE | SCTLR_CFRE | SCTLR_AFE | SCTLR_TRE | SCTLR_M | >>+ SCTLR_S1_ASIDPNE; >>+#ifdef __BIG_ENDIAN >>+ reg |= SCTLR_E; >>+#endif >>+ iommu_writel(qcom_iommu, ARM_SMMU_CB_SCTLR, reg); >>+ >>+ mutex_unlock(&qcom_domain->init_mutex); >>+ >>+ /* Publish page table ops for map/unmap */ >>+ qcom_domain->pgtbl_ops = pgtbl_ops; >>+ >>+ return 0; >>+ >>+out_clear_iommu: >>+ qcom_domain->iommu = NULL; >>+out_unlock: >>+ mutex_unlock(&qcom_domain->init_mutex); >>+ return ret; >>+} >>+ >>+static struct iommu_domain *qcom_iommu_domain_alloc(unsigned type) >>+{ >>+ struct qcom_iommu_domain *qcom_domain; >>+ >>+ if (type != IOMMU_DOMAIN_UNMANAGED && type != IOMMU_DOMAIN_DMA) >>+ return NULL; >>+ /* >>+ * Allocate the domain and initialise some of its data structures. >>+ * We can't really do anything meaningful until we've added a >>+ * master. >>+ */ >>+ qcom_domain = kzalloc(sizeof(*qcom_domain), GFP_KERNEL); >>+ if (!qcom_domain) >>+ return NULL; >>+ >>+ if (type == IOMMU_DOMAIN_DMA && >>+ iommu_get_dma_cookie(&qcom_domain->domain)) { >>+ kfree(qcom_domain); >>+ return NULL; >>+ } >>+ >>+ mutex_init(&qcom_domain->init_mutex); >>+ spin_lock_init(&qcom_domain->pgtbl_lock); >>+ >>+ return &qcom_domain->domain; >>+} >>+ >>+static void qcom_iommu_domain_free(struct iommu_domain *domain) >>+{ >>+ struct qcom_iommu_domain *qcom_domain = to_qcom_iommu_domain(domain); >>+ struct qcom_iommu_device *qcom_iommu = qcom_domain->iommu; >>+ >>+ if (!qcom_iommu) >>+ return; >>+ >>+ /* >>+ * Free the domain resources. We assume that all devices have >>+ * already been detached. >>+ */ >>+ iommu_put_dma_cookie(domain); >>+ >>+ /* >>+ * Disable the context bank before freeing page table >>+ */ >>+ iommu_writel(qcom_iommu, ARM_SMMU_CB_SCTLR, 0); >>+ > > We need to add a pm_runtime here as well, at this point the device_link between > master and smmu might not be there any more. ok >>+ free_io_pgtable_ops(qcom_domain->pgtbl_ops); >>+ >>+ kfree(qcom_domain); >>+} >>+ > > <..> > >>+static int qcom_iommu_of_xlate(struct device *dev, struct of_phandle_args *args) >>+{ >>+ struct platform_device *iommu_pdev; >>+ u32 fwid = 0; >>+ >>+ if (args->args_count != 0) { >>+ dev_err(dev, "incorrect number of iommu params found for %s " >>+ "(found %d, expected 0)\n", >>+ args->np->full_name, args->args_count); >>+ return -EINVAL; >>+ } >>+ >>+ if (!dev->iommu_fwspec->iommu_priv) { >>+ iommu_pdev = of_find_device_by_node(args->np); >>+ if (WARN_ON(!iommu_pdev)) >>+ return -EINVAL; >>+ >>+ dev->iommu_fwspec->iommu_priv = platform_get_drvdata(iommu_pdev); > > This can be done as a part of the add_device callback as well. there seemed to be a mix in other drivers of doing this at _of_xlate() vs _add_device().. I wasn't really sure which was the new shiny way to do it vs legacy >>+ } >>+ >>+ return iommu_fwspec_add_ids(dev, &fwid, 1); > > This is not required, we do not have any fwid to add here. oh, ok >>+} >>+ >>+static const struct iommu_ops qcom_iommu_ops = { >>+ .capable = qcom_iommu_capable, >>+ .domain_alloc = qcom_iommu_domain_alloc, >>+ .domain_free = qcom_iommu_domain_free, >>+ .attach_dev = qcom_iommu_attach_dev, >>+ .map = qcom_iommu_map, >>+ .unmap = qcom_iommu_unmap, >>+ .map_sg = default_iommu_map_sg, >>+ .iova_to_phys = qcom_iommu_iova_to_phys, >>+ .add_device = qcom_iommu_add_device, >>+ .remove_device = qcom_iommu_remove_device, >>+ .device_group = qcom_iommu_device_group, >>+ .of_xlate = qcom_iommu_of_xlate, >>+ .pgsize_bitmap = SZ_4K | SZ_64K | SZ_1M | SZ_16M, >>+}; >>+ >>+static const struct of_device_id qcom_iommu_of_match[] = { >>+// TODO we probably need to use this driver (vs arm-smmu) for all the early >>+// "B" family devices prior to 8x96 or so.. so maybe having msm8916 in the >>+// compat name isn't right.. or maybe we just add a bunch more compat strings >>+// as needed? >>+ { .compatible = "qcom,msm8916-iommu-context-bank" }, > > Maybe, qcom,msm-sec-iommu-context-bank, meaning that this driver > is always for iommu which is secure and we will need an extra binding when > we try to add secure context banks as well. ok, esp. if all have same table format and we don't need to pick lpae-s1 vs aarch64 based on compat string then the more generic compatible makes sense. Thanks BR, -R -- To unsubscribe from this list: send the line "unsubscribe linux-arm-msm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html