Re: [PATCH 2/2] iommu: add qcom_iommu

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Wed, Feb 22, 2017 at 4:31 AM, Sricharan <sricharan@xxxxxxxxxxxxxx> wrote:
> Hi Rob,
>
>>diff --git a/Documentation/devicetree/bindings/iommu/qcom,iommu.txt
>>b/Documentation/devicetree/bindings/iommu/qcom,iommu.txt
>>new file mode 100644
>>index 0000000..78a8d65
>>--- /dev/null
>>+++ b/Documentation/devicetree/bindings/iommu/qcom,iommu.txt
>>@@ -0,0 +1,45 @@
>>+* QCOM IOMMU Implementation
>>+
>>+Qualcomm "B" family devices which are not compatible with arm-smmu have
>>+a similar looking IOMMU but without access to the global register space.
>>+This is modelled as separate IOMMU devices which have just a single
>>+master.
>>+
>>+** Required properties:
>>+
>>+- compatible    : Should be one of:
>>+
>>+                        "qcom,msm8916-iommu-context-bank"
>>+
>>+                  depending on the particular implementation and/or the
>>+                  version of the architecture implemented.
>>+
>>+- reg           : Base address and size of the SMMU.  And optionally,
>>+                  if present, the "smmu_local_base"
>>+
>>+- interrupts    : The context fault irq.
>>+
>>+- #iommu-cells  : Must be 0
>>+
>>+- qcom,iommu-ctx-asid   : context ASID
>>+
>>+- qcom,iommu-secure-id  : secure-id
>>+
>>+- clocks        : The interface clock (iface_clk) and bus clock (bus_clk)
>>+
>>+** Examples:
>>+
>>+      mdp_iommu: iommu-context-bank@1e24000 {
>>+              compatible = "qcom,msm8916-iommu-context-bank";
>>+              reg = <0x1e24000 0x1000
>>+                      0x1ef0000 0x3000>;
>>+              reg-names = "iommu_base", "smmu_local_base";
>>+              interrupts = <GIC_SPI 70 0>;
>>+              qcom,iommu-ctx-asid = <4>;
>>+              qcom,iommu-secure-id = <17>;
>
> This is not an per context bank property and can be programmed for an
> given iommu only once. So we call qcom_iommu_sec_init for
> each context bank once, which does not look correct. Similarly for
> smmu_local_base as well. So should this be handled using an global
> once for all contexts ?

yeah, smmu_local_base and secure-id would be duplicate for all context
banks that are part of the same actual iommu.  (But it was Robin's
suggestion to just model this as separate context-bank devices, since
we cannot touch the global space).

Did I misunderstand the downstream driver code?  It looked like
qcom_scm_restore_sec_cfg() was called once on first attach per
context-bank, not globally for the entire iommu, which is what I'm
doing with this driver.  But I haven't yet tried to enable other
context-banks in the apps iommu yet.

>>+              #iommu-cells = <0>;
>>+              clocks = <&gcc GCC_SMMU_CFG_CLK>,
>>+                       <&gcc GCC_APSS_TCU_CLK>;
>>+              clock-names = "iface_clk", "bus_clk";
>
> I am trying to generalize the clock bindings for MMU-500 and one more
> qcom specific. Anyways this can follow that.

no problem to adapt to what you come up with for arm-smmu, it is
basically the same requirements.

>>+              status = "okay";
>
> <..>
>
>>+#define pr_fmt(fmt) "qcom-iommu: " fmt
>>+
>>+#include <linux/atomic.h>
>>+#include <linux/clk.h>
>>+#include <linux/delay.h>
>>+#include <linux/dma-iommu.h>
>>+#include <linux/dma-mapping.h>
>>+#include <linux/err.h>
>>+#include <linux/interrupt.h>
>>+#include <linux/io.h>
>>+#include <linux/io-64-nonatomic-hi-lo.h>
>>+#include <linux/iommu.h>
>>+#include <linux/iopoll.h>
>>+#include <linux/module.h>
>>+#include <linux/of.h>
>>+#include <linux/of_address.h>
>>+#include <linux/of_device.h>
>>+#include <linux/of_iommu.h>
>>+#include <linux/platform_device.h>
>>+#include <linux/pm_runtime.h>
>>+#include <linux/qcom_scm.h>
>>+#include <linux/slab.h>
>>+#include <linux/spinlock.h>
>>+
>>+#include "io-pgtable.h"
>>+#include "arm-smmu-regs.h"
>>+
>>+// TODO are these qcom specific, or just something no one bothered to add to arm-smmu
>>+#define SMMU_CB_TLBSYNC      0x7f0
>>+#define SMMU_CB_TLBSTATUS    0x7f4
>
> I think the reason was in arm-smmu, we are using the global TLBSYNC/STATUS bits, as its
> used in both global device reset and flush path. Otherwise here, its correct to add this.

ok, that is what I suspected.. in next version I'll add these two to
the shared header instead

>>+#define SMMU_INTR_SEL_NS     0x2000
>>+
>>+
>>+struct qcom_iommu_device {
>>+      struct device           *dev;
>>+
>>+      void __iomem            *base;
>>+      void __iomem            *local_base;
>>+      unsigned int             irq;
>>+      struct clk              *iface_clk;
>>+      struct clk              *bus_clk;
>>+
>>+      bool                     secure_init;
>>+      u32                      asid;      /* asid and ctx bank # are 1:1 */
>>+      u32                      sec_id;
>>+
>>+      /* single group per device: */
>>+      struct iommu_group      *group;
>>+};
>>+
>>+struct qcom_iommu_domain {
>>+      struct qcom_iommu_device        *iommu;
>>+      struct io_pgtable_ops           *pgtbl_ops;
>>+      spinlock_t                       pgtbl_lock;
>>+      struct mutex                     init_mutex; /* Protects iommu pointer */
>>+      struct iommu_domain              domain;
>>+};
>>+
>>+static struct qcom_iommu_domain *to_qcom_iommu_domain(struct iommu_domain *dom)
>>+{
>>+      return container_of(dom, struct qcom_iommu_domain, domain);
>>+}
>>+
>>+static const struct iommu_ops qcom_iommu_ops;
>>+static struct platform_driver qcom_iommu_driver;
>>+
>>+static struct qcom_iommu_device * dev_to_iommu(struct device *dev)
>>+{
>>+      struct iommu_fwspec *fwspec = dev->iommu_fwspec;
>>+      if (WARN_ON(!fwspec || fwspec->ops != &qcom_iommu_ops))
>>+              return NULL;
>>+      return fwspec->iommu_priv;
>>+}
>>+
>>+static inline void
>>+iommu_writel(struct qcom_iommu_device *qcom_iommu, unsigned reg, u32 val)
>>+{
>>+      writel_relaxed(val, qcom_iommu->base + reg);
>>+}
>>+
>>+static inline void
>>+iommu_writeq(struct qcom_iommu_device *qcom_iommu, unsigned reg, u64 val)
>>+{
>>+      writeq_relaxed(val, qcom_iommu->base + reg);
>>+}
>>+
>>+static inline u32
>>+iommu_readl(struct qcom_iommu_device *qcom_iommu, unsigned reg)
>>+{
>>+      return readl_relaxed(qcom_iommu->base + reg);
>>+}
>>+
>>+static inline u32
>>+iommu_readq(struct qcom_iommu_device *qcom_iommu, unsigned reg)
>>+{
>>+      return readq_relaxed(qcom_iommu->base + reg);
>>+}
>>+
>>+static void __sync_tlb(struct qcom_iommu_device *qcom_iommu)
>>+{
>>+      unsigned int val;
>>+      unsigned int ret;
>>+
>>+      iommu_writel(qcom_iommu, SMMU_CB_TLBSYNC, 0);
>>+
>>+      ret = readl_poll_timeout(qcom_iommu->base + SMMU_CB_TLBSTATUS, val,
>>+                               (val & 0x1) == 0, 0, 5000000);
>>+      if (ret)
>>+              dev_err(qcom_iommu->dev, "timeout waiting for TLB SYNC\n");
>>+}
>>+
>>+
>>+static void qcom_iommu_tlb_sync (void *cookie)
>>+{
>>+      struct qcom_iommu_device *qcom_iommu = cookie;
>>+      __sync_tlb(qcom_iommu);
>>+}
>>+
>>+static void qcom_iommu_tlb_inv_context(void *cookie)
>>+{
>>+      struct qcom_iommu_device *qcom_iommu = cookie;
>>+
>>+      iommu_writel(qcom_iommu, ARM_SMMU_CB_S1_TLBIASID, qcom_iommu->asid);
>>+      __sync_tlb(qcom_iommu);
>>+}
>>+
>>+static void qcom_iommu_tlb_inv_range_nosync(unsigned long iova, size_t size,
>>+                                          size_t granule, bool leaf, void *cookie)
>>+{
>>+      struct qcom_iommu_device *qcom_iommu = cookie;
>>+      unsigned reg;
>>+
>>+      reg = leaf ? ARM_SMMU_CB_S1_TLBIVAL : ARM_SMMU_CB_S1_TLBIVA;
>>+
>>+      /* TODO do we need to support aarch64 fmt too? */
>>+
>>+      iova >>= 12;
>>+      iova |= (u64)qcom_iommu->asid << 48;
>>+      do {
>>+              iommu_writeq(qcom_iommu, reg, iova);
>>+              iova += granule >> 12;
>>+      } while (size -= granule);
>
> Is this not for ARCH64 format ?, i see that the arm-smmu does this when the
> format is ARCH64. This is what you mentioned as fixed in V2, otherwise.

yup :-)

>>+}
>>+
>>+static const struct iommu_gather_ops qcom_gather_ops = {
>>+      .tlb_flush_all  = qcom_iommu_tlb_inv_context,
>>+      .tlb_add_flush  = qcom_iommu_tlb_inv_range_nosync,
>>+      .tlb_sync       = qcom_iommu_tlb_sync,
>>+};
>>+
>>+static irqreturn_t qcom_iommu_fault(int irq, void *dev)
>>+{
>>+      struct qcom_iommu_device *qcom_iommu = dev;
>>+      u32 fsr, fsynr;
>>+      unsigned long iova;
>>+
>>+      fsr = iommu_readl(qcom_iommu, ARM_SMMU_CB_FSR);
>>+
>>+      if (!(fsr & FSR_FAULT))
>>+              return IRQ_NONE;
>>+
>>+      fsynr = iommu_readl(qcom_iommu, ARM_SMMU_CB_FSYNR0);
>>+      iova = iommu_readq(qcom_iommu, ARM_SMMU_CB_FAR);
>>+
>>+      dev_err_ratelimited(qcom_iommu->dev,
>>+                          "Unhandled context fault: fsr=0x%x, "
>>+                          "iova=0x%08lx, fsynr=0x%x, cb=%d\n",
>>+                          fsr, iova, fsynr, qcom_iommu->asid);
>>+
>>+      iommu_writel(qcom_iommu, ARM_SMMU_CB_FSR, fsr);
>>+
>>+      return IRQ_HANDLED;
>>+}
>>+
>>+static int qcom_iommu_sec_init(struct qcom_iommu_device *qcom_iommu)
>>+{
>>+      if (qcom_iommu->local_base) {
>>+              writel_relaxed(0xffffffff, qcom_iommu->local_base + SMMU_INTR_SEL_NS);
>>+              mb();
>>+      }
>>+
>>+      return qcom_scm_restore_sec_cfg(qcom_iommu->sec_id, qcom_iommu->asid);
>>+}
>>+
>>+
>>+static int qcom_iommu_init_domain_context(struct iommu_domain *domain,
>>+                                        struct qcom_iommu_device *qcom_iommu)
>>+{
>>+      struct qcom_iommu_domain *qcom_domain = to_qcom_iommu_domain(domain);
>>+      struct io_pgtable_ops *pgtbl_ops;
>>+      struct io_pgtable_cfg pgtbl_cfg;
>>+      int ret = 0;
>>+      u32 reg;
>>+
>>+      mutex_lock(&qcom_domain->init_mutex);
>>+      if (qcom_domain->iommu)
>>+              goto out_unlock;
>>+
>>+      /*
>>+       * TODO do we need to make the pagetable format configurable to
>>+       * support other devices?  Is deciding based on compat string
>>+       * sufficient?
>>+       */
>
> The problem in choosing the pagetable format is, the firmware has set the
> format for CBA2R to ARM_32_LPAE_S1 as default. So that register has to be changed
> using some scm api to choose 64bit format, if we have to support some device.
> But also, there is no way for an device to pass in this option either. Downstream driver
> was never enabling 64bit format for any devices. So i feel, we can
> introduce the support for 64bit format additionally if required.

ok, if firmware is using ARM_32_LPAE_S1 for everything (and I guess
that makes sense since *most* of the devices that would use this are
armv7) then I'll just leave it as-is.  Otherwise I think we'd need a
dt property to know how firmware was configured, or pick from compat
string.

>>+
>>+      pgtbl_cfg = (struct io_pgtable_cfg) {
>>+              .pgsize_bitmap  = qcom_iommu_ops.pgsize_bitmap,
>>+              .ias            = 32,
>>+              .oas            = 40,
>>+              .tlb            = &qcom_gather_ops,
>>+              .iommu_dev      = qcom_iommu->dev,
>>+      };
>>+
>>+      qcom_domain->iommu = qcom_iommu;
>>+      pgtbl_ops = alloc_io_pgtable_ops(ARM_32_LPAE_S1, &pgtbl_cfg, qcom_iommu);
>>+      if (!pgtbl_ops) {
>>+              dev_err(qcom_iommu->dev, "failed to allocate pagetable ops\n");
>>+              ret = -ENOMEM;
>>+              goto out_clear_iommu;
>>+      }
>>+
>>+      /* Update the domain's page sizes to reflect the page table format */
>>+      domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
>>+      domain->geometry.aperture_end = (1UL << 48) - 1;
>>+      domain->geometry.force_aperture = true;
>>+
>>+      if (!qcom_iommu->secure_init) {
>>+              ret = qcom_iommu_sec_init(qcom_iommu);
>>+              if (ret) {
>>+                      dev_err(qcom_iommu->dev, "secure init failed: %d\n", ret);
>>+                      goto out_clear_iommu;
>>+              }
>>+              qcom_iommu->secure_init = true;
>>+      }
>>+
>>+      /* TTBRs */
>>+      iommu_writeq(qcom_iommu, ARM_SMMU_CB_TTBR0,
>>+                   pgtbl_cfg.arm_lpae_s1_cfg.ttbr[0] |
>>+                   ((u64)qcom_iommu->asid << TTBRn_ASID_SHIFT));
>>+      iommu_writeq(qcom_iommu, ARM_SMMU_CB_TTBR1,
>>+                   pgtbl_cfg.arm_lpae_s1_cfg.ttbr[1] |
>>+                   ((u64)qcom_iommu->asid << TTBRn_ASID_SHIFT));
>>+
>>+      /* TTBCR */
>>+      iommu_writel(qcom_iommu, ARM_SMMU_CB_TTBCR2,
>>+                   (pgtbl_cfg.arm_lpae_s1_cfg.tcr >> 32) |
>>+                   TTBCR2_SEP_UPSTREAM);
>>+      iommu_writel(qcom_iommu, ARM_SMMU_CB_TTBCR,
>>+                   pgtbl_cfg.arm_lpae_s1_cfg.tcr);
>>+
>>+      /* MAIRs (stage-1 only) */
>>+      iommu_writel(qcom_iommu, ARM_SMMU_CB_S1_MAIR0,
>>+                   pgtbl_cfg.arm_lpae_s1_cfg.mair[0]);
>>+      iommu_writel(qcom_iommu, ARM_SMMU_CB_S1_MAIR1,
>>+                   pgtbl_cfg.arm_lpae_s1_cfg.mair[1]);
>>+
>>+      /* SCTLR */
>>+      reg = SCTLR_CFIE | SCTLR_CFRE | SCTLR_AFE | SCTLR_TRE | SCTLR_M |
>>+              SCTLR_S1_ASIDPNE;
>>+#ifdef __BIG_ENDIAN
>>+      reg |= SCTLR_E;
>>+#endif
>>+      iommu_writel(qcom_iommu, ARM_SMMU_CB_SCTLR, reg);
>>+
>>+      mutex_unlock(&qcom_domain->init_mutex);
>>+
>>+      /* Publish page table ops for map/unmap */
>>+      qcom_domain->pgtbl_ops = pgtbl_ops;
>>+
>>+      return 0;
>>+
>>+out_clear_iommu:
>>+      qcom_domain->iommu = NULL;
>>+out_unlock:
>>+      mutex_unlock(&qcom_domain->init_mutex);
>>+      return ret;
>>+}
>>+
>>+static struct iommu_domain *qcom_iommu_domain_alloc(unsigned type)
>>+{
>>+      struct qcom_iommu_domain *qcom_domain;
>>+
>>+      if (type != IOMMU_DOMAIN_UNMANAGED && type != IOMMU_DOMAIN_DMA)
>>+              return NULL;
>>+      /*
>>+       * Allocate the domain and initialise some of its data structures.
>>+       * We can't really do anything meaningful until we've added a
>>+       * master.
>>+       */
>>+      qcom_domain = kzalloc(sizeof(*qcom_domain), GFP_KERNEL);
>>+      if (!qcom_domain)
>>+              return NULL;
>>+
>>+      if (type == IOMMU_DOMAIN_DMA &&
>>+          iommu_get_dma_cookie(&qcom_domain->domain)) {
>>+              kfree(qcom_domain);
>>+              return NULL;
>>+      }
>>+
>>+      mutex_init(&qcom_domain->init_mutex);
>>+      spin_lock_init(&qcom_domain->pgtbl_lock);
>>+
>>+      return &qcom_domain->domain;
>>+}
>>+
>>+static void qcom_iommu_domain_free(struct iommu_domain *domain)
>>+{
>>+      struct qcom_iommu_domain *qcom_domain = to_qcom_iommu_domain(domain);
>>+      struct qcom_iommu_device *qcom_iommu = qcom_domain->iommu;
>>+
>>+      if (!qcom_iommu)
>>+              return;
>>+
>>+      /*
>>+       * Free the domain resources. We assume that all devices have
>>+       * already been detached.
>>+       */
>>+      iommu_put_dma_cookie(domain);
>>+
>>+      /*
>>+       * Disable the context bank before freeing page table
>>+       */
>>+      iommu_writel(qcom_iommu, ARM_SMMU_CB_SCTLR, 0);
>>+
>
> We need to add a pm_runtime here as well, at this point the device_link between
> master and smmu might not be there any more.

ok

>>+      free_io_pgtable_ops(qcom_domain->pgtbl_ops);
>>+
>>+      kfree(qcom_domain);
>>+}
>>+
>
> <..>
>
>>+static int qcom_iommu_of_xlate(struct device *dev, struct of_phandle_args *args)
>>+{
>>+      struct platform_device *iommu_pdev;
>>+      u32 fwid = 0;
>>+
>>+      if (args->args_count != 0) {
>>+              dev_err(dev, "incorrect number of iommu params found for %s "
>>+                      "(found %d, expected 0)\n",
>>+                      args->np->full_name, args->args_count);
>>+              return -EINVAL;
>>+      }
>>+
>>+      if (!dev->iommu_fwspec->iommu_priv) {
>>+              iommu_pdev = of_find_device_by_node(args->np);
>>+              if (WARN_ON(!iommu_pdev))
>>+                      return -EINVAL;
>>+
>>+              dev->iommu_fwspec->iommu_priv = platform_get_drvdata(iommu_pdev);
>
> This can be done as a part of the add_device callback as well.

there seemed to be a mix in other drivers of doing this at _of_xlate()
vs _add_device()..  I wasn't really sure which was the new shiny way
to do it vs legacy

>>+      }
>>+
>>+      return iommu_fwspec_add_ids(dev, &fwid, 1);
>
> This is not required, we do not have any fwid to add here.

oh, ok

>>+}
>>+
>>+static const struct iommu_ops qcom_iommu_ops = {
>>+      .capable                = qcom_iommu_capable,
>>+      .domain_alloc           = qcom_iommu_domain_alloc,
>>+      .domain_free            = qcom_iommu_domain_free,
>>+      .attach_dev             = qcom_iommu_attach_dev,
>>+      .map                    = qcom_iommu_map,
>>+      .unmap                  = qcom_iommu_unmap,
>>+      .map_sg                 = default_iommu_map_sg,
>>+      .iova_to_phys           = qcom_iommu_iova_to_phys,
>>+      .add_device             = qcom_iommu_add_device,
>>+      .remove_device          = qcom_iommu_remove_device,
>>+      .device_group           = qcom_iommu_device_group,
>>+      .of_xlate               = qcom_iommu_of_xlate,
>>+      .pgsize_bitmap          = SZ_4K | SZ_64K | SZ_1M | SZ_16M,
>>+};
>>+
>>+static const struct of_device_id qcom_iommu_of_match[] = {
>>+// TODO we probably need to use this driver (vs arm-smmu) for all the early
>>+// "B" family devices prior to 8x96 or so.. so maybe having msm8916 in the
>>+// compat name isn't right.. or maybe we just add a bunch more compat strings
>>+// as needed?
>>+      { .compatible = "qcom,msm8916-iommu-context-bank" },
>
> Maybe, qcom,msm-sec-iommu-context-bank, meaning that this driver
> is always for iommu which is secure and we will need an extra binding when
> we try to add secure context banks as well.

ok, esp. if all have same table format and we don't need to pick
lpae-s1 vs aarch64 based on compat string then the more generic
compatible makes sense.

Thanks

BR,
-R
--
To unsubscribe from this list: send the line "unsubscribe linux-arm-msm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html



[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [Linux for Sparc]     [IETF Annouce]     [Security]     [Bugtraq]     [Linux MIPS]     [ECOS]     [Asterisk Internet PBX]     [Linux API]

  Powered by Linux