Re: [PATCH 1/4] ARM: sun9i: Support SMP on A80 with Multi-Cluster Power Management (MCPM)

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 




在 2017-07-25 16:29,Chen-Yu Tsai 写道:
default ARCH_SUNXI
On Tue, Jul 25, 2017 at 3:47 PM, Maxime Ripard
<maxime.ripard@xxxxxxxxxxxxxxxxxx> wrote:
Hi Chen-Yu,

On Tue, Jul 25, 2017 at 01:09:16PM +0800, Chen-Yu Tsai wrote:
The A80 is a big.LITTLE SoC with 1 cluster of 4 Cortex-A7s and
1 cluster of 4 Cortex-A15s.

This patch adds support to bring up the second cluster and thus all
cores using the common MCPM code. Core/cluster power down has not
been implemented, thus CPU hotplugging and the big.LITTLE switcher are
not supported.

Signed-off-by: Chen-Yu Tsai <wens@xxxxxxxx>
---
 arch/arm/mach-sunxi/Kconfig  |  10 ++
 arch/arm/mach-sunxi/Makefile |   1 +
arch/arm/mach-sunxi/mcpm.c | 391 +++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 402 insertions(+)
 create mode 100644 arch/arm/mach-sunxi/mcpm.c

diff --git a/arch/arm/mach-sunxi/Kconfig b/arch/arm/mach-sunxi/Kconfig
index 58153cdf025b..177380548d99 100644
--- a/arch/arm/mach-sunxi/Kconfig
+++ b/arch/arm/mach-sunxi/Kconfig
@@ -47,5 +47,15 @@ config MACH_SUN9I
      bool "Allwinner (sun9i) SoCs support"
      default ARCH_SUNXI
      select ARM_GIC
+     imply MCPM
+
+config SUN9I_A80_MCPM
+     bool "Allwinner A80 Multi-Cluster PM support"
+     depends on MCPM && MACH_SUN9I
+     default MACH_SUN9I
+     select ARM_CCI400_PORT_CTRL
+     help
+       This is needed to provide CPU and cluster power management
+       on Allwinner A80 implementing big.LITTLE.

Do we really need an option for that? We don't provide the option to
disable the CPU SMP operations for the rest of the SoCs.

It was an option as it also required MCPM and CCI400 support to be built.
We could hide it. Or, using mach-hisi as a reference, we could do:

I think a hidden config option is a proper way, as we can then select
this config option in MACH_SUN8I when introducing A83T support.


config MACH_SUN9I
        default ARCH_SUNXI
        select ARM_GIC
        select MCPM if SMP
        select ARM_CCI400_PORT_CTRL if SMP

and in the Makefile:

obj-$(CONFIG_MCPM) += sun9i-mcpm.o


 endif
diff --git a/arch/arm/mach-sunxi/Makefile b/arch/arm/mach-sunxi/Makefile
index 27b168f121a1..e8558912c714 100644
--- a/arch/arm/mach-sunxi/Makefile
+++ b/arch/arm/mach-sunxi/Makefile
@@ -1,2 +1,3 @@
 obj-$(CONFIG_ARCH_SUNXI) += sunxi.o
 obj-$(CONFIG_SMP) += platsmp.o
+obj-$(CONFIG_SUN9I_A80_MCPM) += mcpm.o
diff --git a/arch/arm/mach-sunxi/mcpm.c b/arch/arm/mach-sunxi/mcpm.c
new file mode 100644
index 000000000000..4b6e1d6ae379
--- /dev/null
+++ b/arch/arm/mach-sunxi/mcpm.c
@@ -0,0 +1,391 @@
+/*
+ * Copyright (c) 2015 Chen-Yu Tsai
+ *
+ * Chen-Yu Tsai <wens@xxxxxxxx>
+ *
+ * arch/arm/mach-sunxi/mcpm.c
+ *
+ * Based on arch/arm/mach-exynos/mcpm-exynos.c and Allwinner code
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/arm-cci.h>
+#include <linux/delay.h>
+#include <linux/io.h>
+#include <linux/of_address.h>
+
+#include <asm/cputype.h>
+#include <asm/cp15.h>
+#include <asm/mcpm.h>
+
+/* Upper bounds used by the range checks in the power-up callbacks. */
+#define SUNXI_CPUS_PER_CLUSTER               4
+#define SUNXI_NR_CLUSTERS            2
+
+/* Cluster index treated as Cortex-A15 when the machine is a sun9i-a80. */
+#define SUN9I_A80_A15_CLUSTER                1

Don't we have a way to derive that from the DT ?

Indeed we can.

It would be slightly more complicated though:

node = of_cpu_device_node_get(cluster * SUNXI_CPUS_PER_CLUSTER + cpu);
if (of_device_is_compatible(node, "arm,cortex-a15")) {
        ...
}


+/*
+ * CPUCFG registers, as offsets added to cpucfg_base.
+ * (c) is the cluster index, (n) the core index within the cluster.
+ */
+#define CPUCFG_CX_CTRL_REG0(c)               (0x10 * (c))
+#define CPUCFG_CX_CTRL_REG0_L1_RST_DISABLE(n)        BIT(n)
+#define CPUCFG_CX_CTRL_REG0_L1_RST_DISABLE_ALL       0xf
+#define CPUCFG_CX_CTRL_REG0_L2_RST_DISABLE_A7        BIT(4)
+#define CPUCFG_CX_CTRL_REG0_L2_RST_DISABLE_A15       BIT(0)
+#define CPUCFG_CX_CTRL_REG1(c)               (0x10 * (c) + 0x4)
+#define CPUCFG_CX_CTRL_REG1_ACINACTM BIT(0)
+#define CPUCFG_CX_RST_CTRL(c)                (0x80 + 0x4 * (c))
+#define CPUCFG_CX_RST_CTRL_DBG_SOC_RST       BIT(24)
+#define CPUCFG_CX_RST_CTRL_ETM_RST(n)        BIT(20 + (n))
+#define CPUCFG_CX_RST_CTRL_ETM_RST_ALL       (0xf << 20)
+#define CPUCFG_CX_RST_CTRL_DBG_RST(n)        BIT(16 + (n))
+#define CPUCFG_CX_RST_CTRL_DBG_RST_ALL       (0xf << 16)
+#define CPUCFG_CX_RST_CTRL_H_RST     BIT(12)
+#define CPUCFG_CX_RST_CTRL_L2_RST    BIT(8)
+#define CPUCFG_CX_RST_CTRL_CX_RST(n) BIT(4 + (n))
+#define CPUCFG_CX_RST_CTRL_CORE_RST(n)       BIT(n)
+
+/*
+ * PRCM registers, as offsets added to prcm_base.
+ * (c) is the cluster index, (n)/(cpu) the core index within the cluster.
+ */
+#define PRCM_CPU_PO_RST_CTRL(c)              (0x4 + 0x4 * (c))
+#define PRCM_CPU_PO_RST_CTRL_CORE(n) BIT(n)
+#define PRCM_CPU_PO_RST_CTRL_CORE_ALL        0xf
+#define PRCM_PWROFF_GATING_REG(c)    (0x100 + 0x4 * (c))
+#define PRCM_PWROFF_GATING_REG_CLUSTER       BIT(4)
+#define PRCM_PWROFF_GATING_REG_CORE(n)       BIT(n)
+#define PRCM_PWR_SWITCH_REG(c, cpu) (0x140 + 0x10 * (c) + 0x4 * (cpu))
+#define PRCM_CPU_SOFT_ENTRY_REG              0x164
+
+/* MMIO bases for the two register blocks; mapped in sunxi_mcpm_init(). */
+static void __iomem *cpucfg_base;
+static void __iomem *prcm_base;
+
+/*
+ * Open or close the power clamp of one core through its PRCM power
+ * switch register.
+ *
+ * @cpu:     core index within the cluster
+ * @cluster: cluster index
+ * @enable:  true to open the clamp step by step, true is a no-op when the
+ *           register already reads 0x00; false to write 0xff in one go
+ *
+ * Always returns 0.
+ */
+static int sunxi_cpu_power_switch_set(unsigned int cpu, unsigned int cluster,
+                                   bool enable)
+{
+     /* control sequence from Allwinner A80 user manual v1.2 PRCM section */
+     static const u32 open_steps[] = { 0xff, 0xfe, 0xf8, 0xf0, 0x00 };
+     void __iomem *clamp = prcm_base + PRCM_PWR_SWITCH_REG(cluster, cpu);
+     unsigned int step;
+     u32 state;
+
+     /* the register is read on both paths, as before */
+     state = readl(clamp);
+
+     if (!enable) {
+             writel(0xff, clamp);
+             udelay(10);
+             return 0;
+     }
+
+     if (state == 0x00) {
+             pr_debug("power clamp for cluster %u cpu %u already open\n",
+                      cluster, cpu);
+             return 0;
+     }
+
+     /* each step is followed by a 10us settle delay */
+     for (step = 0; step < ARRAY_SIZE(open_steps); step++) {
+             writel(open_steps[step], clamp);
+             udelay(10);
+     }
+
+     return 0;
+}
+
+/*
+ * MCPM cpu_powerup callback: put one core into reset, open its power
+ * switch and gate, then release the resets again.
+ *
+ * @cpu:     core index within the cluster, must be < SUNXI_CPUS_PER_CLUSTER
+ * @cluster: cluster index, must be < SUNXI_NR_CLUSTERS
+ *
+ * Returns 0 on success, -EINVAL when either index is out of range.
+ */
+static int sunxi_cpu_powerup(unsigned int cpu, unsigned int cluster)
+{
+     bool is_a15;
+     u32 val;
+
+     pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster);
+     if (cluster >= SUNXI_NR_CLUSTERS || cpu >= SUNXI_CPUS_PER_CLUSTER)
+             return -EINVAL;
+
+     /* evaluated once; the A15 cluster of the A80 takes different steps */
+     is_a15 = cluster == SUN9I_A80_A15_CLUSTER &&
+              of_machine_is_compatible("allwinner,sun9i-a80");
+
+     /* assert processor power-on reset */
+     val = readl(prcm_base + PRCM_CPU_PO_RST_CTRL(cluster));
+     val &= ~PRCM_CPU_PO_RST_CTRL_CORE(cpu);
+     writel(val, prcm_base + PRCM_CPU_PO_RST_CTRL(cluster));
+
+     /* Cortex-A7: hold L1 reset disable signal low */
+     if (!is_a15) {
+             val = readl(cpucfg_base + CPUCFG_CX_CTRL_REG0(cluster));
+             val &= ~CPUCFG_CX_CTRL_REG0_L1_RST_DISABLE(cpu);
+             writel(val, cpucfg_base + CPUCFG_CX_CTRL_REG0(cluster));
+     }
+
+     /*
+      * assert processor related resets
+      *
+      * Allwinner code also asserts resets for NEON on A15. According
+      * to ARM manuals, asserting power-on reset is sufficient.
+      */
+     val = readl(cpucfg_base + CPUCFG_CX_RST_CTRL(cluster));
+     val &= ~CPUCFG_CX_RST_CTRL_DBG_RST(cpu);
+     if (!is_a15)
+             val &= ~CPUCFG_CX_RST_CTRL_ETM_RST(cpu);
+     writel(val, cpucfg_base + CPUCFG_CX_RST_CTRL(cluster));
+
+     /* open power switch */
+     sunxi_cpu_power_switch_set(cpu, cluster, true);
+
+     /* clear processor power gate */
+     val = readl(prcm_base + PRCM_PWROFF_GATING_REG(cluster));
+     val &= ~PRCM_PWROFF_GATING_REG_CORE(cpu);
+     writel(val, prcm_base + PRCM_PWROFF_GATING_REG(cluster));
+     udelay(20);
+
+     /* de-assert processor power-on reset */
+     val = readl(prcm_base + PRCM_CPU_PO_RST_CTRL(cluster));
+     val |= PRCM_CPU_PO_RST_CTRL_CORE(cpu);
+     writel(val, prcm_base + PRCM_CPU_PO_RST_CTRL(cluster));
+
+     /* de-assert all processor resets */
+     val = readl(cpucfg_base + CPUCFG_CX_RST_CTRL(cluster));
+     val |= CPUCFG_CX_RST_CTRL_DBG_RST(cpu) |
+            CPUCFG_CX_RST_CTRL_CORE_RST(cpu);
+     if (is_a15)
+             val |= CPUCFG_CX_RST_CTRL_CX_RST(cpu); /* NEON */
+     else
+             val |= CPUCFG_CX_RST_CTRL_ETM_RST(cpu);
+     writel(val, cpucfg_base + CPUCFG_CX_RST_CTRL(cluster));
+
+     return 0;
+}
+
+/*
+ * MCPM cluster_powerup callback: put the cluster into reset with ACINACTM
+ * asserted, clear the cluster power gate, then release the cluster-level
+ * resets and ACINACTM.
+ *
+ * @cluster: cluster index, must be < SUNXI_NR_CLUSTERS
+ *
+ * Returns 0 on success, -EINVAL when the index is out of range.
+ */
+static int sunxi_cluster_powerup(unsigned int cluster)
+{
+     bool is_a15;
+     u32 val;
+
+     pr_debug("%s: cluster %u\n", __func__, cluster);
+     if (cluster >= SUNXI_NR_CLUSTERS)
+             return -EINVAL;
+
+     /* evaluated once; the A15 cluster of the A80 takes different steps */
+     is_a15 = cluster == SUN9I_A80_A15_CLUSTER &&
+              of_machine_is_compatible("allwinner,sun9i-a80");
+
+     /* assert ACINACTM */
+     val = readl(cpucfg_base + CPUCFG_CX_CTRL_REG1(cluster));
+     val |= CPUCFG_CX_CTRL_REG1_ACINACTM;
+     writel(val, cpucfg_base + CPUCFG_CX_CTRL_REG1(cluster));
+
+     /* assert cluster processor power-on resets */
+     val = readl(prcm_base + PRCM_CPU_PO_RST_CTRL(cluster));
+     val &= ~PRCM_CPU_PO_RST_CTRL_CORE_ALL;
+     writel(val, prcm_base + PRCM_CPU_PO_RST_CTRL(cluster));
+
+     /*
+      * assert cluster resets
+      *
+      * Allwinner code also asserts resets for NEON on A15. According
+      * to ARM manuals, asserting power-on reset is sufficient.
+      */
+     val = readl(cpucfg_base + CPUCFG_CX_RST_CTRL(cluster));
+     val &= ~(CPUCFG_CX_RST_CTRL_DBG_SOC_RST |
+              CPUCFG_CX_RST_CTRL_DBG_RST_ALL |
+              CPUCFG_CX_RST_CTRL_H_RST |
+              CPUCFG_CX_RST_CTRL_L2_RST);
+     if (!is_a15)
+             val &= ~CPUCFG_CX_RST_CTRL_ETM_RST_ALL;
+     writel(val, cpucfg_base + CPUCFG_CX_RST_CTRL(cluster));
+
+     /* hold L1/L2 reset disable signals low */
+     val = readl(cpucfg_base + CPUCFG_CX_CTRL_REG0(cluster));
+     if (is_a15) {
+             /* Cortex-A15: hold L2RSTDISABLE low */
+             val &= ~CPUCFG_CX_CTRL_REG0_L2_RST_DISABLE_A15;
+     } else {
+             /* Cortex-A7: hold L1RSTDISABLE and L2RSTDISABLE low */
+             val &= ~(CPUCFG_CX_CTRL_REG0_L1_RST_DISABLE_ALL |
+                      CPUCFG_CX_CTRL_REG0_L2_RST_DISABLE_A7);
+     }
+     writel(val, cpucfg_base + CPUCFG_CX_CTRL_REG0(cluster));
+
+     /* clear cluster power gate */
+     val = readl(prcm_base + PRCM_PWROFF_GATING_REG(cluster));
+     val &= ~PRCM_PWROFF_GATING_REG_CLUSTER;
+     writel(val, prcm_base + PRCM_PWROFF_GATING_REG(cluster));
+     udelay(20);
+
+     /* de-assert cluster resets */
+     val = readl(cpucfg_base + CPUCFG_CX_RST_CTRL(cluster));
+     val |= CPUCFG_CX_RST_CTRL_DBG_SOC_RST |
+            CPUCFG_CX_RST_CTRL_H_RST |
+            CPUCFG_CX_RST_CTRL_L2_RST;
+     writel(val, cpucfg_base + CPUCFG_CX_RST_CTRL(cluster));
+
+     /* de-assert ACINACTM */
+     val = readl(cpucfg_base + CPUCFG_CX_CTRL_REG1(cluster));
+     val &= ~CPUCFG_CX_CTRL_REG1_ACINACTM;
+     writel(val, cpucfg_base + CPUCFG_CX_CTRL_REG1(cluster));
+
+     return 0;
+}
+
+/*
+ * MCPM cpu_cache_disable callback (installed through sunxi_power_ops).
+ * Takes the calling CPU out of coherency via v7_exit_coherency_flush()
+ * with the "louis" flush level.
+ */
+static void sunxi_cpu_cache_disable(void)
+{
+     /* Disable and flush the local CPU cache. */
+     v7_exit_coherency_flush(louis);
+}
+
+/*
+ * Flush all cache levels and disable this cluster's CCI port, without
+ * touching ACINACTM.
+ *
+ * Shared by two callers: sunxi_mcpm_init() passes it to mcpm_loopback(),
+ * and sunxi_cluster_cache_disable() calls it before asserting ACINACTM
+ * for proper cluster cache disable before power down.
+ */
+static void sunxi_cluster_cache_disable_without_axi(void)
+{
+     if (read_cpuid_part() == ARM_CPU_PART_CORTEX_A15) {
+             /*
+              * On the Cortex-A15 we need to disable
+              * L2 prefetching before flushing the cache.
+              * (Writes 0x400 to cp15 op1=1, c15, c0, 3, then isb/dsb.)
+              */
+             asm volatile(
+             "mcr    p15, 1, %0, c15, c0, 3\n"
+             "isb\n"
+             "dsb"
+             : : "r" (0x400));
+     }
+
+     /* Flush all cache levels for this cluster. */
+     v7_exit_coherency_flush(all);
+
+     /*
+      * Disable cluster-level coherency by masking
+      * incoming snoops and DVM messages:
+      */
+     cci_disable_port_by_cpu(read_cpuid_mpidr());
+}
+
+/*
+ * MCPM cluster_cache_disable callback (installed through sunxi_power_ops).
+ *
+ * Flushes the cluster caches and disables its CCI port, then asserts
+ * ACINACTM for the cluster the calling CPU belongs to. The cluster index
+ * is taken from affinity level 1 of the CPU's MPIDR.
+ */
+static void sunxi_cluster_cache_disable(void)
+{
+     unsigned int cluster = MPIDR_AFFINITY_LEVEL(read_cpuid_mpidr(), 1);
+     u32 reg;
+
+     /* debug level, like the trace messages in the power-up callbacks */
+     pr_debug("%s: cluster %u\n", __func__, cluster);
+
+     sunxi_cluster_cache_disable_without_axi();
+
+     /* last man standing, assert ACINACTM */
+     reg = readl(cpucfg_base + CPUCFG_CX_CTRL_REG1(cluster));
+     reg |= CPUCFG_CX_CTRL_REG1_ACINACTM;
+     writel(reg, cpucfg_base + CPUCFG_CX_CTRL_REG1(cluster));
+}
+
+/*
+ * MCPM platform callbacks, registered by sunxi_mcpm_init() through
+ * mcpm_platform_register(). Only power-up and cache-disable hooks are
+ * filled in; no power-down hooks are provided here.
+ */
+static const struct mcpm_platform_ops sunxi_power_ops = {
+     .cpu_powerup            = sunxi_cpu_powerup,
+     .cluster_powerup        = sunxi_cluster_powerup,
+     .cpu_cache_disable      = sunxi_cpu_cache_disable,
+     .cluster_cache_disable  = sunxi_cluster_cache_disable,
+};
+
+/*
+ * Enable cluster-level coherency, in preparation for turning on the MMU.
+ *
+ * Also enable regional clock gating and L2 data latency settings for
+ * Cortex-A15.
+ *
+ * Passed to mcpm_sync_init() by sunxi_mcpm_init(). The function is naked,
+ * so the body is pure assembly: affinity_level is read from r0 (first
+ * argument register), r1/r2 are used as scratch. The CPU part number is
+ * extracted from MIDR and compared against ARM_CPU_PART_CORTEX_A15; the
+ * A15-only block is skipped otherwise. When affinity_level is 1 the code
+ * tail-calls cci_enable_port_for_self, for any other value it returns.
+ */
+static void __naked sunxi_power_up_setup(unsigned int affinity_level)
+{
+     asm volatile (
+             "mrc    p15, 0, r1, c0, c0, 0\n"
+             "movw r2, #" __stringify(ARM_CPU_PART_MASK & 0xffff) "\n"
+             "movt r2, #" __stringify(ARM_CPU_PART_MASK >> 16) "\n"
+             "and    r1, r1, r2\n"
+             "movw r2, #" __stringify(ARM_CPU_PART_CORTEX_A15 & 0xffff) "\n"
+             "movt r2, #" __stringify(ARM_CPU_PART_CORTEX_A15 >> 16) "\n"
+             "cmp    r1, r2\n"
+             "bne    not_a15\n"
+
+             /* The following is Cortex-A15 specific */
+
+             /* L2CTRL: Enable CPU regional clock gates */
+             "mrc p15, 1, r1, c15, c0, 4\n"
+             "orr r1, r1, #(0x1<<31)\n"
+             "mcr p15, 1, r1, c15, c0, 4\n"
+
+             /* L2ACTLR */
+             "mrc p15, 1, r1, c15, c0, 0\n"
+             /* Enable L2, GIC, and Timer regional clock gates */
+             "orr r1, r1, #(0x1<<26)\n"
+             /* Disable clean/evict from being pushed to external */
+             "orr r1, r1, #(0x1<<3)\n"
+             "mcr p15, 1, r1, c15, c0, 0\n"
+
+             /* L2 data RAM latency */
+             "mrc p15, 1, r1, c9, c0, 2\n"
+             "bic r1, r1, #(0x7<<0)\n"
+             "orr r1, r1, #(0x3<<0)\n"
+             "mcr p15, 1, r1, c9, c0, 2\n"
+
+             /* End of Cortex-A15 specific setup */
+             "not_a15:\n"
+
+             "cmp    r0, #1\n"
+             "bxne   lr\n"
+             "b      cci_enable_port_for_self"
+     );
+}
+
+/*
+ * Program the physical address of mcpm_entry_point into the PRCM soft
+ * entry register.
+ *
+ * writel() is used rather than __raw_writel() so the value is stored in
+ * the device's little-endian layout regardless of kernel endianness and
+ * is ordered against earlier memory accesses. __pa_symbol() is the
+ * address translation meant for kernel image symbols.
+ */
+static void sunxi_mcpm_setup_entry_point(void)
+{
+     writel(__pa_symbol(mcpm_entry_point),
+            prcm_base + PRCM_CPU_SOFT_ENTRY_REG);
+}
+
+/*
+ * Map the CPUCFG and PRCM register blocks, register the MCPM callbacks
+ * and install the MCPM SMP operations and secondary entry point.
+ *
+ * Returns 0 on success. Returns -ENODEV when the machine is not a
+ * sun9i-a80, the CCI has not been probed, or a required device tree node
+ * is missing; -ENOMEM when a register block cannot be mapped; otherwise
+ * the error from mcpm_platform_register(), mcpm_sync_init() or
+ * mcpm_loopback(). Every mapping made so far is released on failure.
+ */
+static int __init sunxi_mcpm_init(void)
+{
+     struct device_node *node;
+     int ret;
+
+     if (!of_machine_is_compatible("allwinner,sun9i-a80"))
+             return -ENODEV;
+
+     if (!cci_probed())
+             return -ENODEV;
+
+     node = of_find_compatible_node(NULL, NULL,
+                     "allwinner,sun9i-a80-cpucfg");
+     if (!node)
+             return -ENODEV;
+
+     cpucfg_base = of_iomap(node, 0);
+     of_node_put(node);
+     if (!cpucfg_base) {
+             pr_err("%s: failed to map CPUCFG registers\n", __func__);
+             return -ENOMEM;
+     }

Can't we request the region as well?

Yes we can! But only for the CPUCFG registers. The PRCM block is
shared with all the PRCM block clock drivers. :(


+
+     node = of_find_compatible_node(NULL, NULL,
+                     "allwinner,sun9i-a80-prcm");
+     if (!node) {
+             /* CPUCFG is already mapped at this point; do not leak it */
+             ret = -ENODEV;
+             goto err_unmap_cpucfg;
+     }
+
+     prcm_base = of_iomap(node, 0);
+
+     of_node_put(node);
+     if (!prcm_base) {
+             pr_err("%s: failed to map PRCM registers\n", __func__);
+             /* prcm_base is NULL here; only CPUCFG needs unmapping */
+             ret = -ENOMEM;
+             goto err_unmap_cpucfg;
+     }
+
+     ret = mcpm_platform_register(&sunxi_power_ops);
+     if (!ret)
+             ret = mcpm_sync_init(sunxi_power_up_setup);
+     if (!ret)
+             /* do not disable AXI master as no one will re-enable it */
+             ret = mcpm_loopback(sunxi_cluster_cache_disable_without_axi);
+     if (ret)
+             goto err_unmap_prcm;
+
+     mcpm_smp_set_ops();
+
+     pr_info("sunxi MCPM support installed\n");
+
+     sunxi_mcpm_setup_entry_point();
+
+     return 0;
+
+err_unmap_prcm:
+     iounmap(prcm_base);
+err_unmap_cpucfg:
+     iounmap(cpucfg_base);
+     return ret;
+}

It looks mostly good, and I would replace the sunxi by sun9i, and call
that file sun9i-mcpm.c

I was hoping to reuse the file for the A83T, so it was sunxi-mcpm.c
or just mcpm. Most of the stuff is similar, except the A83T has two
revisions and one of them has two gate/power bits swapped. :(

ChenYu


Thanks!
Maxime

--
Maxime Ripard, Free Electrons
Embedded Linux and Kernel engineering
http://free-electrons.com

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@xxxxxxxxxxxxxxxxxxx
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
--
To unsubscribe from this list: send the line "unsubscribe devicetree" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html



[Index of Archives]     [Device Tree Compiler]     [Device Tree Spec]     [Linux Driver Backports]     [Video for Linux]     [Linux USB Devel]     [Linux PCI Devel]     [Linux Audio Users]     [Linux Kernel]     [Linux SCSI]     [XFree86]     [Yosemite Backpacking]


  Powered by Linux