On Thu, Mar 26, 2015 at 06:18:14PM +0800, chenhui zhao wrote: > Implemented CPU hotplug on e500mc, e5500 and e6500, and support > multiple threads mode and 64-bits mode. > > For e6500 with two threads, if one thread is online, it can > enable/disable the other thread in the same core. If two threads of > one core are offline, the core will enter the PH20 state (a low power > state). When the core is up again, Thread0 is up first, and it will be > bound with the present booting cpu. This way, all CPUs can hotplug > separately. > > Signed-off-by: Chenhui Zhao <chenhui.zhao@xxxxxxxxxxxxx> > --- > arch/powerpc/Kconfig | 2 +- > arch/powerpc/include/asm/fsl_pm.h | 4 + > arch/powerpc/include/asm/smp.h | 2 + > arch/powerpc/kernel/head_64.S | 20 +++-- > arch/powerpc/kernel/smp.c | 5 ++ > arch/powerpc/platforms/85xx/smp.c | 182 +++++++++++++++++++++++++++++--------- > arch/powerpc/sysdev/fsl_rcpm.c | 56 ++++++++++++ > 7 files changed, 220 insertions(+), 51 deletions(-) Please factor out changes to generic code (including but not limited to cur_boot_cpu and PIR handling) into separate patches with clear explanations. > diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig > index 22b0940..9846c83 100644 > --- a/arch/powerpc/Kconfig > +++ b/arch/powerpc/Kconfig > @@ -380,7 +380,7 @@ config SWIOTLB > config HOTPLUG_CPU > bool "Support for enabling/disabling CPUs" > depends on SMP && (PPC_PSERIES || \ > - PPC_PMAC || PPC_POWERNV || (PPC_85xx && !PPC_E500MC)) > + PPC_PMAC || PPC_POWERNV || FSL_SOC_BOOKE) > ---help--- > Say Y here to be able to disable and re-enable individual > CPUs at runtime on SMP machines. > diff --git a/arch/powerpc/include/asm/fsl_pm.h b/arch/powerpc/include/asm/fsl_pm.h > index bbe6089..579f495 100644 > --- a/arch/powerpc/include/asm/fsl_pm.h > +++ b/arch/powerpc/include/asm/fsl_pm.h > @@ -34,6 +34,10 @@ struct fsl_pm_ops { > void (*cpu_enter_state)(int cpu, int state); > /* exit the CPU from the specified state */ > void (*cpu_exit_state)(int cpu, int state); > + /* cpu up */ > + void (*cpu_up)(int cpu); Again, this sort of comment is useless. Tell us what "cpu up" *does*, when it should be called, etc. > @@ -189,16 +193,14 @@ _GLOBAL(fsl_secondary_thread_init) > isync > > /* > - * Fix PIR to match the linear numbering in the device tree. > - * > - * On e6500, the reset value of PIR uses the low three bits for > - * the thread within a core, and the upper bits for the core > - * number. There are two threads per core, so shift everything > - * but the low bit right by two bits so that the cpu numbering is > - * continuous. Why are you getting rid of this? If it's to avoid doing it twice on the same thread, in my work-in-progress kexec patches I instead check to see whether BUCSR has already been set up -- if it has, I assume we've already been here. > + * The current thread has been in 64-bit mode, > + * see the value of TMRN_IMSR. I don't see what the relevance of this comment is here. > + * compute the address of __cur_boot_cpu > */ > - mfspr r3, SPRN_PIR > - rlwimi r3, r3, 30, 2, 30 > + bl 10f > +10: mflr r22 > + addi r22,r22,(__cur_boot_cpu - 10b) > + lwz r3,0(r22) Please save non-volatile registers for things that need to stick around for a while. > mtspr SPRN_PIR, r3 If __cur_boot_cpu is meant to be the PIR of the currently booting CPU, it's a misleading. It looks like it's supposed to have something to do with the boot cpu (not "booting"). Also please don't put leading underscores on symbols just because the adjacent symbols have them. > -#ifdef CONFIG_HOTPLUG_CPU > +#ifdef CONFIG_PPC_E500MC > +static void qoriq_cpu_wait_die(void) > +{ > + unsigned int cpu = smp_processor_id(); > + > + hard_irq_disable(); > + /* mask all irqs to prevent cpu wakeup */ > + qoriq_pm_ops->irq_mask(cpu); > + idle_task_exit(); > + > + mtspr(SPRN_TCR, 0); > + mtspr(SPRN_TSR, mfspr(SPRN_TSR)); > + > + cur_cpu_spec->cpu_flush_caches(); > + > + generic_set_cpu_dead(cpu); > + smp_mb(); Comment memory barriers, as checkpatch says. > + while (1) > + ; Indent the ; > @@ -174,17 +232,29 @@ static inline u32 read_spin_table_addr_l(void *spin_table) > static void wake_hw_thread(void *info) > { > void fsl_secondary_thread_init(void); > - unsigned long imsr1, inia1; > + unsigned long imsr, inia; > int nr = *(const int *)info; > - > - imsr1 = MSR_KERNEL; > - inia1 = *(unsigned long *)fsl_secondary_thread_init; > - > - mttmr(TMRN_IMSR1, imsr1); > - mttmr(TMRN_INIA1, inia1); > - mtspr(SPRN_TENS, TEN_THREAD(1)); > + int hw_cpu = get_hard_smp_processor_id(nr); > + int thread_idx = cpu_thread_in_core(hw_cpu); > + > + __cur_boot_cpu = (u32)hw_cpu; > + imsr = MSR_KERNEL; > + inia = *(unsigned long *)fsl_secondary_thread_init; > + smp_mb(); > + if (thread_idx == 0) { > + mttmr(TMRN_IMSR0, imsr); > + mttmr(TMRN_INIA0, inia); > + } else { > + mttmr(TMRN_IMSR1, imsr); > + mttmr(TMRN_INIA1, inia); > + } > + isync(); > + mtspr(SPRN_TENS, TEN_THREAD(thread_idx)); Support for waking a secondary core should be a separate patch (I have similar code on the way for kexec). Likewise adding smp_mb()/isync() if it's really needed. In general, this patch tries to do too much at once. > smp_generic_kick_cpu(nr); > +#ifdef CONFIG_HOTPLUG_CPU > + generic_set_cpu_up(nr); > +#endif > } > #endif > > @@ -203,28 +273,46 @@ static int smp_85xx_kick_cpu(int nr) > > pr_debug("smp_85xx_kick_cpu: kick CPU #%d\n", nr); > > +#ifdef CONFIG_HOTPLUG_CPU > + sync_tb = 0; > + smp_mb(); > +#endif Timebase synchronization should also be separate. > #ifdef CONFIG_PPC64 > - /* Threads don't use the spin table */ > - if (cpu_thread_in_core(nr) != 0) { > + if (threads_per_core > 1) { > int primary = cpu_first_thread_sibling(nr); > > if (WARN_ON_ONCE(!cpu_has_feature(CPU_FTR_SMT))) > return -ENOENT; > > - if (cpu_thread_in_core(nr) != 1) { > - pr_err("%s: cpu %d: invalid hw thread %d\n", > - __func__, nr, cpu_thread_in_core(nr)); > - return -ENOENT; > + /* > + * If either one of threads in the same core is online, > + * use the online one to start the other. > + */ > + if (cpu_online(primary) || cpu_online(primary + 1)) { > + qoriq_pm_ops->cpu_up(nr); What if we don't have qoriq_pm_ops (e.g. VM guest, or some failure)? > + if (cpu_online(primary)) > + smp_call_function_single(primary, > + wake_hw_thread, &nr, 1); > + else > + smp_call_function_single(primary + 1, > + wake_hw_thread, &nr, 1); > + return 0; > } > - > - if (!cpu_online(primary)) { > - pr_err("%s: cpu %d: primary %d not online\n", > - __func__, nr, primary); > - return -ENOENT; > + /* > + * If both threads are offline, reset core to start. > + * When core is up, Thread 0 always gets up first, > + * so bind the current logical cpu with Thread 0. > + */ What if the core is not in a PM state that requires a reset? Where does this reset occur? > + if (hw_cpu != cpu_first_thread_sibling(hw_cpu)) { > + int hw_cpu1, hw_cpu2; > + > + hw_cpu1 = get_hard_smp_processor_id(primary); > + hw_cpu2 = get_hard_smp_processor_id(primary + 1); > + set_hard_smp_processor_id(primary, hw_cpu2); > + set_hard_smp_processor_id(primary + 1, hw_cpu1); > + /* get new physical cpu id */ > + hw_cpu = get_hard_smp_processor_id(nr); Why are you swapping the hard smp ids? > } > - > - smp_call_function_single(primary, wake_hw_thread, &nr, 0); > - return 0; > } > #endif > > @@ -252,11 +340,7 @@ static int smp_85xx_kick_cpu(int nr) > spin_table = phys_to_virt(*cpu_rel_addr); > > local_irq_save(flags); > -#ifdef CONFIG_PPC32 > #ifdef CONFIG_HOTPLUG_CPU > - /* Corresponding to generic_set_cpu_dead() */ > - generic_set_cpu_up(nr); > - Why did you move this? > if (system_state == SYSTEM_RUNNING) { > /* > * To keep it compatible with old boot program which uses > @@ -269,11 +353,16 @@ static int smp_85xx_kick_cpu(int nr) > out_be32(&spin_table->addr_l, 0); > flush_spin_table(spin_table); > > +#ifdef CONFIG_PPC_E500MC > + qoriq_pm_ops->cpu_up(nr); > +#endif Again, you've killed a VM guest kernel (this time, even if the guest doesn't see SMT). > @@ -489,13 +586,16 @@ void __init mpc85xx_smp_init(void) > __func__); > return; > } > - smp_85xx_ops.give_timebase = mpc85xx_give_timebase; > - smp_85xx_ops.take_timebase = mpc85xx_take_timebase; > -#ifdef CONFIG_HOTPLUG_CPU > - ppc_md.cpu_die = smp_85xx_mach_cpu_die; > -#endif You're moving this from a place that only runs when guts is found... > } > > + smp_85xx_ops.cpu_die = generic_cpu_die; > + ppc_md.cpu_die = smp_85xx_mach_cpu_die; > +#endif > + smp_85xx_ops.give_timebase = mpc85xx_give_timebase; > + smp_85xx_ops.take_timebase = mpc85xx_take_timebase; > + smp_85xx_ops.cpu_disable = generic_cpu_disable; > +#endif /* CONFIG_HOTPLUG_CPU */ ...to a place that runs unconditionally. Again, you're breaking VM guests. -Scott -- To unsubscribe from this list: send the line "unsubscribe devicetree" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html