On Mon, Jan 08, 2024 at 11:22:34AM -0800, Evan Green wrote:
> On Wed, Dec 27, 2023 at 9:38 AM Charlie Jenkins <charlie@xxxxxxxxxxxx> wrote:
> >
> > Support static branches depending on the value of misaligned accesses.
> > This will be used by a later patch in the series. All online cpus must
> > be considered "fast" for this static branch to be flipped.
> >
> > Signed-off-by: Charlie Jenkins <charlie@xxxxxxxxxxxx>
>
> This is fancier than I would have gone for, I probably would have
> punted on heterogeneous hotplug out of laziness for now. However, what
> you've done looks smart, in that we'll basically flip the branch if at
> any moment all the online CPUs are fast. I've got some nits below, but
> won't withhold my review for them (making them optional I suppose :)).
>
> Reviewed-by: Evan Green <evan@xxxxxxxxxxxx>
>

Thanks!

> > ---
> >  arch/riscv/include/asm/cpufeature.h |  2 +
> >  arch/riscv/kernel/cpufeature.c      | 89 +++++++++++++++++++++++++++++++++++--
> >  2 files changed, 87 insertions(+), 4 deletions(-)
> >
> > diff --git a/arch/riscv/include/asm/cpufeature.h b/arch/riscv/include/asm/cpufeature.h
> > index a418c3112cd6..7b129e5e2f07 100644
> > --- a/arch/riscv/include/asm/cpufeature.h
> > +++ b/arch/riscv/include/asm/cpufeature.h
> > @@ -133,4 +133,6 @@ static __always_inline bool riscv_cpu_has_extension_unlikely(int cpu, const unsi
> >          return __riscv_isa_extension_available(hart_isa[cpu].isa, ext);
> >  }
> >
> > +DECLARE_STATIC_KEY_FALSE(fast_misaligned_access_speed_key);
> > +
> >  #endif
> > diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
> > index b3785ffc1570..dfd716b93565 100644
> > --- a/arch/riscv/kernel/cpufeature.c
> > +++ b/arch/riscv/kernel/cpufeature.c
> > @@ -8,8 +8,10 @@
> >
> >  #include <linux/acpi.h>
> >  #include <linux/bitmap.h>
> > +#include <linux/cpu.h>
> >  #include <linux/cpuhotplug.h>
> >  #include <linux/ctype.h>
> > +#include <linux/jump_label.h>
> >  #include <linux/log2.h>
> >  #include <linux/memory.h>
> >  #include <linux/module.h>
> > @@ -44,6 +46,8 @@ struct riscv_isainfo hart_isa[NR_CPUS];
> >  /* Performance information */
> >  DEFINE_PER_CPU(long, misaligned_access_speed);
> >
> > +static cpumask_t fast_misaligned_access;
> > +
> >  /**
> >   * riscv_isa_extension_base() - Get base extension word
> >   *
> > @@ -643,6 +647,16 @@ static int check_unaligned_access(void *param)
> >                  (speed == RISCV_HWPROBE_MISALIGNED_FAST) ? "fast" : "slow");
> >
> >          per_cpu(misaligned_access_speed, cpu) = speed;
> > +
> > +        /*
> > +         * Set the value of fast_misaligned_access of a CPU. These operations
> > +         * are atomic to avoid race conditions.
> > +         */
> > +        if (speed == RISCV_HWPROBE_MISALIGNED_FAST)
> > +                cpumask_set_cpu(cpu, &fast_misaligned_access);
> > +        else
> > +                cpumask_clear_cpu(cpu, &fast_misaligned_access);
> > +
> >          return 0;
> >  }
> >
> > @@ -655,13 +669,70 @@ static void check_unaligned_access_nonboot_cpu(void *param)
> >                  check_unaligned_access(pages[cpu]);
> >  }
> >
> > +DEFINE_STATIC_KEY_FALSE(fast_misaligned_access_speed_key);
> > +
> > +static int exclude_set_unaligned_access_static_branches(int cpu)
> > +{
> > +        /*
> > +         * Same as set_unaligned_access_static_branches, except excludes the
> > +         * given CPU from the result. When a CPU is hotplugged into an offline
> > +         * state, this function is called before the CPU is set to offline in
> > +         * the cpumask, and thus the CPU needs to be explicitly excluded.
> > +         */
> > +        cpumask_t online_fast_misaligned_access;
> > +
> > +        cpumask_and(&online_fast_misaligned_access, &fast_misaligned_access, cpu_online_mask);
> > +        cpumask_clear_cpu(cpu, &online_fast_misaligned_access);
> > +
> > +        if (cpumask_weight(&online_fast_misaligned_access) == (num_online_cpus() - 1))
> > +                static_branch_enable_cpuslocked(&fast_misaligned_access_speed_key);
> > +        else
> > +                static_branch_disable_cpuslocked(&fast_misaligned_access_speed_key);
> > +
> > +        return 0;
> > +}
>
> A minor nit: the functions above and below are looking a little
> copy/pasty, and lead to multiple spots where the static branch gets
> changed. You could make a third function that actually does the
> setting with parameters, then these two could call it in different
> ways. The return types also don't need to be int, since you always
> return 0. Something like:
>
> static void modify_unaligned_access_branches(cpumask_t *mask, int weight)
> {
>         if (cpumask_weight(mask) == weight) {
>                 static_branch_enable_cpuslocked(&fast_misaligned_access_speed_key);
>         } else {
>                 static_branch_disable_cpuslocked(&fast_misaligned_access_speed_key);
>         }
> }
>
> static void set_unaligned_access_branches(void)
> {
>         cpumask_t fast_and_online;
>
>         cpumask_and(&fast_and_online, &fast_misaligned_access, cpu_online_mask);
>         modify_unaligned_access_branches(&fast_and_online, num_online_cpus());
> }
>
> static void set_unaligned_access_branches_except_cpu(unsigned int cpu)
> {
>         cpumask_t fast_except_me;
>
>         cpumask_and(&fast_except_me, &fast_misaligned_access, cpu_online_mask);
>         cpumask_clear_cpu(cpu, &fast_except_me);
>         modify_unaligned_access_branches(&fast_except_me, num_online_cpus() - 1);
> }
>

Great suggestions, I will apply these changes and send out a new version.

- Charlie

> > +
> > +static int set_unaligned_access_static_branches(void)
> > +{
> > +        /*
> > +         * This will be called after check_unaligned_access_all_cpus so the
> > +         * result of unaligned access speed for all CPUs will be available.
> > +         *
> > +         * To avoid the number of online cpus changing between reading
> > +         * cpu_online_mask and calling num_online_cpus, cpus_read_lock must be
> > +         * held before calling this function.
> > +         */
> > +        cpumask_t online_fast_misaligned_access;
> > +
> > +        cpumask_and(&online_fast_misaligned_access, &fast_misaligned_access, cpu_online_mask);
> > +
> > +        if (cpumask_weight(&online_fast_misaligned_access) == num_online_cpus())
> > +                static_branch_enable_cpuslocked(&fast_misaligned_access_speed_key);
> > +        else
> > +                static_branch_disable_cpuslocked(&fast_misaligned_access_speed_key);
> > +
> > +        return 0;
> > +}
> > +
> > +static int lock_and_set_unaligned_access_static_branch(void)
> > +{
> > +        cpus_read_lock();
> > +        set_unaligned_access_static_branches();
> > +        cpus_read_unlock();
> > +
> > +        return 0;
> > +}
> > +
> > +arch_initcall_sync(lock_and_set_unaligned_access_static_branch);
> > +
> >  static int riscv_online_cpu(unsigned int cpu)
> >  {
> >          static struct page *buf;
> >
> >          /* We are already set since the last check */
> >          if (per_cpu(misaligned_access_speed, cpu) != RISCV_HWPROBE_MISALIGNED_UNKNOWN)
> > -                return 0;
> > +                goto exit;
> >
> >          buf = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER);
> >          if (!buf) {
> > @@ -671,7 +742,14 @@ static int riscv_online_cpu(unsigned int cpu)
> >
> >          check_unaligned_access(buf);
> >          __free_pages(buf, MISALIGNED_BUFFER_ORDER);
> > -        return 0;
> > +
> > +exit:
> > +        return set_unaligned_access_static_branches();
> > +}
> > +
> > +static int riscv_offline_cpu(unsigned int cpu)
> > +{
> > +        return exclude_set_unaligned_access_static_branches(cpu);
> >  }
> >
> >  /* Measure unaligned access on all CPUs present at boot in parallel. */
> > @@ -705,9 +783,12 @@ static int check_unaligned_access_all_cpus(void)
> >          /* Check core 0. */
> >          smp_call_on_cpu(0, check_unaligned_access, bufs[0], true);
> >
> > -        /* Setup hotplug callback for any new CPUs that come online. */
> > +        /*
> > +         * Setup hotplug callbacks for any new CPUs that come online or go
> > +         * offline.
> > +         */
> >          cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "riscv:online",
> > -                                  riscv_online_cpu, NULL);
> > +                                  riscv_online_cpu, riscv_offline_cpu);
> >
> >  out:
> >          unaligned_emulation_finish();
> >
> > --
> > 2.43.0
> >
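
As a side note on how this is meant to be consumed: the key only goes true
while every online CPU has probed as "fast", so the later patch in the
series can guard its unaligned fast path with an ordinary jump-label check.
A minimal sketch of such a consumer (the helper name below is illustrative,
not necessarily what the later patch will use):

#include <linux/jump_label.h>
#include <asm/cpufeature.h>

/* Illustrative sketch only; not part of this patch. */
static bool misaligned_access_fast_on_all_cpus(void)
{
        /*
         * The branch is patched at runtime whenever
         * static_branch_enable/disable_cpuslocked() flips the key above,
         * so the common case costs a single nop/jump rather than a load
         * and compare on every call.
         */
        return static_branch_likely(&fast_misaligned_access_speed_key);
}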