Re: [PATCH v7 5/9] x86/smpboot: Split up native_cpu_up into separate phases and document them

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Tue, Feb 07, 2023 at 11:04:32PM +0000, Usama Arif wrote:
> From: David Woodhouse <dwmw@xxxxxxxxxxxx>
>
> There are four logical parts to what native_cpu_up() does on the BSP (or
> on the controlling CPU for a later hotplug):
>
>  1) Wake the AP by sending the INIT/SIPI/SIPI sequence.
>
>  2) Wait for the AP to make it as far as wait_for_master_cpu() which
>     sets that CPU's bit in cpu_initialized_mask, then sets the bit in
>     cpu_callout_mask to let the AP proceed through cpu_init().
>
>  3) Waits for the AP to finish cpu_init() and get as far as the
>     smp_callin() call, which sets that CPU's bit in cpu_callin_mask.
>
>  4) Perform the TSC synchronization and wait for the AP to actually
>     mark itself online in cpu_online_mask.
>
> In preparation to allow these phases to operate in parallel on multiple
> APs, split them out into separate functions and document the interactions
> a little more clearly in both the BSP and AP code paths.
>
> No functional change intended.
>
> [Usama Arif: fixed rebase conflict]
> Signed-off-by: David Woodhouse <dwmw@xxxxxxxxxxxx>
> Signed-off-by: Usama Arif <usama.arif@xxxxxxxxxxxxx>
> ---
>  arch/x86/kernel/smpboot.c | 182 +++++++++++++++++++++++++++-----------
>  1 file changed, 128 insertions(+), 54 deletions(-)
>
> diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
> index 3a793772a2aa..eee717086ab7 100644
> --- a/arch/x86/kernel/smpboot.c
> +++ b/arch/x86/kernel/smpboot.c
> @@ -204,6 +204,10 @@ static void smp_callin(void)
>
>  	wmb();
>
> +	/*
> +	 * This runs the AP through all the cpuhp states to its target
> +	 * state (CPUHP_ONLINE in the case of serial bringup).
> +	 */
>  	notify_cpu_starting(cpuid);
>
>  	/*
> @@ -231,17 +235,33 @@ static void notrace start_secondary(void *unused)
>  	load_cr3(swapper_pg_dir);
>  	__flush_tlb_all();
>  #endif
> +	/*
> +	 * Sync point with do_wait_cpu_initialized(). On boot, all secondary
> +	 * CPUs reach this stage after receiving INIT/SIPI from do_cpu_up()
> +	 * in the x86/cpu:kick cpuhp stage. At the start of cpu_init() they
> +	 * will wait for do_wait_cpu_initialized() to set their bit in
> +	 * smp_callout_mask to release them.

The last sentence of the comment looks confused. The fact is:

For serial case, The BSP waits AP to set cpu_initialized_mask from
wait_for_master_cpu() after fired INIT/SIPI, then AP starts to wait
cpu_callout_mask set by BSP from do_boot_cpu().

Or the comments below "Bringup step two:..." which also looks clear
enough then above.

> +	 */
>  	cpu_init_secondary();
>  	rcu_cpu_starting(raw_smp_processor_id());
>  	x86_cpuinit.early_percpu_clock_init();
> +
> +	/*
> +	 * Sync point with do_wait_cpu_callin(). The AP doesn't wait here
> +	 * but just sets the bit to let the controlling CPU (BSP) know that
> +	 * it's got this far.
> +	 */
>  	smp_callin();
>
>  	enable_start_cpu0 = 0;
>
>  	/* otherwise gcc will move up smp_processor_id before the cpu_init */
>  	barrier();
> +
>  	/*
> -	 * Check TSC synchronization with the boot CPU:
> +	 * Check TSC synchronization with the boot CPU (or whichever CPU
> +	 * is controlling the bringup). It will do its part of this from
> +	 * do_wait_cpu_online(), making it an implicit sync point.
>  	 */
>  	check_tsc_sync_target();
>
> @@ -254,6 +274,7 @@ static void notrace start_secondary(void *unused)
>  	 * half valid vector space.
>  	 */
>  	lock_vector_lock();
> +	/* Sync point with do_wait_cpu_online() */
>  	set_cpu_online(smp_processor_id(), true);
>  	lapic_online();
>  	unlock_vector_lock();
> @@ -1083,7 +1104,6 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle,
>  	unsigned long start_ip = real_mode_header->trampoline_start;
>
>  	unsigned long boot_error = 0;
> -	unsigned long timeout;
>
>  #ifdef CONFIG_X86_64
>  	/* If 64-bit wakeup method exists, use the 64-bit mode trampoline IP */
> @@ -1144,55 +1164,94 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle,
>  		boot_error = wakeup_cpu_via_init_nmi(cpu, start_ip, apicid,
>  						     cpu0_nmi_registered);
>
> -	if (!boot_error) {
> -		/*
> -		 * Wait 10s total for first sign of life from AP
> -		 */
> -		boot_error = -1;
> -		timeout = jiffies + 10*HZ;
> -		while (time_before(jiffies, timeout)) {
> -			if (cpumask_test_cpu(cpu, cpu_initialized_mask)) {
> -				/*
> -				 * Tell AP to proceed with initialization
> -				 */
> -				cpumask_set_cpu(cpu, cpu_callout_mask);
> -				boot_error = 0;
> -				break;
> -			}
> -			schedule();
> -		}
> -	}
> +	return boot_error;
> +}
>
> -	if (!boot_error) {
> -		/*
> -		 * Wait till AP completes initial initialization
> -		 */
> -		while (!cpumask_test_cpu(cpu, cpu_callin_mask)) {
> -			/*
> -			 * Allow other tasks to run while we wait for the
> -			 * AP to come online. This also gives a chance
> -			 * for the MTRR work(triggered by the AP coming online)
> -			 * to be completed in the stop machine context.
> -			 */
> -			schedule();
> -		}
> +static int do_wait_cpu_cpumask(unsigned int cpu, const struct cpumask *mask)
> +{
> +	unsigned long timeout;
> +
> +	/*
> +	 * Wait up to 10s for the CPU to report in.
> +	 */
> +	timeout = jiffies + 10*HZ;
> +	while (time_before(jiffies, timeout)) {
> +		if (cpumask_test_cpu(cpu, mask))
> +			return 0;
> +
> +		schedule();
>  	}
> +	return -1;
> +}
>
> -	if (x86_platform.legacy.warm_reset) {
> -		/*
> -		 * Cleanup possible dangling ends...
> -		 */
> -		smpboot_restore_warm_reset_vector();
> +/*
> + * Bringup step two: Wait for the target AP to reach cpu_init_secondary()
> + * and thus wait_for_master_cpu(), then set cpu_callout_mask to allow it
> + * to proceed.  The AP will then proceed past setting its 'callin' bit
> + * and end up waiting in check_tsc_sync_target() until we reach
> + * do_wait_cpu_online() to tend to it.
> + */
> +static int do_wait_cpu_initialized(unsigned int cpu)
> +{
> +	/*
> +	 * Wait for first sign of life from AP.
> +	 */
> +	if (do_wait_cpu_cpumask(cpu, cpu_initialized_mask))
> +		return -1;
> +
> +	cpumask_set_cpu(cpu, cpu_callout_mask);
> +	return 0;
> +}
> +
> +/*
> + * Bringup step three: Wait for the target AP to reach smp_callin().
> + * The AP is not waiting for us here so we don't need to parallelise
> + * this step. Not entirely clear why we care about this, since we just
> + * proceed directly to TSC synchronization which is the next sync
> + * point with the AP anyway.
> + */
> +static int do_wait_cpu_callin(unsigned int cpu)
> +{
> +	/*
> +	 * Wait till AP completes initial initialization.
> +	 */
> +	return do_wait_cpu_cpumask(cpu, cpu_callin_mask);
> +}
> +
> +/*
> + * Bringup step four: Synchronize the TSC and wait for the target AP
> + * to reach set_cpu_online() in start_secondary().
> + */
> +static int do_wait_cpu_online(unsigned int cpu)
> +{
> +	unsigned long flags;
> +
> +	/*
> +	 * Check TSC synchronization with the AP (keep irqs disabled
> +	 * while doing so):
> +	 */
> +	local_irq_save(flags);
> +	check_tsc_sync_source(cpu);
> +	local_irq_restore(flags);
> +
> +	/*
> +	 * Wait for the AP to mark itself online. Not entirely
> +	 * clear why we care, since the generic cpuhp code will
> +	 * wait for it to each CPUHP_AP_ONLINE_IDLE before going
> +	 * ahead with the rest of the bringup anyway.
> +	 */
> +	while (!cpu_online(cpu)) {
> +		cpu_relax();
> +		touch_nmi_watchdog();
>  	}
>
> -	return boot_error;
> +	return 0;
>  }
>
> -int native_cpu_up(unsigned int cpu, struct task_struct *tidle)
> +static int do_cpu_up(unsigned int cpu, struct task_struct *tidle)
>  {
>  	int apicid = apic->cpu_present_to_apicid(cpu);
>  	int cpu0_nmi_registered = 0;
> -	unsigned long flags;
>  	int err, ret = 0;
>
>  	lockdep_assert_irqs_enabled();
> @@ -1239,19 +1298,6 @@ int native_cpu_up(unsigned int cpu, struct task_struct *tidle)
>  		goto unreg_nmi;
>  	}
>
> -	/*
> -	 * Check TSC synchronization with the AP (keep irqs disabled
> -	 * while doing so):
> -	 */
> -	local_irq_save(flags);
> -	check_tsc_sync_source(cpu);
> -	local_irq_restore(flags);
> -
> -	while (!cpu_online(cpu)) {
> -		cpu_relax();
> -		touch_nmi_watchdog();
> -	}
> -
>  unreg_nmi:
>  	/*
>  	 * Clean up the nmi handler. Do this after the callin and callout sync
> @@ -1263,6 +1309,34 @@ int native_cpu_up(unsigned int cpu, struct task_struct *tidle)
>  	return ret;
>  }
>
> +int native_cpu_up(unsigned int cpu, struct task_struct *tidle)
> +{
> +	int ret;
> +
> +	ret = do_cpu_up(cpu, tidle);
> +	if (ret)
> +		return ret;
> +
> +	ret = do_wait_cpu_initialized(cpu);
> +	if (ret)
> +		return ret;
> +
> +	ret = do_wait_cpu_callin(cpu);
> +	if (ret)
> +		return ret;
> +
> +	ret = do_wait_cpu_online(cpu);
> +
> +	if (x86_platform.legacy.warm_reset) {
> +		/*
> +		 * Cleanup possible dangling ends...
> +		 */
> +		smpboot_restore_warm_reset_vector();
> +	}
> +
> +	return ret;
> +}
> +
>  /**
>   * arch_disable_smp_support() - disables SMP support for x86 at runtime
>   */
> --
> 2.25.1
>



[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux