Re: [RFC][PATCH 15/26] sched, numa: Implement hotplug hooks

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 03/16/2012 08:10 PM, Peter Zijlstra wrote:

> start/stop numa balance threads on-demand using cpu-hotplug.
> 
> Signed-off-by: Peter Zijlstra <a.p.zijlstra@xxxxxxxxx>
> ---
>  kernel/sched/numa.c |   62 ++++++++++++++++++++++++++++++++++++++++++++++------
>  1 file changed, 55 insertions(+), 7 deletions(-)
> --- a/kernel/sched/numa.c
> +++ b/kernel/sched/numa.c
> @@ -596,31 +596,79 @@ static int numad_thread(void *data)
>  	return 0;
>  }
> 
> +static int __cpuinit
> +numa_hotplug(struct notifier_block *nb, unsigned long action, void *hcpu)
> +{
> +	int cpu = (long)hcpu;
> +	int node = cpu_to_node(cpu);
> +	struct node_queue *nq = nq_of(node);
> +	struct task_struct *numad;
> +	int err = 0;
> +
> +	switch (action & ~CPU_TASKS_FROZEN) {
> +	case CPU_UP_PREPARE:
> +		if (nq->numad)
> +			break;
> +
> +		numad = kthread_create_on_node(numad_thread,
> +				nq, node, "numad/%d", node);
> +		if (IS_ERR(numad)) {
> +			err = PTR_ERR(numad);
> +			break;
> +		}
> +
> +		nq->numad = numad;
> +		nq->next_schedule = jiffies + HZ; // XXX sync-up?
> +		break;
> +
> +	case CPU_ONLINE:
> +		wake_up_process(nq->numad);
> +		break;
> +
> +	case CPU_DEAD:
> +	case CPU_UP_CANCELED:
> +		if (!nq->numad)
> +			break;
> +
> +		if (cpumask_any_and(cpu_online_mask,
> +				    cpumask_of_node(node)) >= nr_cpu_ids) {
> +			kthread_stop(nq->numad);
> +			nq->numad = NULL;
> +		}
> +		break;
> +	}
> +
> +	return notifier_from_errno(err);
> +}
> +
>  static __init int numa_init(void)
>  {
> -	int node;
> +	int node, cpu, err;
> 
>  	nqs = kzalloc(sizeof(struct node_queue*) * nr_node_ids, GFP_KERNEL);
>  	BUG_ON(!nqs);
> 
> -	for_each_node(node) { // XXX hotplug
> +	for_each_node(node) {
>  		struct node_queue *nq = kmalloc_node(sizeof(*nq),
>  				GFP_KERNEL | __GFP_ZERO, node);
>  		BUG_ON(!nq);
> 
> -		nq->numad = kthread_create_on_node(numad_thread,
> -				nq, node, "numad/%d", node);
> -		BUG_ON(IS_ERR(nq->numad));
> -
>  		spin_lock_init(&nq->lock);
>  		INIT_LIST_HEAD(&nq->entity_list);
> 
>  		nq->next_schedule = jiffies + HZ;
>  		nq->node = node;
>  		nqs[node] = nq;
> +	}
> 
> -		wake_up_process(nq->numad);
> +	get_online_cpus();
> +	cpu_notifier(numa_hotplug, 0);


ABBA deadlock!

CPU 0						CPU1
				echo 0/1 > /sys/devices/.../cpu*/online

					acquire cpu_add_remove_lock

get_online_cpus()
	acquire cpu_hotplug lock
					
					Blocked on cpu hotplug lock

cpu_notifier()
	acquire cpu_add_remove_lock

ABBA DEADLOCK!

[cpu_maps_update_begin/done() deal with cpu_add_remove_lock].

So, basically, at the moment there is no way to register a CPU Hotplug notifier
and do setup for all currently online cpus in a totally race-free manner.

One approach to fix this is to audit whether register_cpu_notifier() really needs
to take cpu_add_remove_lock and if no, then acquire cpu hotplug lock instead.

The other approach is to keep the existing lock ordering as it is and yet provide
a race-free way to register, as I had posted some time ago (incomplete/untested):

http://thread.gmane.org/gmane.linux.kernel/1258880/focus=15826


> +	for_each_online_cpu(cpu) {
> +		err = numa_hotplug(NULL, CPU_UP_PREPARE, (void *)(long)cpu);
> +		BUG_ON(notifier_to_errno(err));
> +		numa_hotplug(NULL, CPU_ONLINE, (void *)(long)cpu);
>  	}
> +	put_online_cpus();
> 
>  	return 0;
>  }
> 
> 

 
Regards,
Srivatsa S. Bhat

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@xxxxxxxxx.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href="mailto:dont@xxxxxxxxx"> email@xxxxxxxxx </a>


[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Annouce]     [Bugtraq]     [Linux]     [Linux OMAP]     [Linux MIPS]     [ECOS]     [Asterisk Internet PBX]     [Linux API]