On 09/01, Ingo Molnar wrote: > > * Oleg Nesterov <oleg@xxxxxxxxxx> wrote: > > > Yes, this should work. But I _think_ we can make the better fix... > > > > I'll try to make the patch soon. Afaics we don't need > > kthreadd_task_init_done. > > ok. Just in case, the patch is ready. I need to re-check my thinking and test it somehow... - remove kthreadd_task initialization from rest_init() - change kthreadd() to initialize kthreadd_task = current - change the main loop in kthreadd() to take kthread_create_lock before the first schedule() (just shift schedule() down) This way, if kthreadd_task needs the wakeup, kthread_create() must see kthreadd_task != NULL after unlock(kthread_create_lock). If kthread_create() sees kthreadd_task == NULL we can just sleep on create.done, kthreadd() must notice the new request before it calls schedule(). Note that with this change it is possible to use kthread_create() at any time, but the caller will sleep until rest_init() creates kthreadd. Oleg. init/main.c | 5 +---- kernel/kthread.c | 30 ++++++++++++++++++------------ 2 files changed, 19 insertions(+), 16 deletions(-) --- a/init/main.c +++ b/init/main.c @@ -449,12 +449,9 @@ static void __init setup_command_line(ch static noinline void __init_refok rest_init(void) __releases(kernel_lock) { - int pid; - kernel_thread(kernel_init, NULL, CLONE_FS | CLONE_SIGHAND); numa_default_policy(); - pid = kernel_thread(kthreadd, NULL, CLONE_FS | CLONE_FILES); - kthreadd_task = find_task_by_pid_ns(pid, &init_pid_ns); + kernel_thread(kthreadd, NULL, CLONE_FS | CLONE_FILES); unlock_kernel(); /* --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -128,8 +128,14 @@ struct task_struct *kthread_create(int ( spin_lock(&kthread_create_lock); list_add_tail(&create.list, &kthread_create_list); spin_unlock(&kthread_create_lock); - - wake_up_process(kthreadd_task); + /* + * If kthreadd was not created yet, kthreadd() must see the result + * of list_add_tail() later, it takes kthread_create_lock before the + * first schedule(). If kthreadd() locked kthread_create_lock at + * least once, we must see kthreadd_task != NULL. + */ + if (likely(kthreadd_task)) + wake_up_process(kthreadd_task); wait_for_completion(&create.done); if (!IS_ERR(create.result)) { @@ -216,23 +222,18 @@ EXPORT_SYMBOL(kthread_stop); int kthreadd(void *unused) { - struct task_struct *tsk = current; + kthreadd_task = current; /* Setup a clean context for our children to inherit. */ - set_task_comm(tsk, "kthreadd"); - ignore_signals(tsk); - set_user_nice(tsk, KTHREAD_NICE_LEVEL); - set_cpus_allowed_ptr(tsk, cpu_all_mask); + set_task_comm(kthreadd_task, "kthreadd"); + ignore_signals(kthreadd_task); + set_user_nice(kthreadd_task, KTHREAD_NICE_LEVEL); + set_cpus_allowed_ptr(kthreadd_task, cpu_all_mask); set_mems_allowed(node_possible_map); current->flags |= PF_NOFREEZE | PF_FREEZER_NOSIG; for (;;) { - set_current_state(TASK_INTERRUPTIBLE); - if (list_empty(&kthread_create_list)) - schedule(); - __set_current_state(TASK_RUNNING); - spin_lock(&kthread_create_lock); while (!list_empty(&kthread_create_list)) { struct kthread_create_info *create; @@ -247,6 +248,11 @@ int kthreadd(void *unused) spin_lock(&kthread_create_lock); } spin_unlock(&kthread_create_lock); + + set_current_state(TASK_INTERRUPTIBLE); + if (list_empty(&kthread_create_list)) + schedule(); + __set_current_state(TASK_RUNNING); } return 0; -- To unsubscribe from this list: send the line "unsubscribe linux-tip-commits" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html