From: Daniel Wagner <daniel.wagner@xxxxxxxxxxxx> Hi, As Peter correctly pointed out in [1] a simple conversion from wait to swait in completion.c wont work. I played a bit around and came up with this rather ugly idea. So in case complete_all() is called in hard irq context we just wake up one waiter and let that one call swake_up_all(). For this I needed to somehow transfer this information from complete_all() to wait_for_completion(). The only working idea I found was to introduce a new flag in struct completion. Ideas to overcome this problem are highly appreciated. I did also some performance measurement with below test program. The test creates a trigger thread and a bunch of waiter threads. The trigger thread calls complete_all() either from thread context or from hard irq context. Time needed for 1000 iterations measured. This was done on a idle IvyBridge machine with 64 logial cores (E5-4610). waiter_nr: number of waiter threads irqwork: 0 complete_all() from thread context, 1 complete_all() from irq_work() wait: waiter_nr 64 irqwork 0 count 66.000000 mean 0.378318 std 0.018468 min 0.344000 25% 0.364000 50% 0.382500 75% 0.395000 max 0.407000 swait: waiter_nr 64 irqwork 1 count 86.000000 mean 0.315221 std 0.007115 min 0.291000 25% 0.312000 50% 0.316500 75% 0.320000 max 0.329000 swait: waiter_nr 64 irqwork 0 count 81.000000 mean 0.344642 std 0.021708 min 0.294000 25% 0.336000 50% 0.341000 75% 0.355000 max 0.403000 cheers, daniel completion-test.c: #include <linux/module.h> #include <linux/wait.h> #include <linux/swait.h> #include <linux/kthread.h> #include <linux/threads.h> #include <linux/slab.h> #include <linux/delay.h> #include <linux/completion.h> #include <linux/irq_work.h> static unsigned int waiter_nr = 5; static bool irqwork = true; static unsigned int counter = 1000; module_param(waiter_nr, uint, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP); MODULE_PARM_DESC(waiter_nr, "Number of waiter threads"); module_param(irqwork, bool, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP); MODULE_PARM_DESC(irqwork, "irqwork"); module_param(counter, uint, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP); MODULE_PARM_DESC(counter, "counter"); struct completion_test { /* We need two completions to avoid a race with reinit of the * completion. */ struct completion sync_stage1; struct completion sync_stage2; wait_queue_head_t wq_stage1; wait_queue_head_t wq_stage2; atomic_t cnt_stage1; atomic_t cnt_stage2; struct irq_work irq_work; }; static struct completion_test test_data; static struct task_struct **waiter_tasks; static struct task_struct *trigger_task; static void trigger_irq(struct irq_work *arg) { struct completion_test *ct = container_of(arg, struct completion_test, irq_work); complete_all(&ct->sync_stage1); } static int waiter(void *arg) { struct completion_test *ct = arg; for (;;) { atomic_inc(&ct->cnt_stage1); wake_up(&ct->wq_stage1); wait_for_completion_interruptible(&ct->sync_stage1); if (kthread_should_stop()) break; atomic_inc(&ct->cnt_stage2); wake_up(&ct->wq_stage2); wait_for_completion_interruptible(&ct->sync_stage2); if (kthread_should_stop()) break; } return 0; } static int trigger(void *arg) { struct completion_test *ct = arg; struct timespec ts_start, ts; unsigned long cnt; cnt = counter; ts_start = current_kernel_time(); for (;;) { cnt--; if (cnt == 0) { ts = timespec_sub(current_kernel_time(), ts_start); printk("%ld.%.9ld\n", ts.tv_sec, ts.tv_nsec); cnt = counter; ts_start = current_kernel_time(); } wait_event_interruptible(ct->wq_stage1, !(atomic_read(&ct->cnt_stage1) < waiter_nr)); if (kthread_should_stop()) { complete_all(&ct->sync_stage1); break; } atomic_set(&ct->cnt_stage2, 0); reinit_completion(&ct->sync_stage2); if (irqwork) irq_work_queue(&ct->irq_work); else complete_all(&ct->sync_stage1); wait_event_interruptible(ct->wq_stage2, !(atomic_read(&ct->cnt_stage2) < waiter_nr)); if (kthread_should_stop()) { complete_all(&ct->sync_stage2); break; } reinit_completion(&ct->sync_stage1); atomic_set(&ct->cnt_stage1, 0); complete_all(&ct->sync_stage2); } return 0; } static void __exit completion_test_module_cleanup(void) { unsigned int i; if (trigger_task) kthread_stop(trigger_task); if (waiter_tasks) { for (i = 0; i < waiter_nr; i++) { if (waiter_tasks[i] && !IS_ERR(waiter_tasks[i])) kthread_stop(waiter_tasks[i]); } kfree(waiter_tasks); } } static int __init completion_test_module_init(void) { struct completion_test *ct = &test_data; unsigned int i; int err; init_completion(&ct->sync_stage1); init_completion(&ct->sync_stage2); init_waitqueue_head(&ct->wq_stage1); init_waitqueue_head(&ct->wq_stage2); atomic_set(&ct->cnt_stage1, 0); atomic_set(&ct->cnt_stage2, 0); init_irq_work(&ct->irq_work, trigger_irq); waiter_tasks = kcalloc(waiter_nr, sizeof(waiter_tasks[0]), GFP_KERNEL); if (!waiter_tasks) { printk("out of memory\n"); err = -ENOMEM; goto unwind; } for (i = 0; i < waiter_nr; i++) { waiter_tasks[i] = kthread_run(waiter, ct, "waiter"); if (IS_ERR(waiter_tasks[i])) { err = -PTR_ERR(waiter_tasks[i]); goto unwind; } } trigger_task = kthread_run(trigger, ct, "trigger"); if (IS_ERR(trigger_task)) { err = -PTR_ERR(trigger_task); goto unwind; } return 0; unwind: completion_test_module_cleanup(); return err; } module_init(completion_test_module_init); module_exit(completion_test_module_cleanup); MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Daniel Wagner"); MODULE_DESCRIPTION("completion test"); [1] http://thread.gmane.org/gmane.linux.kernel/2034867/focus=2034873 Daniel Wagner (1): sched/completion: convert completions to use simple wait queues include/linux/completion.h | 14 ++++++++++---- kernel/sched/completion.c | 41 +++++++++++++++++++++++++---------------- 2 files changed, 35 insertions(+), 20 deletions(-) -- 2.5.0 -- To unsubscribe from this list: send the line "unsubscribe linux-rt-users" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html