From: Sebastian Andrzej Siewior <bigeasy@xxxxxxxxxxxxx> Subject: lib/percpu_ida.c: use _irqsave() instead of local_irq_save() + spin_lock percpu_ida() decouples disabling interrupts from the locking operations. This breaks some assumptions if the locking operations are replaced like they are under -RT. The same locking can be achieved by avoiding local_irq_save() and using spin_lock_irqsave() instead. percpu_ida_alloc() gains one more preemption point because after unlocking the fastpath and before the pool lock is acquired, the interrupts are briefly enabled. Link: http://lkml.kernel.org/r/20180504153218.7301-1-bigeasy@xxxxxxxxxxxxx Signed-off-by: Sebastian Andrzej Siewior <bigeasy@xxxxxxxxxxxxx> Reviewed-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx> Cc: Nicholas Bellinger <nab@xxxxxxxxxxxxxxx> Cc: Shaohua Li <shli@xxxxxx> Cc: Kent Overstreet <kent.overstreet@xxxxxxxxx> Cc: Matthew Wilcox <willy@xxxxxxxxxxxxx> Cc: Jens Axboe <axboe@xxxxxxxxx> Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> --- lib/percpu_ida.c | 63 +++++++++++++++------------------------------ 1 file changed, 21 insertions(+), 42 deletions(-) diff -puN lib/percpu_ida.c~percpu_ida-use-_irqsave-instead-of-local_irq_save-spin_lock lib/percpu_ida.c --- a/lib/percpu_ida.c~percpu_ida-use-_irqsave-instead-of-local_irq_save-spin_lock +++ a/lib/percpu_ida.c @@ -112,18 +112,6 @@ static inline void alloc_global_tags(str min(pool->nr_free, pool->percpu_batch_size)); } -static inline unsigned alloc_local_tag(struct percpu_ida_cpu *tags) -{ - int tag = -ENOSPC; - - spin_lock(&tags->lock); - if (tags->nr_free) - tag = tags->freelist[--tags->nr_free]; - spin_unlock(&tags->lock); - - return tag; -} - /** * percpu_ida_alloc - allocate a tag * @pool: pool to allocate from @@ -147,20 +135,22 @@ int percpu_ida_alloc(struct percpu_ida * DEFINE_WAIT(wait); struct percpu_ida_cpu *tags; unsigned long flags; - int tag; + int tag = -ENOSPC; - local_irq_save(flags); - tags = this_cpu_ptr(pool->tag_cpu); + tags = raw_cpu_ptr(pool->tag_cpu); + spin_lock_irqsave(&tags->lock, flags); /* Fastpath */ - tag = alloc_local_tag(tags); - if (likely(tag >= 0)) { - local_irq_restore(flags); + if (likely(tags->nr_free >= 0)) { + tag = tags->freelist[--tags->nr_free]; + spin_unlock_irqrestore(&tags->lock, flags); return tag; } + spin_unlock_irqrestore(&tags->lock, flags); while (1) { - spin_lock(&pool->lock); + spin_lock_irqsave(&pool->lock, flags); + tags = this_cpu_ptr(pool->tag_cpu); /* * prepare_to_wait() must come before steal_tags(), in case @@ -184,8 +174,7 @@ int percpu_ida_alloc(struct percpu_ida * &pool->cpus_have_tags); } - spin_unlock(&pool->lock); - local_irq_restore(flags); + spin_unlock_irqrestore(&pool->lock, flags); if (tag >= 0 || state == TASK_RUNNING) break; @@ -196,9 +185,6 @@ int percpu_ida_alloc(struct percpu_ida * } schedule(); - - local_irq_save(flags); - tags = this_cpu_ptr(pool->tag_cpu); } if (state != TASK_RUNNING) finish_wait(&pool->wait, &wait); @@ -222,28 +208,24 @@ void percpu_ida_free(struct percpu_ida * BUG_ON(tag >= pool->nr_tags); - local_irq_save(flags); - tags = this_cpu_ptr(pool->tag_cpu); + tags = raw_cpu_ptr(pool->tag_cpu); - spin_lock(&tags->lock); + spin_lock_irqsave(&tags->lock, flags); tags->freelist[tags->nr_free++] = tag; nr_free = tags->nr_free; - spin_unlock(&tags->lock); if (nr_free == 1) { cpumask_set_cpu(smp_processor_id(), &pool->cpus_have_tags); wake_up(&pool->wait); } + spin_unlock_irqrestore(&tags->lock, flags); if (nr_free == pool->percpu_max_size) { - spin_lock(&pool->lock); + spin_lock_irqsave(&pool->lock, flags); + spin_lock(&tags->lock); - /* - * Global lock held and irqs disabled, don't need percpu - * lock - */ if (tags->nr_free == pool->percpu_max_size) { move_tags(pool->freelist, &pool->nr_free, tags->freelist, &tags->nr_free, @@ -251,10 +233,9 @@ void percpu_ida_free(struct percpu_ida * wake_up(&pool->wait); } - spin_unlock(&pool->lock); + spin_unlock(&tags->lock); + spin_unlock_irqrestore(&pool->lock, flags); } - - local_irq_restore(flags); } EXPORT_SYMBOL_GPL(percpu_ida_free); @@ -346,29 +327,27 @@ int percpu_ida_for_each_free(struct perc struct percpu_ida_cpu *remote; unsigned cpu, i, err = 0; - local_irq_save(flags); for_each_possible_cpu(cpu) { remote = per_cpu_ptr(pool->tag_cpu, cpu); - spin_lock(&remote->lock); + spin_lock_irqsave(&remote->lock, flags); for (i = 0; i < remote->nr_free; i++) { err = fn(remote->freelist[i], data); if (err) break; } - spin_unlock(&remote->lock); + spin_unlock_irqrestore(&remote->lock, flags); if (err) goto out; } - spin_lock(&pool->lock); + spin_lock_irqsave(&pool->lock, flags); for (i = 0; i < pool->nr_free; i++) { err = fn(pool->freelist[i], data); if (err) break; } - spin_unlock(&pool->lock); + spin_unlock_irqrestore(&pool->lock, flags); out: - local_irq_restore(flags); return err; } EXPORT_SYMBOL_GPL(percpu_ida_for_each_free); _ -- To unsubscribe from this list: send the line "unsubscribe mm-commits" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html