From: Hou Tao <houtao1@xxxxxxxxxx>

For the htab_use_raw_lock=true case, normal concurrent map updates are
still allowed by using preempt_disable() instead of migrate_disable()
before increasing htab->map_locked. However, the false case cannot use
preempt_disable(), because a sleepable spin-lock is acquired afterwards.

So introduce a bpf_map_busy bit in task_struct: set it before acquiring
the bucket lock and clear it after releasing the lock, so that if
htab_lock_bucket() is re-entered, the re-entrancy is rejected. If there
is just preemption from another process, these processes can still run
concurrently.

Signed-off-by: Hou Tao <houtao1@xxxxxxxxxx>
---
 include/linux/sched.h |  3 +++
 kernel/bpf/hashtab.c  | 61 ++++++++++++++++++++++++-------------------
 2 files changed, 37 insertions(+), 27 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 51dc1e89d43f..55667f46e459 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -944,6 +944,9 @@ struct task_struct {
 #ifdef CONFIG_CPU_SUP_INTEL
 	unsigned			reported_split_lock:1;
 #endif
+#if defined(CONFIG_PREEMPT_RT) && defined(CONFIG_BPF_SYSCALL)
+	unsigned			bpf_map_busy:1;
+#endif
 
 	unsigned long			atomic_flags; /* Flags requiring atomic access. */
 
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index ad09da139589..3ef7a853c737 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -138,6 +138,23 @@ static inline bool htab_use_raw_lock(const struct bpf_htab *htab)
 	return (!IS_ENABLED(CONFIG_PREEMPT_RT) || htab_is_prealloc(htab));
 }
 
+static inline void bpf_clear_map_busy(void)
+{
+#ifdef CONFIG_PREEMPT_RT
+	current->bpf_map_busy = 0;
+#endif
+}
+
+static inline int bpf_test_and_set_map_busy(void)
+{
+#ifdef CONFIG_PREEMPT_RT
+	if (current->bpf_map_busy)
+		return 1;
+	current->bpf_map_busy = 1;
+#endif
+	return 0;
+}
+
 static void htab_init_buckets(struct bpf_htab *htab)
 {
 	unsigned int i;
@@ -162,28 +179,21 @@ static inline int htab_lock_bucket(const struct bpf_htab *htab,
 				   unsigned long *pflags)
 {
 	unsigned long flags;
-	bool use_raw_lock;
 
-	hash = hash & HASHTAB_MAP_LOCK_MASK;
-
-	use_raw_lock = htab_use_raw_lock(htab);
-	if (use_raw_lock)
+	if (htab_use_raw_lock(htab)) {
+		hash = hash & HASHTAB_MAP_LOCK_MASK;
 		preempt_disable();
-	else
-		migrate_disable();
-	if (unlikely(__this_cpu_inc_return(*(htab->map_locked[hash])) != 1)) {
-		__this_cpu_dec(*(htab->map_locked[hash]));
-		if (use_raw_lock)
+		if (unlikely(__this_cpu_inc_return(*(htab->map_locked[hash])) != 1)) {
+			__this_cpu_dec(*(htab->map_locked[hash]));
 			preempt_enable();
-		else
-			migrate_enable();
-		return -EBUSY;
-	}
-
-	if (use_raw_lock)
+			return -EBUSY;
+		}
 		raw_spin_lock_irqsave(&b->raw_lock, flags);
-	else
+	} else {
+		if (bpf_test_and_set_map_busy())
+			return -EBUSY;
 		spin_lock_irqsave(&b->lock, flags);
+	}
 	*pflags = flags;
 
 	return 0;
@@ -193,18 +203,15 @@ static inline void htab_unlock_bucket(const struct bpf_htab *htab,
 				      struct bucket *b, u32 hash,
 				      unsigned long flags)
 {
-	bool use_raw_lock = htab_use_raw_lock(htab);
-
-	hash = hash & HASHTAB_MAP_LOCK_MASK;
-	if (use_raw_lock)
+	if (htab_use_raw_lock(htab)) {
+		hash = hash & HASHTAB_MAP_LOCK_MASK;
 		raw_spin_unlock_irqrestore(&b->raw_lock, flags);
-	else
-		spin_unlock_irqrestore(&b->lock, flags);
-	__this_cpu_dec(*(htab->map_locked[hash]));
-	if (use_raw_lock)
+		__this_cpu_dec(*(htab->map_locked[hash]));
 		preempt_enable();
-	else
-		migrate_enable();
+	} else {
+		spin_unlock_irqrestore(&b->lock, flags);
+		bpf_clear_map_busy();
+	}
 }
 
 static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node);
-- 
2.29.2