Hi,

On 11/5/2024 12:28 AM, Sebastian Andrzej Siewior wrote:
> On 2024-11-03 18:29:04 [-0800], syzbot wrote:
>> syzbot has bisected this issue to:
>>
>> commit 560af5dc839eef08a273908f390cfefefb82aa04
>> Author: Sebastian Andrzej Siewior <bigeasy@xxxxxxxxxxxxx>
>> Date:   Wed Oct 9 15:45:03 2024 +0000
>>
>>     lockdep: Enable PROVE_RAW_LOCK_NESTING with PROVE_LOCKING.
>>
>> bisection log:  https://syzkaller.appspot.com/x/bisect.txt?x=122a4740580000
>> start commit:   f9f24ca362a4 Add linux-next specific files for 20241031
>> git tree:       linux-next
>> final oops:     https://syzkaller.appspot.com/x/report.txt?x=112a4740580000
>> console output: https://syzkaller.appspot.com/x/log.txt?x=162a4740580000
>> kernel config:  https://syzkaller.appspot.com/x/.config?x=328572ed4d152be9
>> dashboard link: https://syzkaller.appspot.com/bug?extid=d2adb332fe371b0595e3
>> syz repro:      https://syzkaller.appspot.com/x/repro.syz?x=174432a7980000
>> C reproducer:   https://syzkaller.appspot.com/x/repro.c?x=14ffe55f980000
>>
>> Reported-by: syzbot+d2adb332fe371b0595e3@xxxxxxxxxxxxxxxxxxxxxxxxx
>> Fixes: 560af5dc839e ("lockdep: Enable PROVE_RAW_LOCK_NESTING with PROVE_LOCKING.")
>>
>> For information about bisection process see: https://goo.gl/tpsmEJ#bisection
>
> This is due to raw_spinlock_t in bucket::lock and the acquired
> spinlock_t underneath. Would it would to move free part outside of the
> locked section?

I think moving free_htab_elem() after htab_unlock_bucket() is OK. But the
fix below is not enough, and there are some corner cases for pre-allocated
elements. I had written a patch for the problem a few days ago because the
problem can be easily reproduced by running test_maps. I am also writing a
selftest patch for it. I could post the patch and the selftest patch if you
are OK with it.
>
> diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
> index b14b87463ee04..1d8d09fdd2da5 100644
> --- a/kernel/bpf/hashtab.c
> +++ b/kernel/bpf/hashtab.c
> @@ -824,13 +824,14 @@ static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node)
>          hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
>                  if (l == tgt_l) {
>                          hlist_nulls_del_rcu(&l->hash_node);
> -                        check_and_free_fields(htab, l);
>                          bpf_map_dec_elem_count(&htab->map);
>                          break;
>                  }
>
>          htab_unlock_bucket(htab, b, tgt_l->hash, flags);
>
> +        if (l == tgt_l)
> +                check_and_free_fields(htab, l);
>          return l == tgt_l;
>  }
>
> @@ -1181,14 +1182,18 @@ static long htab_map_update_elem(struct bpf_map *map, void *key, void *value,
>           * concurrent search will find it before old elem
>           */
>          hlist_nulls_add_head_rcu(&l_new->hash_node, head);
> -        if (l_old) {
> +        if (l_old)
>                  hlist_nulls_del_rcu(&l_old->hash_node);
> +        htab_unlock_bucket(htab, b, hash, flags);
> +
> +        if (l_old) {
>                  if (!htab_is_prealloc(htab))
>                          free_htab_elem(htab, l_old);
>                  else
>                          check_and_free_fields(htab, l_old);
>          }
> -        ret = 0;
> +        return 0;
> +
>  err:
>          htab_unlock_bucket(htab, b, hash, flags);
>          return ret;
> @@ -1433,14 +1438,15 @@ static long htab_map_delete_elem(struct bpf_map *map, void *key)
>
>          l = lookup_elem_raw(head, hash, key, key_size);
>
> -        if (l) {
> +        if (l)
>                  hlist_nulls_del_rcu(&l->hash_node);
> -                free_htab_elem(htab, l);
> -        } else {
> +        else
>                  ret = -ENOENT;
> -        }
>
>          htab_unlock_bucket(htab, b, hash, flags);
> +
> +        if (l)
> +                free_htab_elem(htab, l);
>          return ret;
>  }
>
> @@ -1647,14 +1653,16 @@ static int __htab_map_lookup_and_delete_elem(struct bpf_map *map, void *key,
>                  }
>
>                  hlist_nulls_del_rcu(&l->hash_node);
> -                if (!is_lru_map)
> -                        free_htab_elem(htab, l);
>          }
>
>          htab_unlock_bucket(htab, b, hash, bflags);
>
> -        if (is_lru_map && l)
> -                htab_lru_push_free(htab, l);
> +        if (l) {
> +                if (is_lru_map)
> +                        htab_lru_push_free(htab, l);
> +                else
> +                        free_htab_elem(htab, l);
> +        }
>
>          return ret;
>  }
> @@ -1851,15 +1859,12 @@ __htab_map_lookup_and_delete_batch(struct bpf_map *map,
>
>                          /* bpf_lru_push_free() will acquire lru_lock, which
>                           * may cause deadlock. See comments in function
> -                         * prealloc_lru_pop(). Let us do bpf_lru_push_free()
> -                         * after releasing the bucket lock.
> +                         * prealloc_lru_pop(). htab_lru_push_free() may allocate
> +                         * sleeping locks. Let us do bpf_lru_push_free() after
> +                         * releasing the bucket lock.
>                           */
> -                        if (is_lru_map) {
> -                                l->batch_flink = node_to_free;
> -                                node_to_free = l;
> -                        } else {
> -                                free_htab_elem(htab, l);
> -                        }
> +                        l->batch_flink = node_to_free;
> +                        node_to_free = l;
>                  }
>                  dst_key += key_size;
>                  dst_val += value_size;
> @@ -1871,7 +1876,10 @@ __htab_map_lookup_and_delete_batch(struct bpf_map *map,
>          while (node_to_free) {
>                  l = node_to_free;
>                  node_to_free = node_to_free->batch_flink;
> -                htab_lru_push_free(htab, l);
> +                if (is_lru_map)
> +                        htab_lru_push_free(htab, l);
> +                else
> +                        free_htab_elem(htab, l);
>          }
>
>  next_batch:
>
> .
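
Just to illustrate the ordering the diff above establishes, here is a minimal
userspace sketch of the "unlink under the bucket lock, free after unlock"
pattern. The names (struct bucket, struct elem, bucket_delete) are made up
for illustration and a pthread spinlock stands in for the raw spinlock_t
bucket lock; the real code in kernel/bpf/hashtab.c uses hlist_nulls helpers
and the free path may take sleeping locks, which is exactly why it has to
run outside the locked section:

/* Build with: gcc -pthread sketch.c */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct elem {
        int key;
        struct elem *next;
};

struct bucket {
        pthread_spinlock_t lock;        /* stands in for the raw bucket lock */
        struct elem *head;
};

/* Unlink the element while holding the bucket lock, but defer the
 * actual free until after the lock is released.
 */
static int bucket_delete(struct bucket *b, int key)
{
        struct elem **pp, *found = NULL;

        pthread_spin_lock(&b->lock);
        for (pp = &b->head; *pp; pp = &(*pp)->next) {
                if ((*pp)->key == key) {
                        found = *pp;
                        *pp = found->next;      /* unlink only */
                        break;
                }
        }
        pthread_spin_unlock(&b->lock);

        if (!found)
                return -1;
        free(found);                            /* free outside the locked section */
        return 0;
}

int main(void)
{
        struct bucket b;
        struct elem *e = malloc(sizeof(*e));

        pthread_spin_init(&b.lock, PTHREAD_PROCESS_PRIVATE);
        e->key = 1;
        e->next = NULL;
        b.head = e;

        printf("delete key 1: %d\n", bucket_delete(&b, 1));
        printf("delete key 2: %d\n", bucket_delete(&b, 2));
        pthread_spin_destroy(&b.lock);
        return 0;
}

The diff gives every deletion path this same shape: hlist_nulls_del_rcu()
while the bucket lock is held, and free_htab_elem()/htab_lru_push_free()
only after htab_unlock_bucket().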