On 30/03/23 16:27, Marcelo Tosatti wrote:
> +/*
> + * invalidate_bh_lrus() is called rarely - but not only at unmount.
> + */
> void invalidate_bh_lrus(void)
> {
> -	on_each_cpu_cond(has_bh_in_lru, invalidate_bh_lru, NULL, 1);
> +	int cpu, oidx;
> +
> +	mutex_lock(&bh_lru_invalidate_mutex);
> +	cpus_read_lock();
> +	oidx = bh_lru_idx;
> +	bh_lru_idx++;
> +	if (bh_lru_idx >= 2)
> +		bh_lru_idx = 0;
> +

You could make this a bool and flip it:

	bh_lru_idx = !bh_lru_idx;

> +	/* Assign the per-CPU bh_lru pointer */
> +	for_each_online_cpu(cpu)
> +		rcu_assign_pointer(per_cpu(bh_lrup, cpu),
> +				   per_cpu_ptr(&bh_lrus[bh_lru_idx], cpu));
> +	synchronize_rcu_expedited();
> +
> +	for_each_online_cpu(cpu) {
> +		struct bh_lru *b = per_cpu_ptr(&bh_lrus[oidx], cpu);
> +
> +		bh_lru_lock();
> +		__invalidate_bh_lrus(b);
> +		bh_lru_unlock();

Given that bh_lrup has been updated and we're past the synchronize_rcu(),
what is bh_lru_lock() used for here?

> +	}
> +	cpus_read_unlock();
> +	mutex_unlock(&bh_lru_invalidate_mutex);

Regarding scalability, this is shifting a set of per-CPU-IPI callbacks to a
single CPU, which isn't great.

Can we consider doing something like [1], i.e. in the general case send an
IPI to do:

	rcu_assign_pointer() + call_rcu(/* invalidation callback */)

and in the case where we're NOHZ_FULL and the target CPU is not executing in
the kernel, we do that remotely to reduce interference. We might want to
batch the synchronize_rcu() for the remote invalidates, perhaps with some
abuse of the API like so:
/* Condition callback must match smp_cond_func_t: bool (*)(int cpu, void *info) */
bool do_local_invalidate(int cpu, void *info)
{
	struct cpumask *mask = info;

	if (cpu_in_kernel(cpu)) {
		__cpumask_clear_cpu(cpu, mask);
		return true;
	}

	return false;
}

void invalidate_bh_lrus(void)
{
	cpumask_var_t cpumask;
	int cpu;

	/* cpumask_var_t needs allocation and is passed directly, not by address */
	if (!alloc_cpumask_var(&cpumask, GFP_KERNEL))
		return;

	cpus_read_lock();
	cpumask_copy(cpumask, cpu_online_mask);

	on_each_cpu_cond(do_local_invalidate, invalidate_bh_lru, cpumask, 1);

	for_each_cpu(cpu, cpumask)
		rcu_assign_pointer(per_cpu(bh_lrup, cpu),
				   per_cpu_ptr(&bh_lrus[bh_lru_idx], cpu));

	synchronize_rcu_expedited();

	for_each_cpu(cpu, cpumask) {
		// Do remote invalidate here
	}

	cpus_read_unlock();
	free_cpumask_var(cpumask);
}

[1]: https://lore.kernel.org/lkml/20230404134224.137038-4-ypodemsk@xxxxxxxxxx/

> }
> EXPORT_SYMBOL_GPL(invalidate_bh_lrus);
>
> @@ -1465,8 +1505,10 @@ void invalidate_bh_lrus_cpu(void)
> 	struct bh_lru *b;
>
> 	bh_lru_lock();
> -	b = this_cpu_ptr(&bh_lrus);
> +	rcu_read_lock();
> +	b = rcu_dereference(per_cpu(bh_lrup, smp_processor_id()));
> 	__invalidate_bh_lrus(b);
> +	rcu_read_unlock();
> 	bh_lru_unlock();
> }
>
> @@ -2968,15 +3010,25 @@ void free_buffer_head(struct buffer_head *bh)
> }
> EXPORT_SYMBOL(free_buffer_head);
>
> +static int buffer_cpu_online(unsigned int cpu)
> +{
> +	rcu_assign_pointer(per_cpu(bh_lrup, cpu),
> +			   per_cpu_ptr(&bh_lrus[bh_lru_idx], cpu));
> +	return 0;
> +}

What serializes this against invalidate_bh_lrus()? Are you relying on this
running under cpus_write_lock()?