On 2022-02-14 11:46:00 [-0500], Johannes Weiner wrote: > > diff --git a/mm/memcontrol.c b/mm/memcontrol.c > > index c1caa662946dc..466466f285cea 100644 > > --- a/mm/memcontrol.c > > +++ b/mm/memcontrol.c > > @@ -705,6 +705,8 @@ void __mod_memcg_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx, > > pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec); > > memcg = pn->memcg; > > > > + if (IS_ENABLED(CONFIG_PREEMPT_RT)) > > + preempt_disable(); > > /* Update memcg */ > > __this_cpu_add(memcg->vmstats_percpu->state[idx], val); > > > > @@ -712,6 +714,8 @@ void __mod_memcg_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx, > > __this_cpu_add(pn->lruvec_stats_percpu->state[idx], val); > > > > memcg_rstat_updated(memcg, val); > > + if (IS_ENABLED(CONFIG_PREEMPT_RT)) > > + preempt_enable(); > > } > > I notice you didn't annoate __mod_memcg_state(). I suppose that is > because it's called with explicit local_irq_disable(), and that > disables preemption on rt? And you only need another preempt_disable() > for stacks that rely on coming from spin_lock_irq(save)? Correct. The code is not used in_hardirq() on PREEMPT_RT so preempt_disable() is sufficient. I didn't bother to replace all local_irq_save() with preempt_disable() since it is probably not worth it. And yes: spin_lock_irq() does not disable interrupts so I need something here to ensure that the RMW operation is not interrupted. > That makes sense, but it's difficult to maintain. It'll easily break > if somebody adds more memory accounting sites that may also rely on an > irq-disabled spinlock somewhere. > > So better to make this an unconditional locking protocol: > > static void memcg_stats_lock(void) > { > #ifdef CONFIG_PREEMPT_RT > preempt_disable(); > #else > VM_BUG_ON(!irqs_disabled()); > #endif > } > > static void memcg_stats_unlock(void) > { > #ifdef CONFIG_PREEMPT_RT > preempt_enable(); > #endif > } > > and always use these around the counter updates. Something like the following perhaps? I didn't add anything to __mod_memcg_state() since it has no users besides the one which does local_irq_save(). diff --git a/mm/memcontrol.c b/mm/memcontrol.c index c1caa662946dc..69130a5fe3d51 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -629,6 +629,28 @@ static DEFINE_SPINLOCK(stats_flush_lock); static DEFINE_PER_CPU(unsigned int, stats_updates); static atomic_t stats_flush_threshold = ATOMIC_INIT(0); +/* + * Accessors to ensure that preemption is disabled on PREEMPT_RT because it can + * not rely on this as part of an acquired spinlock_t lock. These functions are + * never used in hardirq context on PREEMPT_RT and therefore disabling preemtion + * is sufficient. + */ +static void memcg_stats_lock(void) +{ +#ifdef CONFIG_PREEMPT_RT + preempt_disable(); +#else + VM_BUG_ON(!irqs_disabled()); +#endif +} + +static void memcg_stats_unlock(void) +{ +#ifdef CONFIG_PREEMPT_RT + preempt_enable(); +#endif +} + static inline void memcg_rstat_updated(struct mem_cgroup *memcg, int val) { unsigned int x; @@ -705,6 +727,7 @@ void __mod_memcg_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx, pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec); memcg = pn->memcg; + memcg_stats_lock(); /* Update memcg */ __this_cpu_add(memcg->vmstats_percpu->state[idx], val); @@ -712,6 +735,7 @@ void __mod_memcg_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx, __this_cpu_add(pn->lruvec_stats_percpu->state[idx], val); memcg_rstat_updated(memcg, val); + memcg_stats_unlock(); } /** @@ -794,8 +818,10 @@ void __count_memcg_events(struct mem_cgroup *memcg, enum vm_event_item idx, if (mem_cgroup_disabled()) return; + memcg_stats_lock(); __this_cpu_add(memcg->vmstats_percpu->events[idx], count); memcg_rstat_updated(memcg, count); + memcg_stats_unlock(); } static unsigned long memcg_events(struct mem_cgroup *memcg, int event) @@ -7149,8 +7175,9 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry) * important here to have the interrupts disabled because it is the * only synchronisation we have for updating the per-CPU variables. */ - VM_BUG_ON(!irqs_disabled()); + memcg_stats_lock(); mem_cgroup_charge_statistics(memcg, -nr_entries); + memcg_stats_unlock(); memcg_check_events(memcg, page_to_nid(page)); css_put(&memcg->css); -- 2.34.1