In some cases it is possible for mmu_interval_notifier_remove() to race with
mn_tree_inv_end(), allowing it to return while the notifier data structure is
still in use. Consider the following sequence:

  CPU0 - mn_tree_inv_end()            CPU1 - mmu_interval_notifier_remove()
  ----------------------------------- ------------------------------------
                                      spin_lock(subscriptions->lock);
                                      seq = subscriptions->invalidate_seq;
  spin_lock(subscriptions->lock);     spin_unlock(subscriptions->lock);
  subscriptions->invalidate_seq++;    wait_event(invalidate_seq != seq);
                                      return;
  interval_tree_remove(interval_sub); kfree(interval_sub);
  spin_unlock(subscriptions->lock);
  wake_up_all();

As the wait_event() condition is already true it returns immediately. This
can lead to use-after-free errors if the caller frees the data structure
containing the interval notifier subscription while it is still on a
deferred list.

Fix this by changing invalidate_seq to an atomic type, as it is read outside
of the lock, and by delaying the increment until after the deferred lists
have been updated.

Signed-off-by: Alistair Popple <apopple@xxxxxxxxxx>
Fixes: 99cb252f5e68 ("mm/mmu_notifier: add an interval tree notifier")
---
 mm/mmu_notifier.c | 34 +++++++++++++++++++---------------
 1 file changed, 19 insertions(+), 15 deletions(-)

diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c
index 3f3bbcd298c6..41eb5c2e09f4 100644
--- a/mm/mmu_notifier.c
+++ b/mm/mmu_notifier.c
@@ -40,7 +40,7 @@ struct mmu_notifier_subscriptions {
 	bool has_itree;
 	/* to serialize the list modifications and hlist_unhashed */
 	spinlock_t lock;
-	unsigned long invalidate_seq;
+	atomic_long_t invalidate_seq;
 	unsigned long active_invalidate_ranges;
 	struct rb_root_cached itree;
 	wait_queue_head_t wq;
@@ -87,7 +87,7 @@ static bool
 mn_itree_is_invalidating(struct mmu_notifier_subscriptions *subscriptions)
 {
 	lockdep_assert_held(&subscriptions->lock);
-	return subscriptions->invalidate_seq & 1;
+	return atomic_long_read(&subscriptions->invalidate_seq) & 1;
 }
 
 static struct mmu_interval_notifier *
@@ -103,12 +103,12 @@ mn_itree_inv_start_range(struct mmu_notifier_subscriptions *subscriptions,
 	node = interval_tree_iter_first(&subscriptions->itree, range->start,
 					range->end - 1);
 	if (node) {
-		subscriptions->invalidate_seq |= 1;
+		atomic_long_or(1, &subscriptions->invalidate_seq);
 		res = container_of(node, struct mmu_interval_notifier,
 				   interval_tree);
 	}
 
-	*seq = subscriptions->invalidate_seq;
+	*seq = atomic_long_read(&subscriptions->invalidate_seq);
 	spin_unlock(&subscriptions->lock);
 	return res;
 }
@@ -138,9 +138,6 @@ static void mn_itree_inv_end(struct mmu_notifier_subscriptions *subscriptions)
 		return;
 	}
 
-	/* Make invalidate_seq even */
-	subscriptions->invalidate_seq++;
-
 	/*
 	 * The inv_end incorporates a deferred mechanism like rtnl_unlock().
 	 * Adds and removes are queued until the final inv_end happens then
@@ -158,6 +155,13 @@ static void mn_itree_inv_end(struct mmu_notifier_subscriptions *subscriptions)
 					     &subscriptions->itree);
 		hlist_del(&interval_sub->deferred_item);
 	}
+
+	/* Pairs with the atomic_long_read in mmu_interval_notifier_remove(). */
+	smp_mb__before_atomic();
+
+	/* Make invalidate_seq even */
+	atomic_long_inc(&subscriptions->invalidate_seq);
+
 	spin_unlock(&subscriptions->lock);
 
 	wake_up_all(&subscriptions->wq);
@@ -232,7 +236,7 @@ mmu_interval_read_begin(struct mmu_interval_notifier *interval_sub)
 	spin_lock(&subscriptions->lock);
 	/* Pairs with the WRITE_ONCE in mmu_interval_set_seq() */
 	seq = READ_ONCE(interval_sub->invalidate_seq);
-	is_invalidating = seq == subscriptions->invalidate_seq;
+	is_invalidating = seq == atomic_long_read(&subscriptions->invalidate_seq);
 	spin_unlock(&subscriptions->lock);
 
 	/*
@@ -246,7 +250,7 @@ mmu_interval_read_begin(struct mmu_interval_notifier *interval_sub)
 	lock_map_release(&__mmu_notifier_invalidate_range_start_map);
 	if (is_invalidating)
 		wait_event(subscriptions->wq,
-			   READ_ONCE(subscriptions->invalidate_seq) != seq);
+			   atomic_long_read(&subscriptions->invalidate_seq) != seq);
 
 	/*
 	 * Notice that mmu_interval_read_retry() can already be true at this
@@ -648,7 +652,7 @@ int __mmu_notifier_register(struct mmu_notifier *subscription,
 
 		INIT_HLIST_HEAD(&subscriptions->list);
 		spin_lock_init(&subscriptions->lock);
-		subscriptions->invalidate_seq = 2;
+		atomic_long_set(&subscriptions->invalidate_seq, 2);
 		subscriptions->itree = RB_ROOT_CACHED;
 		init_waitqueue_head(&subscriptions->wq);
 		INIT_HLIST_HEAD(&subscriptions->deferred_list);
@@ -954,11 +958,11 @@ static int __mmu_interval_notifier_insert(
 			hlist_add_head(&interval_sub->deferred_item,
 				       &subscriptions->deferred_list);
 		else {
-			subscriptions->invalidate_seq |= 1;
+			atomic_long_or(1, &subscriptions->invalidate_seq);
 			interval_tree_insert(&interval_sub->interval_tree,
 					     &subscriptions->itree);
 		}
-		interval_sub->invalidate_seq = subscriptions->invalidate_seq;
+		interval_sub->invalidate_seq = atomic_long_read(&subscriptions->invalidate_seq);
 	} else {
 		WARN_ON(mn_itree_is_invalidating(subscriptions));
 		/*
@@ -968,7 +972,7 @@ static int __mmu_interval_notifier_insert(
 		 * soon.
 		 */
 		interval_sub->invalidate_seq =
-			subscriptions->invalidate_seq - 1;
+			atomic_long_read(&subscriptions->invalidate_seq) - 1;
 		interval_tree_insert(&interval_sub->interval_tree,
 				     &subscriptions->itree);
 	}
@@ -1066,7 +1070,7 @@ void mmu_interval_notifier_remove(struct mmu_interval_notifier *interval_sub)
 		} else {
 			hlist_add_head(&interval_sub->deferred_item,
 				       &subscriptions->deferred_list);
-			seq = subscriptions->invalidate_seq;
+			seq = atomic_long_read(&subscriptions->invalidate_seq);
 		}
 	} else {
 		if (WARN_ON(RB_EMPTY_NODE(&interval_sub->interval_tree.rb))) {
@@ -1086,7 +1090,7 @@ void mmu_interval_notifier_remove(struct mmu_interval_notifier *interval_sub)
 	lock_map_release(&__mmu_notifier_invalidate_range_start_map);
 	if (seq)
 		wait_event(subscriptions->wq,
-			   READ_ONCE(subscriptions->invalidate_seq) != seq);
+			   atomic_long_read(&subscriptions->invalidate_seq) != seq);
 
 	/* pairs with mmgrab in mmu_interval_notifier_insert() */
 	mmdrop(mm);
-- 
2.34.1
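
For readers less familiar with the mmu_interval_notifier internals, here is a
minimal, hypothetical userspace model (C11 atomics + pthreads, not the kernel
code) of the ordering the patch enforces: the sequence counter only becomes
even again after the deferred list has been drained, so a remover waiting on
the counter cannot free a subscription that inv_end() still references. The
names (subs, inv_end, notifier_remove) are illustrative stand-ins, the mutex
and condvar take the place of the spinlock, wait_event() and wake_up_all(),
and smp_mb__before_atomic() has no direct analogue here because the mutex
already provides the required ordering.

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

/* Stand-in for struct mmu_notifier_subscriptions, heavily simplified. */
static struct {
	pthread_mutex_t lock;		/* models subscriptions->lock */
	pthread_cond_t wq;		/* models wait_event()/wake_up_all() */
	atomic_long invalidate_seq;	/* odd while an invalidation is running */
	void *deferred;			/* one deferred removal, for brevity */
} subs = {
	.lock = PTHREAD_MUTEX_INITIALIZER,
	.wq = PTHREAD_COND_INITIALIZER,
};

/* Models mn_itree_inv_end(): drain deferred work, then make seq even. */
static void inv_end(void)
{
	pthread_mutex_lock(&subs.lock);
	subs.deferred = NULL;	/* interval_tree_remove() + hlist_del() */
	/*
	 * Only now may waiters observe a new (even) sequence. If the
	 * increment happened before the deferred list was drained, a
	 * remover could return and free its subscription while this
	 * function still references it.
	 */
	atomic_fetch_add(&subs.invalidate_seq, 1);
	pthread_mutex_unlock(&subs.lock);
	pthread_cond_broadcast(&subs.wq);
}

/* Models mmu_interval_notifier_remove() while an invalidation is active. */
static void *notifier_remove(void *sub)
{
	long seq = 0;

	pthread_mutex_lock(&subs.lock);
	if (atomic_load(&subs.invalidate_seq) & 1) {
		subs.deferred = sub;	/* queue for deferred removal */
		seq = atomic_load(&subs.invalidate_seq);
	}
	/* Wait until the invalidation that queued us has fully ended. */
	while (seq && atomic_load(&subs.invalidate_seq) == seq)
		pthread_cond_wait(&subs.wq, &subs.lock);
	pthread_mutex_unlock(&subs.lock);
	free(sub);	/* safe: inv_end() no longer references it */
	return NULL;
}

int main(void)
{
	void *sub = malloc(64);	/* stands in for the interval notifier */
	pthread_t t;

	atomic_store(&subs.invalidate_seq, 3);	/* odd: invalidation in flight */
	pthread_create(&t, NULL, notifier_remove, sub);
	inv_end();
	pthread_join(t, NULL);
	printf("final seq %ld (even: no invalidation running)\n",
	       atomic_load(&subs.invalidate_seq));
	return 0;
}

Build with something like "cc -pthread model.c" (the file name is arbitrary).
With the pre-patch ordering, i.e. incrementing the sequence before draining
the deferred list, the waiter in notifier_remove() could see the new sequence
and free the object while inv_end() was still about to touch subs.deferred,
which is exactly the use-after-free described in the changelog above.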