From: Yu Kuai <yukuai3@xxxxxxxxxx> Our test reports a uaf for 'mddev->sync_thread': T1 T2 md_start_sync md_register_thread // mddev->sync_thread is set raid1d md_check_recovery md_reap_sync_thread md_unregister_thread kfree md_wakeup_thread wake_up ->sync_thread was freed Root cause is that there is a small windown between register thread and wake up thread, where the thread can be freed concurrently. Currently, a global spinlock 'pers_lock' is borrowed to protect 'mddev->thread', this problem can be fixed likewise, however, there might be similar problem for other md_thread, and I really don't like the idea to borrow a global lock. This patch protect md_thread with rcu. Signed-off-by: Yu Kuai <yukuai3@xxxxxxxxxx> --- drivers/md/md-bitmap.c | 21 ++++++++++---- drivers/md/md.c | 62 ++++++++++++++++++---------------------- drivers/md/md.h | 14 ++++----- drivers/md/raid1.c | 4 +-- drivers/md/raid1.h | 2 +- drivers/md/raid10.c | 4 +-- drivers/md/raid10.h | 2 +- drivers/md/raid5-cache.c | 2 +- drivers/md/raid5.c | 4 +-- drivers/md/raid5.h | 2 +- 10 files changed, 61 insertions(+), 56 deletions(-) diff --git a/drivers/md/md-bitmap.c b/drivers/md/md-bitmap.c index f670c72d97be..dc2ea2ce0ae9 100644 --- a/drivers/md/md-bitmap.c +++ b/drivers/md/md-bitmap.c @@ -1784,6 +1784,7 @@ void md_bitmap_wait_behind_writes(struct mddev *mddev) void md_bitmap_destroy(struct mddev *mddev) { struct bitmap *bitmap = mddev->bitmap; + struct md_thread *thread; if (!bitmap) /* there was no bitmap */ return; @@ -1797,8 +1798,12 @@ void md_bitmap_destroy(struct mddev *mddev) mddev->bitmap = NULL; /* disconnect from the md device */ spin_unlock(&mddev->lock); mutex_unlock(&mddev->bitmap_info.mutex); - if (mddev->thread) - mddev->thread->timeout = MAX_SCHEDULE_TIMEOUT; + + rcu_read_lock(); + thread = rcu_dereference(mddev->thread); + if (thread) + thread->timeout = MAX_SCHEDULE_TIMEOUT; + rcu_read_unlock(); md_bitmap_free(bitmap); } @@ -2433,6 +2438,7 @@ timeout_store(struct mddev *mddev, const char *buf, size_t len) { /* timeout can be set at any time */ unsigned long timeout; + struct md_thread *thread; int rv = strict_strtoul_scaled(buf, &timeout, 4); if (rv) return rv; @@ -2448,16 +2454,21 @@ timeout_store(struct mddev *mddev, const char *buf, size_t len) if (timeout < 1) timeout = 1; mddev->bitmap_info.daemon_sleep = timeout; - if (mddev->thread) { + + rcu_read_lock(); + thread = rcu_dereference(mddev->thread); + if (thread) { /* if thread->timeout is MAX_SCHEDULE_TIMEOUT, then * the bitmap is all clean and we don't need to * adjust the timeout right now */ - if (mddev->thread->timeout < MAX_SCHEDULE_TIMEOUT) { - mddev->thread->timeout = timeout; + if (thread->timeout < MAX_SCHEDULE_TIMEOUT) { + thread->timeout = timeout; md_wakeup_thread(mddev->thread); } } + rcu_read_unlock(); + return len; } diff --git a/drivers/md/md.c b/drivers/md/md.c index d5a29ccb24ec..5609b7e3abab 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -70,11 +70,7 @@ #include "md-bitmap.h" #include "md-cluster.h" -/* pers_list is a list of registered personalities protected - * by pers_lock. - * pers_lock does extra service to protect accesses to - * mddev->thread when the mutex cannot be held. - */ +/* pers_list is a list of registered personalities protected by pers_lock. */ static LIST_HEAD(pers_list); static DEFINE_SPINLOCK(pers_lock); @@ -92,7 +88,7 @@ static struct workqueue_struct *md_rdev_misc_wq; static int remove_and_add_spares(struct mddev *mddev, struct md_rdev *this); static void mddev_detach(struct mddev *mddev); -static void md_wakeup_thread_directly(struct md_thread *thread); +static void md_wakeup_thread_directly(struct md_thread __rcu *thread); enum md_ro_state { MD_RDWR, @@ -803,13 +799,8 @@ void mddev_unlock(struct mddev *mddev) } else mutex_unlock(&mddev->reconfig_mutex); - /* As we've dropped the mutex we need a spinlock to - * make sure the thread doesn't disappear - */ - spin_lock(&pers_lock); md_wakeup_thread(mddev->thread); wake_up(&mddev->sb_wait); - spin_unlock(&pers_lock); } EXPORT_SYMBOL_GPL(mddev_unlock); @@ -7893,23 +7884,33 @@ static int md_thread(void *arg) return 0; } -static void md_wakeup_thread_directly(struct md_thread *thread) +static void md_wakeup_thread_directly(struct md_thread __rcu *thread) { - if (thread) - wake_up_process(thread->tsk); + struct md_thread *t; + + rcu_read_lock(); + t = rcu_dereference(thread); + if (t) + wake_up_process(t->tsk); + rcu_read_unlock(); } -void md_wakeup_thread(struct md_thread *thread) +void md_wakeup_thread(struct md_thread __rcu *thread) { - if (thread) { - pr_debug("md: waking up MD thread %s.\n", thread->tsk->comm); - set_bit(THREAD_WAKEUP, &thread->flags); - wake_up(&thread->wqueue); + struct md_thread *t; + + rcu_read_lock(); + t = rcu_dereference(thread); + if (t) { + pr_debug("md: waking up MD thread %s.\n", t->tsk->comm); + set_bit(THREAD_WAKEUP, &t->flags); + wake_up(&t->wqueue); } + rcu_read_unlock(); } EXPORT_SYMBOL(md_wakeup_thread); -int md_register_thread(struct md_thread **threadp, +int md_register_thread(struct md_thread __rcu **threadp, void (*run)(struct md_thread *), struct mddev *mddev, const char *name) { @@ -7933,27 +7934,20 @@ int md_register_thread(struct md_thread **threadp, return err; } - *threadp = thread; + rcu_assign_pointer(*threadp, thread); return 0; } EXPORT_SYMBOL(md_register_thread); -void md_unregister_thread(struct md_thread **threadp) +void md_unregister_thread(struct md_thread __rcu **threadp) { - struct md_thread *thread; + struct md_thread *thread = rcu_access_pointer(*threadp); - /* - * Locking ensures that mddev_unlock does not wake_up a - * non-existent thread - */ - spin_lock(&pers_lock); - thread = *threadp; - if (!thread) { - spin_unlock(&pers_lock); + if (!thread) return; - } - *threadp = NULL; - spin_unlock(&pers_lock); + + rcu_assign_pointer(*threadp, NULL); + synchronize_rcu(); pr_debug("interrupting MD-thread pid %d\n", task_pid_nr(thread->tsk)); kthread_stop(thread->tsk); diff --git a/drivers/md/md.h b/drivers/md/md.h index 5acdd704a922..0525f6d66a4d 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -367,8 +367,8 @@ struct mddev { int new_chunk_sectors; int reshape_backwards; - struct md_thread *thread; /* management thread */ - struct md_thread *sync_thread; /* doing resync or reconstruct */ + struct md_thread __rcu *thread; /* management thread */ + struct md_thread __rcu *sync_thread; /* doing resync or reconstruct */ /* 'last_sync_action' is initialized to "none". It is set when a * sync operation (i.e "data-check", "requested-resync", "resync", @@ -719,9 +719,9 @@ struct md_io_acct { #define THREAD_WAKEUP 0 /* caller need to make sured returned md_thread won't be freed */ -static inline struct md_thread *get_md_thread(struct md_thread *t) +static inline struct md_thread *get_md_thread(struct md_thread __rcu *t) { - return t; + return rcu_access_pointer(t); } static inline void safe_put_page(struct page *p) @@ -736,11 +736,11 @@ extern int register_md_cluster_operations(struct md_cluster_operations *ops, extern int unregister_md_cluster_operations(void); extern int md_setup_cluster(struct mddev *mddev, int nodes); extern void md_cluster_stop(struct mddev *mddev); -int md_register_thread(struct md_thread **threadp, +int md_register_thread(struct md_thread __rcu **threadp, void (*run)(struct md_thread *thread), struct mddev *mddev, const char *name); -extern void md_unregister_thread(struct md_thread **threadp); -extern void md_wakeup_thread(struct md_thread *thread); +extern void md_unregister_thread(struct md_thread __rcu **threadp); +extern void md_wakeup_thread(struct md_thread __rcu *thread); extern void md_check_recovery(struct mddev *mddev); extern void md_reap_sync_thread(struct mddev *mddev); extern int mddev_init_writes_pending(struct mddev *mddev); diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 1217c1db0a40..64f019e3615f 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -3176,8 +3176,8 @@ static int raid1_run(struct mddev *mddev) /* * Ok, everything is just fine now */ - mddev->thread = conf->thread; - conf->thread = NULL; + rcu_assign_pointer(mddev->thread, conf->thread); + rcu_assign_pointer(conf->thread, NULL); mddev->private = conf; set_bit(MD_FAILFAST_SUPPORTED, &mddev->flags); diff --git a/drivers/md/raid1.h b/drivers/md/raid1.h index ebb6788820e7..468f189da7a0 100644 --- a/drivers/md/raid1.h +++ b/drivers/md/raid1.h @@ -130,7 +130,7 @@ struct r1conf { /* When taking over an array from a different personality, we store * the new thread here until we fully activate the array. */ - struct md_thread *thread; + struct md_thread __rcu *thread; /* Keep track of cluster resync window to send to other * nodes. diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index fc8d07fb1c7d..5509955b79ec 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -4140,8 +4140,8 @@ static int raid10_run(struct mddev *mddev) } } - mddev->thread = conf->thread; - conf->thread = NULL; + rcu_assign_pointer(mddev->thread, conf->thread); + rcu_assign_pointer(conf->thread, NULL); if (mddev->queue) { blk_queue_max_write_zeroes_sectors(mddev->queue, 0); diff --git a/drivers/md/raid10.h b/drivers/md/raid10.h index 8c072ce0bc54..63e48b11b552 100644 --- a/drivers/md/raid10.h +++ b/drivers/md/raid10.h @@ -100,7 +100,7 @@ struct r10conf { /* When taking over an array from a different personality, we store * the new thread here until we fully activate the array. */ - struct md_thread *thread; + struct md_thread __rcu *thread; /* * Keep track of cluster resync window to send to other nodes. diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c index 7e45df3e093f..b677441d8889 100644 --- a/drivers/md/raid5-cache.c +++ b/drivers/md/raid5-cache.c @@ -120,7 +120,7 @@ struct r5l_log { struct bio_set bs; mempool_t meta_pool; - struct md_thread *reclaim_thread; + struct md_thread __rcu *reclaim_thread; unsigned long reclaim_target; /* number of space that need to be * reclaimed. if it's 0, reclaim spaces * used by io_units which are in diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 04b1093195d0..a7e47c37daf3 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -7888,8 +7888,8 @@ static int raid5_run(struct mddev *mddev) } conf->min_offset_diff = min_offset_diff; - mddev->thread = conf->thread; - conf->thread = NULL; + rcu_assign_pointer(mddev->thread, conf->thread); + rcu_assign_pointer(conf->thread, NULL); mddev->private = conf; for (i = 0; i < conf->raid_disks && conf->previous_raid_disks; diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h index e873938a6125..f19707189a7b 100644 --- a/drivers/md/raid5.h +++ b/drivers/md/raid5.h @@ -679,7 +679,7 @@ struct r5conf { /* When taking over an array from a different personality, we store * the new thread here until we fully activate the array. */ - struct md_thread *thread; + struct md_thread __rcu *thread; struct list_head temp_inactive_list[NR_STRIPE_HASH_LOCKS]; struct r5worker_group *worker_groups; int group_cnt; -- 2.39.2