frontswap_register_ops can race with swapon. Consider the following scene: CPU1 CPU2 ---- ---- frontswap_register_ops fill bitmap a ops->init sys_swapon enable_swap_info frontswap_init without new ops add ops to frontswap_ops list check if swap_active_head changed add to swap_active_head So the frontswap_ops init is missed on the new swap device. Consider the another scene: CPU1 CPU2 ---- ---- frontswap_register_ops fill bitmap a ops->init add ops to frontswap_ops list sys_swapon enable_swap_info frontswap_init with new ops add to swap_active_head check if swap_active_head changed ops->init for new swap device [twice!] The frontswap_ops init will be called two times on the new swap device this time. frontswap_register_ops can also race with swapoff. Consider the following scene: CPU1 CPU2 ---- ---- sys_swapoff removed from swap_active_head frontswap_register_ops fill bitmap a ops->init without swap device add ops to frontswap_ops list invalidate_area with new ops check if swap_active_head changed We could call invalidate_area on a swap device under swapoff with frontswap is uninitialized yet. Fix all these by using swapon_mutex to guard against race with swapon and add swap_info_get_if_under_swapoff() to collect swap devices under swapoff. Fixes: d1dc6f1bcf1e ("frontswap: allow multiple backends") Signed-off-by: Miaohe Lin <linmiaohe@xxxxxxxxxx> --- include/linux/swapfile.h | 2 ++ mm/frontswap.c | 40 +++++++++++++++++----------------------- mm/swapfile.c | 13 ++++++++++++- 3 files changed, 31 insertions(+), 24 deletions(-) diff --git a/include/linux/swapfile.h b/include/linux/swapfile.h index e06febf62978..7ae15d917828 100644 --- a/include/linux/swapfile.h +++ b/include/linux/swapfile.h @@ -9,8 +9,10 @@ extern spinlock_t swap_lock; extern struct plist_head swap_active_head; extern struct swap_info_struct *swap_info[]; +extern struct mutex swapon_mutex; extern int try_to_unuse(unsigned int, bool, unsigned long); extern unsigned long generic_max_swapfile_size(void); extern unsigned long max_swapfile_size(void); +extern struct swap_info_struct *swap_info_get_if_under_swapoff(int type); #endif /* _LINUX_SWAPFILE_H */ diff --git a/mm/frontswap.c b/mm/frontswap.c index 130e301c5ac0..c16bfc7550b5 100644 --- a/mm/frontswap.c +++ b/mm/frontswap.c @@ -123,12 +123,26 @@ void frontswap_register_ops(struct frontswap_ops *ops) bitmap_zero(a, MAX_SWAPFILES); bitmap_zero(b, MAX_SWAPFILES); - + mutex_lock(&swapon_mutex); spin_lock(&swap_lock); plist_for_each_entry(si, &swap_active_head, list) { if (!WARN_ON(!si->frontswap_map)) set_bit(si->type, a); } + /* + * There might be some swap devices under swapoff, i.e. they are + * removed from swap_active_head but frontswap_invalidate_area() + * is not called yet due to swapon_mutex is held here. We must + * collect these swap devices and call ops->init on them or they + * might invalidate frontswap area while frontswap is uninitialized. + */ + for_each_clear_bit(i, a, MAX_SWAPFILES) { + si = swap_info_get_if_under_swapoff(i); + if (!si || !si->frontswap_map) + continue; + set_bit(si->type, b); + } + bitmap_or(a, a, b, MAX_SWAPFILES); spin_unlock(&swap_lock); /* the new ops needs to know the currently active swap devices */ @@ -144,29 +158,9 @@ void frontswap_register_ops(struct frontswap_ops *ops) ops->next = frontswap_ops; } while (cmpxchg(&frontswap_ops, ops->next, ops) != ops->next); - static_branch_inc(&frontswap_enabled_key); - - spin_lock(&swap_lock); - plist_for_each_entry(si, &swap_active_head, list) { - if (si->frontswap_map) - set_bit(si->type, b); - } - spin_unlock(&swap_lock); + mutex_unlock(&swapon_mutex); - /* - * On the very unlikely chance that a swap device was added or - * removed between setting the "a" list bits and the ops init - * calls, we re-check and do init or invalidate for any changed - * bits. - */ - if (unlikely(!bitmap_equal(a, b, MAX_SWAPFILES))) { - for (i = 0; i < MAX_SWAPFILES; i++) { - if (!test_bit(i, a) && test_bit(i, b)) - ops->init(i); - else if (test_bit(i, a) && !test_bit(i, b)) - ops->invalidate_area(i); - } - } + static_branch_inc(&frontswap_enabled_key); } EXPORT_SYMBOL(frontswap_register_ops); diff --git a/mm/swapfile.c b/mm/swapfile.c index 149e77454e3c..ee736533717f 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -89,7 +89,7 @@ static DEFINE_SPINLOCK(swap_avail_lock); struct swap_info_struct *swap_info[MAX_SWAPFILES]; -static DEFINE_MUTEX(swapon_mutex); +DEFINE_MUTEX(swapon_mutex); static DECLARE_WAIT_QUEUE_HEAD(proc_poll_wait); /* Activity counter to indicate that a swapon or swapoff has occurred */ @@ -2958,6 +2958,17 @@ __weak unsigned long max_swapfile_size(void) return generic_max_swapfile_size(); } +struct swap_info_struct *swap_info_get_if_under_swapoff(int type) +{ + struct swap_info_struct *si = swap_type_to_swap_info(type); + + if (!si || !si->swap_map) + return NULL; + if ((si->flags & SWP_USED) && !(si->flags & SWP_WRITEOK)) + return si; + return NULL; +} + static unsigned long read_swap_header(struct swap_info_struct *p, union swap_header *swap_header, struct inode *inode) -- 2.19.1