3.16.70-rc1 review patch. If anyone has any objections, please let me know. ------------------ From: Daniel Jordan <daniel.m.jordan@xxxxxxxxxx> commit c10d38cc8d3e43f946b6c2bf4602c86791587f30 upstream. Dan Carpenter reports a potential NULL dereference in get_swap_page_of_type: Smatch complains that the NULL checks on "si" aren't consistent. This seems like a real bug because we have not ensured that the type is valid and so "si" can be NULL. Add the missing check for NULL, taking care to use a read barrier to ensure CPU1 observes CPU0's updates in the correct order: CPU0 CPU1 alloc_swap_info() if (type >= nr_swapfiles) swap_info[type] = p /* handle invalid entry */ smp_wmb() smp_rmb() ++nr_swapfiles p = swap_info[type] Without smp_rmb, CPU1 might observe CPU0's write to nr_swapfiles before CPU0's write to swap_info[type] and read NULL from swap_info[type]. Ying Huang noticed other places in swapfile.c don't order these reads properly. Introduce swap_type_to_swap_info to encourage correct usage. Use READ_ONCE and WRITE_ONCE to follow the Linux Kernel Memory Model (see tools/memory-model/Documentation/explanation.txt). This ordering need not be enforced in places where swap_lock is held (e.g. si_swapinfo) because swap_lock serializes updates to nr_swapfiles and the swap_info array. Link: http://lkml.kernel.org/r/20190131024410.29859-1-daniel.m.jordan@xxxxxxxxxx Fixes: ec8acf20afb8 ("swap: add per-partition lock for swapfile") Signed-off-by: Daniel Jordan <daniel.m.jordan@xxxxxxxxxx> Reported-by: Dan Carpenter <dan.carpenter@xxxxxxxxxx> Suggested-by: "Huang, Ying" <ying.huang@xxxxxxxxx> Reviewed-by: Andrea Parri <andrea.parri@xxxxxxxxxxxxxxxxxxxx> Acked-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx> Cc: Alan Stern <stern@xxxxxxxxxxxxxxxxxxx> Cc: Andi Kleen <ak@xxxxxxxxxxxxxxx> Cc: Dave Hansen <dave.hansen@xxxxxxxxxxxxxxx> Cc: Omar Sandoval <osandov@xxxxxx> Cc: Paul McKenney <paulmck@xxxxxxxxxxxxxxxxxx> Cc: Shaohua Li <shli@xxxxxxxxxx> Cc: Stephen Rothwell <sfr@xxxxxxxxxxxxxxxx> Cc: Tejun Heo <tj@xxxxxxxxxx> Cc: Will Deacon <will.deacon@xxxxxxx> Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> Signed-off-by: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx> [bwh: Backported to 3.16: - Add swp_swap_info(), as done in upstream commit 0bcac06f27d7 "mm, swap: skip swapcache for swapin of synchronous device" - Use ACCESS_ONCE() instead of {READ,WRITE}_ONCE() - Adjust context] Signed-off-by: Ben Hutchings <ben@xxxxxxxxxxxxxxx> --- --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -451,6 +451,7 @@ extern sector_t map_swap_page(struct pag extern sector_t swapdev_block(int, pgoff_t); extern int page_swapcount(struct page *); extern struct swap_info_struct *page_swap_info(struct page *); +extern struct swap_info_struct *swp_swap_info(swp_entry_t entry); extern int reuse_swap_page(struct page *); extern int try_to_free_swap(struct page *); struct backing_dev_info; --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -86,6 +86,15 @@ static DECLARE_WAIT_QUEUE_HEAD(proc_poll /* Activity counter to indicate that a swapon or swapoff has occurred */ static atomic_t proc_poll_event = ATOMIC_INIT(0); +static struct swap_info_struct *swap_type_to_swap_info(int type) +{ + if (type >= ACCESS_ONCE(nr_swapfiles)) + return NULL; + + smp_rmb(); /* Pairs with smp_wmb in alloc_swap_info. */ + return ACCESS_ONCE(swap_info[type]); +} + static inline unsigned char swap_count(unsigned char ent) { return ent & ~SWAP_HAS_CACHE; /* may include COUNT_CONTINUED flag */ @@ -703,12 +712,14 @@ noswap: /* The only caller of this function is now suspend routine */ swp_entry_t get_swap_page_of_type(int type) { - struct swap_info_struct *si; + struct swap_info_struct *si = swap_type_to_swap_info(type); pgoff_t offset; - si = swap_info[type]; + if (!si) + goto fail; + spin_lock(&si->lock); - if (si && (si->flags & SWP_WRITEOK)) { + if (si->flags & SWP_WRITEOK) { atomic_long_dec(&nr_swap_pages); /* This is called for allocating swap entry, not cache */ offset = scan_swap_map(si, 1); @@ -719,6 +730,7 @@ swp_entry_t get_swap_page_of_type(int ty atomic_long_inc(&nr_swap_pages); } spin_unlock(&si->lock); +fail: return (swp_entry_t) {0}; } @@ -730,9 +742,9 @@ static struct swap_info_struct *swap_inf if (!entry.val) goto out; type = swp_type(entry); - if (type >= nr_swapfiles) + p = swap_type_to_swap_info(type); + if (!p) goto bad_nofile; - p = swap_info[type]; if (!(p->flags & SWP_USED)) goto bad_device; offset = swp_offset(entry); @@ -1037,10 +1049,9 @@ int swap_type_of(dev_t device, sector_t sector_t swapdev_block(int type, pgoff_t offset) { struct block_device *bdev; + struct swap_info_struct *si = swap_type_to_swap_info(type); - if ((unsigned int)type >= nr_swapfiles) - return 0; - if (!(swap_info[type]->flags & SWP_WRITEOK)) + if (!si || !(si->flags & SWP_WRITEOK)) return 0; return map_swap_entry(swp_entry(type, offset), &bdev); } @@ -1584,7 +1595,7 @@ static sector_t map_swap_entry(swp_entry struct swap_extent *se; pgoff_t offset; - sis = swap_info[swp_type(entry)]; + sis = swp_swap_info(entry); *bdev = sis->bdev; offset = swp_offset(entry); @@ -1982,9 +1993,7 @@ static void *swap_start(struct seq_file if (!l) return SEQ_START_TOKEN; - for (type = 0; type < nr_swapfiles; type++) { - smp_rmb(); /* read nr_swapfiles before swap_info[type] */ - si = swap_info[type]; + for (type = 0; (si = swap_type_to_swap_info(type)); type++) { if (!(si->flags & SWP_USED) || !si->swap_map) continue; if (!--l) @@ -2004,9 +2013,7 @@ static void *swap_next(struct seq_file * else type = si->type + 1; - for (; type < nr_swapfiles; type++) { - smp_rmb(); /* read nr_swapfiles before swap_info[type] */ - si = swap_info[type]; + for (; (si = swap_type_to_swap_info(type)); type++) { if (!(si->flags & SWP_USED) || !si->swap_map) continue; ++*pos; @@ -2111,14 +2118,14 @@ static struct swap_info_struct *alloc_sw } if (type >= nr_swapfiles) { p->type = type; - swap_info[type] = p; + ACCESS_ONCE(swap_info[type]) = p; /* * Write swap_info[type] before nr_swapfiles, in case a * racing procfs swap_start() or swap_next() is reading them. * (We never shrink nr_swapfiles, we never free this entry.) */ smp_wmb(); - nr_swapfiles++; + ACCESS_ONCE(nr_swapfiles) = nr_swapfiles + 1; } else { kfree(p); p = swap_info[type]; @@ -2598,7 +2605,7 @@ void si_swapinfo(struct sysinfo *val) static int __swap_duplicate(swp_entry_t entry, unsigned char usage) { struct swap_info_struct *p; - unsigned long offset, type; + unsigned long offset; unsigned char count; unsigned char has_cache; int err = -EINVAL; @@ -2606,10 +2613,10 @@ static int __swap_duplicate(swp_entry_t if (non_swap_entry(entry)) goto out; - type = swp_type(entry); - if (type >= nr_swapfiles) + p = swp_swap_info(entry); + if (!p) goto bad_file; - p = swap_info[type]; + offset = swp_offset(entry); spin_lock(&p->lock); @@ -2704,11 +2711,16 @@ int swapcache_prepare(swp_entry_t entry) return __swap_duplicate(entry, SWAP_HAS_CACHE); } +struct swap_info_struct *swp_swap_info(swp_entry_t entry) +{ + return swap_type_to_swap_info(swp_type(entry)); +} + struct swap_info_struct *page_swap_info(struct page *page) { - swp_entry_t swap = { .val = page_private(page) }; + swp_entry_t entry = { .val = page_private(page) }; BUG_ON(!PageSwapCache(page)); - return swap_info[swp_type(swap)]; + return swp_swap_info(entry); } /*