Re: [PATCH] mm, swap: bounds check swap_info accesses to avoid NULL derefs

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Mon, Jan 14, 2019 at 07:23:05PM -0500, Daniel Jordan wrote:
> Dan Carpenter reports a potential NULL dereference in
> get_swap_page_of_type:
> 
>   Smatch complains that the NULL checks on "si" aren't consistent.  This
>   seems like a real bug because we have not ensured that the type is
>   valid and so "si" can be NULL.
> 
> Add the missing check for NULL, taking care to use a read barrier to
> ensure CPU1 observes CPU0's updates in the correct order:
> 
>         CPU0                           CPU1
>         alloc_swap_info()              if (type >= nr_swapfiles)
>           swap_info[type] = p              /* handle invalid entry */
>           smp_wmb()                    smp_rmb()
>           ++nr_swapfiles               p = swap_info[type]
> 
> Without smp_rmb, CPU1 might observe CPU0's write to nr_swapfiles before
> CPU0's write to swap_info[type] and read NULL from swap_info[type].
> 
> Ying Huang noticed that other places don't order these reads properly.
> Introduce swap_type_to_swap_info to encourage correct usage.
> 
> Use READ_ONCE and WRITE_ONCE to follow the Linux Kernel Memory Model
> (see tools/memory-model/Documentation/explanation.txt).
> 
> This ordering need not be enforced in places where swap_lock is held
> (e.g. si_swapinfo) because swap_lock serializes updates to nr_swapfiles
> and the swap_info array.
> 
> This is a theoretical problem, no actual reports of it exist.
> 
> Fixes: ec8acf20afb8 ("swap: add per-partition lock for swapfile")
> Reported-by: Dan Carpenter <dan.carpenter@xxxxxxxxxx>
> Signed-off-by: Daniel Jordan <daniel.m.jordan@xxxxxxxxxx>
> Cc: Alan Stern <stern@xxxxxxxxxxxxxxxxxxx>
> Cc: Andi Kleen <ak@xxxxxxxxxxxxxxx>
> Cc: Andrea Parri <andrea.parri@xxxxxxxxxxxxxxxxxxxx>
> Cc: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
> Cc: Dan Carpenter <dan.carpenter@xxxxxxxxxx>
> Cc: Dave Hansen <dave.hansen@xxxxxxxxxxxxxxx>
> Cc: Huang Ying <ying.huang@xxxxxxxxx>
> Cc: Omar Sandoval <osandov@xxxxxx>
> Cc: Paul McKenney <paulmck@xxxxxxxxxxxxxxxxxx>
> Cc: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
> Cc: Shaohua Li <shli@xxxxxxxxxx>
> Cc: Stephen Rothwell <sfr@xxxxxxxxxxxxxxxx>
> Cc: Tejun Heo <tj@xxxxxxxxxx>
> Cc: Will Deacon <will.deacon@xxxxxxx>

Please feel free to add:

Reviewed-by: Andrea Parri <andrea.parri@xxxxxxxxxxxxxxxxxxxx>

  Andrea


> 
> ---
> 
> I'd appreciate it if someone more familiar with memory barriers could
> check this over.  Thanks.
> 
> Probably no need for stable, this is all theoretical.
> 
> Against linux-mmotm tag v5.0-rc1-mmotm-2019-01-09-13-40
> 
>  mm/swapfile.c | 43 +++++++++++++++++++++++++++----------------
>  1 file changed, 27 insertions(+), 16 deletions(-)
> 
> diff --git a/mm/swapfile.c b/mm/swapfile.c
> index f0edf7244256..dad52fc67045 100644
> --- a/mm/swapfile.c
> +++ b/mm/swapfile.c
> @@ -99,6 +99,15 @@ static atomic_t proc_poll_event = ATOMIC_INIT(0);
>  
>  atomic_t nr_rotate_swap = ATOMIC_INIT(0);
>  
> +static struct swap_info_struct *swap_type_to_swap_info(int type)
> +{
> +	if (type >= READ_ONCE(nr_swapfiles))
> +		return NULL;
> +
> +	smp_rmb();	/* Pairs with smp_wmb in alloc_swap_info. */
> +	return READ_ONCE(swap_info[type]);
> +}
> +
>  static inline unsigned char swap_count(unsigned char ent)
>  {
>  	return ent & ~SWAP_HAS_CACHE;	/* may include COUNT_CONTINUED flag */
> @@ -1045,12 +1054,14 @@ int get_swap_pages(int n_goal, swp_entry_t swp_entries[], int entry_size)
>  /* The only caller of this function is now suspend routine */
>  swp_entry_t get_swap_page_of_type(int type)
>  {
> -	struct swap_info_struct *si;
> +	struct swap_info_struct *si = swap_type_to_swap_info(type);
>  	pgoff_t offset;
>  
> -	si = swap_info[type];
> +	if (!si)
> +		goto fail;
> +
>  	spin_lock(&si->lock);
> -	if (si && (si->flags & SWP_WRITEOK)) {
> +	if (si->flags & SWP_WRITEOK) {
>  		atomic_long_dec(&nr_swap_pages);
>  		/* This is called for allocating swap entry, not cache */
>  		offset = scan_swap_map(si, 1);
> @@ -1061,6 +1072,7 @@ swp_entry_t get_swap_page_of_type(int type)
>  		atomic_long_inc(&nr_swap_pages);
>  	}
>  	spin_unlock(&si->lock);
> +fail:
>  	return (swp_entry_t) {0};
>  }
>  
> @@ -1072,9 +1084,9 @@ static struct swap_info_struct *__swap_info_get(swp_entry_t entry)
>  	if (!entry.val)
>  		goto out;
>  	type = swp_type(entry);
> -	if (type >= nr_swapfiles)
> +	p = swap_type_to_swap_info(type);
> +	if (!p)
>  		goto bad_nofile;
> -	p = swap_info[type];
>  	if (!(p->flags & SWP_USED))
>  		goto bad_device;
>  	offset = swp_offset(entry);
> @@ -1212,9 +1224,9 @@ struct swap_info_struct *get_swap_device(swp_entry_t entry)
>  	if (!entry.val)
>  		goto out;
>  	type = swp_type(entry);
> -	if (type >= nr_swapfiles)
> +	si = swap_type_to_swap_info(type);
> +	if (!si)
>  		goto bad_nofile;
> -	si = swap_info[type];
>  
>  	preempt_disable();
>  	if (!(si->flags & SWP_VALID))
> @@ -1765,10 +1777,9 @@ int swap_type_of(dev_t device, sector_t offset, struct block_device **bdev_p)
>  sector_t swapdev_block(int type, pgoff_t offset)
>  {
>  	struct block_device *bdev;
> +	struct swap_info_struct *si = swap_type_to_swap_info(type);
>  
> -	if ((unsigned int)type >= nr_swapfiles)
> -		return 0;
> -	if (!(swap_info[type]->flags & SWP_WRITEOK))
> +	if (!si || !(si->flags & SWP_WRITEOK))
>  		return 0;
>  	return map_swap_entry(swp_entry(type, offset), &bdev);
>  }
> @@ -2799,9 +2810,9 @@ static void *swap_start(struct seq_file *swap, loff_t *pos)
>  	if (!l)
>  		return SEQ_START_TOKEN;
>  
> -	for (type = 0; type < nr_swapfiles; type++) {
> +	for (type = 0; type < READ_ONCE(nr_swapfiles); type++) {
>  		smp_rmb();	/* read nr_swapfiles before swap_info[type] */
> -		si = swap_info[type];
> +		si = READ_ONCE(swap_info[type]);
>  		if (!(si->flags & SWP_USED) || !si->swap_map)
>  			continue;
>  		if (!--l)
> @@ -2821,9 +2832,9 @@ static void *swap_next(struct seq_file *swap, void *v, loff_t *pos)
>  	else
>  		type = si->type + 1;
>  
> -	for (; type < nr_swapfiles; type++) {
> +	for (; type < READ_ONCE(nr_swapfiles); type++) {
>  		smp_rmb();	/* read nr_swapfiles before swap_info[type] */
> -		si = swap_info[type];
> +		si = READ_ONCE(swap_info[type]);
>  		if (!(si->flags & SWP_USED) || !si->swap_map)
>  			continue;
>  		++*pos;
> @@ -2930,14 +2941,14 @@ static struct swap_info_struct *alloc_swap_info(void)
>  	}
>  	if (type >= nr_swapfiles) {
>  		p->type = type;
> -		swap_info[type] = p;
> +		WRITE_ONCE(swap_info[type], p);
>  		/*
>  		 * Write swap_info[type] before nr_swapfiles, in case a
>  		 * racing procfs swap_start() or swap_next() is reading them.
>  		 * (We never shrink nr_swapfiles, we never free this entry.)
>  		 */
>  		smp_wmb();
> -		nr_swapfiles++;
> +		WRITE_ONCE(nr_swapfiles, nr_swapfiles + 1);
>  	} else {
>  		kvfree(p);
>  		p = swap_info[type];
> -- 
> 2.20.0
> 



[Index of Archives]     [Kernel Development]     [Kernel Announce]     [Kernel Newbies]     [Linux Networking Development]     [Share Photos]     [IDE]     [Security]     [Git]     [Netfilter]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Device Mapper]

  Powered by Linux