Re: [PATCH 4/6] lib: radix-tree: check accounting of existing slot replacement users

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Mon 07-11-16 14:07:39, Johannes Weiner wrote:
> The bug in khugepaged (fixed in the first patch of this series) shows
> that radix tree slot replacement is fragile; and it will become more
> so when not only NULL<->!NULL transitions need to be accounted but
> transitions from and to exceptional entries as well. We need checks.
> 
> Re-implement radix_tree_replace_slot() on top of the sanity-checked
> __radix_tree_replace(). This requires existing callers to also pass
> the radix tree root, but it'll warn us when somebody replaces slots
> with contents that need proper accounting (transitions between NULL
> entries, real entries, exceptional entries) and where a replacement
> through the slot pointer would corrupt the radix tree node counts.
> 
> Suggested-by: Jan Kara <jack@xxxxxxx>
> Signed-off-by: Johannes Weiner <hannes@xxxxxxxxxxx>

The patch looks good. You can add:

Reviewed-by: Jan Kara <jack@xxxxxxx>

								Honza

> ---
>  arch/s390/mm/gmap.c                   |  2 +-
>  drivers/sh/intc/virq.c                |  2 +-
>  fs/dax.c                              |  4 +--
>  include/linux/radix-tree.h            | 16 ++-------
>  lib/radix-tree.c                      | 64 +++++++++++++++++++++++++++--------
>  mm/filemap.c                          |  4 +--
>  mm/khugepaged.c                       |  5 +--
>  mm/migrate.c                          |  4 +--
>  mm/truncate.c                         |  2 +-
>  tools/testing/radix-tree/multiorder.c |  2 +-
>  10 files changed, 64 insertions(+), 41 deletions(-)
> 
> diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
> index 3ba622702ce4..ec1f0dedb948 100644
> --- a/arch/s390/mm/gmap.c
> +++ b/arch/s390/mm/gmap.c
> @@ -1015,7 +1015,7 @@ static inline void gmap_insert_rmap(struct gmap *sg, unsigned long vmaddr,
>  	if (slot) {
>  		rmap->next = radix_tree_deref_slot_protected(slot,
>  							&sg->guest_table_lock);
> -		radix_tree_replace_slot(slot, rmap);
> +		radix_tree_replace_slot(&sg->host_to_rmap, slot, rmap);
>  	} else {
>  		rmap->next = NULL;
>  		radix_tree_insert(&sg->host_to_rmap, vmaddr >> PAGE_SHIFT,
> diff --git a/drivers/sh/intc/virq.c b/drivers/sh/intc/virq.c
> index e7899624aa0b..35bbe288ddb4 100644
> --- a/drivers/sh/intc/virq.c
> +++ b/drivers/sh/intc/virq.c
> @@ -254,7 +254,7 @@ static void __init intc_subgroup_map(struct intc_desc_int *d)
>  
>  		radix_tree_tag_clear(&d->tree, entry->enum_id,
>  				     INTC_TAG_VIRQ_NEEDS_ALLOC);
> -		radix_tree_replace_slot((void **)entries[i],
> +		radix_tree_replace_slot(&d->tree, (void **)entries[i],
>  					&intc_irq_xlate[irq]);
>  	}
>  
> diff --git a/fs/dax.c b/fs/dax.c
> index db78bae0dc0f..85930c2a2749 100644
> --- a/fs/dax.c
> +++ b/fs/dax.c
> @@ -342,7 +342,7 @@ static inline void *lock_slot(struct address_space *mapping, void **slot)
>  		radix_tree_deref_slot_protected(slot, &mapping->tree_lock);
>  
>  	entry |= RADIX_DAX_ENTRY_LOCK;
> -	radix_tree_replace_slot(slot, (void *)entry);
> +	radix_tree_replace_slot(&mapping->page_tree, slot, (void *)entry);
>  	return (void *)entry;
>  }
>  
> @@ -356,7 +356,7 @@ static inline void *unlock_slot(struct address_space *mapping, void **slot)
>  		radix_tree_deref_slot_protected(slot, &mapping->tree_lock);
>  
>  	entry &= ~(unsigned long)RADIX_DAX_ENTRY_LOCK;
> -	radix_tree_replace_slot(slot, (void *)entry);
> +	radix_tree_replace_slot(&mapping->page_tree, slot, (void *)entry);
>  	return (void *)entry;
>  }
>  
> diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h
> index 7ced8a70cc8b..2d1b9b8be983 100644
> --- a/include/linux/radix-tree.h
> +++ b/include/linux/radix-tree.h
> @@ -249,20 +249,6 @@ static inline int radix_tree_exception(void *arg)
>  	return unlikely((unsigned long)arg & RADIX_TREE_ENTRY_MASK);
>  }
>  
> -/**
> - * radix_tree_replace_slot	- replace item in a slot
> - * @pslot:	pointer to slot, returned by radix_tree_lookup_slot
> - * @item:	new item to store in the slot.
> - *
> - * For use with radix_tree_lookup_slot().  Caller must hold tree write locked
> - * across slot lookup and replacement.
> - */
> -static inline void radix_tree_replace_slot(void **pslot, void *item)
> -{
> -	BUG_ON(radix_tree_is_internal_node(item));
> -	rcu_assign_pointer(*pslot, item);
> -}
> -
>  int __radix_tree_create(struct radix_tree_root *root, unsigned long index,
>  			unsigned order, struct radix_tree_node **nodep,
>  			void ***slotp);
> @@ -280,6 +266,8 @@ void **radix_tree_lookup_slot(struct radix_tree_root *, unsigned long);
>  void __radix_tree_replace(struct radix_tree_root *root,
>  			  struct radix_tree_node *node,
>  			  void **slot, void *item);
> +void radix_tree_replace_slot(struct radix_tree_root *root,
> +			     void **slot, void *item);
>  bool __radix_tree_delete_node(struct radix_tree_root *root,
>  			      struct radix_tree_node *node);
>  void *radix_tree_delete_item(struct radix_tree_root *, unsigned long, void *);
> diff --git a/lib/radix-tree.c b/lib/radix-tree.c
> index ddc6facbf4da..5cbdd911931e 100644
> --- a/lib/radix-tree.c
> +++ b/lib/radix-tree.c
> @@ -753,19 +753,10 @@ void *radix_tree_lookup(struct radix_tree_root *root, unsigned long index)
>  }
>  EXPORT_SYMBOL(radix_tree_lookup);
>  
> -/**
> - * __radix_tree_replace		- replace item in a slot
> - * @root:	radix tree root
> - * @node:	pointer to tree node
> - * @slot:	pointer to slot in @node
> - * @item:	new item to store in the slot.
> - *
> - * For use with __radix_tree_lookup().  Caller must hold tree write locked
> - * across slot lookup and replacement.
> - */
> -void __radix_tree_replace(struct radix_tree_root *root,
> -			  struct radix_tree_node *node,
> -			  void **slot, void *item)
> +static void replace_slot(struct radix_tree_root *root,
> +			 struct radix_tree_node *node,
> +			 void **slot, void *item,
> +			 bool warn_typeswitch)
>  {
>  	void *old = rcu_dereference_raw(*slot);
>  	int count, exceptional;
> @@ -776,8 +767,7 @@ void __radix_tree_replace(struct radix_tree_root *root,
>  	exceptional = !!radix_tree_exceptional_entry(item) -
>  		      !!radix_tree_exceptional_entry(old);
>  
> -	WARN_ONCE(!node && slot != (void **)&root->rnode &&
> -		  (count || exceptional),
> +	WARN_ONCE(warn_typeswitch && (count || exceptional),
>  		  "Unaccounted slot replacement: old=%p item=%p count=%d exceptional=%d\n",
>  		  old, item, count, exceptional);
>  
> @@ -789,6 +779,50 @@ void __radix_tree_replace(struct radix_tree_root *root,
>  }
>  
>  /**
> + * __radix_tree_replace		- replace item in a slot
> + * @root:	radix tree root
> + * @node:	pointer to tree node
> + * @slot:	pointer to slot in @node
> + * @item:	new item to store in the slot.
> + *
> + * For use with __radix_tree_lookup().  Caller must hold tree write locked
> + * across slot lookup and replacement.
> + */
> +void __radix_tree_replace(struct radix_tree_root *root,
> +			  struct radix_tree_node *node,
> +			  void **slot, void *item)
> +{
> +	/*
> +	 * This function supports replacing entries with different types
> +	 * (NULL, regular entries, exceptional entries), but that needs
> +	 * accounting against the node - unless the slot is root->rnode.
> +	 */
> +	replace_slot(root, node, slot, item,
> +		     !node && slot != (void **)&root->rnode);
> +}
> +
> +/**
> + * radix_tree_replace_slot	- replace item in a slot
> + * @root:	radix tree root
> + * @slot:	pointer to slot
> + * @item:	new item to store in the slot.
> + *
> + * For use with radix_tree_lookup_slot(), radix_tree_gang_lookup_slot(),
> + * radix_tree_gang_lookup_tag_slot().  Caller must hold tree write locked
> + * across slot lookup and replacement.
> + *
> + * NOTE: This cannot be used to switch between non-entries (empty slots),
> + * regular entries, and exceptional entries, as that requires accounting
> + * inside the radix tree node. When switching from one type of entry to
> + * another, use __radix_tree_lookup() and __radix_tree_replace().
> + */
> +void radix_tree_replace_slot(struct radix_tree_root *root,
> +			     void **slot, void *item)
> +{
> +	replace_slot(root, NULL, slot, item, true);
> +}
> +
> +/**
>   *	radix_tree_tag_set - set a tag on a radix tree node
>   *	@root:		radix tree root
>   *	@index:		index key
> diff --git a/mm/filemap.c b/mm/filemap.c
> index c7fe2f16503f..eb463156f29a 100644
> --- a/mm/filemap.c
> +++ b/mm/filemap.c
> @@ -147,7 +147,7 @@ static int page_cache_tree_insert(struct address_space *mapping,
>  						      false);
>  		}
>  	}
> -	radix_tree_replace_slot(slot, page);
> +	radix_tree_replace_slot(&mapping->page_tree, slot, page);
>  	mapping->nrpages++;
>  	if (node) {
>  		workingset_node_pages_inc(node);
> @@ -193,7 +193,7 @@ static void page_cache_tree_delete(struct address_space *mapping,
>  			shadow = NULL;
>  		}
>  
> -		radix_tree_replace_slot(slot, shadow);
> +		radix_tree_replace_slot(&mapping->page_tree, slot, shadow);
>  
>  		if (!node)
>  			break;
> diff --git a/mm/khugepaged.c b/mm/khugepaged.c
> index eac6f0580e26..fed8d5e96978 100644
> --- a/mm/khugepaged.c
> +++ b/mm/khugepaged.c
> @@ -1421,7 +1421,7 @@ static void collapse_shmem(struct mm_struct *mm,
>  		list_add_tail(&page->lru, &pagelist);
>  
>  		/* Finally, replace with the new page. */
> -		radix_tree_replace_slot(slot,
> +		radix_tree_replace_slot(&mapping->page_tree, slot,
>  				new_page + (index % HPAGE_PMD_NR));
>  
>  		index++;
> @@ -1531,7 +1531,8 @@ static void collapse_shmem(struct mm_struct *mm,
>  			/* Unfreeze the page. */
>  			list_del(&page->lru);
>  			page_ref_unfreeze(page, 2);
> -			radix_tree_replace_slot(slot, page);
> +			radix_tree_replace_slot(&mapping->page_tree,
> +						slot, page);
>  			spin_unlock_irq(&mapping->tree_lock);
>  			putback_lru_page(page);
>  			unlock_page(page);
> diff --git a/mm/migrate.c b/mm/migrate.c
> index 99250aee1ac1..9b88e4e37d0a 100644
> --- a/mm/migrate.c
> +++ b/mm/migrate.c
> @@ -482,7 +482,7 @@ int migrate_page_move_mapping(struct address_space *mapping,
>  		SetPageDirty(newpage);
>  	}
>  
> -	radix_tree_replace_slot(pslot, newpage);
> +	radix_tree_replace_slot(&mapping->page_tree, pslot, newpage);
>  
>  	/*
>  	 * Drop cache reference from old page by unfreezing
> @@ -556,7 +556,7 @@ int migrate_huge_page_move_mapping(struct address_space *mapping,
>  
>  	get_page(newpage);
>  
> -	radix_tree_replace_slot(pslot, newpage);
> +	radix_tree_replace_slot(&mapping->page_tree, pslot, newpage);
>  
>  	page_ref_unfreeze(page, expected_count - 1);
>  
> diff --git a/mm/truncate.c b/mm/truncate.c
> index a01cce450a26..6ae44571d4c7 100644
> --- a/mm/truncate.c
> +++ b/mm/truncate.c
> @@ -49,7 +49,7 @@ static void clear_exceptional_entry(struct address_space *mapping,
>  		goto unlock;
>  	if (*slot != entry)
>  		goto unlock;
> -	radix_tree_replace_slot(slot, NULL);
> +	radix_tree_replace_slot(&mapping->page_tree, slot, NULL);
>  	mapping->nrexceptional--;
>  	if (!node)
>  		goto unlock;
> diff --git a/tools/testing/radix-tree/multiorder.c b/tools/testing/radix-tree/multiorder.c
> index 05d7bc488971..d1be94667a30 100644
> --- a/tools/testing/radix-tree/multiorder.c
> +++ b/tools/testing/radix-tree/multiorder.c
> @@ -146,7 +146,7 @@ static void multiorder_check(unsigned long index, int order)
>  
>  	slot = radix_tree_lookup_slot(&tree, index);
>  	free(*slot);
> -	radix_tree_replace_slot(slot, item2);
> +	radix_tree_replace_slot(&tree, slot, item2);
>  	for (i = min; i < max; i++) {
>  		struct item *item = item_lookup(&tree, i);
>  		assert(item != 0);
> -- 
> 2.10.1
> 
-- 
Jan Kara <jack@xxxxxxxx>
SUSE Labs, CR

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@xxxxxxxxx.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@xxxxxxxxx";> email@xxxxxxxxx </a>



[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Annouce]     [Bugtraq]     [Linux]     [Linux OMAP]     [Linux MIPS]     [ECOS]     [Asterisk Internet PBX]     [Linux API]