+ mm-lockless-pagecache.patch added to -mm tree

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



The patch titled

     mm: lockless pagecache

has been added to the -mm tree.  Its filename is

     mm-lockless-pagecache.patch

See http://www.zip.com.au/~akpm/linux/patches/stuff/added-to-mm.txt to find
out what to do about this

------------------------------------------------------
Subject: mm: lockless pagecache
From: Nick Piggin <npiggin@xxxxxxx>

Combine page_cache_get_speculative with lockless radix tree lookups to
introduce lockless page cache lookups (i.e. no mapping->tree_lock on the
read-side).

The only atomicity change this introduces is that the gang pagecache
lookup functions now behave as if they are implemented with multiple
find_get_page calls, rather than operating on a snapshot of the pages.  In
practice, this atomicity guarantee is not relied upon anyway, and the gang
lookups are used to replace individual lookups, so these semantics are
natural.

Swapcache can no longer use find_get_page, because it has a different
method of encoding swapcache position into the page.  Introduce a new
find_get_swap_page for it.

Signed-off-by: Nick Piggin <npiggin@xxxxxxx>
Cc: Hugh Dickins <hugh@xxxxxxxxxxx>
Signed-off-by: Andrew Morton <akpm@xxxxxxxx>
---

 include/linux/swap.h |    1 
 mm/filemap.c         |  161 +++++++++++++++++++++++++++++------------
 mm/page-writeback.c  |    8 --
 mm/readahead.c       |    7 -
 mm/swap_state.c      |   27 ++++++
 mm/swapfile.c        |    2 
 6 files changed, 150 insertions(+), 56 deletions(-)

diff -puN include/linux/swap.h~mm-lockless-pagecache include/linux/swap.h
--- a/include/linux/swap.h~mm-lockless-pagecache
+++ a/include/linux/swap.h
@@ -229,6 +229,7 @@ extern int move_from_swap_cache(struct p
 		struct address_space *);
 extern void free_page_and_swap_cache(struct page *);
 extern void free_pages_and_swap_cache(struct page **, int);
+extern struct page * find_get_swap_page(swp_entry_t);
 extern struct page * lookup_swap_cache(swp_entry_t);
 extern struct page * read_swap_cache_async(swp_entry_t, struct vm_area_struct *vma,
 					   unsigned long addr);
diff -puN mm/filemap.c~mm-lockless-pagecache mm/filemap.c
--- a/mm/filemap.c~mm-lockless-pagecache
+++ a/mm/filemap.c
@@ -630,11 +630,22 @@ struct page *find_trylock_page(struct ad
 {
 	struct page *page;
 
-	read_lock_irq(&mapping->tree_lock);
+	rcu_read_lock();
+repeat:
 	page = radix_tree_lookup(&mapping->page_tree, offset);
-	if (page && TestSetPageLocked(page))
-		page = NULL;
-	read_unlock_irq(&mapping->tree_lock);
+	if (page) {
+		page = page_cache_get_speculative(page);
+		if (unlikely(!page))
+			goto repeat;
+		/* Has the page been truncated? */
+		if (unlikely(page->mapping != mapping
+				|| page->index != offset)) {
+			page_cache_release(page);
+			goto repeat;
+		}
+	}
+	rcu_read_unlock();
+
 	return page;
 }
 EXPORT_SYMBOL(find_trylock_page);
@@ -654,26 +665,25 @@ struct page *find_lock_page(struct addre
 {
 	struct page *page;
 
-	read_lock_irq(&mapping->tree_lock);
 repeat:
+	rcu_read_lock();
 	page = radix_tree_lookup(&mapping->page_tree, offset);
 	if (page) {
-		page_cache_get(page);
-		if (TestSetPageLocked(page)) {
-			read_unlock_irq(&mapping->tree_lock);
-			__lock_page(page);
-			read_lock_irq(&mapping->tree_lock);
-
-			/* Has the page been truncated while we slept? */
-			if (unlikely(page->mapping != mapping ||
-				     page->index != offset)) {
-				unlock_page(page);
-				page_cache_release(page);
-				goto repeat;
-			}
+		page = page_cache_get_speculative(page);
+		rcu_read_unlock();
+		if (unlikely(!page))
+			goto repeat;
+		lock_page(page);
+		/* Has the page been truncated? */
+		if (unlikely(page->mapping != mapping
+				|| page->index != offset)) {
+			unlock_page(page);
+			page_cache_release(page);
+			goto repeat;
 		}
-	}
-	read_unlock_irq(&mapping->tree_lock);
+	} else
+		rcu_read_unlock();
+
 	return page;
 }
 EXPORT_SYMBOL(find_lock_page);
@@ -741,16 +751,40 @@ EXPORT_SYMBOL(find_or_create_page);
 unsigned find_get_pages(struct address_space *mapping, pgoff_t start,
 			    unsigned int nr_pages, struct page **pages)
 {
+
 	unsigned int i;
-	unsigned int ret;
+	unsigned int nr_found;
 
-	read_lock_irq(&mapping->tree_lock);
-	ret = radix_tree_gang_lookup(&mapping->page_tree,
+	rcu_read_lock();
+repeat:
+	nr_found = radix_tree_gang_lookup(&mapping->page_tree,
 				(void **)pages, start, nr_pages);
-	for (i = 0; i < ret; i++)
-		page_cache_get(pages[i]);
-	read_unlock_irq(&mapping->tree_lock);
-	return ret;
+	for (i = 0; i < nr_found; i++) {
+		struct page *page;
+		page = page_cache_get_speculative(pages[i]);
+		if (unlikely(!page)) {
+bail:
+			/*
+			 * must return at least 1 page, so caller continues
+			 * calling in.
+			 */
+			if (i == 0)
+				goto repeat;
+			break;
+		}
+
+		/* Has the page been truncated? */
+		if (unlikely(page->mapping != mapping
+				|| page->index < start)) {
+			page_cache_release(page);
+			goto bail;
+		}
+
+		/* ensure we don't pick up pages that have moved behind us */
+		start = page->index+1;
+	}
+	rcu_read_unlock();
+	return i;
 }
 
 /**
@@ -769,19 +803,35 @@ unsigned find_get_pages_contig(struct ad
 			       unsigned int nr_pages, struct page **pages)
 {
 	unsigned int i;
-	unsigned int ret;
+	unsigned int nr_found;
 
-	read_lock_irq(&mapping->tree_lock);
-	ret = radix_tree_gang_lookup(&mapping->page_tree,
+	rcu_read_lock();
+repeat:
+	nr_found = radix_tree_gang_lookup(&mapping->page_tree,
 				(void **)pages, index, nr_pages);
-	for (i = 0; i < ret; i++) {
-		if (pages[i]->mapping == NULL || pages[i]->index != index)
+	for (i = 0; i < nr_found; i++) {
+		struct page *page;
+		page = page_cache_get_speculative(pages[i]);
+		if (unlikely(!page)) {
+bail:
+			/*
+			 * must return at least 1 page, so caller continues
+			 * calling in.
+			 */
+			if (i == 0)
+				goto repeat;
 			break;
+		}
 
-		page_cache_get(pages[i]);
+		/* Has the page been truncated? */
+		if (unlikely(page->mapping != mapping
+				|| page->index != index)) {
+			page_cache_release(page);
+			goto bail;
+		}
 		index++;
 	}
-	read_unlock_irq(&mapping->tree_lock);
+	rcu_read_unlock();
 	return i;
 }
 
@@ -800,17 +850,40 @@ unsigned find_get_pages_tag(struct addre
 			int tag, unsigned int nr_pages, struct page **pages)
 {
 	unsigned int i;
-	unsigned int ret;
+	unsigned int nr_found;
+	pgoff_t start = *index;
 
-	read_lock_irq(&mapping->tree_lock);
-	ret = radix_tree_gang_lookup_tag(&mapping->page_tree,
-				(void **)pages, *index, nr_pages, tag);
-	for (i = 0; i < ret; i++)
-		page_cache_get(pages[i]);
-	if (ret)
-		*index = pages[ret - 1]->index + 1;
-	read_unlock_irq(&mapping->tree_lock);
-	return ret;
+	rcu_read_lock();
+repeat:
+	nr_found = radix_tree_gang_lookup_tag(&mapping->page_tree,
+				(void **)pages, start, nr_pages, tag);
+	for (i = 0; i < nr_found; i++) {
+		struct page *page;
+		page = page_cache_get_speculative(pages[i]);
+		if (unlikely(!page)) {
+bail:
+			/*
+			 * must return at least 1 page, so caller continues
+			 * calling in.
+			 */
+			if (i == 0)
+				goto repeat;
+			break;
+		}
+
+		/* Has the page been truncated? */
+		if (unlikely(page->mapping != mapping
+				|| page->index < start)) {
+			page_cache_release(page);
+			goto bail;
+		}
+
+		/* ensure we don't pick up pages that have moved behind us */
+		start = page->index+1;
+	}
+	rcu_read_unlock();
+	*index = start;
+	return i;
 }
 
 /**
diff -puN mm/page-writeback.c~mm-lockless-pagecache mm/page-writeback.c
--- a/mm/page-writeback.c~mm-lockless-pagecache
+++ a/mm/page-writeback.c
@@ -824,17 +824,15 @@ int test_set_page_writeback(struct page 
 EXPORT_SYMBOL(test_set_page_writeback);
 
 /*
- * Return true if any of the pages in the mapping are marged with the
+ * Return true if any of the pages in the mapping are marked with the
  * passed tag.
  */
 int mapping_tagged(struct address_space *mapping, int tag)
 {
-	unsigned long flags;
 	int ret;
-
-	read_lock_irqsave(&mapping->tree_lock, flags);
+	rcu_read_lock();
 	ret = radix_tree_tagged(&mapping->page_tree, tag);
-	read_unlock_irqrestore(&mapping->tree_lock, flags);
+	rcu_read_unlock();
 	return ret;
 }
 EXPORT_SYMBOL(mapping_tagged);
diff -puN mm/readahead.c~mm-lockless-pagecache mm/readahead.c
--- a/mm/readahead.c~mm-lockless-pagecache
+++ a/mm/readahead.c
@@ -283,27 +283,26 @@ __do_page_cache_readahead(struct address
 	/*
 	 * Preallocate as many pages as we will need.
 	 */
-	read_lock_irq(&mapping->tree_lock);
 	for (page_idx = 0; page_idx < nr_to_read; page_idx++) {
 		pgoff_t page_offset = offset + page_idx;
 		
 		if (page_offset > end_index)
 			break;
 
+		/* Don't need mapping->tree_lock - lookup can be racy */
+		rcu_read_lock();
 		page = radix_tree_lookup(&mapping->page_tree, page_offset);
+		rcu_read_unlock();
 		if (page)
 			continue;
 
-		read_unlock_irq(&mapping->tree_lock);
 		page = page_cache_alloc_cold(mapping);
-		read_lock_irq(&mapping->tree_lock);
 		if (!page)
 			break;
 		page->index = page_offset;
 		list_add(&page->lru, &page_pool);
 		ret++;
 	}
-	read_unlock_irq(&mapping->tree_lock);
 
 	/*
 	 * Now start the IO.  We ignore I/O errors - if the page is not
diff -puN mm/swapfile.c~mm-lockless-pagecache mm/swapfile.c
--- a/mm/swapfile.c~mm-lockless-pagecache
+++ a/mm/swapfile.c
@@ -400,7 +400,7 @@ void free_swap_and_cache(swp_entry_t ent
 	p = swap_info_get(entry);
 	if (p) {
 		if (swap_entry_free(p, swp_offset(entry)) == 1) {
-			page = find_get_page(&swapper_space, entry.val);
+			page = find_get_swap_page(entry);
 			if (page && unlikely(TestSetPageLocked(page))) {
 				page_cache_release(page);
 				page = NULL;
diff -puN mm/swap_state.c~mm-lockless-pagecache mm/swap_state.c
--- a/mm/swap_state.c~mm-lockless-pagecache
+++ a/mm/swap_state.c
@@ -293,6 +293,29 @@ void free_pages_and_swap_cache(struct pa
 	}
 }
 
+struct page *find_get_swap_page(swp_entry_t entry)
+{
+	struct page *page;
+
+	rcu_read_lock();
+repeat:
+	page = radix_tree_lookup(&swapper_space.page_tree, entry.val);
+	if (page) {
+		page = page_cache_get_speculative(page);
+		if (unlikely(!page))
+			goto repeat;
+		/* Has the page been truncated? */
+		if (unlikely(!PageSwapCache(page)
+				|| page_private(page) != entry.val)) {
+			page_cache_release(page);
+			goto repeat;
+		}
+	}
+	rcu_read_unlock();
+
+	return page;
+}
+
 /*
  * Lookup a swap entry in the swap cache. A found page will be returned
  * unlocked and with its refcount incremented - we rely on the kernel
@@ -303,7 +326,7 @@ struct page * lookup_swap_cache(swp_entr
 {
 	struct page *page;
 
-	page = find_get_page(&swapper_space, entry.val);
+	page = find_get_swap_page(entry);
 
 	if (page)
 		INC_CACHE_INFO(find_success);
@@ -330,7 +353,7 @@ struct page *read_swap_cache_async(swp_e
 		 * called after lookup_swap_cache() failed, re-calling
 		 * that would confuse statistics.
 		 */
-		found_page = find_get_page(&swapper_space, entry.val);
+		found_page = find_get_swap_page(entry);
 		if (found_page)
 			break;
 
_

Patches currently in -mm which might be from npiggin@xxxxxxx are

git-block.patch
mm-vm_bug_on.patch
radix-tree-rcu-lockless-readside.patch
redo-radix-tree-fixes.patch
radix-tree-rcu-lockless-readside-update.patch
radix-tree-rcu-lockless-readside-fix-2.patch
update-some-mm-comments.patch
mm-remove_mapping-safeness.patch
mm-non-syncing-lock_page.patch
oom-use-unreclaimable-info.patch
oom-reclaim_mapped-on-oom.patch
cpuset-oom-panic-fix.patch
oom-cpuset-hint.patch
oom-handle-current-exiting.patch
oom-handle-oom_disable-exiting.patch
oom-swapoff-tasks-tweak.patch
oom-kthread-infinite-loop-fix.patch
oom-more-printk.patch
mm-speculative-get_page.patch
mm-lockless-pagecache.patch
sched-force-sbin-init-off-isolated-cpus.patch

-
To unsubscribe from this list: send the line "unsubscribe mm-commits" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Kernel Newbies FAQ]     [Kernel Archive]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [Bugtraq]     [Photo]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]

  Powered by Linux