[PATCH 1 of 8] Introduce a place holder page for the pagecache

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



mm/filemap.c is changed to wait on place holder pages before adding a page
into the page cache, and truncates are changed to wait for all of the place
holder pages to disappear.

Place holder pages can only be examined with the mapping lock held.  They
cannot be locked, and cannot have references increased or decreased on them.

Placeholders can span a range bigger than one page.  The placeholder is
inserted into the radix slot for the end of the range, and the flags field in
the page struct is used to record the start of the range.

A bit is added for the radix root (PAGE_CACHE_TAG_EXTENTS).  When
mm/filemap.c finds that bit set, a search for an index in the pagecache
looks forward to find any placeholder that the index may intersect.

Signed-off-by: Chris Mason <chris.mason@xxxxxxxxxx>

diff -r fc2d683623bb -r 7819e6e3f674 drivers/mtd/devices/block2mtd.c
--- a/drivers/mtd/devices/block2mtd.c	Sun Feb 04 10:44:54 2007 -0800
+++ b/drivers/mtd/devices/block2mtd.c	Tue Feb 06 19:45:28 2007 -0500
@@ -66,7 +66,7 @@ static void cache_readahead(struct addre
 			INFO("Overrun end of disk in cache readahead\n");
 			break;
 		}
-		page = radix_tree_lookup(&mapping->page_tree, pagei);
+		page = radix_tree_lookup_extent(&mapping->page_tree, pagei);
 		if (page && (!i))
 			break;
 		if (page)
diff -r fc2d683623bb -r 7819e6e3f674 include/linux/fs.h
--- a/include/linux/fs.h	Sun Feb 04 10:44:54 2007 -0800
+++ b/include/linux/fs.h	Tue Feb 06 19:45:28 2007 -0500
@@ -490,6 +490,11 @@ struct block_device {
  */
 #define PAGECACHE_TAG_DIRTY	0
 #define PAGECACHE_TAG_WRITEBACK	1
+
+/*
+ * This tag is only valid on the root of the radix tree
+ */
+#define PAGE_CACHE_TAG_EXTENTS 2
 
 int mapping_tagged(struct address_space *mapping, int tag);
 
diff -r fc2d683623bb -r 7819e6e3f674 include/linux/page-flags.h
--- a/include/linux/page-flags.h	Sun Feb 04 10:44:54 2007 -0800
+++ b/include/linux/page-flags.h	Tue Feb 06 19:45:28 2007 -0500
@@ -263,4 +263,6 @@ static inline void set_page_writeback(st
 	test_set_page_writeback(page);
 }
 
+void set_page_placeholder(struct page *page, pgoff_t start, pgoff_t end);
+
 #endif	/* PAGE_FLAGS_H */
diff -r fc2d683623bb -r 7819e6e3f674 include/linux/pagemap.h
--- a/include/linux/pagemap.h	Sun Feb 04 10:44:54 2007 -0800
+++ b/include/linux/pagemap.h	Tue Feb 06 19:45:28 2007 -0500
@@ -76,6 +76,9 @@ extern struct page * find_get_page(struc
 				unsigned long index);
 extern struct page * find_lock_page(struct address_space *mapping,
 				unsigned long index);
+int find_or_insert_placeholders(struct address_space *mapping,
+                                  unsigned long start, unsigned long end,
+                                  gfp_t gfp_mask, int wait);
 extern __deprecated_for_modules struct page * find_trylock_page(
 			struct address_space *mapping, unsigned long index);
 extern struct page * find_or_create_page(struct address_space *mapping,
@@ -86,6 +89,12 @@ unsigned find_get_pages_contig(struct ad
 			       unsigned int nr_pages, struct page **pages);
 unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *index,
 			int tag, unsigned int nr_pages, struct page **pages);
+void remove_placeholder_pages(struct address_space *mapping,
+                             unsigned long offset, unsigned long end);
+void wake_up_placeholder_page(struct page *page);
+void wait_on_placeholder_pages_range(struct address_space *mapping, pgoff_t start,
+			       pgoff_t end);
+
 
 /*
  * Returns locked page at given index in given cache, creating it if needed.
@@ -116,6 +125,8 @@ int add_to_page_cache_lru(struct page *p
 				unsigned long index, gfp_t gfp_mask);
 extern void remove_from_page_cache(struct page *page);
 extern void __remove_from_page_cache(struct page *page);
+struct page *radix_tree_lookup_extent(struct radix_tree_root *root,
+					     unsigned long index);
 
 /*
  * Return byte-offset into filesystem object for page.
diff -r fc2d683623bb -r 7819e6e3f674 include/linux/radix-tree.h
--- a/include/linux/radix-tree.h	Sun Feb 04 10:44:54 2007 -0800
+++ b/include/linux/radix-tree.h	Tue Feb 06 19:45:28 2007 -0500
@@ -53,6 +53,7 @@ static inline int radix_tree_is_direct_p
 /*** radix-tree API starts here ***/
 
 #define RADIX_TREE_MAX_TAGS 2
+#define RADIX_TREE_MAX_ROOT_TAGS 3
 
 /* root tags are stored in gfp_mask, shifted by __GFP_BITS_SHIFT */
 struct radix_tree_root {
@@ -168,6 +169,7 @@ radix_tree_gang_lookup_tag(struct radix_
 		unsigned long first_index, unsigned int max_items,
 		unsigned int tag);
 int radix_tree_tagged(struct radix_tree_root *root, unsigned int tag);
+void radix_tree_root_tag_set(struct radix_tree_root *root, unsigned int tag);
 
 static inline void radix_tree_preload_end(void)
 {
diff -r fc2d683623bb -r 7819e6e3f674 lib/radix-tree.c
--- a/lib/radix-tree.c	Sun Feb 04 10:44:54 2007 -0800
+++ b/lib/radix-tree.c	Tue Feb 06 19:45:28 2007 -0500
@@ -468,6 +468,12 @@ void *radix_tree_tag_set(struct radix_tr
 	return slot;
 }
 EXPORT_SYMBOL(radix_tree_tag_set);
+
+void radix_tree_root_tag_set(struct radix_tree_root *root, unsigned int tag)
+{
+	root_tag_set(root, tag);
+}
+EXPORT_SYMBOL(radix_tree_root_tag_set);
 
 /**
  *	radix_tree_tag_clear - clear a tag on a radix tree node
diff -r fc2d683623bb -r 7819e6e3f674 mm/filemap.c
--- a/mm/filemap.c	Sun Feb 04 10:44:54 2007 -0800
+++ b/mm/filemap.c	Tue Feb 06 19:45:28 2007 -0500
@@ -44,6 +44,11 @@ generic_file_direct_IO(int rw, struct ki
 generic_file_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
 	loff_t offset, unsigned long nr_segs);
 
+static wait_queue_head_t *page_waitqueue(struct page *page);
+
+static struct address_space placeholder_address_space;
+#define PagePlaceHolder(page) ((page)->mapping == &placeholder_address_space)
+
 /*
  * Shared mappings implemented 30.11.1994. It's not fully working yet,
  * though.
@@ -421,6 +426,35 @@ int filemap_write_and_wait_range(struct 
 	return err;
 }
 
+/*
+ * When the radix tree has the extent bit set, a lookup needs to search
+ * forward in the tree to find any extent the index might intersect.
+ * When extents are off, a faster radix_tree_lookup can be done instead.
+ *
+ * This does the appropriate lookup based on the PAGE_CACHE_TAG_EXTENTS
+ * bit on the root node
+ */
+struct page *radix_tree_lookup_extent(struct radix_tree_root *root,
+					     unsigned long index)
+{
+	if (radix_tree_tagged(root, PAGE_CACHE_TAG_EXTENTS)) {
+		struct page *p;
+		unsigned int found;
+		found = radix_tree_gang_lookup(root, (void **)(&p), index, 1);
+		if (found) {
+			if (PagePlaceHolder(p)) {
+				pgoff_t start = p->flags;
+				pgoff_t end = p->index;
+				if (end >= index && start <= index)
+					return p;
+			} else if (p->index == index)
+				return p;
+		}
+		return NULL;
+	}
+	return radix_tree_lookup(root, index);
+}
+
 /**
  * add_to_page_cache - add newly allocated pagecache pages
  * @page:	page to add
@@ -437,22 +471,62 @@ int add_to_page_cache(struct page *page,
 int add_to_page_cache(struct page *page, struct address_space *mapping,
 		pgoff_t offset, gfp_t gfp_mask)
 {
-	int error = radix_tree_preload(gfp_mask & ~__GFP_HIGHMEM);
-
-	if (error == 0) {
-		write_lock_irq(&mapping->tree_lock);
+	int error;
+	struct page *tmp;
+
+	BUG_ON(PagePlaceHolder(page));
+
+again:
+	tmp = NULL;
+	error = radix_tree_preload(gfp_mask & ~__GFP_HIGHMEM);
+
+	if (error)
+		goto out;
+
+	write_lock_irq(&mapping->tree_lock);
+	/*
+	 * If extents are on for this radix tree, we have to do
+	 * the more expensive search for an overlapping extent
+	 * before we try to insert.
+	 */
+	if (radix_tree_tagged(&mapping->page_tree, PAGE_CACHE_TAG_EXTENTS)) {
+		tmp = radix_tree_lookup_extent(&mapping->page_tree,
+					       offset);
+		if (tmp)
+			error = -EEXIST;
+	}
+	if (!error)
 		error = radix_tree_insert(&mapping->page_tree, offset, page);
-		if (!error) {
-			page_cache_get(page);
-			SetPageLocked(page);
-			page->mapping = mapping;
-			page->index = offset;
-			mapping->nrpages++;
-			__inc_zone_page_state(page, NR_FILE_PAGES);
-		}
-		write_unlock_irq(&mapping->tree_lock);
-		radix_tree_preload_end();
-	}
+	if (error == -EEXIST && (gfp_mask & __GFP_WAIT)) {
+		/*
+		 * we need this second search because not every
+		 * placeholder forces the extent bit on the root
+		 */
+		if (!tmp)
+			tmp = radix_tree_lookup_extent(&mapping->page_tree,
+					       offset);
+		if (tmp && PagePlaceHolder(tmp)) {
+			DEFINE_WAIT(wait);
+			wait_queue_head_t *wqh = page_waitqueue(tmp);
+			radix_tree_preload_end();
+			prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE);
+			write_unlock_irq(&mapping->tree_lock);
+			io_schedule();
+			finish_wait(wqh, &wait);
+			goto again;
+		}
+	}
+	if (!error) {
+		page_cache_get(page);
+		SetPageLocked(page);
+		page->mapping = mapping;
+		page->index = offset;
+		mapping->nrpages++;
+		__inc_zone_page_state(page, NR_FILE_PAGES);
+	}
+	write_unlock_irq(&mapping->tree_lock);
+	radix_tree_preload_end();
+out:
 	return error;
 }
 EXPORT_SYMBOL(add_to_page_cache);
@@ -516,6 +590,70 @@ void fastcall wait_on_page_bit(struct pa
 }
 EXPORT_SYMBOL(wait_on_page_bit);
 
+void wake_up_placeholder_page(struct page *page)
+{
+	__wake_up_bit(page_waitqueue(page), &page->flags, PG_locked);
+}
+EXPORT_SYMBOL_GPL(wake_up_placeholder_page);
+
+/**
+ * wait_on_placeholder_pages_range - gang placeholder page waiter
+ * @mapping:	The address_space to search
+ * @start:	The starting page index
+ * @end:	The max page index (inclusive)
+ *
+ * wait_on_placeholder_pages_range() will search for and wait on any
+ * placeholder pages that intersect the range in the mapping
+ *
+ * On return, the range has no placeholder pages sitting in it.
+ */
+void wait_on_placeholder_pages_range(struct address_space *mapping,
+			       pgoff_t start, pgoff_t end)
+{
+	unsigned int i;
+	unsigned int ret;
+	struct page *pages[8];
+	DEFINE_WAIT(wait);
+
+	/*
+	 * we expect a very small number of place holder pages, so
+	 * this code isn't trying to be very fast.
+	 */
+again:
+	read_lock_irq(&mapping->tree_lock);
+	while(start <= end) {
+		ret = radix_tree_gang_lookup(&mapping->page_tree,
+					(void **)pages, start,
+					ARRAY_SIZE(pages));
+		if (!ret)
+			break;
+		for (i = 0; i < ret; i++) {
+			if (PagePlaceHolder(pages[i]) &&
+			    pages[i]->flags <= end) {
+				wait_queue_head_t *wqh;
+				wqh = page_waitqueue(pages[i]);
+				prepare_to_wait(wqh, &wait,
+						TASK_UNINTERRUPTIBLE);
+				read_unlock_irq(&mapping->tree_lock);
+				io_schedule();
+				finish_wait(wqh, &wait);
+				goto again;
+			}
+			start = pages[i]->index + 1;
+			if (pages[i]->index > end)
+				goto done;
+		}
+		if (need_resched()) {
+			read_unlock_irq(&mapping->tree_lock);
+			cond_resched();
+			read_lock_irq(&mapping->tree_lock);
+		}
+	}
+done:
+	read_unlock_irq(&mapping->tree_lock);
+}
+EXPORT_SYMBOL_GPL(wait_on_placeholder_pages_range);
+
 /**
  * unlock_page - unlock a locked page
  * @page: the page
@@ -532,6 +670,7 @@ EXPORT_SYMBOL(wait_on_page_bit);
  */
 void fastcall unlock_page(struct page *page)
 {
+	BUG_ON(PagePlaceHolder(page));
 	smp_mb__before_clear_bit();
 	if (!TestClearPageLocked(page))
 		BUG();
@@ -568,6 +707,7 @@ void fastcall __lock_page(struct page *p
 {
 	DEFINE_WAIT_BIT(wait, &page->flags, PG_locked);
 
+	BUG_ON(PagePlaceHolder(page));
 	__wait_on_bit_lock(page_waitqueue(page), &wait, sync_page,
 							TASK_UNINTERRUPTIBLE);
 }
@@ -580,6 +720,7 @@ void fastcall __lock_page_nosync(struct 
 void fastcall __lock_page_nosync(struct page *page)
 {
 	DEFINE_WAIT_BIT(wait, &page->flags, PG_locked);
+	BUG_ON(PagePlaceHolder(page));
 	__wait_on_bit_lock(page_waitqueue(page), &wait, __sleep_on_page_lock,
 							TASK_UNINTERRUPTIBLE);
 }
@@ -597,13 +738,269 @@ struct page * find_get_page(struct addre
 	struct page *page;
 
 	read_lock_irq(&mapping->tree_lock);
-	page = radix_tree_lookup(&mapping->page_tree, offset);
-	if (page)
-		page_cache_get(page);
+	page = radix_tree_lookup_extent(&mapping->page_tree, offset);
+	if (page) {
+		if (PagePlaceHolder(page))
+			page = NULL;
+		else
+			page_cache_get(page);
+	}
 	read_unlock_irq(&mapping->tree_lock);
 	return page;
 }
 EXPORT_SYMBOL(find_get_page);
+
+/**
+ * remove_placeholder_pages - remove a range of placeholder or locked pages
+ * @mapping: the page's address_space
+ * @start: the search starting point
+ * @end: the search end point (offsets >= end are not touched)
+ *
+ * Any placeholder pages in the range specified are removed, woken and freed.
+ * Any real pages are unlocked and released.  The walk stops early when the
+ * tree has no entry at or after the current offset, instead of spinning.
+ */
+void remove_placeholder_pages(struct address_space *mapping,
+			     unsigned long start,
+			     unsigned long end)
+{
+	unsigned int ret, i;
+	unsigned long num;
+	struct page *pages[8];
+
+	write_lock_irq(&mapping->tree_lock);
+	while (start < end) {
+		num = min(ARRAY_SIZE(pages), end - start);
+		ret = radix_tree_gang_lookup(&mapping->page_tree,
+						(void **)pages, start, num);
+		if (!ret)
+			break;
+		for (i = 0; i < ret; i++) {
+			struct page *page = pages[i];
+			start = page->index + 1;
+			if (page->index >= end)
+				break;
+			if (PagePlaceHolder(page)) {
+				radix_tree_delete(&mapping->page_tree,
+						  page->index);
+				wake_up_placeholder_page(page);
+				kfree(page);
+			} else {
+				unlock_page(page);
+				page_cache_release(page);
+			}
+		}
+	}
+	write_unlock_irq(&mapping->tree_lock);
+}
+EXPORT_SYMBOL_GPL(remove_placeholder_pages);
+
+/*
+ * a helper function to insert one placeholder into the radix tree.  The
+ * placeholder covers the offsets insert->flags through insert->index and
+ * lives in the slot for insert->index.  The next entry at or after the
+ * start of the range must not begin inside the range; real pages begin
+ * at their index, placeholders at the start stored in their flags field.
+ *
+ * returns zero on success or the error from radix_tree_insert.
+ */
+static int insert_placeholder(struct address_space *mapping,
+					 struct page *insert)
+{
+	unsigned int found;
+	struct page *debug_page;
+
+	/* sanity check, make sure other extents don't exist in this range */
+	found = radix_tree_gang_lookup(&mapping->page_tree,
+				    (void **)(&debug_page),
+				    insert->flags, 1);
+	BUG_ON(found > 0 && (PagePlaceHolder(debug_page) ?
+	       debug_page->flags : debug_page->index) <= insert->index);
+	return radix_tree_insert(&mapping->page_tree, insert->index, insert);
+}
+
+
+static struct page *alloc_placeholder(gfp_t gfp_mask)
+{
+	struct page *p;
+	p = kzalloc(sizeof(*p), gfp_mask);
+	if (p)
+		p->mapping = &placeholder_address_space;
+	return p;
+}
+
+/**
+ * find_or_insert_placeholders - lock pages or insert placeholders for a range
+ * @mapping: the page's address_space
+ * @start: the search starting point
+ * @end: the search end point (offsets >= end are not touched)
+ * @gfp_mask: page allocation mode
+ * @iowait: 1 if you want to wait for dirty or writeback pages.
+ *
+ * This locks down a range of offsets in the address space.  Any pages
+ * already present are locked and a placeholder page is inserted into
+ * the radix tree for any offsets without pages.  Returns 0 on success; on
+ * error everything taken so far is undone and a negative errno is returned.
+ */
+int find_or_insert_placeholders(struct address_space *mapping,
+				  unsigned long start, unsigned long end,
+				  gfp_t gfp_mask, int iowait)
+{
+	int err = 0;
+	int i, ret;
+	unsigned long cur = start;
+	struct page *page;
+	int restart;
+	struct page *insert = NULL;
+	struct page *pages[8];
+	/*
+	 * this gets complicated.  Placeholders and page locks need to
+	 * be taken in order.  We use gang lookup to cut down on the cpu
+	 * cost, but we need to keep track of holes in the results and
+	 * insert placeholders as appropriate.
+	 *
+	 * If a locked page or a placeholder is found, we wait for it and
+	 * pick up where we left off.  If a dirty or PG_writeback page is found
+	 * and iowait==1, we have to drop all of our locks, kick/wait for the
+	 * io and resume again.
+	 */
+repeat:
+	if (!insert) {
+		insert = alloc_placeholder(gfp_mask);
+		if (!insert) {
+			err = -ENOMEM;
+			goto fail;
+		}
+	}
+	if (cur != start)
+		cond_resched();
+	err = radix_tree_preload(gfp_mask & ~__GFP_HIGHMEM);
+	if (err)
+		goto fail;
+	write_lock_irq(&mapping->tree_lock);
+
+	/* only set the extent tag if we are inserting placeholders for more
+	 * than one page worth of slots.  This way small random ios don't
+	 * suffer from slower lookups.
+	 */
+	if (cur == start && end - start > 1)
+		radix_tree_root_tag_set(&mapping->page_tree,
+					PAGE_CACHE_TAG_EXTENTS);
+repeat_lock:
+	ret = radix_tree_gang_lookup(&mapping->page_tree,
+					(void **)pages, cur,
+					min(ARRAY_SIZE(pages), end-cur));
+	for (i = 0 ; i < ret ; i++) {
+		restart = 0;
+		page = pages[i];
+
+		if (PagePlaceHolder(page) && page->flags < end) {
+			DEFINE_WAIT(wait);
+			wait_queue_head_t *wqh = page_waitqueue(page);
+			radix_tree_preload_end();
+			prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE);
+			write_unlock_irq(&mapping->tree_lock);
+			io_schedule();
+			finish_wait(wqh, &wait);
+			goto repeat;
+		}
+
+		if (page->index > cur) {
+			unsigned long top = min(end, page->index);
+			insert->index = top - 1;
+			insert->flags = cur;
+			err = insert_placeholder(mapping, insert);
+			write_unlock_irq(&mapping->tree_lock);
+			radix_tree_preload_end();
+			if (err)
+				goto fail;
+			insert = NULL;
+			cur = top;
+			if (cur < end)
+				goto repeat;
+			else
+				goto done;
+		}
+		if (page->index >= end) {
+			ret = 0;
+			break;
+		}
+		page_cache_get(page);
+		BUG_ON(page->index != cur);
+		BUG_ON(PagePlaceHolder(page));
+		if (TestSetPageLocked(page)) {
+			unsigned long tmpoff = page->index;
+			/* the reference taken above already pins the page */
+			write_unlock_irq(&mapping->tree_lock);
+			radix_tree_preload_end();
+			__lock_page(page);
+			/* Has the page been truncated while we slept? */
+			if (unlikely(page->mapping != mapping ||
+				     page->index != tmpoff)) {
+				unlock_page(page);
+				page_cache_release(page);
+				goto repeat;
+			} else {
+				/* we've locked the page, but  we need to
+				 *  check it for dirty/writeback
+				 */
+				restart = 1;
+			}
+		}
+		if (iowait && (PageDirty(page) || PageWriteback(page))) {
+			unlock_page(page);
+			page_cache_release(page);
+			if (!restart) {
+				write_unlock_irq(&mapping->tree_lock);
+				radix_tree_preload_end();
+			}
+			err = filemap_write_and_wait_range(mapping,
+					(loff_t)cur << PAGE_CACHE_SHIFT,
+					(loff_t)end << PAGE_CACHE_SHIFT);
+			if (err)
+				goto fail;
+			goto repeat;
+		}
+		cur++;
+		if (restart)
+			goto repeat;
+		if (cur >= end)
+			break;
+	}
+
+	/* we haven't yet filled the range */
+	if (cur < end) {
+		/* if the search filled our array, there is more to do. */
+		if (ret && ret == ARRAY_SIZE(pages))
+			goto repeat_lock;
+
+		/* otherwise insert placeholders for the remaining offsets */
+		insert->index = end - 1;
+		insert->flags = cur;
+		err = insert_placeholder(mapping, insert);
+		write_unlock_irq(&mapping->tree_lock);
+		radix_tree_preload_end();
+		if (err)
+			goto fail;
+		insert = NULL;
+		cur = end;
+	} else {
+		write_unlock_irq(&mapping->tree_lock);
+		radix_tree_preload_end();
+	}
+done:
+	BUG_ON(cur < end);
+	BUG_ON(cur > end);
+	if (insert)
+		kfree(insert);
+	return err;
+fail:
+	remove_placeholder_pages(mapping, start, cur);
+	if (insert)
+		kfree(insert);
+	return err;
+}
+EXPORT_SYMBOL_GPL(find_or_insert_placeholders);
 
 /**
  * find_trylock_page - find and lock a page
@@ -617,8 +1014,8 @@ struct page *find_trylock_page(struct ad
 	struct page *page;
 
 	read_lock_irq(&mapping->tree_lock);
-	page = radix_tree_lookup(&mapping->page_tree, offset);
-	if (page && TestSetPageLocked(page))
+	page = radix_tree_lookup_extent(&mapping->page_tree, offset);
+	if (page && (PagePlaceHolder(page) || TestSetPageLocked(page)))
 		page = NULL;
 	read_unlock_irq(&mapping->tree_lock);
 	return page;
@@ -642,8 +1039,18 @@ struct page *find_lock_page(struct addre
 
 	read_lock_irq(&mapping->tree_lock);
 repeat:
-	page = radix_tree_lookup(&mapping->page_tree, offset);
+	page = radix_tree_lookup_extent(&mapping->page_tree, offset);
 	if (page) {
+		if (PagePlaceHolder(page)) {
+			DEFINE_WAIT(wait);
+			wait_queue_head_t *wqh = page_waitqueue(page);
+			prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE);
+			read_unlock_irq(&mapping->tree_lock);
+			io_schedule();
+			finish_wait(wqh, &wait);
+			read_lock_irq(&mapping->tree_lock);
+			goto repeat;
+		}
 		page_cache_get(page);
 		if (TestSetPageLocked(page)) {
 			read_unlock_irq(&mapping->tree_lock);
@@ -727,14 +1134,25 @@ unsigned find_get_pages(struct address_s
 unsigned find_get_pages(struct address_space *mapping, pgoff_t start,
 			    unsigned int nr_pages, struct page **pages)
 {
-	unsigned int i;
+	unsigned int i = 0;
 	unsigned int ret;
 
 	read_lock_irq(&mapping->tree_lock);
 	ret = radix_tree_gang_lookup(&mapping->page_tree,
 				(void **)pages, start, nr_pages);
-	for (i = 0; i < ret; i++)
-		page_cache_get(pages[i]);
+	while(i < ret) {
+		if (PagePlaceHolder(pages[i])) {
+			/* drop the place holder; overlapping copy */
+			if (i + 1 < ret) {
+				memmove(&pages[i], &pages[i+1],
+				        (ret - i - 1) * sizeof(struct page *));
+			}
+			ret--;
+			continue;
+		} else
+			page_cache_get(pages[i]);
+		i++;
+	}
 	read_unlock_irq(&mapping->tree_lock);
 	return ret;
 }
@@ -761,6 +1179,8 @@ unsigned find_get_pages_contig(struct ad
 	ret = radix_tree_gang_lookup(&mapping->page_tree,
 				(void **)pages, index, nr_pages);
 	for (i = 0; i < ret; i++) {
+		if (PagePlaceHolder(pages[i]))
+			break;
 		if (pages[i]->mapping == NULL || pages[i]->index != index)
 			break;
 
@@ -785,14 +1205,25 @@ unsigned find_get_pages_tag(struct addre
 unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *index,
 			int tag, unsigned int nr_pages, struct page **pages)
 {
-	unsigned int i;
+	unsigned int i = 0;
 	unsigned int ret;
 
 	read_lock_irq(&mapping->tree_lock);
 	ret = radix_tree_gang_lookup_tag(&mapping->page_tree,
 				(void **)pages, *index, nr_pages, tag);
-	for (i = 0; i < ret; i++)
-		page_cache_get(pages[i]);
+	while(i < ret) {
+		if (PagePlaceHolder(pages[i])) {
+			/* drop the place holder; overlapping copy */
+			if (i + 1 < ret) {
+				memmove(&pages[i], &pages[i+1],
+				        (ret - i - 1) * sizeof(struct page *));
+			}
+			ret--;
+			continue;
+		} else
+			page_cache_get(pages[i]);
+		i++;
+	}
 	if (ret)
 		*index = pages[ret - 1]->index + 1;
 	read_unlock_irq(&mapping->tree_lock);
@@ -2406,18 +2837,15 @@ generic_file_direct_IO(int rw, struct ki
 			unmap_mapping_range(mapping, offset, write_len, 0);
 	}
 
-	retval = filemap_write_and_wait(mapping);
-	if (retval == 0) {
-		retval = mapping->a_ops->direct_IO(rw, iocb, iov,
-						offset, nr_segs);
-		if (rw == WRITE && mapping->nrpages) {
-			pgoff_t end = (offset + write_len - 1)
-						>> PAGE_CACHE_SHIFT;
-			int err = invalidate_inode_pages2_range(mapping,
-					offset >> PAGE_CACHE_SHIFT, end);
-			if (err)
-				retval = err;
-		}
+	retval = mapping->a_ops->direct_IO(rw, iocb, iov,
+					offset, nr_segs);
+	if (rw == WRITE && mapping->nrpages) {
+		pgoff_t end = (offset + write_len - 1)
+					>> PAGE_CACHE_SHIFT;
+		int err = invalidate_inode_pages2_range(mapping,
+				offset >> PAGE_CACHE_SHIFT, end);
+		if (err)
+			retval = err;
 	}
 	return retval;
 }
diff -r fc2d683623bb -r 7819e6e3f674 mm/migrate.c
--- a/mm/migrate.c	Sun Feb 04 10:44:54 2007 -0800
+++ b/mm/migrate.c	Tue Feb 06 19:45:28 2007 -0500
@@ -305,8 +305,12 @@ static int migrate_page_move_mapping(str
 
 	write_lock_irq(&mapping->tree_lock);
 
+	/*
+	 * we don't need to worry about placeholders here,
+	 * the slot in the tree is verified
+	 */
 	pslot = radix_tree_lookup_slot(&mapping->page_tree,
- 					page_index(page));
+					page_index(page));
 
 	if (page_count(page) != 2 + !!PagePrivate(page) ||
 			(struct page *)radix_tree_deref_slot(pslot) != page) {
diff -r fc2d683623bb -r 7819e6e3f674 mm/readahead.c
--- a/mm/readahead.c	Sun Feb 04 10:44:54 2007 -0800
+++ b/mm/readahead.c	Tue Feb 06 19:45:28 2007 -0500
@@ -288,7 +288,8 @@ __do_page_cache_readahead(struct address
 		if (page_offset > end_index)
 			break;
 
-		page = radix_tree_lookup(&mapping->page_tree, page_offset);
+		page = radix_tree_lookup_extent(&mapping->page_tree,
+						page_offset);
 		if (page)
 			continue;
 
diff -r fc2d683623bb -r 7819e6e3f674 mm/truncate.c
--- a/mm/truncate.c	Sun Feb 04 10:44:54 2007 -0800
+++ b/mm/truncate.c	Tue Feb 06 19:45:28 2007 -0500
@@ -236,6 +236,7 @@ void truncate_inode_pages_range(struct a
 		}
 		pagevec_release(&pvec);
 	}
+	wait_on_placeholder_pages_range(mapping, start, end);
 }
 EXPORT_SYMBOL(truncate_inode_pages_range);
 


-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [Samba]     [Device Mapper]     [CEPH Development]
  Powered by Linux