Re: [patch] fsblock preview

On Thu, Sep 11, 2008 at 06:53:35PM +0200, Nick Piggin wrote:
> 
> I've been doing some work on fsblock again lately, so in case anybody might
> find it interesting, here is a "preview" patch. Basically it compiles and
> runs OK for me here, under a few stress tests. I wouldn't say it is close to
> bug free, and it needs a lot of bits and pieces to polish up like error
> handling.

For some testing with a *real* filesystem :)

Here is an incremental patch which switches XFS over to fsblock (not quite
complete or well stress-tested, but it boots and runs with block size == page
size).

This also adds incremental support for delayed allocation and unwritten
extents to fsblock, which turned out to be pretty easy.
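
The interesting bit is that the old "create" argument to the fs's
map_block operation becomes a three-way mode -- MAP_BLOCK_READ,
MAP_BLOCK_RESERVE, MAP_BLOCK_ALLOCATE -- so write_begin can just reserve
space and tag the block delalloc, and writepage comes back later to force
the real allocation. Roughly, the flow looks like this (condensed
pseudo-code with locking, refcounting and error handling stripped -- see
the hunks below for the real thing):

	/* write_begin: fsblock calls the fs with MAP_BLOCK_RESERVE; XFS
	 * reserves space and tags the block delalloc rather than
	 * allocating anything on disk (__xfs_get_blocks): */
	if (iomap.iomap_flags & IOMAP_DELAY) {
		if (create)
			fsb_result->flags |= BL_uptodate|BL_mapped|BL_delay;
	}

	/* writepage: fsblock_write_page finds a dirty delalloc block and
	 * asks the fs for the real allocation before building the bio: */
	if ((block->flags & (BL_delay|BL_dirty)) == (BL_delay|BL_dirty)) {
		spin_unlock_block_irq(block);
		ret = map_block(mapping, block, off, MAP_BLOCK_ALLOCATE);
		spin_lock_block_irq(block);
	}

	/* and map_fsblock drops the delalloc/unwritten state once the
	 * block has a real block number: */
	block->block_nr = blocknr;
	block->flags |= BL_mapped;
	block->flags &= ~(BL_delay|BL_unwritten);

ext2 doesn't do delalloc, so for now it just grows
FSB_BUG_ON(mode == MAP_BLOCK_ALLOCATE) in its mapping paths.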

---

Index: linux-2.6/fs/xfs/linux-2.6/xfs_aops.c
===================================================================
--- linux-2.6.orig/fs/xfs/linux-2.6/xfs_aops.c
+++ linux-2.6/fs/xfs/linux-2.6/xfs_aops.c
@@ -43,25 +43,38 @@
 #include <linux/writeback.h>
 
 STATIC void
+__xfs_count_block_state(
+	struct fsblock		*fsb,
+	int			*delalloc,
+	int			*unmapped,
+	int			*unwritten)
+{
+	if ((fsb->flags & (BL_uptodate|BL_mapped)) == BL_uptodate)
+		(*unmapped) = 1;
+	else if (fsb->flags & BL_unwritten)
+		(*unwritten) = 1;
+	else if (fsb->flags & BL_delay)
+		(*delalloc) = 1;
+}
+STATIC void
 xfs_count_page_state(
 	struct page		*page,
 	int			*delalloc,
 	int			*unmapped,
 	int			*unwritten)
 {
-	struct buffer_head	*bh, *head;
+	struct fsblock	*fsb;
 
 	*delalloc = *unmapped = *unwritten = 0;
 
-	bh = head = page_buffers(page);
-	do {
-		if (buffer_uptodate(bh) && !buffer_mapped(bh))
-			(*unmapped) = 1;
-		else if (buffer_unwritten(bh))
-			(*unwritten) = 1;
-		else if (buffer_delay(bh))
-			(*delalloc) = 1;
-	} while ((bh = bh->b_this_page) != head);
+	fsb = page_blocks(page);
+	if (fsblock_midpage(fsb)) {
+		__xfs_count_block_state(fsb, delalloc, unmapped, unwritten);
+	} else {
+		struct fsblock *b;
+		for_each_block(fsb, b)
+			__xfs_count_block_state(b, delalloc, unmapped, unwritten);
+	}
 }
 
 #if defined(XFS_RW_TRACE)
@@ -77,7 +90,7 @@ xfs_page_trace(
 	loff_t		offset = page_offset(page);
 	int		delalloc = -1, unmapped = -1, unwritten = -1;
 
-	if (page_has_buffers(page))
+	if (PageBlocks(page))
 		xfs_count_page_state(page, &delalloc, &unmapped, &unwritten);
 
 	ip = XFS_I(inode);
@@ -137,7 +150,7 @@ xfs_finish_ioend(
 
 /*
  * We're now finished for good with this ioend structure.
- * Update the page state via the associated buffer_heads,
+ * Update the page state via the associated fsblocks,
  * release holds on the inode and bio, and finally free
  * up memory.  Do not use the ioend after this.
  */
@@ -145,11 +158,13 @@ STATIC void
 xfs_destroy_ioend(
 	xfs_ioend_t		*ioend)
 {
-	struct buffer_head	*bh, *next;
+	struct fsblock	*fsb, *next;
 
-	for (bh = ioend->io_buffer_head; bh; bh = next) {
-		next = bh->b_private;
-		bh->b_end_io(bh, !ioend->io_error);
+	for (fsb = ioend->io_fsb_head; fsb; fsb = next) {
+		next = fsb->private;
+		fsb->private = NULL;
+		unlock_block(fsb);
+		fsblock_end_io(fsb, !ioend->io_error);
 	}
 	if (unlikely(ioend->io_error)) {
 		vn_ioerror(XFS_I(ioend->io_inode), ioend->io_error,
@@ -291,8 +306,8 @@ xfs_alloc_ioend(
 	ioend->io_list = NULL;
 	ioend->io_type = type;
 	ioend->io_inode = inode;
-	ioend->io_buffer_head = NULL;
-	ioend->io_buffer_tail = NULL;
+	ioend->io_fsb_head = NULL;
+	ioend->io_fsb_tail = NULL;
 	atomic_inc(&XFS_I(ioend->io_inode)->i_iocount);
 	ioend->io_offset = 0;
 	ioend->io_size = 0;
@@ -374,10 +389,11 @@ xfs_submit_ioend_bio(
 
 STATIC struct bio *
 xfs_alloc_ioend_bio(
-	struct buffer_head	*bh)
+	struct fsblock	*fsb)
 {
 	struct bio		*bio;
-	int			nvecs = bio_get_nr_vecs(bh->b_bdev);
+	struct block_device	*bdev = fsb->page->mapping->host->i_sb->s_bdev;
+	int			nvecs = bio_get_nr_vecs(bdev);
 
 	do {
 		bio = bio_alloc(GFP_NOIO, nvecs);
@@ -385,24 +401,27 @@ xfs_alloc_ioend_bio(
 	} while (!bio);
 
 	ASSERT(bio->bi_private == NULL);
-	bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9);
-	bio->bi_bdev = bh->b_bdev;
+	bio->bi_sector = fsb->block_nr << (fsblock_bits(fsb) - 9);
+	bio->bi_bdev = bdev;
 	bio_get(bio);
 	return bio;
 }
 
 STATIC void
 xfs_start_buffer_writeback(
-	struct buffer_head	*bh)
+	struct fsblock	*fsb)
 {
-	ASSERT(buffer_mapped(bh));
-	ASSERT(buffer_locked(bh));
-	ASSERT(!buffer_delay(bh));
-	ASSERT(!buffer_unwritten(bh));
-
-	mark_buffer_async_write(bh);
-	set_buffer_uptodate(bh);
-	clear_buffer_dirty(bh);
+	ASSERT(fsb->flags & BL_mapped);
+	ASSERT(fsb->flags & BL_locked);
+	ASSERT(!(fsb->flags & BL_delay));
+	ASSERT(!(fsb->flags & BL_unwritten));
+	ASSERT(!(fsb->flags & BL_uptodate));
+
+	spin_lock_block_irq(fsb);
+	fsb->count++;
+	fsb->flags |= BL_writeback;
+	clear_block_dirty(fsb);
+	spin_unlock_block_irq(fsb);
 }
 
 STATIC void
@@ -420,11 +439,15 @@ xfs_start_page_writeback(
 	/* If no buffers on the page are to be written, finish it here */
 	if (!buffers)
 		end_page_writeback(page);
+	else
+		page_cache_get(page);
 }
 
-static inline int bio_add_buffer(struct bio *bio, struct buffer_head *bh)
+static inline int bio_add_buffer(struct bio *bio, struct fsblock *fsb)
 {
-	return bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh));
+	unsigned int size = fsblock_size(fsb);
+	unsigned int offset = block_page_offset(fsb, size);
+	return bio_add_page(bio, fsb->page, size, offset);
 }
 
 /*
@@ -433,16 +456,16 @@ static inline int bio_add_buffer(struct 
  *
  * Because we may have multiple ioends spanning a page, we need to start
  * writeback on all the buffers before we submit them for I/O. If we mark the
- * buffers as we got, then we can end up with a page that only has buffers
+ * buffers as we got, then we can end up with a page that only has fsblocks
  * marked async write and I/O complete on can occur before we mark the other
- * buffers async write.
+ * fsblocks async write.
  *
  * The end result of this is that we trip a bug in end_page_writeback() because
- * we call it twice for the one page as the code in end_buffer_async_write()
- * assumes that all buffers on the page are started at the same time.
+ * we call it twice for the one page as the code in fsblock_end_io()
+ * assumes that all fsblocks on the page are started at the same time.
  *
  * The fix is two passes across the ioend list - one to start writeback on the
- * buffer_heads, and then submit them for I/O on the second pass.
+ * fsblocks, and then submit them for I/O on the second pass.
  */
 STATIC void
 xfs_submit_ioend(
@@ -450,15 +473,15 @@ xfs_submit_ioend(
 {
 	xfs_ioend_t		*head = ioend;
 	xfs_ioend_t		*next;
-	struct buffer_head	*bh;
+	struct fsblock		*fsb;
 	struct bio		*bio;
 	sector_t		lastblock = 0;
 
 	/* Pass 1 - start writeback */
 	do {
 		next = ioend->io_list;
-		for (bh = ioend->io_buffer_head; bh; bh = bh->b_private) {
-			xfs_start_buffer_writeback(bh);
+		for (fsb = ioend->io_fsb_head; fsb; fsb = fsb->private) {
+			xfs_start_buffer_writeback(fsb);
 		}
 	} while ((ioend = next) != NULL);
 
@@ -468,22 +491,22 @@ xfs_submit_ioend(
 		next = ioend->io_list;
 		bio = NULL;
 
-		for (bh = ioend->io_buffer_head; bh; bh = bh->b_private) {
+		for (fsb = ioend->io_fsb_head; fsb; fsb = fsb->private) {
 
 			if (!bio) {
  retry:
-				bio = xfs_alloc_ioend_bio(bh);
-			} else if (bh->b_blocknr != lastblock + 1) {
+				bio = xfs_alloc_ioend_bio(fsb);
+			} else if (fsb->block_nr != lastblock + 1) {
 				xfs_submit_ioend_bio(ioend, bio);
 				goto retry;
 			}
 
-			if (bio_add_buffer(bio, bh) != bh->b_size) {
+			if (bio_add_buffer(bio, fsb) != fsblock_size(fsb)) {
 				xfs_submit_ioend_bio(ioend, bio);
 				goto retry;
 			}
 
-			lastblock = bh->b_blocknr;
+			lastblock = fsb->block_nr;
 		}
 		if (bio)
 			xfs_submit_ioend_bio(ioend, bio);
@@ -492,7 +515,7 @@ xfs_submit_ioend(
 }
 
 /*
- * Cancel submission of all buffer_heads so far in this endio.
+ * Cancel submission of all fsblocks so far in this endio.
  * Toss the endio too.  Only ever called for the initial page
  * in a writepage request, so only ever one page.
  */
@@ -501,16 +524,19 @@ xfs_cancel_ioend(
 	xfs_ioend_t		*ioend)
 {
 	xfs_ioend_t		*next;
-	struct buffer_head	*bh, *next_bh;
+	struct fsblock		*fsb, *next_fsb;
 
 	do {
 		next = ioend->io_list;
-		bh = ioend->io_buffer_head;
+		fsb = ioend->io_fsb_head;
 		do {
-			next_bh = bh->b_private;
-			clear_buffer_async_write(bh);
-			unlock_buffer(bh);
-		} while ((bh = next_bh) != NULL);
+			next_fsb = fsb->private;
+			spin_lock_block_irq(fsb);
+			fsb->flags &= ~BL_writeback;
+			fsb->count--;
+			spin_unlock_block_irq(fsb);
+			unlock_block(fsb);
+		} while ((fsb = next_fsb) != NULL);
 
 		vn_iowake(XFS_I(ioend->io_inode));
 		mempool_free(ioend, xfs_ioend_pool);
@@ -519,14 +545,14 @@ xfs_cancel_ioend(
 
 /*
  * Test to see if we've been building up a completion structure for
- * earlier buffers -- if so, we try to append to this ioend if we
+ * earlier fsblocks -- if so, we try to append to this ioend if we
  * can, otherwise we finish off any current ioend and start another.
  * Return true if we've finished the given ioend.
  */
 STATIC void
 xfs_add_to_ioend(
 	struct inode		*inode,
-	struct buffer_head	*bh,
+	struct fsblock		*fsb,
 	xfs_off_t		offset,
 	unsigned int		type,
 	xfs_ioend_t		**result,
@@ -539,23 +565,23 @@ xfs_add_to_ioend(
 
 		ioend = xfs_alloc_ioend(inode, type);
 		ioend->io_offset = offset;
-		ioend->io_buffer_head = bh;
-		ioend->io_buffer_tail = bh;
+		ioend->io_fsb_head = fsb;
+		ioend->io_fsb_tail = fsb;
 		if (previous)
 			previous->io_list = ioend;
 		*result = ioend;
 	} else {
-		ioend->io_buffer_tail->b_private = bh;
-		ioend->io_buffer_tail = bh;
+		ioend->io_fsb_tail->private = fsb;
+		ioend->io_fsb_tail = fsb;
 	}
 
-	bh->b_private = NULL;
-	ioend->io_size += bh->b_size;
+	fsb->private = NULL;
+	ioend->io_size += fsblock_size(fsb);
 }
 
 STATIC void
 xfs_map_buffer(
-	struct buffer_head	*bh,
+	struct fsblock		*fsb,
 	xfs_iomap_t		*mp,
 	xfs_off_t		offset,
 	uint			block_bits)
@@ -569,13 +595,12 @@ xfs_map_buffer(
 
 	ASSERT(bn || (mp->iomap_flags & IOMAP_REALTIME));
 
-	bh->b_blocknr = bn;
-	set_buffer_mapped(bh);
+	map_fsblock(fsb, bn);
 }
 
 STATIC void
 xfs_map_at_offset(
-	struct buffer_head	*bh,
+	struct fsblock		*fsb,
 	loff_t			offset,
 	int			block_bits,
 	xfs_iomap_t		*iomapp)
@@ -583,12 +608,16 @@ xfs_map_at_offset(
 	ASSERT(!(iomapp->iomap_flags & IOMAP_HOLE));
 	ASSERT(!(iomapp->iomap_flags & IOMAP_DELAY));
 
-	lock_buffer(bh);
-	xfs_map_buffer(bh, iomapp, offset, block_bits);
-	bh->b_bdev = iomapp->iomap_target->bt_bdev;
-	set_buffer_mapped(bh);
-	clear_buffer_delay(bh);
-	clear_buffer_unwritten(bh);
+	spin_lock_block_irq(fsb);
+	fsb->count++; // XXX: hack
+	spin_unlock_block_irq(fsb);
+
+	lock_block(fsb);
+	spin_lock_block_irq(fsb);
+	xfs_map_buffer(fsb, iomapp, offset, block_bits);
+	fsb->count--;
+	spin_unlock_block_irq(fsb);
+//XXX?	bh->b_bdev = iomapp->iomap_target->bt_bdev;
 }
 
 /*
@@ -606,19 +635,28 @@ xfs_probe_page(
 		return 0;
 
 	if (page->mapping && PageDirty(page)) {
-		if (page_has_buffers(page)) {
-			struct buffer_head	*bh, *head;
+		if (PageBlocks(page)) {
+			struct fsblock	*fsb;
 
-			bh = head = page_buffers(page);
-			do {
-				if (!buffer_uptodate(bh))
-					break;
-				if (mapped != buffer_mapped(bh))
-					break;
-				ret += bh->b_size;
-				if (ret >= pg_offset)
-					break;
-			} while ((bh = bh->b_this_page) != head);
+			fsb = page_blocks(page);
+			if (fsblock_midpage(fsb)) {
+				if (!(fsb->flags & BL_uptodate))
+					return 0;
+				if (mapped != (fsb->flags & BL_mapped))
+					return 0;
+				return PAGE_CACHE_SIZE;
+			} else {
+				struct fsblock *b;
+				for_each_block(fsb, b) {
+					if (!(b->flags & BL_uptodate))
+						break;
+					if (mapped != (b->flags & BL_mapped))
+						break;
+					ret += fsblock_size(b);
+					if (ret >= pg_offset)
+						break;
+				}
+			}
 		} else
 			ret = mapped ? 0 : PAGE_CACHE_SIZE;
 	}
@@ -630,8 +668,8 @@ STATIC size_t
 xfs_probe_cluster(
 	struct inode		*inode,
 	struct page		*startpage,
-	struct buffer_head	*bh,
-	struct buffer_head	*head,
+	struct fsblock		*fsb,
+	struct fsblock		*head,
 	int			mapped)
 {
 	struct pagevec		pvec;
@@ -640,11 +678,12 @@ xfs_probe_cluster(
 	int			done = 0, i;
 
 	/* First sum forwards in this page */
-	do {
-		if (!buffer_uptodate(bh) || (mapped != buffer_mapped(bh)))
+	if (fsblock_midpage(fsb)) {
+		if (!(fsb->flags & BL_uptodate) ||
+				mapped != (fsb->flags & BL_mapped))
 			return total;
-		total += bh->b_size;
-	} while ((bh = bh->b_this_page) != head);
+		total += fsblock_size(fsb);
+	}
 
 	/* if we reached the end of the page, sum forwards in following pages */
 	tlast = i_size_read(inode) >> PAGE_CACHE_SHIFT;
@@ -707,21 +746,21 @@ xfs_is_delayed_page(
 	if (PageWriteback(page))
 		return 0;
 
-	if (page->mapping && page_has_buffers(page)) {
-		struct buffer_head	*bh, *head;
+	if (page->mapping && PageBlocks(page)) {
+		struct fsblock		*fsb;
 		int			acceptable = 0;
 
-		bh = head = page_buffers(page);
-		do {
-			if (buffer_unwritten(bh))
+		fsb = page_blocks(page);
+		if (fsblock_midpage(fsb)) { /* XXX: midpage! */
+			if (fsb->flags & BL_unwritten)
 				acceptable = (type == IOMAP_UNWRITTEN);
-			else if (buffer_delay(bh))
+			else if (fsb->flags & BL_delay)
 				acceptable = (type == IOMAP_DELAY);
-			else if (buffer_dirty(bh) && buffer_mapped(bh))
+			else if ((fsb->flags & (BL_dirty|BL_mapped)) == (BL_dirty|BL_mapped))
 				acceptable = (type == IOMAP_NEW);
 			else
-				break;
-		} while ((bh = bh->b_this_page) != head);
+				return 0;
+		}
 
 		if (acceptable)
 			return 1;
@@ -747,7 +786,7 @@ xfs_convert_page(
 	int			startio,
 	int			all_bh)
 {
-	struct buffer_head	*bh, *head;
+	struct fsblock		*fsb;
 	xfs_off_t		end_offset;
 	unsigned long		p_offset;
 	unsigned int		type;
@@ -790,19 +829,20 @@ xfs_convert_page(
 	p_offset = p_offset ? roundup(p_offset, len) : PAGE_CACHE_SIZE;
 	page_dirty = p_offset / len;
 
-	bh = head = page_buffers(page);
+	/* XXX: midpage */
+	fsb = page_blocks(page);
 	do {
 		if (offset >= end_offset)
 			break;
-		if (!buffer_uptodate(bh))
+		if (!(fsb->flags & BL_uptodate))
 			uptodate = 0;
-		if (!(PageUptodate(page) || buffer_uptodate(bh))) {
+		if (!(PageUptodate(page) || (fsb->flags & BL_uptodate))) {
 			done = 1;
 			continue;
 		}
 
-		if (buffer_unwritten(bh) || buffer_delay(bh)) {
-			if (buffer_unwritten(bh))
+		if (fsb->flags & (BL_unwritten|BL_delay)) {
+			if (fsb->flags & BL_unwritten)
 				type = IOMAP_UNWRITTEN;
 			else
 				type = IOMAP_DELAY;
@@ -815,22 +855,21 @@ xfs_convert_page(
 			ASSERT(!(mp->iomap_flags & IOMAP_HOLE));
 			ASSERT(!(mp->iomap_flags & IOMAP_DELAY));
 
-			xfs_map_at_offset(bh, offset, bbits, mp);
+			xfs_map_at_offset(fsb, offset, bbits, mp);
 			if (startio) {
-				xfs_add_to_ioend(inode, bh, offset,
+				xfs_add_to_ioend(inode, fsb, offset,
 						type, ioendp, done);
 			} else {
-				set_buffer_dirty(bh);
-				unlock_buffer(bh);
-				mark_buffer_dirty(bh);
+				mark_mblock_dirty(fsb);
+				unlock_block(fsb);
 			}
 			page_dirty--;
 			count++;
 		} else {
 			type = IOMAP_NEW;
-			if (buffer_mapped(bh) && all_bh && startio) {
-				lock_buffer(bh);
-				xfs_add_to_ioend(inode, bh, offset,
+			if (fsb->flags & BL_mapped && all_bh && startio) {
+				lock_block(fsb);
+				xfs_add_to_ioend(inode, fsb, offset,
 						type, ioendp, done);
 				count++;
 				page_dirty--;
@@ -838,9 +877,9 @@ xfs_convert_page(
 				done = 1;
 			}
 		}
-	} while (offset += len, (bh = bh->b_this_page) != head);
+	} while (offset += len, 1);
 
-	if (uptodate && bh == head)
+	if (uptodate && 1) // XXX: fsb == head
 		SetPageUptodate(page);
 
 	if (startio) {
@@ -930,7 +969,7 @@ xfs_page_state_convert(
 	int		startio,
 	int		unmapped) /* also implies page uptodate */
 {
-	struct buffer_head	*bh, *head;
+	struct fsblock		*fsb;
 	xfs_iomap_t		iomap;
 	xfs_ioend_t		*ioend = NULL, *iohead = NULL;
 	loff_t			offset;
@@ -983,7 +1022,7 @@ xfs_page_state_convert(
 	p_offset = p_offset ? roundup(p_offset, len) : PAGE_CACHE_SIZE;
 	page_dirty = p_offset / len;
 
-	bh = head = page_buffers(page);
+	fsb = page_blocks(page);
 	offset = page_offset(page);
 	flags = BMAPI_READ;
 	type = IOMAP_NEW;
@@ -993,9 +1032,9 @@ xfs_page_state_convert(
 	do {
 		if (offset >= end_offset)
 			break;
-		if (!buffer_uptodate(bh))
+		if (!(fsb->flags & BL_uptodate))
 			uptodate = 0;
-		if (!(PageUptodate(page) || buffer_uptodate(bh)) && !startio) {
+		if (!(PageUptodate(page) || fsb->flags & BL_uptodate) && !startio) {
 			/*
 			 * the iomap is actually still valid, but the ioend
 			 * isn't.  shouldn't happen too often.
@@ -1017,9 +1056,9 @@ xfs_page_state_convert(
 		 * Third case, an unmapped buffer was found, and we are
 		 * in a path where we need to write the whole page out.
 		 */
-		if (buffer_unwritten(bh) || buffer_delay(bh) ||
-		    ((buffer_uptodate(bh) || PageUptodate(page)) &&
-		     !buffer_mapped(bh) && (unmapped || startio))) {
+		if (fsb->flags & (BL_unwritten|BL_delay) ||
+		    ((fsb->flags & BL_uptodate || PageUptodate(page)) &&
+		     !(fsb->flags & BL_mapped) && (unmapped || startio))) {
 			int new_ioend = 0;
 
 			/*
@@ -1028,10 +1067,10 @@ xfs_page_state_convert(
 			if (flags == BMAPI_READ)
 				iomap_valid = 0;
 
-			if (buffer_unwritten(bh)) {
+			if (fsb->flags & BL_unwritten) {
 				type = IOMAP_UNWRITTEN;
 				flags = BMAPI_WRITE | BMAPI_IGNSTATE;
-			} else if (buffer_delay(bh)) {
+			} else if (fsb->flags & BL_delay) {
 				type = IOMAP_DELAY;
 				flags = BMAPI_ALLOCATE | trylock;
 			} else {
@@ -1051,7 +1090,7 @@ xfs_page_state_convert(
 				new_ioend = 1;
 				if (type == IOMAP_NEW) {
 					size = xfs_probe_cluster(inode,
-							page, bh, head, 0);
+							page, fsb, NULL, 0);
 				} else {
 					size = len;
 				}
@@ -1063,21 +1102,20 @@ xfs_page_state_convert(
 				iomap_valid = xfs_iomap_valid(&iomap, offset);
 			}
 			if (iomap_valid) {
-				xfs_map_at_offset(bh, offset,
+				xfs_map_at_offset(fsb, offset,
 						inode->i_blkbits, &iomap);
 				if (startio) {
-					xfs_add_to_ioend(inode, bh, offset,
+					xfs_add_to_ioend(inode, fsb, offset,
 							type, &ioend,
 							new_ioend);
 				} else {
-					set_buffer_dirty(bh);
-					unlock_buffer(bh);
-					mark_buffer_dirty(bh);
+					mark_mblock_dirty(fsb);
+					unlock_block(fsb);
 				}
 				page_dirty--;
 				count++;
 			}
-		} else if (buffer_uptodate(bh) && startio) {
+		} else if (fsb->flags & BL_uptodate && startio) {
 			/*
 			 * we got here because the buffer is already mapped.
 			 * That means it must already have extents allocated
@@ -1085,8 +1123,8 @@ xfs_page_state_convert(
 			 */
 			if (!iomap_valid || flags != BMAPI_READ) {
 				flags = BMAPI_READ;
-				size = xfs_probe_cluster(inode, page, bh,
-								head, 1);
+				size = xfs_probe_cluster(inode, page, fsb,
+								NULL, 1);
 				err = xfs_map_blocks(inode, offset, size,
 						&iomap, flags);
 				if (err)
@@ -1103,18 +1141,18 @@ xfs_page_state_convert(
 			 * that we are writing into for the first time.
 			 */
 			type = IOMAP_NEW;
-			if (trylock_buffer(bh)) {
-				ASSERT(buffer_mapped(bh));
+			if (trylock_block(fsb)) {
+				ASSERT(fsb->flags & BL_mapped);
 				if (iomap_valid)
 					all_bh = 1;
-				xfs_add_to_ioend(inode, bh, offset, type,
+				xfs_add_to_ioend(inode, fsb, offset, type,
 						&ioend, !iomap_valid);
 				page_dirty--;
 				count++;
 			} else {
 				iomap_valid = 0;
 			}
-		} else if ((buffer_uptodate(bh) || PageUptodate(page)) &&
+		} else if ((fsb->flags & BL_uptodate || PageUptodate(page)) &&
 			   (unmapped || startio)) {
 			iomap_valid = 0;
 		}
@@ -1122,9 +1160,9 @@ xfs_page_state_convert(
 		if (!iohead)
 			iohead = ioend;
 
-	} while (offset += len, ((bh = bh->b_this_page) != head));
+	} while (offset += len, 1);
 
-	if (uptodate && bh == head)
+	if (uptodate && 1) // XXX: fsb == head
 		SetPageUptodate(page);
 
 	if (startio)
@@ -1154,7 +1192,7 @@ error:
 	 */
 	if (err != -EAGAIN) {
 		if (!unmapped)
-			block_invalidatepage(page, 0);
+			fsblock_invalidate_page(page, 0);
 		ClearPageUptodate(page);
 	}
 	return err;
@@ -1200,7 +1238,7 @@ xfs_vm_writepage(
 	 *  4. There are unwritten buffers on the page
 	 */
 
-	if (!page_has_buffers(page)) {
+	if (!PageBlocks(page)) {
 		unmapped = 1;
 		need_trans = 1;
 	} else {
@@ -1223,8 +1261,8 @@ xfs_vm_writepage(
 	 * Delay hooking up buffer heads until we have
 	 * made our go/no-go decision.
 	 */
-	if (!page_has_buffers(page))
-		create_empty_buffers(page, 1 << inode->i_blkbits, 0);
+	if (!PageBlocks(page))
+		create_unmapped_blocks(page, GFP_NOIO, 1 << inode->i_blkbits, 0);
 
 	/*
 	 * Convert delayed allocate, unwritten or unmapped space
@@ -1289,7 +1327,7 @@ xfs_vm_releasepage(
 
 	xfs_page_trace(XFS_RELEASEPAGE_ENTER, inode, page, 0);
 
-	if (!page_has_buffers(page))
+	if (!PageBlocks(page))
 		return 0;
 
 	xfs_count_page_state(page, &delalloc, &unmapped, &unwritten);
@@ -1317,14 +1355,14 @@ xfs_vm_releasepage(
 	return 0;
 
 free_buffers:
-	return try_to_free_buffers(page);
+	return try_to_free_blocks(page);
 }
 
 STATIC int
 __xfs_get_blocks(
 	struct inode		*inode,
 	sector_t		iblock,
-	struct buffer_head	*bh_result,
+	struct fsblock		*fsb_result,
 	int			create,
 	int			direct,
 	bmapi_flags_t		flags)
@@ -1336,8 +1374,8 @@ __xfs_get_blocks(
 	int			error;
 
 	offset = (xfs_off_t)iblock << inode->i_blkbits;
-	ASSERT(bh_result->b_size >= (1 << inode->i_blkbits));
-	size = bh_result->b_size;
+	ASSERT(fsblock_size(fsb_result) >= (1 << inode->i_blkbits));
+	size = fsblock_size(fsb_result);
 	error = xfs_iomap(XFS_I(inode), offset, size,
 			     create ? flags : BMAPI_READ, &iomap, &niomap);
 	if (error)
@@ -1345,19 +1383,20 @@ __xfs_get_blocks(
 	if (niomap == 0)
 		return 0;
 
+	spin_lock_block_irq(fsb_result);
 	if (iomap.iomap_bn != IOMAP_DADDR_NULL) {
 		/*
 		 * For unwritten extents do not report a disk address on
 		 * the read case (treat as if we're reading into a hole).
 		 */
 		if (create || !(iomap.iomap_flags & IOMAP_UNWRITTEN)) {
-			xfs_map_buffer(bh_result, &iomap, offset,
+			xfs_map_buffer(fsb_result, &iomap, offset,
 				       inode->i_blkbits);
 		}
 		if (create && (iomap.iomap_flags & IOMAP_UNWRITTEN)) {
 			if (direct)
-				bh_result->b_private = inode;
-			set_buffer_unwritten(bh_result);
+				fsb_result->private = inode;
+			fsb_result->flags |= BL_unwritten;
 		}
 	}
 
@@ -1365,7 +1404,7 @@ __xfs_get_blocks(
 	 * If this is a realtime file, data may be on a different device.
 	 * to that pointed to from the buffer_head b_bdev currently.
 	 */
-	bh_result->b_bdev = iomap.iomap_target->bt_bdev;
+//XXX	bh_result->b_bdev = iomap.iomap_target->bt_bdev;
 
 	/*
 	 * If we previously allocated a block out beyond eof and we are now
@@ -1377,50 +1416,50 @@ __xfs_get_blocks(
 	 * correctly zeroed.
 	 */
 	if (create &&
-	    ((!buffer_mapped(bh_result) && !buffer_uptodate(bh_result)) ||
+	    ((!(fsb_result->flags & (BL_mapped|BL_uptodate))) ||
 	     (offset >= i_size_read(inode)) ||
 	     (iomap.iomap_flags & (IOMAP_NEW|IOMAP_UNWRITTEN))))
-		set_buffer_new(bh_result);
+		fsb_result->flags |= BL_new;
 
 	if (iomap.iomap_flags & IOMAP_DELAY) {
 		BUG_ON(direct);
-		if (create) {
-			set_buffer_uptodate(bh_result);
-			set_buffer_mapped(bh_result);
-			set_buffer_delay(bh_result);
-		}
+		if (create)
+			fsb_result->flags |= BL_uptodate|BL_mapped|BL_delay;
 	}
 
 	if (direct || size > (1 << inode->i_blkbits)) {
 		ASSERT(iomap.iomap_bsize - iomap.iomap_delta > 0);
 		offset = min_t(xfs_off_t,
 				iomap.iomap_bsize - iomap.iomap_delta, size);
-		bh_result->b_size = (ssize_t)min_t(xfs_off_t, LONG_MAX, offset);
+//XXX: could change fsb size bits		fsb_result->size = (ssize_t)min_t(xfs_off_t, LONG_MAX, offset);
 	}
+	spin_unlock_block_irq(fsb_result);
 
 	return 0;
 }
 
 int
 xfs_get_blocks(
-	struct inode		*inode,
-	sector_t		iblock,
-	struct buffer_head	*bh_result,
-	int			create)
+	struct address_space	*mapping,
+	struct fsblock		*fsb_result,
+	loff_t			pos,
+	int			mode)
 {
-	return __xfs_get_blocks(inode, iblock,
-				bh_result, create, 0, BMAPI_WRITE);
+	sector_t iblock;
+	iblock = pos >> fsblock_bits(fsb_result);
+	return __xfs_get_blocks(mapping->host, iblock,
+				fsb_result, mode, 0, BMAPI_WRITE);
 }
 
 STATIC int
 xfs_get_blocks_direct(
 	struct inode		*inode,
 	sector_t		iblock,
-	struct buffer_head	*bh_result,
+	struct fsblock		*fsb_result,
 	int			create)
 {
 	return __xfs_get_blocks(inode, iblock,
-				bh_result, create, 1, BMAPI_WRITE|BMAPI_DIRECT);
+				fsb_result, create, 1, BMAPI_WRITE|BMAPI_DIRECT);
 }
 
 STATIC void
@@ -1521,7 +1560,7 @@ xfs_vm_write_begin(
 	void			**fsdata)
 {
 	*pagep = NULL;
-	return block_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
+	return fsblock_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
 								xfs_get_blocks);
 }
 
@@ -1537,7 +1576,7 @@ xfs_vm_bmap(
 	xfs_ilock(ip, XFS_IOLOCK_SHARED);
 	xfs_flush_pages(ip, (xfs_off_t)0, -1, 0, FI_REMAPF);
 	xfs_iunlock(ip, XFS_IOLOCK_SHARED);
-	return generic_block_bmap(mapping, block, xfs_get_blocks);
+	return fsblock_bmap(mapping, block, xfs_get_blocks);
 }
 
 STATIC int
@@ -1545,17 +1584,7 @@ xfs_vm_readpage(
 	struct file		*unused,
 	struct page		*page)
 {
-	return mpage_readpage(page, xfs_get_blocks);
-}
-
-STATIC int
-xfs_vm_readpages(
-	struct file		*unused,
-	struct address_space	*mapping,
-	struct list_head	*pages,
-	unsigned		nr_pages)
-{
-	return mpage_readpages(mapping, pages, nr_pages, xfs_get_blocks);
+	return fsblock_read_page(page, xfs_get_blocks);
 }
 
 STATIC void
@@ -1565,20 +1594,18 @@ xfs_vm_invalidatepage(
 {
 	xfs_page_trace(XFS_INVALIDPAGE_ENTER,
 			page->mapping->host, page, offset);
-	block_invalidatepage(page, offset);
+	fsblock_invalidate_page(page, offset);
 }
 
 const struct address_space_operations xfs_address_space_operations = {
 	.readpage		= xfs_vm_readpage,
-	.readpages		= xfs_vm_readpages,
 	.writepage		= xfs_vm_writepage,
 	.writepages		= xfs_vm_writepages,
-	.sync_page		= block_sync_page,
 	.releasepage		= xfs_vm_releasepage,
 	.invalidatepage		= xfs_vm_invalidatepage,
 	.write_begin		= xfs_vm_write_begin,
-	.write_end		= generic_write_end,
+	.write_end		= fsblock_write_end,
 	.bmap			= xfs_vm_bmap,
 	.direct_IO		= xfs_vm_direct_IO,
-	.migratepage		= buffer_migrate_page,
+	.set_page_dirty		= fsblock_set_page_dirty,
 };
Index: linux-2.6/fs/xfs/linux-2.6/xfs_buf.h
===================================================================
--- linux-2.6.orig/fs/xfs/linux-2.6/xfs_buf.h
+++ linux-2.6/fs/xfs/linux-2.6/xfs_buf.h
@@ -24,7 +24,7 @@
 #include <asm/system.h>
 #include <linux/mm.h>
 #include <linux/fs.h>
-#include <linux/buffer_head.h>
+#include <linux/fsblock.h>
 #include <linux/uio.h>
 
 /*
Index: linux-2.6/fs/fsblock.c
===================================================================
--- linux-2.6.orig/fs/fsblock.c
+++ linux-2.6/fs/fsblock.c
@@ -76,38 +76,6 @@ void __init fsblock_init(void)
 #endif
 }
 
-#ifdef BDFLUSH_FLUSHING
-static void clear_block_dirty(struct fsblock *block);
-
-static int test_and_set_block_dirty(struct fsblock *block);
-
-static void set_block_dirty(struct fsblock *block)
-{
-	test_and_set_block_dirty(block);
-}
-#else
-static inline void clear_block_dirty(struct fsblock *block)
-{
-	FSB_BUG_ON(!spin_is_locked_block(block));
-	block->flags &= ~BL_dirty;
-}
-
-static inline int test_and_set_block_dirty(struct fsblock *block)
-{
-	FSB_BUG_ON(!spin_is_locked_block(block));
-	if (block->flags & BL_dirty)
-		return 1;
-	block->flags |= BL_dirty;
-	return 0;
-}
-
-static inline void set_block_dirty(struct fsblock *block)
-{
-	FSB_BUG_ON(!spin_is_locked_block(block));
-	block->flags |= BL_dirty;
-}
-#endif
-
 static void init_block(struct page *page, struct fsblock *block, unsigned int bits)
 {
 	block->flags = 0;
@@ -257,7 +225,7 @@ static void free_block_check(struct fsbl
 		printk("block count = %u\n", count);
 		printk("block private = %p\n", private);
 		printk("vmap count  = %u\n", vmap_count);
-		BUG();
+		FSB_BUG();
 	}
 }
 #endif
@@ -407,7 +375,7 @@ int trylock_block(struct fsblock *block)
 	unsigned long flags;
 	int ret;
 
-	FSB_BUG_ON(block->count == 0);
+	FSB_BUG_ON(!some_refcounted(block));
 	/* XXX: audit for possible irq uses */
 	spin_lock_block_irqsave(block, flags);
 	ret = !(block->flags & BL_locked);
@@ -431,7 +399,7 @@ void unlock_block(struct fsblock *block)
 {
 	unsigned long flags;
 
-	FSB_BUG_ON(block->count == 0);
+	FSB_BUG_ON(!some_refcounted(block));
 	spin_lock_block_irqsave(block, flags);
 	FSB_BUG_ON(!(block->flags & BL_locked));
 	block->flags &= ~BL_locked;
@@ -1048,7 +1016,7 @@ static int invalidate_aliasing_blocks(st
 }
 
 #define CREATE_METADATA	0x01
-static int create_unmapped_blocks(struct page *page, gfp_t gfp_flags, unsigned int size, unsigned int flags)
+int create_unmapped_blocks(struct page *page, gfp_t gfp_flags, unsigned int size, unsigned int flags)
 {
 	unsigned int bits = ffs(size) - 1;
 	struct fsblock *block;
@@ -1119,6 +1087,7 @@ static int create_unmapped_blocks(struct
 
 	return 0;
 }
+EXPORT_SYMBOL(create_unmapped_blocks);
 
 static int create_unmapped_blocks_oneref(struct page *page, gfp_t gfp_flags, unsigned int size, unsigned int flags)
 {
@@ -1445,6 +1414,15 @@ static void block_end_write(struct fsblo
 	local_irq_restore(flags);
 }
 
+void fsblock_end_io(struct fsblock *block, int uptodate)
+{
+	if (block->flags & BL_readin)
+		block_end_read(block, uptodate);
+	else
+		block_end_write(block, uptodate);
+}
+EXPORT_SYMBOL(fsblock_end_io);
+
 static void block_end_bio_io(struct bio *bio, int err)
 {
 	struct fsblock *block = bio->bi_private;
@@ -1459,10 +1437,7 @@ static void block_end_bio_io(struct bio 
 	FSB_BUG_ON((block->flags & (BL_readin|BL_writeback)) == (BL_readin|BL_writeback));
 	FSB_BUG_ON((block->flags & (BL_readin|BL_writeback)) == 0);
 
-	if (block->flags & BL_readin)
-		block_end_read(block, uptodate);
-	else
-		block_end_write(block, uptodate);
+	fsblock_end_io(block, uptodate);
 
 	bio_put(bio);
 }
@@ -2059,7 +2034,8 @@ sector_t fsblock_bmap(struct address_spa
 			loff_t off;
 			spin_unlock_block_irq(block);
 			off = sector_offset(blocknr, inode->i_blkbits);
-			ret = map_block(mapping, block, off, 0); /* create? */
+			/* create block? */
+			ret = map_block(mapping, block, off, MAP_BLOCK_READ);
 			spin_lock_block_irq(block);
 			if (ret)
 				goto out_unlock;
@@ -2167,7 +2143,7 @@ int fsblock_read_page(struct page *page,
 		for_each_block(block, b) {
 			if (!(b->flags & (BL_mapped|BL_hole))) {
 				spin_unlock_block_irq(block);
-				ret = map_block(mapping, b, off, 0);
+				ret = map_block(mapping, b, off, MAP_BLOCK_READ);
 				spin_lock_block_irq(block);
 				/* XXX: SetPageError on failure? */
 				if (ret)
@@ -2206,7 +2182,7 @@ int fsblock_read_page(struct page *page,
 	} else if (fsblock_midpage(block)) {
 		if (!(block->flags & (BL_mapped|BL_hole))) {
 			spin_unlock_block_irq(block);
-			ret = map_block(mapping, block, off, 0);
+			ret = map_block(mapping, block, off, MAP_BLOCK_READ);
 			/* XXX: SetPageError on failure? */
 			if (ret)
 				goto out_drop;
@@ -2233,7 +2209,7 @@ int fsblock_read_page(struct page *page,
 		FSB_BUG_ON(block->flags & BL_dirty);
 
 		if (!(block->flags & (BL_mapped|BL_hole))) {
-			ret = map_block(mapping, block, off, 0);
+			ret = map_block(mapping, block, off, MAP_BLOCK_READ);
 			if (ret)
 				goto out_unlock;
 		}
@@ -2340,6 +2316,12 @@ int fsblock_write_page(struct page *page
 		struct fsblock *b;
 
 		for_each_block(block, b) {
+
+			if ((b->flags & (BL_delay|BL_dirty)) == (BL_delay|BL_dirty)) {
+				spin_unlock_block_irq(b);
+				ret = map_block(mapping, b, off, MAP_BLOCK_ALLOCATE);
+				spin_lock_block_irq(b);
+			}
 			nr += block_write_helper(page, b);
 
 			off += size;
@@ -2376,6 +2358,11 @@ int fsblock_write_page(struct page *page
 		unlock_page(page);
 
 	} else if (fsblock_midpage(block)) {
+		if ((block->flags & (BL_delay|BL_dirty)) == (BL_delay|BL_dirty)) {
+			spin_unlock_block_irq(block);
+			ret = map_block(mapping, block, off, MAP_BLOCK_ALLOCATE);
+			spin_lock_block_irq(block);
+		}
 		if (block_write_helper(page, block)) {
 			FSB_BUG_ON(PageWriteback(page));
 			set_page_writeback(page);
@@ -2511,7 +2498,7 @@ static int block_dirty_helper(struct pag
 		 */
 		__set_page_dirty_noblocks_nowarn(page);
 		return 0;
-	} else if (block->flags & BL_uptodate) {
+	} else if (block->flags & (BL_uptodate|BL_delay|BL_unwritten)) {
 		return 0;
 	} else {
 		if (from <= offset && to >= offset+size)
@@ -2542,7 +2529,7 @@ static int fsblock_write_begin_super(str
 		return ret;
 
 	if (!(block->flags & BL_mapped)) {
-		ret = map_block(mapping, block, pos & ~(size-1), 1);
+		ret = map_block(mapping, block, pos & ~(size-1), MAP_BLOCK_RESERVE);
 		if (ret)
 			goto out_unlock;
 	}
@@ -2637,7 +2624,7 @@ int fsblock_write_begin(struct file *fil
 		for_each_block(block, b) {
 			if (off < to && off + size > from) {
 				if (!(b->flags & BL_mapped)) {
-					ret = map_block(mapping, b, pos+off, 1);
+					ret = map_block(mapping, b, pos+off, MAP_BLOCK_RESERVE);
 					if (ret)
 						goto out_zero_new;
 				}
@@ -2684,7 +2671,7 @@ int fsblock_write_begin(struct file *fil
 		 */
 		if (!(block->flags & BL_mapped)) {
 			spin_unlock_block_irq(block);
-			ret = map_block(mapping, block, pos, 1);
+			ret = map_block(mapping, block, pos, MAP_BLOCK_RESERVE);
 			if (ret)
 				goto out_zero_new;
 			spin_lock_block_irq(block);
@@ -3111,7 +3098,12 @@ static void invalidate_block(struct fsbl
 	FSB_BUG_ON(block->flags & BL_locked);
 	FSB_BUG_ON(!block->page->mapping);
 
-//	lock_block(block); XXX: why lock?
+#if 0
+	__block_get(block);
+	spin_unlock_block_irq(block);
+	lock_block(block); /* XXX: why lock? For XFS */
+	spin_lock_block_irq(block);
+#endif
 	/*
 	 * XXX
 	 * FSB_BUG_ON(block->flags & BL_new);
@@ -3121,9 +3113,14 @@ static void invalidate_block(struct fsbl
 	clear_block_dirty(block);
 	block->flags &= ~BL_new;
 	/* Don't clear uptodate, because the block essentially turns into a hole and remains uptodate */
-	block->flags &= ~(BL_mapped|BL_hole);
+	block->flags &= ~(BL_mapped|BL_hole|BL_delay|BL_unwritten);
 	block->block_nr = -1;
-//	unlock_block(block);
+#if 0
+	spin_unlock_block_irq(block);
+	unlock_block(block);
+	spin_lock_block_irq(block);
+	block->count--;
+#endif
 	/* XXX: if metadata, then have an fs-private release? */
 }
 
@@ -3549,10 +3546,11 @@ static void fbd_del_dirty_block(struct f
 	spin_unlock(&fbd->lock);
 }
 
-static void clear_block_dirty(struct fsblock *block)
+void clear_block_dirty(struct fsblock *block)
 {
 	struct fsblock_bd *fbd;
 
+	FSB_BUG_ON(!spin_is_locked_block(block));
 	if (!(block->flags & BL_dirty))
 		return;
 
@@ -3563,10 +3561,11 @@ static void clear_block_dirty(struct fsb
 		fbd_del_dirty_block(fbd, block);
 }
 
-static int test_and_set_block_dirty(struct fsblock *block)
+int test_and_set_block_dirty(struct fsblock *block)
 {
 	struct fsblock_bd *fbd;
 
+	FSB_BUG_ON(!spin_is_locked_block(block));
 	if (block->flags & BL_dirty)
 		return 1;
 
Index: linux-2.6/fs/xfs/linux-2.6/xfs_aops.h
===================================================================
--- linux-2.6.orig/fs/xfs/linux-2.6/xfs_aops.h
+++ linux-2.6/fs/xfs/linux-2.6/xfs_aops.h
@@ -33,14 +33,18 @@ typedef struct xfs_ioend {
 	int			io_error;	/* I/O error code */
 	atomic_t		io_remaining;	/* hold count */
 	struct inode		*io_inode;	/* file being written to */
-	struct buffer_head	*io_buffer_head;/* buffer linked list head */
-	struct buffer_head	*io_buffer_tail;/* buffer linked list tail */
+	struct fsblock		*io_fsb_head;	/* fsb linked list head */
+	struct fsblock		*io_fsb_tail;	/* fsb linked list tail */
 	size_t			io_size;	/* size of the extent */
 	xfs_off_t		io_offset;	/* offset in the file */
 	struct work_struct	io_work;	/* xfsdatad work queue */
 } xfs_ioend_t;
 
 extern const struct address_space_operations xfs_address_space_operations;
-extern int xfs_get_blocks(struct inode *, sector_t, struct buffer_head *, int);
+extern int xfs_get_blocks(
+	struct address_space	*mapping,
+	struct fsblock		*fsb_result,
+	loff_t			pos,
+	int			mode);
 
 #endif /* __XFS_AOPS_H__ */
Index: linux-2.6/include/linux/fsblock.h
===================================================================
--- linux-2.6.orig/include/linux/fsblock.h
+++ linux-2.6/include/linux/fsblock.h
@@ -15,6 +15,10 @@
 
 #include <linux/kallsyms.h>
 #define MIN_SECTOR_SHIFT	9 /* 512 bytes */
+#define MIN_SECTOR_SIZE		(1UL<<MIN_SECTOR_SHIFT)
+#ifndef MAX_BUF_PER_PAGE
+#define MAX_BUF_PER_PAGE (PAGE_CACHE_SIZE / MIN_SECTOR_SIZE)
+#endif
 
 #define BL_bits_mask	0x000f
 
@@ -39,6 +43,8 @@
 #endif
 
 #define BL_dirty_acct	0x10000
+#define BL_unwritten	0x20000
+#define BL_delay	0x40000
 
 #ifndef FSB_DEBUG
 static inline void assert_block(struct fsblock *block)
@@ -48,6 +54,11 @@ static inline void assert_block(struct f
 void assert_block(struct fsblock *block);
 #endif
 
+
+#define MAP_BLOCK_READ		0
+#define MAP_BLOCK_RESERVE	1
+#define MAP_BLOCK_ALLOCATE	2
+
 /*
  * XXX: should distinguish data buffer and metadata buffer. data buffer
  * attachment (or dirtyment?) could cause the page to *also* be added to
@@ -236,9 +247,10 @@ static inline void clear_page_blocks(str
 static inline void map_fsblock(struct fsblock *block, sector_t blocknr)
 {
 //	FSB_BUG_ON(!spin_is_locked_block(block));
-	FSB_BUG_ON(block->flags & BL_mapped);
+//XXX	FSB_BUG_ON(block->flags & BL_mapped); XFS
 	block->block_nr = blocknr;
 	block->flags |= BL_mapped;
+	block->flags &= ~(BL_delay|BL_unwritten);
 #ifdef FSB_DEBUG
 	/* XXX: test for inside bdev? */
 	if (block->flags & BL_metadata) {
@@ -378,6 +390,7 @@ static inline struct fsblock_meta *sb_mb
 
 void mbforget(struct fsblock_meta *mblock);
 
+int create_unmapped_blocks(struct page *page, gfp_t gfp_flags, unsigned int size, unsigned int flags);
 void mark_mblock_uptodate(struct fsblock_meta *mblock);
 int mark_mblock_dirty(struct fsblock_meta *mblock);
 int mark_mblock_dirty_inode(struct fsblock_meta *mblock, struct inode *inode);
@@ -522,6 +535,38 @@ int trylock_block(struct fsblock *block)
 void lock_block(struct fsblock *block);
 void unlock_block(struct fsblock *block);
 
+#ifdef BDFLUSH_FLUSHING
+void clear_block_dirty(struct fsblock *block);
+
+int test_and_set_block_dirty(struct fsblock *block);
+
+static inline void set_block_dirty(struct fsblock *block)
+{
+	test_and_set_block_dirty(block);
+}
+#else
+static inline void clear_block_dirty(struct fsblock *block)
+{
+	FSB_BUG_ON(!spin_is_locked_block(block));
+	block->flags &= ~BL_dirty;
+}
+
+static inline int test_and_set_block_dirty(struct fsblock *block)
+{
+	FSB_BUG_ON(!spin_is_locked_block(block));
+	if (block->flags & BL_dirty)
+		return 1;
+	block->flags |= BL_dirty;
+	return 0;
+}
+
+static inline void set_block_dirty(struct fsblock *block)
+{
+	FSB_BUG_ON(!spin_is_locked_block(block));
+	block->flags |= BL_dirty;
+}
+#endif
+
 sector_t fsblock_bmap(struct address_space *mapping, sector_t block, map_block_fn *insert_mapping);
 
 int fsblock_read_page(struct page *page, map_block_fn *insert_mapping);
@@ -561,5 +606,6 @@ static inline void writeback_blockdevs_b
 #endif
 
 void fsblock_init(void);
+void fsblock_end_io(struct fsblock *block, int uptodate);
 
 #endif
Index: linux-2.6/fs/xfs/linux-2.6/xfs_buf.c
===================================================================
--- linux-2.6.orig/fs/xfs/linux-2.6/xfs_buf.c
+++ linux-2.6/fs/xfs/linux-2.6/xfs_buf.c
@@ -1501,7 +1501,6 @@ xfs_mapping_buftarg(
 	struct inode		*inode;
 	struct address_space	*mapping;
 	static const struct address_space_operations mapping_aops = {
-		.sync_page = block_sync_page,
 		.migratepage = fail_migrate_page,
 	};
 
Index: linux-2.6/fs/xfs/linux-2.6/xfs_file.c
===================================================================
--- linux-2.6.orig/fs/xfs/linux-2.6/xfs_file.c
+++ linux-2.6/fs/xfs/linux-2.6/xfs_file.c
@@ -429,7 +429,7 @@ xfs_vm_page_mkwrite(
 	struct vm_area_struct	*vma,
 	struct page		*page)
 {
-	return block_page_mkwrite(vma, page, xfs_get_blocks);
+	return fsblock_page_mkwrite(vma, page, xfs_get_blocks);
 }
 
 const struct file_operations xfs_file_operations = {
Index: linux-2.6/fs/xfs/linux-2.6/xfs_iops.c
===================================================================
--- linux-2.6.orig/fs/xfs/linux-2.6/xfs_iops.c
+++ linux-2.6/fs/xfs/linux-2.6/xfs_iops.c
@@ -614,8 +614,7 @@ xfs_vn_truncate(
 	struct inode	*inode)
 {
 	int	error;
-	error = block_truncate_page(inode->i_mapping, inode->i_size,
-							xfs_get_blocks);
+	error = fsblock_truncate_page(inode->i_mapping, inode->i_size);
 	WARN_ON(error);
 }
 
Index: linux-2.6/fs/ext2/inode.c
===================================================================
--- linux-2.6.orig/fs/ext2/inode.c
+++ linux-2.6/fs/ext2/inode.c
@@ -591,6 +591,8 @@ static int ext2_get_blocks(struct inode 
 	int count = 0;
 	ext2_fsblk_t first_block = 0;
 
+	FSB_BUG_ON(create == MAP_BLOCK_ALLOCATE);
+
 	*flags = 0;
 
 	depth = ext2_block_to_path(inode, blocknr, offsets,&blocks_to_boundary);
@@ -724,16 +726,17 @@ static int ext2_map_extent(struct addres
 }
 
 static int ext2_map_block(struct address_space *mapping,
-			struct fsblock *block, loff_t pos, int create)
+			struct fsblock *block, loff_t pos, int mode)
 {
 	FSB_BUG_ON(block->flags & BL_mapped);
+	FSB_BUG_ON(mode == MAP_BLOCK_ALLOCATE);
 
-	return fsb_ext_map_fsblock(mapping, pos, block, create, &EXT2_I(mapping->host)->fsb_ext_root, ext2_map_extent);
+	return fsb_ext_map_fsblock(mapping, pos, block, mode, &EXT2_I(mapping->host)->fsb_ext_root, ext2_map_extent);
 }
 #else
 
 static int ext2_map_block(struct address_space *mapping,
-			struct fsblock *b, loff_t pos, int create)
+			struct fsblock *b, loff_t pos, int mode)
 {
 	struct inode *inode = mapping->host;
 	sector_t blocknr;
@@ -743,10 +746,11 @@ static int ext2_map_block(struct address
 	int ret;
 
 	FSB_BUG_ON(b->flags & BL_mapped);
+	FSB_BUG_ON(mode == MAP_BLOCK_ALLOCATE);
 
 	blocknr = pos >> inode->i_blkbits;
 
-	ret = ext2_get_blocks(inode, blocknr, 1, create, &offset, &block, &size, &flags);
+	ret = ext2_get_blocks(inode, blocknr, 1, mode, &offset, &block, &size, &flags);
 	if (ret > 0) {
 		ret = 0;
 	}
Index: linux-2.6/fs/fsb_extentmap.c
===================================================================
--- linux-2.6.orig/fs/fsb_extentmap.c
+++ linux-2.6/fs/fsb_extentmap.c
@@ -99,7 +99,7 @@ static int fsb_ext_merge_before(struct f
 }
 
 int fsb_ext_map_fsblock(struct address_space *mapping, loff_t off,
-			struct fsblock *fsblock, int create,
+			struct fsblock *fsblock, int mode,
 			struct fsb_ext_root *root, map_fsb_extent_fn mapfn)
 {
 	struct inode *inode = mapping->host;
@@ -113,7 +113,7 @@ int fsb_ext_map_fsblock(struct address_s
 	if (!ext)
 		goto get_new;
 
-	if (ext->flags & FE_mapped || (ext->flags & FE_hole && !create)) {
+	if (ext->flags & FE_mapped || (ext->flags & FE_hole && mode == MAP_BLOCK_READ)) {
 		spin_lock_block_irq(fsblock);
 		if (ext->flags & FE_mapped) {
 			blocknr = ext->block + (offset - ext->offset);
@@ -147,7 +147,7 @@ get_new:
 			return -ENOMEM;
 		}
 
-		ret = mapfn(mapping, off, create, &new->offset, &new->block,
+		ret = mapfn(mapping, off, mode, &new->offset, &new->block,
 						&new->size, &new->flags);
 		if (ret) {
 			kmem_cache_free(extent_cache, split);
@@ -268,7 +268,7 @@ try_next:
 				fsblock->flags |= BL_new;
 		} else {
 			FSB_BUG_ON(!(new->flags & FE_hole));
-			FSB_BUG_ON(create);
+			FSB_BUG_ON(mode != MAP_BLOCK_READ);
 			fsblock->flags |= BL_hole;
 		}
 		spin_unlock_block_irq(fsblock);
Index: linux-2.6/include/linux/fsblock_types.h
===================================================================
--- linux-2.6.orig/include/linux/fsblock_types.h
+++ linux-2.6/include/linux/fsblock_types.h
@@ -36,7 +36,7 @@
  */
 //#define VMAP_CACHE	1
 
-#define BDFLUSH_FLUSHING 1
+//#define BDFLUSH_FLUSHING 1
 
 struct address_space;
 