[PATCH 08/12] xfs: support multiple irec maps in buffer code

From: Dave Chinner <dchinner@xxxxxxxxxx>

Add support for initialising and doing IO on multi-segment compound
buffers. Buffer allocation now sets up one IO vector per irec map
passed in, buffer lookup takes a {map, nmaps} pair, and IO submission
walks the vector array issuing bios for each discontiguous extent in
turn.
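
For example, a caller that needs one buffer spanning two discontiguous
filesystem-block extents could do something like the sketch below.
get_compound_buf(), fsbno1 and fsbno2 are made up for illustration;
xfs_buf_get_irec() and the addressing convention (a br_state other
than XFS_EXT_DADDR means br_startblock is in filesystem blocks) are as
implemented in this series:

	/* illustrative sketch only - not part of this patch */
	static struct xfs_buf *
	get_compound_buf(
		struct xfs_buftarg	*target,
		xfs_fsblock_t		fsbno1,
		xfs_fsblock_t		fsbno2)
	{
		struct xfs_bmbt_irec	map[2] = {
			{ .br_startblock = fsbno1, .br_blockcount = 1,
			  .br_state = XFS_EXT_NORM },
			{ .br_startblock = fsbno2, .br_blockcount = 1,
			  .br_state = XFS_EXT_NORM },
		};

		/* one buffer, two IO vectors; the vector lengths and
		 * disk addresses are filled in by the buffer cache */
		return xfs_buf_get_irec(target, map, 2, 0);
	}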

Signed-off-by: Dave Chinner <dchinner@xxxxxxxxxx>
---
 fs/xfs/xfs_buf.c |  317 ++++++++++++++++++++++++++++++++++++++----------------
 fs/xfs/xfs_buf.h |    7 +-
 2 files changed, 228 insertions(+), 96 deletions(-)
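
To make the IO submission bookkeeping concrete, consider a
hypothetical compound buffer (all numbers illustrative, 4096 byte
filesystem blocks assumed) built from three maps of 1, 2 and 1 blocks:

	b_vec[0].bv_len = 4096
	b_vec[1].bv_len = 8192
	b_vec[2].bv_len = 4096
	b_buffer_length = b_count_desired = 16384

_xfs_buf_ioapply() now walks the vectors in order, issuing bios for
min(bv_len, remaining count) bytes each time. If b_count_desired were
trimmed to 12288 (as log recovery may do), vectors 0 and 1 would issue
4096 and 8192 bytes respectively and the walk would stop with nothing
issued for vector 2, because the remaining count reaches zero first.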

diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 2ca9086..c533597 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -166,13 +166,15 @@ xfs_buf_stale(
 }
 
 struct xfs_buf *
-xfs_buf_alloc(
+xfs_buf_alloc_irec(
 	struct xfs_buftarg	*target,
-	xfs_daddr_t		blkno,
-	size_t			numblks,
+	struct xfs_bmbt_irec	*map,
+	int			nmaps,
 	xfs_buf_flags_t		flags)
 {
+	xfs_daddr_t		blkno;
 	struct xfs_buf		*bp;
+	int			i;
 
 	bp = kmem_zone_alloc(xfs_buf_zone, xb_to_km(flags));
 	if (unlikely(!bp))
@@ -193,21 +195,45 @@ xfs_buf_alloc(
 	sema_init(&bp->b_sema, 0); /* held, no waiters */
 	XB_SET_OWNER(bp);
 	bp->b_target = target;
-	bp->b_file_offset = blkno << BBSHIFT;
+	bp->b_flags = flags;
+
+	/* initialise the buffer IO vector array appropriately */
+	if (nmaps <= XB_VECS) {
+		bp->b_vec = &bp->b_vec_array[0];
+	} else {
+		bp->b_vec = kmem_alloc(nmaps * sizeof(*bp->b_vec),
+					xb_to_km(flags));
+		if (!bp->b_vec) {
+			kmem_zone_free(xfs_buf_zone, bp);
+			return NULL;
+		}
+	}
+	bp->b_vec_count = nmaps;
+	bp->b_buffer_length = 0;
+
+	if (map[0].br_state == XFS_EXT_DADDR)
+		blkno = map[0].br_startblock;
+	else
+		blkno = XFS_FSB_TO_DADDR(target->bt_mount, map[0].br_startblock);
+	bp->b_file_offset = BBTOB(blkno);
+
+	for (i = 0; i < nmaps; i++) {
+		if (map[0].br_state == XFS_EXT_DADDR) {
+			bp->b_vec[i].bv_len = BBTOB(map[i].br_blockcount);
+		} else {
+			bp->b_vec[i].bv_len = XFS_FSB_TO_B(target->bt_mount,
+						 map[i].br_blockcount);
+		}
+		bp->b_buffer_length += bp->b_vec[i].bv_len;
+		bp->b_vec[i].bv_bn = XFS_BUF_DADDR_NULL;
+	}
+
 	/*
 	 * Set buffer_length and count_desired to the same value initially.
 	 * I/O routines should use count_desired, which will be the same in
 	 * most cases but may be reset (e.g. XFS recovery).
 	 */
-	bp->b_buffer_length = bp->b_count_desired = numblks << BBSHIFT;
-	bp->b_flags = flags;
-
-	/* XXX: we have the block number. Why don't we just set it here? */
-	/* initialise the buffer IO vector array appropriately */
-	bp->b_vec_count = 1;
-	bp->b_vec = &bp->b_vec_array[0];
-	bp->b_vec[0].bv_bn = XFS_BUF_DADDR_NULL;
-	bp->b_vec[0].bv_len = bp->b_buffer_length;
+	bp->b_count_desired = bp->b_buffer_length;
 
 	atomic_set(&bp->b_pin_count, 0);
 	init_waitqueue_head(&bp->b_waiters);
@@ -218,6 +244,22 @@ xfs_buf_alloc(
 	return bp;
 }
 
+struct xfs_buf *
+xfs_buf_alloc(
+	struct xfs_buftarg	*target,
+	xfs_daddr_t		blkno,
+	size_t			numblks,
+	xfs_buf_flags_t		flags)
+{
+	struct xfs_bmbt_irec	map = {
+		.br_startblock = blkno,
+		.br_blockcount = numblks,
+		.br_state = XFS_EXT_DADDR,
+	};
+
+	return xfs_buf_alloc_irec(target, &map, 1, flags);
+}
+
 /*
  *	Allocate a page array capable of holding a specified number
  *	of pages, and point the page buf at it.
@@ -287,6 +329,10 @@ xfs_buf_free(
 		}
 	} else if (bp->b_flags & _XBF_KMEM)
 		kmem_free(bp->b_addr);
+
+	if (bp->b_vec_count > XB_VECS)
+		kmem_free(bp->b_vec);
+
 	_xfs_buf_free_pages(bp);
 	kmem_zone_free(xfs_buf_zone, bp);
 }
@@ -429,11 +475,11 @@ _xfs_buf_map_pages(
  *	a given range of an inode.  The buffer is returned
  *	locked.	No I/O is implied by this call.
  */
-xfs_buf_t *
-_xfs_buf_find(
+static xfs_buf_t *
+xfs_buf_find_irec(
 	xfs_buftarg_t		*btp,
-	xfs_daddr_t		blkno,
-	size_t			numblks,
+	struct xfs_bmbt_irec	*map,
+	int			nmaps,
 	xfs_buf_flags_t		flags,
 	xfs_buf_t		*new_bp)
 {
@@ -442,14 +488,37 @@ _xfs_buf_find(
 	struct xfs_perag	*pag;
 	struct rb_node		**rbp;
 	struct rb_node		*parent;
-	xfs_buf_t		*bp;
+	struct xfs_buf		*bp;
+	xfs_daddr_t		blkno;
+	int			i;
 
-	offset = blkno << BBSHIFT;
-	numbytes = numblks << BBSHIFT;
+	/*
+	 * Check for IOs smaller than the sector size or not sector aligned,
+	 * calculate the size of the buffer and initialise variables that gcc
+	 * thinks are used uninitialised because it can't grok the fact they
+	 * are initialised within the loop body.
+	 */
+	numbytes = 0;
+	offset = 0;
+	blkno = XFS_BUF_DADDR_NULL;
+	for (i = 0; i < nmaps; i++) {
+		size_t	len;
+
+		if (map[0].br_state == XFS_EXT_DADDR) {
+			len = BBTOB(map[i].br_blockcount);
+			blkno = map[i].br_startblock;
+		} else {
+			len = XFS_FSB_TO_B(btp->bt_mount, map[i].br_blockcount);
+			blkno = XFS_FSB_TO_DADDR(btp->bt_mount,
+							map[i].br_startblock);
+		}
+		ASSERT(!(len < (1 << btp->bt_sshift)));
+		ASSERT(!(BBTOB(blkno) & (xfs_off_t)btp->bt_smask));
+		numbytes += len;
+		if (i == 0)
+			offset = BBTOB(blkno);
 
-	/* Check for IOs smaller than the sector size / not sector aligned */
-	ASSERT(!(numbytes < (1 << btp->bt_sshift)));
-	ASSERT(!(offset & (xfs_off_t)btp->bt_smask));
+	}
 
 	/* get tree root */
 	pag = xfs_perag_get(btp->bt_mount,
@@ -530,27 +599,6 @@ found:
 	return bp;
 }
 
-/*
- * Assembles a buffer covering the specified range. The code is optimised for
- * cache hits, as metadata intensive workloads will see 3 orders of magnitude
- * more hits than misses.
- */
-struct xfs_buf *
-xfs_buf_get(
-	xfs_buftarg_t		*target,
-	xfs_daddr_t		blkno,
-	size_t			numblks,
-	xfs_buf_flags_t		flags)
-{
-	struct xfs_bmbt_irec	map = {
-		.br_startblock = blkno,
-		.br_blockcount = numblks,
-		.br_state = XFS_EXT_DADDR,
-	};
-
-	return xfs_buf_get_irec(target, &map, 1, flags);
-}
-
 STATIC int
 _xfs_buf_read(
 	xfs_buf_t		*bp,
@@ -571,7 +619,9 @@ _xfs_buf_read(
 }
 
 /*
- * XXX: only supports a single map for now
+ * Assembles a buffer covering the specified range. The code is optimised for
+ * cache hits, as metadata intensive workloads will see 3 orders of magnitude
+ * more hits than misses.
  */
 struct xfs_buf *
 xfs_buf_get_irec(
@@ -580,31 +630,20 @@ xfs_buf_get_irec(
 	int			nmaps,
 	xfs_buf_flags_t		flags)
 {
-	xfs_daddr_t		blkno;
-	size_t			numblks;
 	struct xfs_buf		*bp;
 	struct xfs_buf		*new_bp;
 	int			error = 0;
+	int			i;
 
-	ASSERT_ALWAYS(nmaps == 1);
-
-	if (map->br_state == XFS_EXT_DADDR) {
-		blkno = map->br_startblock;
-		numblks = map->br_blockcount;
-	} else {
-		blkno = XFS_FSB_TO_DADDR(target->bt_mount, map->br_startblock);
-		numblks = XFS_FSB_TO_BB(target->bt_mount, map->br_blockcount);
-	}
-
-	bp = _xfs_buf_find(target, blkno, numblks, flags, NULL);
+	bp = xfs_buf_find_irec(target, map, nmaps, flags, NULL);
 	if (likely(bp))
 		goto found;
 
-	new_bp = xfs_buf_alloc(target, blkno, numblks, flags);
+	new_bp = xfs_buf_alloc_irec(target, map, nmaps, flags);
 	if (unlikely(!new_bp))
 		return NULL;
 
-	bp = _xfs_buf_find(target, blkno, numblks, flags, new_bp);
+	bp = xfs_buf_find_irec(target, map, nmaps, flags, new_bp);
 	if (!bp) {
 		kmem_zone_free(xfs_buf_zone, new_bp);
 		return NULL;
@@ -618,11 +657,17 @@ xfs_buf_get_irec(
 		kmem_zone_free(xfs_buf_zone, new_bp);
 
 	/*
-	 * Now we have a workable buffer, fill in the block number so
-	 * that we can do IO on it.
+	 * Now we have a workable buffer, fill in the block vector addresses
+	 * so that we can do IO on it. The lengths have already been filled in
+	 * by xfs_buf_alloc_irec().
 	 */
-	bp->b_vec[0].bv_bn = blkno;
-	bp->b_vec[0].bv_len = bp->b_buffer_length;
+	for (i = 0; i < nmaps; i++) {
+		if (map[0].br_state == XFS_EXT_DADDR)
+			bp->b_vec[i].bv_bn = map[i].br_startblock;
+		else
+			bp->b_vec[i].bv_bn = XFS_FSB_TO_DADDR(target->bt_mount,
+							map[i].br_startblock);
+	}
 	bp->b_count_desired = bp->b_buffer_length;
 
 found:
@@ -699,6 +744,39 @@ xfs_buf_readahead_irec(
 }
 
 xfs_buf_t *
+xfs_buf_find(
+	xfs_buftarg_t		*target,
+	xfs_daddr_t		blkno,
+	size_t			numblks,
+	xfs_buf_flags_t		flags)
+{
+	struct xfs_bmbt_irec	map = {
+		.br_startblock = blkno,
+		.br_blockcount = numblks,
+		.br_state = XFS_EXT_DADDR,
+	};
+
+	return xfs_buf_find_irec(target, &map, 1, flags, NULL);
+}
+
+struct xfs_buf *
+xfs_buf_get(
+	xfs_buftarg_t		*target,
+	xfs_daddr_t		blkno,
+	size_t			numblks,
+	xfs_buf_flags_t		flags)
+{
+	struct xfs_bmbt_irec	map = {
+		.br_startblock = blkno,
+		.br_blockcount = numblks,
+		.br_state = XFS_EXT_DADDR,
+	};
+
+	return xfs_buf_get_irec(target, &map, 1, flags);
+}
+
+
+xfs_buf_t *
 xfs_buf_read(
 	xfs_buftarg_t		*target,
 	xfs_daddr_t		blkno,
@@ -773,6 +851,8 @@ xfs_buf_set_empty(
 	struct xfs_buf		*bp,
 	size_t			numblks)
 {
+	ASSERT(bp->b_vec_count == 1);
+
 	if (bp->b_pages)
 		_xfs_buf_free_pages(bp);
 
@@ -780,7 +860,7 @@ xfs_buf_set_empty(
 	bp->b_page_count = 0;
 	bp->b_addr = NULL;
 	bp->b_file_offset = 0;
-	bp->b_buffer_length = bp->b_count_desired = numblks << BBSHIFT;
+	bp->b_buffer_length = bp->b_count_desired = BBTOB(numblks);
 	bp->b_vec[0].bv_bn = XFS_BUF_DADDR_NULL;
 	bp->b_vec[0].bv_len = bp->b_buffer_length;
 	bp->b_flags &= ~XBF_MAPPED;
@@ -849,12 +929,16 @@ xfs_buf_get_uncached(
 	unsigned long		page_count;
 	int			error, i;
 	xfs_buf_t		*bp;
+	struct xfs_bmbt_irec	map = {
+		.br_blockcount = numblks,
+		.br_state = XFS_EXT_DADDR,
+	};
 
-	bp = xfs_buf_alloc(target, 0, numblks, 0);
+	bp = xfs_buf_alloc_irec(target, &map, 1, 0);
 	if (unlikely(bp == NULL))
 		goto fail;
 
-	page_count = PAGE_ALIGN(numblks << BBSHIFT) >> PAGE_SHIFT;
+	page_count = PAGE_ALIGN(BBTOB(numblks)) >> PAGE_SHIFT;
 	error = _xfs_buf_get_pages(bp, page_count, 0);
 	if (error)
 		goto fail_free_buf;
@@ -1248,36 +1332,38 @@ xfs_buf_bio_end_io(
 	bio_put(bio);
 }
 
-STATIC void
-_xfs_buf_ioapply(
-	xfs_buf_t		*bp)
+static void
+_xfs_buf_ioapply_vec(
+	struct xfs_buf	*bp,
+	int		vec,
+	int		*buf_offset,
+	int		*count,
+	int		rw)
 {
-	int			rw, map_i, total_nr_pages, nr_pages;
+	int			map_i;
+	int			total_nr_pages = bp->b_page_count;
+	int			nr_pages;
 	struct bio		*bio;
-	int			offset = bp->b_offset;
-	int			size = bp->b_count_desired;
-	sector_t		sector = bp->b_vec[0].bv_bn;
+	sector_t		sector = bp->b_vec[vec].bv_bn;
+	int			size;
+	int			offset;
 
 	total_nr_pages = bp->b_page_count;
-	map_i = 0;
 
-	if (bp->b_flags & XBF_WRITE) {
-		if (bp->b_flags & XBF_SYNCIO)
-			rw = WRITE_SYNC;
-		else
-			rw = WRITE;
-		if (bp->b_flags & XBF_FUA)
-			rw |= REQ_FUA;
-		if (bp->b_flags & XBF_FLUSH)
-			rw |= REQ_FLUSH;
-	} else if (bp->b_flags & XBF_READ_AHEAD) {
-		rw = READA;
-	} else {
-		rw = READ;
+	/* skip the pages in the buffer before the start offset */
+	map_i = 0;
+	offset = *buf_offset;
+	while (offset >= PAGE_SIZE) {
+		map_i++;
+		offset -= PAGE_SIZE;
 	}
 
-	/* we only use the buffer cache for meta-data */
-	rw |= REQ_META;
+	/*
+	 * Limit the IO size to the length of the current vector, and update
+	 * the remaining IO count for the next time around.
+	 */
+	size = min_t(int, bp->b_vec[vec].bv_len, *count);
+	*count -= size;
 
 next_chunk:
 	atomic_inc(&bp->b_io_remaining);
@@ -1291,7 +1377,6 @@ next_chunk:
 	bio->bi_end_io = xfs_buf_bio_end_io;
 	bio->bi_private = bp;
 
-
 	for (; size && nr_pages; nr_pages--, map_i++) {
 		int	rbytes, nbytes = PAGE_SIZE - offset;
 
@@ -1303,7 +1388,7 @@ next_chunk:
 			break;
 
 		offset = 0;
-		sector += nbytes >> BBSHIFT;
+		sector += BTOBB(nbytes);
 		size -= nbytes;
 		total_nr_pages--;
 	}
@@ -1320,8 +1405,56 @@ next_chunk:
 		xfs_buf_ioerror(bp, EIO);
 		bio_put(bio);
 	}
+
+	/* update the start offset for the next IO */
+	*buf_offset += bp->b_vec[vec].bv_len;
+}
+
+STATIC void
+_xfs_buf_ioapply(
+	struct xfs_buf	*bp)
+{
+	int		rw;
+	int		offset;
+	int		size;
+	int		i;
+
+	if (bp->b_flags & XBF_WRITE) {
+		if (bp->b_flags & XBF_SYNCIO)
+			rw = WRITE_SYNC;
+		else
+			rw = WRITE;
+		if (bp->b_flags & XBF_FUA)
+			rw |= REQ_FUA;
+		if (bp->b_flags & XBF_FLUSH)
+			rw |= REQ_FLUSH;
+	} else if (bp->b_flags & XBF_READ_AHEAD) {
+		rw = READA;
+	} else {
+		rw = READ;
+	}
+
+	/* we only use the buffer cache for meta-data */
+	rw |= REQ_META;
+
+	/*
+	 * Walk all the vectors issuing IO on them. Set up the initial offset
+	 * into the buffer and the desired IO size before we start -
+	 * _xfs_buf_ioapply_vec() will modify them appropriately for each
+	 * subsequent call.
+	 */
+	offset = bp->b_offset;
+	size = bp->b_count_desired;
+	for (i = 0; i < bp->b_vec_count; i++) {
+		_xfs_buf_ioapply_vec(bp, i, &offset, &size, rw);
+		if (bp->b_error)
+			break;
+		if (size <= 0)
+			break;	/* all done */
+	}
 }
 
+
 int
 xfs_buf_iorequest(
 	xfs_buf_t		*bp)
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index e3cbd73..71c9665 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -177,11 +177,10 @@ typedef struct xfs_buf {
 
 
 /* Finding and Reading Buffers */
-struct xfs_buf *_xfs_buf_find(struct xfs_buftarg *target, xfs_daddr_t blkno,
-				size_t numblks, xfs_buf_flags_t flags,
-				struct xfs_buf *new_bp);
+struct xfs_buf *xfs_buf_find(struct xfs_buftarg *target, xfs_daddr_t blkno,
+				size_t numblks, xfs_buf_flags_t flags);
 #define xfs_incore(buftarg,blkno,len,lockit) \
-	_xfs_buf_find(buftarg, blkno ,len, lockit, NULL)
+	xfs_buf_find(buftarg, blkno, len, lockit)
 
 struct xfs_buf *xfs_buf_get(struct xfs_buftarg *target, xfs_daddr_t blkno,
 				size_t numblks, xfs_buf_flags_t flags);
-- 
1.7.5.4
