From: Dave Chinner <dchinner@xxxxxxxxxx>

Add support for initialising and doing IO on multi-segment compound
buffers. Buffers now carry an array of IO vectors (b_vec): the vector
lengths are set up at allocation time, the vector addresses are filled
in once the buffer is inserted into the cache, and _xfs_buf_ioapply()
walks the vector array issuing the IO for each segment separately.

Signed-off-by: Dave Chinner <dchinner@xxxxxxxxxx>
---
 fs/xfs/xfs_buf.c |  317 ++++++++++++++++++++++++++++++++++++++----------------
 fs/xfs/xfs_buf.h |    7 +-
 2 files changed, 228 insertions(+), 96 deletions(-)

diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 2ca9086..c533597 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -166,13 +166,15 @@ xfs_buf_stale(
 }
 
 struct xfs_buf *
-xfs_buf_alloc(
+xfs_buf_alloc_irec(
 	struct xfs_buftarg	*target,
-	xfs_daddr_t		blkno,
-	size_t			numblks,
+	struct xfs_bmbt_irec	*map,
+	int			nmaps,
 	xfs_buf_flags_t		flags)
 {
+	xfs_daddr_t		blkno;
 	struct xfs_buf		*bp;
+	int			i;
 
 	bp = kmem_zone_alloc(xfs_buf_zone, xb_to_km(flags));
 	if (unlikely(!bp))
@@ -193,21 +195,45 @@ xfs_buf_alloc(
 	sema_init(&bp->b_sema, 0); /* held, no waiters */
 	XB_SET_OWNER(bp);
 	bp->b_target = target;
-	bp->b_file_offset = blkno << BBSHIFT;
+	bp->b_flags = flags;
+
+	/* initialise the buffer IO vector array appropriately */
+	if (nmaps <= XB_VECS) {
+		bp->b_vec = &bp->b_vec_array[0];
+	} else {
+		bp->b_vec = kmem_alloc(nmaps * sizeof(*bp->b_vec),
+				       xb_to_km(flags));
+		if (!bp->b_vec) {
+			kmem_zone_free(xfs_buf_zone, bp);
+			return NULL;
+		}
+	}
+	bp->b_vec_count = nmaps;
+	bp->b_buffer_length = 0;
+
+	if (map[0].br_state == XFS_EXT_DADDR)
+		blkno = map[0].br_startblock;
+	else
+		blkno = XFS_FSB_TO_DADDR(target->bt_mount, map[0].br_startblock);
+	bp->b_file_offset = BBTOB(blkno);
+
+	for (i = 0; i < nmaps; i++) {
+		if (map[0].br_state == XFS_EXT_DADDR) {
+			bp->b_vec[i].bv_len = BBTOB(map[i].br_blockcount);
+		} else {
+			bp->b_vec[i].bv_len = XFS_FSB_TO_B(target->bt_mount,
+							   map[i].br_blockcount);
+		}
+		bp->b_buffer_length += bp->b_vec[i].bv_len;
+		bp->b_vec[i].bv_bn = XFS_BUF_DADDR_NULL;
+	}
+
 	/*
 	 * Set buffer_length and count_desired to the same value initially.
 	 * I/O routines should use count_desired, which will be the same in
 	 * most cases but may be reset (e.g. XFS recovery).
 	 */
-	bp->b_buffer_length = bp->b_count_desired = numblks << BBSHIFT;
-	bp->b_flags = flags;
-
-	/* XXX: we have the block number. Why don't we just set it here? */
-	/* initialise the buffer IO vector array appropriately */
-	bp->b_vec_count = 1;
-	bp->b_vec = &bp->b_vec_array[0];
-	bp->b_vec[0].bv_bn = XFS_BUF_DADDR_NULL;
-	bp->b_vec[0].bv_len = bp->b_buffer_length;
+	bp->b_count_desired = bp->b_buffer_length;
 
 	atomic_set(&bp->b_pin_count, 0);
 	init_waitqueue_head(&bp->b_waiters);
@@ -218,6 +244,22 @@ xfs_buf_alloc(
 	return bp;
 }
 
+struct xfs_buf *
+xfs_buf_alloc(
+	struct xfs_buftarg	*target,
+	xfs_daddr_t		blkno,
+	size_t			numblks,
+	xfs_buf_flags_t		flags)
+{
+	struct xfs_bmbt_irec	map = {
+		.br_startblock = blkno,
+		.br_blockcount = numblks,
+		.br_state = XFS_EXT_DADDR,
+	};
+
+	return xfs_buf_alloc_irec(target, &map, 1, flags);
+}
+
 /*
  * Allocate a page array capable of holding a specified number
  * of pages, and point the page buf at it.
@@ -287,6 +329,10 @@ xfs_buf_free(
 		}
 	} else if (bp->b_flags & _XBF_KMEM)
 		kmem_free(bp->b_addr);
+
+	if (bp->b_vec_count > XB_VECS)
+		kmem_free(bp->b_vec);
+
 	_xfs_buf_free_pages(bp);
 	kmem_zone_free(xfs_buf_zone, bp);
 }
@@ -429,11 +475,11 @@ _xfs_buf_map_pages(
  *	a given range of an inode.  The buffer is returned
  *	locked.  No I/O is implied by this call.
 */
-xfs_buf_t *
-_xfs_buf_find(
+static xfs_buf_t *
+xfs_buf_find_irec(
 	xfs_buftarg_t		*btp,
-	xfs_daddr_t		blkno,
-	size_t			numblks,
+	struct xfs_bmbt_irec	*map,
+	int			nmaps,
 	xfs_buf_flags_t		flags,
 	xfs_buf_t		*new_bp)
 {
@@ -442,14 +488,37 @@ _xfs_buf_find(
 	struct xfs_perag	*pag;
 	struct rb_node		**rbp;
 	struct rb_node		*parent;
-	xfs_buf_t		*bp;
+	struct xfs_buf		*bp;
+	xfs_daddr_t		blkno;
+	int			i;
 
-	offset = blkno << BBSHIFT;
-	numbytes = numblks << BBSHIFT;
+	/*
+	 * Check for IOs smaller than the sector size or not sector aligned,
+	 * calculate the size of the buffer and initialise variables that gcc
+	 * thinks are used uninitialised because it can't grok the fact that
+	 * they are initialised within the loop body.
+	 */
+	numbytes = 0;
+	offset = 0;
+	blkno = XFS_BUF_DADDR_NULL;
+	for (i = 0; i < nmaps; i++) {
+		size_t	len;
+
+		if (map[0].br_state == XFS_EXT_DADDR) {
+			len = BBTOB(map[i].br_blockcount);
+			blkno = map[i].br_startblock;
+		} else {
+			len = XFS_FSB_TO_B(btp->bt_mount, map[i].br_blockcount);
+			blkno = XFS_FSB_TO_DADDR(btp->bt_mount,
						 map[i].br_startblock);
+		}
+		ASSERT(!(len < (1 << btp->bt_sshift)));
+		ASSERT(!(BBTOB(blkno) & (xfs_off_t)btp->bt_smask));
+		numbytes += len;
+		if (i == 0)
+			offset = BBTOB(blkno);
+
-	/* Check for IOs smaller than the sector size / not sector aligned */
-	ASSERT(!(numbytes < (1 << btp->bt_sshift)));
-	ASSERT(!(offset & (xfs_off_t)btp->bt_smask));
+	}
 
 	/* get tree root */
 	pag = xfs_perag_get(btp->bt_mount,
@@ -530,27 +599,6 @@ found:
 	return bp;
 }
 
-/*
- * Assembles a buffer covering the specified range. The code is optimised for
- * cache hits, as metadata intensive workloads will see 3 orders of magnitude
- * more hits than misses.
- */
-struct xfs_buf *
-xfs_buf_get(
-	xfs_buftarg_t		*target,
-	xfs_daddr_t		blkno,
-	size_t			numblks,
-	xfs_buf_flags_t		flags)
-{
-	struct xfs_bmbt_irec	map = {
-		.br_startblock = blkno,
-		.br_blockcount = numblks,
-		.br_state = XFS_EXT_DADDR,
-	};
-
-	return xfs_buf_get_irec(target, &map, 1, flags);
-}
-
 STATIC int
 _xfs_buf_read(
 	xfs_buf_t		*bp,
@@ -571,7 +619,9 @@ _xfs_buf_read(
 }
 
 /*
- * XXX: only supports a single map for now
+ * Assembles a buffer covering the specified range. The code is optimised for
+ * cache hits, as metadata intensive workloads will see 3 orders of magnitude
+ * more hits than misses.
 */
 struct xfs_buf *
 xfs_buf_get_irec(
@@ -580,31 +630,20 @@ xfs_buf_get_irec(
 	int			nmaps,
 	xfs_buf_flags_t		flags)
 {
-	xfs_daddr_t		blkno;
-	size_t			numblks;
 	struct xfs_buf		*bp;
 	struct xfs_buf		*new_bp;
 	int			error = 0;
+	int			i;
 
-	ASSERT_ALWAYS(nmaps == 1);
-
-	if (map->br_state == XFS_EXT_DADDR) {
-		blkno = map->br_startblock;
-		numblks = map->br_blockcount;
-	} else {
-		blkno = XFS_FSB_TO_DADDR(target->bt_mount, map->br_startblock);
-		numblks = XFS_FSB_TO_BB(target->bt_mount, map->br_blockcount);
-	}
-
-	bp = _xfs_buf_find(target, blkno, numblks, flags, NULL);
+	bp = xfs_buf_find_irec(target, map, nmaps, flags, NULL);
 	if (likely(bp))
 		goto found;
 
-	new_bp = xfs_buf_alloc(target, blkno, numblks, flags);
+	new_bp = xfs_buf_alloc_irec(target, map, nmaps, flags);
 	if (unlikely(!new_bp))
 		return NULL;
 
-	bp = _xfs_buf_find(target, blkno, numblks, flags, new_bp);
+	bp = xfs_buf_find_irec(target, map, nmaps, flags, new_bp);
 	if (!bp) {
 		kmem_zone_free(xfs_buf_zone, new_bp);
 		return NULL;
@@ -618,11 +657,17 @@ xfs_buf_get_irec(
 	kmem_zone_free(xfs_buf_zone, new_bp);
 
 	/*
-	 * Now we have a workable buffer, fill in the block number so
-	 * that we can do IO on it.
+	 * Now we have a workable buffer, fill in the block vector addresses
+	 * so that we can do IO on it. The lengths have already been filled in
+	 * by xfs_buf_alloc_irec().
 	 */
-	bp->b_vec[0].bv_bn = blkno;
-	bp->b_vec[0].bv_len = bp->b_buffer_length;
+	for (i = 0; i < nmaps; i++) {
+		if (map[0].br_state == XFS_EXT_DADDR)
+			bp->b_vec[i].bv_bn = map[i].br_startblock;
+		else
+			bp->b_vec[i].bv_bn = XFS_FSB_TO_DADDR(target->bt_mount,
+							map[i].br_startblock);
+	}
 	bp->b_count_desired = bp->b_buffer_length;
 
 found:
@@ -699,6 +744,39 @@ xfs_buf_readahead_irec(
 }
 
 xfs_buf_t *
+xfs_buf_find(
+	xfs_buftarg_t		*target,
+	xfs_daddr_t		blkno,
+	size_t			numblks,
+	xfs_buf_flags_t		flags)
+{
+	struct xfs_bmbt_irec	map = {
+		.br_startblock = blkno,
+		.br_blockcount = numblks,
+		.br_state = XFS_EXT_DADDR,
+	};
+
+	return xfs_buf_find_irec(target, &map, 1, flags, NULL);
+}
+
+struct xfs_buf *
+xfs_buf_get(
+	xfs_buftarg_t		*target,
+	xfs_daddr_t		blkno,
+	size_t			numblks,
+	xfs_buf_flags_t		flags)
+{
+	struct xfs_bmbt_irec	map = {
+		.br_startblock = blkno,
+		.br_blockcount = numblks,
+		.br_state = XFS_EXT_DADDR,
+	};
+
+	return xfs_buf_get_irec(target, &map, 1, flags);
+}
+
+
+xfs_buf_t *
 xfs_buf_read(
 	xfs_buftarg_t		*target,
 	xfs_daddr_t		blkno,
@@ -773,6 +851,8 @@ xfs_buf_set_empty(
 	struct xfs_buf		*bp,
 	size_t			numblks)
 {
+	ASSERT(bp->b_vec_count == 1);
+
 	if (bp->b_pages)
 		_xfs_buf_free_pages(bp);
 
@@ -780,7 +860,7 @@ xfs_buf_set_empty(
 	bp->b_page_count = 0;
 	bp->b_addr = NULL;
 	bp->b_file_offset = 0;
-	bp->b_buffer_length = bp->b_count_desired = numblks << BBSHIFT;
+	bp->b_buffer_length = bp->b_count_desired = BBTOB(numblks);
 	bp->b_vec[0].bv_bn = XFS_BUF_DADDR_NULL;
 	bp->b_vec[0].bv_len = bp->b_buffer_length;
 	bp->b_flags &= ~XBF_MAPPED;
@@ -849,12 +929,16 @@ xfs_buf_get_uncached(
 	unsigned long		page_count;
 	int			error, i;
 	xfs_buf_t		*bp;
+	struct xfs_bmbt_irec	map = {
+		.br_blockcount = numblks,
+		.br_state = XFS_EXT_DADDR,
+	};
 
-	bp = xfs_buf_alloc(target, 0, numblks, 0);
+	bp = xfs_buf_alloc_irec(target, &map, 1, 0);
 	if (unlikely(bp == NULL))
 		goto fail;
 
-	page_count = PAGE_ALIGN(numblks << BBSHIFT) >> PAGE_SHIFT;
+	page_count = PAGE_ALIGN(BBTOB(numblks)) >> PAGE_SHIFT;
 	error = _xfs_buf_get_pages(bp, page_count, 0);
 	if (error)
 		goto fail_free_buf;
@@ -1248,36 +1332,38 @@ xfs_buf_bio_end_io(
 	bio_put(bio);
 }
 
-STATIC void
-_xfs_buf_ioapply(
-	xfs_buf_t	*bp)
+static void
+_xfs_buf_ioapply_vec(
+	struct xfs_buf	*bp,
+	int		vec,
+	int		*buf_offset,
+	int		*count,
+	int		rw)
 {
-	int		rw, map_i, total_nr_pages, nr_pages;
+	int		map_i;
+	int		total_nr_pages;
+	int		nr_pages;
 	struct bio	*bio;
-	int		offset = bp->b_offset;
-	int		size = bp->b_count_desired;
-	sector_t	sector = bp->b_vec[0].bv_bn;
+	sector_t	sector = bp->b_vec[vec].bv_bn;
+	int		size;
+	int		offset;
 
 	total_nr_pages = bp->b_page_count;
-	map_i = 0;
 
-	if (bp->b_flags & XBF_WRITE) {
-		if (bp->b_flags & XBF_SYNCIO)
-			rw = WRITE_SYNC;
-		else
-			rw = WRITE;
-		if (bp->b_flags & XBF_FUA)
-			rw |= REQ_FUA;
-		if (bp->b_flags & XBF_FLUSH)
-			rw |= REQ_FLUSH;
-	} else if (bp->b_flags & XBF_READ_AHEAD) {
-		rw = READA;
-	} else {
-		rw = READ;
+	/* skip the pages in the buffer before the start offset */
+	map_i = 0;
+	offset = *buf_offset;
+	while (offset >= PAGE_SIZE) {
+		map_i++;
+		offset -= PAGE_SIZE;
 	}
 
-	/* we only use the buffer cache for meta-data */
-	rw |= REQ_META;
+	/*
+	 * Limit the IO size to the length of the current vector, and update
+	 * the remaining IO count for the next time around.
+ */ + size = min_t(int, bp->b_vec[vec].bv_len, *count); + *count -= size; next_chunk: atomic_inc(&bp->b_io_remaining); @@ -1291,7 +1377,6 @@ next_chunk: bio->bi_end_io = xfs_buf_bio_end_io; bio->bi_private = bp; - for (; size && nr_pages; nr_pages--, map_i++) { int rbytes, nbytes = PAGE_SIZE - offset; @@ -1303,7 +1388,7 @@ next_chunk: break; offset = 0; - sector += nbytes >> BBSHIFT; + sector += BTOBB(nbytes); size -= nbytes; total_nr_pages--; } @@ -1320,8 +1405,56 @@ next_chunk: xfs_buf_ioerror(bp, EIO); bio_put(bio); } + + /* update the start offset for the next IO */ + *buf_offset += bp->b_vec[vec].bv_len; +} + +STATIC void +_xfs_buf_ioapply( + struct xfs_buf *bp) +{ + int rw; + int offset; + int size; + int i; + + if (bp->b_flags & XBF_WRITE) { + if (bp->b_flags & XBF_SYNCIO) + rw = WRITE_SYNC; + else + rw = WRITE; + if (bp->b_flags & XBF_FUA) + rw |= REQ_FUA; + if (bp->b_flags & XBF_FLUSH) + rw |= REQ_FLUSH; + } else if (bp->b_flags & XBF_READ_AHEAD) { + rw = READA; + } else { + rw = READ; + } + + /* we only use the buffer cache for meta-data */ + rw |= REQ_META; + + /* + * Walk all the vectors issuing IO on them. Set up the initial offset + * into the buffer and the desired IO size before we start - + * _xfs_buf_ioapply_vec() will modify them appropriately for each + * subsequent call. + */ + offset = bp->b_offset; + size = bp->b_count_desired; + for (i = 0; i < bp->b_vec_count; i++) { + _xfs_buf_ioapply_vec(bp, i, &offset, &size, rw); + if (bp->b_error) + break; + if (size <= 0) + break; /* all done */ + } } + int xfs_buf_iorequest( xfs_buf_t *bp) diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index e3cbd73..71c9665 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h @@ -177,11 +177,10 @@ typedef struct xfs_buf { /* Finding and Reading Buffers */ -struct xfs_buf *_xfs_buf_find(struct xfs_buftarg *target, xfs_daddr_t blkno, - size_t numblks, xfs_buf_flags_t flags, - struct xfs_buf *new_bp); +struct xfs_buf *xfs_buf_find(struct xfs_buftarg *target, xfs_daddr_t blkno, + size_t numblks, xfs_buf_flags_t flags); #define xfs_incore(buftarg,blkno,len,lockit) \ - _xfs_buf_find(buftarg, blkno ,len, lockit, NULL) + xfs_buf_find(buftarg, blkno, len, lockit) struct xfs_buf *xfs_buf_get(struct xfs_buftarg *target, xfs_daddr_t blkno, size_t numblks, xfs_buf_flags_t flags); -- 1.7.5.4 _______________________________________________ xfs mailing list xfs@xxxxxxxxxxx http://oss.sgi.com/mailman/listinfo/xfs