From: Darrick J. Wong <djwong@xxxxxxxxxx>

Map the xfile pages directly into xfs_buf to reduce memory overhead.
It's silly to use memory to stage changes to shmem pages for ephemeral
btrees that don't care about transactionality.

Signed-off-by: Darrick J. Wong <djwong@xxxxxxxxxx>
---
 fs/xfs/libxfs/xfs_btree_mem.h  |    6 ++
 fs/xfs/libxfs/xfs_rmap_btree.c |    1 +
 fs/xfs/scrub/rcbag_btree.c     |    1 +
 fs/xfs/scrub/xfbtree.c         |   23 +++-
 fs/xfs/xfs_buf.c               |  110 ++++++++++++++++++++++-------
 fs/xfs/xfs_buf.h               |   16 ++++
 fs/xfs/xfs_buf_xfile.c         |  152 ++++++++++++++++++++++++++++++++++++++++
 fs/xfs/xfs_buf_xfile.h         |   11 +++
 8 files changed, 292 insertions(+), 28 deletions(-)

diff --git a/fs/xfs/libxfs/xfs_btree_mem.h b/fs/xfs/libxfs/xfs_btree_mem.h
index 1f961f3f55444..cfb30cb1aabc6 100644
--- a/fs/xfs/libxfs/xfs_btree_mem.h
+++ b/fs/xfs/libxfs/xfs_btree_mem.h
@@ -17,8 +17,14 @@ struct xfbtree_config {
 
 	/* Owner of this btree. */
 	unsigned long long		owner;
+
+	/* XFBTREE_* flags */
+	unsigned int			flags;
 };
 
+/* buffers should be directly mapped from memory */
+#define XFBTREE_DIRECT_MAP	(1U << 0)
+
 #ifdef CONFIG_XFS_BTREE_IN_XFILE
 unsigned int xfs_btree_mem_head_nlevels(struct xfs_buf *head_bp);
 
diff --git a/fs/xfs/libxfs/xfs_rmap_btree.c b/fs/xfs/libxfs/xfs_rmap_btree.c
index 23841ee6e2ff6..71d32f9fee14d 100644
--- a/fs/xfs/libxfs/xfs_rmap_btree.c
+++ b/fs/xfs/libxfs/xfs_rmap_btree.c
@@ -672,6 +672,7 @@ xfs_rmapbt_mem_create(
 		.btree_ops		= &xfs_rmapbt_mem_ops,
 		.target			= target,
 		.owner			= agno,
+		.flags			= XFBTREE_DIRECT_MAP,
 	};
 
 	return xfbtree_create(mp, &cfg, xfbtreep);
diff --git a/fs/xfs/scrub/rcbag_btree.c b/fs/xfs/scrub/rcbag_btree.c
index 3d66e80b7bc25..9807e08129fe4 100644
--- a/fs/xfs/scrub/rcbag_btree.c
+++ b/fs/xfs/scrub/rcbag_btree.c
@@ -233,6 +233,7 @@ rcbagbt_mem_create(
 	struct xfbtree_config	cfg = {
 		.btree_ops		= &rcbagbt_mem_ops,
 		.target			= target,
+		.flags			= XFBTREE_DIRECT_MAP,
 	};
 
 	return xfbtree_create(mp, &cfg, xfbtreep);
diff --git a/fs/xfs/scrub/xfbtree.c b/fs/xfs/scrub/xfbtree.c
index 016026947019a..9e557d87d1c9c 100644
--- a/fs/xfs/scrub/xfbtree.c
+++ b/fs/xfs/scrub/xfbtree.c
@@ -501,6 +501,9 @@ xfbtree_create(
 	if (!xfbt)
 		return -ENOMEM;
 	xfbt->target = cfg->target;
+	if (cfg->flags & XFBTREE_DIRECT_MAP)
+		xfbt->target->bt_flags |= XFS_BUFTARG_DIRECT_MAP;
+
 	xfboff_bitmap_init(&xfbt->freespace);
 
 	/* Set up min/maxrecs for this btree. */
@@ -753,23 +756,37 @@ xfbtree_trans_commit(
 		dirty = xfbtree_trans_bdetach(tp, bp);
 		if (dirty && !corrupt) {
-			xfs_failaddr_t	fa = bp->b_ops->verify_struct(bp);
+			xfs_failaddr_t	fa;
 
 			/*
 			 * Because this btree is ephemeral, validate the buffer
 			 * structure before delwri_submit so that we can return
 			 * corruption errors to the caller without shutting
 			 * down the filesystem.
 			 *
+			 * Buffers that are directly mapped to the xfile do not
+			 * need to be queued for IO at all.  Check if the DRAM
+			 * has been poisoned, however.
+			 *
 			 * If the buffer fails verification, log the failure
 			 * but continue walking the transaction items so that
 			 * we remove all ephemeral btree buffers.
 			 */
+			if (xfs_buf_check_poisoned(bp)) {
+				corrupt = true;
+				xfs_verifier_error(bp, -EFSCORRUPTED,
+						__this_address);
+				continue;
+			}
+
+			fa = bp->b_ops->verify_struct(bp);
 			if (fa) {
 				corrupt = true;
 				xfs_verifier_error(bp, -EFSCORRUPTED, fa);
-			} else {
+				continue;
+			}
+
+			if (!(bp->b_flags & _XBF_DIRECT_MAP))
 				xfs_buf_delwri_queue_here(bp, &buffer_list);
-			}
 		}
 
 		xfs_buf_relse(bp);
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index b62518968e784..ca7657d0ea592 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -280,19 +280,26 @@ xfs_buf_free_pages(
 
 	ASSERT(bp->b_flags & _XBF_PAGES);
 
-	if (xfs_buf_is_vmapped(bp))
-		vm_unmap_ram(bp->b_addr, bp->b_page_count);
-
 	for (i = 0; i < bp->b_page_count; i++) {
 		if (bp->b_pages[i])
 			__free_page(bp->b_pages[i]);
 	}
 	mm_account_reclaimed_pages(bp->b_page_count);
 
+	xfs_buf_free_page_array(bp);
+}
+
+void
+xfs_buf_free_page_array(
+	struct xfs_buf	*bp)
+{
+	ASSERT(bp->b_flags & _XBF_PAGES);
+
 	if (bp->b_pages != bp->b_page_array)
 		kmem_free(bp->b_pages);
 	bp->b_pages = NULL;
 	bp->b_flags &= ~_XBF_PAGES;
+	bp->b_page_count = 0;
 }
 
 static void
@@ -313,7 +320,12 @@ xfs_buf_free(
 
 	ASSERT(list_empty(&bp->b_lru));
 
-	if (bp->b_flags & _XBF_PAGES)
+	if (xfs_buf_is_vmapped(bp))
+		vm_unmap_ram(bp->b_addr, bp->b_page_count);
+
+	if (bp->b_flags & _XBF_DIRECT_MAP)
+		xfile_buf_unmap_pages(bp);
+	else if (bp->b_flags & _XBF_PAGES)
 		xfs_buf_free_pages(bp);
 	else if (bp->b_flags & _XBF_KMEM)
 		kmem_free(bp->b_addr);
@@ -352,20 +364,14 @@ xfs_buf_alloc_kmem(
 	return 0;
 }
 
-static int
-xfs_buf_alloc_pages(
+/* Make sure that we have a page list */
+int
+xfs_buf_alloc_page_array(
 	struct xfs_buf	*bp,
-	xfs_buf_flags_t	flags)
+	gfp_t		gfp_mask)
 {
-	gfp_t		gfp_mask = __GFP_NOWARN;
-	long		filled = 0;
+	ASSERT(!(bp->b_flags & _XBF_PAGES));
 
-	if (flags & XBF_READ_AHEAD)
-		gfp_mask |= __GFP_NORETRY;
-	else
-		gfp_mask |= GFP_NOFS;
-
-	/* Make sure that we have a page list */
 	bp->b_page_count = DIV_ROUND_UP(BBTOB(bp->b_length), PAGE_SIZE);
 	if (bp->b_page_count <= XB_PAGES) {
 		bp->b_pages = bp->b_page_array;
@@ -375,7 +381,28 @@ xfs_buf_alloc_pages(
 		if (!bp->b_pages)
 			return -ENOMEM;
 	}
+	bp->b_flags |= _XBF_PAGES;
+	return 0;
+}
+
+static int
+xfs_buf_alloc_pages(
+	struct xfs_buf	*bp,
+	xfs_buf_flags_t	flags)
+{
+	gfp_t		gfp_mask = __GFP_NOWARN;
+	long		filled = 0;
+	int		error;
+
+	if (flags & XBF_READ_AHEAD)
+		gfp_mask |= __GFP_NORETRY;
+	else
+		gfp_mask |= GFP_NOFS;
+
+	error = xfs_buf_alloc_page_array(bp, gfp_mask);
+	if (error)
+		return error;
 
 	/* Assure zeroed buffer for non-read cases. */
 	if (!(flags & XBF_READ))
 		gfp_mask |= __GFP_ZERO;
@@ -418,7 +445,8 @@ _xfs_buf_map_pages(
 	struct xfs_buf		*bp,
 	xfs_buf_flags_t		flags)
 {
-	ASSERT(bp->b_flags & _XBF_PAGES);
+	ASSERT(bp->b_flags & (_XBF_PAGES | _XBF_DIRECT_MAP));
+
 	if (bp->b_page_count == 1) {
 		/* A single page buffer is always mappable */
 		bp->b_addr = page_address(bp->b_pages[0]);
@@ -569,7 +597,7 @@ xfs_buf_find_lock(
 			return -ENOENT;
 		}
 		ASSERT((bp->b_flags & _XBF_DELWRI_Q) == 0);
-		bp->b_flags &= _XBF_KMEM | _XBF_PAGES;
+		bp->b_flags &= _XBF_KMEM | _XBF_PAGES | _XBF_DIRECT_MAP;
 		bp->b_ops = NULL;
 	}
 	return 0;
@@ -628,18 +656,36 @@ xfs_buf_find_insert(
 		goto out_drop_pag;
 
 	/*
-	 * For buffers that fit entirely within a single page, first attempt to
-	 * allocate the memory from the heap to minimise memory usage. If we
-	 * can't get heap memory for these small buffers, we fall back to using
-	 * the page allocator.
+	 * If the caller is ok with direct maps to xfile pages, try that.
+	 * ENOTBLK is the magic code to fall back to allocating memory.
 	 */
-	if (BBTOB(new_bp->b_length) >= PAGE_SIZE ||
-	    xfs_buf_alloc_kmem(new_bp, flags) < 0) {
-		error = xfs_buf_alloc_pages(new_bp, flags);
-		if (error)
+	if (xfile_buftarg_can_direct_map(btp)) {
+		error = xfile_buf_map_pages(new_bp, flags);
+		if (error && error != -ENOTBLK)
 			goto out_free_buf;
+		if (!error)
+			goto insert;
 	}
 
+	/*
+	 * For buffers that fit entirely within a single page, first attempt to
+	 * allocate the memory from the heap to minimise memory usage.
+	 */
+	if (BBTOB(new_bp->b_length) < PAGE_SIZE) {
+		error = xfs_buf_alloc_kmem(new_bp, flags);
+		if (!error)
+			goto insert;
+	}
+
+	/*
+	 * For larger buffers or if we can't get heap memory for these small
+	 * buffers, fall back to using the page allocator.
+	 */
+	error = xfs_buf_alloc_pages(new_bp, flags);
+	if (error)
+		goto out_free_buf;
+
+insert:
 	spin_lock(&bch->bc_lock);
 	bp = rhashtable_lookup_get_insert_fast(&bch->bc_hash,
 			&new_bp->b_rhash_head, xfs_buf_hash_params);
@@ -1584,6 +1630,20 @@ xfs_buf_end_sync_io(
 	xfs_buf_ioend(bp);
 }
 
+bool
+xfs_buf_check_poisoned(
+	struct xfs_buf		*bp)
+{
+	unsigned int		i;
+
+	for (i = 0; i < bp->b_page_count; i++) {
+		if (PageHWPoison(bp->b_pages[i]))
+			return true;
+	}
+
+	return false;
+}
+
 STATIC void
 _xfs_buf_ioapply(
 	struct xfs_buf	*bp)
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index 5a6cf3d5a9f53..9d05c376d9dd8 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -43,6 +43,11 @@ struct xfile;
 #define _XBF_PAGES	 (1u << 20)/* backed by refcounted pages */
 #define _XBF_KMEM	 (1u << 21)/* backed by heap memory */
 #define _XBF_DELWRI_Q	 (1u << 22)/* buffer on a delwri queue */
+#ifdef CONFIG_XFS_IN_MEMORY_FILE
+# define _XBF_DIRECT_MAP (1u << 23)/* pages directly mapped to storage */
+#else
+# define _XBF_DIRECT_MAP (0)
+#endif
 
 /* flags used only as arguments to access routines */
@@ -72,6 +77,7 @@ typedef unsigned int xfs_buf_flags_t;
 	{ _XBF_PAGES,		"PAGES" }, \
 	{ _XBF_KMEM,		"KMEM" }, \
 	{ _XBF_DELWRI_Q,	"DELWRI_Q" }, \
+	{ _XBF_DIRECT_MAP,	"DIRECT_MAP" }, \
 	/* The following interface flags should never be set */ \
 	{ XBF_LIVESCAN,		"LIVESCAN" }, \
 	{ XBF_INCORE,		"INCORE" }, \
@@ -131,8 +137,14 @@ typedef struct xfs_buftarg {
 #ifdef CONFIG_XFS_IN_MEMORY_FILE
 /* in-memory buftarg via bt_xfile */
 # define XFS_BUFTARG_XFILE	(1U << 0)
+/*
+ * Buffer pages are direct-mapped to the xfile; caller does not care about
+ * transactional updates.
+ */
+# define XFS_BUFTARG_DIRECT_MAP	(1U << 1)
 #else
 # define XFS_BUFTARG_XFILE	(0)
+# define XFS_BUFTARG_DIRECT_MAP	(0)
 #endif
 
 #define XB_PAGES	2
@@ -382,6 +394,9 @@ xfs_buf_update_cksum(struct xfs_buf *bp, unsigned long cksum_offset)
 			 cksum_offset);
 }
 
+int xfs_buf_alloc_page_array(struct xfs_buf *bp, gfp_t gfp_mask);
+void xfs_buf_free_page_array(struct xfs_buf *bp);
+
 /*
  * Handling of buftargs.
  */
@@ -453,5 +468,6 @@ xfs_buftarg_verify_daddr(
 int xfs_buf_reverify(struct xfs_buf *bp, const struct xfs_buf_ops *ops);
 bool xfs_verify_magic(struct xfs_buf *bp, __be32 dmagic);
 bool xfs_verify_magic16(struct xfs_buf *bp, __be16 dmagic);
+bool xfs_buf_check_poisoned(struct xfs_buf *bp);
 
 #endif /* __XFS_BUF_H__ */
diff --git a/fs/xfs/xfs_buf_xfile.c b/fs/xfs/xfs_buf_xfile.c
index 51c5c692156b1..be1e54be070ce 100644
--- a/fs/xfs/xfs_buf_xfile.c
+++ b/fs/xfs/xfs_buf_xfile.c
@@ -18,6 +18,11 @@ xfile_buf_ioapply(
 	loff_t			pos = BBTOB(xfs_buf_daddr(bp));
 	size_t			size = BBTOB(bp->b_length);
 
+	if (bp->b_target->bt_flags & XFS_BUFTARG_DIRECT_MAP) {
+		/* direct mapping means no io necessary */
+		return 0;
+	}
+
 	if (bp->b_map_count > 1) {
 		/* We don't need or support multi-map buffers. */
 		ASSERT(0);
@@ -95,3 +100,150 @@ xfile_buftarg_nr_sectors(
 {
 	return xfile_size(btp->bt_xfile) >> SECTOR_SHIFT;
 }
+
+/* Free an xfile page that was directly mapped into the buffer cache. */
+static int
+xfile_buf_put_page(
+	struct xfile		*xfile,
+	loff_t			pos,
+	struct page		*page)
+{
+	struct xfile_page	xfpage = {
+		.page		= page,
+		.pos		= round_down(pos, PAGE_SIZE),
+	};
+
+	lock_page(xfpage.page);
+
+	return xfile_put_page(xfile, &xfpage);
+}
+
+/* Grab the xfile page for this part of the xfile. */
+static int
+xfile_buf_get_page(
+	struct xfile		*xfile,
+	loff_t			pos,
+	unsigned int		len,
+	struct page		**pagep)
+{
+	struct xfile_page	xfpage = { NULL };
+	int			error;
+
+	error = xfile_get_page(xfile, pos, len, &xfpage);
+	if (error)
+		return error;
+
+	/*
+	 * Fall back to regular DRAM buffers if tmpfs gives us fsdata or the
+	 * page pos isn't what we were expecting.
+	 */
+	if (xfpage.fsdata || xfpage.pos != round_down(pos, PAGE_SIZE)) {
+		xfile_put_page(xfile, &xfpage);
+		return -ENOTBLK;
+	}
+
+	/* Unlock the page before we start using it for the buffer cache. */
+	ASSERT(PageUptodate(xfpage.page));
+	unlock_page(xfpage.page);
+
+	*pagep = xfpage.page;
+	return 0;
+}
+
+/*
+ * Try to map storage directly, if the target supports it.  Returns 0 for
+ * success, -ENOTBLK to mean "not supported", or the usual negative errno.
+ */
+int
+xfile_buf_map_pages(
+	struct xfs_buf		*bp,
+	xfs_buf_flags_t		flags)
+{
+	struct xfs_buf_map	*map;
+	gfp_t			gfp_mask = __GFP_NOWARN;
+	const unsigned int	page_align_mask = PAGE_SIZE - 1;
+	unsigned int		m, p, n;
+	int			error;
+
+	ASSERT(xfile_buftarg_can_direct_map(bp->b_target));
+
+	/* For direct-map buffers, each map has to be page aligned. */
+	for (m = 0, map = bp->b_maps; m < bp->b_map_count; m++, map++)
+		if (BBTOB(map->bm_bn | map->bm_len) & page_align_mask)
+			return -ENOTBLK;
+
+	if (flags & XBF_READ_AHEAD)
+		gfp_mask |= __GFP_NORETRY;
+	else
+		gfp_mask |= GFP_NOFS;
+
+	error = xfs_buf_alloc_page_array(bp, gfp_mask);
+	if (error)
+		return error;
+
+	/* Map in the xfile pages. */
+	for (m = 0, p = 0, map = bp->b_maps; m < bp->b_map_count; m++, map++) {
+		for (n = 0; n < map->bm_len; n += BTOBB(PAGE_SIZE)) {
+			unsigned int	len;
+
+			len = min_t(unsigned int, BBTOB(map->bm_len - n),
+					PAGE_SIZE);
+
+			error = xfile_buf_get_page(bp->b_target->bt_xfile,
+					BBTOB(map->bm_bn + n), len,
+					&bp->b_pages[p++]);
+			if (error)
+				goto fail;
+		}
+	}
+
+	bp->b_flags |= _XBF_DIRECT_MAP;
+	return 0;
+
+fail:
+	/*
+	 * Release all the xfile pages and free the page array, since we're
+	 * falling back to a DRAM buffer, which could be pages or a slab
+	 * allocation.
+	 */
+	for (m = 0, p = 0, map = bp->b_maps; m < bp->b_map_count; m++, map++) {
+		for (n = 0; n < map->bm_len; n += BTOBB(PAGE_SIZE)) {
+			if (bp->b_pages[p] == NULL)
+				continue;
+
+			xfile_buf_put_page(bp->b_target->bt_xfile,
+					BBTOB(map->bm_bn + n),
+					bp->b_pages[p++]);
+		}
+	}
+
+	xfs_buf_free_page_array(bp);
+	return error;
+}
+
+/* Unmap all the direct-mapped buffer pages. */
+void
+xfile_buf_unmap_pages(
+	struct xfs_buf		*bp)
+{
+	struct xfs_buf_map	*map;
+	unsigned int		m, p, n;
+	int			error = 0, err2;
+
+	ASSERT(xfile_buftarg_can_direct_map(bp->b_target));
+
+	for (m = 0, p = 0, map = bp->b_maps; m < bp->b_map_count; m++, map++) {
+		for (n = 0; n < map->bm_len; n += BTOBB(PAGE_SIZE)) {
+			err2 = xfile_buf_put_page(bp->b_target->bt_xfile,
+					BBTOB(map->bm_bn + n),
+					bp->b_pages[p++]);
+			if (!error && err2)
+				error = err2;
+		}
+	}
+
+	if (error)
+		xfs_err(bp->b_mount, "%s failed errno %d", __func__, error);
+
+	bp->b_flags &= ~_XBF_DIRECT_MAP;
+	xfs_buf_free_page_array(bp);
+}
diff --git a/fs/xfs/xfs_buf_xfile.h b/fs/xfs/xfs_buf_xfile.h
index c8d78d01ea5df..6ff2104780010 100644
--- a/fs/xfs/xfs_buf_xfile.h
+++ b/fs/xfs/xfs_buf_xfile.h
@@ -12,9 +12,20 @@ int xfile_alloc_buftarg(struct xfs_mount *mp, const char *descr,
 		struct xfs_buftarg **btpp);
 void xfile_free_buftarg(struct xfs_buftarg *btp);
 xfs_daddr_t xfile_buftarg_nr_sectors(struct xfs_buftarg *btp);
+int xfile_buf_map_pages(struct xfs_buf *bp, xfs_buf_flags_t flags);
+void xfile_buf_unmap_pages(struct xfs_buf *bp);
+
+static inline bool xfile_buftarg_can_direct_map(const struct xfs_buftarg *btp)
+{
+	return (btp->bt_flags & XFS_BUFTARG_XFILE) &&
+	       (btp->bt_flags & XFS_BUFTARG_DIRECT_MAP);
+}
 #else
 # define xfile_buf_ioapply(bp)			(-EOPNOTSUPP)
 # define xfile_buftarg_nr_sectors(btp)		(0)
+# define xfile_buf_map_pages(b,f)		(-ENOTBLK)
+# define xfile_buf_unmap_pages(bp)		((void)0)
+# define xfile_buftarg_can_direct_map(btp)	(false)
 #endif /* CONFIG_XFS_IN_MEMORY_FILE */
 
 #endif /* __XFS_BUF_XFILE_H__ */
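
A side note for reviewers: after this patch, the backing-memory policy in
xfs_buf_find_insert() tries three sources in order, with -ENOTBLK serving
purely as the "fall back to DRAM" signal from the direct-map path.  Here is a
condensed sketch of that policy, not the literal kernel code; the rhashtable
insertion, locking, and the error labels are elided, and the helper name
xfs_buf_pick_backing_memory() is invented for illustration:

/* Sketch only: condenses the allocation logic in xfs_buf_find_insert(). */
static int
xfs_buf_pick_backing_memory(
	struct xfs_buftarg	*btp,
	struct xfs_buf		*bp,
	xfs_buf_flags_t		flags)
{
	int			error;

	/* 1. Map xfile pages directly if the buftarg allows it. */
	if (xfile_buftarg_can_direct_map(btp)) {
		error = xfile_buf_map_pages(bp, flags);
		if (error != -ENOTBLK)
			return error;	/* 0 on success, or a real error */
		/* -ENOTBLK: fall through to ordinary allocations */
	}

	/* 2. Sub-page buffers: try a cheap heap allocation first. */
	if (BBTOB(bp->b_length) < PAGE_SIZE &&
	    xfs_buf_alloc_kmem(bp, flags) == 0)
		return 0;

	/* 3. Everything else comes from the page allocator. */
	return xfs_buf_alloc_pages(bp, flags);
}

Note that -ENOTBLK never escapes to the caller; it only means the xfile could
not hand back page-aligned, fsdata-free pages, so the buffer quietly falls
back to staging its contents in regular DRAM.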
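
A second note, on the alignment test in xfile_buf_map_pages(): OR-ing bm_bn
and bm_len before masking folds two checks into one expression, because the
result has a bit set below the page boundary exactly when either the start
block or the length is not a whole number of pages.  Worked numbers, assuming
4096-byte pages and 512-byte basic blocks (so BTOBB(PAGE_SIZE) == 8):

/* bm_bn = 8 BBs, bm_len = 24 BBs: start and length are both page multiples */
BBTOB(8 | 24) & (PAGE_SIZE - 1);	/* BBTOB(24) = 12288; 12288 & 4095 == 0, ok */

/* bm_bn = 4 BBs, bm_len = 24 BBs: this map starts halfway into a page */
BBTOB(4 | 24) & (PAGE_SIZE - 1);	/* BBTOB(28) = 14336; 14336 & 4095 == 2048, -ENOTBLK */

Any misaligned map disqualifies the whole buffer from direct mapping, which is
why the check runs over every map before any xfile pages are taken.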