Implement a copy-on-write handler for the buffered write path. When writepages is called, allocate a new block (which we then tell the log that we intend to delete so that it's freed if we crash), and then write the buffer to the new block. Upon completion, remove the freed block intent from the log and remap the file so that the changes appear. Signed-off-by: Darrick J. Wong <darrick.wong@xxxxxxxxxx> --- fs/xfs/Makefile | 1 fs/xfs/xfs_aops.c | 52 +++ fs/xfs/xfs_aops.h | 5 fs/xfs/xfs_file.c | 11 + fs/xfs/xfs_icache.c | 3 fs/xfs/xfs_inode.h | 2 fs/xfs/xfs_reflink.c | 756 ++++++++++++++++++++++++++++++++++++++++++++ fs/xfs/xfs_reflink.h | 41 ++ fs/xfs/xfs_trans.h | 3 fs/xfs/xfs_trans_extfree.c | 27 ++ 10 files changed, 894 insertions(+), 7 deletions(-) create mode 100644 fs/xfs/xfs_reflink.c create mode 100644 fs/xfs/xfs_reflink.h diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile index 3565db6..0b7fa41 100644 --- a/fs/xfs/Makefile +++ b/fs/xfs/Makefile @@ -88,6 +88,7 @@ xfs-y += xfs_aops.o \ xfs_message.o \ xfs_mount.o \ xfs_mru_cache.o \ + xfs_reflink.o \ xfs_super.o \ xfs_symlink.o \ xfs_sysfs.o \ diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 50ab287..06a1d2f 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -31,6 +31,8 @@ #include "xfs_bmap.h" #include "xfs_bmap_util.h" #include "xfs_bmap_btree.h" +#include "xfs_reflink.h" +#include <linux/aio.h> #include <linux/gfp.h> #include <linux/mpage.h> #include <linux/pagevec.h> @@ -190,6 +192,8 @@ xfs_finish_ioend( if (ioend->io_type == XFS_IO_UNWRITTEN) queue_work(mp->m_unwritten_workqueue, &ioend->io_work); + else if (ioend->io_type == XFS_IO_FORKED) + queue_work(mp->m_cow_workqueue, &ioend->io_work); else if (ioend->io_append_trans) queue_work(mp->m_data_workqueue, &ioend->io_work); else @@ -212,6 +216,25 @@ xfs_end_io( ioend->io_error = -EIO; goto done; } + + /* + * If we forked the block, we need to remap the bmbt and possibly + * finish up the i_size transaction too... 
or clean up after a + * failed write. + */ + if (ioend->io_type == XFS_IO_FORKED) { + if (ioend->io_error) { + error = xfs_reflink_cancel_fork_ioend(ioend); + goto done; + } + error = xfs_reflink_fork_ioend(ioend); + if (error) + goto done; + if (ioend->io_append_trans) + error = xfs_setfilesize_ioend(ioend); + goto done; + } + if (ioend->io_error) goto done; @@ -266,6 +289,7 @@ xfs_alloc_ioend( ioend->io_append_trans = NULL; INIT_WORK(&ioend->io_work, xfs_end_io); + INIT_LIST_HEAD(&ioend->io_reflink_endio_list); return ioend; } @@ -547,6 +571,7 @@ xfs_cancel_ioend( } while ((bh = next_bh) != NULL); + xfs_reflink_cancel_fork_ioend(ioend); /* needs live ioend */ mempool_free(ioend, xfs_ioend_pool); } while ((ioend = next) != NULL); } @@ -563,7 +588,8 @@ xfs_add_to_ioend( xfs_off_t offset, unsigned int type, xfs_ioend_t **result, - int need_ioend) + int need_ioend, + struct xfs_reflink_ioend *eio) { xfs_ioend_t *ioend = *result; @@ -584,6 +610,8 @@ xfs_add_to_ioend( bh->b_private = NULL; ioend->io_size += bh->b_size; + if (eio) + xfs_reflink_add_ioend(ioend, eio); } STATIC void @@ -784,7 +812,7 @@ xfs_convert_page( if (type != XFS_IO_OVERWRITE) xfs_map_at_offset(inode, bh, imap, offset); xfs_add_to_ioend(inode, bh, offset, type, - ioendp, done); + ioendp, done, NULL); page_dirty--; count++; @@ -947,6 +975,8 @@ xfs_vm_writepage( int err, imap_valid = 0, uptodate = 1; int count = 0; int nonblocking = 0; + struct xfs_inode *ip = XFS_I(inode); + int err2 = 0; trace_xfs_writepage(inode, page, 0, 0); @@ -1115,11 +1145,15 @@ xfs_vm_writepage( imap_valid = xfs_imap_valid(inode, &imap, offset); } if (imap_valid) { + struct xfs_reflink_ioend *eio = NULL; + + err2 = xfs_reflink_write_fork_block(ip, &imap, offset, + &type, &eio); lock_buffer(bh); if (type != XFS_IO_OVERWRITE) xfs_map_at_offset(inode, bh, &imap, offset); xfs_add_to_ioend(inode, bh, offset, type, &ioend, - new_ioend); + new_ioend, eio); count++; } @@ -1133,6 +1167,9 @@ xfs_vm_writepage( xfs_start_page_writeback(page, 1, count); + if
(err) + goto error; + /* if there is no IO to be submitted for this page, we are done */ if (!ioend) return 0; @@ -1167,8 +1204,9 @@ xfs_vm_writepage( /* * Reserve log space if we might write beyond the on-disk inode size. */ - err = 0; - if (ioend->io_type != XFS_IO_UNWRITTEN && xfs_ioend_is_append(ioend)) + err = err2; + if (!err && ioend->io_type != XFS_IO_UNWRITTEN && + xfs_ioend_is_append(ioend)) err = xfs_setfilesize_trans_alloc(ioend); xfs_submit_ioend(wbc, iohead, err); @@ -1818,6 +1856,10 @@ xfs_vm_write_begin( if (!page) return -ENOMEM; + status = xfs_reflink_reserve_fork_block(XFS_I(mapping->host), pos, len); + if (status) + return status; + status = __block_write_begin(page, pos, len, xfs_get_blocks); if (unlikely(status)) { struct inode *inode = mapping->host; diff --git a/fs/xfs/xfs_aops.h b/fs/xfs/xfs_aops.h index 86afd1a..9cf206a 100644 --- a/fs/xfs/xfs_aops.h +++ b/fs/xfs/xfs_aops.h @@ -27,12 +27,14 @@ enum { XFS_IO_DELALLOC, /* covers delalloc region */ XFS_IO_UNWRITTEN, /* covers allocated but uninitialized data */ XFS_IO_OVERWRITE, /* covers already allocated extent */ + XFS_IO_FORKED, /* covers copy-on-write region */ }; #define XFS_IO_TYPES \ { XFS_IO_DELALLOC, "delalloc" }, \ { XFS_IO_UNWRITTEN, "unwritten" }, \ - { XFS_IO_OVERWRITE, "overwrite" } + { XFS_IO_OVERWRITE, "overwrite" }, \ + { XFS_IO_FORKED, "forked" } /* * xfs_ioend struct manages large extent writes for XFS. @@ -50,6 +52,7 @@ typedef struct xfs_ioend { xfs_off_t io_offset; /* offset in the file */ struct work_struct io_work; /* xfsdatad work queue */ struct xfs_trans *io_append_trans;/* xact. 
for size update */ + struct list_head io_reflink_endio_list;/* remappings for CoW */ } xfs_ioend_t; extern const struct address_space_operations xfs_address_space_operations; diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index e78feb4..593223f 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -37,6 +37,7 @@ #include "xfs_log.h" #include "xfs_icache.h" #include "xfs_pnfs.h" +#include "xfs_reflink.h" #include <linux/dcache.h> #include <linux/falloc.h> @@ -1502,6 +1503,14 @@ xfs_filemap_page_mkwrite( file_update_time(vma->vm_file); xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED); + /* Set up the remapping for a CoW mmap'd page */ + ret = xfs_reflink_reserve_fork_block(XFS_I(inode), + vmf->page->index << PAGE_CACHE_SHIFT, PAGE_CACHE_SIZE); + if (ret) { + ret = block_page_mkwrite_return(ret); + goto out; + } + if (IS_DAX(inode)) { ret = __dax_mkwrite(vma, vmf, xfs_get_blocks_direct, xfs_end_io_dax_write); @@ -1509,7 +1518,7 @@ xfs_filemap_page_mkwrite( ret = __block_page_mkwrite(vma, vmf, xfs_get_blocks); ret = block_page_mkwrite_return(ret); } - +out: xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED); sb_end_pagefault(inode->i_sb); diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index 0a326bd..c409576 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -33,6 +33,7 @@ #include "xfs_bmap_util.h" #include "xfs_dquot_item.h" #include "xfs_dquot.h" +#include "xfs_reflink.h" #include <linux/kthread.h> #include <linux/freezer.h> @@ -80,6 +81,7 @@ xfs_inode_alloc( ip->i_flags = 0; ip->i_delayed_blks = 0; memset(&ip->i_d, 0, sizeof(xfs_icdinode_t)); + ip->i_remaps = RB_ROOT; return ip; } @@ -115,6 +117,7 @@ xfs_inode_free( ip->i_itemp = NULL; } + xfs_reflink_cancel_fork_blocks(ip); /* * Because we use RCU freeing we need to ensure the inode always * appears to be reclaimed with an invalid inode number when in the diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 6436a96..f4cf967 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -65,6 
+65,8 @@ typedef struct xfs_inode { xfs_icdinode_t i_d; /* most of ondisk inode */ + struct rb_root i_remaps; /* CoW remappings in progress */ + /* VFS inode */ struct inode i_vnode; /* embedded VFS inode */ } xfs_inode_t; diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c new file mode 100644 index 0000000..1e00be2 --- /dev/null +++ b/fs/xfs/xfs_reflink.c @@ -0,0 +1,756 @@ +/* + * Copyright (c) 2015 Oracle. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" +#include "xfs_mount.h" +#include "xfs_da_format.h" +#include "xfs_da_btree.h" +#include "xfs_inode.h" +#include "xfs_trans.h" +#include "xfs_inode_item.h" +#include "xfs_bmap.h" +#include "xfs_bmap_util.h" +#include "xfs_error.h" +#include "xfs_dir2.h" +#include "xfs_dir2_priv.h" +#include "xfs_ioctl.h" +#include "xfs_trace.h" +#include "xfs_log.h" +#include "xfs_icache.h" +#include "xfs_pnfs.h" +#include "xfs_refcount_btree.h" +#include "xfs_refcount.h" +#include "xfs_bmap_btree.h" +#include "xfs_trans_space.h" +#include "xfs_bit.h" +#include "xfs_alloc.h" +#include "xfs_quota_defs.h" +#include "xfs_quota.h" +#include "xfs_btree.h" +#include "xfs_bmap_btree.h" +#include "xfs_reflink.h" + +#define CHECK_AG_NUMBER(mp, agno) \ + do { \ + 
ASSERT((agno) != NULLAGNUMBER); \ + ASSERT((agno) < (mp)->m_sb.sb_agcount); \ + } while (0) + +#define CHECK_AG_EXTENT(mp, agbno, len) \ + do { \ + ASSERT((agbno) != NULLAGBLOCK); \ + ASSERT((len) > 0); \ + ASSERT((unsigned long long)(agbno) + (len) <= \ + (mp)->m_sb.sb_agblocks); \ + } while (0) + +struct xfs_reflink_ioend { + struct rb_node rlei_node; /* tree of pending remappings */ + struct list_head rlei_list; /* list of reflink ioends */ + struct xfs_bmbt_irec rlei_mapping; /* new bmbt mapping to put in */ + struct xfs_efi_log_item *rlei_efi; /* efi log item to cancel */ + xfs_fsblock_t rlei_oldfsbno; /* old fsbno */ +}; + +/** + * xfs_reflink_get_refcount() - get refcount and extent length for a given pblk + * + * @mp: XFS mount object + * @agno: AG number + * @agbno: AG block number + * @len: length of extent + * @nr: refcount + */ +int +xfs_reflink_get_refcount( + struct xfs_mount *mp, + xfs_agnumber_t agno, + xfs_agblock_t agbno, + xfs_extlen_t *len, + xfs_nlink_t *nr) +{ + struct xfs_btree_cur *cur; + struct xfs_buf *agbp; + struct xfs_refcount_irec tmp; + xfs_extlen_t aglen; + int error; + int i, have; + int bt_error; + + if (!xfs_sb_version_hasreflink(&mp->m_sb)) { + *len = 0; + *nr = 1; + return 0; + } + + error = xfs_alloc_read_agf(mp, NULL, agno, 0, &agbp); + if (error) + return error; + aglen = be32_to_cpu(XFS_BUF_TO_AGF(agbp)->agf_length); + ASSERT(agbno < aglen); + + /* + * See if there's an extent covering the block we want. 
+ */ + bt_error = XFS_BTREE_ERROR; + cur = xfs_refcountbt_init_cursor(mp, NULL, agbp, agno, NULL); + error = xfs_refcountbt_lookup_le(cur, agbno, &have); + if (error) + goto out_error; + if (!have) + goto hole; + error = xfs_refcountbt_get_rec(cur, &tmp, &i); + if (error) + goto out_error; + XFS_WANT_CORRUPTED_GOTO(mp, i == 1, out_error); + if (tmp.rc_startblock + tmp.rc_blockcount <= agbno) + goto hole; + + *len = tmp.rc_blockcount - (agbno - tmp.rc_startblock); + *nr = tmp.rc_refcount; + goto out; + +hole: + /* + * We're in a hole, so pretend that we have a refcount=1 extent + * running to the next refcount record or the end of the AG. + */ + error = xfs_btree_increment(cur, 0, &have); + if (error) + goto out_error; + if (!have) + *len = aglen - agbno; + else { + error = xfs_refcountbt_get_rec(cur, &tmp, &i); + if (error) + goto out_error; + XFS_WANT_CORRUPTED_GOTO(mp, i == 1, out_error); + *len = tmp.rc_startblock - agbno; + } + *nr = 1; + +out: + bt_error = XFS_BTREE_NOERROR; +out_error: + xfs_btree_del_cursor(cur, bt_error); + xfs_buf_relse(agbp); + return error; +} + +/* + * Allocate a replacement block for a copy-on-write operation. + * + * XXX: Ideally we'd scan up and down the incore extent list + * looking for a block, but do this stupid thing for now.
+ */ +STATIC int +fork_one_block( + struct xfs_mount *mp, + struct xfs_trans *tp, + struct xfs_inode *ip, + xfs_fsblock_t old, + xfs_fsblock_t *new, + xfs_fileoff_t offset) +{ + int error; + struct xfs_alloc_arg args; /* allocation arguments */ + + memset(&args, 0, sizeof(args)); + args.tp = tp; + args.mp = mp; + args.type = XFS_ALLOCTYPE_NEAR_BNO; + args.firstblock = args.fsbno = old; + args.minlen = args.maxlen = args.prod = 1; + args.userdata = XFS_ALLOC_USERDATA; + error = xfs_alloc_vextent(&args); + if (error) + goto out_error; + ASSERT(args.len == 1); + ASSERT(args.fsbno != old); + *new = args.fsbno; + +out_error: + return error; +} + +/* Compare two reflink ioend structures */ +STATIC int +ioend_compare( + struct xfs_reflink_ioend *i1, + struct xfs_reflink_ioend *i2) +{ + if (i1->rlei_mapping.br_startoff > i2->rlei_mapping.br_startoff) + return 1; + if (i1->rlei_mapping.br_startoff < i2->rlei_mapping.br_startoff) + return -1; + return 0; +} + +/* Attach a remapping object to an inode. */ +STATIC int +remap_insert( + struct xfs_inode *ip, + struct xfs_reflink_ioend *eio) +{ + struct rb_node **new = &(ip->i_remaps.rb_node); + struct rb_node *parent = NULL; + struct xfs_reflink_ioend *this; + int result; + + /* Figure out where to put new node */ + while (*new) { + this = rb_entry(*new, struct xfs_reflink_ioend, rlei_node); + result = ioend_compare(eio, this); + + parent = *new; + if (result < 0) + new = &((*new)->rb_left); + else if (result > 0) + new = &((*new)->rb_right); + else + return -EEXIST; + } + + /* Add new node and rebalance tree. 
*/ + rb_link_node(&eio->rlei_node, parent, new); + rb_insert_color(&eio->rlei_node, &ip->i_remaps); + + return 0; +} + +/* Find a remapping object for a block in an inode */ +STATIC int +remap_search( + struct xfs_inode *ip, + xfs_fileoff_t fsbno, + struct xfs_reflink_ioend **peio) +{ + struct rb_node *node = ip->i_remaps.rb_node; + struct xfs_reflink_ioend *data; + int result; + struct xfs_reflink_ioend f; + + f.rlei_mapping.br_startoff = fsbno; + while (node) { + data = rb_entry(node, struct xfs_reflink_ioend, rlei_node); + result = ioend_compare(&f, data); + + if (result < 0) + node = node->rb_left; + else if (result > 0) + node = node->rb_right; + else { + *peio = data; + return 0; + } + } + + return -ENOENT; +} + +/* Allocate a block to handle a copy on write later. */ +STATIC int +__reserve_fork_block( + struct xfs_inode *ip, + struct xfs_bmbt_irec *imap, + xfs_off_t offset) +{ + xfs_fsblock_t fsbno; + xfs_fsblock_t new_fsbno; + xfs_off_t iomap_offset; + xfs_agnumber_t agno; /* allocation group number */ + xfs_agblock_t agbno; /* ag start of range to free */ + struct xfs_trans *tp = NULL; + int error; + struct xfs_reflink_ioend *eio; + struct xfs_mount *mp = ip->i_mount; + + ASSERT(xfs_is_reflink_inode(ip)); + iomap_offset = XFS_FSB_TO_B(mp, imap->br_startoff); + fsbno = imap->br_startblock + XFS_B_TO_FSB(mp, offset - iomap_offset); + agno = XFS_FSB_TO_AGNO(mp, fsbno); + agbno = XFS_FSB_TO_AGBNO(mp, fsbno); + CHECK_AG_NUMBER(mp, agno); + CHECK_AG_EXTENT(mp, agbno, 1); + ASSERT(imap->br_state == XFS_EXT_NORM); + + /* If we've already got a remapping, we're done. */ + error = remap_search(ip, XFS_B_TO_FSB(mp, offset), &eio); + if (!error) + return 0; + + /* + * Ok, we have to fork this block. Allocate a replacement block, + * stash the new mapping, and add an EFI entry for recovery. When + * the (redirected) IO completes, we'll deal with remapping. 
+ */ + tp = xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE); + error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write, + XFS_DIOSTRAT_SPACE_RES(mp, 2), 0); + if (error) + goto out_cancel; + + error = fork_one_block(mp, tp, ip, fsbno, &new_fsbno, + XFS_B_TO_FSB(mp, offset)); + if (error) + goto out_cancel; + + trace_xfs_reflink_reserve_fork_block(ip, XFS_B_TO_FSB(mp, offset), + fsbno, 1, new_fsbno); + + eio = kmem_zalloc(sizeof(*eio), KM_SLEEP | KM_NOFS); + eio->rlei_mapping.br_startblock = new_fsbno; + eio->rlei_mapping.br_startoff = XFS_B_TO_FSB(mp, offset); + eio->rlei_mapping.br_blockcount = 1; + eio->rlei_mapping.br_state = XFS_EXT_NORM; + eio->rlei_oldfsbno = fsbno; + eio->rlei_efi = xfs_trans_get_efi(tp, 1); + xfs_trans_log_efi_extent(tp, eio->rlei_efi, new_fsbno, 1); + + error = remap_insert(ip, eio); + if (error) + goto out_cancel; + + /* + * ...and we're done. + */ + error = xfs_trans_commit(tp); + if (error) + goto out_error; + + return error; + +out_cancel: + xfs_trans_cancel(tp); +out_error: + trace_xfs_reflink_reserve_fork_block_error(ip, error, _RET_IP_); + return error; +} + +/** + * xfs_reflink_reserve_fork_block() -- Allocate blocks to satisfy a copy on + * write operation. 
+ * @ip: XFS inode + * @pos: file offset to start forking + * @len: number of bytes to fork + */ +int +xfs_reflink_reserve_fork_block( + struct xfs_inode *ip, + xfs_off_t pos, + xfs_off_t len) +{ + struct xfs_bmbt_irec imap; + int nimaps; + int error; + xfs_fileoff_t lblk; + xfs_fileoff_t next_lblk; + xfs_off_t offset; + bool type; + + if (!xfs_is_reflink_inode(ip)) + return 0; + + trace_xfs_reflink_force_getblocks(ip, len, pos, 0); + + error = 0; + lblk = XFS_B_TO_FSBT(ip->i_mount, pos); + next_lblk = 1 + XFS_B_TO_FSBT(ip->i_mount, pos + len - 1); + while (lblk < next_lblk) { + offset = XFS_FSB_TO_B(ip->i_mount, lblk); + /* Read extent from the source file */ + nimaps = 1; + xfs_ilock(ip, XFS_ILOCK_EXCL); + error = xfs_bmapi_read(ip, lblk, next_lblk - lblk, &imap, + &nimaps, 0); + xfs_iunlock(ip, XFS_ILOCK_EXCL); + if (error) + break; + + if (nimaps == 0) + break; + + error = xfs_reflink_should_fork_block(ip, &imap, offset, &type); + if (error) + break; + if (!type) + goto advloop; + + error = __reserve_fork_block(ip, &imap, offset); + if (error) + break; + +advloop: + lblk += imap.br_blockcount; + } + + return error; +} + +/** + * xfs_reflink_write_fork_block() -- find a remapping object and redirect the + * write. 
+ * + * @ip: XFS inode + * @offset: file offset we're trying to write + * @imap: the mapping for this block (I/O) + * @type: the io type (I/O) + * @peio: pointer to a reflink ioend; caller must attach to an ioend (O) + */ +int +xfs_reflink_write_fork_block( + struct xfs_inode *ip, + struct xfs_bmbt_irec *imap, + xfs_off_t offset, + unsigned int *type, + struct xfs_reflink_ioend **peio) +{ + int error; + struct xfs_reflink_ioend *eio = NULL; + + if (!xfs_is_reflink_inode(ip)) + return 0; + if (*type == XFS_IO_DELALLOC || *type == XFS_IO_UNWRITTEN) + return 0; + + error = remap_search(ip, XFS_B_TO_FSB(ip->i_mount, offset), &eio); + if (error == -ENOENT) + return 0; + else if (error) { + trace_xfs_reflink_write_fork_block_error(ip, error, _RET_IP_); + return error; + } + + trace_xfs_reflink_write_fork_block(ip, eio->rlei_mapping.br_startoff, + eio->rlei_oldfsbno, 1, eio->rlei_mapping.br_startblock); + + *imap = eio->rlei_mapping; + *type = XFS_IO_FORKED; + *peio = eio; + return 0; +} + +/* Remap a range of file blocks after forking. 
*/ +STATIC int +xfs_reflink_remap_after_io( + struct xfs_mount *mp, + struct xfs_inode *ip, + struct xfs_reflink_ioend *eio) +{ + struct xfs_trans *tp = NULL; + int error; + xfs_agnumber_t agno; /* allocation group number */ + xfs_agblock_t agbno; /* ag start of range to free */ + xfs_fsblock_t firstfsb; + int committed; + struct xfs_bmbt_irec imaps[1]; + int nimaps = 1; + int done; + struct xfs_bmap_free free_list; + struct xfs_bmbt_irec *imap = &eio->rlei_mapping; + struct xfs_efd_log_item *efd; + unsigned int resblks; + + ASSERT(xfs_is_reflink_inode(ip)); + agno = XFS_FSB_TO_AGNO(mp, imap->br_startblock); + agbno = XFS_FSB_TO_AGBNO(mp, imap->br_startblock); + CHECK_AG_NUMBER(mp, agno); + CHECK_AG_EXTENT(mp, agbno, 1); + ASSERT(imap->br_state == XFS_EXT_NORM); + + trace_xfs_reflink_remap_after_io(ip, imap->br_startoff, + eio->rlei_oldfsbno, imap->br_blockcount, + imap->br_startblock); + + + /* Delete temporary mapping */ + error = remap_search(ip, imap->br_startoff, &eio); + if (error) + return error; + rb_erase(&eio->rlei_node, &ip->i_remaps); + + /* Unmap the old blocks */ + resblks = XFS_DIOSTRAT_SPACE_RES(mp, imap->br_blockcount * 3); + tp = xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE); + error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write, resblks, 0); + if (error) + goto out_cancel; + + xfs_ilock(ip, XFS_ILOCK_EXCL); + xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); + + xfs_bmap_init(&free_list, &firstfsb); + error = xfs_bunmapi(tp, ip, imap->br_startoff, imap->br_blockcount, 0, + imap->br_blockcount, &firstfsb, &free_list, &done); + if (error) + goto out_freelist; + + error = xfs_bmap_finish(&tp, &free_list, &committed); + if (error) + goto out_cancel; + + error = xfs_trans_commit(tp); + if (error) + goto out_error; + + /* Remove the EFD and map the new block into the file. 
*/ + resblks = XFS_DIOSTRAT_SPACE_RES(mp, imap->br_blockcount * 3); + tp = xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE); + error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write, resblks, 0); + if (error) + goto out_cancel; + + xfs_ilock(ip, XFS_ILOCK_EXCL); + xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); + + efd = xfs_trans_get_efd(tp, eio->rlei_efi, 1); + xfs_trans_undelete_extent(tp, efd, imap->br_startblock, + imap->br_blockcount); + + error = xfs_bmapi_write(tp, ip, imap->br_startoff, imap->br_blockcount, + XFS_BMAPI_REMAP, &imap->br_startblock, + imap->br_blockcount, &imaps[0], &nimaps, + &free_list); + if (error) + goto out_freelist; + + error = xfs_bmap_finish(&tp, &free_list, &committed); + if (error) + goto out_cancel; + + error = xfs_trans_commit(tp); + if (error) + goto out_error; + return error; + +out_freelist: + xfs_bmap_cancel(&free_list); +out_cancel: + xfs_trans_cancel(tp); +out_error: + trace_xfs_reflink_remap_after_io_error(ip, error, _RET_IP_); + return error; +} + +/** + * xfs_reflink_fork_ioend() - remap all blocks after forking + * + * @ioend: the io completion object + */ +int +xfs_reflink_fork_ioend( + struct xfs_ioend *ioend) +{ + int error, err2; + struct list_head *pos, *n; + struct xfs_reflink_ioend *eio; + struct xfs_inode *ip = XFS_I(ioend->io_inode); + struct xfs_mount *mp = ip->i_mount; + + error = 0; + list_for_each_safe(pos, n, &ioend->io_reflink_endio_list) { + eio = list_entry(pos, struct xfs_reflink_ioend, rlei_list); + err2 = xfs_reflink_remap_after_io(mp, ip, eio); + if (error == 0) + error = err2; + kfree(eio); + } + return error; +} + +/** + * xfs_reflink_should_fork_block() - determine if a block should be forked + * + * @ip: XFS inode object + * @imap: the fileoff:fsblock mapping that we might fork + * @offset: the file offset of the block we're examining + * @type: set to true if reflinked, false otherwise. 
+ */ +int +xfs_reflink_should_fork_block( + struct xfs_inode *ip, + struct xfs_bmbt_irec *imap, + xfs_off_t offset, + bool *type) +{ + xfs_fsblock_t fsbno; + xfs_off_t iomap_offset; + xfs_agnumber_t agno; /* allocation group number */ + xfs_agblock_t agbno; /* ag start of range to free */ + xfs_extlen_t len; + xfs_nlink_t nr; + int error; + struct xfs_mount *mp = ip->i_mount; + + if (!xfs_is_reflink_inode(ip) || + ISUNWRITTEN(imap) || + imap->br_startblock == HOLESTARTBLOCK || + imap->br_startblock == DELAYSTARTBLOCK) { + *type = false; + return 0; + } + + iomap_offset = XFS_FSB_TO_B(mp, imap->br_startoff); + fsbno = imap->br_startblock + XFS_B_TO_FSB(mp, offset - iomap_offset); + agno = XFS_FSB_TO_AGNO(mp, fsbno); + agbno = XFS_FSB_TO_AGBNO(mp, fsbno); + CHECK_AG_NUMBER(mp, agno); + CHECK_AG_EXTENT(mp, agbno, 1); + ASSERT(imap->br_state == XFS_EXT_NORM); + + error = xfs_reflink_get_refcount(mp, agno, agbno, &len, &nr); + if (error) + return error; + ASSERT(len != 0); + *type = (nr > 1); + return error; +} + +/* Cancel a forked block being held for a CoW operation */ +STATIC int +xfs_reflink_free_forked( + struct xfs_mount *mp, + struct xfs_inode *ip, + struct xfs_reflink_ioend *eio) +{ + struct xfs_trans *tp = NULL; + int error; + xfs_agnumber_t agno; /* allocation group number */ + xfs_agblock_t agbno; /* ag start of range to free */ + xfs_fsblock_t firstfsb; + int committed; + struct xfs_bmap_free free_list; + struct xfs_bmbt_irec *imap = &eio->rlei_mapping; + struct xfs_efd_log_item *efd; + unsigned int resblks; + + ASSERT(xfs_is_reflink_inode(ip)); + agno = XFS_FSB_TO_AGNO(mp, imap->br_startblock); + agbno = XFS_FSB_TO_AGBNO(mp, imap->br_startblock); + CHECK_AG_NUMBER(mp, agno); + CHECK_AG_EXTENT(mp, agbno, 1); + ASSERT(imap->br_state == XFS_EXT_NORM); + + trace_xfs_reflink_free_forked(ip, imap->br_startoff, + eio->rlei_oldfsbno, imap->br_blockcount, + imap->br_startblock); + + /* Log the extent in an EFD for our EFI and free the unused new block.
*/ + resblks = XFS_DIOSTRAT_SPACE_RES(mp, imap->br_blockcount * 3); + tp = xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE); + error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write, resblks, 0); + if (error) + goto out_cancel; + + xfs_ilock(ip, XFS_ILOCK_EXCL); + xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); + efd = xfs_trans_get_efd(tp, eio->rlei_efi, 1); + xfs_trans_undelete_extent(tp, efd, imap->br_startblock, + imap->br_blockcount); + + xfs_bmap_init(&free_list, &firstfsb); + xfs_bmap_add_free(mp, &free_list, imap->br_startblock, 1, NULL); + + error = xfs_bmap_finish(&tp, &free_list, &committed); + if (error) + goto out_cancel; + + error = xfs_trans_commit(tp); + if (error) + goto out_error; + return error; + +out_cancel: + xfs_trans_cancel(tp); +out_error: + trace_xfs_reflink_free_forked_error(ip, error, _RET_IP_); + return error; +} + +/** + * xfs_reflink_cancel_fork_ioend() - free all forked blocks attached to an ioend + * + * @ioend: the io completion object + */ +int +xfs_reflink_cancel_fork_ioend( + struct xfs_ioend *ioend) +{ + int error, err2; + struct list_head *pos, *n; + struct xfs_reflink_ioend *eio; + struct xfs_inode *ip = XFS_I(ioend->io_inode); + struct xfs_mount *mp = ip->i_mount; + + error = 0; + list_for_each_safe(pos, n, &ioend->io_reflink_endio_list) { + eio = list_entry(pos, struct xfs_reflink_ioend, rlei_list); + err2 = xfs_reflink_free_forked(mp, ip, eio); + if (error == 0) + error = err2; + kfree(eio); + } + return error; +} + +/** + * xfs_reflink_cancel_fork_blocks() -- Free all forked blocks attached to + * an inode. + * + * @ip: The inode. 
+ */ +int +xfs_reflink_cancel_fork_blocks( + struct xfs_inode *ip) +{ + struct rb_node *node; + struct xfs_reflink_ioend *eio; + int error = 0; + int err2; + + while ((node = rb_first(&ip->i_remaps))) { + eio = rb_entry(node, struct xfs_reflink_ioend, rlei_node); + err2 = xfs_reflink_free_forked(ip->i_mount, ip, eio); + if (error == 0) + error = err2; + rb_erase(node, &ip->i_remaps); + kfree(eio); + } + + return error; +} + +/** + * xfs_reflink_add_ioend() -- Hook ourselves up to the ioend processing + * so that we can finish forking a block after + * the write completes. + * + * @ioend: The regular ioend structure. + * @eio: The reflink ioend context. + */ +void +xfs_reflink_add_ioend( + struct xfs_ioend *ioend, + struct xfs_reflink_ioend *eio) +{ + list_add_tail(&eio->rlei_list, &ioend->io_reflink_endio_list); +} diff --git a/fs/xfs/xfs_reflink.h b/fs/xfs/xfs_reflink.h new file mode 100644 index 0000000..b3e12d2 --- /dev/null +++ b/fs/xfs/xfs_reflink.h @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2015 Oracle. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#ifndef __XFS_REFLINK_H +#define __XFS_REFLINK_H 1 + +struct xfs_reflink_ioend; + +extern int xfs_reflink_get_refcount(struct xfs_mount *mp, xfs_agnumber_t agno, + xfs_agblock_t agbno, xfs_extlen_t *len, xfs_nlink_t *nr); +extern int xfs_reflink_write_fork_block(struct xfs_inode *ip, + struct xfs_bmbt_irec *imap, xfs_off_t offset, + unsigned int *type, struct xfs_reflink_ioend **peio); +extern int xfs_reflink_reserve_fork_block(struct xfs_inode *ip, + xfs_off_t pos, xfs_off_t len); +extern int xfs_reflink_redirect_directio_write(struct xfs_inode *ip, + struct xfs_bmbt_irec *imap, xfs_off_t offset); +extern int xfs_reflink_cancel_fork_ioend(struct xfs_ioend *ioend); +extern int xfs_reflink_cancel_fork_blocks(struct xfs_inode *ip); +extern int xfs_reflink_fork_ioend(struct xfs_ioend *ioend); +extern void xfs_reflink_add_ioend(struct xfs_ioend *ioend, + struct xfs_reflink_ioend *eio); + +extern int xfs_reflink_should_fork_block(struct xfs_inode *ip, + struct xfs_bmbt_irec *imap, xfs_off_t offset, bool *type); + +#endif /* __XFS_REFLINK_H */ diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index 50fe77e..07e8460 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h @@ -223,6 +223,9 @@ struct xfs_efd_log_item *xfs_trans_get_efd(xfs_trans_t *, int xfs_trans_free_extent(struct xfs_trans *, struct xfs_efd_log_item *, xfs_fsblock_t, xfs_extlen_t, struct xfs_owner_info *); +void xfs_trans_undelete_extent(struct xfs_trans *, + struct xfs_efd_log_item *, xfs_fsblock_t, + xfs_extlen_t); int xfs_trans_commit(struct xfs_trans *); int __xfs_trans_roll(struct xfs_trans **, struct xfs_inode *, int *); int xfs_trans_roll(struct xfs_trans **, struct xfs_inode *); diff --git a/fs/xfs/xfs_trans_extfree.c b/fs/xfs/xfs_trans_extfree.c index d1b8833..a2fed6e 100644 --- 
a/fs/xfs/xfs_trans_extfree.c +++ b/fs/xfs/xfs_trans_extfree.c @@ -146,3 +146,30 @@ xfs_trans_free_extent( return error; } + +/* + * Undelete this extent, by logging it to the EFD. Note that the transaction is + * marked dirty regardless of whether the extent free succeeds or fails to + * support the EFI/EFD lifecycle rules. This should only be used when the + * ownership of the extent hasn't changed, i.e. reflink copy-on-write. + */ +void +xfs_trans_undelete_extent( + struct xfs_trans *tp, + struct xfs_efd_log_item *efdp, + xfs_fsblock_t start_block, + xfs_extlen_t ext_len) +{ + uint next_extent; + struct xfs_extent *extp; + + tp->t_flags |= XFS_TRANS_DIRTY; + efdp->efd_item.li_desc->lid_flags |= XFS_LID_DIRTY; + + next_extent = efdp->efd_next_extent; + ASSERT(next_extent < efdp->efd_format.efd_nextents); + extp = &(efdp->efd_format.efd_extents[next_extent]); + extp->ext_start = start_block; + extp->ext_len = ext_len; + efdp->efd_next_extent++; +} _______________________________________________ xfs mailing list xfs@xxxxxxxxxxx http://oss.sgi.com/mailman/listinfo/xfs