On Tue, Oct 06, 2015 at 10:00:25PM -0700, Darrick J. Wong wrote: > When we're writing zeroes to a reflinked block (such as when we're > punching a reflinked range), we need to fork the the block and write > to that, otherwise we can corrupt the other reflinks. Something that's missing from this patch series: when we shorten a file, the VFS will zero the page contents between the new and old EOF. Therefore, we must ensure that the block is forked prior to this happening, or else we end up changing the shared block, corrupting the other files. --D > > Signed-off-by: Darrick J. Wong <darrick.wong@xxxxxxxxxx> > --- > fs/xfs/xfs_bmap_util.c | 25 +++++++- > fs/xfs/xfs_reflink.c | 154 ++++++++++++++++++++++++++++++++++++++++++++++++ > fs/xfs/xfs_reflink.h | 6 ++ > 3 files changed, 183 insertions(+), 2 deletions(-) > > > diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c > index 245a34a..b054b28 100644 > --- a/fs/xfs/xfs_bmap_util.c > +++ b/fs/xfs/xfs_bmap_util.c > @@ -41,6 +41,7 @@ > #include "xfs_icache.h" > #include "xfs_log.h" > #include "xfs_rmap_btree.h" > +#include "xfs_reflink.h" > > /* Kernel only BMAP related definitions and functions */ > > @@ -1095,7 +1096,9 @@ xfs_zero_remaining_bytes( > xfs_buf_t *bp; > xfs_mount_t *mp = ip->i_mount; > int nimap; > - int error = 0; > + int error = 0, err2; > + bool should_fork; > + struct xfs_trans *tp; > > /* > * Avoid doing I/O beyond eof - it's not necessary > @@ -1136,8 +1139,14 @@ xfs_zero_remaining_bytes( > if (lastoffset > endoff) > lastoffset = endoff; > > + /* Do we need to CoW this block? */ > + error = xfs_reflink_should_fork_block(ip, &imap, offset, > + &should_fork); > + if (error) > + return error; > + > /* DAX can just zero the backing device directly */ > - if (IS_DAX(VFS_I(ip))) { > + if (IS_DAX(VFS_I(ip)) && !should_fork) { > error = dax_zero_page_range(VFS_I(ip), offset, > lastoffset - offset + 1, > xfs_get_blocks_direct); > @@ -1158,10 +1167,22 @@ xfs_zero_remaining_bytes( > (offset - XFS_FSB_TO_B(mp, imap.br_startoff)), > 0, lastoffset - offset + 1); > > + tp = NULL; > + if (should_fork) { > + error = xfs_reflink_fork_buf(ip, bp, offset_fsb, &tp); > + if (error) > + return error; > + } > + > error = xfs_bwrite(bp); > + > + err2 = xfs_reflink_finish_fork_buf(ip, bp, offset_fsb, tp, > + error, imap.br_startblock); > xfs_buf_relse(bp); > if (error) > return error; > + if (err2) > + return err2; > } > return error; > } > diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c > index 226e23f..f5eed2f 100644 > --- a/fs/xfs/xfs_reflink.c > +++ b/fs/xfs/xfs_reflink.c > @@ -788,3 +788,157 @@ xfs_reflink_add_ioend( > { > list_add_tail(&eio->rlei_list, &ioend->io_reflink_endio_list); > } > + > +/** > + * xfs_reflink_fork_buf() - start a transaction to fork a buffer (if needed) > + * > + * @mp: XFS mount point > + * @ip: XFS inode > + * @bp: the buffer that we might need to fork > + * @fileoff: file offset of the buffer > + * @ptp: pointer to an XFS transaction > + */ > +int > +xfs_reflink_fork_buf( > + struct xfs_inode *ip, > + struct xfs_buf *bp, > + xfs_fileoff_t fileoff, > + struct xfs_trans **ptp) > +{ > + struct xfs_mount *mp = ip->i_mount; > + struct xfs_trans *tp; > + xfs_fsblock_t fsbno; > + xfs_fsblock_t new_fsbno; > + xfs_agnumber_t agno; > + xfs_agblock_t agbno; > + uint resblks; > + int error; > + > + fsbno = XFS_DADDR_TO_FSB(mp, XFS_BUF_ADDR(bp)); > + agno = XFS_FSB_TO_AGNO(mp, fsbno); > + agbno = XFS_FSB_TO_AGBNO(mp, fsbno); > + CHECK_AG_NUMBER(mp, agno); > + CHECK_AG_EXTENT(mp, agno, 1); > + > + /* > + * Get ready to remap the thing... > + */ > + resblks = XFS_DIOSTRAT_SPACE_RES(mp, 3); > + tp = xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE); > + error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write, resblks, 0); > + > + /* > + * check for running out of space > + */ > + if (error) { > + /* > + * Free the transaction structure. > + */ > + ASSERT(error == -ENOSPC || XFS_FORCED_SHUTDOWN(mp)); > + goto out_cancel; > + } > + error = xfs_trans_reserve_quota(tp, mp, > + ip->i_udquot, ip->i_gdquot, ip->i_pdquot, > + resblks, 0, XFS_QMOPT_RES_REGBLKS); > + if (error) > + goto out_cancel; > + > + xfs_ilock(ip, XFS_ILOCK_EXCL); > + xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); > + > + /* fork block, remap buffer */ > + error = fork_one_block(mp, tp, ip, fsbno, &new_fsbno, fileoff); > + if (error) > + goto out_cancel; > + > + trace_xfs_reflink_fork_buf(ip, fileoff, fsbno, 1, new_fsbno); > + > + XFS_BUF_SET_ADDR(bp, XFS_FSB_TO_DADDR(mp, new_fsbno)); > + *ptp = tp; > + return error; > + > +out_cancel: > + xfs_trans_cancel(tp); > + trace_xfs_reflink_fork_buf_error(ip, error, _RET_IP_); > + return error; > +} > + > +/** > + * xfs_reflink_finish_fork_buf() - finish forking a file buffer > + * > + * @ip: XFS inode > + * @bp: the buffer that was forked > + * @fileoff: file offset of the buffer > + * @tp: transaction that was returned from xfs_reflink_fork_buf() > + * @write_error: status code from writing the block > + */ > +int > +xfs_reflink_finish_fork_buf( > + struct xfs_inode *ip, > + struct xfs_buf *bp, > + xfs_fileoff_t fileoff, > + struct xfs_trans *tp, > + int write_error, > + xfs_fsblock_t old_fsbno) > +{ > + struct xfs_mount *mp = ip->i_mount; > + struct xfs_bmap_free free_list; > + xfs_fsblock_t firstfsb; > + xfs_fsblock_t fsbno; > + struct xfs_bmbt_irec imaps[1]; > + xfs_agnumber_t agno; > + int nimaps = 1; > + int done; > + int error = write_error; > + int committed; > + struct xfs_owner_info oinfo; > + > + if (tp == NULL) > + return 0; > + > + fsbno = XFS_DADDR_TO_FSB(mp, XFS_BUF_ADDR(bp)); > + agno = XFS_FSB_TO_AGNO(mp, fsbno); > + XFS_RMAP_INO_OWNER(&oinfo, ip->i_ino, XFS_DATA_FORK, fileoff); > + if (write_error != 0) > + goto out_write_error; > + > + trace_xfs_reflink_fork_buf(ip, fileoff, old_fsbno, 1, fsbno); > + /* > + * Remap the old blocks. > + */ > + xfs_bmap_init(&free_list, &firstfsb); > + error = xfs_bunmapi(tp, ip, fileoff, 1, 0, 1, &firstfsb, &free_list, > + &done); > + if (error) > + goto out_free; > + ASSERT(done == 1); > + > + error = xfs_bmapi_write(tp, ip, fileoff, 1, XFS_BMAPI_REMAP, &fsbno, > + 1, &imaps[0], &nimaps, &free_list); > + if (error) > + goto out_free; > + > + /* > + * complete the transaction > + */ > + error = xfs_bmap_finish(&tp, &free_list, &committed); > + if (error) > + goto out_cancel; > + > + error = xfs_trans_commit(tp); > + if (error) > + goto out_error; > + > + return error; > +out_free: > + xfs_bmap_finish(&tp, &free_list, &committed); > +out_write_error: > + done = xfs_free_extent(tp, fsbno, 1, &oinfo); > + if (error == 0) > + error = done; > +out_cancel: > + xfs_trans_cancel(tp); > +out_error: > + trace_xfs_reflink_finish_fork_buf_error(ip, error, _RET_IP_); > + return error; > +} > diff --git a/fs/xfs/xfs_reflink.h b/fs/xfs/xfs_reflink.h > index b3e12d2..ce00cf6 100644 > --- a/fs/xfs/xfs_reflink.h > +++ b/fs/xfs/xfs_reflink.h > @@ -38,4 +38,10 @@ extern void xfs_reflink_add_ioend(struct xfs_ioend *ioend, > extern int xfs_reflink_should_fork_block(struct xfs_inode *ip, > struct xfs_bmbt_irec *imap, xfs_off_t offset, bool *type); > > +extern int xfs_reflink_fork_buf(struct xfs_inode *ip, struct xfs_buf *bp, > + xfs_fileoff_t fileoff, struct xfs_trans **ptp); > +extern int xfs_reflink_finish_fork_buf(struct xfs_inode *ip, struct xfs_buf *bp, > + xfs_fileoff_t fileoff, struct xfs_trans *tp, int write_error, > + xfs_fsblock_t old_fsbno); > + > #endif /* __XFS_REFLINK_H */ > > _______________________________________________ > xfs mailing list > xfs@xxxxxxxxxxx > http://oss.sgi.com/mailman/listinfo/xfs -- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html