On Thu, Sep 29, 2016 at 08:07:56PM -0700, Darrick J. Wong wrote: > Teach the bmap routine to know how to map a range of file blocks to a > specific range of physical blocks, instead of simply allocating fresh > blocks. This enables reflink to map a file to blocks that are already > in use. > > Signed-off-by: Darrick J. Wong <darrick.wong@xxxxxxxxxx> > --- > fs/xfs/libxfs/xfs_bmap.c | 63 ++++++++++++++++++++++++++++++++++++++++++++++ > fs/xfs/libxfs/xfs_bmap.h | 10 +++++++ > fs/xfs/xfs_trace.h | 54 +++++++++++++++++++++++++++++++++++++++ > 3 files changed, 126 insertions(+), 1 deletion(-) > > > diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c > index 907d7b8d..9f145ed 100644 > --- a/fs/xfs/libxfs/xfs_bmap.c > +++ b/fs/xfs/libxfs/xfs_bmap.c > @@ -3877,6 +3877,55 @@ xfs_bmap_btalloc( > } > > /* > + * For a remap operation, just "allocate" an extent at the address that the > + * caller passed in, and ensure that the AGFL is the right size. The caller > + * will then map the "allocated" extent into the file somewhere. > + */ > +STATIC int > +xfs_bmap_remap_alloc( > + struct xfs_bmalloca *ap) > +{ > + struct xfs_trans *tp = ap->tp; > + struct xfs_mount *mp = tp->t_mountp; > + xfs_agblock_t bno; > + struct xfs_alloc_arg args; > + int error; > + > + /* > + * validate that the block number is legal - the enables us to detect > + * and handle a silent filesystem corruption rather than crashing. > + */ > + memset(&args, 0, sizeof(struct xfs_alloc_arg)); > + args.tp = ap->tp; > + args.mp = ap->tp->t_mountp; > + bno = *ap->firstblock; > + args.agno = XFS_FSB_TO_AGNO(mp, bno); > + ASSERT(args.agno < mp->m_sb.sb_agcount); > + args.agbno = XFS_FSB_TO_AGBNO(mp, bno); > + ASSERT(args.agbno < mp->m_sb.sb_agblocks); > + > + /* "Allocate" the extent from the range we passed in. */ > + trace_xfs_bmap_remap_alloc(ap->ip, *ap->firstblock, ap->length); > + ap->blkno = bno; > + ap->ip->i_d.di_nblocks += ap->length; > + xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE); > + > + /* Fix the freelist, like a real allocator does. */ > + args.datatype = ap->datatype; > + args.pag = xfs_perag_get(args.mp, args.agno); > + ASSERT(args.pag); > + > + error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING); Why the FREEING flag? > + if (error) > + goto error0; > +error0: > + xfs_perag_put(args.pag); > + if (error) > + trace_xfs_bmap_remap_alloc_error(ap->ip, error, _RET_IP_); > + return error; > +} > + > +/* > * xfs_bmap_alloc is called by xfs_bmapi to allocate an extent for a file. > * It figures out where to ask the underlying allocator to put the new extent. > */ > @@ -3884,6 +3933,8 @@ STATIC int > xfs_bmap_alloc( > struct xfs_bmalloca *ap) /* bmap alloc argument struct */ > { > + if (ap->flags & XFS_BMAPI_REMAP) > + return xfs_bmap_remap_alloc(ap); > if (XFS_IS_REALTIME_INODE(ap->ip) && > xfs_alloc_is_userdata(ap->datatype)) > return xfs_bmap_rtalloc(ap); > @@ -4442,6 +4493,12 @@ xfs_bmapi_write( > ASSERT(len > 0); > ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL); > ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); > + if (whichfork == XFS_ATTR_FORK) > + ASSERT(!(flags & XFS_BMAPI_REMAP)); I think it's better to avoid conditionals if the only affected code consists of ASSERT() statements (which can be compiled out). E.g., ASSERT(!((flags & XFS_BMAPI_REMAP) && whichfork == XFS_ATTR_FORK)); ... and so on, but not a big deal. Brian > + if (flags & XFS_BMAPI_REMAP) { > + ASSERT(!(flags & XFS_BMAPI_PREALLOC)); > + ASSERT(!(flags & XFS_BMAPI_CONVERT)); > + } > > /* zeroing is for currently only for data extents, not metadata */ > ASSERT((flags & (XFS_BMAPI_METADATA | XFS_BMAPI_ZERO)) != > @@ -4503,6 +4560,12 @@ xfs_bmapi_write( > wasdelay = !inhole && isnullstartblock(bma.got.br_startblock); > > /* > + * Make sure we only reflink into a hole. > + */ > + if (flags & XFS_BMAPI_REMAP) > + ASSERT(inhole); > + > + /* > * First, deal with the hole before the allocated space > * that we found, if any. > */ > diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h > index fcdb094..877b6f9 100644 > --- a/fs/xfs/libxfs/xfs_bmap.h > +++ b/fs/xfs/libxfs/xfs_bmap.h > @@ -97,6 +97,13 @@ struct xfs_extent_free_item > */ > #define XFS_BMAPI_ZERO 0x080 > > +/* > + * Map the inode offset to the block given in ap->firstblock. Primarily > + * used for reflink. The range must be in a hole, and this flag cannot be > + * turned on with PREALLOC or CONVERT, and cannot be used on the attr fork. > + */ > +#define XFS_BMAPI_REMAP 0x100 > + > #define XFS_BMAPI_FLAGS \ > { XFS_BMAPI_ENTIRE, "ENTIRE" }, \ > { XFS_BMAPI_METADATA, "METADATA" }, \ > @@ -105,7 +112,8 @@ struct xfs_extent_free_item > { XFS_BMAPI_IGSTATE, "IGSTATE" }, \ > { XFS_BMAPI_CONTIG, "CONTIG" }, \ > { XFS_BMAPI_CONVERT, "CONVERT" }, \ > - { XFS_BMAPI_ZERO, "ZERO" } > + { XFS_BMAPI_ZERO, "ZERO" }, \ > + { XFS_BMAPI_REMAP, "REMAP" } > > > static inline int xfs_bmapi_aflag(int w) > diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h > index 195a168..8485984 100644 > --- a/fs/xfs/xfs_trace.h > +++ b/fs/xfs/xfs_trace.h > @@ -2965,6 +2965,60 @@ TRACE_EVENT(xfs_refcount_finish_one_leftover, > __entry->adjusted) > ); > > +/* simple inode-based error/%ip tracepoint class */ > +DECLARE_EVENT_CLASS(xfs_inode_error_class, > + TP_PROTO(struct xfs_inode *ip, int error, unsigned long caller_ip), > + TP_ARGS(ip, error, caller_ip), > + TP_STRUCT__entry( > + __field(dev_t, dev) > + __field(xfs_ino_t, ino) > + __field(int, error) > + __field(unsigned long, caller_ip) > + ), > + TP_fast_assign( > + __entry->dev = VFS_I(ip)->i_sb->s_dev; > + __entry->ino = ip->i_ino; > + __entry->error = error; > + __entry->caller_ip = caller_ip; > + ), > + TP_printk("dev %d:%d ino %llx error %d caller %ps", > + MAJOR(__entry->dev), MINOR(__entry->dev), > + __entry->ino, > + __entry->error, > + (char *)__entry->caller_ip) > +); > + > +#define DEFINE_INODE_ERROR_EVENT(name) \ > +DEFINE_EVENT(xfs_inode_error_class, name, \ > + TP_PROTO(struct xfs_inode *ip, int error, \ > + unsigned long caller_ip), \ > + TP_ARGS(ip, error, caller_ip)) > + > +/* reflink allocator */ > +TRACE_EVENT(xfs_bmap_remap_alloc, > + TP_PROTO(struct xfs_inode *ip, xfs_fsblock_t fsbno, > + xfs_extlen_t len), > + TP_ARGS(ip, fsbno, len), > + TP_STRUCT__entry( > + __field(dev_t, dev) > + __field(xfs_ino_t, ino) > + __field(xfs_fsblock_t, fsbno) > + __field(xfs_extlen_t, len) > + ), > + TP_fast_assign( > + __entry->dev = VFS_I(ip)->i_sb->s_dev; > + __entry->ino = ip->i_ino; > + __entry->fsbno = fsbno; > + __entry->len = len; > + ), > + TP_printk("dev %d:%d ino 0x%llx fsbno 0x%llx len %x", > + MAJOR(__entry->dev), MINOR(__entry->dev), > + __entry->ino, > + __entry->fsbno, > + __entry->len) > +); > +DEFINE_INODE_ERROR_EVENT(xfs_bmap_remap_alloc_error); > + > #endif /* _TRACE_XFS_H */ > > #undef TRACE_INCLUDE_PATH > > -- > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in > the body of a message to majordomo@xxxxxxxxxxxxxxx > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe linux-xfs" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html