Re: [PATCH 21/63] xfs: map an inode's offset to an exact physical block

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Mon, Oct 03, 2016 at 05:11:19PM -0700, Darrick J. Wong wrote:
> On Mon, Oct 03, 2016 at 03:03:49PM -0400, Brian Foster wrote:
> > On Thu, Sep 29, 2016 at 08:07:56PM -0700, Darrick J. Wong wrote:
> > > Teach the bmap routine to know how to map a range of file blocks to a
> > > specific range of physical blocks, instead of simply allocating fresh
> > > blocks.  This enables reflink to map a file to blocks that are already
> > > in use.
> > > 
> > > Signed-off-by: Darrick J. Wong <darrick.wong@xxxxxxxxxx>
> > > ---
> > >  fs/xfs/libxfs/xfs_bmap.c |   63 ++++++++++++++++++++++++++++++++++++++++++++++
> > >  fs/xfs/libxfs/xfs_bmap.h |   10 +++++++
> > >  fs/xfs/xfs_trace.h       |   54 +++++++++++++++++++++++++++++++++++++++
> > >  3 files changed, 126 insertions(+), 1 deletion(-)
> > > 
> > > 
> > > diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
> > > index 907d7b8d..9f145ed 100644
> > > --- a/fs/xfs/libxfs/xfs_bmap.c
> > > +++ b/fs/xfs/libxfs/xfs_bmap.c
> > > @@ -3877,6 +3877,55 @@ xfs_bmap_btalloc(
> > >  }
> > >  
> > >  /*
> > > + * For a remap operation, just "allocate" an extent at the address that the
> > > + * caller passed in, and ensure that the AGFL is the right size.  The caller
> > > + * will then map the "allocated" extent into the file somewhere.
> > > + */
> > > +STATIC int
> > > +xfs_bmap_remap_alloc(
> > > +	struct xfs_bmalloca	*ap)
> > > +{
> > > +	struct xfs_trans	*tp = ap->tp;
> > > +	struct xfs_mount	*mp = tp->t_mountp;
> > > +	xfs_agblock_t		bno;
> > > +	struct xfs_alloc_arg	args;
> > > +	int			error;
> > > +
> > > +	/*
> > > +	 * validate that the block number is legal - the enables us to detect
> > > +	 * and handle a silent filesystem corruption rather than crashing.
> > > +	 */
> > > +	memset(&args, 0, sizeof(struct xfs_alloc_arg));
> > > +	args.tp = ap->tp;
> > > +	args.mp = ap->tp->t_mountp;
> > > +	bno = *ap->firstblock;
> > > +	args.agno = XFS_FSB_TO_AGNO(mp, bno);
> > > +	ASSERT(args.agno < mp->m_sb.sb_agcount);
> > > +	args.agbno = XFS_FSB_TO_AGBNO(mp, bno);
> > > +	ASSERT(args.agbno < mp->m_sb.sb_agblocks);
> > > +
> > > +	/* "Allocate" the extent from the range we passed in. */
> > > +	trace_xfs_bmap_remap_alloc(ap->ip, *ap->firstblock, ap->length);
> > > +	ap->blkno = bno;
> > > +	ap->ip->i_d.di_nblocks += ap->length;
> > > +	xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE);
> > > +
> > > +	/* Fix the freelist, like a real allocator does. */
> > > +	args.datatype = ap->datatype;
> > > +	args.pag = xfs_perag_get(args.mp, args.agno);
> > > +	ASSERT(args.pag);
> > > +
> > > +	error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING);
> > 
> > Why the FREEING flag? 
> 
> /*
>  * The freelist fixing code will decline the allocation if
>  * the size and shape of the free space doesn't allow for
>  * allocating the extent and updating all the metadata that
>  * happens during an allocation.  We're remapping, not
>  * allocating, so skip that check by pretending to be freeing.
>  */
> 

Thanks. This also can bypass fixing up the AG freelist. I suppose that
won't matter since we aren't going to update either allocbt, but are we
safe from the rmapbt perspective as well?

Brian

> > > +	if (error)
> > > +		goto error0;
> > > +error0:
> > > +	xfs_perag_put(args.pag);
> > > +	if (error)
> > > +		trace_xfs_bmap_remap_alloc_error(ap->ip, error, _RET_IP_);
> > > +	return error;
> > > +}
> > > +
> > > +/*
> > >   * xfs_bmap_alloc is called by xfs_bmapi to allocate an extent for a file.
> > >   * It figures out where to ask the underlying allocator to put the new extent.
> > >   */
> > > @@ -3884,6 +3933,8 @@ STATIC int
> > >  xfs_bmap_alloc(
> > >  	struct xfs_bmalloca	*ap)	/* bmap alloc argument struct */
> > >  {
> > > +	if (ap->flags & XFS_BMAPI_REMAP)
> > > +		return xfs_bmap_remap_alloc(ap);
> > >  	if (XFS_IS_REALTIME_INODE(ap->ip) &&
> > >  	    xfs_alloc_is_userdata(ap->datatype))
> > >  		return xfs_bmap_rtalloc(ap);
> > > @@ -4442,6 +4493,12 @@ xfs_bmapi_write(
> > >  	ASSERT(len > 0);
> > >  	ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL);
> > >  	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
> > > +	if (whichfork == XFS_ATTR_FORK)
> > > +		ASSERT(!(flags & XFS_BMAPI_REMAP));
> > 
> > I think it's better to avoid conditionals if the only affected code
> > consists of ASSERT() statements (which can be compiled out). E.g., 
> > 
> > 	ASSERT(!((flags & XFS_BMAPI_REMAP) && whichfork == XFS_ATTR_FORK));
> > 
> > ... and so on, but not a big deal.
> 
> <nod> I might as well fix that up too...
> 
> --D
> 
> > Brian
> > 
> > > +	if (flags & XFS_BMAPI_REMAP) {
> > > +		ASSERT(!(flags & XFS_BMAPI_PREALLOC));
> > > +		ASSERT(!(flags & XFS_BMAPI_CONVERT));
> > > +	}
> > >  
> > >  	/* zeroing is for currently only for data extents, not metadata */
> > >  	ASSERT((flags & (XFS_BMAPI_METADATA | XFS_BMAPI_ZERO)) !=
> > > @@ -4503,6 +4560,12 @@ xfs_bmapi_write(
> > >  		wasdelay = !inhole && isnullstartblock(bma.got.br_startblock);
> > >  
> > >  		/*
> > > +		 * Make sure we only reflink into a hole.
> > > +		 */
> > > +		if (flags & XFS_BMAPI_REMAP)
> > > +			ASSERT(inhole);
> > > +
> > > +		/*
> > >  		 * First, deal with the hole before the allocated space
> > >  		 * that we found, if any.
> > >  		 */
> > > diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h
> > > index fcdb094..877b6f9 100644
> > > --- a/fs/xfs/libxfs/xfs_bmap.h
> > > +++ b/fs/xfs/libxfs/xfs_bmap.h
> > > @@ -97,6 +97,13 @@ struct xfs_extent_free_item
> > >   */
> > >  #define XFS_BMAPI_ZERO		0x080
> > >  
> > > +/*
> > > + * Map the inode offset to the block given in ap->firstblock.  Primarily
> > > + * used for reflink.  The range must be in a hole, and this flag cannot be
> > > + * turned on with PREALLOC or CONVERT, and cannot be used on the attr fork.
> > > + */
> > > +#define XFS_BMAPI_REMAP		0x100
> > > +
> > >  #define XFS_BMAPI_FLAGS \
> > >  	{ XFS_BMAPI_ENTIRE,	"ENTIRE" }, \
> > >  	{ XFS_BMAPI_METADATA,	"METADATA" }, \
> > > @@ -105,7 +112,8 @@ struct xfs_extent_free_item
> > >  	{ XFS_BMAPI_IGSTATE,	"IGSTATE" }, \
> > >  	{ XFS_BMAPI_CONTIG,	"CONTIG" }, \
> > >  	{ XFS_BMAPI_CONVERT,	"CONVERT" }, \
> > > -	{ XFS_BMAPI_ZERO,	"ZERO" }
> > > +	{ XFS_BMAPI_ZERO,	"ZERO" }, \
> > > +	{ XFS_BMAPI_REMAP,	"REMAP" }
> > >  
> > >  
> > >  static inline int xfs_bmapi_aflag(int w)
> > > diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
> > > index 195a168..8485984 100644
> > > --- a/fs/xfs/xfs_trace.h
> > > +++ b/fs/xfs/xfs_trace.h
> > > @@ -2965,6 +2965,60 @@ TRACE_EVENT(xfs_refcount_finish_one_leftover,
> > >  		  __entry->adjusted)
> > >  );
> > >  
> > > +/* simple inode-based error/%ip tracepoint class */
> > > +DECLARE_EVENT_CLASS(xfs_inode_error_class,
> > > +	TP_PROTO(struct xfs_inode *ip, int error, unsigned long caller_ip),
> > > +	TP_ARGS(ip, error, caller_ip),
> > > +	TP_STRUCT__entry(
> > > +		__field(dev_t, dev)
> > > +		__field(xfs_ino_t, ino)
> > > +		__field(int, error)
> > > +		__field(unsigned long, caller_ip)
> > > +	),
> > > +	TP_fast_assign(
> > > +		__entry->dev = VFS_I(ip)->i_sb->s_dev;
> > > +		__entry->ino = ip->i_ino;
> > > +		__entry->error = error;
> > > +		__entry->caller_ip = caller_ip;
> > > +	),
> > > +	TP_printk("dev %d:%d ino %llx error %d caller %ps",
> > > +		  MAJOR(__entry->dev), MINOR(__entry->dev),
> > > +		  __entry->ino,
> > > +		  __entry->error,
> > > +		  (char *)__entry->caller_ip)
> > > +);
> > > +
> > > +#define DEFINE_INODE_ERROR_EVENT(name) \
> > > +DEFINE_EVENT(xfs_inode_error_class, name, \
> > > +	TP_PROTO(struct xfs_inode *ip, int error, \
> > > +		 unsigned long caller_ip), \
> > > +	TP_ARGS(ip, error, caller_ip))
> > > +
> > > +/* reflink allocator */
> > > +TRACE_EVENT(xfs_bmap_remap_alloc,
> > > +	TP_PROTO(struct xfs_inode *ip, xfs_fsblock_t fsbno,
> > > +		 xfs_extlen_t len),
> > > +	TP_ARGS(ip, fsbno, len),
> > > +	TP_STRUCT__entry(
> > > +		__field(dev_t, dev)
> > > +		__field(xfs_ino_t, ino)
> > > +		__field(xfs_fsblock_t, fsbno)
> > > +		__field(xfs_extlen_t, len)
> > > +	),
> > > +	TP_fast_assign(
> > > +		__entry->dev = VFS_I(ip)->i_sb->s_dev;
> > > +		__entry->ino = ip->i_ino;
> > > +		__entry->fsbno = fsbno;
> > > +		__entry->len = len;
> > > +	),
> > > +	TP_printk("dev %d:%d ino 0x%llx fsbno 0x%llx len %x",
> > > +		  MAJOR(__entry->dev), MINOR(__entry->dev),
> > > +		  __entry->ino,
> > > +		  __entry->fsbno,
> > > +		  __entry->len)
> > > +);
> > > +DEFINE_INODE_ERROR_EVENT(xfs_bmap_remap_alloc_error);
> > > +
> > >  #endif /* _TRACE_XFS_H */
> > >  
> > >  #undef TRACE_INCLUDE_PATH
> > > 
> > > --
> > > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> > > the body of a message to majordomo@xxxxxxxxxxxxxxx
> > > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> --
> To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> the body of a message to majordomo@xxxxxxxxxxxxxxx
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html



[Index of Archives]     [XFS Filesystem Development (older mail)]     [Linux Filesystem Development]     [Linux Audio Users]     [Yosemite Trails]     [Linux Kernel]     [Linux RAID]     [Linux SCSI]


  Powered by Linux