Re: [PATCH 04/10] xfs: convert bulkstat to new iwalk infrastructure

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Mon, Jun 10, 2019 at 10:02:29AM -0400, Brian Foster wrote:
> On Tue, Jun 04, 2019 at 02:49:53PM -0700, Darrick J. Wong wrote:
> > From: Darrick J. Wong <darrick.wong@xxxxxxxxxx>
> > 
> > Create a new ibulk structure incore to help us deal with bulk inode stat
> > state tracking and then convert the bulkstat code to use the new iwalk
> > iterator.  This disentangles inode walking from bulk stat control for
> > simpler code and enables us to isolate the formatter functions to the
> > ioctl handling code.
> > 
> > Signed-off-by: Darrick J. Wong <darrick.wong@xxxxxxxxxx>
> > ---
> >  fs/xfs/xfs_ioctl.c   |   65 ++++++--
> >  fs/xfs/xfs_ioctl.h   |    5 +
> >  fs/xfs/xfs_ioctl32.c |   88 +++++------
> >  fs/xfs/xfs_itable.c  |  407 ++++++++++++++------------------------------------
> >  fs/xfs/xfs_itable.h  |   79 ++++------
> >  5 files changed, 245 insertions(+), 399 deletions(-)
> > 
> > 
> > diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
> > index 5ffbdcff3dba..43734901aeb9 100644
> > --- a/fs/xfs/xfs_ioctl.c
> > +++ b/fs/xfs/xfs_ioctl.c
> ...
> > @@ -745,35 +757,54 @@ xfs_ioc_bulkstat(
> >  	if (copy_from_user(&bulkreq, arg, sizeof(xfs_fsop_bulkreq_t)))
> >  		return -EFAULT;
> >  
> > -	if (copy_from_user(&inlast, bulkreq.lastip, sizeof(__s64)))
> > +	if (copy_from_user(&lastino, bulkreq.lastip, sizeof(__s64)))
> >  		return -EFAULT;
> >  
> > -	if ((count = bulkreq.icount) <= 0)
> > +	if (bulkreq.icount <= 0)
> >  		return -EINVAL;
> >  
> >  	if (bulkreq.ubuffer == NULL)
> >  		return -EINVAL;
> >  
> > -	if (cmd == XFS_IOC_FSINUMBERS)
> > -		error = xfs_inumbers(mp, &inlast, &count,
> > +	breq.ubuffer = bulkreq.ubuffer;
> > +	breq.icount = bulkreq.icount;
> > +
> > +	/*
> > +	 * FSBULKSTAT_SINGLE expects that *lastip contains the inode number
> > +	 * that we want to stat.  However, FSINUMBERS and FSBULKSTAT expect
> > +	 * that *lastip contains either zero or the number of the last inode to
> > +	 * be examined by the previous call and return results starting with
> > +	 * the next inode after that.  The new bulk request functions take the
> > +	 * inode to start with, so we have to adjust the lastino/startino
> > +	 * parameter to maintain correct function.
> > +	 */
> 
> It's kind of difficult to tell what's new or old when just looking at
> the code. The comment suggests FSINUMBERS and FSBULKSTAT use the same
> interface wrt to lastip, but they aren't handled the same below. I take
> it this is because xfs_inumbers() still has the same behavior whereas
> xfs_bulkstat() has been changed to operate based on breq rather than
> lastip..?

Yes.  By the end of the series we'll have converted FSINUMBERS too, but
for the ~5 or so patches until we get there, the only way to tell new
vs. old interface is whether it takes breq or pointers to stuff in breq.

> > +	if (cmd == XFS_IOC_FSINUMBERS) {
> > +		int	count = breq.icount;
> > +
> > +		breq.startino = lastino;
> > +		error = xfs_inumbers(mp, &breq.startino, &count,
> >  					bulkreq.ubuffer, xfs_inumbers_fmt);
> > -	else if (cmd == XFS_IOC_FSBULKSTAT_SINGLE)
> > -		error = xfs_bulkstat_one(mp, inlast, bulkreq.ubuffer,
> > -					sizeof(xfs_bstat_t), NULL, &done);
> > -	else	/* XFS_IOC_FSBULKSTAT */
> > -		error = xfs_bulkstat(mp, &inlast, &count, xfs_bulkstat_one,
> > -				     sizeof(xfs_bstat_t), bulkreq.ubuffer,
> > -				     &done);
> > +		breq.ocount = count;
> > +		lastino = breq.startino;
> > +	} else if (cmd == XFS_IOC_FSBULKSTAT_SINGLE) {
> > +		breq.startino = lastino;
> > +		error = xfs_bulkstat_one(&breq, xfs_bulkstat_one_fmt);
> > +		lastino = breq.startino;
> > +	} else {	/* XFS_IOC_FSBULKSTAT */
> > +		breq.startino = lastino + 1;
> > +		error = xfs_bulkstat(&breq, xfs_bulkstat_one_fmt);
> > +		lastino = breq.startino - 1;
> > +	}
> >  
> ...
> > diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c
> > index 814ffe6fbab7..add15819daf3 100644
> > --- a/fs/xfs/xfs_ioctl32.c
> > +++ b/fs/xfs/xfs_ioctl32.c
> ...
> > @@ -284,38 +263,57 @@ xfs_compat_ioc_bulkstat(
> >  		return -EFAULT;
> >  	bulkreq.ocount = compat_ptr(addr);
> >  
> > -	if (copy_from_user(&inlast, bulkreq.lastip, sizeof(__s64)))
> > +	if (copy_from_user(&lastino, bulkreq.lastip, sizeof(__s64)))
> >  		return -EFAULT;
> > +	breq.startino = lastino + 1;
> >  
> > -	if ((count = bulkreq.icount) <= 0)
> > +	if (bulkreq.icount <= 0)
> >  		return -EINVAL;
> >  
> >  	if (bulkreq.ubuffer == NULL)
> >  		return -EINVAL;
> >  
> > +	breq.ubuffer = bulkreq.ubuffer;
> > +	breq.icount = bulkreq.icount;
> > +
> > +	/*
> > +	 * FSBULKSTAT_SINGLE expects that *lastip contains the inode number
> > +	 * that we want to stat.  However, FSINUMBERS and FSBULKSTAT expect
> > +	 * that *lastip contains either zero or the number of the last inode to
> > +	 * be examined by the previous call and return results starting with
> > +	 * the next inode after that.  The new bulk request functions take the
> > +	 * inode to start with, so we have to adjust the lastino/startino
> > +	 * parameter to maintain correct function.
> > +	 */
> 
> (Same comment here.)
> 
> >  	if (cmd == XFS_IOC_FSINUMBERS_32) {
> > -		error = xfs_inumbers(mp, &inlast, &count,
> > +		int	count = breq.icount;
> > +
> > +		breq.startino = lastino;
> > +		error = xfs_inumbers(mp, &breq.startino, &count,
> >  				bulkreq.ubuffer, inumbers_func);
> > +		breq.ocount = count;
> > +		lastino = breq.startino;
> >  	} else if (cmd == XFS_IOC_FSBULKSTAT_SINGLE_32) {
> > -		int res;
> > -
> > -		error = bs_one_func(mp, inlast, bulkreq.ubuffer,
> > -				bs_one_size, NULL, &res);
> > +		breq.startino = lastino;
> > +		error = xfs_bulkstat_one(&breq, bs_one_func);
> > +		lastino = breq.startino;
> >  	} else if (cmd == XFS_IOC_FSBULKSTAT_32) {
> > -		error = xfs_bulkstat(mp, &inlast, &count,
> > -			bs_one_func, bs_one_size,
> > -			bulkreq.ubuffer, &done);
> > -	} else
> > +		breq.startino = lastino + 1;
> > +		error = xfs_bulkstat(&breq, bs_one_func);
> > +		lastino = breq.startino - 1;
> > +	} else {
> >  		error = -EINVAL;
> > +	}
> >  	if (error)
> >  		return error;
> >  
> > +	lastino = breq.startino - 1;
> 
> Should this be here?

Nope.

> >  	if (bulkreq.lastip != NULL &&
> > -	    copy_to_user(bulkreq.lastip, &inlast, sizeof(xfs_ino_t)))
> > +	    copy_to_user(bulkreq.lastip, &lastino, sizeof(xfs_ino_t)))
> >  		return -EFAULT;
> >  
> >  	if (bulkreq.ocount != NULL &&
> > -	    copy_to_user(bulkreq.ocount, &count, sizeof(count)))
> > +	    copy_to_user(bulkreq.ocount, &breq.ocount, sizeof(__s32)))
> >  		return -EFAULT;
> >  
> >  	return 0;
> > diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
> > index 3ca1c454afe6..87c597ea1df7 100644
> > --- a/fs/xfs/xfs_itable.c
> > +++ b/fs/xfs/xfs_itable.c
> > @@ -22,37 +22,63 @@
> >  #include "xfs_iwalk.h"
> >  
> >  /*
> > - * Return stat information for one inode.
> > - * Return 0 if ok, else errno.
> > + * Bulk Stat
> > + * =========
> > + *
> > + * Use the inode walking functions to fill out struct xfs_bstat for every
> > + * allocated inode, then pass the stat information to some externally provided
> > + * iteration function.
> >   */
> > -int
> > +
> > +struct xfs_bstat_chunk {
> > +	bulkstat_one_fmt_pf	formatter;
> > +	struct xfs_ibulk	*breq;
> > +};
> > +
> > +/*
> > + * Fill out the bulkstat info for a single inode and report it somewhere.
> > + *
> > + * bc->breq->lastino is effectively the inode cursor as we walk through the
> > + * filesystem.  Therefore, we update it any time we need to move the cursor
> > + * forward, regardless of whether or not we're sending any bstat information
> > + * back to userspace.  If the inode is internal metadata or, has been freed
> > + * out from under us, we just simply keep going.
> > + *
> > + * However, if any other type of error happens we want to stop right where we
> > + * are so that userspace will call back with exact number of the bad inode and
> > + * we can send back an error code.
> > + *
> > + * Note that if the formatter tells us there's no space left in the buffer we
> > + * move the cursor forward and abort the walk.
> > + */
> > +STATIC int
> >  xfs_bulkstat_one_int(
> > -	struct xfs_mount	*mp,		/* mount point for filesystem */
> > -	xfs_ino_t		ino,		/* inode to get data for */
> > -	void __user		*buffer,	/* buffer to place output in */
> > -	int			ubsize,		/* size of buffer */
> > -	bulkstat_one_fmt_pf	formatter,	/* formatter, copy to user */
> > -	int			*ubused,	/* bytes used by me */
> > -	int			*stat)		/* BULKSTAT_RV_... */
> > +	struct xfs_mount	*mp,
> > +	struct xfs_trans	*tp,
> > +	xfs_ino_t		ino,
> > +	void			*data)
> >  {
> 
> Any reason this function takes an 'ino' param considering it's sourced
> from breq->startino and we bump that value from within this function?
> The latter seems slightly misplaced to me since it doesn't appear to
> control the iteration.
>
> It also looks like we bump startino in almost all cases. Exceptions are
> memory allocation failure of the buffer and formatter error. Hmm.. could
> we lift the buffer to the bc and reuse it to avoid that error entirely
> (along with repeated allocs/frees)? From there, perhaps we could lift
> the ->startino update to the callers based on something like error !=
> -EFAULT..? (Alternatively, the caller could update it first and then
> walk it back if error == -EFAULT).

xfs_iwalk doesn't know anything about the xfs_bstat_chunk or the
xfs_ibulk that are being passed to the iterator function via the void
*data parameter.  iwalk is a generic iterator which shouldn't ever know
about the bulkreq interface.

I plan to reuse the xfs_iwalk code for other parts of online repair in
the future, so that's why bulkstat_one takes the inode number that
_iwalk gives it, and then uses that to mess around with bc and breq.

I also sort of prefer to keep the startino update the way it is now
because I only want to push it forward for the ~4 cases where we do now
(internal, invalid number, past eofs, or successfully statted).
Any other runtime error leaves the cursor where it was.

> > +	struct xfs_bstat_chunk	*bc = data;
> >  	struct xfs_icdinode	*dic;		/* dinode core info pointer */
> >  	struct xfs_inode	*ip;		/* incore inode pointer */
> >  	struct inode		*inode;
> >  	struct xfs_bstat	*buf;		/* return buffer */
> >  	int			error = 0;	/* error value */
> >  
> > -	*stat = BULKSTAT_RV_NOTHING;
> > -
> > -	if (!buffer || xfs_internal_inum(mp, ino))
> > +	if (xfs_internal_inum(mp, ino)) {
> > +		bc->breq->startino = ino + 1;
> >  		return -EINVAL;
> > +	}
> >  
> >  	buf = kmem_zalloc(sizeof(*buf), KM_SLEEP | KM_MAYFAIL);
> >  	if (!buf)
> >  		return -ENOMEM;
> >  
> > -	error = xfs_iget(mp, NULL, ino,
> > +	error = xfs_iget(mp, tp, ino,
> >  			 (XFS_IGET_DONTCACHE | XFS_IGET_UNTRUSTED),
> >  			 XFS_ILOCK_SHARED, &ip);
> > +	if (error == -ENOENT || error == -EINVAL)
> > +		bc->breq->startino = ino + 1;
> >  	if (error)
> >  		goto out_free;
> >  
> > @@ -119,43 +145,45 @@ xfs_bulkstat_one_int(
> >  	xfs_iunlock(ip, XFS_ILOCK_SHARED);
> >  	xfs_irele(ip);
> >  
> > -	error = formatter(buffer, ubsize, ubused, buf);
> > -	if (!error)
> > -		*stat = BULKSTAT_RV_DIDONE;
> > -
> > - out_free:
> > +	error = bc->formatter(bc->breq, buf);
> > +	switch (error) {
> > +	case XFS_IBULK_BUFFER_FULL:
> > +		error = XFS_IWALK_ABORT;
> > +		/* fall through */
> > +	case 0:
> > +		bc->breq->startino = ino + 1;
> > +		break;
> > +	}
> > +out_free:
> >  	kmem_free(buf);
> >  	return error;
> >  }
> >  
> > -/* Return 0 on success or positive error */
> > -STATIC int
> > -xfs_bulkstat_one_fmt(
> > -	void			__user *ubuffer,
> > -	int			ubsize,
> > -	int			*ubused,
> > -	const xfs_bstat_t	*buffer)
> > -{
> > -	if (ubsize < sizeof(*buffer))
> > -		return -ENOMEM;
> > -	if (copy_to_user(ubuffer, buffer, sizeof(*buffer)))
> > -		return -EFAULT;
> > -	if (ubused)
> > -		*ubused = sizeof(*buffer);
> > -	return 0;
> > -}
> > -
> > +/* Bulkstat a single inode. */
> >  int
> >  xfs_bulkstat_one(
> > -	xfs_mount_t	*mp,		/* mount point for filesystem */
> > -	xfs_ino_t	ino,		/* inode number to get data for */
> > -	void		__user *buffer,	/* buffer to place output in */
> > -	int		ubsize,		/* size of buffer */
> > -	int		*ubused,	/* bytes used by me */
> > -	int		*stat)		/* BULKSTAT_RV_... */
> > +	struct xfs_ibulk	*breq,
> > +	bulkstat_one_fmt_pf	formatter)
> >  {
> > -	return xfs_bulkstat_one_int(mp, ino, buffer, ubsize,
> > -				    xfs_bulkstat_one_fmt, ubused, stat);
> > +	struct xfs_bstat_chunk	bc = {
> > +		.formatter	= formatter,
> > +		.breq		= breq,
> > +	};
> > +	int			error;
> > +
> > +	breq->icount = 1;
> > +	breq->ocount = 0;
> > +
> 
> So ->icount is already set by the caller based on user input. I'd guess
> this is set here to guarantee a single cycle in the event that the user
> provided value is >1, but that seems unnecessary since we're calling the
> internal helper to handle a single inode.
> 
> If we want to set ->icount for whatever reason, can we do it in the
> caller where it's more obvious? That also shows that the ->ocount init
> is unnecessary since the whole structure is initialized in the caller.

Ok.

> > +	error = xfs_bulkstat_one_int(breq->mp, NULL, breq->startino, &bc);
> > +
> > +	/*
> > +	 * If we reported one inode to userspace then we abort because we hit
> > +	 * the end of the buffer.  Don't leak that back to userspace.
> > +	 */
> > +	if (error == XFS_IWALK_ABORT)
> > +		error = 0;
> > +
> > +	return error;
> >  }
> >  
> >  /*
> ...
> > diff --git a/fs/xfs/xfs_itable.h b/fs/xfs/xfs_itable.h
> > index 369e3f159d4e..366d391eb11f 100644
> > --- a/fs/xfs/xfs_itable.h
> > +++ b/fs/xfs/xfs_itable.h
> > @@ -5,63 +5,46 @@
> >  #ifndef __XFS_ITABLE_H__
> >  #define	__XFS_ITABLE_H__
> >  
> > -/*
> > - * xfs_bulkstat() is used to fill in xfs_bstat structures as well as dm_stat
> > - * structures (by the dmi library). This is a pointer to a formatter function
> > - * that will iget the inode and fill in the appropriate structure.
> > - * see xfs_bulkstat_one() and xfs_dm_bulkstat_one() in dmapi_xfs.c
> > - */
> > -typedef int (*bulkstat_one_pf)(struct xfs_mount	*mp,
> > -			       xfs_ino_t	ino,
> > -			       void		__user *buffer,
> > -			       int		ubsize,
> > -			       int		*ubused,
> > -			       int		*stat);
> > +/* In-memory representation of a userspace request for batch inode data. */
> > +struct xfs_ibulk {
> > +	struct xfs_mount	*mp;
> > +	void __user		*ubuffer; /* user output buffer */
> > +	xfs_ino_t		startino; /* start with this inode */
> > +	unsigned int		icount;   /* number of elements in ubuffer */
> > +	unsigned int		ocount;   /* number of records returned */
> > +};
> > +
> > +/* Return value that means we want to abort the walk. */
> > +#define XFS_IBULK_ABORT		(XFS_IWALK_ABORT)
> > +
> > +/* Return value that means the formatting buffer is now full. */
> > +#define XFS_IBULK_BUFFER_FULL	(2)
> >  
> 
> It might be wise to define this such that it's guaranteed not to be the
> same as the abort value, since that is defined externally to this
> header. IBULK_ABORT + 1 perhaps?

FWIW I'm of half a mind to establish a generic "abort walk" error code
and key all of these iterator functions to use it instead of having all
these #define XFS_FOO_ABORT	1 things everywhere.

I'll do #define XFS_IBULK_BUFFER_FULL IBULK_ABORT+1 here.

> Brian
> 
> >  /*
> > - * Values for stat return value.
> > + * Advance the user buffer pointer by one record of the given size.  If the
> > + * buffer is now full, return the appropriate error code.
> >   */
> > -#define BULKSTAT_RV_NOTHING	0
> > -#define BULKSTAT_RV_DIDONE	1
> > -#define BULKSTAT_RV_GIVEUP	2
> > +static inline int
> > +xfs_ibulk_advance(
> > +	struct xfs_ibulk	*breq,
> > +	size_t			bytes)
> > +{
> > +	char __user		*b = breq->ubuffer;
> > +
> > +	breq->ubuffer = b + bytes;
> > +	breq->ocount++;
> > +	return breq->ocount == breq->icount ? XFS_IBULK_BUFFER_FULL : 0;
> > +}
> >  
> >  /*
> >   * Return stat information in bulk (by-inode) for the filesystem.
> >   */
> > -int					/* error status */
> > -xfs_bulkstat(
> > -	xfs_mount_t	*mp,		/* mount point for filesystem */
> > -	xfs_ino_t	*lastino,	/* last inode returned */
> > -	int		*count,		/* size of buffer/count returned */
> > -	bulkstat_one_pf formatter,	/* func that'd fill a single buf */
> > -	size_t		statstruct_size,/* sizeof struct that we're filling */
> > -	char		__user *ubuffer,/* buffer with inode stats */
> > -	int		*done);		/* 1 if there are more stats to get */
> >  
> > -typedef int (*bulkstat_one_fmt_pf)(  /* used size in bytes or negative error */
> > -	void			__user *ubuffer, /* buffer to write to */
> > -	int			ubsize,		 /* remaining user buffer sz */
> > -	int			*ubused,	 /* bytes used by formatter */
> > -	const xfs_bstat_t	*buffer);        /* buffer to read from */
> > +typedef int (*bulkstat_one_fmt_pf)(struct xfs_ibulk *breq,
> > +		const struct xfs_bstat *bstat);
> >  
> > -int
> > -xfs_bulkstat_one_int(
> > -	xfs_mount_t		*mp,
> > -	xfs_ino_t		ino,
> > -	void			__user *buffer,
> > -	int			ubsize,
> > -	bulkstat_one_fmt_pf	formatter,
> > -	int			*ubused,
> > -	int			*stat);
> > -
> > -int
> > -xfs_bulkstat_one(
> > -	xfs_mount_t		*mp,
> > -	xfs_ino_t		ino,
> > -	void			__user *buffer,
> > -	int			ubsize,
> > -	int			*ubused,
> > -	int			*stat);
> > +int xfs_bulkstat_one(struct xfs_ibulk *breq, bulkstat_one_fmt_pf formatter);
> > +int xfs_bulkstat(struct xfs_ibulk *breq, bulkstat_one_fmt_pf formatter);
> >  
> >  typedef int (*inumbers_fmt_pf)(
> >  	void			__user *ubuffer, /* buffer to write to */
> > 



[Index of Archives]     [XFS Filesystem Development (older mail)]     [Linux Filesystem Development]     [Linux Audio Users]     [Yosemite Trails]     [Linux Kernel]     [Linux RAID]     [Linux SCSI]


  Powered by Linux