On Mon, Jun 10, 2019 at 10:38:39AM -0700, Darrick J. Wong wrote: > On Mon, Jun 10, 2019 at 10:02:29AM -0400, Brian Foster wrote: > > On Tue, Jun 04, 2019 at 02:49:53PM -0700, Darrick J. Wong wrote: > > > From: Darrick J. Wong <darrick.wong@xxxxxxxxxx> > > > > > > Create a new ibulk structure incore to help us deal with bulk inode stat > > > state tracking and then convert the bulkstat code to use the new iwalk > > > iterator. This disentangles inode walking from bulk stat control for > > > simpler code and enables us to isolate the formatter functions to the > > > ioctl handling code. > > > > > > Signed-off-by: Darrick J. Wong <darrick.wong@xxxxxxxxxx> > > > --- > > > fs/xfs/xfs_ioctl.c | 65 ++++++-- > > > fs/xfs/xfs_ioctl.h | 5 + > > > fs/xfs/xfs_ioctl32.c | 88 +++++------ > > > fs/xfs/xfs_itable.c | 407 ++++++++++++++------------------------------------ > > > fs/xfs/xfs_itable.h | 79 ++++------ > > > 5 files changed, 245 insertions(+), 399 deletions(-) > > > > > > > > > diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c > > > index 5ffbdcff3dba..43734901aeb9 100644 > > > --- a/fs/xfs/xfs_ioctl.c > > > +++ b/fs/xfs/xfs_ioctl.c > > ... > > > @@ -745,35 +757,54 @@ xfs_ioc_bulkstat( > > > if (copy_from_user(&bulkreq, arg, sizeof(xfs_fsop_bulkreq_t))) > > > return -EFAULT; > > > > > > - if (copy_from_user(&inlast, bulkreq.lastip, sizeof(__s64))) > > > + if (copy_from_user(&lastino, bulkreq.lastip, sizeof(__s64))) > > > return -EFAULT; > > > > > > - if ((count = bulkreq.icount) <= 0) > > > + if (bulkreq.icount <= 0) > > > return -EINVAL; > > > > > > if (bulkreq.ubuffer == NULL) > > > return -EINVAL; > > > > > > - if (cmd == XFS_IOC_FSINUMBERS) > > > - error = xfs_inumbers(mp, &inlast, &count, > > > + breq.ubuffer = bulkreq.ubuffer; > > > + breq.icount = bulkreq.icount; > > > + > > > + /* > > > + * FSBULKSTAT_SINGLE expects that *lastip contains the inode number > > > + * that we want to stat. However, FSINUMBERS and FSBULKSTAT expect > > > + * that *lastip contains either zero or the number of the last inode to > > > + * be examined by the previous call and return results starting with > > > + * the next inode after that. The new bulk request functions take the > > > + * inode to start with, so we have to adjust the lastino/startino > > > + * parameter to maintain correct function. > > > + */ > > > > It's kind of difficult to tell what's new or old when just looking at > > the code. The comment suggests FSINUMBERS and FSBULKSTAT use the same > > interface wrt to lastip, but they aren't handled the same below. I take > > it this is because xfs_inumbers() still has the same behavior whereas > > xfs_bulkstat() has been changed to operate based on breq rather than > > lastip..? > > Yes. By the end of the series we'll have converted FSINUMBERS too, but > for the ~5 or so patches until we get there, the only way to tell new > vs. old interface is whether it takes breq or pointers to stuff in breq. > > > > + if (cmd == XFS_IOC_FSINUMBERS) { > > > + int count = breq.icount; > > > + > > > + breq.startino = lastino; > > > + error = xfs_inumbers(mp, &breq.startino, &count, > > > bulkreq.ubuffer, xfs_inumbers_fmt); > > > - else if (cmd == XFS_IOC_FSBULKSTAT_SINGLE) > > > - error = xfs_bulkstat_one(mp, inlast, bulkreq.ubuffer, > > > - sizeof(xfs_bstat_t), NULL, &done); > > > - else /* XFS_IOC_FSBULKSTAT */ > > > - error = xfs_bulkstat(mp, &inlast, &count, xfs_bulkstat_one, > > > - sizeof(xfs_bstat_t), bulkreq.ubuffer, > > > - &done); > > > + breq.ocount = count; > > > + lastino = breq.startino; > > > + } else if (cmd == XFS_IOC_FSBULKSTAT_SINGLE) { > > > + breq.startino = lastino; > > > + error = xfs_bulkstat_one(&breq, xfs_bulkstat_one_fmt); > > > + lastino = breq.startino; > > > + } else { /* XFS_IOC_FSBULKSTAT */ > > > + breq.startino = lastino + 1; > > > + error = xfs_bulkstat(&breq, xfs_bulkstat_one_fmt); > > > + lastino = breq.startino - 1; > > > + } > > > > > ... > > > diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c > > > index 814ffe6fbab7..add15819daf3 100644 > > > --- a/fs/xfs/xfs_ioctl32.c > > > +++ b/fs/xfs/xfs_ioctl32.c > > ... > > > @@ -284,38 +263,57 @@ xfs_compat_ioc_bulkstat( > > > return -EFAULT; > > > bulkreq.ocount = compat_ptr(addr); > > > > > > - if (copy_from_user(&inlast, bulkreq.lastip, sizeof(__s64))) > > > + if (copy_from_user(&lastino, bulkreq.lastip, sizeof(__s64))) > > > return -EFAULT; > > > + breq.startino = lastino + 1; > > > > > > - if ((count = bulkreq.icount) <= 0) > > > + if (bulkreq.icount <= 0) > > > return -EINVAL; > > > > > > if (bulkreq.ubuffer == NULL) > > > return -EINVAL; > > > > > > + breq.ubuffer = bulkreq.ubuffer; > > > + breq.icount = bulkreq.icount; > > > + > > > + /* > > > + * FSBULKSTAT_SINGLE expects that *lastip contains the inode number > > > + * that we want to stat. However, FSINUMBERS and FSBULKSTAT expect > > > + * that *lastip contains either zero or the number of the last inode to > > > + * be examined by the previous call and return results starting with > > > + * the next inode after that. The new bulk request functions take the > > > + * inode to start with, so we have to adjust the lastino/startino > > > + * parameter to maintain correct function. > > > + */ > > > > (Same comment here.) > > > > > if (cmd == XFS_IOC_FSINUMBERS_32) { > > > - error = xfs_inumbers(mp, &inlast, &count, > > > + int count = breq.icount; > > > + > > > + breq.startino = lastino; > > > + error = xfs_inumbers(mp, &breq.startino, &count, > > > bulkreq.ubuffer, inumbers_func); > > > + breq.ocount = count; > > > + lastino = breq.startino; > > > } else if (cmd == XFS_IOC_FSBULKSTAT_SINGLE_32) { > > > - int res; > > > - > > > - error = bs_one_func(mp, inlast, bulkreq.ubuffer, > > > - bs_one_size, NULL, &res); > > > + breq.startino = lastino; > > > + error = xfs_bulkstat_one(&breq, bs_one_func); > > > + lastino = breq.startino; > > > } else if (cmd == XFS_IOC_FSBULKSTAT_32) { > > > - error = xfs_bulkstat(mp, &inlast, &count, > > > - bs_one_func, bs_one_size, > > > - bulkreq.ubuffer, &done); > > > - } else > > > + breq.startino = lastino + 1; > > > + error = xfs_bulkstat(&breq, bs_one_func); > > > + lastino = breq.startino - 1; > > > + } else { > > > error = -EINVAL; > > > + } > > > if (error) > > > return error; > > > > > > + lastino = breq.startino - 1; > > > > Should this be here? > > Nope. > > > > if (bulkreq.lastip != NULL && > > > - copy_to_user(bulkreq.lastip, &inlast, sizeof(xfs_ino_t))) > > > + copy_to_user(bulkreq.lastip, &lastino, sizeof(xfs_ino_t))) > > > return -EFAULT; > > > > > > if (bulkreq.ocount != NULL && > > > - copy_to_user(bulkreq.ocount, &count, sizeof(count))) > > > + copy_to_user(bulkreq.ocount, &breq.ocount, sizeof(__s32))) > > > return -EFAULT; > > > > > > return 0; > > > diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c > > > index 3ca1c454afe6..87c597ea1df7 100644 > > > --- a/fs/xfs/xfs_itable.c > > > +++ b/fs/xfs/xfs_itable.c > > > @@ -22,37 +22,63 @@ > > > #include "xfs_iwalk.h" > > > > > > /* > > > - * Return stat information for one inode. > > > - * Return 0 if ok, else errno. > > > + * Bulk Stat > > > + * ========= > > > + * > > > + * Use the inode walking functions to fill out struct xfs_bstat for every > > > + * allocated inode, then pass the stat information to some externally provided > > > + * iteration function. > > > */ > > > -int > > > + > > > +struct xfs_bstat_chunk { > > > + bulkstat_one_fmt_pf formatter; > > > + struct xfs_ibulk *breq; > > > +}; > > > + > > > +/* > > > + * Fill out the bulkstat info for a single inode and report it somewhere. > > > + * > > > + * bc->breq->lastino is effectively the inode cursor as we walk through the > > > + * filesystem. Therefore, we update it any time we need to move the cursor > > > + * forward, regardless of whether or not we're sending any bstat information > > > + * back to userspace. If the inode is internal metadata or, has been freed > > > + * out from under us, we just simply keep going. > > > + * > > > + * However, if any other type of error happens we want to stop right where we > > > + * are so that userspace will call back with exact number of the bad inode and > > > + * we can send back an error code. > > > + * > > > + * Note that if the formatter tells us there's no space left in the buffer we > > > + * move the cursor forward and abort the walk. > > > + */ > > > +STATIC int > > > xfs_bulkstat_one_int( > > > - struct xfs_mount *mp, /* mount point for filesystem */ > > > - xfs_ino_t ino, /* inode to get data for */ > > > - void __user *buffer, /* buffer to place output in */ > > > - int ubsize, /* size of buffer */ > > > - bulkstat_one_fmt_pf formatter, /* formatter, copy to user */ > > > - int *ubused, /* bytes used by me */ > > > - int *stat) /* BULKSTAT_RV_... */ > > > + struct xfs_mount *mp, > > > + struct xfs_trans *tp, > > > + xfs_ino_t ino, > > > + void *data) > > > { > > > > Any reason this function takes an 'ino' param considering it's sourced > > from breq->startino and we bump that value from within this function? > > The latter seems slightly misplaced to me since it doesn't appear to > > control the iteration. > > > > It also looks like we bump startino in almost all cases. Exceptions are > > memory allocation failure of the buffer and formatter error. Hmm.. could > > we lift the buffer to the bc and reuse it to avoid that error entirely > > (along with repeated allocs/frees)? From there, perhaps we could lift > > the ->startino update to the callers based on something like error != > > -EFAULT..? (Alternatively, the caller could update it first and then > > walk it back if error == -EFAULT). > > xfs_iwalk doesn't know anything about the xfs_bstat_chunk or the > xfs_ibulk that are being passed to the iterator function via the void > *data parameter. iwalk is a generic iterator which shouldn't ever know > about the bulkreq interface. > Sure, I'm not suggesting to push anything into the iwalk code. That's obviously a separate layer. I'm referring to the direct callers of xfs_bulkstat_one_int(), which is still bulkstat code AFAICT. > I plan to reuse the xfs_iwalk code for other parts of online repair in > the future, so that's why bulkstat_one takes the inode number that > _iwalk gives it, and then uses that to mess around with bc and breq. > Ok. BTW, looking again I notice that xfs_bulkstat_one_int() passes bc as a void pointer. That could probably be fixed up to take the xfs_bstat_chunk from the current callers. > I also sort of prefer to keep the startino update the way it is now > because I only want to push it forward for the ~4 cases where we do now > (internal, invalid number, past eofs, or successfully statted). > Any other runtime error leaves the cursor where it was. > Fair enough, though it could also be cleaned up a bit with an exit label IMO. Brian > > > + struct xfs_bstat_chunk *bc = data; > > > struct xfs_icdinode *dic; /* dinode core info pointer */ > > > struct xfs_inode *ip; /* incore inode pointer */ > > > struct inode *inode; > > > struct xfs_bstat *buf; /* return buffer */ > > > int error = 0; /* error value */ > > > > > > - *stat = BULKSTAT_RV_NOTHING; > > > - > > > - if (!buffer || xfs_internal_inum(mp, ino)) > > > + if (xfs_internal_inum(mp, ino)) { > > > + bc->breq->startino = ino + 1; > > > return -EINVAL; > > > + } > > > > > > buf = kmem_zalloc(sizeof(*buf), KM_SLEEP | KM_MAYFAIL); > > > if (!buf) > > > return -ENOMEM; > > > > > > - error = xfs_iget(mp, NULL, ino, > > > + error = xfs_iget(mp, tp, ino, > > > (XFS_IGET_DONTCACHE | XFS_IGET_UNTRUSTED), > > > XFS_ILOCK_SHARED, &ip); > > > + if (error == -ENOENT || error == -EINVAL) > > > + bc->breq->startino = ino + 1; > > > if (error) > > > goto out_free; > > > > > > @@ -119,43 +145,45 @@ xfs_bulkstat_one_int( > > > xfs_iunlock(ip, XFS_ILOCK_SHARED); > > > xfs_irele(ip); > > > > > > - error = formatter(buffer, ubsize, ubused, buf); > > > - if (!error) > > > - *stat = BULKSTAT_RV_DIDONE; > > > - > > > - out_free: > > > + error = bc->formatter(bc->breq, buf); > > > + switch (error) { > > > + case XFS_IBULK_BUFFER_FULL: > > > + error = XFS_IWALK_ABORT; > > > + /* fall through */ > > > + case 0: > > > + bc->breq->startino = ino + 1; > > > + break; > > > + } > > > +out_free: > > > kmem_free(buf); > > > return error; > > > } > > > > > > -/* Return 0 on success or positive error */ > > > -STATIC int > > > -xfs_bulkstat_one_fmt( > > > - void __user *ubuffer, > > > - int ubsize, > > > - int *ubused, > > > - const xfs_bstat_t *buffer) > > > -{ > > > - if (ubsize < sizeof(*buffer)) > > > - return -ENOMEM; > > > - if (copy_to_user(ubuffer, buffer, sizeof(*buffer))) > > > - return -EFAULT; > > > - if (ubused) > > > - *ubused = sizeof(*buffer); > > > - return 0; > > > -} > > > - > > > +/* Bulkstat a single inode. */ > > > int > > > xfs_bulkstat_one( > > > - xfs_mount_t *mp, /* mount point for filesystem */ > > > - xfs_ino_t ino, /* inode number to get data for */ > > > - void __user *buffer, /* buffer to place output in */ > > > - int ubsize, /* size of buffer */ > > > - int *ubused, /* bytes used by me */ > > > - int *stat) /* BULKSTAT_RV_... */ > > > + struct xfs_ibulk *breq, > > > + bulkstat_one_fmt_pf formatter) > > > { > > > - return xfs_bulkstat_one_int(mp, ino, buffer, ubsize, > > > - xfs_bulkstat_one_fmt, ubused, stat); > > > + struct xfs_bstat_chunk bc = { > > > + .formatter = formatter, > > > + .breq = breq, > > > + }; > > > + int error; > > > + > > > + breq->icount = 1; > > > + breq->ocount = 0; > > > + > > > > So ->icount is already set by the caller based on user input. I'd guess > > this is set here to guarantee a single cycle in the event that the user > > provided value is >1, but that seems unnecessary since we're calling the > > internal helper to handle a single inode. > > > > If we want to set ->icount for whatever reason, can we do it in the > > caller where it's more obvious? That also shows that the ->ocount init > > is unnecessary since the whole structure is initialized in the caller. > > Ok. > > > > + error = xfs_bulkstat_one_int(breq->mp, NULL, breq->startino, &bc); > > > + > > > + /* > > > + * If we reported one inode to userspace then we abort because we hit > > > + * the end of the buffer. Don't leak that back to userspace. > > > + */ > > > + if (error == XFS_IWALK_ABORT) > > > + error = 0; > > > + > > > + return error; > > > } > > > > > > /* > > ... > > > diff --git a/fs/xfs/xfs_itable.h b/fs/xfs/xfs_itable.h > > > index 369e3f159d4e..366d391eb11f 100644 > > > --- a/fs/xfs/xfs_itable.h > > > +++ b/fs/xfs/xfs_itable.h > > > @@ -5,63 +5,46 @@ > > > #ifndef __XFS_ITABLE_H__ > > > #define __XFS_ITABLE_H__ > > > > > > -/* > > > - * xfs_bulkstat() is used to fill in xfs_bstat structures as well as dm_stat > > > - * structures (by the dmi library). This is a pointer to a formatter function > > > - * that will iget the inode and fill in the appropriate structure. > > > - * see xfs_bulkstat_one() and xfs_dm_bulkstat_one() in dmapi_xfs.c > > > - */ > > > -typedef int (*bulkstat_one_pf)(struct xfs_mount *mp, > > > - xfs_ino_t ino, > > > - void __user *buffer, > > > - int ubsize, > > > - int *ubused, > > > - int *stat); > > > +/* In-memory representation of a userspace request for batch inode data. */ > > > +struct xfs_ibulk { > > > + struct xfs_mount *mp; > > > + void __user *ubuffer; /* user output buffer */ > > > + xfs_ino_t startino; /* start with this inode */ > > > + unsigned int icount; /* number of elements in ubuffer */ > > > + unsigned int ocount; /* number of records returned */ > > > +}; > > > + > > > +/* Return value that means we want to abort the walk. */ > > > +#define XFS_IBULK_ABORT (XFS_IWALK_ABORT) > > > + > > > +/* Return value that means the formatting buffer is now full. */ > > > +#define XFS_IBULK_BUFFER_FULL (2) > > > > > > > It might be wise to define this such that it's guaranteed not to be the > > same as the abort value, since that is defined externally to this > > header. IBULK_ABORT + 1 perhaps? > > FWIW I'm of half a mind to establish a generic "abort walk" error code > and key all of these iterator functions to use it instead of having all > these #define XFS_FOO_ABORT 1 things everywhere. > > I'll do #define XFS_IBULK_BUFFER_FULL IBULK_ABORT+1 here. > > > Brian > > > > > /* > > > - * Values for stat return value. > > > + * Advance the user buffer pointer by one record of the given size. If the > > > + * buffer is now full, return the appropriate error code. > > > */ > > > -#define BULKSTAT_RV_NOTHING 0 > > > -#define BULKSTAT_RV_DIDONE 1 > > > -#define BULKSTAT_RV_GIVEUP 2 > > > +static inline int > > > +xfs_ibulk_advance( > > > + struct xfs_ibulk *breq, > > > + size_t bytes) > > > +{ > > > + char __user *b = breq->ubuffer; > > > + > > > + breq->ubuffer = b + bytes; > > > + breq->ocount++; > > > + return breq->ocount == breq->icount ? XFS_IBULK_BUFFER_FULL : 0; > > > +} > > > > > > /* > > > * Return stat information in bulk (by-inode) for the filesystem. > > > */ > > > -int /* error status */ > > > -xfs_bulkstat( > > > - xfs_mount_t *mp, /* mount point for filesystem */ > > > - xfs_ino_t *lastino, /* last inode returned */ > > > - int *count, /* size of buffer/count returned */ > > > - bulkstat_one_pf formatter, /* func that'd fill a single buf */ > > > - size_t statstruct_size,/* sizeof struct that we're filling */ > > > - char __user *ubuffer,/* buffer with inode stats */ > > > - int *done); /* 1 if there are more stats to get */ > > > > > > -typedef int (*bulkstat_one_fmt_pf)( /* used size in bytes or negative error */ > > > - void __user *ubuffer, /* buffer to write to */ > > > - int ubsize, /* remaining user buffer sz */ > > > - int *ubused, /* bytes used by formatter */ > > > - const xfs_bstat_t *buffer); /* buffer to read from */ > > > +typedef int (*bulkstat_one_fmt_pf)(struct xfs_ibulk *breq, > > > + const struct xfs_bstat *bstat); > > > > > > -int > > > -xfs_bulkstat_one_int( > > > - xfs_mount_t *mp, > > > - xfs_ino_t ino, > > > - void __user *buffer, > > > - int ubsize, > > > - bulkstat_one_fmt_pf formatter, > > > - int *ubused, > > > - int *stat); > > > - > > > -int > > > -xfs_bulkstat_one( > > > - xfs_mount_t *mp, > > > - xfs_ino_t ino, > > > - void __user *buffer, > > > - int ubsize, > > > - int *ubused, > > > - int *stat); > > > +int xfs_bulkstat_one(struct xfs_ibulk *breq, bulkstat_one_fmt_pf formatter); > > > +int xfs_bulkstat(struct xfs_ibulk *breq, bulkstat_one_fmt_pf formatter); > > > > > > typedef int (*inumbers_fmt_pf)( > > > void __user *ubuffer, /* buffer to write to */ > > >