On Fri, Jun 19, 2015 at 01:01:50PM +0200, Jan Ťulák wrote: > From: Dave Chinner <dchinner@xxxxxxxxxx> > > Both xfs_repair and mkfs.xfs need to agree on what is a "multidisk" > configuration - mkfs for determining the AG count of the filesystem, > repair for determining how to automatically parallelise its > execution. This requires a bunch of common defines that both mkfs > and repair need to share. > > In fact, most of the defines in xfs_mkfs.h could be shared with > other programs (i.e. all the defaults mkfs uses) and so it is > simplest to move xfs_mkfs.h to the shared include directory and add > the new defines to it directly. > > Signed-off-by: Dave Chinner <dchinner@xxxxxxxxxx> > Signed-off-by: Jan Ťulák <jtulak@xxxxxxxxxx> > --- > include/Makefile | 8 ++++- > include/xfs_mkfs.h | 98 +++++++++++++++++++++++++++++++++++++++++++++++++++++ > mkfs/Makefile | 2 +- > mkfs/xfs_mkfs.c | 56 +++++++++++++++--------------- > mkfs/xfs_mkfs.h | 89 ------------------------------------------------ > repair/xfs_repair.c | 45 ++++++++++++++++++++++-- > 6 files changed, 178 insertions(+), 120 deletions(-) > create mode 100644 include/xfs_mkfs.h > delete mode 100644 mkfs/xfs_mkfs.h > > diff --git a/include/Makefile b/include/Makefile > index 70e43a0..3269ec3 100644 > --- a/include/Makefile > +++ b/include/Makefile > @@ -26,9 +26,15 @@ QAHFILES = libxfs.h libxlog.h \ > xfs_inode.h \ > xfs_log_recover.h \ > xfs_metadump.h \ > + xfs_mkfs.h \ > xfs_mount.h \ > + xfs_quota_defs.h \ > + xfs_sb.h \ > + xfs_shared.h \ > xfs_trace.h \ > - xfs_trans.h > + xfs_trans.h \ > + xfs_trans_resv.h \ > + xfs_trans_space.h > > HFILES = handle.h jdm.h xqm.h xfs.h > HFILES += $(PKG_PLATFORM).h > diff --git a/include/xfs_mkfs.h b/include/xfs_mkfs.h > new file mode 100644 > index 0000000..3388f6d > --- /dev/null > +++ b/include/xfs_mkfs.h > @@ -0,0 +1,98 @@ > +/* > + * Copyright (c) 2000-2001,2004-2005 Silicon Graphics, Inc. > + * All Rights Reserved. 
> + * > + * This program is free software; you can redistribute it and/or > + * modify it under the terms of the GNU General Public License as > + * published by the Free Software Foundation. > + * > + * This program is distributed in the hope that it would be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > + * GNU General Public License for more details. > + * > + * You should have received a copy of the GNU General Public License > + * along with this program; if not, write the Free Software Foundation, > + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA > + */ > +#ifndef __XFS_MKFS_H__ > +#define __XFS_MKFS_H__ > + > +#define XFS_DFL_SB_VERSION_BITS \ > + (XFS_SB_VERSION_NLINKBIT | \ > + XFS_SB_VERSION_EXTFLGBIT | \ > + XFS_SB_VERSION_DIRV2BIT) > + > +#define XFS_SB_VERSION_MKFS(crc,ia,dia,log2,attr1,sflag,ci,more) (\ > + ((crc)||(ia)||(dia)||(log2)||(attr1)||(sflag)||(ci)||(more)) ? \ > + (((crc) ? XFS_SB_VERSION_5 : XFS_SB_VERSION_4) | \ > + ((ia) ? XFS_SB_VERSION_ALIGNBIT : 0) | \ > + ((dia) ? XFS_SB_VERSION_DALIGNBIT : 0) | \ > + ((log2) ? XFS_SB_VERSION_LOGV2BIT : 0) | \ > + ((attr1) ? XFS_SB_VERSION_ATTRBIT : 0) | \ > + ((sflag) ? XFS_SB_VERSION_SECTORBIT : 0) | \ > + ((ci) ? XFS_SB_VERSION_BORGBIT : 0) | \ > + ((more) ? XFS_SB_VERSION_MOREBITSBIT : 0) | \ > + XFS_DFL_SB_VERSION_BITS | \ > + 0 ) : XFS_SB_VERSION_1 ) > + > +#define XFS_SB_VERSION2_MKFS(crc, lazycount, attr2, projid32bit, parent, \ > + ftype) (\ > + ((lazycount) ? XFS_SB_VERSION2_LAZYSBCOUNTBIT : 0) | \ > + ((attr2) ? XFS_SB_VERSION2_ATTR2BIT : 0) | \ > + ((projid32bit) ? XFS_SB_VERSION2_PROJID32BIT : 0) | \ > + ((parent) ? XFS_SB_VERSION2_PARENTBIT : 0) | \ > + ((crc) ? XFS_SB_VERSION2_CRCBIT : 0) | \ > + ((ftype) ? 
XFS_SB_VERSION2_FTYPE : 0) | \ > + 0 ) > + > +#define XFS_DFL_BLOCKSIZE_LOG 12 /* 4096 byte blocks */ > +#define XFS_DINODE_DFL_LOG 8 /* 256 byte inodes */ > +#define XFS_DINODE_DFL_CRC_LOG 9 /* 512 byte inodes for CRCs */ > +#define XFS_MIN_DATA_BLOCKS 100 > +#define XFS_MIN_INODE_PERBLOCK 2 /* min inodes per block */ > +#define XFS_DFL_IMAXIMUM_PCT 25 /* max % of space for inodes */ > +#define XFS_IFLAG_ALIGN 1 /* -i align defaults on */ > +#define XFS_MIN_REC_DIRSIZE 12 /* 4096 byte dirblocks (V2) */ > +#define XFS_DFL_DIR_VERSION 2 /* default directory version */ > +#define XFS_DFL_LOG_SIZE 1000 /* default log size, blocks */ > +#define XFS_DFL_LOG_FACTOR 5 /* default log size, factor */ > + /* with max trans reservation */ > +#define XFS_MAX_INODE_SIG_BITS 32 /* most significant bits in an > + * inode number that we'll > + * accept w/o warnings > + */ > + > +#define XFS_AG_BYTES(bblog) ((long long)BBSIZE << (bblog)) > +#define XFS_AG_MIN_BYTES ((XFS_AG_BYTES(15))) /* 16 MB */ > +#define XFS_AG_MIN_BLOCKS(blog) ((XFS_AG_BYTES(15)) >> (blog)) > +#define XFS_AG_MAX_BLOCKS(blog) ((XFS_AG_BYTES(31) - 1) >> (blog)) > + > +#define XFS_MAX_AGNUMBER ((xfs_agnumber_t)(NULLAGNUMBER - 1)) > + > +/* > + * These values define what we consider a "multi-disk" filesystem. That is, a > + * filesystem that is likely to be made up of multiple devices, and hence have > + * some level of parallelism available to it at the IO level. 
> + */ > +#define XFS_MULTIDISK_AGLOG 5 /* 32 AGs */ > +#define XFS_NOMULTIDISK_AGLOG 2 /* 4 AGs */ > +#define XFS_MULTIDISK_AGCOUNT (1 << XFS_MULTIDISK_AGLOG) > + > + > +/* xfs_mkfs.c */ > +extern int isdigits (char *str); > +extern long long cvtnum (unsigned int blocksize, > + unsigned int sectorsize, char *s); > + > +/* proto.c */ > +extern char *setup_proto (char *fname); > +extern void parse_proto (xfs_mount_t *mp, struct fsxattr *fsx, char **pp); > +extern void res_failed (int err); > + > +/* maxtrres.c */ > +extern int max_trans_res (int crcs_enabled, int dirversion, > + int sectorlog, int blocklog, int inodelog, int dirblocklog, > + int logversion, int log_sunit); > + > +#endif /* __XFS_MKFS_H__ */ > diff --git a/mkfs/Makefile b/mkfs/Makefile > index fd1f615..82326e0 100644 > --- a/mkfs/Makefile > +++ b/mkfs/Makefile > @@ -8,7 +8,7 @@ include $(TOPDIR)/include/builddefs > LTCOMMAND = mkfs.xfs > FSTYP = fstyp > > -HFILES = xfs_mkfs.h > +HFILES = > CFILES = maxtrres.c proto.c xfs_mkfs.c > > ifeq ($(ENABLE_BLKID),yes) > diff --git a/mkfs/xfs_mkfs.c b/mkfs/xfs_mkfs.c > index 83f7749..d0de90d 100644 > --- a/mkfs/xfs_mkfs.c > +++ b/mkfs/xfs_mkfs.c > @@ -24,7 +24,7 @@ > #include <disk/fstyp.h> > #include <disk/volume.h> > #endif > -#include "xfs_mkfs.h" > +#include <xfs/xfs_mkfs.h> > > /* > * Device topology information. > @@ -688,43 +688,45 @@ calc_default_ag_geometry( > } > > /* > - * For the remainder we choose an AG size based on the > - * number of data blocks available, trying to keep the > - * number of AGs relatively small (especially compared > - * to the original algorithm). AG count is calculated > - * based on the preferred AG size, not vice-versa - the > - * count can be increased by growfs, so prefer to use > - * smaller counts at mkfs time. > - * > - * For a single underlying storage device between 128MB > - * and 4TB in size, just use 4 AGs, otherwise scale up > - * smoothly between min/max AG sizes. 
> + * For a single underlying storage device between 128MB and 4TB in size > + * just use 4 AGs and scale up smoothly between min/max AG sizes. > */ > - > - if (!multidisk && dblocks >= MEGABYTES(128, blocklog)) { > + if (!multidisk) { > if (dblocks >= TERABYTES(4, blocklog)) { > blocks = XFS_AG_MAX_BLOCKS(blocklog); > goto done; > + } else if (dblocks >= MEGABYTES(128, blocklog)) { > + shift = XFS_NOMULTIDISK_AGLOG; > + goto calc_blocks; > } > - shift = 2; > - } else if (dblocks > GIGABYTES(512, blocklog)) > - shift = 5; > - else if (dblocks > GIGABYTES(8, blocklog)) > - shift = 4; > - else if (dblocks >= MEGABYTES(128, blocklog)) > - shift = 3; > - else if (dblocks >= MEGABYTES(64, blocklog)) > - shift = 2; > - else if (dblocks >= MEGABYTES(32, blocklog)) > - shift = 1; > - else > - shift = 0; > + } > + > + /* > + * For the multidisk configs we choose an AG count based on the number > + * of data blocks available, trying to keep the number of AGs higher > + * than the single disk configurations. This makes the assumption that > + * larger filesystems have more parallelism available to them. > + */ > + shift = XFS_MULTIDISK_AGLOG; > + if (dblocks < GIGABYTES(512, blocklog)) > + shift--; > + if (dblocks < GIGABYTES(8, blocklog)) > + shift--; > + if (dblocks < MEGABYTES(128, blocklog)) > + shift--; > + if (dblocks < MEGABYTES(64, blocklog)) > + shift--; > + if (dblocks < MEGABYTES(32, blocklog)) > + shift--; > + Intended change in behavior of the defaults for fs' that match these size thresholds (the 512g and 8g ones anyways)? For example, in the old code a 512GB fs gets a shift of 4 while the same fs gets shift = 5 in the new code. Brian > /* > * If dblocks is not evenly divisible by the number of > * desired AGs, round "blocks" up so we don't lose the > * last bit of the filesystem. The same principle applies > * to the AG count, so we don't lose the last AG! 
> */ > +calc_blocks: > + ASSERT(shift >= 0 && shift <= XFS_MULTIDISK_AGLOG); > blocks = dblocks >> shift; > if (dblocks & xfs_mask32lo(shift)) { > if (blocks < XFS_AG_MAX_BLOCKS(blocklog)) > diff --git a/mkfs/xfs_mkfs.h b/mkfs/xfs_mkfs.h > deleted file mode 100644 > index 9df5f37..0000000 > --- a/mkfs/xfs_mkfs.h > +++ /dev/null > @@ -1,89 +0,0 @@ > -/* > - * Copyright (c) 2000-2001,2004-2005 Silicon Graphics, Inc. > - * All Rights Reserved. > - * > - * This program is free software; you can redistribute it and/or > - * modify it under the terms of the GNU General Public License as > - * published by the Free Software Foundation. > - * > - * This program is distributed in the hope that it would be useful, > - * but WITHOUT ANY WARRANTY; without even the implied warranty of > - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > - * GNU General Public License for more details. > - * > - * You should have received a copy of the GNU General Public License > - * along with this program; if not, write the Free Software Foundation, > - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA > - */ > -#ifndef __XFS_MKFS_H__ > -#define __XFS_MKFS_H__ > - > -#define XFS_DFL_SB_VERSION_BITS \ > - (XFS_SB_VERSION_NLINKBIT | \ > - XFS_SB_VERSION_EXTFLGBIT | \ > - XFS_SB_VERSION_DIRV2BIT) > - > -#define XFS_SB_VERSION_MKFS(crc,ia,dia,log2,attr1,sflag,ci,more) (\ > - ((crc)||(ia)||(dia)||(log2)||(attr1)||(sflag)||(ci)||(more)) ? \ > - (((crc) ? XFS_SB_VERSION_5 : XFS_SB_VERSION_4) | \ > - ((ia) ? XFS_SB_VERSION_ALIGNBIT : 0) | \ > - ((dia) ? XFS_SB_VERSION_DALIGNBIT : 0) | \ > - ((log2) ? XFS_SB_VERSION_LOGV2BIT : 0) | \ > - ((attr1) ? XFS_SB_VERSION_ATTRBIT : 0) | \ > - ((sflag) ? XFS_SB_VERSION_SECTORBIT : 0) | \ > - ((ci) ? XFS_SB_VERSION_BORGBIT : 0) | \ > - ((more) ? 
XFS_SB_VERSION_MOREBITSBIT : 0) | \ > - XFS_DFL_SB_VERSION_BITS | \ > - 0 ) : XFS_SB_VERSION_1 ) > - > -#define XFS_SB_VERSION2_MKFS(crc, lazycount, attr2, projid32bit, parent, \ > - ftype) (\ > - ((lazycount) ? XFS_SB_VERSION2_LAZYSBCOUNTBIT : 0) | \ > - ((attr2) ? XFS_SB_VERSION2_ATTR2BIT : 0) | \ > - ((projid32bit) ? XFS_SB_VERSION2_PROJID32BIT : 0) | \ > - ((parent) ? XFS_SB_VERSION2_PARENTBIT : 0) | \ > - ((crc) ? XFS_SB_VERSION2_CRCBIT : 0) | \ > - ((ftype) ? XFS_SB_VERSION2_FTYPE : 0) | \ > - 0 ) > - > -#define XFS_DFL_BLOCKSIZE_LOG 12 /* 4096 byte blocks */ > -#define XFS_DINODE_DFL_LOG 8 /* 256 byte inodes */ > -#define XFS_DINODE_DFL_CRC_LOG 9 /* 512 byte inodes for CRCs */ > -#define XFS_MIN_DATA_BLOCKS 100 > -#define XFS_MIN_INODE_PERBLOCK 2 /* min inodes per block */ > -#define XFS_DFL_IMAXIMUM_PCT 25 /* max % of space for inodes */ > -#define XFS_IFLAG_ALIGN 1 /* -i align defaults on */ > -#define XFS_MIN_REC_DIRSIZE 12 /* 4096 byte dirblocks (V2) */ > -#define XFS_DFL_DIR_VERSION 2 /* default directory version */ > -#define XFS_DFL_LOG_SIZE 1000 /* default log size, blocks */ > -#define XFS_DFL_LOG_FACTOR 5 /* default log size, factor */ > - /* with max trans reservation */ > -#define XFS_MAX_INODE_SIG_BITS 32 /* most significant bits in an > - * inode number that we'll > - * accept w/o warnings > - */ > - > -#define XFS_AG_BYTES(bblog) ((long long)BBSIZE << (bblog)) > -#define XFS_AG_MIN_BYTES ((XFS_AG_BYTES(15))) /* 16 MB */ > -#define XFS_AG_MIN_BLOCKS(blog) ((XFS_AG_BYTES(15)) >> (blog)) > -#define XFS_AG_MAX_BLOCKS(blog) ((XFS_AG_BYTES(31) - 1) >> (blog)) > - > -#define XFS_MAX_AGNUMBER ((xfs_agnumber_t)(NULLAGNUMBER - 1)) > - > - > -/* xfs_mkfs.c */ > -extern int isdigits (char *str); > -extern long long cvtnum (unsigned int blocksize, > - unsigned int sectorsize, char *s); > - > -/* proto.c */ > -extern char *setup_proto (char *fname); > -extern void parse_proto (xfs_mount_t *mp, struct fsxattr *fsx, char **pp); > -extern void res_failed (int 
err); > - > -/* maxtrres.c */ > -extern int max_trans_res (int crcs_enabled, int dirversion, > - int sectorlog, int blocklog, int inodelog, int dirblocklog, > - int logversion, int log_sunit); > - > -#endif /* __XFS_MKFS_H__ */ > diff --git a/repair/xfs_repair.c b/repair/xfs_repair.c > index 11a6069..7e2d7ff 100644 > --- a/repair/xfs_repair.c > +++ b/repair/xfs_repair.c > @@ -19,6 +19,7 @@ > #include <xfs/libxfs.h> > #include <xfs/libxlog.h> > #include <sys/resource.h> > +#include <xfs/xfs_mkfs.h> > #include "avl.h" > #include "avl64.h" > #include "globals.h" > @@ -526,6 +527,33 @@ _("sb realtime summary inode %" PRIu64 " %sinconsistent with calculated value %u > > } > > +/* > + * mkfs increases the AG count for "multidisk" configurations, we want > + * to target these for an increase in thread count. Hence check the superblock > + * geometry information to determine if mkfs considered this a multidisk > + * configuration. > + */ > +static bool > +is_multidisk_filesystem( > + struct xfs_mount *mp) > +{ > + struct xfs_sb *sbp = &mp->m_sb; > + > + /* High agcount filesystems are always considered "multidisk" */ > + if (sbp->sb_agcount >= XFS_MULTIDISK_AGCOUNT) > + return true; > + > + /* > + * If it doesn't have a sunit/swidth, mkfs didn't consider it a > + * multi-disk array, so we don't either. > + */ > + if (!sbp->sb_unit) > + return false; > + > + ASSERT(sbp->sb_width); > + return true; > +} > + > int > main(int argc, char **argv) > { > @@ -644,9 +672,22 @@ main(int argc, char **argv) > * threads/CPU as this is enough threads to saturate a CPU on fast > * devices, yet few enough that it will saturate but won't overload slow > * devices. > + * > + * Multidisk filesystems can handle more IO parallelism so we should try > + * to process multiple AGs at a time in such a configuration to try to > + * saturate the underlying storage and speed the repair process. Only do > + * this if prefetching is enabled. 
> */ > - if (!ag_stride && glob_agcount >= 16 && do_prefetch) > - ag_stride = 15; > + if (!ag_stride && do_prefetch && is_multidisk_filesystem(mp)) { > + /* > + * For small agcount multidisk systems, just double the > + * parallelism. For larger AG count filesystems (32 and above) > + * use more parallelism, and linearly increase the parallelism > + * with the number of AGs. > + */ > + ag_stride = glob_agcount; > + ag_stride = min(glob_agcount, XFS_MULTIDISK_AGCOUNT / 2) - 1; > + } > > if (ag_stride) { > int max_threads = platform_nproc() * 8; > -- > 2.1.0 > > _______________________________________________ > xfs mailing list > xfs@xxxxxxxxxxx > http://oss.sgi.com/mailman/listinfo/xfs _______________________________________________ xfs mailing list xfs@xxxxxxxxxxx http://oss.sgi.com/mailman/listinfo/xfs