On 05 Jan 2022 at 06:12, Darrick J. Wong wrote: > On Tue, Dec 14, 2021 at 02:15:14PM +0530, Chandan Babu R wrote: >> This commit defines new macros to represent maximum extent counts allowed by >> filesystems which have support for large per-inode extent counters. >> >> Signed-off-by: Chandan Babu R <chandan.babu@xxxxxxxxxx> >> --- >> fs/xfs/libxfs/xfs_bmap.c | 8 +++----- >> fs/xfs/libxfs/xfs_bmap_btree.c | 2 +- >> fs/xfs/libxfs/xfs_format.h | 8 +++++--- >> fs/xfs/libxfs/xfs_inode_buf.c | 3 ++- >> fs/xfs/libxfs/xfs_inode_fork.c | 2 +- >> fs/xfs/libxfs/xfs_inode_fork.h | 19 +++++++++++++++---- >> 6 files changed, 27 insertions(+), 15 deletions(-) >> >> diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c >> index 4113622e9733..0ce58e4a9c44 100644 >> --- a/fs/xfs/libxfs/xfs_bmap.c >> +++ b/fs/xfs/libxfs/xfs_bmap.c >> @@ -61,10 +61,8 @@ xfs_bmap_compute_maxlevels( >> int sz; /* root block size */ >> >> /* >> - * The maximum number of extents in a file, hence the maximum number of >> - * leaf entries, is controlled by the size of the on-disk extent count, >> - * either a signed 32-bit number for the data fork, or a signed 16-bit >> - * number for the attr fork. >> + * The maximum number of extents in a fork, hence the maximum number of >> + * leaf entries, is controlled by the size of the on-disk extent count. >> * >> * Note that we can no longer assume that if we are in ATTR1 that the >> * fork offset of all the inodes will be >> @@ -74,7 +72,7 @@ xfs_bmap_compute_maxlevels( >> * ATTR2 we have to assume the worst case scenario of a minimum size >> * available. 
>> */ >> - maxleafents = xfs_iext_max_nextents(whichfork); >> + maxleafents = xfs_iext_max_nextents(xfs_has_nrext64(mp), whichfork); >> if (whichfork == XFS_DATA_FORK) >> sz = XFS_BMDR_SPACE_CALC(MINDBTPTRS); >> else >> diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c >> index 453309fc85f2..e8d21d69b9ff 100644 >> --- a/fs/xfs/libxfs/xfs_bmap_btree.c >> +++ b/fs/xfs/libxfs/xfs_bmap_btree.c >> @@ -611,7 +611,7 @@ xfs_bmbt_maxlevels_ondisk(void) >> minrecs[1] = xfs_bmbt_block_maxrecs(blocklen, false) / 2; >> >> /* One extra level for the inode root. */ >> - return xfs_btree_compute_maxlevels(minrecs, MAXEXTNUM) + 1; >> + return xfs_btree_compute_maxlevels(minrecs, XFS_MAX_EXTCNT_DATA_FORK) + 1; >> } >> >> /* >> diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h >> index 9934c320bf01..eff86f6c4c99 100644 >> --- a/fs/xfs/libxfs/xfs_format.h >> +++ b/fs/xfs/libxfs/xfs_format.h >> @@ -873,9 +873,11 @@ enum xfs_dinode_fmt { >> /* >> * Max values for extlen, extnum, aextnum. >> */ >> -#define MAXEXTLEN ((xfs_extlen_t)0x001fffff) /* 21 bits */ >> -#define MAXEXTNUM ((xfs_extnum_t)0x7fffffff) /* signed int */ >> -#define MAXAEXTNUM ((xfs_aextnum_t)0x7fff) /* signed short */ >> +#define MAXEXTLEN ((xfs_extlen_t)0x1fffff) /* 21 bits */ >> +#define XFS_MAX_EXTCNT_DATA_FORK ((xfs_extnum_t)0xffffffffffff) /* Unsigned 48-bits */ >> +#define XFS_MAX_EXTCNT_ATTR_FORK ((xfs_aextnum_t)0xffffffff) /* Unsigned 32-bits */ >> +#define XFS_MAX_EXTCNT_DATA_FORK_OLD ((xfs_extnum_t)0x7fffffff) /* Signed 32-bits */ >> +#define XFS_MAX_EXTCNT_ATTR_FORK_OLD ((xfs_aextnum_t)0x7fff) /* Signed 16-bits */ > > Could you change the #define value to a shift and subtract like you do > for MAXEXTLEN^WXFS_MAX_BMBT_EXTLEN in patch 16? > > e.g. > > #define XFS_MAX_EXTCNT_DATA_FORK ((xfs_extnum_t)((1ULL << 48) - 1)) Sure. I will incorporate the above change. 
> > Also, you might want to document briefly in this header file why it is > that the bmbt is limited to 2^48 extents even though the dinode fields > are 64 bits wide and there can be up to 2^54 blocks mapped by a fork. > ISTR the reason is to avoid having the bmbt cursor cache have to handle > a 12-level btree or something, right? > > (Sorry, it's been a while...) > The discussion initially started with the observation that anything more than 2^43 extents on a filesystem with a 1k block size can cause a BMBT's height to become larger than XFS_BTREE_MAXLEVELS (i.e. 9). Increasing the value of XFS_BTREE_MAXLEVELS was not an option since that would cause the following sequence of events: 1. An increase in the rmapbt's maximum height (on filesystems which have both reflink and rmap features enabled). 2. An increase in transaction reservation values. However, this is no longer an issue since the btree cursor now contains a variable-length array and XFS_BTREE_MAXLEVELS has been removed. 2^48 as the maximum extent count was arrived at based on the following logic: 2^63 (max file size) / 64k (max block size) = 2^47, i.e. 2^47 can be a valid upper bound for all block sizes. Rounding up 47 to the nearest multiple of bits-per-byte results in 48. Hence 2^48 was chosen as the maximum data fork extent count. I will include the above description in the next version of the patchset. >> >> /* >> * Inode minimum and maximum sizes.
>> diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c >> index 860d32816909..34f360a38603 100644 >> --- a/fs/xfs/libxfs/xfs_inode_buf.c >> +++ b/fs/xfs/libxfs/xfs_inode_buf.c >> @@ -361,7 +361,8 @@ xfs_dinode_verify_fork( >> return __this_address; >> break; >> case XFS_DINODE_FMT_BTREE: >> - max_extents = xfs_iext_max_nextents(whichfork); >> + max_extents = xfs_iext_max_nextents(xfs_dinode_has_nrext64(dip), >> + whichfork); >> if (di_nextents > max_extents) >> return __this_address; >> break; >> diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c >> index ce690abe5dce..a3a3b54f9c55 100644 >> --- a/fs/xfs/libxfs/xfs_inode_fork.c >> +++ b/fs/xfs/libxfs/xfs_inode_fork.c >> @@ -746,7 +746,7 @@ xfs_iext_count_may_overflow( >> if (whichfork == XFS_COW_FORK) >> return 0; >> >> - max_exts = xfs_iext_max_nextents(whichfork); >> + max_exts = xfs_iext_max_nextents(xfs_inode_has_nrext64(ip), whichfork); >> >> if (XFS_TEST_ERROR(false, ip->i_mount, XFS_ERRTAG_REDUCE_MAX_IEXTENTS)) >> max_exts = 10; >> diff --git a/fs/xfs/libxfs/xfs_inode_fork.h b/fs/xfs/libxfs/xfs_inode_fork.h >> index 4a8b77d425df..0cfc351648f9 100644 >> --- a/fs/xfs/libxfs/xfs_inode_fork.h >> +++ b/fs/xfs/libxfs/xfs_inode_fork.h >> @@ -133,12 +133,23 @@ static inline int8_t xfs_ifork_format(struct xfs_ifork *ifp) >> return ifp->if_format; >> } >> >> -static inline xfs_extnum_t xfs_iext_max_nextents(int whichfork) >> +static inline xfs_extnum_t xfs_iext_max_nextents(bool has_big_extcnt, > > has_nrext64, to be consistent with most everywhere else? > You are right. I will fix this. > --D > >> + int whichfork) >> { >> - if (whichfork == XFS_DATA_FORK || whichfork == XFS_COW_FORK) >> - return MAXEXTNUM; >> + switch (whichfork) { >> + case XFS_DATA_FORK: >> + case XFS_COW_FORK: >> + return has_big_extcnt ? XFS_MAX_EXTCNT_DATA_FORK >> + : XFS_MAX_EXTCNT_DATA_FORK_OLD; >> + >> + case XFS_ATTR_FORK: >> + return has_big_extcnt ? 
XFS_MAX_EXTCNT_ATTR_FORK >> + : XFS_MAX_EXTCNT_ATTR_FORK_OLD; >> >> - return MAXAEXTNUM; >> + default: >> + ASSERT(0); >> + return 0; >> + } >> } >> >> static inline xfs_extnum_t >> -- >> 2.30.2 >> -- chandan