On Thu, Feb 24, 2022 at 06:32:06PM +0530, Chandan Babu R wrote: > This commit introduces new fields in the on-disk inode format to support > 64-bit data fork extent counters and 32-bit attribute fork extent > counters. The new fields will be used only when an inode has > XFS_DIFLAG2_NREXT64 flag set. Otherwise we continue to use the regular 32-bit > data fork extent counters and 16-bit attribute fork extent counters. > > Signed-off-by: Chandan Babu R <chandan.babu@xxxxxxxxxx> > Suggested-by: Dave Chinner <dchinner@xxxxxxxxxx> Looks good! Reviewed-by: Darrick J. Wong <djwong@xxxxxxxxxx> --D > --- > fs/xfs/libxfs/xfs_format.h | 33 ++++++++++++-- > fs/xfs/libxfs/xfs_inode_buf.c | 49 ++++++++++++++++++-- > fs/xfs/libxfs/xfs_inode_fork.h | 6 +++ > fs/xfs/libxfs/xfs_log_format.h | 33 ++++++++++++-- > fs/xfs/xfs_inode_item.c | 23 ++++++++-- > fs/xfs/xfs_inode_item_recover.c | 79 ++++++++++++++++++++++++++++----- > 6 files changed, 196 insertions(+), 27 deletions(-) > > diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h > index d3dfd45c39e0..1a5b194da191 100644 > --- a/fs/xfs/libxfs/xfs_format.h > +++ b/fs/xfs/libxfs/xfs_format.h > @@ -792,16 +792,41 @@ struct xfs_dinode { > __be32 di_nlink; /* number of links to file */ > __be16 di_projid_lo; /* lower part of owner's project id */ > __be16 di_projid_hi; /* higher part owner's project id */ > - __u8 di_pad[6]; /* unused, zeroed space */ > - __be16 di_flushiter; /* incremented on flush */ > + union { > + /* Number of data fork extents if NREXT64 is set */ > + __be64 di_big_nextents; > + > + /* Padding for V3 inodes without NREXT64 set. */ > + __be64 di_v3_pad; > + > + /* Padding and inode flush counter for V2 inodes. */ > + struct { > + __u8 di_v2_pad[6]; > + __be16 di_flushiter; > + }; > + }; > xfs_timestamp_t di_atime; /* time last accessed */ > xfs_timestamp_t di_mtime; /* time last modified */ > xfs_timestamp_t di_ctime; /* time created/inode modified */ > __be64 di_size; /* number of bytes in file */ > __be64 di_nblocks; /* # of direct & btree blocks used */ > __be32 di_extsize; /* basic/minimum extent size for file */ > - __be32 di_nextents; /* number of extents in data fork */ > - __be16 di_anextents; /* number of extents in attribute fork*/ > + union { > + /* > + * For V2 inodes and V3 inodes without NREXT64 set, this > + * is the number of data and attr fork extents. > + */ > + struct { > + __be32 di_nextents; > + __be16 di_anextents; > + } __packed; > + > + /* Number of attr fork extents if NREXT64 is set. */ > + struct { > + __be32 di_big_anextents; > + __be16 di_nrext64_pad; > + } __packed; > + } __packed; > __u8 di_forkoff; /* attr fork offs, <<3 for 64b align */ > __s8 di_aformat; /* format of attr fork's data */ > __be32 di_dmevmask; /* DMIG event mask */ > diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c > index 34f360a38603..a11d3ea5ebfe 100644 > --- a/fs/xfs/libxfs/xfs_inode_buf.c > +++ b/fs/xfs/libxfs/xfs_inode_buf.c > @@ -279,6 +279,25 @@ xfs_inode_to_disk_ts( > return ts; > } > > +static inline void > +xfs_inode_to_disk_iext_counters( > + struct xfs_inode *ip, > + struct xfs_dinode *to) > +{ > + if (xfs_inode_has_nrext64(ip)) { > + to->di_big_nextents = cpu_to_be64(xfs_ifork_nextents(&ip->i_df)); > + to->di_big_anextents = cpu_to_be32(xfs_ifork_nextents(ip->i_afp)); > + /* > + * We might be upgrading the inode to use larger extent counters > + * than was previously used. Hence zero the unused field. > + */ > + to->di_nrext64_pad = cpu_to_be16(0); > + } else { > + to->di_nextents = cpu_to_be32(xfs_ifork_nextents(&ip->i_df)); > + to->di_anextents = cpu_to_be16(xfs_ifork_nextents(ip->i_afp)); > + } > +} > + > void > xfs_inode_to_disk( > struct xfs_inode *ip, > @@ -296,7 +315,6 @@ xfs_inode_to_disk( > to->di_projid_lo = cpu_to_be16(ip->i_projid & 0xffff); > to->di_projid_hi = cpu_to_be16(ip->i_projid >> 16); > > - memset(to->di_pad, 0, sizeof(to->di_pad)); > to->di_atime = xfs_inode_to_disk_ts(ip, inode->i_atime); > to->di_mtime = xfs_inode_to_disk_ts(ip, inode->i_mtime); > to->di_ctime = xfs_inode_to_disk_ts(ip, inode->i_ctime); > @@ -307,8 +325,6 @@ xfs_inode_to_disk( > to->di_size = cpu_to_be64(ip->i_disk_size); > to->di_nblocks = cpu_to_be64(ip->i_nblocks); > to->di_extsize = cpu_to_be32(ip->i_extsize); > - to->di_nextents = cpu_to_be32(xfs_ifork_nextents(&ip->i_df)); > - to->di_anextents = cpu_to_be16(xfs_ifork_nextents(ip->i_afp)); > to->di_forkoff = ip->i_forkoff; > to->di_aformat = xfs_ifork_format(ip->i_afp); > to->di_flags = cpu_to_be16(ip->i_diflags); > @@ -323,11 +339,14 @@ xfs_inode_to_disk( > to->di_lsn = cpu_to_be64(lsn); > memset(to->di_pad2, 0, sizeof(to->di_pad2)); > uuid_copy(&to->di_uuid, &ip->i_mount->m_sb.sb_meta_uuid); > - to->di_flushiter = 0; > + to->di_v3_pad = 0; > } else { > to->di_version = 2; > to->di_flushiter = cpu_to_be16(ip->i_flushiter); > + memset(to->di_v2_pad, 0, sizeof(to->di_v2_pad)); > } > + > + xfs_inode_to_disk_iext_counters(ip, to); > } > > static xfs_failaddr_t > @@ -397,6 +416,24 @@ xfs_dinode_verify_forkoff( > return NULL; > } > > +static xfs_failaddr_t > +xfs_dinode_verify_nrext64( > + struct xfs_mount *mp, > + struct xfs_dinode *dip) > +{ > + if (xfs_dinode_has_nrext64(dip)) { > + if (!xfs_has_nrext64(mp)) > + return __this_address; > + if (dip->di_nrext64_pad != 0) > + return __this_address; > + } else if (dip->di_version >= 3) { > + if (dip->di_v3_pad != 0) > + return __this_address; > + } > + > + return NULL; > +} > + > xfs_failaddr_t > xfs_dinode_verify( > struct xfs_mount *mp, > @@ -440,6 +477,10 @@ xfs_dinode_verify( > if ((S_ISLNK(mode) || S_ISDIR(mode)) && di_size == 0) > return __this_address; > > + fa = xfs_dinode_verify_nrext64(mp, dip); > + if (fa) > + return fa; > + > nextents = xfs_dfork_data_extents(dip); > nextents += xfs_dfork_attr_extents(dip); > nblocks = be64_to_cpu(dip->di_nblocks); > diff --git a/fs/xfs/libxfs/xfs_inode_fork.h b/fs/xfs/libxfs/xfs_inode_fork.h > index e56803436c61..8e6221e32660 100644 > --- a/fs/xfs/libxfs/xfs_inode_fork.h > +++ b/fs/xfs/libxfs/xfs_inode_fork.h > @@ -156,6 +156,9 @@ static inline xfs_extnum_t > xfs_dfork_data_extents( > struct xfs_dinode *dip) > { > + if (xfs_dinode_has_nrext64(dip)) > + return be64_to_cpu(dip->di_big_nextents); > + > return be32_to_cpu(dip->di_nextents); > } > > @@ -163,6 +166,9 @@ static inline xfs_extnum_t > xfs_dfork_attr_extents( > struct xfs_dinode *dip) > { > + if (xfs_dinode_has_nrext64(dip)) > + return be32_to_cpu(dip->di_big_anextents); > + > return be16_to_cpu(dip->di_anextents); > } > > diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h > index fd66e70248f7..12234a880e94 100644 > --- a/fs/xfs/libxfs/xfs_log_format.h > +++ b/fs/xfs/libxfs/xfs_log_format.h > @@ -388,16 +388,41 @@ struct xfs_log_dinode { > uint32_t di_nlink; /* number of links to file */ > uint16_t di_projid_lo; /* lower part of owner's project id */ > uint16_t di_projid_hi; /* higher part of owner's project id */ > - uint8_t di_pad[6]; /* unused, zeroed space */ > - uint16_t di_flushiter; /* incremented on flush */ > + union { > + /* Number of data fork extents if NREXT64 is set */ > + uint64_t di_big_nextents; > + > + /* Padding for V3 inodes without NREXT64 set. */ > + uint64_t di_v3_pad; > + > + /* Padding and inode flush counter for V2 inodes. */ > + struct { > + uint8_t di_v2_pad[6]; /* V2 inode zeroed space */ > + uint16_t di_flushiter; /* V2 inode incremented on flush */ > + }; > + }; > xfs_log_timestamp_t di_atime; /* time last accessed */ > xfs_log_timestamp_t di_mtime; /* time last modified */ > xfs_log_timestamp_t di_ctime; /* time created/inode modified */ > xfs_fsize_t di_size; /* number of bytes in file */ > xfs_rfsblock_t di_nblocks; /* # of direct & btree blocks used */ > xfs_extlen_t di_extsize; /* basic/minimum extent size for file */ > - uint32_t di_nextents; /* number of extents in data fork */ > - uint16_t di_anextents; /* number of extents in attribute fork*/ > + union { > + /* > + * For V2 inodes and V3 inodes without NREXT64 set, this > + * is the number of data and attr fork extents. > + */ > + struct { > + uint32_t di_nextents; > + uint16_t di_anextents; > + } __packed; > + > + /* Number of attr fork extents if NREXT64 is set. */ > + struct { > + uint32_t di_big_anextents; > + uint16_t di_nrext64_pad; > + } __packed; > + } __packed; > uint8_t di_forkoff; /* attr fork offs, <<3 for 64b align */ > int8_t di_aformat; /* format of attr fork's data */ > uint32_t di_dmevmask; /* DMIG event mask */ > diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c > index 90d8e591baf8..0d2fe38dc6e5 100644 > --- a/fs/xfs/xfs_inode_item.c > +++ b/fs/xfs/xfs_inode_item.c > @@ -358,6 +358,21 @@ xfs_copy_dm_fields_to_log_dinode( > } > } > > +static inline void > +xfs_inode_to_log_dinode_iext_counters( > + struct xfs_inode *ip, > + struct xfs_log_dinode *to) > +{ > + if (xfs_inode_has_nrext64(ip)) { > + to->di_big_nextents = xfs_ifork_nextents(&ip->i_df); > + to->di_big_anextents = xfs_ifork_nextents(ip->i_afp); > + to->di_nrext64_pad = 0; > + } else { > + to->di_nextents = xfs_ifork_nextents(&ip->i_df); > + to->di_anextents = xfs_ifork_nextents(ip->i_afp); > + } > +} > + > static void > xfs_inode_to_log_dinode( > struct xfs_inode *ip, > @@ -373,7 +388,6 @@ xfs_inode_to_log_dinode( > to->di_projid_lo = ip->i_projid & 0xffff; > to->di_projid_hi = ip->i_projid >> 16; > > - memset(to->di_pad, 0, sizeof(to->di_pad)); > memset(to->di_pad3, 0, sizeof(to->di_pad3)); > to->di_atime = xfs_inode_to_log_dinode_ts(ip, inode->i_atime); > to->di_mtime = xfs_inode_to_log_dinode_ts(ip, inode->i_mtime); > @@ -385,8 +399,6 @@ xfs_inode_to_log_dinode( > to->di_size = ip->i_disk_size; > to->di_nblocks = ip->i_nblocks; > to->di_extsize = ip->i_extsize; > - to->di_nextents = xfs_ifork_nextents(&ip->i_df); > - to->di_anextents = xfs_ifork_nextents(ip->i_afp); > to->di_forkoff = ip->i_forkoff; > to->di_aformat = xfs_ifork_format(ip->i_afp); > to->di_flags = ip->i_diflags; > @@ -406,11 +418,14 @@ xfs_inode_to_log_dinode( > to->di_lsn = lsn; > memset(to->di_pad2, 0, sizeof(to->di_pad2)); > uuid_copy(&to->di_uuid, &ip->i_mount->m_sb.sb_meta_uuid); > - to->di_flushiter = 0; > + to->di_v3_pad = 0; > } else { > to->di_version = 2; > to->di_flushiter = ip->i_flushiter; > + memset(to->di_v2_pad, 0, sizeof(to->di_v2_pad)); > } > + > + xfs_inode_to_log_dinode_iext_counters(ip, to); > } > > /* > diff --git a/fs/xfs/xfs_inode_item_recover.c b/fs/xfs/xfs_inode_item_recover.c > index 767a551816a0..c35796a4e9c5 100644 > --- a/fs/xfs/xfs_inode_item_recover.c > +++ b/fs/xfs/xfs_inode_item_recover.c > @@ -148,6 +148,22 @@ static inline bool xfs_log_dinode_has_nrext64(const struct xfs_log_dinode *ld) > (ld->di_flags2 & XFS_DIFLAG2_NREXT64); > } > > +static inline void > +xfs_log_dinode_to_disk_iext_counters( > + struct xfs_log_dinode *from, > + struct xfs_dinode *to) > +{ > + if (xfs_log_dinode_has_nrext64(from)) { > + to->di_big_nextents = cpu_to_be64(from->di_big_nextents); > + to->di_big_anextents = cpu_to_be32(from->di_big_anextents); > + to->di_nrext64_pad = cpu_to_be16(from->di_nrext64_pad); > + } else { > + to->di_nextents = cpu_to_be32(from->di_nextents); > + to->di_anextents = cpu_to_be16(from->di_anextents); > + } > + > +} > + > STATIC void > xfs_log_dinode_to_disk( > struct xfs_log_dinode *from, > @@ -164,7 +180,6 @@ xfs_log_dinode_to_disk( > to->di_nlink = cpu_to_be32(from->di_nlink); > to->di_projid_lo = cpu_to_be16(from->di_projid_lo); > to->di_projid_hi = cpu_to_be16(from->di_projid_hi); > - memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad)); > > to->di_atime = xfs_log_dinode_to_disk_ts(from, from->di_atime); > to->di_mtime = xfs_log_dinode_to_disk_ts(from, from->di_mtime); > @@ -173,8 +188,6 @@ xfs_log_dinode_to_disk( > to->di_size = cpu_to_be64(from->di_size); > to->di_nblocks = cpu_to_be64(from->di_nblocks); > to->di_extsize = cpu_to_be32(from->di_extsize); > - to->di_nextents = cpu_to_be32(from->di_nextents); > - to->di_anextents = cpu_to_be16(from->di_anextents); > to->di_forkoff = from->di_forkoff; > to->di_aformat = from->di_aformat; > to->di_dmevmask = cpu_to_be32(from->di_dmevmask); > @@ -192,10 +205,13 @@ xfs_log_dinode_to_disk( > to->di_lsn = cpu_to_be64(lsn); > memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2)); > uuid_copy(&to->di_uuid, &from->di_uuid); > - to->di_flushiter = 0; > + to->di_v3_pad = from->di_v3_pad; > } else { > to->di_flushiter = cpu_to_be16(from->di_flushiter); > + memcpy(to->di_v2_pad, from->di_v2_pad, sizeof(to->di_v2_pad)); > } > + > + xfs_log_dinode_to_disk_iext_counters(from, to); > } > > STATIC int > @@ -209,6 +225,8 @@ xlog_recover_inode_commit_pass2( > struct xfs_mount *mp = log->l_mp; > struct xfs_buf *bp; > struct xfs_dinode *dip; > + xfs_extnum_t nextents; > + xfs_aextnum_t anextents; > int len; > char *src; > char *dest; > @@ -348,21 +366,60 @@ xlog_recover_inode_commit_pass2( > goto out_release; > } > } > - if (unlikely(ldip->di_nextents + ldip->di_anextents > ldip->di_nblocks)){ > - XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(5)", > + > + if (xfs_log_dinode_has_nrext64(ldip)) { > + if (!xfs_has_nrext64(mp) || (ldip->di_nrext64_pad != 0)) { > + XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(5)", > + XFS_ERRLEVEL_LOW, mp, ldip, > + sizeof(*ldip)); > + xfs_alert(mp, > + "%s: Bad inode log record, rec ptr "PTR_FMT", " > + "dino ptr "PTR_FMT", dino bp "PTR_FMT", " > + "ino %Ld, xfs_has_nrext64(mp) = %d, " > + "ldip->di_nrext64_pad = %u", > + __func__, item, dip, bp, in_f->ilf_ino, > + xfs_has_nrext64(mp), ldip->di_nrext64_pad); > + error = -EFSCORRUPTED; > + goto out_release; > + } > + } else { > + if (ldip->di_version == 3 && ldip->di_big_nextents != 0) { > + XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(6)", > + XFS_ERRLEVEL_LOW, mp, ldip, > + sizeof(*ldip)); > + xfs_alert(mp, > + "%s: Bad inode log record, rec ptr "PTR_FMT", " > + "dino ptr "PTR_FMT", dino bp "PTR_FMT", " > + "ino %Ld, ldip->di_big_dextcnt = %llu", > + __func__, item, dip, bp, in_f->ilf_ino, > + ldip->di_big_nextents); > + error = -EFSCORRUPTED; > + goto out_release; > + } > + } > + > + if (xfs_log_dinode_has_nrext64(ldip)) { > + nextents = ldip->di_big_nextents; > + anextents = ldip->di_big_anextents; > + } else { > + nextents = ldip->di_nextents; > + anextents = ldip->di_anextents; > + } > + > + if (unlikely(nextents + anextents > ldip->di_nblocks)) { > + XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(7)", > XFS_ERRLEVEL_LOW, mp, ldip, > sizeof(*ldip)); > xfs_alert(mp, > "%s: Bad inode log record, rec ptr "PTR_FMT", dino ptr "PTR_FMT", " > - "dino bp "PTR_FMT", ino %Ld, total extents = %d, nblocks = %Ld", > + "dino bp "PTR_FMT", ino %Ld, total extents = %llu, nblocks = %Ld", > __func__, item, dip, bp, in_f->ilf_ino, > - ldip->di_nextents + ldip->di_anextents, > - ldip->di_nblocks); > + nextents + anextents, ldip->di_nblocks); > error = -EFSCORRUPTED; > goto out_release; > } > if (unlikely(ldip->di_forkoff > mp->m_sb.sb_inodesize)) { > - XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(6)", > + XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(8)", > XFS_ERRLEVEL_LOW, mp, ldip, > sizeof(*ldip)); > xfs_alert(mp, > @@ -374,7 +431,7 @@ xlog_recover_inode_commit_pass2( > } > isize = xfs_log_dinode_size(mp); > if (unlikely(item->ri_buf[1].i_len > isize)) { > - XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(7)", > + XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(9)", > XFS_ERRLEVEL_LOW, mp, ldip, > sizeof(*ldip)); > xfs_alert(mp, > -- > 2.30.2 >