From: Darrick J. Wong <djwong@xxxxxxxxxx> Define the on-disk layout and feature flags for the metadata inode directory feature. Signed-off-by: Darrick J. Wong <djwong@xxxxxxxxxx> --- fs/xfs/libxfs/xfs_format.h | 60 +++++++++++++++++++++++++++++++++++----- fs/xfs/libxfs/xfs_inode_util.c | 2 + fs/xfs/libxfs/xfs_sb.c | 2 + fs/xfs/xfs_inode.h | 13 +++++++++ fs/xfs/xfs_mount.h | 2 + fs/xfs/xfs_super.c | 4 +++ 6 files changed, 75 insertions(+), 8 deletions(-) diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h index c60af43696326..7596b9286988e 100644 --- a/fs/xfs/libxfs/xfs_format.h +++ b/fs/xfs/libxfs/xfs_format.h @@ -174,6 +174,16 @@ typedef struct xfs_sb { xfs_lsn_t sb_lsn; /* last write sequence */ uuid_t sb_meta_uuid; /* metadata file system unique id */ + /* Fields beyond here do not match xfs_dsb. Be very careful! */ + + /* + * Metadata Directory Inode. On disk this lives in the sb_rbmino slot, + * but we continue to use the in-core superblock to cache the classic + * inodes (rt bitmap; rt summary; user, group, and project quotas) so + * we cache the metadir inode value here too. + */ + xfs_ino_t sb_metadirino; + /* must be padded to 64 bit alignment */ } xfs_sb_t; @@ -190,7 +200,14 @@ struct xfs_dsb { uuid_t sb_uuid; /* user-visible file system unique id */ __be64 sb_logstart; /* starting block of log if internal */ __be64 sb_rootino; /* root inode number */ - __be64 sb_rbmino; /* bitmap inode for realtime extents */ + /* + * bitmap inode for realtime extents. + * + * The metadata directory feature uses the sb_rbmino field to point to + * the root of the metadata directory tree. All other sb inode + * pointers are no longer used. + */ + __be64 sb_rbmino; __be64 sb_rsumino; /* summary inode for rt bitmap */ __be32 sb_rextsize; /* realtime extent size, blocks */ __be32 sb_agblocks; /* size of an allocation group */ @@ -373,6 +390,7 @@ xfs_sb_has_ro_compat_feature( #define XFS_SB_FEAT_INCOMPAT_NEEDSREPAIR (1 << 4) /* needs xfs_repair */ #define XFS_SB_FEAT_INCOMPAT_NREXT64 (1 << 5) /* large extent counters */ #define XFS_SB_FEAT_INCOMPAT_PARENT (1 << 6) /* parent pointers */ +#define XFS_SB_FEAT_INCOMPAT_METADIR (1U << 31) /* metadata dir tree */ #define XFS_SB_FEAT_INCOMPAT_ALL \ (XFS_SB_FEAT_INCOMPAT_FTYPE| \ XFS_SB_FEAT_INCOMPAT_SPINODES| \ @@ -1109,17 +1127,43 @@ static inline void xfs_dinode_put_rdev(struct xfs_dinode *dip, xfs_dev_t rdev) #define XFS_DIFLAG2_REFLINK_BIT 1 /* file's blocks may be shared */ #define XFS_DIFLAG2_COWEXTSIZE_BIT 2 /* copy on write extent size hint */ #define XFS_DIFLAG2_BIGTIME_BIT 3 /* big timestamps */ -#define XFS_DIFLAG2_NREXT64_BIT 4 /* large extent counters */ +#define XFS_DIFLAG2_NREXT64_BIT 4 /* large extent counters */ +#define XFS_DIFLAG2_METADIR_BIT 63 /* filesystem metadata */ -#define XFS_DIFLAG2_DAX (1 << XFS_DIFLAG2_DAX_BIT) -#define XFS_DIFLAG2_REFLINK (1 << XFS_DIFLAG2_REFLINK_BIT) -#define XFS_DIFLAG2_COWEXTSIZE (1 << XFS_DIFLAG2_COWEXTSIZE_BIT) -#define XFS_DIFLAG2_BIGTIME (1 << XFS_DIFLAG2_BIGTIME_BIT) -#define XFS_DIFLAG2_NREXT64 (1 << XFS_DIFLAG2_NREXT64_BIT) +#define XFS_DIFLAG2_DAX (1ULL << XFS_DIFLAG2_DAX_BIT) +#define XFS_DIFLAG2_REFLINK (1ULL << XFS_DIFLAG2_REFLINK_BIT) +#define XFS_DIFLAG2_COWEXTSIZE (1ULL << XFS_DIFLAG2_COWEXTSIZE_BIT) +#define XFS_DIFLAG2_BIGTIME (1ULL << XFS_DIFLAG2_BIGTIME_BIT) +#define XFS_DIFLAG2_NREXT64 (1ULL << XFS_DIFLAG2_NREXT64_BIT) + +/* + * The inode contains filesystem metadata and can be found through the metadata + * directory tree. Metadata inodes must satisfy the following constraints: + * + * - V5 filesystem (and ftype) are enabled; + * - The only valid modes are regular files and directories; + * - The access bits must be zero; + * - DMAPI event and state masks are zero; + * - The user, group, and project IDs must be zero; + * - The immutable, sync, noatime, nodump, nodefrag flags must be set. + * - The dax flag must not be set. + * - Directories must have nosymlinks set. + * + * These requirements are chosen defensively to minimize the ability of + * userspace to read or modify the contents, should a metadata file ever + * escape to userspace. + * + * There are further constraints on the directory tree itself: + * + * - Metadata inodes must never be resolvable through the root directory; + * - They must never be accessed by userspace; + * - Metadata directory entries must have correct ftype. + */ +#define XFS_DIFLAG2_METADIR (1ULL << XFS_DIFLAG2_METADIR_BIT) #define XFS_DIFLAG2_ANY \ (XFS_DIFLAG2_DAX | XFS_DIFLAG2_REFLINK | XFS_DIFLAG2_COWEXTSIZE | \ - XFS_DIFLAG2_BIGTIME | XFS_DIFLAG2_NREXT64) + XFS_DIFLAG2_BIGTIME | XFS_DIFLAG2_NREXT64 | XFS_DIFLAG2_METADIR) static inline bool xfs_dinode_has_bigtime(const struct xfs_dinode *dip) { diff --git a/fs/xfs/libxfs/xfs_inode_util.c b/fs/xfs/libxfs/xfs_inode_util.c index 8f85c74603dba..2f190b875c7f8 100644 --- a/fs/xfs/libxfs/xfs_inode_util.c +++ b/fs/xfs/libxfs/xfs_inode_util.c @@ -225,6 +225,8 @@ xfs_inode_inherit_flags2( } if (pip->i_diflags2 & XFS_DIFLAG2_DAX) ip->i_diflags2 |= XFS_DIFLAG2_DAX; + if (pip->i_diflags2 & XFS_DIFLAG2_METADIR) + ip->i_diflags2 |= XFS_DIFLAG2_METADIR; /* Don't let invalid cowextsize hints propagate. */ failaddr = xfs_inode_validate_cowextsize(ip->i_mount, ip->i_cowextsize, diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index d4f72d4c85f83..2d2f55099db04 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c @@ -178,6 +178,8 @@ xfs_sb_version_to_features( features |= XFS_FEAT_NREXT64; if (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_PARENT) features |= XFS_FEAT_PARENT; + if (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_METADIR) + features |= XFS_FEAT_METADIR; return features; } diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index cc6a075505636..10fbb3d4d193e 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -276,10 +276,23 @@ static inline bool xfs_is_reflink_inode(struct xfs_inode *ip) return ip->i_diflags2 & XFS_DIFLAG2_REFLINK; } +static inline bool xfs_is_metadir_inode(struct xfs_inode *ip) +{ + return ip->i_diflags2 & XFS_DIFLAG2_METADIR; +} + static inline bool xfs_is_metadata_inode(struct xfs_inode *ip) { struct xfs_mount *mp = ip->i_mount; + /* Any file in the metadata directory tree is a metadata inode. */ + if (xfs_has_metadir(mp)) + return xfs_is_metadir_inode(ip); + + /* + * Before metadata directories, the only metadata inodes were the + * three quota files, the realtime bitmap, and the realtime summary. + */ return ip == mp->m_rbmip || ip == mp->m_rsumip || xfs_is_quota_inode(&mp->m_sb, ip->i_ino); } diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index f80f04335acc5..953386ad4b13f 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -292,6 +292,7 @@ typedef struct xfs_mount { #define XFS_FEAT_BIGTIME (1ULL << 24) /* large timestamps */ #define XFS_FEAT_NEEDSREPAIR (1ULL << 25) /* needs xfs_repair */ #define XFS_FEAT_NREXT64 (1ULL << 26) /* large extent counters */ +#define XFS_FEAT_METADIR (1ULL << 27) /* metadata directory tree */ /* Mount features */ #define XFS_FEAT_NOATTR2 (1ULL << 48) /* disable attr2 creation */ @@ -355,6 +356,7 @@ __XFS_HAS_FEAT(inobtcounts, INOBTCNT) __XFS_HAS_FEAT(bigtime, BIGTIME) __XFS_HAS_FEAT(needsrepair, NEEDSREPAIR) __XFS_HAS_FEAT(large_extent_counts, NREXT64) +__XFS_HAS_FEAT(metadir, METADIR) /* * Mount features diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 8b4e5b8579a9b..998fa006af5bb 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -1713,6 +1713,10 @@ xfs_fs_fill_super( mp->m_features &= ~XFS_FEAT_DISCARD; } + if (xfs_has_metadir(mp)) + xfs_warn(mp, +"EXPERIMENTAL metadata directory feature in use. Use at your own risk!"); + if (xfs_has_reflink(mp)) { if (mp->m_sb.sb_rblocks) { xfs_alert(mp,