From: "Darrick J. Wong" <djwong@xxxxxxxxxx>
Add a new inode flag to require that all file data extent mappings must
be aligned (both the file offset range and the allocated space itself)
to the extent size hint. Having a separate COW extent size hint is no
longer allowed.
The goal here is to enable sysadmins and users to mandate that all space
mappings in a file must have a startoff/blockcount that are aligned to
(say) a 2MB alignment and that the startblock/blockcount will follow the
same alignment.
Allocated space will be aligned to start of the AG, and not necessarily
aligned with disk blocks. The upcoming atomic writes feature will rely and
forcealign and will also require allocated space will also be aligned to
disk blocks.
Currently RT is not be supported for forcealign, so reject a mount under
that condition. In future, it should be possible to support forcealign for
RT. In this case, the extent size hint will need to be aligned with
rtextsize, so add inode verification for that now.
reflink link will not be supported for forcealign. This is because we have
the limitation of pageache writeback not knowing how to writeback an
entire allocation unut, so reject a mount with relink.
Signed-off-by: "Darrick J. Wong" <djwong@xxxxxxxxxx>
Co-developed-by: John Garry <john.g.garry@xxxxxxxxxx>
[jpg: many changes from orig, including forcealign inode verification
rework, disallow RT and forcealign mount, ioctl setattr rework,
disallow reflink a forcealign inode]
Signed-off-by: John Garry <john.g.garry@xxxxxxxxxx>
---
fs/xfs/libxfs/xfs_format.h | 6 +++-
fs/xfs/libxfs/xfs_inode_buf.c | 55 +++++++++++++++++++++++++++++++++++
fs/xfs/libxfs/xfs_inode_buf.h | 3 ++
fs/xfs/libxfs/xfs_sb.c | 2 ++
fs/xfs/xfs_inode.c | 13 +++++++++
fs/xfs/xfs_inode.h | 20 ++++++++++++-
fs/xfs/xfs_ioctl.c | 51 ++++++++++++++++++++++++++++++--
fs/xfs/xfs_mount.h | 2 ++
fs/xfs/xfs_reflink.c | 5 ++--
fs/xfs/xfs_reflink.h | 10 -------
fs/xfs/xfs_super.c | 11 +++++++
include/uapi/linux/fs.h | 2 ++
12 files changed, 164 insertions(+), 16 deletions(-)
diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
index 61f51becff4f..b48cd75d34a6 100644
--- a/fs/xfs/libxfs/xfs_format.h
+++ b/fs/xfs/libxfs/xfs_format.h
@@ -353,6 +353,7 @@ xfs_sb_has_compat_feature(
#define XFS_SB_FEAT_RO_COMPAT_RMAPBT (1 << 1) /* reverse map btree */
#define XFS_SB_FEAT_RO_COMPAT_REFLINK (1 << 2) /* reflinked files */
#define XFS_SB_FEAT_RO_COMPAT_INOBTCNT (1 << 3) /* inobt block counts */
+#define XFS_SB_FEAT_RO_COMPAT_FORCEALIGN (1 << 30) /* aligned file data extents */
#define XFS_SB_FEAT_RO_COMPAT_ALL \
(XFS_SB_FEAT_RO_COMPAT_FINOBT | \
XFS_SB_FEAT_RO_COMPAT_RMAPBT | \
@@ -1094,16 +1095,19 @@ static inline void xfs_dinode_put_rdev(struct xfs_dinode *dip, xfs_dev_t rdev)
#define XFS_DIFLAG2_COWEXTSIZE_BIT 2 /* copy on write extent size hint */
#define XFS_DIFLAG2_BIGTIME_BIT 3 /* big timestamps */
#define XFS_DIFLAG2_NREXT64_BIT 4 /* large extent counters */
+/* data extent mappings for regular files must be aligned to extent size hint */
+#define XFS_DIFLAG2_FORCEALIGN_BIT 5
#define XFS_DIFLAG2_DAX (1 << XFS_DIFLAG2_DAX_BIT)
#define XFS_DIFLAG2_REFLINK (1 << XFS_DIFLAG2_REFLINK_BIT)
#define XFS_DIFLAG2_COWEXTSIZE (1 << XFS_DIFLAG2_COWEXTSIZE_BIT)
#define XFS_DIFLAG2_BIGTIME (1 << XFS_DIFLAG2_BIGTIME_BIT)
#define XFS_DIFLAG2_NREXT64 (1 << XFS_DIFLAG2_NREXT64_BIT)
+#define XFS_DIFLAG2_FORCEALIGN (1 << XFS_DIFLAG2_FORCEALIGN_BIT)
#define XFS_DIFLAG2_ANY \
(XFS_DIFLAG2_DAX | XFS_DIFLAG2_REFLINK | XFS_DIFLAG2_COWEXTSIZE | \
- XFS_DIFLAG2_BIGTIME | XFS_DIFLAG2_NREXT64)
+ XFS_DIFLAG2_BIGTIME | XFS_DIFLAG2_NREXT64 | XFS_DIFLAG2_FORCEALIGN)
static inline bool xfs_dinode_has_bigtime(const struct xfs_dinode *dip)
{
diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c
index 513b50da6215..5c61a1d1bb2b 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.c
+++ b/fs/xfs/libxfs/xfs_inode_buf.c
@@ -657,6 +657,15 @@ xfs_dinode_verify(
!xfs_has_bigtime(mp))
return __this_address;
+ if (flags2 & XFS_DIFLAG2_FORCEALIGN) {
+ fa = xfs_inode_validate_forcealign(mp,
+ be32_to_cpu(dip->di_extsize),
+ be32_to_cpu(dip->di_cowextsize),
+ mode, flags, flags2);
+ if (fa)
+ return fa;
+ }
+
return NULL;
}
@@ -824,3 +833,49 @@ xfs_inode_validate_cowextsize(
return NULL;
}
+
+/* Validate the forcealign inode flag */
+xfs_failaddr_t
+xfs_inode_validate_forcealign(
+ struct xfs_mount *mp,
+ uint32_t extsize,
+ uint32_t cowextsize,
+ uint16_t mode,
+ uint16_t flags,
+ uint64_t flags2)
+{
+ bool rt = flags & XFS_DIFLAG_REALTIME;
+
+ /* superblock rocompat feature flag */
+ if (!xfs_has_forcealign(mp))
+ return __this_address;
+
+ /* Only regular files and directories */
+ if (!S_ISDIR(mode) && !S_ISREG(mode))
+ return __this_address;
+
+ /* We require EXTSIZE or EXTSZINHERIT */
+ if (!(flags & (XFS_DIFLAG_EXTSIZE | XFS_DIFLAG_EXTSZINHERIT)))
+ return __this_address;
+
+ /* We require a non-zero extsize */
+ if (!extsize)
+ return __this_address;
+
+ /* Reflink'ed disallowed */
+ if (flags2 & XFS_DIFLAG2_REFLINK)
+ return __this_address;