Add initial support for FS_XFLAG_ATOMICWRITES for forcealign enabled. Current kernel support for atomic writes is based on HW support (for atomic writes). As such, it is required to ensure extent alignment with atomic_write_unit_max so that an atomic write can result in a single HW-compliant IO operation. rtvol also guarantees extent alignment, but we are basing support initially on forcealign, which is not supported for rtvol yet. Signed-off-by: John Garry <john.g.garry@xxxxxxxxxx> --- fs/xfs/libxfs/xfs_format.h | 11 +++++++++-- fs/xfs/libxfs/xfs_inode_buf.c | 36 +++++++++++++++++++++++++++++++++++ fs/xfs/libxfs/xfs_sb.c | 2 ++ fs/xfs/xfs_buf.c | 15 ++++++++++++++- fs/xfs/xfs_buf.h | 4 +++- fs/xfs/xfs_inode.c | 2 ++ fs/xfs/xfs_inode.h | 5 +++++ fs/xfs/xfs_ioctl.c | 21 ++++++++++++++++++-- fs/xfs/xfs_mount.h | 2 ++ fs/xfs/xfs_super.c | 4 ++++ 10 files changed, 96 insertions(+), 6 deletions(-) diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h index 0c73b96dbefc..8e32fb068430 100644 --- a/fs/xfs/libxfs/xfs_format.h +++ b/fs/xfs/libxfs/xfs_format.h @@ -354,12 +354,16 @@ xfs_sb_has_compat_feature( #define XFS_SB_FEAT_RO_COMPAT_REFLINK (1 << 2) /* reflinked files */ #define XFS_SB_FEAT_RO_COMPAT_INOBTCNT (1 << 3) /* inobt block counts */ #define XFS_SB_FEAT_RO_COMPAT_FORCEALIGN (1 << 30) /* aligned file data extents */ +#define XFS_SB_FEAT_RO_COMPAT_ATOMICWRITES (1 << 31) /* atomicwrites enabled */ + #define XFS_SB_FEAT_RO_COMPAT_ALL \ (XFS_SB_FEAT_RO_COMPAT_FINOBT | \ XFS_SB_FEAT_RO_COMPAT_RMAPBT | \ XFS_SB_FEAT_RO_COMPAT_REFLINK| \ XFS_SB_FEAT_RO_COMPAT_INOBTCNT | \ - XFS_SB_FEAT_RO_COMPAT_FORCEALIGN) + XFS_SB_FEAT_RO_COMPAT_FORCEALIGN | \ + XFS_SB_FEAT_RO_COMPAT_ATOMICWRITES) + #define XFS_SB_FEAT_RO_COMPAT_UNKNOWN ~XFS_SB_FEAT_RO_COMPAT_ALL static inline bool xfs_sb_has_ro_compat_feature( @@ -1088,6 +1092,7 @@ static inline void xfs_dinode_put_rdev(struct xfs_dinode *dip, xfs_dev_t rdev) #define XFS_DIFLAG2_NREXT64_BIT 4 /* large extent counters */ /* data extent mappings for regular files must be aligned to extent size hint */ #define XFS_DIFLAG2_FORCEALIGN_BIT 5 +#define XFS_DIFLAG2_ATOMICWRITES_BIT 6 #define XFS_DIFLAG2_DAX (1 << XFS_DIFLAG2_DAX_BIT) #define XFS_DIFLAG2_REFLINK (1 << XFS_DIFLAG2_REFLINK_BIT) @@ -1095,10 +1100,12 @@ static inline void xfs_dinode_put_rdev(struct xfs_dinode *dip, xfs_dev_t rdev) #define XFS_DIFLAG2_BIGTIME (1 << XFS_DIFLAG2_BIGTIME_BIT) #define XFS_DIFLAG2_NREXT64 (1 << XFS_DIFLAG2_NREXT64_BIT) #define XFS_DIFLAG2_FORCEALIGN (1 << XFS_DIFLAG2_FORCEALIGN_BIT) +#define XFS_DIFLAG2_ATOMICWRITES (1 << XFS_DIFLAG2_ATOMICWRITES_BIT) #define XFS_DIFLAG2_ANY \ (XFS_DIFLAG2_DAX | XFS_DIFLAG2_REFLINK | XFS_DIFLAG2_COWEXTSIZE | \ - XFS_DIFLAG2_BIGTIME | XFS_DIFLAG2_NREXT64 | XFS_DIFLAG2_FORCEALIGN) + XFS_DIFLAG2_BIGTIME | XFS_DIFLAG2_NREXT64 | XFS_DIFLAG2_FORCEALIGN | \ + XFS_DIFLAG2_ATOMICWRITES) static inline bool xfs_dinode_has_bigtime(const struct xfs_dinode *dip) { diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c index 12f128f12824..5e42ec1dadb6 100644 --- a/fs/xfs/libxfs/xfs_inode_buf.c +++ b/fs/xfs/libxfs/xfs_inode_buf.c @@ -178,7 +178,10 @@ xfs_inode_from_disk( struct xfs_inode *ip, struct xfs_dinode *from) { + struct xfs_buftarg *target = xfs_inode_buftarg(ip); struct inode *inode = VFS_I(ip); + struct xfs_mount *mp = ip->i_mount; + struct xfs_sb *sbp = &mp->m_sb; int error; xfs_failaddr_t fa; @@ -261,6 +264,13 @@ xfs_inode_from_disk( } if (xfs_is_reflink_inode(ip)) xfs_ifork_init_cow(ip); + + if (xfs_inode_has_atomicwrites(ip)) { + if (sbp->sb_blocksize < target->bt_bdev_awu_min || + sbp->sb_blocksize * ip->i_extsize > target->bt_bdev_awu_max) + ip->i_diflags2 &= ~XFS_DIFLAG2_ATOMICWRITES; + } + return 0; out_destroy_data_fork: @@ -460,6 +470,25 @@ xfs_dinode_verify_nrext64( return NULL; } +static xfs_failaddr_t +xfs_inode_validate_atomicwrites( + struct xfs_mount *mp, + bool forcealign) +{ + /* superblock rocompat feature flag */ + if (!xfs_has_atomicwrites(mp)) + return __this_address; + + /* + * forcealign is required, so rely on sanity checks in + * xfs_inode_validate_forcealign() + */ + if (!forcealign) + return __this_address; + + return NULL; +} + xfs_failaddr_t xfs_dinode_verify( struct xfs_mount *mp, @@ -624,6 +653,13 @@ xfs_dinode_verify( return fa; } + if (flags2 & XFS_DIFLAG2_ATOMICWRITES) { + fa = xfs_inode_validate_atomicwrites(mp, + flags2 & XFS_DIFLAG2_FORCEALIGN); + if (fa) + return fa; + } + return NULL; } diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index e746c57c4cc4..a9ae8ab7d610 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c @@ -165,6 +165,8 @@ xfs_sb_version_to_features( features |= XFS_FEAT_INOBTCNT; if (sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_FORCEALIGN) features |= XFS_FEAT_FORCEALIGN; + if (sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_ATOMICWRITES) + features |= XFS_FEAT_ATOMICWRITES; if (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_FTYPE) features |= XFS_FEAT_FTYPE; if (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_SPINODES) diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 1a18c381127e..6e7ac6c90ec1 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -2057,6 +2057,8 @@ int xfs_init_buftarg( struct xfs_buftarg *btp, size_t logical_sectorsize, + unsigned int awu_min, + unsigned int awu_max, const char *descr) { /* Set up device logical sector size mask */ @@ -2083,6 +2085,9 @@ xfs_init_buftarg( btp->bt_shrinker->scan_objects = xfs_buftarg_shrink_scan; btp->bt_shrinker->private_data = btp; shrinker_register(btp->bt_shrinker); + + btp->bt_bdev_awu_min = awu_min; + btp->bt_bdev_awu_max = awu_max; return 0; out_destroy_io_count: @@ -2099,6 +2104,7 @@ xfs_alloc_buftarg( { struct xfs_buftarg *btp; const struct dax_holder_operations *ops = NULL; + unsigned int awu_min = 0, awu_max = 0; #if defined(CONFIG_FS_DAX) && defined(CONFIG_MEMORY_FAILURE) ops = &xfs_dax_holder_operations; @@ -2112,6 +2118,13 @@ xfs_alloc_buftarg( btp->bt_daxdev = fs_dax_get_by_bdev(btp->bt_bdev, &btp->bt_dax_part_off, mp, ops); + if (bdev_can_atomic_write(btp->bt_bdev)) { + struct request_queue *q = bdev_get_queue(btp->bt_bdev); + + awu_min = queue_atomic_write_unit_min_bytes(q); + awu_max = queue_atomic_write_unit_max_bytes(q); + } + /* * When allocating the buftargs we have not yet read the super block and * thus don't know the file system sector size yet. @@ -2119,7 +2132,7 @@ xfs_alloc_buftarg( if (xfs_setsize_buftarg(btp, bdev_logical_block_size(btp->bt_bdev))) goto error_free; if (xfs_init_buftarg(btp, bdev_logical_block_size(btp->bt_bdev), - mp->m_super->s_id)) + awu_min, awu_max, mp->m_super->s_id)) goto error_free; return btp; diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index b1580644501f..3bcd8137d739 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h @@ -124,6 +124,8 @@ struct xfs_buftarg { struct percpu_counter bt_io_count; struct ratelimit_state bt_ioerror_rl; + unsigned int bt_bdev_awu_min, bt_bdev_awu_max; + /* built-in cache, if we're not using the perag one */ struct xfs_buf_cache bt_cache[]; }; @@ -393,7 +395,7 @@ bool xfs_verify_magic16(struct xfs_buf *bp, __be16 dmagic); /* for xfs_buf_mem.c only: */ int xfs_init_buftarg(struct xfs_buftarg *btp, size_t logical_sectorsize, - const char *descr); + unsigned int awu_min, unsigned int awu_max, const char *descr); void xfs_destroy_buftarg(struct xfs_buftarg *btp); #endif /* __XFS_BUF_H__ */ diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index db5a0f66a121..d674fca22de9 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -609,6 +609,8 @@ xfs_ip2xflags( flags |= FS_XFLAG_COWEXTSIZE; if (ip->i_diflags2 & XFS_DIFLAG2_FORCEALIGN) flags |= FS_XFLAG_FORCEALIGN; + if (ip->i_diflags2 & XFS_DIFLAG2_ATOMICWRITES) + flags |= FS_XFLAG_ATOMICWRITES; } if (xfs_inode_has_attr_fork(ip)) diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 3f13943ab3a3..d796456215e2 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -321,6 +321,11 @@ static inline bool xfs_inode_has_extsize(struct xfs_inode *ip) return ip->i_diflags & XFS_DIFLAG_EXTSIZE; } +static inline bool xfs_inode_has_atomicwrites(struct xfs_inode *ip) +{ + return ip->i_diflags2 & XFS_DIFLAG2_ATOMICWRITES; +} + /* * Return the buftarg used for data allocations on a given inode. */ diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index d1126509ceb9..94a6d9f6d0d8 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -1112,6 +1112,8 @@ xfs_flags2diflags2( di_flags2 |= XFS_DIFLAG2_COWEXTSIZE; if (xflags & FS_XFLAG_FORCEALIGN) di_flags2 |= XFS_DIFLAG2_FORCEALIGN; + if (xflags & FS_XFLAG_ATOMICWRITES) + di_flags2 |= XFS_DIFLAG2_ATOMICWRITES; return di_flags2; } @@ -1122,12 +1124,16 @@ xfs_ioctl_setattr_xflags( struct xfs_inode *ip, struct fileattr *fa) { + struct xfs_buftarg *target = xfs_inode_buftarg(ip); struct xfs_mount *mp = ip->i_mount; + struct xfs_sb *sbp = &mp->m_sb; bool rtflag = (fa->fsx_xflags & FS_XFLAG_REALTIME); + bool atomic_writes = fa->fsx_xflags & FS_XFLAG_ATOMICWRITES; uint64_t i_flags2; - if (rtflag != XFS_IS_REALTIME_INODE(ip)) { - /* Can't change realtime flag if any extents are allocated. */ + /* Can't change RT or atomic flags if any extents are allocated. */ + if (rtflag != XFS_IS_REALTIME_INODE(ip) || + atomic_writes != xfs_inode_has_atomicwrites(ip)) { if (ip->i_df.if_nextents || ip->i_delayed_blks) return -EINVAL; } @@ -1164,6 +1170,17 @@ xfs_ioctl_setattr_xflags( return -EINVAL; } + if (atomic_writes) { + if (!xfs_has_atomicwrites(mp)) + return -EINVAL; + if (target->bt_bdev_awu_min > sbp->sb_blocksize) + return -EINVAL; + if (target->bt_bdev_awu_max < fa->fsx_extsize) + return -EINVAL; + if (!(fa->fsx_xflags & FS_XFLAG_FORCEALIGN)) + return -EINVAL; + } + ip->i_diflags = xfs_flags2diflags(ip, fa->fsx_xflags); ip->i_diflags2 = i_flags2; diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index a8266cf654c4..5856a72d431e 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -293,6 +293,7 @@ typedef struct xfs_mount { #define XFS_FEAT_NEEDSREPAIR (1ULL << 25) /* needs xfs_repair */ #define XFS_FEAT_NREXT64 (1ULL << 26) /* large extent counters */ #define XFS_FEAT_FORCEALIGN (1ULL << 27) /* aligned file data extents */ +#define XFS_FEAT_ATOMICWRITES (1ULL << 28) /* atomic writes support */ /* Mount features */ #define XFS_FEAT_NOATTR2 (1ULL << 48) /* disable attr2 creation */ @@ -357,6 +358,7 @@ __XFS_HAS_FEAT(bigtime, BIGTIME) __XFS_HAS_FEAT(needsrepair, NEEDSREPAIR) __XFS_HAS_FEAT(large_extent_counts, NREXT64) __XFS_HAS_FEAT(forcealign, FORCEALIGN) +__XFS_HAS_FEAT(atomicwrites, ATOMICWRITES) /* * Mount features diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 63d4312785ef..757c90b3d71b 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -1710,6 +1710,10 @@ xfs_fs_fill_super( xfs_warn(mp, "EXPERIMENTAL forced data extent alignment feature in use. Use at your own risk!"); + if (xfs_has_atomicwrites(mp)) + xfs_warn(mp, +"EXPERIMENTAL atomicwrites feature in use. Use at your own risk!"); + if (xfs_has_reflink(mp)) { if (mp->m_sb.sb_rblocks) { xfs_alert(mp, -- 2.31.1