From: Allison Henderson <allison.henderson@xxxxxxxxxx> Add support for fallocate2 ioctl, which is xfs' own version of fallocate. Struct xfs_fallocate2 is passed in the ioctl, and xfs_fallocate2.alignment allows the user to specify required extent alignment. This is key for atomic write support, as we expect extents to be aligned on atomic_write_unit_max boundaries. The alignment flag is not sticky, so further extent mutation will not obey this original alignment request. In addition, extent lengths should always be a multiple of atomic_write_unit_max, which they are not yet. So this really just works for scenarios when we were lucky enough to get a single extent. The following is sample usage and c code: mkfs.xfs -f /dev/sda mount /dev/sda mnt xfs_fallocate2 mnt/test_file1.img 0 20971520 262144 filefrag -v mnt/test_file1.img xfs_fallocate2.c struct xfs_fallocate2 { int64_t offset; /* bytes */ int64_t length; /* bytes */ uint64_t flags; uint32_t alignment; /* bytes */ uint32_t padding[9]; }; int main(int argc, char **argv) { char *file; int fd, ret; struct xfs_fallocate2 fa = {}; if (argc != 5) { printf("expected 5 arguments\n"); exit(0); } argv++; file = *argv; argv++; fa.offset = atoi(*argv); argv++; fa.length = atoi(*argv); argv++; fa.alignment = atoi(*argv); argv++; if (fa.alignment) fa.flags = XFS_FALLOC2_ALIGNED; fd = open(file, O_RDWR | O_CREAT, 0600); if (fd < 0) exit(0); ret = ioctl(fd, XFS_IOC_FALLOCATE2, &fa); close(fd); return ret; } Signed-off-by: Allison Henderson <allison.henderson@xxxxxxxxxx> Signed-off-by: Catherine Hoang <catherine.hoang@xxxxxxxxxx> Signed-off-by: John Garry <john.g.garry@xxxxxxxxxx> --- fs/xfs/Makefile | 1 + fs/xfs/libxfs/xfs_attr_remote.c | 2 +- fs/xfs/libxfs/xfs_bmap.c | 9 ++- fs/xfs/libxfs/xfs_bmap.h | 4 +- fs/xfs/libxfs/xfs_da_btree.c | 4 +- fs/xfs/libxfs/xfs_fs.h | 1 + fs/xfs/xfs_bmap_util.c | 7 ++- fs/xfs/xfs_bmap_util.h | 2 +- fs/xfs/xfs_dquot.c | 2 +- fs/xfs/xfs_file.c | 19 +++++-- fs/xfs/xfs_fs_staging.c | 99 +++++++++++++++++++++++++++++++++ fs/xfs/xfs_fs_staging.h | 21 +++++++ fs/xfs/xfs_ioctl.c | 4 ++ fs/xfs/xfs_iomap.c | 4 +- fs/xfs/xfs_reflink.c | 4 +- fs/xfs/xfs_rtalloc.c | 2 +- fs/xfs/xfs_symlink.c | 2 +- security/security.c | 1 + 18 files changed, 168 insertions(+), 20 deletions(-) create mode 100644 fs/xfs/xfs_fs_staging.c create mode 100644 fs/xfs/xfs_fs_staging.h diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile index 92d88dc3c9f7..9b413544d358 100644 --- a/fs/xfs/Makefile +++ b/fs/xfs/Makefile @@ -93,6 +93,7 @@ xfs-y += xfs_aops.o \ xfs_sysfs.o \ xfs_trans.o \ xfs_xattr.o \ + xfs_fs_staging.o \ kmem.o # low-level transaction/log code diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c index d440393b40eb..c5f190fef1b5 100644 --- a/fs/xfs/libxfs/xfs_attr_remote.c +++ b/fs/xfs/libxfs/xfs_attr_remote.c @@ -615,7 +615,7 @@ xfs_attr_rmtval_set_blk( error = xfs_bmapi_write(args->trans, dp, (xfs_fileoff_t)attr->xattri_lblkno, attr->xattri_blkcnt, XFS_BMAPI_ATTRFORK, args->total, - map, &nmap); + map, &nmap, 0); if (error) return error; diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 34de6e6898c4..52a6e2b61228 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -3275,7 +3275,9 @@ xfs_bmap_compute_alignments( struct xfs_alloc_arg *args) { struct xfs_mount *mp = args->mp; - xfs_extlen_t align = 0; /* minimum allocation alignment */ + + /* minimum allocation alignment */ + xfs_extlen_t align = args->alignment; int stripe_align = 0; /* stripe alignment for allocation is determined by mount parameters */ @@ -3652,6 +3654,7 @@ xfs_bmap_btalloc( .datatype = ap->datatype, .alignment = 1, .minalignslop = 0, + .alignment = ap->align, }; xfs_fileoff_t orig_offset; xfs_extlen_t orig_length; @@ -4279,12 +4282,14 @@ xfs_bmapi_write( uint32_t flags, /* XFS_BMAPI_... */ xfs_extlen_t total, /* total blocks needed */ struct xfs_bmbt_irec *mval, /* output: map values */ - int *nmap) /* i/o: mval size/count */ + int *nmap, + xfs_extlen_t align) /* i/o: mval size/count */ { struct xfs_bmalloca bma = { .tp = tp, .ip = ip, .total = total, + .align = align, }; struct xfs_mount *mp = ip->i_mount; int whichfork = xfs_bmapi_whichfork(flags); diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h index dd08361ca5a6..0573dfc5fa6b 100644 --- a/fs/xfs/libxfs/xfs_bmap.h +++ b/fs/xfs/libxfs/xfs_bmap.h @@ -26,6 +26,7 @@ struct xfs_bmalloca { xfs_fileoff_t offset; /* offset in file filling in */ xfs_extlen_t length; /* i/o length asked/allocated */ xfs_fsblock_t blkno; /* starting block of new extent */ + xfs_extlen_t align; struct xfs_btree_cur *cur; /* btree cursor */ struct xfs_iext_cursor icur; /* incore extent cursor */ @@ -189,7 +190,8 @@ int xfs_bmapi_read(struct xfs_inode *ip, xfs_fileoff_t bno, int *nmap, uint32_t flags); int xfs_bmapi_write(struct xfs_trans *tp, struct xfs_inode *ip, xfs_fileoff_t bno, xfs_filblks_t len, uint32_t flags, - xfs_extlen_t total, struct xfs_bmbt_irec *mval, int *nmap); + xfs_extlen_t total, struct xfs_bmbt_irec *mval, int *nmap, + xfs_extlen_t align); int __xfs_bunmapi(struct xfs_trans *tp, struct xfs_inode *ip, xfs_fileoff_t bno, xfs_filblks_t *rlen, uint32_t flags, xfs_extnum_t nexts); diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c index e576560b46e9..e6581254092f 100644 --- a/fs/xfs/libxfs/xfs_da_btree.c +++ b/fs/xfs/libxfs/xfs_da_btree.c @@ -2174,7 +2174,7 @@ xfs_da_grow_inode_int( nmap = 1; error = xfs_bmapi_write(tp, dp, *bno, count, xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA|XFS_BMAPI_CONTIG, - args->total, &map, &nmap); + args->total, &map, &nmap, 0); if (error) return error; @@ -2196,7 +2196,7 @@ xfs_da_grow_inode_int( nmap = min(XFS_BMAP_MAX_NMAP, c); error = xfs_bmapi_write(tp, dp, b, c, xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA, - args->total, &mapp[mapi], &nmap); + args->total, &mapp[mapi], &nmap, 0); if (error) goto out_free_map; if (nmap < 1) diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h index 1cfd5bc6520a..829316ca01ea 100644 --- a/fs/xfs/libxfs/xfs_fs.h +++ b/fs/xfs/libxfs/xfs_fs.h @@ -831,6 +831,7 @@ struct xfs_scrub_metadata { #define XFS_IOC_FSGEOMETRY _IOR ('X', 126, struct xfs_fsop_geom) #define XFS_IOC_BULKSTAT _IOR ('X', 127, struct xfs_bulkstat_req) #define XFS_IOC_INUMBERS _IOR ('X', 128, struct xfs_inumbers_req) +#define XFS_IOC_FALLOCATE2 _IOR ('X', 129, struct xfs_fallocate2) /* XFS_IOC_GETFSUUID ---------- deprecated 140 */ diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index a09dd2606479..a0c55af6f051 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -776,10 +776,12 @@ int xfs_alloc_file_space( struct xfs_inode *ip, xfs_off_t offset, - xfs_off_t len) + xfs_off_t len, + xfs_off_t align) { xfs_mount_t *mp = ip->i_mount; xfs_off_t count; + xfs_filblks_t align_fsb; xfs_filblks_t allocated_fsb; xfs_filblks_t allocatesize_fsb; xfs_extlen_t extsz, temp; @@ -811,6 +813,7 @@ xfs_alloc_file_space( nimaps = 1; startoffset_fsb = XFS_B_TO_FSBT(mp, offset); endoffset_fsb = XFS_B_TO_FSB(mp, offset + count); + align_fsb = XFS_B_TO_FSB(mp, align); allocatesize_fsb = endoffset_fsb - startoffset_fsb; /* @@ -872,7 +875,7 @@ xfs_alloc_file_space( error = xfs_bmapi_write(tp, ip, startoffset_fsb, allocatesize_fsb, XFS_BMAPI_PREALLOC, 0, imapp, - &nimaps); + &nimaps, align_fsb); if (error) goto error; diff --git a/fs/xfs/xfs_bmap_util.h b/fs/xfs/xfs_bmap_util.h index 6888078f5c31..476f610ad617 100644 --- a/fs/xfs/xfs_bmap_util.h +++ b/fs/xfs/xfs_bmap_util.h @@ -54,7 +54,7 @@ int xfs_bmap_last_extent(struct xfs_trans *tp, struct xfs_inode *ip, /* preallocation and hole punch interface */ int xfs_alloc_file_space(struct xfs_inode *ip, xfs_off_t offset, - xfs_off_t len); + xfs_off_t len, xfs_off_t align); int xfs_free_file_space(struct xfs_inode *ip, xfs_off_t offset, xfs_off_t len); int xfs_collapse_file_space(struct xfs_inode *, xfs_off_t offset, diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c index 8fb90da89787..475e1a56d1b0 100644 --- a/fs/xfs/xfs_dquot.c +++ b/fs/xfs/xfs_dquot.c @@ -328,7 +328,7 @@ xfs_dquot_disk_alloc( /* Create the block mapping. */ error = xfs_bmapi_write(tp, quotip, dqp->q_fileoffset, XFS_DQUOT_CLUSTER_SIZE_FSB, XFS_BMAPI_METADATA, 0, &map, - &nmaps); + &nmaps, 0); if (error) goto err_cancel; diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 705250f9f90a..9b1db42a8d33 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -883,12 +883,13 @@ static inline bool xfs_file_sync_writes(struct file *filp) FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE | \ FALLOC_FL_INSERT_RANGE | FALLOC_FL_UNSHARE_RANGE) -STATIC long -xfs_file_fallocate( +long +_xfs_file_fallocate( struct file *file, int mode, loff_t offset, - loff_t len) + loff_t len, + loff_t alignment) { struct inode *inode = file_inode(file); struct xfs_inode *ip = XFS_I(inode); @@ -1035,7 +1036,7 @@ xfs_file_fallocate( } if (!xfs_is_always_cow_inode(ip)) { - error = xfs_alloc_file_space(ip, offset, len); + error = xfs_alloc_file_space(ip, offset, len, alignment); if (error) goto out_unlock; } @@ -1073,6 +1074,16 @@ xfs_file_fallocate( return error; } +STATIC long +xfs_file_fallocate( + struct file *file, + int mode, + loff_t offset, + loff_t len) +{ + return _xfs_file_fallocate(file, mode, offset, len, 0); +} + STATIC int xfs_file_fadvise( struct file *file, diff --git a/fs/xfs/xfs_fs_staging.c b/fs/xfs/xfs_fs_staging.c new file mode 100644 index 000000000000..1d635c0a9f49 --- /dev/null +++ b/fs/xfs/xfs_fs_staging.c @@ -0,0 +1,99 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (C) 2023 Oracle. All Rights Reserved. + */ + +#include "xfs.h" +#include "xfs_fs_staging.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" +#include "xfs_mount.h" +#include "xfs_inode.h" + +#include "linux/security.h" +#include "linux/fsnotify.h" + +extern long _xfs_file_fallocate( + struct file *file, + int mode, + loff_t offset, + loff_t len, + loff_t alignment); + +int xfs_fallocate2( struct file *filp, + void __user *arg) +{ + struct inode *inode = file_inode(filp); + //struct xfs_inode *ip = XFS_I(inode); + struct xfs_fallocate2 fallocate2; + int ret; + + if (copy_from_user(&fallocate2, arg, sizeof(fallocate2))) + return -EFAULT; + + if (fallocate2.flags & XFS_FALLOC2_ALIGNED) { + if (!fallocate2.alignment || !is_power_of_2(fallocate2.alignment)) + return -EINVAL; + + if (fallocate2.offset % fallocate2.alignment) + return -EINVAL; + + if (fallocate2.length % fallocate2.alignment) + return -EINVAL; + } else if (fallocate2.alignment) { + return -EINVAL; + } + + /* These are all just copied from vfs_fallocate() */ + if (fallocate2.offset < 0 || fallocate2.length <= 0) + return -EINVAL; + + if (!(filp->f_mode & FMODE_WRITE)) + return -EBADF; + + if (IS_IMMUTABLE(inode)) + return -EPERM; + + /* + * We cannot allow any fallocate operation on an active swapfile + */ + if (IS_SWAPFILE(inode)) + return -ETXTBSY; + + /* + * Revalidate the write permissions, in case security policy has + * changed since the files were opened. + */ + ret = security_file_permission(filp, MAY_WRITE); + if (ret) + return ret; + + if (S_ISFIFO(inode->i_mode)) + return -ESPIPE; + + if (S_ISDIR(inode->i_mode)) + return -EISDIR; + + if (!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode)) + return -ENODEV; + + /* Check for wrap through zero too */ + if (((fallocate2.offset + fallocate2.length) > inode->i_sb->s_maxbytes) || + ((fallocate2.offset + fallocate2.length) < 0)) + return -EFBIG; + + if (!filp->f_op->fallocate) + return -EOPNOTSUPP; + + file_start_write(filp); + ret = _xfs_file_fallocate(filp, 0, fallocate2.offset, fallocate2.length, fallocate2.alignment); + + if (ret == 0) + fsnotify_modify(filp); + + file_end_write(filp); + + return ret; +} diff --git a/fs/xfs/xfs_fs_staging.h b/fs/xfs/xfs_fs_staging.h new file mode 100644 index 000000000000..a82e61063dba --- /dev/null +++ b/fs/xfs/xfs_fs_staging.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (C) 2023 Oracle. All Rights Reserved. + */ +#ifndef __XFS_FS_STAGING_H__ +#define __XFS_FS_STAGING_H__ + +struct xfs_fallocate2 { + s64 offset; /* bytes */ + s64 length; /* bytes */ + u64 flags; + u32 alignment; /* bytes */ + u32 padding[8]; +}; + +#define XFS_FALLOC2_ALIGNED (1U << 0) + +int xfs_fallocate2( struct file *filp, + void __user *arg); + +#endif /* __XFS_FS_STAGING_H__ */ diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 55bb01173cde..6e60fce44068 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -4,6 +4,7 @@ * All Rights Reserved. */ #include "xfs.h" +#include "xfs_fs_staging.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" @@ -2149,6 +2150,9 @@ xfs_file_ioctl( return error; } + case XFS_IOC_FALLOCATE2: + return xfs_fallocate2(filp, arg); + default: return -ENOTTY; } diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 285885c308bd..a4389a0c4bf2 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -306,7 +306,7 @@ xfs_iomap_write_direct( */ nimaps = 1; error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb, bmapi_flags, 0, - imap, &nimaps); + imap, &nimaps, 0); if (error) goto out_trans_cancel; @@ -614,7 +614,7 @@ xfs_iomap_write_unwritten( nimaps = 1; error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb, XFS_BMAPI_CONVERT, resblks, &imap, - &nimaps); + &nimaps, 0); if (error) goto error_on_bmapi_transaction; diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index f5dc46ce9803..a2e5ba6cf7f3 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -420,7 +420,7 @@ xfs_reflink_fill_cow_hole( nimaps = 1; error = xfs_bmapi_write(tp, ip, imap->br_startoff, imap->br_blockcount, XFS_BMAPI_COWFORK | XFS_BMAPI_PREALLOC, 0, cmap, - &nimaps); + &nimaps, 0); if (error) goto out_trans_cancel; @@ -490,7 +490,7 @@ xfs_reflink_fill_delalloc( error = xfs_bmapi_write(tp, ip, cmap->br_startoff, cmap->br_blockcount, XFS_BMAPI_COWFORK | XFS_BMAPI_PREALLOC, 0, - cmap, &nimaps); + cmap, &nimaps, 0); if (error) goto out_trans_cancel; diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index 16534e9873f6..a57a8a4d8294 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c @@ -817,7 +817,7 @@ xfs_growfs_rt_alloc( */ nmap = 1; error = xfs_bmapi_write(tp, ip, oblocks, nblocks - oblocks, - XFS_BMAPI_METADATA, 0, &map, &nmap); + XFS_BMAPI_METADATA, 0, &map, &nmap, 0); if (!error && nmap < 1) error = -ENOSPC; if (error) diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c index 85e433df6a3f..2a4524bf34a5 100644 --- a/fs/xfs/xfs_symlink.c +++ b/fs/xfs/xfs_symlink.c @@ -269,7 +269,7 @@ xfs_symlink( nmaps = XFS_SYMLINK_MAPS; error = xfs_bmapi_write(tp, ip, first_fsb, fs_blocks, - XFS_BMAPI_METADATA, resblks, mval, &nmaps); + XFS_BMAPI_METADATA, resblks, mval, &nmaps, 0); if (error) goto out_trans_cancel; diff --git a/security/security.c b/security/security.c index cf6cc576736f..d53b1b6c2d59 100644 --- a/security/security.c +++ b/security/security.c @@ -1593,6 +1593,7 @@ int security_file_permission(struct file *file, int mask) return fsnotify_perm(file, mask); } +EXPORT_SYMBOL(security_file_permission); int security_file_alloc(struct file *file) { -- 2.31.1