On Mon, Feb 03, 2025 at 10:43:10AM +0100, Christoph Hellwig wrote: > Add support for generating / verifying protection information in the > file system. This is done by hooking into the bio submission in > iomap and then using the generic PI helpers. Compared to just using > the block layer auto PI this extends the protection envelope and also > prepares for eventually passing through PI from userspace at least > for direct I/O. > > Right now this is still pretty hacky, e.g. the single PI buffer can > get pretty gigantic and has no mempool backing it. The deferring of > I/O completions is done unconditionally instead only when needed, > and we assume the device can actually handle these huge segments. > The latter should be fixed by doing proper splitting based on metadata > limits in the block layer, but the rest needs to be addressed here. Seems reasonable to me modulo the XXX parts. :) --D > Signed-off-by: Christoph Hellwig <hch@xxxxxx> > --- > fs/xfs/Makefile | 1 + > fs/xfs/xfs_aops.c | 29 +++++++++++++++-- > fs/xfs/xfs_aops.h | 1 + > fs/xfs/xfs_data_csum.c | 73 ++++++++++++++++++++++++++++++++++++++++++ > fs/xfs/xfs_data_csum.h | 7 ++++ > fs/xfs/xfs_file.c | 27 +++++++++++++++- > fs/xfs/xfs_inode.h | 6 ++-- > 7 files changed, 136 insertions(+), 8 deletions(-) > create mode 100644 fs/xfs/xfs_data_csum.c > create mode 100644 fs/xfs/xfs_data_csum.h > > diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile > index 7afa51e41427..aa8749d640e7 100644 > --- a/fs/xfs/Makefile > +++ b/fs/xfs/Makefile > @@ -73,6 +73,7 @@ xfs-y += xfs_aops.o \ > xfs_bmap_util.o \ > xfs_bio_io.o \ > xfs_buf.o \ > + xfs_data_csum.o \ > xfs_dahash_test.o \ > xfs_dir2_readdir.o \ > xfs_discard.o \ > diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c > index 3e42a684cce1..291f5d4dbce6 100644 > --- a/fs/xfs/xfs_aops.c > +++ b/fs/xfs/xfs_aops.c > @@ -19,6 +19,7 @@ > #include "xfs_reflink.h" > #include "xfs_errortag.h" > #include "xfs_error.h" > +#include "xfs_data_csum.h" > > struct xfs_writepage_ctx { > struct iomap_writepage_ctx ctx; > @@ -122,6 +123,11 @@ xfs_end_ioend( > goto done; > } > > + if (bio_op(&ioend->io_bio) == REQ_OP_READ) { > + error = xfs_data_csum_verify(ioend); > + goto done; > + } > + > /* > * Success: commit the COW or unwritten blocks if needed. > */ > @@ -175,7 +181,7 @@ xfs_end_io( > } > } > > -STATIC void > +void > xfs_end_bio( > struct bio *bio) > { > @@ -417,6 +423,8 @@ xfs_submit_ioend( > > memalloc_nofs_restore(nofs_flag); > > + xfs_data_csum_generate(&ioend->io_bio); > + > /* send ioends that might require a transaction to the completion wq */ > if (xfs_ioend_is_append(ioend) || > (ioend->io_flags & (IOMAP_IOEND_UNWRITTEN | IOMAP_IOEND_SHARED))) > @@ -517,19 +525,34 @@ xfs_vm_bmap( > return iomap_bmap(mapping, block, &xfs_read_iomap_ops); > } > > +static void xfs_buffered_read_submit_io(struct inode *inode, > + struct bio *bio, loff_t file_offset) > +{ > + xfs_data_csum_alloc(bio); > + iomap_init_ioend(inode, bio, file_offset, 0); > + bio->bi_end_io = xfs_end_bio; > + submit_bio(bio); > +} > + > +static const struct iomap_read_folio_ops xfs_iomap_read_ops = { > + .bio_set = &iomap_ioend_bioset, > + .submit_io = xfs_buffered_read_submit_io, > +}; > + > STATIC int > xfs_vm_read_folio( > struct file *unused, > struct folio *folio) > { > - return iomap_read_folio(folio, &xfs_read_iomap_ops, NULL); > + return iomap_read_folio(folio, &xfs_read_iomap_ops, > + &xfs_iomap_read_ops); > } > > STATIC void > xfs_vm_readahead( > struct readahead_control *rac) > { > - iomap_readahead(rac, &xfs_read_iomap_ops, NULL); > + iomap_readahead(rac, &xfs_read_iomap_ops, &xfs_iomap_read_ops); > } > > static int > diff --git a/fs/xfs/xfs_aops.h b/fs/xfs/xfs_aops.h > index e0bd68419764..efed1ab59dbf 100644 > --- a/fs/xfs/xfs_aops.h > +++ b/fs/xfs/xfs_aops.h > @@ -10,5 +10,6 @@ extern const struct address_space_operations xfs_address_space_operations; > extern const struct address_space_operations xfs_dax_aops; > > int xfs_setfilesize(struct xfs_inode *ip, xfs_off_t offset, size_t size); > +void xfs_end_bio(struct bio *bio); > > #endif /* __XFS_AOPS_H__ */ > diff --git a/fs/xfs/xfs_data_csum.c b/fs/xfs/xfs_data_csum.c > new file mode 100644 > index 000000000000..d9d3620654b1 > --- /dev/null > +++ b/fs/xfs/xfs_data_csum.c > @@ -0,0 +1,73 @@ > +// SPDX-License-Identifier: GPL-2.0 > +/* > + * Copyright (c) 2022-2025 Christoph Hellwig. > + */ > +#include "xfs.h" > +#include "xfs_format.h" > +#include "xfs_shared.h" > +#include "xfs_trans_resv.h" > +#include "xfs_mount.h" > +#include "xfs_inode.h" > +#include "xfs_cksum.h" > +#include "xfs_data_csum.h" > +#include <linux/iomap.h> > +#include <linux/blk-integrity.h> > +#include <linux/bio-integrity.h> > + > +void * > +xfs_data_csum_alloc( > + struct bio *bio) > +{ > + struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk); > + struct bio_integrity_payload *bip; > + unsigned int buf_size; > + void *buf; > + > + if (!bi) > + return NULL; > + > + buf_size = bio_integrity_bytes(bi, bio_sectors(bio)); > + /* XXX: this needs proper mempools */ > + /* XXX: needs (partial) zeroing if tuple_size > csum_size */ > + buf = kmalloc(buf_size, GFP_NOFS | __GFP_NOFAIL); > + bip = bio_integrity_alloc(bio, GFP_NOFS | __GFP_NOFAIL, 1); > + if (!bio_integrity_add_page(bio, virt_to_page(buf), buf_size, > + offset_in_page(buf))) > + WARN_ON_ONCE(1); > + > + if (bi->csum_type) { > + if (bi->csum_type == BLK_INTEGRITY_CSUM_IP) > + bip->bip_flags |= BIP_IP_CHECKSUM; > + bip->bip_flags |= BIP_CHECK_GUARD; > + } > + if (bi->flags & BLK_INTEGRITY_REF_TAG) > + bip->bip_flags |= BIP_CHECK_REFTAG; > + bip_set_seed(bip, bio->bi_iter.bi_sector); > + > + return buf; > +} > + > +void > +xfs_data_csum_generate( > + struct bio *bio) > +{ > + struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk); > + > + if (!bi || !bi->csum_type) > + return; > + > + xfs_data_csum_alloc(bio); > + blk_integrity_generate(bio); > +} > + > +int > +xfs_data_csum_verify( > + struct iomap_ioend *ioend) > +{ > + struct bio *bio = &ioend->io_bio; > + struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk); > + > + if (!bi || !bi->csum_type) > + return 0; > + return blk_integrity_verify_all(bio, ioend->io_sector); > +} > diff --git a/fs/xfs/xfs_data_csum.h b/fs/xfs/xfs_data_csum.h > new file mode 100644 > index 000000000000..f32215e8f46c > --- /dev/null > +++ b/fs/xfs/xfs_data_csum.h > @@ -0,0 +1,7 @@ > +/* SPDX-License-Identifier: GPL-2.0 */ > + > +struct iomap_ioend; > + > +void *xfs_data_csum_alloc(struct bio *bio); > +void xfs_data_csum_generate(struct bio *bio); > +int xfs_data_csum_verify(struct iomap_ioend *ioend); > diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c > index f7a7d89c345e..0d64c54017f0 100644 > --- a/fs/xfs/xfs_file.c > +++ b/fs/xfs/xfs_file.c > @@ -25,6 +25,8 @@ > #include "xfs_iomap.h" > #include "xfs_reflink.h" > #include "xfs_file.h" > +#include "xfs_data_csum.h" > +#include "xfs_aops.h" > > #include <linux/dax.h> > #include <linux/falloc.h> > @@ -227,6 +229,20 @@ xfs_ilock_iocb_for_write( > return 0; > } > > +static void xfs_dio_read_submit_io(const struct iomap_iter *iter, > + struct bio *bio, loff_t file_offset) > +{ > + xfs_data_csum_alloc(bio); > + iomap_init_ioend(iter->inode, bio, file_offset, IOMAP_IOEND_DIRECT); > + bio->bi_end_io = xfs_end_bio; > + submit_bio(bio); > +} > + > +static const struct iomap_dio_ops xfs_dio_read_ops = { > + .bio_set = &iomap_ioend_bioset, > + .submit_io = xfs_dio_read_submit_io, > +}; > + > STATIC ssize_t > xfs_file_dio_read( > struct kiocb *iocb, > @@ -245,7 +261,8 @@ xfs_file_dio_read( > ret = xfs_ilock_iocb(iocb, XFS_IOLOCK_SHARED); > if (ret) > return ret; > - ret = iomap_dio_rw(iocb, to, &xfs_read_iomap_ops, NULL, 0, NULL, 0); > + ret = iomap_dio_rw(iocb, to, &xfs_read_iomap_ops, &xfs_dio_read_ops, 0, > + NULL, 0); > xfs_iunlock(ip, XFS_IOLOCK_SHARED); > > return ret; > @@ -578,8 +595,16 @@ xfs_dio_write_end_io( > return error; > } > > +static void xfs_dio_write_submit_io(const struct iomap_iter *iter, > + struct bio *bio, loff_t file_offset) > +{ > + xfs_data_csum_generate(bio); > + submit_bio(bio); > +} > + > static const struct iomap_dio_ops xfs_dio_write_ops = { > .end_io = xfs_dio_write_end_io, > + .submit_io = xfs_dio_write_submit_io, > }; > > /* > diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h > index c08093a65352..ff346bbe454c 100644 > --- a/fs/xfs/xfs_inode.h > +++ b/fs/xfs/xfs_inode.h > @@ -609,10 +609,8 @@ int xfs_break_layouts(struct inode *inode, uint *iolock, > > static inline void xfs_update_stable_writes(struct xfs_inode *ip) > { > - if (bdev_stable_writes(xfs_inode_buftarg(ip)->bt_bdev)) > - mapping_set_stable_writes(VFS_I(ip)->i_mapping); > - else > - mapping_clear_stable_writes(VFS_I(ip)->i_mapping); > + /* XXX: unconditional for now */ > + mapping_set_stable_writes(VFS_I(ip)->i_mapping); > } > > /* > -- > 2.45.2 > >