Since userspace can now pass PI buffers through to the block integrity provider, provide a means for userspace to specify a flags argument with the PI buffer. The initial user for this will be sd_dif, which will enable user programs to ask the kernel to fill in whichever fields they don't want to provide. This is intended, for example, to satisfy programs that really only care to provide an app tag. Signed-off-by: Darrick J. Wong <darrick.wong@xxxxxxxxxx> --- Documentation/block/data-integrity.txt | 11 ++++ block/blk-integrity.c | 1 drivers/scsi/sd_dif.c | 76 ++++++++++++++++++++++++++---- fs/aio.c | 3 + fs/bio-integrity.c | 80 ++++++++++++++++++++++++++++++++ fs/direct-io.c | 1 include/linux/bio.h | 3 + include/linux/blkdev.h | 2 + include/uapi/linux/aio_abi.h | 1 9 files changed, 162 insertions(+), 16 deletions(-) diff --git a/Documentation/block/data-integrity.txt b/Documentation/block/data-integrity.txt index 1d1f070..b72a54f 100644 --- a/Documentation/block/data-integrity.txt +++ b/Documentation/block/data-integrity.txt @@ -292,7 +292,10 @@ will require extra work due to the application tag. The bio_integrity_prep_iter should contain the page offset and buffer length of the PI buffer, the number of pages, and the actual array of - pages, as returned by get_user_pages. + pages, as returned by get_user_pages. The user_flags argument should + contain whatever flag values were passed in by userspace; the values + of the flags are specific to the block integrity provider, and are + passed to the mod_user_buf_fn handler. 5.4 REGISTERING A BLOCK DEVICE AS CAPABLE OF EXCHANGING INTEGRITY METADATA @@ -332,6 +335,12 @@ will require extra work due to the application tag. are available per hardware sector. For DIF this is either 2 or 0 depending on the value of the Control Mode Page ATO bit. + 'mod_user_buf_fn' updates the appropriate integrity metadata for + a WRITE operation. 
This function is called when userspace passes + in a PI buffer along with file data; the flags argument (which is + specific to the blk_integrity provider) arranges for pre-processing + of the user buffer prior to issuing the IO. + See 6.2 for a description of get_tag_fn and set_tag_fn. ---------------------------------------------------------------------- diff --git a/block/blk-integrity.c b/block/blk-integrity.c index 7fbab84..1cb1eb2 100644 --- a/block/blk-integrity.c +++ b/block/blk-integrity.c @@ -421,6 +421,7 @@ int blk_integrity_register(struct gendisk *disk, struct blk_integrity *template) bi->set_tag_fn = template->set_tag_fn; bi->get_tag_fn = template->get_tag_fn; bi->tag_size = template->tag_size; + bi->mod_user_buf_fn = template->mod_user_buf_fn; } else bi->name = bi_unsupported_name; diff --git a/drivers/scsi/sd_dif.c b/drivers/scsi/sd_dif.c index a7a691d..74182c9 100644 --- a/drivers/scsi/sd_dif.c +++ b/drivers/scsi/sd_dif.c @@ -53,31 +53,58 @@ static __u16 sd_dif_ip_fn(void *data, unsigned int len) * Type 1 and Type 2 protection use the same format: 16 bit guard tag, * 16 bit app tag, 32 bit reference tag. 
*/ -static void sd_dif_type1_generate(struct blk_integrity_exchg *bix, csum_fn *fn) +#define GENERATE_GUARD (1) +#define GENERATE_REF (2) +#define GENERATE_APP (4) +#define GENERATE_ALL (7) +static int sd_dif_type1_generate(struct blk_integrity_exchg *bix, csum_fn *fn, + int flags) { void *buf = bix->data_buf; struct sd_dif_tuple *sdt = bix->prot_buf; sector_t sector = bix->sector; unsigned int i; + if (flags & ~GENERATE_ALL) + return -EINVAL; + if (!flags) + return -ENOTTY; + for (i = 0 ; i < bix->data_size ; i += bix->sector_size, sdt++) { - sdt->guard_tag = fn(buf, bix->sector_size); - sdt->ref_tag = cpu_to_be32(sector & 0xffffffff); - sdt->app_tag = 0; + if (flags & GENERATE_GUARD) + sdt->guard_tag = fn(buf, bix->sector_size); + if (flags & GENERATE_REF) + sdt->ref_tag = cpu_to_be32(sector & 0xffffffff); + if (flags & GENERATE_APP) + sdt->app_tag = 0; buf += bix->sector_size; sector++; } + + return 0; } static void sd_dif_type1_generate_crc(struct blk_integrity_exchg *bix) { - sd_dif_type1_generate(bix, sd_dif_crc_fn); + sd_dif_type1_generate(bix, sd_dif_crc_fn, GENERATE_ALL); } static void sd_dif_type1_generate_ip(struct blk_integrity_exchg *bix) { - sd_dif_type1_generate(bix, sd_dif_ip_fn); + sd_dif_type1_generate(bix, sd_dif_ip_fn, GENERATE_ALL); +} + +static int sd_dif_type1_mod_crc(struct blk_integrity_exchg *bix, + int flags) +{ + return sd_dif_type1_generate(bix, sd_dif_crc_fn, flags); +} + +static int sd_dif_type1_mod_ip(struct blk_integrity_exchg *bix, + int flags) +{ + return sd_dif_type1_generate(bix, sd_dif_ip_fn, flags); } static int sd_dif_type1_verify(struct blk_integrity_exchg *bix, csum_fn *fn) @@ -163,6 +190,7 @@ static struct blk_integrity dif_type1_integrity_crc = { .set_tag_fn = sd_dif_type1_set_tag, .tuple_size = sizeof(struct sd_dif_tuple), .tag_size = 0, + .mod_user_buf_fn = sd_dif_type1_mod_crc, }; static struct blk_integrity dif_type1_integrity_ip = { @@ -173,6 +201,7 @@ static struct blk_integrity dif_type1_integrity_ip = { 
.set_tag_fn = sd_dif_type1_set_tag, .tuple_size = sizeof(struct sd_dif_tuple), .tag_size = 0, + .mod_user_buf_fn = sd_dif_type1_mod_ip, }; @@ -180,29 +209,50 @@ static struct blk_integrity dif_type1_integrity_ip = { * Type 3 protection has a 16-bit guard tag and 16 + 32 bits of opaque * tag space. */ -static void sd_dif_type3_generate(struct blk_integrity_exchg *bix, csum_fn *fn) +static int sd_dif_type3_generate(struct blk_integrity_exchg *bix, csum_fn *fn, + int flags) { void *buf = bix->data_buf; struct sd_dif_tuple *sdt = bix->prot_buf; unsigned int i; + if (flags & (~GENERATE_ALL | GENERATE_REF)) + return -EINVAL; + if (!flags) + return -ENOTTY; + for (i = 0 ; i < bix->data_size ; i += bix->sector_size, sdt++) { - sdt->guard_tag = fn(buf, bix->sector_size); - sdt->ref_tag = 0; - sdt->app_tag = 0; + if (flags & GENERATE_GUARD) + sdt->guard_tag = fn(buf, bix->sector_size); + if (flags & GENERATE_APP) { + sdt->ref_tag = 0; + sdt->app_tag = 0; + } buf += bix->sector_size; } + + return 0; } static void sd_dif_type3_generate_crc(struct blk_integrity_exchg *bix) { - sd_dif_type3_generate(bix, sd_dif_crc_fn); + sd_dif_type3_generate(bix, sd_dif_crc_fn, GENERATE_GUARD | GENERATE_APP); /* not GENERATE_ALL: Type 3 rejects GENERATE_REF with -EINVAL */ } static void sd_dif_type3_generate_ip(struct blk_integrity_exchg *bix) { - sd_dif_type3_generate(bix, sd_dif_ip_fn); + sd_dif_type3_generate(bix, sd_dif_ip_fn, GENERATE_GUARD | GENERATE_APP); /* not GENERATE_ALL: Type 3 rejects GENERATE_REF with -EINVAL */ +} + +static int sd_dif_type3_mod_crc(struct blk_integrity_exchg *bix, int flags) +{ + return sd_dif_type3_generate(bix, sd_dif_crc_fn, flags); +} + +static int sd_dif_type3_mod_ip(struct blk_integrity_exchg *bix, int flags) +{ + return sd_dif_type3_generate(bix, sd_dif_ip_fn, flags); } static int sd_dif_type3_verify(struct blk_integrity_exchg *bix, csum_fn *fn) @@ -283,6 +333,7 @@ static struct blk_integrity dif_type3_integrity_crc = { .set_tag_fn = sd_dif_type3_set_tag, .tuple_size = sizeof(struct sd_dif_tuple), .tag_size = 0, + .mod_user_buf_fn = sd_dif_type3_mod_crc, }; static struct blk_integrity dif_type3_integrity_ip = { @@ 
-293,6 +344,7 @@ static struct blk_integrity dif_type3_integrity_ip = { .set_tag_fn = sd_dif_type3_set_tag, .tuple_size = sizeof(struct sd_dif_tuple), .tag_size = 0, + .mod_user_buf_fn = sd_dif_type3_mod_ip, }; /* diff --git a/fs/aio.c b/fs/aio.c index 3f932c3..2e80fbb 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -1424,6 +1424,7 @@ static int setup_pi_ext(struct kiocb *req, int is_write) return -ENOMEM; } req->ki_ioext->ke_pi_iter.pi_userpages = p; + req->ki_ioext->ke_pi_iter.pi_userflags = ext->ie_pi_flags; retval = get_user_pages_fast((unsigned long)ext->ie_pi_buf, req->ki_ioext->ke_pi_iter.pi_nrpages, @@ -1441,7 +1442,7 @@ static int setup_pi_ext(struct kiocb *req, int is_write) #endif static struct io_extension_type extensions[] = { - {IO_EXT_PI, IO_EXT_SIZE(ie_pi_ret), setup_pi_ext, destroy_pi_ext}, + {IO_EXT_PI, IO_EXT_SIZE(ie_pi_flags), setup_pi_ext, destroy_pi_ext}, {IO_EXT_INVALID, 0, NULL, NULL}, }; diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c index 3df9aeb..381ee38 100644 --- a/fs/bio-integrity.c +++ b/fs/bio-integrity.c @@ -301,6 +301,82 @@ int bio_integrity_get_tag(struct bio *bio, void *tag_buf, unsigned int len) EXPORT_SYMBOL(bio_integrity_get_tag); /** + * bio_integrity_update_user_buffer - Update user-provided PI buffers for a bio + * @bio: bio to generate/verify integrity metadata for + */ +int bio_integrity_update_user_buffer(struct bio *bio) +{ + struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev); + struct blk_integrity_exchg bix; + struct bio_vec bv; + struct bvec_iter iter; + sector_t sector; + unsigned int sectors, total, ret; + void *prot_buf; + unsigned int prot_offset, prot_len, bv_offset, bv_len; + struct bio_vec *iv; + struct bio_integrity_payload *bip = bio->bi_integrity; + + if (!bi->mod_user_buf_fn) + return 0; + + sector = bio->bi_iter.bi_sector; + + total = ret = 0; + bix.disk_name = bio->bi_bdev->bd_disk->disk_name; + bix.sector_size = bi->sector_size; + + iv = bip->bip_vec; + prot_offset = iv->bv_offset; + prot_len = 
iv->bv_len; + prot_buf = kmap_atomic(iv->bv_page); + + bio_for_each_segment(bv, bio, iter) { + void *kaddr = kmap_atomic(bv.bv_page); + bv_len = bv.bv_len; + bv_offset = bv.bv_offset; + + while (bv_len > 0) { + if (prot_len < bi->tuple_size) { + kunmap_atomic(prot_buf); + iv++; + BUG_ON(iv >= bip->bip_vec + bip->bip_vcnt); + prot_offset = iv->bv_offset; + prot_len = iv->bv_len; + prot_buf = kmap_atomic(iv->bv_page); + } + bix.data_buf = kaddr + bv_offset; + bix.data_size = min(bv_len, + prot_len / bi->tuple_size * bix.sector_size); + bix.prot_buf = prot_buf + prot_offset; + bix.sector = sector; + + ret = bi->mod_user_buf_fn(&bix, bip->bip_user_flags); + if (ret) { + if (ret == -ENOTTY) + ret = 0; + kunmap_atomic(kaddr); + kunmap_atomic(prot_buf); + return ret; + } + + bv_offset += bix.data_size; + bv_len -= bix.data_size; + sectors = bix.data_size / bi->sector_size; + sector += sectors; + prot_offset += sectors * bi->tuple_size; + prot_len -= sectors * bi->tuple_size; + total += sectors * bi->tuple_size; + BUG_ON(total > bio->bi_integrity->bip_iter.bi_size); + } + kunmap_atomic(kaddr); + } + kunmap_atomic(prot_buf); + return ret; +} +EXPORT_SYMBOL_GPL(bio_integrity_update_user_buffer); + +/** * bio_integrity_generate_verify - Generate/verify integrity metadata for a bio * @bio: bio to generate/verify integrity metadata for * @operate: operate number, 1 for generate, 0 for verify @@ -425,6 +501,7 @@ int bio_integrity_prep_buffer(struct bio *bio, int rw, bip->bip_buf = NULL; bip->bip_iter.bi_size = len; bip->bip_iter.bi_sector = bio->bi_iter.bi_sector; + bip->bip_user_flags = pi->pi_userflags; /* Map it */ for (i = 0 ; i < nr_pages ; i++) { @@ -458,7 +535,8 @@ int bio_integrity_prep_buffer(struct bio *bio, int rw, bip->bip_end_io = bio->bi_end_io; bio->bi_end_io = bio_integrity_endio; ret = 0; - } + } else + ret = bio_integrity_update_user_buffer(bio); return ret; } diff --git a/fs/direct-io.c b/fs/direct-io.c index 3f591f8..6740638 100644 --- a/fs/direct-io.c +++ 
b/fs/direct-io.c @@ -225,7 +225,6 @@ static inline struct page *dio_get_page(struct dio *dio, return dio->pages[sdio->head++]; } - /** * dio_complete() - called when all DIO BIO I/O has been completed * @offset: the byte offset in the file of the completed operation diff --git a/include/linux/bio.h b/include/linux/bio.h index 4729ab1..5bd9618 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -304,7 +304,9 @@ struct bio_integrity_payload { struct work_struct bip_work; /* I/O completion */ struct bio_vec *bip_vec; + unsigned int bip_user_flags; struct bio_vec bip_inline_vecs[0];/* embedded bvec array */ + /* This must be last! */ }; #endif /* CONFIG_BLK_DEV_INTEGRITY */ @@ -640,6 +642,7 @@ struct bio_integrity_prep_iter { size_t pi_nrpages; /* Number of PI data pages */ size_t pi_offset; /* Offset into the page */ size_t pi_len; /* Length of the buffer */ + unsigned int pi_userflags; /* Userspace flags */ }; /* diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 4afa4f8..cf1ec22 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1426,12 +1426,14 @@ typedef void (integrity_gen_fn) (struct blk_integrity_exchg *); typedef int (integrity_vrfy_fn) (struct blk_integrity_exchg *); typedef void (integrity_set_tag_fn) (void *, void *, unsigned int); typedef void (integrity_get_tag_fn) (void *, void *, unsigned int); +typedef int (integrity_mod_user_buf_fn) (struct blk_integrity_exchg *, int); struct blk_integrity { integrity_gen_fn *generate_fn; integrity_vrfy_fn *verify_fn; integrity_set_tag_fn *set_tag_fn; integrity_get_tag_fn *get_tag_fn; + integrity_mod_user_buf_fn *mod_user_buf_fn; unsigned short flags; unsigned short tuple_size; diff --git a/include/uapi/linux/aio_abi.h b/include/uapi/linux/aio_abi.h index d7b8c68..f6556a6 100644 --- a/include/uapi/linux/aio_abi.h +++ b/include/uapi/linux/aio_abi.h @@ -85,6 +85,7 @@ struct io_extension { __u64 ie_pi_buf; __u32 ie_pi_buflen; __u32 ie_pi_ret; + __u32 ie_pi_flags; }; /* -- To 
unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@xxxxxxxxx. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@xxxxxxxxx"> email@xxxxxxxxx </a>