Metadata runs are supposed to be aligned on 4k boundary (so that they work efficiently with disks with 4k sectors). However, there was a programming bug that makes them aligned on 128k boundary instead. The unused space is wasted. This patch fixes the bug by providing a proper alignment. In order to keep existing volumes working, we introduce a new flag SB_FLAG_FIXED_PADDING - when the flag is clear, we calculate the padding the old way. In order to make sure that the old version cannot mount the volume created by the new version, we increase superblock version to 4. In order to not break with old integritysetup, we fix alignment only if the parameter "fix_padding" is present when formatting the device. Signed-off-by: Mikulas Patocka <mpatocka@xxxxxxxxxx> --- Documentation/admin-guide/device-mapper/dm-integrity.rst | 5 ++ drivers/md/dm-integrity.c | 26 ++++++++++++--- 2 files changed, 27 insertions(+), 4 deletions(-) Index: linux-2.6/drivers/md/dm-integrity.c =================================================================== --- linux-2.6.orig/drivers/md/dm-integrity.c 2019-11-12 21:13:57.000000000 +0100 +++ linux-2.6/drivers/md/dm-integrity.c 2019-11-13 12:29:00.000000000 +0100 @@ -53,6 +53,7 @@ #define SB_VERSION_1 1 #define SB_VERSION_2 2 #define SB_VERSION_3 3 +#define SB_VERSION_4 4 #define SB_SECTORS 8 #define MAX_SECTORS_PER_BLOCK 8 @@ -73,6 +74,7 @@ struct superblock { #define SB_FLAG_HAVE_JOURNAL_MAC 0x1 #define SB_FLAG_RECALCULATING 0x2 #define SB_FLAG_DIRTY_BITMAP 0x4 +#define SB_FLAG_FIXED_PADDING 0x8 #define JOURNAL_ENTRY_ROUNDUP 8 @@ -250,6 +252,7 @@ struct dm_integrity_c { bool journal_uptodate; bool just_formatted; bool recalculate_flag; + bool fix_padding; struct alg_spec internal_hash_alg; struct alg_spec journal_crypt_alg; @@ -463,7 +466,9 @@ static void wraparound_section(struct dm static void sb_set_version(struct dm_integrity_c *ic) { - if (ic->mode == 'B' || ic->sb->flags & cpu_to_le32(SB_FLAG_DIRTY_BITMAP)) + if (ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_PADDING)) + ic->sb->version = SB_VERSION_4; + else if (ic->mode == 'B' || ic->sb->flags & cpu_to_le32(SB_FLAG_DIRTY_BITMAP)) ic->sb->version = SB_VERSION_3; else if (ic->meta_dev || ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)) ic->sb->version = SB_VERSION_2; @@ -2955,6 +2960,7 @@ static void dm_integrity_status(struct d arg_count += !!ic->internal_hash_alg.alg_string; arg_count += !!ic->journal_crypt_alg.alg_string; arg_count += !!ic->journal_mac_alg.alg_string; + arg_count += (ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_PADDING)) != 0; DMEMIT("%s %llu %u %c %u", ic->dev->name, (unsigned long long)ic->start, ic->tag_size, ic->mode, arg_count); if (ic->meta_dev) @@ -2974,6 +2980,8 @@ static void dm_integrity_status(struct d DMEMIT(" sectors_per_bit:%llu", (unsigned long long)ic->sectors_per_block << ic->log2_blocks_per_bitmap_bit); DMEMIT(" bitmap_flush_interval:%u", jiffies_to_msecs(ic->bitmap_flush_interval)); } + if ((ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_PADDING)) != 0) + DMEMIT(" fix_padding"); #define EMIT_ALG(a, n) \ do { \ @@ -3042,8 +3050,14 @@ static int calculate_device_limits(struc if (!ic->meta_dev) { sector_t last_sector, last_area, last_offset; + /* we have to maintain excessive padding for compatibility with existing volumes */ + u64 metadata_run_padding = + ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_PADDING) ? + (__u64)(METADATA_PADDING_SECTORS << SECTOR_SHIFT) : + (__u64)(1 << SECTOR_SHIFT << METADATA_PADDING_SECTORS); + ic->metadata_run = roundup((__u64)ic->tag_size << (ic->sb->log2_interleave_sectors - ic->sb->log2_sectors_per_block), - (__u64)(1 << SECTOR_SHIFT << METADATA_PADDING_SECTORS)) >> SECTOR_SHIFT; + metadata_run_padding) >> SECTOR_SHIFT; if (!(ic->metadata_run & (ic->metadata_run - 1))) ic->log2_metadata_run = __ffs(ic->metadata_run); else @@ -3086,6 +3100,8 @@ static int initialize_superblock(struct journal_sections = 1; if (!ic->meta_dev) { + if (ic->fix_padding) + ic->sb->flags |= cpu_to_le32(SB_FLAG_FIXED_PADDING); ic->sb->journal_sections = cpu_to_le32(journal_sections); if (!interleave_sectors) interleave_sectors = DEFAULT_INTERLEAVE_SECTORS; @@ -3725,6 +3741,8 @@ static int dm_integrity_ctr(struct dm_ta goto bad; } else if (!strcmp(opt_string, "recalculate")) { ic->recalculate_flag = true; + } else if (!strcmp(opt_string, "fix_padding")) { + ic->fix_padding = true; } else { r = -EINVAL; ti->error = "Invalid argument"; @@ -3867,7 +3885,7 @@ static int dm_integrity_ctr(struct dm_ta should_write_sb = true; } - if (!ic->sb->version || ic->sb->version > SB_VERSION_3) { + if (!ic->sb->version || ic->sb->version > SB_VERSION_4) { r = -EINVAL; ti->error = "Unknown version"; goto bad; @@ -4182,7 +4200,7 @@ static void dm_integrity_dtr(struct dm_t static struct target_type integrity_target = { .name = "integrity", - .version = {1, 3, 0}, + .version = {1, 4, 0}, .module = THIS_MODULE, .features = DM_TARGET_SINGLETON | DM_TARGET_INTEGRITY, .ctr = dm_integrity_ctr, Index: linux-2.6/Documentation/admin-guide/device-mapper/dm-integrity.rst =================================================================== --- linux-2.6.orig/Documentation/admin-guide/device-mapper/dm-integrity.rst 2019-10-10 16:51:56.000000000 +0200 +++ linux-2.6/Documentation/admin-guide/device-mapper/dm-integrity.rst 2019-11-13 12:24:48.000000000 +0100 @@ -177,6 +177,11 @@ bitmap_flush_interval:number The bitmap flush interval in milliseconds. The metadata buffers are synchronized when this interval expires. +fix_padding + Use a smaller padding of the tag area that is more + space-efficient. If this option is not present, large padding is + used - that is for compatibility with older kernels. + The journal mode (D/J), buffer_sectors, journal_watermark, commit_time can be changed when reloading the target (load an inactive table and swap the -- dm-devel mailing list dm-devel@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/dm-devel