Hello Ted! Thank you very much for the new version of the fix. The problem can be easily reproduced, using loop device. Here is how I reproduced it originally (without the patch) # truncate -s 512MB /tmp/lustre-ost # losetup -b 4096 /dev/loop0 /tmp/lustre-ost # mkfs.ext4 /dev/loop0 mke2fs 1.45.6.wc1 (20-Mar-2020) detected raid stride 4096 too large, use optimum 512 Discarding device blocks: done Creating filesystem with 125000 4k blocks and 125056 inodes Filesystem UUID: 541ad524-556a-45be-84fe-5b68c1ca4562 Superblock backups stored on blocks: 32768, 98304 Allocating group tables: done Writing inode tables: done Creating journal (4096 blocks): done Writing superblocks and filesystem accounting information: done # git rev-parse HEAD 0f880f553dfa09dabe4cb03752b97a07b66eb998 [root@CO82 e2fsprogs-hpe]# misc/e2image /dev/loop0 /tmp/ost-image e2image 1.45.2.cr2 (09-Apr-2020) misc/e2image: Attempt to read block from filesystem resulted in short read while trying to open /dev/loop0 Couldn't find valid filesystem superblock. With your patch e2image works fine. [root@CO82 e2fsprogs-kernel]# truncate -s 512MB /tmp/lustre-ost [root@CO82 e2fsprogs-kernel]# losetup -b 4096 /dev/loop0 /tmp/lustre-ost [root@CO82 e2fsprogs-kernel]# mkfs.ext4 /dev/loop0 mke2fs 1.45.6.wc1 (20-Mar-2020) detected raid stride 4096 too large, use optimum 512 Discarding device blocks: done Creating filesystem with 125000 4k blocks and 125056 inodes Filesystem UUID: 42f6fc7d-382a-4681-99b8-79f3fcd2b4bf Superblock backups stored on blocks: 32768, 98304 Allocating group tables: done Writing inode tables: done Creating journal (4096 blocks): done Writing superblocks and filesystem accounting information: done [root@CO82 e2fsprogs-kernel]# git rev-parse HEAD 67f2b54667e65cf5a478fcea8b85722be9ee6e8d [root@CO82 e2fsprogs-kernel]# misc/e2image /dev/loop0 /tmp/ost-image e2image 1.46.1 (9-Feb-2021) Thanks again. Best regards, Artem Blagodarenko. > On 27 Feb 2021, at 02:17, Theodore Ts'o <tytso@xxxxxxx> wrote: > > I've rewritten the patch because it was simplest way to review the > code. The resulting code is simpler and has a smaller number of lines > of code. I don't have any 4k advanced format disks handy, so I'd > appreciate it if someone can test it. It passes the existing > regression tests, and I've run a number of manual tests to simulate a > advanced format HDD, with the tests being run with clang and UBSAN and > ADDRSAN enabled. > > If someone with access to an advanced format disk can test running > debugfs -D on an advanced format disk, that would be great, thanks. > > - Ted > > commit c001596110e834a85b01a47a20811b318cb3b9e4 > Author: Theodore Ts'o <tytso@xxxxxxx> > Date: Fri Feb 26 17:41:06 2021 -0500 > > libext2fs: fix unix_io's Direct I/O support > > The previous Direct I/O support worked on HDD's with 512 byte logical > sector sizes, and on FreeBSD which required 4k aligned memory buffers. > However, it was incomplete and was not correctly working on HDD's with > 4k logical sector sizes (aka Advanced Format Disks). > > Based on a patch from Alexey Lyashkov <alexey.lyashkov@xxxxxxx> but > rewritten to work with the latest e2fsprogs and to minimize changes to > make it easier to review. > > Signed-off-by: Theodore Ts'o <tytso@xxxxxxx> > Reported-by: Artem Blagodarenko <artem.blagodarenko@xxxxxxxxx> > > diff --git a/lib/ext2fs/io_manager.c b/lib/ext2fs/io_manager.c > index c395d615..996c31a1 100644 > --- a/lib/ext2fs/io_manager.c > +++ b/lib/ext2fs/io_manager.c > @@ -134,9 +134,11 @@ errcode_t io_channel_alloc_buf(io_channel io, int count, void *ptr) > else > size = -count; > > - if (io->align) > + if (io->align) { > + if (io->align > size) > + size = io->align; > return ext2fs_get_memalign(size, io->align, ptr); > - else > + } else > return ext2fs_get_mem(size, ptr); > } > > diff --git a/lib/ext2fs/unix_io.c b/lib/ext2fs/unix_io.c > index 73f5667e..8965535c 100644 > --- a/lib/ext2fs/unix_io.c > +++ b/lib/ext2fs/unix_io.c > @@ -218,6 +218,8 @@ static errcode_t raw_read_blk(io_channel channel, > int actual = 0; > unsigned char *buf = bufv; > ssize_t really_read = 0; > + unsigned long long aligned_blk; > + int align_size, offset; > > size = (count < 0) ? -count : (ext2_loff_t) count * channel->block_size; > mutex_lock(data, STATS_MTX); > @@ -226,7 +228,7 @@ static errcode_t raw_read_blk(io_channel channel, > location = ((ext2_loff_t) block * channel->block_size) + data->offset; > > if (data->flags & IO_FLAG_FORCE_BOUNCE) { > - if (ext2fs_llseek(data->dev, location, SEEK_SET) != location) { > + if (ext2fs_llseek(data->dev, location, SEEK_SET) < 0) { > retval = errno ? errno : EXT2_ET_LLSEEK_FAILED; > goto error_out; > } > @@ -237,6 +239,7 @@ static errcode_t raw_read_blk(io_channel channel, > /* Try an aligned pread */ > if ((channel->align == 0) || > (IS_ALIGNED(buf, channel->align) && > + IS_ALIGNED(location, channel->align) && > IS_ALIGNED(size, channel->align))) { > actual = pread64(data->dev, buf, size, location); > if (actual == size) > @@ -248,6 +251,7 @@ static errcode_t raw_read_blk(io_channel channel, > if ((sizeof(off_t) >= sizeof(ext2_loff_t)) && > ((channel->align == 0) || > (IS_ALIGNED(buf, channel->align) && > + IS_ALIGNED(location, channel->align) && > IS_ALIGNED(size, channel->align)))) { > actual = pread(data->dev, buf, size, location); > if (actual == size) > @@ -256,12 +260,13 @@ static errcode_t raw_read_blk(io_channel channel, > } > #endif /* HAVE_PREAD */ > > - if (ext2fs_llseek(data->dev, location, SEEK_SET) != location) { > + if (ext2fs_llseek(data->dev, location, SEEK_SET) < 0) { > retval = errno ? errno : EXT2_ET_LLSEEK_FAILED; > goto error_out; > } > if ((channel->align == 0) || > (IS_ALIGNED(buf, channel->align) && > + IS_ALIGNED(location, channel->align) && > IS_ALIGNED(size, channel->align))) { > actual = read(data->dev, buf, size); > if (actual != size) { > @@ -286,23 +291,39 @@ static errcode_t raw_read_blk(io_channel channel, > * to the O_DIRECT rules, so we need to do this the hard way... > */ > bounce_read: > + if ((channel->block_size > channel->align) && > + (channel->block_size % channel->align) == 0) > + align_size = channel->block_size; > + else > + align_size = channel->align; > + aligned_blk = location / align_size; > + offset = location % align_size; > + > + if (ext2fs_llseek(data->dev, aligned_blk * align_size, SEEK_SET) < 0) { > + retval = errno ? errno : EXT2_ET_LLSEEK_FAILED; > + goto error_out; > + } > while (size > 0) { > mutex_lock(data, BOUNCE_MTX); > - actual = read(data->dev, data->bounce, channel->block_size); > - if (actual != channel->block_size) { > + actual = read(data->dev, data->bounce, align_size); > + if (actual != align_size) { > mutex_unlock(data, BOUNCE_MTX); > actual = really_read; > buf -= really_read; > size += really_read; > goto short_read; > } > - actual = size; > - if (size > channel->block_size) > - actual = channel->block_size; > - memcpy(buf, data->bounce, actual); > + if ((actual + offset) > align_size) > + actual = align_size - offset; > + if (actual > size) > + actual = size; > + memcpy(buf, data->bounce + offset, actual); > + > really_read += actual; > size -= actual; > buf += actual; > + offset = 0; > + aligned_blk++; > mutex_unlock(data, BOUNCE_MTX); > } > return 0; > @@ -326,6 +347,8 @@ static errcode_t raw_write_blk(io_channel channel, > int actual = 0; > errcode_t retval; > const unsigned char *buf = bufv; > + unsigned long long aligned_blk; > + int align_size, offset; > > if (count == 1) > size = channel->block_size; > @@ -342,7 +365,7 @@ static errcode_t raw_write_blk(io_channel channel, > location = ((ext2_loff_t) block * channel->block_size) + data->offset; > > if (data->flags & IO_FLAG_FORCE_BOUNCE) { > - if (ext2fs_llseek(data->dev, location, SEEK_SET) != location) { > + if (ext2fs_llseek(data->dev, location, SEEK_SET) < 0) { > retval = errno ? errno : EXT2_ET_LLSEEK_FAILED; > goto error_out; > } > @@ -353,6 +376,7 @@ static errcode_t raw_write_blk(io_channel channel, > /* Try an aligned pwrite */ > if ((channel->align == 0) || > (IS_ALIGNED(buf, channel->align) && > + IS_ALIGNED(location, channel->align) && > IS_ALIGNED(size, channel->align))) { > actual = pwrite64(data->dev, buf, size, location); > if (actual == size) > @@ -363,6 +387,7 @@ static errcode_t raw_write_blk(io_channel channel, > if ((sizeof(off_t) >= sizeof(ext2_loff_t)) && > ((channel->align == 0) || > (IS_ALIGNED(buf, channel->align) && > + IS_ALIGNED(location, channel->align) && > IS_ALIGNED(size, channel->align)))) { > actual = pwrite(data->dev, buf, size, location); > if (actual == size) > @@ -370,13 +395,14 @@ static errcode_t raw_write_blk(io_channel channel, > } > #endif /* HAVE_PWRITE */ > > - if (ext2fs_llseek(data->dev, location, SEEK_SET) != location) { > + if (ext2fs_llseek(data->dev, location, SEEK_SET) < 0) { > retval = errno ? errno : EXT2_ET_LLSEEK_FAILED; > goto error_out; > } > > if ((channel->align == 0) || > (IS_ALIGNED(buf, channel->align) && > + IS_ALIGNED(location, channel->align) && > IS_ALIGNED(size, channel->align))) { > actual = write(data->dev, buf, size); > if (actual < 0) { > @@ -400,40 +426,59 @@ static errcode_t raw_write_blk(io_channel channel, > * to the O_DIRECT rules, so we need to do this the hard way... > */ > bounce_write: > + if ((channel->block_size > channel->align) && > + (channel->block_size % channel->align) == 0) > + align_size = channel->block_size; > + else > + align_size = channel->align; > + aligned_blk = location / align_size; > + offset = location % align_size; > + > while (size > 0) { > + int actual_w; > + > mutex_lock(data, BOUNCE_MTX); > - if (size < channel->block_size) { > + if (size < align_size || offset) { > + if (ext2fs_llseek(data->dev, aligned_blk * align_size, > + SEEK_SET) < 0) { > + retval = errno ? errno : EXT2_ET_LLSEEK_FAILED; > + goto error_out; > + } > actual = read(data->dev, data->bounce, > - channel->block_size); > - if (actual != channel->block_size) { > + align_size); > + if (actual != align_size) { > if (actual < 0) { > mutex_unlock(data, BOUNCE_MTX); > retval = errno; > goto error_out; > } > memset((char *) data->bounce + actual, 0, > - channel->block_size - actual); > + align_size - actual); > } > } > actual = size; > - if (size > channel->block_size) > - actual = channel->block_size; > - memcpy(data->bounce, buf, actual); > - if (ext2fs_llseek(data->dev, location, SEEK_SET) != location) { > + if ((actual + offset) > align_size) > + actual = align_size - offset; > + if (actual > size) > + actual = size; > + memcpy(((char *)data->bounce) + offset, buf, actual); > + if (ext2fs_llseek(data->dev, aligned_blk * align_size, SEEK_SET) < 0) { > retval = errno ? errno : EXT2_ET_LLSEEK_FAILED; > goto error_out; > } > - actual = write(data->dev, data->bounce, channel->block_size); > + actual_w = write(data->dev, data->bounce, align_size); > mutex_unlock(data, BOUNCE_MTX); > - if (actual < 0) { > + if (actual_w < 0) { > retval = errno; > goto error_out; > } > - if (actual != channel->block_size) > + if (actual_w != align_size) > goto short_write; > size -= actual; > buf += actual; > location += actual; > + aligned_blk++; > + offset = 0; > } > return 0; >