On 2011-08-11, at 8:48 AM, Lukas Czerner wrote: > If e2fsprogs tools (mke2fs, e2fsck) is run on regular file instead of > on block device, we can use punch hole instead of regular discard > command which would not work on regular file anyway. This gives us > several advantages. First of all when e2fsck is run with '-E discard' > parameter it will punch out all ununsed space from the image, hence > trimming down the file system image. And secondly, when creating an > file system on regular file (with '-E discard' which is default), we > can use punch hole to clear the file content, hence we can skip inode > table initialization, because reads from sparse area returns zeros. This > will result in faster file system creation (without the need to specify > lazy_itable_init) and smaller images. > > This commit also fixes some tests that would wail due to mke2fs showing > discard progress, hence the output would differ. > > Signed-off-by: Lukas Czerner <lczerner@xxxxxxxxxx> > --- > lib/ext2fs/ext2_io.h | 1 + > lib/ext2fs/unix_io.c | 46 ++++++++++++++++++++++++++++++++++---- > misc/mke2fs.c | 10 ++++++- > tests/f_resize_inode/expect | 1 + > tests/m_dasd_bs/expect.1 | 1 + > tests/m_extent_journal/expect.1 | 1 + > tests/m_large_file/expect.1 | 1 + > tests/m_meta_bg/expect.1 | 1 + > tests/m_no_opt/expect.1 | 1 + > tests/m_raid_opt/expect.1 | 1 + > tests/m_std/expect.1 | 1 + > tests/m_uninit/expect.1 | 1 + > 12 files changed, 59 insertions(+), 7 deletions(-) > > diff --git a/lib/ext2fs/ext2_io.h b/lib/ext2fs/ext2_io.h > index e71ada9..bcc2f87 100644 > --- a/lib/ext2fs/ext2_io.h > +++ b/lib/ext2fs/ext2_io.h > @@ -30,6 +30,7 @@ typedef struct struct_io_stats *io_stats; > > #define CHANNEL_FLAGS_WRITETHROUGH 0x01 > #define CHANNEL_FLAGS_DISCARD_ZEROES 0x02 > +#define CHANNEL_FLAGS_BLOCK_DEVICE 0x04 > > #define io_channel_discard_zeroes_data(i) (i->flags & CHANNEL_FLAGS_DISCARD_ZEROES) > > diff --git a/lib/ext2fs/unix_io.c b/lib/ext2fs/unix_io.c > index c1d0561..eb5df55 100644 > --- a/lib/ext2fs/unix_io.c > +++ b/lib/ext2fs/unix_io.c > @@ -441,7 +441,11 @@ static errcode_t unix_open(const char *name, int flags, io_channel *channel) > struct unix_private_data *data = NULL; > errcode_t retval; > int open_flags, zeroes = 0; > +#ifdef HAVE_OPEN64 > + struct stat64 st; > +#else > struct stat st; > +#endif > #ifdef __linux__ > struct utsname ut; > #endif > @@ -482,11 +486,26 @@ static errcode_t unix_open(const char *name, int flags, io_channel *channel) > #endif > data->flags = flags; > > + st.st_mode = 0; > #ifdef HAVE_OPEN64 > data->dev = open64(io->name, open_flags); > + stat64(io->name, &st); > #else > data->dev = open(io->name, open_flags); > + stat(io->name, &st); > #endif IMHO it would be nicer to have wrappers for open() and stat() instead of #ifdef spread around the code. > + /* > + * If the device is really a block device, then set the > + * appropriate flag, otherwise we can set DISCARD_ZEROES flag > + * because we are going to use punch hole instead of discard > + * and if it succeed, subsequent read from sparse area returns > + * zero. > + */ > + if (S_ISBLK(st.st_mode)) > + io->flags |= CHANNEL_FLAGS_BLOCK_DEVICE; > + else > + io->flags |= CHANNEL_FLAGS_DISCARD_ZEROES; > + > if (data->dev < 0) { > retval = errno; > goto cleanup; > @@ -552,7 +571,11 @@ static errcode_t unix_open(const char *name, int flags, io_channel *channel) > (ut.release[2] == '4') && (ut.release[3] == '.') && > (ut.release[4] == '1') && (ut.release[5] >= '0') && > (ut.release[5] < '8')) && > - (fstat(data->dev, &st) == 0) && > +#ifdef HAVE_OPEN64 > + (stat64(io->name, &st) == 0) && > +#else > + (stat(io->name, &st) == 0) && > +#endif > (S_ISBLK(st.st_mode))) { > struct rlimit rlim; > > @@ -857,7 +880,9 @@ static errcode_t unix_set_option(io_channel channel, const char *option, > } > > #if defined(__linux__) && !defined(BLKDISCARD) > -#define BLKDISCARD _IO(0x12,119) > +#define BLKDISCARD _IO(0x12,119) > +#define FALLOC_FL_KEEP_SIZE 0x01 > +#define FALLOC_FL_PUNCH_HOLE 0x02 Should these get their own #ifndef FALLOC_FL_KEEP_SIZE and also #ifndef FALLOC_FL_PUNCH_HOLE? Otherwise the compiler may complain if they are defined multiple times. FALLOC_FL_PUNCH_HOLE was added a lot later than the others. > @@ -872,10 +897,21 @@ static errcode_t unix_discard(io_channel channel, unsigned long long block, > data = (struct unix_private_data *) channel->private_data; > EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL); > > - range[0] = (__uint64_t)(block) * channel->block_size; > - range[1] = (__uint64_t)(count) * channel->block_size; > + if (channel->flags & CHANNEL_FLAGS_BLOCK_DEVICE) { > + range[0] = (__uint64_t)(block) * channel->block_size; > + range[1] = (__uint64_t)(count) * channel->block_size; > > - ret = ioctl(data->dev, BLKDISCARD, &range); > + ret = ioctl(data->dev, BLKDISCARD, &range); > + } else { > + /* > + * If we are not on block device, try to use punch hole > + * to reclaim free space. > + */ > + ret = fallocate(data->dev, > + FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, > + (off_t)(block) * channel->block_size, > + (off_t)(count) * channel->block_size); > + } > if (ret < 0) > return errno; This will fail on kernels/userspace that don't have fallocate() or FALLOC_FL_PUNCH_HOLE (which is very new). Maybe it is better to wrap this whole thing in "#ifdef FALLOC_FL_PUNCH_HOLE" so that it is only compiled on systems that have such support, and convert EOPNOTSUPP into EXT2_ET_UNIMPLEMENTED. > diff --git a/tests/f_resize_inode/expect b/tests/f_resize_inode/expect > index a396927..cfc699c 100644 > --- a/tests/f_resize_inode/expect > +++ b/tests/f_resize_inode/expect > @@ -1,4 +1,5 @@ > mke2fs -F -O resize_inode -o Linux -b 1024 -g 1024 test.img 16384 > +Discarding device blocks: 1024/16384 done What happens on devices/kernels that don't support discard? In newer e2fsprogs this message isn't printed at all if the device doesn't support discard. It seems better to have the run_e2fsck script strip out these lines so that the tests pass regardless of whether discard is working or not. > Filesystem label= > OS type: Linux > Block size=1024 (log=0) > diff --git a/tests/m_dasd_bs/expect.1 b/tests/m_dasd_bs/expect.1 > index 31db54b..310fc95 100644 > --- a/tests/m_dasd_bs/expect.1 > +++ b/tests/m_dasd_bs/expect.1 > @@ -1,3 +1,4 @@ > +Discarding device blocks: 2048/32768 done > Filesystem label= > OS type: Linux > Block size=2048 (log=1) > diff --git a/tests/m_extent_journal/expect.1 b/tests/m_extent_journal/expect.1 > index 88ea2d9..fb07588 100644 > --- a/tests/m_extent_journal/expect.1 > +++ b/tests/m_extent_journal/expect.1 > @@ -1,3 +1,4 @@ > +Discarding device blocks: 1024/65536 done > Filesystem label= > OS type: Linux > Block size=1024 (log=0) > diff --git a/tests/m_large_file/expect.1 b/tests/m_large_file/expect.1 > index 2b40c84..3ebd03c 100644 > --- a/tests/m_large_file/expect.1 > +++ b/tests/m_large_file/expect.1 > @@ -1,3 +1,4 @@ > +Discarding device blocks: 4096/16384 done > Filesystem label= > OS type: Linux > Block size=4096 (log=2) > diff --git a/tests/m_meta_bg/expect.1 b/tests/m_meta_bg/expect.1 > index f1c9cef..f947da9 100644 > --- a/tests/m_meta_bg/expect.1 > +++ b/tests/m_meta_bg/expect.1 > @@ -1,3 +1,4 @@ > +Discarding device blocks: 1024/131072 done > Filesystem label= > OS type: Linux > Block size=1024 (log=0) > diff --git a/tests/m_no_opt/expect.1 b/tests/m_no_opt/expect.1 > index 7f3e5aa..81764d3 100644 > --- a/tests/m_no_opt/expect.1 > +++ b/tests/m_no_opt/expect.1 > @@ -1,3 +1,4 @@ > +Discarding device blocks: 1024/65536 done > Filesystem label= > OS type: Linux > Block size=1024 (log=0) > diff --git a/tests/m_raid_opt/expect.1 b/tests/m_raid_opt/expect.1 > index 0c6acc1..c8667e8 100644 > --- a/tests/m_raid_opt/expect.1 > +++ b/tests/m_raid_opt/expect.1 > @@ -1,3 +1,4 @@ > +Discarding device blocks: 1024/131072 done > Filesystem label= > OS type: Linux > Block size=1024 (log=0) > diff --git a/tests/m_std/expect.1 b/tests/m_std/expect.1 > index d0bf27c..a2b6c3f 100644 > --- a/tests/m_std/expect.1 > +++ b/tests/m_std/expect.1 > @@ -1,3 +1,4 @@ > +Discarding device blocks: 1024/65536 done > Filesystem label= > OS type: Linux > Block size=1024 (log=0) > diff --git a/tests/m_uninit/expect.1 b/tests/m_uninit/expect.1 > index 173c072..72c652c 100644 > --- a/tests/m_uninit/expect.1 > +++ b/tests/m_uninit/expect.1 > @@ -1,3 +1,4 @@ > +Discarding device blocks: 1024/131072 done > Filesystem label= > OS type: Linux > Block size=1024 (log=0) > -- > 1.7.4.4 > > -- > To unsubscribe from this list: send the line "unsubscribe linux-ext4" in > the body of a message to majordomo@xxxxxxxxxxxxxxx > More majordomo info at http://vger.kernel.org/majordomo-info.html Cheers, Andreas -- To unsubscribe from this list: send the line "unsubscribe linux-ext4" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html