It's really inefficient to (ab)use the TDB key store as a bitmap to find out if we've already written a block to the undo file, because the tdb code is reads the database key btree disk blocks for *every* query. Changing that logic to a bitmap reduces overhead by a large margin -- the overhead of using undo_io while converting a 2TB FS to metadata_csum is reduced from 55 minutes to 45. Signed-off-by: Darrick J. Wong <darrick.wong@xxxxxxxxxx> --- lib/ext2fs/undo_io.c | 69 +++++++++++++++++++++++++++++++++++++------------- 1 file changed, 51 insertions(+), 18 deletions(-) diff --git a/lib/ext2fs/undo_io.c b/lib/ext2fs/undo_io.c index 70b90d5..9a01e30 100644 --- a/lib/ext2fs/undo_io.c +++ b/lib/ext2fs/undo_io.c @@ -70,6 +70,9 @@ struct undo_private_data { /* to support offset in unix I/O manager */ ext2_loff_t offset; + + ext2fs_block_bitmap written_block_map; + struct struct_ext2_filsys fake_fs; }; static io_manager undo_io_backing_manager; @@ -164,6 +167,38 @@ static errcode_t write_block_size(TDB_CONTEXT *tdb, int block_size) return retval; } +static errcode_t undo_setup_tdb(struct undo_private_data *data) +{ + errcode_t retval; + + if (data->tdb_written == 1) + return 0; + + data->tdb_written = 1; + + /* Make a bitmap to track what we've written */ + memset(&data->fake_fs, 0, sizeof(data->fake_fs)); + data->fake_fs.blocksize = data->tdb_data_size; + retval = ext2fs_alloc_generic_bmap(&data->fake_fs, + EXT2_ET_MAGIC_BLOCK_BITMAP64, + EXT2FS_BMAP64_RBTREE, + 0, ~1ULL, ~1ULL, + "undo block map", &data->written_block_map); + if (retval) + return retval; + + /* Write the blocksize to tdb file */ + tdb_transaction_start(data->tdb); + retval = write_block_size(data->tdb, + data->tdb_data_size); + if (retval) { + tdb_transaction_cancel(data->tdb); + return EXT2_ET_TDB_ERR_IO; + } + tdb_transaction_commit(data->tdb); + return 0; +} + static errcode_t undo_write_tdb(io_channel channel, unsigned long long block, int count) @@ -194,6 +229,10 @@ static errcode_t undo_write_tdb(io_channel channel, else size = count * channel->block_size; } + + retval = undo_setup_tdb(data); + if (retval) + return retval; /* * Data is stored in tdb database as blocks of tdb_data_size size * This helps in efficient lookup further. @@ -212,11 +251,14 @@ static errcode_t undo_write_tdb(io_channel channel, /* * Check if we have the record already */ - if (tdb_exists(data->tdb, tdb_key)) { + if (ext2fs_test_block_bitmap2(data->written_block_map, + block_num)) { /* Try the next block */ block_num++; continue; } + ext2fs_mark_block_bitmap2(data->written_block_map, block_num); + /* * Read one block using the backing I/O manager * The backing I/O manager block size may be @@ -265,19 +307,7 @@ static errcode_t undo_write_tdb(io_channel channel, tdb_data.dptr, tdb_data.dsize); #endif - if (data->tdb_written != 1) { - data->tdb_written = 1; - /* Write the blocksize to tdb file */ - retval = write_block_size(data->tdb, - data->tdb_data_size); - if (retval) { - tdb_transaction_cancel(data->tdb); - retval = EXT2_ET_TDB_ERR_IO; - free(read_ptr); - return retval; - } - } - retval = tdb_store(data->tdb, tdb_key, tdb_data, TDB_INSERT); + retval = tdb_store(data->tdb, tdb_key, tdb_data, TDB_REPLACE); if (retval == -1) { /* * TDB_ERR_EXISTS cannot happen because we @@ -345,6 +375,7 @@ static errcode_t undo_open(const char *name, int flags, io_channel *channel) memset(data, 0, sizeof(struct undo_private_data)); data->magic = EXT2_ET_MAGIC_UNIX_IO_CHANNEL; + data->written_block_map = NULL; if (undo_io_backing_manager) { retval = undo_io_backing_manager->open(name, flags, @@ -390,7 +421,7 @@ cleanup: static errcode_t undo_close(io_channel channel) { struct undo_private_data *data; - errcode_t retval = 0; + errcode_t err, retval = 0; EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL); data = (struct undo_private_data *) channel->private_data; @@ -399,20 +430,22 @@ static errcode_t undo_close(io_channel channel) if (--channel->refcount > 0) return 0; /* Before closing write the file system identity */ - retval = write_file_system_identity(channel, data->tdb); - if (retval) - return retval; + err = write_file_system_identity(channel, data->tdb); if (data->real) retval = io_channel_close(data->real); if (data->tdb) { tdb_flush(data->tdb); tdb_close(data->tdb); } + if (data->written_block_map) + ext2fs_free_generic_bitmap(data->written_block_map); ext2fs_free_mem(&channel->private_data); if (channel->name) ext2fs_free_mem(&channel->name); ext2fs_free_mem(&channel); + if (err) + return err; return retval; } -- To unsubscribe from this list: send the line "unsubscribe linux-ext4" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html