On Tue, 7 Jun 2011, Lukas Czerner wrote: > On Tue, 7 Jun 2011, Eric Sandeen wrote: > > > On 6/7/11 9:50 AM, Eric Sandeen wrote: > > > On 6/7/11 8:35 AM, Lukas Czerner wrote: > > >> For a long time now orlov is the default block allocator in the ext4. It > > >> performs better than the old one and no one seems to claim otherwise so > > >> we can safely drop it and make oldalloc and orlov mount option > > >> deprecated. > > >> > > >> This is a part of the effort to reduce number of ext4 options hence the > > >> test matrix. > > >> > > >> Signed-off-by: Lukas Czerner <lczerner@xxxxxxxxxx> > > > > > > Seems like a good idea to me. > > > > > > But I'm doing a little digging into why find_group_flex() was there; > > > why all that flex_bg-related inode allocation work for a deprecated option? > > > > > > commit 772cb7c83ba256a11c7bf99a11bef3858d23767c > > > Author: Jose R. Santos <jrs@xxxxxxxxxx> > > > Date: Fri Jul 11 19:27:31 2008 -0400 > > > > > > ext4: New inode allocation for FLEX_BG meta-data groups. > > > > > > This patch mostly controls the way inode are allocated in order to > > > make ialloc aware of flex_bg block group grouping. It achieves this > > > by bypassing the Orlov allocator when block group meta-data are packed > > > toghether through mke2fs. <snip> > > > > > > find_group_flex() used to be called by ext4_new_inode() regardless of > > > OLDALLOC, (I think) so just want to see for sure what happened to that plan... > > > > Ah, ok: > > > > commit a4912123b688e057084e6557cef8924f7ae5bbde > > Author: Theodore Ts'o <tytso@xxxxxxx> > > Date: Thu Mar 12 12:18:34 2009 -0400 > > > > ext4: New inode/block allocation algorithms for flex_bg filesystems > > > > The find_group_flex() inode allocator is now only used if the > > filesystem is mounted using the "oldalloc" mount option. It is > > replaced with the original Orlov allocator that has been updated for > > flex_bg filesystems <snip> > > > > So: > > > > Reviewed-by: Eric Sandeen <sandeen@xxxxxxxxxx> > > Thanks Eric, but I need to take it back for the moment. You've pointed > me to more code which is not needed anymore, so I have to update the patch > to remove all the useless pieces. > > Thanks! > -Lukas Nope, I was wrong, there is no more code to remove wrt. oldalloc. So the patch is fine. Sorry for the noise. Thanks! -Lukas > > > > > > > > -eric > > > > > >> --- > > >> Documentation/filesystems/ext4.txt | 8 -- > > >> fs/ext4/ext4.h | 1 - > > >> fs/ext4/ialloc.c | 136 +----------------------------------- > > >> fs/ext4/super.c | 8 +- > > >> 4 files changed, 7 insertions(+), 146 deletions(-) > > >> > > >> diff --git a/Documentation/filesystems/ext4.txt b/Documentation/filesystems/ext4.txt > > >> index 3ae9bc9..ec469fa 100644 > > >> --- a/Documentation/filesystems/ext4.txt > > >> +++ b/Documentation/filesystems/ext4.txt > > >> @@ -201,14 +201,6 @@ inode_readahead_blks=n This tuning parameter controls the maximum > > >> table readahead algorithm will pre-read into > > >> the buffer cache. The default value is 32 blocks. > > >> > > >> -orlov (*) This enables the new Orlov block allocator. It is > > >> - enabled by default. > > >> - > > >> -oldalloc This disables the Orlov block allocator and enables > > >> - the old block allocator. Orlov should have better > > >> - performance - we'd like to get some feedback if it's > > >> - the contrary for you. > > >> - > > >> user_xattr Enables Extended User Attributes. Additionally, you > > >> need to have extended attribute support enabled in the > > >> kernel configuration (CONFIG_EXT4_FS_XATTR). See the > > >> diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h > > >> index 1921392..7e0b8aa 100644 > > >> --- a/fs/ext4/ext4.h > > >> +++ b/fs/ext4/ext4.h > > >> @@ -884,7 +884,6 @@ struct ext4_inode_info { > > >> /* > > >> * Mount flags > > >> */ > > >> -#define EXT4_MOUNT_OLDALLOC 0x00002 /* Don't use the new Orlov allocator */ > > >> #define EXT4_MOUNT_GRPID 0x00004 /* Create files with directory's group */ > > >> #define EXT4_MOUNT_DEBUG 0x00008 /* Some debugging messages */ > > >> #define EXT4_MOUNT_ERRORS_CONT 0x00010 /* Continue on errors */ > > >> diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c > > >> index 21bb2f6..0b5ec23 100644 > > >> --- a/fs/ext4/ialloc.c > > >> +++ b/fs/ext4/ialloc.c > > >> @@ -293,118 +293,6 @@ error_return: > > >> ext4_std_error(sb, fatal); > > >> } > > >> > > >> -/* > > >> - * There are two policies for allocating an inode. If the new inode is > > >> - * a directory, then a forward search is made for a block group with both > > >> - * free space and a low directory-to-inode ratio; if that fails, then of > > >> - * the groups with above-average free space, that group with the fewest > > >> - * directories already is chosen. > > >> - * > > >> - * For other inodes, search forward from the parent directory\'s block > > >> - * group to find a free inode. > > >> - */ > > >> -static int find_group_dir(struct super_block *sb, struct inode *parent, > > >> - ext4_group_t *best_group) > > >> -{ > > >> - ext4_group_t ngroups = ext4_get_groups_count(sb); > > >> - unsigned int freei, avefreei; > > >> - struct ext4_group_desc *desc, *best_desc = NULL; > > >> - ext4_group_t group; > > >> - int ret = -1; > > >> - > > >> - freei = percpu_counter_read_positive(&EXT4_SB(sb)->s_freeinodes_counter); > > >> - avefreei = freei / ngroups; > > >> - > > >> - for (group = 0; group < ngroups; group++) { > > >> - desc = ext4_get_group_desc(sb, group, NULL); > > >> - if (!desc || !ext4_free_inodes_count(sb, desc)) > > >> - continue; > > >> - if (ext4_free_inodes_count(sb, desc) < avefreei) > > >> - continue; > > >> - if (!best_desc || > > >> - (ext4_free_blks_count(sb, desc) > > > >> - ext4_free_blks_count(sb, best_desc))) { > > >> - *best_group = group; > > >> - best_desc = desc; > > >> - ret = 0; > > >> - } > > >> - } > > >> - return ret; > > >> -} > > >> - > > >> -#define free_block_ratio 10 > > >> - > > >> -static int find_group_flex(struct super_block *sb, struct inode *parent, > > >> - ext4_group_t *best_group) > > >> -{ > > >> - struct ext4_sb_info *sbi = EXT4_SB(sb); > > >> - struct ext4_group_desc *desc; > > >> - struct flex_groups *flex_group = sbi->s_flex_groups; > > >> - ext4_group_t parent_group = EXT4_I(parent)->i_block_group; > > >> - ext4_group_t parent_fbg_group = ext4_flex_group(sbi, parent_group); > > >> - ext4_group_t ngroups = ext4_get_groups_count(sb); > > >> - int flex_size = ext4_flex_bg_size(sbi); > > >> - ext4_group_t best_flex = parent_fbg_group; > > >> - int blocks_per_flex = sbi->s_blocks_per_group * flex_size; > > >> - int flexbg_free_blocks; > > >> - int flex_freeb_ratio; > > >> - ext4_group_t n_fbg_groups; > > >> - ext4_group_t i; > > >> - > > >> - n_fbg_groups = (ngroups + flex_size - 1) >> > > >> - sbi->s_log_groups_per_flex; > > >> - > > >> -find_close_to_parent: > > >> - flexbg_free_blocks = atomic_read(&flex_group[best_flex].free_blocks); > > >> - flex_freeb_ratio = flexbg_free_blocks * 100 / blocks_per_flex; > > >> - if (atomic_read(&flex_group[best_flex].free_inodes) && > > >> - flex_freeb_ratio > free_block_ratio) > > >> - goto found_flexbg; > > >> - > > >> - if (best_flex && best_flex == parent_fbg_group) { > > >> - best_flex--; > > >> - goto find_close_to_parent; > > >> - } > > >> - > > >> - for (i = 0; i < n_fbg_groups; i++) { > > >> - if (i == parent_fbg_group || i == parent_fbg_group - 1) > > >> - continue; > > >> - > > >> - flexbg_free_blocks = atomic_read(&flex_group[i].free_blocks); > > >> - flex_freeb_ratio = flexbg_free_blocks * 100 / blocks_per_flex; > > >> - > > >> - if (flex_freeb_ratio > free_block_ratio && > > >> - (atomic_read(&flex_group[i].free_inodes))) { > > >> - best_flex = i; > > >> - goto found_flexbg; > > >> - } > > >> - > > >> - if ((atomic_read(&flex_group[best_flex].free_inodes) == 0) || > > >> - ((atomic_read(&flex_group[i].free_blocks) > > > >> - atomic_read(&flex_group[best_flex].free_blocks)) && > > >> - atomic_read(&flex_group[i].free_inodes))) > > >> - best_flex = i; > > >> - } > > >> - > > >> - if (!atomic_read(&flex_group[best_flex].free_inodes) || > > >> - !atomic_read(&flex_group[best_flex].free_blocks)) > > >> - return -1; > > >> - > > >> -found_flexbg: > > >> - for (i = best_flex * flex_size; i < ngroups && > > >> - i < (best_flex + 1) * flex_size; i++) { > > >> - desc = ext4_get_group_desc(sb, i, NULL); > > >> - if (ext4_free_inodes_count(sb, desc)) { > > >> - *best_group = i; > > >> - goto out; > > >> - } > > >> - } > > >> - > > >> - return -1; > > >> -out: > > >> - return 0; > > >> -} > > >> - > > >> struct orlov_stats { > > >> __u32 free_inodes; > > >> __u32 free_blocks; > > >> @@ -817,7 +705,6 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode, > > >> struct inode *ret; > > >> ext4_group_t i; > > >> int free = 0; > > >> - static int once = 1; > > >> ext4_group_t flex_group; > > >> > > >> /* Cannot create files in a deleted directory */ > > >> @@ -843,26 +730,9 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode, > > >> goto got_group; > > >> } > > >> > > >> - if (sbi->s_log_groups_per_flex && test_opt(sb, OLDALLOC)) { > > >> - ret2 = find_group_flex(sb, dir, &group); > > >> - if (ret2 == -1) { > > >> - ret2 = find_group_other(sb, dir, &group, mode); > > >> - if (ret2 == 0 && once) { > > >> - once = 0; > > >> - printk(KERN_NOTICE "ext4: find_group_flex " > > >> - "failed, fallback succeeded dir %lu\n", > > >> - dir->i_ino); > > >> - } > > >> - } > > >> - goto got_group; > > >> - } > > >> - > > >> - if (S_ISDIR(mode)) { > > >> - if (test_opt(sb, OLDALLOC)) > > >> - ret2 = find_group_dir(sb, dir, &group); > > >> - else > > >> - ret2 = find_group_orlov(sb, dir, &group, mode, qstr); > > >> - } else > > >> + if (S_ISDIR(mode)) > > >> + ret2 = find_group_orlov(sb, dir, &group, mode, qstr); > > >> + else > > >> ret2 = find_group_other(sb, dir, &group, mode); > > >> > > >> got_group: > > >> diff --git a/fs/ext4/super.c b/fs/ext4/super.c > > >> index cc5c157..e1f8f73 100644 > > >> --- a/fs/ext4/super.c > > >> +++ b/fs/ext4/super.c > > >> @@ -1031,8 +1031,6 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) > > >> seq_puts(seq, ",nouid32"); > > >> if (test_opt(sb, DEBUG) && !(def_mount_opts & EXT4_DEFM_DEBUG)) > > >> seq_puts(seq, ",debug"); > > >> - if (test_opt(sb, OLDALLOC)) > > >> - seq_puts(seq, ",oldalloc"); > > >> #ifdef CONFIG_EXT4_FS_XATTR > > >> if (test_opt(sb, XATTR_USER)) > > >> seq_puts(seq, ",user_xattr"); > > >> @@ -1541,10 +1539,12 @@ static int parse_options(char *options, struct super_block *sb, > > >> set_opt(sb, DEBUG); > > >> break; > > >> case Opt_oldalloc: > > >> - set_opt(sb, OLDALLOC); > > >> + ext4_msg(sb, KERN_WARNING, > > >> + "Ignoring deprecated oldalloc option"); > > >> break; > > >> case Opt_orlov: > > >> - clear_opt(sb, OLDALLOC); > > >> + ext4_msg(sb, KERN_WARNING, > > >> + "Ignoring deprecated orlov option"); > > >> break; > > >> #ifdef CONFIG_EXT4_FS_XATTR > > >> case Opt_user_xattr: > > > > > > -- > > > To unsubscribe from this list: send the line "unsubscribe linux-ext4" in > > > the body of a message to majordomo@xxxxxxxxxxxxxxx > > > More majordomo info at http://vger.kernel.org/majordomo-info.html > > > > > > -- -- To unsubscribe from this list: send the line "unsubscribe linux-ext4" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html