On Wed, 2003-11-05 at 22:38, Theodore Ts'o wrote: > > I will note that with insane-machines-that-only-SGI-would-love with > 1024 processors, percpu_counters are always going to take huge amounts > of memory. So the ext2/ext3 superblock can't be the only place where > you're running into problems here.... This is the only place where we've run into the problem with 512 processors. How about the following patch? It uses Andreas' idea and simply changes the percpu_counters to be pointers. I did it for all cases, not just for large numbers of CPUs. I've only done it for ext3. mh -- Martin Hicks Wild Open Source Inc. mort@xxxxxxxxxxxxxxxxxx 613-266-2296 # This is a BitKeeper generated patch for the following project: # Project Name: Linux kernel tree # This patch format is intended for GNU patch command version 2.5 or higher. # This patch includes the following deltas: # ChangeSet 1.1356 -> 1.1357 # fs/ext3/balloc.c 1.18 -> 1.19 # fs/ext3/ialloc.c 1.31 -> 1.32 # fs/ext3/super.c 1.79 -> 1.80 # include/linux/ext3_fs_sb.h 1.7 -> 1.8 # # The following is the BitKeeper ChangeSet Log # -------------------------------------------- # 03/11/06 mort@xxxxxxxxxxxxxxxx 1.1357 # Change the percpu_counters in the ext3_sb_info struct to be pointers. # This gets ext3 linking with a large NR_CPUS. Tested with 512. 
# -------------------------------------------- # diff -Nru a/fs/ext3/balloc.c b/fs/ext3/balloc.c --- a/fs/ext3/balloc.c Thu Nov 6 10:54:31 2003 +++ b/fs/ext3/balloc.c Thu Nov 6 10:54:31 2003 @@ -254,7 +254,7 @@ cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) + dquot_freed_blocks); spin_unlock(sb_bgl_lock(sbi, block_group)); - percpu_counter_mod(&sbi->s_freeblocks_counter, count); + percpu_counter_mod(sbi->s_freeblocks_counter, count); /* We dirtied the bitmap block */ BUFFER_TRACE(bitmap_bh, "dirtied bitmap block"); @@ -511,7 +511,7 @@ es = EXT3_SB(sb)->s_es; ext3_debug("goal=%lu.\n", goal); - free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter); + free_blocks = percpu_counter_read_positive(sbi->s_freeblocks_counter); root_blocks = le32_to_cpu(es->s_r_blocks_count); if (free_blocks < root_blocks + 1 && !capable(CAP_SYS_RESOURCE) && sbi->s_resuid != current->fsuid && @@ -652,7 +652,7 @@ gdp->bg_free_blocks_count = cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) - 1); spin_unlock(sb_bgl_lock(sbi, group_no)); - percpu_counter_mod(&sbi->s_freeblocks_counter, -1); + percpu_counter_mod(sbi->s_freeblocks_counter, -1); BUFFER_TRACE(gdp_bh, "journal_dirty_metadata for group descriptor"); err = ext3_journal_dirty_metadata(handle, gdp_bh); diff -Nru a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c --- a/fs/ext3/ialloc.c Thu Nov 6 10:54:31 2003 +++ b/fs/ext3/ialloc.c Thu Nov 6 10:54:31 2003 @@ -169,9 +169,9 @@ gdp->bg_used_dirs_count = cpu_to_le16( le16_to_cpu(gdp->bg_used_dirs_count) - 1); spin_unlock(sb_bgl_lock(sbi, block_group)); - percpu_counter_inc(&sbi->s_freeinodes_counter); + percpu_counter_inc(sbi->s_freeinodes_counter); if (is_directory) - percpu_counter_dec(&sbi->s_dirs_counter); + percpu_counter_dec(sbi->s_dirs_counter); } BUFFER_TRACE(bh2, "call ext3_journal_dirty_metadata"); @@ -206,7 +206,7 @@ struct buffer_head *bh; int group, best_group = -1; - freei = percpu_counter_read_positive(&EXT3_SB(sb)->s_freeinodes_counter); + freei = 
percpu_counter_read_positive(EXT3_SB(sb)->s_freeinodes_counter); avefreei = freei / ngroups; for (group = 0; group < ngroups; group++) { @@ -268,11 +268,11 @@ struct ext3_group_desc *desc; struct buffer_head *bh; - freei = percpu_counter_read_positive(&sbi->s_freeinodes_counter); + freei = percpu_counter_read_positive(sbi->s_freeinodes_counter); avefreei = freei / ngroups; - freeb = percpu_counter_read_positive(&sbi->s_freeblocks_counter); + freeb = percpu_counter_read_positive(sbi->s_freeblocks_counter); avefreeb = freeb / ngroups; - ndirs = percpu_counter_read_positive(&sbi->s_dirs_counter); + ndirs = percpu_counter_read_positive(sbi->s_dirs_counter); if ((parent == sb->s_root->d_inode) || (parent->i_flags & EXT3_TOPDIR_FL)) { @@ -533,9 +533,9 @@ err = ext3_journal_dirty_metadata(handle, bh2); if (err) goto fail; - percpu_counter_dec(&sbi->s_freeinodes_counter); + percpu_counter_dec(sbi->s_freeinodes_counter); if (S_ISDIR(mode)) - percpu_counter_inc(&sbi->s_dirs_counter); + percpu_counter_inc(sbi->s_dirs_counter); sb->s_dirt = 1; inode->i_uid = current->fsuid; diff -Nru a/fs/ext3/super.c b/fs/ext3/super.c --- a/fs/ext3/super.c Thu Nov 6 10:54:31 2003 +++ b/fs/ext3/super.c Thu Nov 6 10:54:31 2003 @@ -421,6 +421,9 @@ ext3_blkdev_remove(sbi); } sb->s_fs_info = NULL; + kfree(sbi->s_freeblocks_counter); + kfree(sbi->s_freeinodes_counter); + kfree(sbi->s_dirs_counter); kfree(sbi); return; } @@ -848,6 +851,32 @@ return res; } +static int ext3_setup_sbi (struct ext3_sb_info *sbi) +{ + memset(sbi, 0, sizeof(*sbi)); + + sbi->s_freeblocks_counter = kmalloc(sizeof(struct percpu_counter), GFP_KERNEL); + if (!sbi->s_freeblocks_counter) + return -ENOMEM; + sbi->s_freeinodes_counter = kmalloc(sizeof(struct percpu_counter), GFP_KERNEL); + if (!sbi->s_freeinodes_counter) + goto out_freeblocks; + sbi->s_dirs_counter = kmalloc(sizeof(struct percpu_counter), GFP_KERNEL); + if (!sbi->s_dirs_counter) + goto out_freeinode; + + sbi->s_mount_opt = 0; + sbi->s_resuid = EXT3_DEF_RESUID; + 
sbi->s_resgid = EXT3_DEF_RESGID; + return 0; + + out_freeinode: + kfree(sbi->s_freeinodes_counter); + out_freeblocks: + kfree(sbi->s_freeblocks_counter); + return -ENOMEM; +} + static int ext3_check_descriptors (struct super_block * sb) { struct ext3_sb_info *sbi = EXT3_SB(sb); @@ -1048,10 +1077,10 @@ if (!sbi) return -ENOMEM; sb->s_fs_info = sbi; - memset(sbi, 0, sizeof(*sbi)); - sbi->s_mount_opt = 0; - sbi->s_resuid = EXT3_DEF_RESUID; - sbi->s_resgid = EXT3_DEF_RESGID; + if (ext3_setup_sbi(sbi)) { + kfree(sbi); + return -ENOMEM; + } blocksize = sb_min_blocksize(sb, EXT3_MIN_BLOCK_SIZE); if (!blocksize) { @@ -1266,9 +1295,9 @@ } memset(sbi->s_debts, 0, sbi->s_groups_count * sizeof(u8)); - percpu_counter_init(&sbi->s_freeblocks_counter); - percpu_counter_init(&sbi->s_freeinodes_counter); - percpu_counter_init(&sbi->s_dirs_counter); + percpu_counter_init(sbi->s_freeblocks_counter); + percpu_counter_init(sbi->s_freeinodes_counter); + percpu_counter_init(sbi->s_dirs_counter); bgl_lock_init(&sbi->s_blockgroup_lock); for (i = 0; i < db_count; i++) { @@ -1383,11 +1412,11 @@ test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_ORDERED_DATA ? 
"ordered": "writeback"); - percpu_counter_mod(&sbi->s_freeblocks_counter, + percpu_counter_mod(sbi->s_freeblocks_counter, ext3_count_free_blocks(sb)); - percpu_counter_mod(&sbi->s_freeinodes_counter, + percpu_counter_mod(sbi->s_freeinodes_counter, ext3_count_free_inodes(sb)); - percpu_counter_mod(&sbi->s_dirs_counter, + percpu_counter_mod(sbi->s_dirs_counter, ext3_count_dirs(sb)); return 0; @@ -1404,6 +1433,9 @@ brelse(bh); out_fail: sb->s_fs_info = NULL; + kfree(sbi->s_freeblocks_counter); + kfree(sbi->s_freeinodes_counter); + kfree(sbi->s_dirs_counter); kfree(sbi); return -EINVAL; } diff -Nru a/include/linux/ext3_fs_sb.h b/include/linux/ext3_fs_sb.h --- a/include/linux/ext3_fs_sb.h Thu Nov 6 10:54:31 2003 +++ b/include/linux/ext3_fs_sb.h Thu Nov 6 10:54:31 2003 @@ -53,9 +53,9 @@ u32 s_hash_seed[4]; int s_def_hash_version; u8 *s_debts; - struct percpu_counter s_freeblocks_counter; - struct percpu_counter s_freeinodes_counter; - struct percpu_counter s_dirs_counter; + struct percpu_counter *s_freeblocks_counter; + struct percpu_counter *s_freeinodes_counter; + struct percpu_counter *s_dirs_counter; struct blockgroup_lock s_blockgroup_lock; /* Journaling */ _______________________________________________ Ext3-users@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/ext3-users