Re: [Ext2-devel] Re: [PATCH] Link breaks for large NR_CPUS

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Thu, 2003-11-06 at 15:01, Martin Hicks wrote:
> 
> I think it's possible to use alloc_percpu().  The best way would be to
> overhaul the percpu_counter structure to use alloc_percpu().  With just 
> a quick inspection it looks like this would solve the kmalloc size
> problems for a long time, although it would affect performance because
> you do one kmalloc() per processor when you do alloc_percpu().
> 
> Let me take a look at that...

Here's the patch.  I tested it on linux-2.6.0-test9 on ia64.  It seems
to work.  (Please test with caution, of course)

I didn't do any benchmarking, because I don't have a reasonable
filesystem benchmark setup.  Any opinions?

The attached patches change percpu_counters to use alloc_percpu(), and
change ext3 so that its ext3_sb_info holds pointers to percpu_counters.
The latter is probably not even required anymore, since there is no longer
an array of size NR_CPUS in struct percpu_counter.

mh

-- 
Martin Hicks                Wild Open Source Inc.
mort@xxxxxxxxxxxxxxxxxx     613-266-2296

# This is a BitKeeper generated patch for the following project:
# Project Name: Linux kernel tree
# This patch format is intended for GNU patch command version 2.5 or higher.
# This patch includes the following deltas:
#	           ChangeSet	1.1356  -> 1.1357 
#	    fs/ext3/balloc.c	1.18    -> 1.19   
#	    fs/ext3/ialloc.c	1.31    -> 1.32   
#	     fs/ext3/super.c	1.79    -> 1.80   
#	include/linux/ext3_fs_sb.h	1.7     -> 1.8    
#
# The following is the BitKeeper ChangeSet Log
# --------------------------------------------
# 03/11/06	mort@xxxxxxxxxxxxxxxx	1.1357
# Change the percpu_counters in the ext3_sb_info struct to be pointers.
# This gets ext3 linking with a large NR_CPUS.  Tested with 512.
# --------------------------------------------
#
diff -Nru a/fs/ext3/balloc.c b/fs/ext3/balloc.c
--- a/fs/ext3/balloc.c	Fri Nov  7 13:30:23 2003
+++ b/fs/ext3/balloc.c	Fri Nov  7 13:30:23 2003
@@ -254,7 +254,7 @@
 		cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) +
 			dquot_freed_blocks);
 	spin_unlock(sb_bgl_lock(sbi, block_group));
-	percpu_counter_mod(&sbi->s_freeblocks_counter, count);
+	percpu_counter_mod(sbi->s_freeblocks_counter, count);
 
 	/* We dirtied the bitmap block */
 	BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
@@ -511,7 +511,7 @@
 	es = EXT3_SB(sb)->s_es;
 	ext3_debug("goal=%lu.\n", goal);
 
-	free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter);
+	free_blocks = percpu_counter_read_positive(sbi->s_freeblocks_counter);
 	root_blocks = le32_to_cpu(es->s_r_blocks_count);
 	if (free_blocks < root_blocks + 1 && !capable(CAP_SYS_RESOURCE) &&
 		sbi->s_resuid != current->fsuid &&
@@ -652,7 +652,7 @@
 	gdp->bg_free_blocks_count =
 			cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) - 1);
 	spin_unlock(sb_bgl_lock(sbi, group_no));
-	percpu_counter_mod(&sbi->s_freeblocks_counter, -1);
+	percpu_counter_mod(sbi->s_freeblocks_counter, -1);
 
 	BUFFER_TRACE(gdp_bh, "journal_dirty_metadata for group descriptor");
 	err = ext3_journal_dirty_metadata(handle, gdp_bh);
diff -Nru a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c
--- a/fs/ext3/ialloc.c	Fri Nov  7 13:30:23 2003
+++ b/fs/ext3/ialloc.c	Fri Nov  7 13:30:23 2003
@@ -169,9 +169,9 @@
 				gdp->bg_used_dirs_count = cpu_to_le16(
 				  le16_to_cpu(gdp->bg_used_dirs_count) - 1);
 			spin_unlock(sb_bgl_lock(sbi, block_group));
-			percpu_counter_inc(&sbi->s_freeinodes_counter);
+			percpu_counter_inc(sbi->s_freeinodes_counter);
 			if (is_directory)
-				percpu_counter_dec(&sbi->s_dirs_counter);
+				percpu_counter_dec(sbi->s_dirs_counter);
 
 		}
 		BUFFER_TRACE(bh2, "call ext3_journal_dirty_metadata");
@@ -206,7 +206,7 @@
 	struct buffer_head *bh;
 	int group, best_group = -1;
 
-	freei = percpu_counter_read_positive(&EXT3_SB(sb)->s_freeinodes_counter);
+	freei = percpu_counter_read_positive(EXT3_SB(sb)->s_freeinodes_counter);
 	avefreei = freei / ngroups;
 
 	for (group = 0; group < ngroups; group++) {
@@ -268,11 +268,11 @@
 	struct ext3_group_desc *desc;
 	struct buffer_head *bh;
 
-	freei = percpu_counter_read_positive(&sbi->s_freeinodes_counter);
+	freei = percpu_counter_read_positive(sbi->s_freeinodes_counter);
 	avefreei = freei / ngroups;
-	freeb = percpu_counter_read_positive(&sbi->s_freeblocks_counter);
+	freeb = percpu_counter_read_positive(sbi->s_freeblocks_counter);
 	avefreeb = freeb / ngroups;
-	ndirs = percpu_counter_read_positive(&sbi->s_dirs_counter);
+	ndirs = percpu_counter_read_positive(sbi->s_dirs_counter);
 
 	if ((parent == sb->s_root->d_inode) ||
 	    (parent->i_flags & EXT3_TOPDIR_FL)) {
@@ -533,9 +533,9 @@
 	err = ext3_journal_dirty_metadata(handle, bh2);
 	if (err) goto fail;
 
-	percpu_counter_dec(&sbi->s_freeinodes_counter);
+	percpu_counter_dec(sbi->s_freeinodes_counter);
 	if (S_ISDIR(mode))
-		percpu_counter_inc(&sbi->s_dirs_counter);
+		percpu_counter_inc(sbi->s_dirs_counter);
 	sb->s_dirt = 1;
 
 	inode->i_uid = current->fsuid;
diff -Nru a/fs/ext3/super.c b/fs/ext3/super.c
--- a/fs/ext3/super.c	Fri Nov  7 13:30:23 2003
+++ b/fs/ext3/super.c	Fri Nov  7 13:30:24 2003
@@ -421,6 +421,9 @@
 		ext3_blkdev_remove(sbi);
 	}
 	sb->s_fs_info = NULL;
+	kfree(sbi->s_freeblocks_counter);
+	kfree(sbi->s_freeinodes_counter);
+	kfree(sbi->s_dirs_counter);
 	kfree(sbi);
 	return;
 }
@@ -848,6 +851,32 @@
 	return res;
 }
 
+static int ext3_setup_sbi (struct ext3_sb_info *sbi)
+{
+	memset(sbi, 0, sizeof(*sbi));
+
+	sbi->s_freeblocks_counter = kmalloc(sizeof(struct percpu_counter), GFP_KERNEL);
+	if (!sbi->s_freeblocks_counter)
+		return -ENOMEM;
+	sbi->s_freeinodes_counter = kmalloc(sizeof(struct percpu_counter), GFP_KERNEL);
+	if (!sbi->s_freeinodes_counter)
+		goto out_freeblocks;
+	sbi->s_dirs_counter = kmalloc(sizeof(struct percpu_counter), GFP_KERNEL);
+	if (!sbi->s_dirs_counter)
+		goto out_freeinode;
+
+	sbi->s_mount_opt = 0;
+	sbi->s_resuid = EXT3_DEF_RESUID;
+	sbi->s_resgid = EXT3_DEF_RESGID;
+	return 0;
+
+ out_freeinode:
+	kfree(sbi->s_freeinodes_counter);
+ out_freeblocks:
+	kfree(sbi->s_freeblocks_counter);
+	return -ENOMEM;
+}
+
 static int ext3_check_descriptors (struct super_block * sb)
 {
 	struct ext3_sb_info *sbi = EXT3_SB(sb);
@@ -1048,10 +1077,10 @@
 	if (!sbi)
 		return -ENOMEM;
 	sb->s_fs_info = sbi;
-	memset(sbi, 0, sizeof(*sbi));
-	sbi->s_mount_opt = 0;
-	sbi->s_resuid = EXT3_DEF_RESUID;
-	sbi->s_resgid = EXT3_DEF_RESGID;
+	if (ext3_setup_sbi(sbi)) {
+		kfree(sbi);
+		return -ENOMEM;
+	}
 
 	blocksize = sb_min_blocksize(sb, EXT3_MIN_BLOCK_SIZE);
 	if (!blocksize) {
@@ -1266,9 +1295,9 @@
 	}
 	memset(sbi->s_debts, 0,  sbi->s_groups_count * sizeof(u8));
 
-	percpu_counter_init(&sbi->s_freeblocks_counter);
-	percpu_counter_init(&sbi->s_freeinodes_counter);
-	percpu_counter_init(&sbi->s_dirs_counter);
+	percpu_counter_init(sbi->s_freeblocks_counter);
+	percpu_counter_init(sbi->s_freeinodes_counter);
+	percpu_counter_init(sbi->s_dirs_counter);
 	bgl_lock_init(&sbi->s_blockgroup_lock);
 
 	for (i = 0; i < db_count; i++) {
@@ -1383,11 +1412,11 @@
 		test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_ORDERED_DATA ? "ordered":
 		"writeback");
 
-	percpu_counter_mod(&sbi->s_freeblocks_counter,
+	percpu_counter_mod(sbi->s_freeblocks_counter,
 		ext3_count_free_blocks(sb));
-	percpu_counter_mod(&sbi->s_freeinodes_counter,
+	percpu_counter_mod(sbi->s_freeinodes_counter,
 		ext3_count_free_inodes(sb));
-	percpu_counter_mod(&sbi->s_dirs_counter,
+	percpu_counter_mod(sbi->s_dirs_counter,
 		ext3_count_dirs(sb));
 
 	return 0;
@@ -1404,6 +1433,9 @@
 	brelse(bh);
 out_fail:
 	sb->s_fs_info = NULL;
+	kfree(sbi->s_freeblocks_counter);
+	kfree(sbi->s_freeinodes_counter);
+	kfree(sbi->s_dirs_counter);
 	kfree(sbi);
 	return -EINVAL;
 }
diff -Nru a/include/linux/ext3_fs_sb.h b/include/linux/ext3_fs_sb.h
--- a/include/linux/ext3_fs_sb.h	Fri Nov  7 13:30:24 2003
+++ b/include/linux/ext3_fs_sb.h	Fri Nov  7 13:30:24 2003
@@ -53,9 +53,9 @@
 	u32 s_hash_seed[4];
 	int s_def_hash_version;
         u8 *s_debts;
-	struct percpu_counter s_freeblocks_counter;
-	struct percpu_counter s_freeinodes_counter;
-	struct percpu_counter s_dirs_counter;
+	struct percpu_counter *s_freeblocks_counter;
+	struct percpu_counter *s_freeinodes_counter;
+	struct percpu_counter *s_dirs_counter;
 	struct blockgroup_lock s_blockgroup_lock;
 
 	/* Journaling */
# This is a BitKeeper generated patch for the following project:
# Project Name: Linux kernel tree
# This patch format is intended for GNU patch command version 2.5 or higher.
# This patch includes the following deltas:
#	           ChangeSet	1.1357  -> 1.1358 
#	lib/percpu_counter.c	1.3     -> 1.4    
#	include/linux/percpu_counter.h	1.2     -> 1.3    
#
# The following is the BitKeeper ChangeSet Log
# --------------------------------------------
# 03/11/07	mort@xxxxxxxxxxxxxxxx	1.1358
# percpu_counters-update.diff
# --------------------------------------------
#
diff -Nru a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h
--- a/include/linux/percpu_counter.h	Fri Nov  7 13:30:04 2003
+++ b/include/linux/percpu_counter.h	Fri Nov  7 13:30:04 2003
@@ -8,6 +8,7 @@
 #include <linux/spinlock.h>
 #include <linux/smp.h>
 #include <linux/threads.h>
+#include <linux/percpu.h>
 
 #ifdef CONFIG_SMP
 
@@ -18,7 +19,7 @@
 struct percpu_counter {
 	spinlock_t lock;
 	long count;
-	struct __percpu_counter counters[NR_CPUS];
+	struct __percpu_counter *counters;
 };
 
 #if NR_CPUS >= 16
@@ -29,12 +30,14 @@
 
 static inline void percpu_counter_init(struct percpu_counter *fbc)
 {
-	int i;
-
 	spin_lock_init(&fbc->lock);
 	fbc->count = 0;
-	for (i = 0; i < NR_CPUS; i++)
-		fbc->counters[i].count = 0;
+	fbc->counters = alloc_percpu(struct __percpu_counter);
+}
+
+static inline void percpu_counter_destroy(struct percpu_counter *fbc)
+{
+	free_percpu(fbc->counters);
 }
 
 void percpu_counter_mod(struct percpu_counter *fbc, long amount);
@@ -67,6 +70,10 @@
 static inline void percpu_counter_init(struct percpu_counter *fbc)
 {
 	fbc->count = 0;
+}
+
+static inline void percpu_counter_destroy(struct percpu_counter *fbc)
+{
 }
 
 static inline void
diff -Nru a/lib/percpu_counter.c b/lib/percpu_counter.c
--- a/lib/percpu_counter.c	Fri Nov  7 13:30:04 2003
+++ b/lib/percpu_counter.c	Fri Nov  7 13:30:04 2003
@@ -4,9 +4,10 @@
 
 void percpu_counter_mod(struct percpu_counter *fbc, long amount)
 {
-	int cpu = get_cpu();
-	long count = fbc->counters[cpu].count;
+	long count;
 
+	count = *(long *)get_cpu_ptr(fbc->counters);
+	put_cpu_ptr();
 	count += amount;
 	if (count >= FBC_BATCH || count <= -FBC_BATCH) {
 		spin_lock(&fbc->lock);
@@ -14,8 +15,8 @@
 		spin_unlock(&fbc->lock);
 		count = 0;
 	}
-	fbc->counters[cpu].count = count;
-	put_cpu();
+	*(long *)get_cpu_ptr(fbc->counters) = count;
+	put_cpu_ptr();
 }
 
 EXPORT_SYMBOL(percpu_counter_mod);

[Index of Archives]         [Linux RAID]     [Kernel Development]     [Red Hat Install]     [Video 4 Linux]     [Postgresql]     [Fedora]     [Gimp]     [Yosemite News]

  Powered by Linux