On Thu, Dec 11, 2008 at 11:38:56PM +0100, Eric Dumazet wrote: > Adding a percpu_counter nr_dentry avoids cache line ping pongs > between cpus to maintain this metric, and dcache_lock is > no more needed to protect dentry_stat.nr_dentry > > We centralize nr_dentry updates at the right place : > - increments in d_alloc() > - decrements in d_free() > > d_alloc() can avoid taking dcache_lock if parent is NULL > > ("socketallocbench -n8" result : 27.5s to 25s) Looks good! (At least once I realised that nr_dentry was global rather than per-dentry!!!) Reviewed-by: Paul E. McKenney <paulmck@xxxxxxxxxxxxxxxxxx> > Signed-off-by: Eric Dumazet <dada1@xxxxxxxxxxxxx> > --- > fs/dcache.c | 49 +++++++++++++++++++++++++------------------ > include/linux/fs.h | 2 + > kernel/sysctl.c | 2 - > 3 files changed, 32 insertions(+), 21 deletions(-) > > diff --git a/fs/dcache.c b/fs/dcache.c > index fa1ba03..f463a81 100644 > --- a/fs/dcache.c > +++ b/fs/dcache.c > @@ -61,12 +61,31 @@ static struct kmem_cache *dentry_cache __read_mostly; > static unsigned int d_hash_mask __read_mostly; > static unsigned int d_hash_shift __read_mostly; > static struct hlist_head *dentry_hashtable __read_mostly; > +static struct percpu_counter nr_dentry; > > /* Statistics gathering. */ > struct dentry_stat_t dentry_stat = { > .age_limit = 45, > }; > > +/* > + * Handle nr_dentry sysctl > + */ > +#if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS) > +int proc_nr_dentry(ctl_table *table, int write, struct file *filp, > + void __user *buffer, size_t *lenp, loff_t *ppos) > +{ > + dentry_stat.nr_dentry = percpu_counter_sum_positive(&nr_dentry); > + return proc_dointvec(table, write, filp, buffer, lenp, ppos); > +} > +#else > +int proc_nr_dentry(ctl_table *table, int write, struct file *filp, > + void __user *buffer, size_t *lenp, loff_t *ppos) > +{ > + return -ENOSYS; > +} > +#endif > + > static void __d_free(struct dentry *dentry) > { > WARN_ON(!list_empty(&dentry->d_alias)); > @@ -82,8 +101,7 @@ static void d_callback(struct rcu_head *head) > } > > /* > - * no dcache_lock, please. The caller must decrement dentry_stat.nr_dentry > - * inside dcache_lock. > + * no dcache_lock, please. > */ > static void d_free(struct dentry *dentry) > { > @@ -94,6 +112,7 @@ static void d_free(struct dentry *dentry) > __d_free(dentry); > else > call_rcu(&dentry->d_u.d_rcu, d_callback); > + percpu_counter_dec(&nr_dentry); > } > > /* > @@ -172,7 +191,6 @@ static struct dentry *d_kill(struct dentry *dentry) > struct dentry *parent; > > list_del(&dentry->d_u.d_child); > - dentry_stat.nr_dentry--; /* For d_free, below */ > /*drops the locks, at that point nobody can reach this dentry */ > dentry_iput(dentry); > if (IS_ROOT(dentry)) > @@ -619,7 +637,6 @@ void shrink_dcache_sb(struct super_block * sb) > static void shrink_dcache_for_umount_subtree(struct dentry *dentry) > { > struct dentry *parent; > - unsigned detached = 0; > > BUG_ON(!IS_ROOT(dentry)); > > @@ -678,7 +695,6 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry) > } > > list_del(&dentry->d_u.d_child); > - detached++; > > inode = dentry->d_inode; > if (inode) { > @@ -696,7 +712,7 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry) > * otherwise we ascend to the parent and move to the > * next sibling if there is one */ > if (!parent) > - goto out; > + return; > > dentry = parent; > > @@ -705,11 +721,6 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry) > dentry = list_entry(dentry->d_subdirs.next, > struct dentry, d_u.d_child); > } > -out: > - /* several dentries were freed, need to correct nr_dentry */ > - spin_lock(&dcache_lock); > - dentry_stat.nr_dentry -= detached; > - spin_unlock(&dcache_lock); > } > > /* > @@ -943,8 +954,6 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name) > dentry->d_flags = DCACHE_UNHASHED; > spin_lock_init(&dentry->d_lock); > dentry->d_inode = NULL; > - dentry->d_parent = NULL; > - dentry->d_sb = NULL; > dentry->d_op = NULL; > dentry->d_fsdata = NULL; > dentry->d_mounted = 0; > @@ -959,16 +968,15 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name) > if (parent) { > dentry->d_parent = dget(parent); > dentry->d_sb = parent->d_sb; > + spin_lock(&dcache_lock); > + list_add(&dentry->d_u.d_child, &parent->d_subdirs); > + spin_unlock(&dcache_lock); > } else { > + dentry->d_parent = NULL; > + dentry->d_sb = NULL; > INIT_LIST_HEAD(&dentry->d_u.d_child); > } > - > - spin_lock(&dcache_lock); > - if (parent) > - list_add(&dentry->d_u.d_child, &parent->d_subdirs); > - dentry_stat.nr_dentry++; > - spin_unlock(&dcache_lock); > - > + percpu_counter_inc(&nr_dentry); > return dentry; > } > > @@ -2282,6 +2290,7 @@ static void __init dcache_init(void) > { > int loop; > > + percpu_counter_init(&nr_dentry, 0); > /* > * A constructor could be added for stable state like the lists, > * but it is probably not worth it because of the cache nature > diff --git a/include/linux/fs.h b/include/linux/fs.h > index 4a853ef..114cb65 100644 > --- a/include/linux/fs.h > +++ b/include/linux/fs.h > @@ -2217,6 +2217,8 @@ static inline void free_secdata(void *secdata) > struct ctl_table; > int proc_nr_files(struct ctl_table *table, int write, struct file *filp, > void __user *buffer, size_t *lenp, loff_t *ppos); > +int proc_nr_dentry(struct ctl_table *table, int write, struct file *filp, > + void __user *buffer, size_t *lenp, loff_t *ppos); > > int get_filesystem_list(char * buf); > > diff --git a/kernel/sysctl.c b/kernel/sysctl.c > index 3d56fe7..777bee7 100644 > --- a/kernel/sysctl.c > +++ b/kernel/sysctl.c > @@ -1246,7 +1246,7 @@ static struct ctl_table fs_table[] = { > .data = &dentry_stat, > .maxlen = 6*sizeof(int), > .mode = 0444, > - .proc_handler = &proc_dointvec, > + .proc_handler = &proc_nr_dentry, > }, > { > .ctl_name = FS_OVERFLOWUID, -- To unsubscribe from this list: send the line "unsubscribe kernel-testers" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html