On Fri 23-04-10 09:31:41, Dmitry Monakhov wrote: > > I've written this patch long time ago, it is pretty simple, and > allow more non obvious locking optimization to be done later. The patch looks OK to me. Will merge it. Honza > From 234b13091fcb16966c2abcd936c8a0ed99822952 Mon Sep 17 00:00:00 2001 > From: Dmitry Monakhov <dmonakhov@xxxxxxxxxx> > Date: Fri, 23 Apr 2010 09:06:41 +0400 > Subject: [PATCH] quota: Make quota stat accounting lockless. > > Quota stats it mostly writable data structure. Let's alloc percpu > bucket for each value. > > NOTE: dqstats_read() function is racy against dqstats_{inc,dec} > and may return inconsistent value. But this is ok since absolute > accuracy is not required. > > Signed-off-by: Dmitry Monakhov <dmonakhov@xxxxxxxxxx> > --- > fs/quota/dquot.c | 122 +++++++++++++++++++++++++++++++++++-------------- > fs/quota/quota_tree.c | 4 +- > fs/quota/quota_v1.c | 4 +- > include/linux/quota.h | 26 ++++++---- > 4 files changed, 106 insertions(+), 50 deletions(-) > > diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c > index 05c590e..aa78e26 100644 > --- a/fs/quota/dquot.c > +++ b/fs/quota/dquot.c > @@ -82,7 +82,7 @@ > > /* > * There are three quota SMP locks. dq_list_lock protects all lists with quotas > - * and quota formats, dqstats structure containing statistics about the lists > + * and quota formats. > * dq_data_lock protects data from dq_dqb and also mem_dqinfo structures and > * also guards consistency of dquot->dq_dqb with inode->i_blocks, i_bytes. > * i_blocks and i_bytes updates itself are guarded by i_lock acquired directly > @@ -132,6 +132,11 @@ static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_state_lock); > __cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_data_lock); > EXPORT_SYMBOL(dq_data_lock); > > +#ifdef CONFIG_SMP > +struct dqstats *dqstats_pcpu; > +#endif > +struct dqstats dqstats; > + > static char *quotatypes[] = INITQFNAMES; > static struct quota_format_type *quota_formats; /* List of registered formats */ > static struct quota_module_name module_names[] = INIT_QUOTA_MODULE_NAMES; > @@ -273,7 +278,7 @@ static struct dquot *find_dquot(unsigned int hashent, struct super_block *sb, > static inline void put_dquot_last(struct dquot *dquot) > { > list_add_tail(&dquot->dq_free, &free_dquots); > - dqstats.free_dquots++; > + dqstats_inc(DQST_FREE_DQUOTS); > } > > static inline void remove_free_dquot(struct dquot *dquot) > @@ -281,7 +286,7 @@ static inline void remove_free_dquot(struct dquot *dquot) > if (list_empty(&dquot->dq_free)) > return; > list_del_init(&dquot->dq_free); > - dqstats.free_dquots--; > + dqstats_dec(DQST_FREE_DQUOTS); > } > > static inline void put_inuse(struct dquot *dquot) > @@ -289,12 +294,12 @@ static inline void put_inuse(struct dquot *dquot) > /* We add to the back of inuse list so we don't have to restart > * when traversing this list and we block */ > list_add_tail(&dquot->dq_inuse, &inuse_list); > - dqstats.allocated_dquots++; > + dqstats_inc(DQST_ALLOC_DQUOTS); > } > > static inline void remove_inuse(struct dquot *dquot) > { > - dqstats.allocated_dquots--; > + dqstats_dec(DQST_ALLOC_DQUOTS); > list_del(&dquot->dq_inuse); > } > /* > @@ -559,8 +564,8 @@ int dquot_scan_active(struct super_block *sb, > continue; > /* Now we have active dquot so we can just increase use count */ > atomic_inc(&dquot->dq_count); > - dqstats.lookups++; > spin_unlock(&dq_list_lock); > + dqstats_inc(DQST_LOOKUPS); > dqput(old_dquot); > old_dquot = dquot; > ret = fn(dquot, priv); > @@ -605,8 +610,8 @@ int vfs_quota_sync(struct super_block *sb, int type, int wait) > * holding reference so we can safely just increase > * use count */ > atomic_inc(&dquot->dq_count); > - dqstats.lookups++; > spin_unlock(&dq_list_lock); > + dqstats_inc(DQST_LOOKUPS); > sb->dq_op->write_dquot(dquot); > dqput(dquot); > spin_lock(&dq_list_lock); > @@ -618,9 +623,7 @@ int vfs_quota_sync(struct super_block *sb, int type, int wait) > if ((cnt == type || type == -1) && sb_has_quota_active(sb, cnt) > && info_dirty(&dqopt->info[cnt])) > sb->dq_op->write_info(sb, cnt); > - spin_lock(&dq_list_lock); > - dqstats.syncs++; > - spin_unlock(&dq_list_lock); > + dqstats_inc(DQST_SYNCS); > mutex_unlock(&dqopt->dqonoff_mutex); > > if (!wait || (sb_dqopt(sb)->flags & DQUOT_QUOTA_SYS_FILE)) > @@ -684,7 +687,7 @@ static int shrink_dqcache_memory(int nr, gfp_t gfp_mask) > prune_dqcache(nr); > spin_unlock(&dq_list_lock); > } > - return (dqstats.free_dquots / 100) * sysctl_vfs_cache_pressure; > + return (dqstats_read(DQST_FREE_DQUOTS)/100) * sysctl_vfs_cache_pressure; > } > > static struct shrinker dqcache_shrinker = { > @@ -712,10 +715,7 @@ void dqput(struct dquot *dquot) > BUG(); > } > #endif > - > - spin_lock(&dq_list_lock); > - dqstats.drops++; > - spin_unlock(&dq_list_lock); > + dqstats_inc(DQST_DROPS); > we_slept: > spin_lock(&dq_list_lock); > if (atomic_read(&dquot->dq_count) > 1) { > @@ -832,15 +832,16 @@ we_slept: > put_inuse(dquot); > /* hash it first so it can be found */ > insert_dquot_hash(dquot); > - dqstats.lookups++; > spin_unlock(&dq_list_lock); > + dqstats_inc(DQST_LOOKUPS); > + > } else { > if (!atomic_read(&dquot->dq_count)) > remove_free_dquot(dquot); > atomic_inc(&dquot->dq_count); > - dqstats.cache_hits++; > - dqstats.lookups++; > spin_unlock(&dq_list_lock); > + dqstats_inc(DQST_CACHE_HITS); > + dqstats_inc(DQST_LOOKUPS); > } > /* Wait for dq_lock - after this we know that either dquot_release() is > * already finished or it will be canceled due to dq_count > 1 test */ > @@ -2474,68 +2475,112 @@ const struct quotactl_ops vfs_quotactl_ops = { > .set_dqblk = vfs_set_dqblk > }; > > +void dqstats_inc(unsigned int type) > +{ > +#ifdef CONFIG_SMP > + per_cpu_ptr(dqstats_pcpu, smp_processor_id())->stat[type]++; > +#else > + dqstats[type]++; > +#endif > +} > + > +void dqstats_dec(unsigned int type) > +{ > +#ifdef CONFIG_SMP > + per_cpu_ptr(dqstats_pcpu, smp_processor_id())->stat[type]--; > +#else > + dqstats[type]--; > +#endif > +} > + > +int dqstats_read(unsigned int type) > +{ > + int count = 0; > + int cpu; > +#ifdef CONFIG_SMP > + for_each_possible_cpu(cpu) > + count += per_cpu_ptr(dqstats_pcpu, cpu)->stat[type]; > + /* Statistics reading is racy, but absolute accuracy isn't required */ > + if (count < 0) > + count = 0; > +#else > + count = dqstats[type]; > +#endif > + return count; > +} > + > +static int do_proc_dqstats(struct ctl_table *table, int write, > + void __user *buffer, size_t *lenp, loff_t *ppos) > +{ > +#ifdef CONFIG_SMP > + /* Update global table */ > + unsigned int type = (int *)table->data - dqstats.stat; > + dqstats.stat[type] = dqstats_read(type); > +#endif > + return proc_dointvec(table, write, buffer, lenp, ppos); > +} > + > static ctl_table fs_dqstats_table[] = { > { > .procname = "lookups", > - .data = &dqstats.lookups, > + .data = &dqstats.stat[DQST_LOOKUPS], > .maxlen = sizeof(int), > .mode = 0444, > - .proc_handler = proc_dointvec, > + .proc_handler = do_proc_dqstats, > }, > { > .procname = "drops", > - .data = &dqstats.drops, > + .data = &dqstats.stat[DQST_DROPS], > .maxlen = sizeof(int), > .mode = 0444, > - .proc_handler = proc_dointvec, > + .proc_handler = do_proc_dqstats, > }, > { > .procname = "reads", > - .data = &dqstats.reads, > + .data = &dqstats.stat[DQST_READS], > .maxlen = sizeof(int), > .mode = 0444, > - .proc_handler = proc_dointvec, > + .proc_handler = do_proc_dqstats, > }, > { > .procname = "writes", > - .data = &dqstats.writes, > + .data = &dqstats.stat[DQST_WRITES], > .maxlen = sizeof(int), > .mode = 0444, > - .proc_handler = proc_dointvec, > + .proc_handler = do_proc_dqstats, > }, > { > .procname = "cache_hits", > - .data = &dqstats.cache_hits, > + .data = &dqstats.stat[DQST_CACHE_HITS], > .maxlen = sizeof(int), > .mode = 0444, > - .proc_handler = proc_dointvec, > + .proc_handler = do_proc_dqstats, > }, > { > .procname = "allocated_dquots", > - .data = &dqstats.allocated_dquots, > + .data = &dqstats.stat[DQST_ALLOC_DQUOTS], > .maxlen = sizeof(int), > .mode = 0444, > - .proc_handler = proc_dointvec, > + .proc_handler = do_proc_dqstats, > }, > { > .procname = "free_dquots", > - .data = &dqstats.free_dquots, > + .data = &dqstats.stat[DQST_FREE_DQUOTS], > .maxlen = sizeof(int), > .mode = 0444, > - .proc_handler = proc_dointvec, > + .proc_handler = do_proc_dqstats, > }, > { > .procname = "syncs", > - .data = &dqstats.syncs, > + .data = &dqstats.stat[DQST_SYNCS], > .maxlen = sizeof(int), > .mode = 0444, > - .proc_handler = proc_dointvec, > + .proc_handler = do_proc_dqstats, > }, > #ifdef CONFIG_PRINT_QUOTA_WARNING > { > .procname = "warnings", > .data = &flag_print_warnings, > - .maxlen = sizeof(int), > .mode = 0644, > .proc_handler = proc_dointvec, > }, > @@ -2581,6 +2626,13 @@ static int __init dquot_init(void) > if (!dquot_hash) > panic("Cannot create dquot hash table"); > > +#ifdef CONFIG_SMP > + dqstats_pcpu = alloc_percpu(struct dqstats); > + if (!dqstats_pcpu) > + panic("Cannot create dquot stats table"); > +#endif > + memset(&dqstats, 0, sizeof(struct dqstats)); > + > /* Find power-of-two hlist_heads which can fit into allocation */ > nr_hash = (1UL << order) * PAGE_SIZE / sizeof(struct hlist_head); > dq_hash_bits = 0; > diff --git a/fs/quota/quota_tree.c b/fs/quota/quota_tree.c > index f81f4bc..5b7f741 100644 > --- a/fs/quota/quota_tree.c > +++ b/fs/quota/quota_tree.c > @@ -384,7 +384,7 @@ int qtree_write_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot) > } else { > ret = 0; > } > - dqstats.writes++; > + dqstats_inc(DQST_WRITES); > kfree(ddquot); > > return ret; > @@ -634,7 +634,7 @@ int qtree_read_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot) > spin_unlock(&dq_data_lock); > kfree(ddquot); > out: > - dqstats.reads++; > + dqstats_inc(DQST_READS); > return ret; > } > EXPORT_SYMBOL(qtree_read_dquot); > diff --git a/fs/quota/quota_v1.c b/fs/quota/quota_v1.c > index 2ae757e..4af344c 100644 > --- a/fs/quota/quota_v1.c > +++ b/fs/quota/quota_v1.c > @@ -71,7 +71,7 @@ static int v1_read_dqblk(struct dquot *dquot) > dquot->dq_dqb.dqb_ihardlimit == 0 && > dquot->dq_dqb.dqb_isoftlimit == 0) > set_bit(DQ_FAKE_B, &dquot->dq_flags); > - dqstats.reads++; > + dqstats_inc(DQST_READS); > > return 0; > } > @@ -104,7 +104,7 @@ static int v1_commit_dqblk(struct dquot *dquot) > ret = 0; > > out: > - dqstats.writes++; > + dqstats_inc(DQST_WRITES); > > return ret; > } > diff --git a/include/linux/quota.h b/include/linux/quota.h > index b462916..40e0157 100644 > --- a/include/linux/quota.h > +++ b/include/linux/quota.h > @@ -237,19 +237,23 @@ static inline int info_dirty(struct mem_dqinfo *info) > { > return test_bit(DQF_INFO_DIRTY_B, &info->dqi_flags); > } > - > +enum { > + DQST_LOOKUPS, > + DQST_DROPS, > + DQST_READS, > + DQST_WRITES, > + DQST_CACHE_HITS, > + DQST_ALLOC_DQUOTS, > + DQST_FREE_DQUOTS, > + DQST_SYNCS, > + _DQST_DQSTAT_LAST > +}; > struct dqstats { > - int lookups; > - int drops; > - int reads; > - int writes; > - int cache_hits; > - int allocated_dquots; > - int free_dquots; > - int syncs; > + int stat[_DQST_DQSTAT_LAST]; > }; > - > -extern struct dqstats dqstats; > +extern void dqstats_inc(unsigned int type); > +extern void dqstats_dec(unsigned int type); > +extern int dqstats_read(unsigned int type); > > #define DQ_MOD_B 0 /* dquot modified since read */ > #define DQ_BLKS_B 1 /* uid/gid has been warned about blk limit */ > -- > 1.6.6.1 > -- Jan Kara <jack@xxxxxxx> SUSE Labs, CR -- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html