Jan Kara <jack@xxxxxxx> writes: > On Mon 26-04-10 17:16:23, Jan Kara wrote: >> On Fri 23-04-10 09:31:41, Dmitry Monakhov wrote: >> > >> > I've written this patch long time ago, it is pretty simple, and >> > allow more non obvious locking optimization to be done later. >> The patch looks OK to me. Will merge it. > Hmm, maybe I spoke to soon. One question: > >> > +void dqstats_inc(unsigned int type) >> > +{ >> > +#ifdef CONFIG_SMP >> > + per_cpu_ptr(dqstats_pcpu, smp_processor_id())->stat[type]++; >> > +#else >> > + dqstats[type]++; >> > +#endif >> > +} >> > + >> > +void dqstats_dec(unsigned int type) >> > +{ >> > +#ifdef CONFIG_SMP >> > + per_cpu_ptr(dqstats_pcpu, smp_processor_id())->stat[type]--; >> > +#else >> > + dqstats[type]--; >> > +#endif >> > +} > Why not make these two functions inline? Since they are used > also from quota_tree.c and quota_v1.c, we'd need to move them to a > quota.h header in that case. My initial goal was to keep inc/dec/read helpers together. We still need to export dqstats_pcpu. But if you prefer this style i'm ok with it. New version attached.
>From 6d3b422f18882896df1a862d081209c1938bc7a4 Mon Sep 17 00:00:00 2001 From: Dmitry Monakhov <dmonakhov@xxxxxxxxxx> Date: Mon, 26 Apr 2010 20:03:33 +0400 Subject: [PATCH] quota: Make quota stat accounting lockless. Quota stats it mostly writable data structure. Let's alloc percpu bucket for each value. NOTE: dqstats_read() function is racy against dqstats_{inc,dec} and may return inconsistent value. But this is ok since absolute accuracy is not required. Signed-off-by: Dmitry Monakhov <dmonakhov@xxxxxxxxxx> --- fs/quota/dquot.c | 106 ++++++++++++++++++++++++++++++++----------------- fs/quota/quota_tree.c | 4 +- fs/quota/quota_v1.c | 4 +- include/linux/quota.h | 41 ++++++++++++++----- 4 files changed, 105 insertions(+), 50 deletions(-) diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 05c590e..244e198 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -82,7 +82,7 @@ /* * There are three quota SMP locks. dq_list_lock protects all lists with quotas - * and quota formats, dqstats structure containing statistics about the lists + * and quota formats. * dq_data_lock protects data from dq_dqb and also mem_dqinfo structures and * also guards consistency of dquot->dq_dqb with inode->i_blocks, i_bytes. * i_blocks and i_bytes updates itself are guarded by i_lock acquired directly @@ -131,7 +131,11 @@ static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_list_lock); static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_state_lock); __cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_data_lock); EXPORT_SYMBOL(dq_data_lock); - +#ifdef CONFIG_SMP +struct dqstats *dqstats_pcpu; +EXPORT_SYMBOL(dqstats_pcpu); +#endif +struct dqstats dqstats; static char *quotatypes[] = INITQFNAMES; static struct quota_format_type *quota_formats; /* List of registered formats */ static struct quota_module_name module_names[] = INIT_QUOTA_MODULE_NAMES; @@ -273,7 +277,7 @@ static struct dquot *find_dquot(unsigned int hashent, struct super_block *sb, static inline void put_dquot_last(struct dquot *dquot) { list_add_tail(&dquot->dq_free, &free_dquots); - dqstats.free_dquots++; + dqstats_inc(DQST_FREE_DQUOTS); } static inline void remove_free_dquot(struct dquot *dquot) @@ -281,7 +285,7 @@ static inline void remove_free_dquot(struct dquot *dquot) if (list_empty(&dquot->dq_free)) return; list_del_init(&dquot->dq_free); - dqstats.free_dquots--; + dqstats_dec(DQST_FREE_DQUOTS); } static inline void put_inuse(struct dquot *dquot) @@ -289,12 +293,12 @@ static inline void put_inuse(struct dquot *dquot) /* We add to the back of inuse list so we don't have to restart * when traversing this list and we block */ list_add_tail(&dquot->dq_inuse, &inuse_list); - dqstats.allocated_dquots++; + dqstats_inc(DQST_ALLOC_DQUOTS); } static inline void remove_inuse(struct dquot *dquot) { - dqstats.allocated_dquots--; + dqstats_dec(DQST_ALLOC_DQUOTS); list_del(&dquot->dq_inuse); } /* @@ -559,8 +563,8 @@ int dquot_scan_active(struct super_block *sb, continue; /* Now we have active dquot so we can just increase use count */ atomic_inc(&dquot->dq_count); - dqstats.lookups++; spin_unlock(&dq_list_lock); + dqstats_inc(DQST_LOOKUPS); dqput(old_dquot); old_dquot = dquot; ret = fn(dquot, priv); @@ -605,8 +609,8 @@ int vfs_quota_sync(struct super_block *sb, int type, int wait) * holding reference so we can safely just increase * use count */ atomic_inc(&dquot->dq_count); - dqstats.lookups++; spin_unlock(&dq_list_lock); + dqstats_inc(DQST_LOOKUPS); sb->dq_op->write_dquot(dquot); dqput(dquot); spin_lock(&dq_list_lock); @@ -618,9 +622,7 @@ int vfs_quota_sync(struct super_block *sb, int type, int wait) if ((cnt == type || type == -1) && sb_has_quota_active(sb, cnt) && info_dirty(&dqopt->info[cnt])) sb->dq_op->write_info(sb, cnt); - spin_lock(&dq_list_lock); - dqstats.syncs++; - spin_unlock(&dq_list_lock); + dqstats_inc(DQST_SYNCS); mutex_unlock(&dqopt->dqonoff_mutex); if (!wait || (sb_dqopt(sb)->flags & DQUOT_QUOTA_SYS_FILE)) @@ -672,6 +674,22 @@ static void prune_dqcache(int count) } } +static int dqstats_read(unsigned int type) +{ + int count = 0; + int cpu; +#ifdef CONFIG_SMP + for_each_possible_cpu(cpu) + count += per_cpu_ptr(dqstats_pcpu, cpu)->stat[type]; + /* Statistics reading is racy, but absolute accuracy isn't required */ + if (count < 0) + count = 0; +#else + count = dqstats[type]; +#endif + return count; +} + /* * This is called from kswapd when we think we need some * more memory @@ -684,7 +702,7 @@ static int shrink_dqcache_memory(int nr, gfp_t gfp_mask) prune_dqcache(nr); spin_unlock(&dq_list_lock); } - return (dqstats.free_dquots / 100) * sysctl_vfs_cache_pressure; + return (dqstats_read(DQST_FREE_DQUOTS)/100) * sysctl_vfs_cache_pressure; } static struct shrinker dqcache_shrinker = { @@ -712,10 +730,7 @@ void dqput(struct dquot *dquot) BUG(); } #endif - - spin_lock(&dq_list_lock); - dqstats.drops++; - spin_unlock(&dq_list_lock); + dqstats_inc(DQST_DROPS); we_slept: spin_lock(&dq_list_lock); if (atomic_read(&dquot->dq_count) > 1) { @@ -832,15 +847,16 @@ we_slept: put_inuse(dquot); /* hash it first so it can be found */ insert_dquot_hash(dquot); - dqstats.lookups++; spin_unlock(&dq_list_lock); + dqstats_inc(DQST_LOOKUPS); + } else { if (!atomic_read(&dquot->dq_count)) remove_free_dquot(dquot); atomic_inc(&dquot->dq_count); - dqstats.cache_hits++; - dqstats.lookups++; spin_unlock(&dq_list_lock); + dqstats_inc(DQST_CACHE_HITS); + dqstats_inc(DQST_LOOKUPS); } /* Wait for dq_lock - after this we know that either dquot_release() is * already finished or it will be canceled due to dq_count > 1 test */ @@ -2474,68 +2490,79 @@ const struct quotactl_ops vfs_quotactl_ops = { .set_dqblk = vfs_set_dqblk }; + +static int do_proc_dqstats(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ +#ifdef CONFIG_SMP + /* Update global table */ + unsigned int type = (int *)table->data - dqstats.stat; + dqstats.stat[type] = dqstats_read(type); +#endif + return proc_dointvec(table, write, buffer, lenp, ppos); +} + static ctl_table fs_dqstats_table[] = { { .procname = "lookups", - .data = &dqstats.lookups, + .data = &dqstats.stat[DQST_LOOKUPS], .maxlen = sizeof(int), .mode = 0444, - .proc_handler = proc_dointvec, + .proc_handler = do_proc_dqstats, }, { .procname = "drops", - .data = &dqstats.drops, + .data = &dqstats.stat[DQST_DROPS], .maxlen = sizeof(int), .mode = 0444, - .proc_handler = proc_dointvec, + .proc_handler = do_proc_dqstats, }, { .procname = "reads", - .data = &dqstats.reads, + .data = &dqstats.stat[DQST_READS], .maxlen = sizeof(int), .mode = 0444, - .proc_handler = proc_dointvec, + .proc_handler = do_proc_dqstats, }, { .procname = "writes", - .data = &dqstats.writes, + .data = &dqstats.stat[DQST_WRITES], .maxlen = sizeof(int), .mode = 0444, - .proc_handler = proc_dointvec, + .proc_handler = do_proc_dqstats, }, { .procname = "cache_hits", - .data = &dqstats.cache_hits, + .data = &dqstats.stat[DQST_CACHE_HITS], .maxlen = sizeof(int), .mode = 0444, - .proc_handler = proc_dointvec, + .proc_handler = do_proc_dqstats, }, { .procname = "allocated_dquots", - .data = &dqstats.allocated_dquots, + .data = &dqstats.stat[DQST_ALLOC_DQUOTS], .maxlen = sizeof(int), .mode = 0444, - .proc_handler = proc_dointvec, + .proc_handler = do_proc_dqstats, }, { .procname = "free_dquots", - .data = &dqstats.free_dquots, + .data = &dqstats.stat[DQST_FREE_DQUOTS], .maxlen = sizeof(int), .mode = 0444, - .proc_handler = proc_dointvec, + .proc_handler = do_proc_dqstats, }, { .procname = "syncs", - .data = &dqstats.syncs, + .data = &dqstats.stat[DQST_SYNCS], .maxlen = sizeof(int), .mode = 0444, - .proc_handler = proc_dointvec, + .proc_handler = do_proc_dqstats, }, #ifdef CONFIG_PRINT_QUOTA_WARNING { .procname = "warnings", .data = &flag_print_warnings, - .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, @@ -2581,6 +2608,13 @@ static int __init dquot_init(void) if (!dquot_hash) panic("Cannot create dquot hash table"); +#ifdef CONFIG_SMP + dqstats_pcpu = alloc_percpu(struct dqstats); + if (!dqstats_pcpu) + panic("Cannot create dquot stats table"); +#endif + memset(&dqstats, 0, sizeof(struct dqstats)); + /* Find power-of-two hlist_heads which can fit into allocation */ nr_hash = (1UL << order) * PAGE_SIZE / sizeof(struct hlist_head); dq_hash_bits = 0; diff --git a/fs/quota/quota_tree.c b/fs/quota/quota_tree.c index f81f4bc..5b7f741 100644 --- a/fs/quota/quota_tree.c +++ b/fs/quota/quota_tree.c @@ -384,7 +384,7 @@ int qtree_write_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot) } else { ret = 0; } - dqstats.writes++; + dqstats_inc(DQST_WRITES); kfree(ddquot); return ret; @@ -634,7 +634,7 @@ int qtree_read_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot) spin_unlock(&dq_data_lock); kfree(ddquot); out: - dqstats.reads++; + dqstats_inc(DQST_READS); return ret; } EXPORT_SYMBOL(qtree_read_dquot); diff --git a/fs/quota/quota_v1.c b/fs/quota/quota_v1.c index 2ae757e..4af344c 100644 --- a/fs/quota/quota_v1.c +++ b/fs/quota/quota_v1.c @@ -71,7 +71,7 @@ static int v1_read_dqblk(struct dquot *dquot) dquot->dq_dqb.dqb_ihardlimit == 0 && dquot->dq_dqb.dqb_isoftlimit == 0) set_bit(DQ_FAKE_B, &dquot->dq_flags); - dqstats.reads++; + dqstats_inc(DQST_READS); return 0; } @@ -104,7 +104,7 @@ static int v1_commit_dqblk(struct dquot *dquot) ret = 0; out: - dqstats.writes++; + dqstats_inc(DQST_WRITES); return ret; } diff --git a/include/linux/quota.h b/include/linux/quota.h index b462916..0f5b4a7 100644 --- a/include/linux/quota.h +++ b/include/linux/quota.h @@ -174,6 +174,8 @@ enum { #include <linux/rwsem.h> #include <linux/spinlock.h> #include <linux/wait.h> +#include <linux/percpu.h> +#include <linux/smp.h> #include <linux/dqblk_xfs.h> #include <linux/dqblk_v1.h> @@ -237,19 +239,38 @@ static inline int info_dirty(struct mem_dqinfo *info) { return test_bit(DQF_INFO_DIRTY_B, &info->dqi_flags); } - +enum { + DQST_LOOKUPS, + DQST_DROPS, + DQST_READS, + DQST_WRITES, + DQST_CACHE_HITS, + DQST_ALLOC_DQUOTS, + DQST_FREE_DQUOTS, + DQST_SYNCS, + _DQST_DQSTAT_LAST +}; struct dqstats { - int lookups; - int drops; - int reads; - int writes; - int cache_hits; - int allocated_dquots; - int free_dquots; - int syncs; + int stat[_DQST_DQSTAT_LAST]; }; -extern struct dqstats dqstats; +extern struct dqstats *dqstats_pcpu; +static inline void dqstats_inc(unsigned int type) +{ +#ifdef CONFIG_SMP + per_cpu_ptr(dqstats_pcpu, smp_processor_id())->stat[type]++; +#else + dqstats[type]++; +#endif +} +static inline void dqstats_dec(unsigned int type) +{ +#ifdef CONFIG_SMP + per_cpu_ptr(dqstats_pcpu, smp_processor_id())->stat[type]--; +#else + dqstats[type]--; +#endif +} #define DQ_MOD_B 0 /* dquot modified since read */ #define DQ_BLKS_B 1 /* uid/gid has been warned about blk limit */ -- 1.6.6.1