Re: [PATCH] quota: Make quota stat accounting lockless.

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Fri 23-04-10 09:31:41, Dmitry Monakhov wrote:
> 
> I've written this patch long time ago, it is pretty simple, and
> allow more non obvious locking optimization to be done later.
  The patch looks OK to me. Will merge it.

								Honza

> From 234b13091fcb16966c2abcd936c8a0ed99822952 Mon Sep 17 00:00:00 2001
> From: Dmitry Monakhov <dmonakhov@xxxxxxxxxx>
> Date: Fri, 23 Apr 2010 09:06:41 +0400
> Subject: [PATCH] quota: Make quota stat accounting lockless.
> 
> Quota stats it mostly writable data structure. Let's alloc percpu
> bucket for each value.
> 
> NOTE: dqstats_read() function is racy against dqstats_{inc,dec}
> and may return inconsistent value. But this is ok since absolute
> accuracy is not required.
> 
> Signed-off-by: Dmitry Monakhov <dmonakhov@xxxxxxxxxx>
> ---
>  fs/quota/dquot.c      |  122 +++++++++++++++++++++++++++++++++++--------------
>  fs/quota/quota_tree.c |    4 +-
>  fs/quota/quota_v1.c   |    4 +-
>  include/linux/quota.h |   26 ++++++----
>  4 files changed, 106 insertions(+), 50 deletions(-)
> 
> diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
> index 05c590e..aa78e26 100644
> --- a/fs/quota/dquot.c
> +++ b/fs/quota/dquot.c
> @@ -82,7 +82,7 @@
>  
>  /*
>   * There are three quota SMP locks. dq_list_lock protects all lists with quotas
> - * and quota formats, dqstats structure containing statistics about the lists
> + * and quota formats.
>   * dq_data_lock protects data from dq_dqb and also mem_dqinfo structures and
>   * also guards consistency of dquot->dq_dqb with inode->i_blocks, i_bytes.
>   * i_blocks and i_bytes updates itself are guarded by i_lock acquired directly
> @@ -132,6 +132,11 @@ static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_state_lock);
>  __cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_data_lock);
>  EXPORT_SYMBOL(dq_data_lock);
>  
> +#ifdef CONFIG_SMP
> +struct dqstats *dqstats_pcpu;
> +#endif
> +struct dqstats  dqstats;
> +
>  static char *quotatypes[] = INITQFNAMES;
>  static struct quota_format_type *quota_formats;	/* List of registered formats */
>  static struct quota_module_name module_names[] = INIT_QUOTA_MODULE_NAMES;
> @@ -273,7 +278,7 @@ static struct dquot *find_dquot(unsigned int hashent, struct super_block *sb,
>  static inline void put_dquot_last(struct dquot *dquot)
>  {
>  	list_add_tail(&dquot->dq_free, &free_dquots);
> -	dqstats.free_dquots++;
> +	dqstats_inc(DQST_FREE_DQUOTS);
>  }
>  
>  static inline void remove_free_dquot(struct dquot *dquot)
> @@ -281,7 +286,7 @@ static inline void remove_free_dquot(struct dquot *dquot)
>  	if (list_empty(&dquot->dq_free))
>  		return;
>  	list_del_init(&dquot->dq_free);
> -	dqstats.free_dquots--;
> +	dqstats_dec(DQST_FREE_DQUOTS);
>  }
>  
>  static inline void put_inuse(struct dquot *dquot)
> @@ -289,12 +294,12 @@ static inline void put_inuse(struct dquot *dquot)
>  	/* We add to the back of inuse list so we don't have to restart
>  	 * when traversing this list and we block */
>  	list_add_tail(&dquot->dq_inuse, &inuse_list);
> -	dqstats.allocated_dquots++;
> +	dqstats_inc(DQST_ALLOC_DQUOTS);
>  }
>  
>  static inline void remove_inuse(struct dquot *dquot)
>  {
> -	dqstats.allocated_dquots--;
> +	dqstats_dec(DQST_ALLOC_DQUOTS);
>  	list_del(&dquot->dq_inuse);
>  }
>  /*
> @@ -559,8 +564,8 @@ int dquot_scan_active(struct super_block *sb,
>  			continue;
>  		/* Now we have active dquot so we can just increase use count */
>  		atomic_inc(&dquot->dq_count);
> -		dqstats.lookups++;
>  		spin_unlock(&dq_list_lock);
> +		dqstats_inc(DQST_LOOKUPS);
>  		dqput(old_dquot);
>  		old_dquot = dquot;
>  		ret = fn(dquot, priv);
> @@ -605,8 +610,8 @@ int vfs_quota_sync(struct super_block *sb, int type, int wait)
>   			 * holding reference so we can safely just increase
>  			 * use count */
>  			atomic_inc(&dquot->dq_count);
> -			dqstats.lookups++;
>  			spin_unlock(&dq_list_lock);
> +			dqstats_inc(DQST_LOOKUPS);
>  			sb->dq_op->write_dquot(dquot);
>  			dqput(dquot);
>  			spin_lock(&dq_list_lock);
> @@ -618,9 +623,7 @@ int vfs_quota_sync(struct super_block *sb, int type, int wait)
>  		if ((cnt == type || type == -1) && sb_has_quota_active(sb, cnt)
>  		    && info_dirty(&dqopt->info[cnt]))
>  			sb->dq_op->write_info(sb, cnt);
> -	spin_lock(&dq_list_lock);
> -	dqstats.syncs++;
> -	spin_unlock(&dq_list_lock);
> +	dqstats_inc(DQST_SYNCS);
>  	mutex_unlock(&dqopt->dqonoff_mutex);
>  
>  	if (!wait || (sb_dqopt(sb)->flags & DQUOT_QUOTA_SYS_FILE))
> @@ -684,7 +687,7 @@ static int shrink_dqcache_memory(int nr, gfp_t gfp_mask)
>  		prune_dqcache(nr);
>  		spin_unlock(&dq_list_lock);
>  	}
> -	return (dqstats.free_dquots / 100) * sysctl_vfs_cache_pressure;
> +	return (dqstats_read(DQST_FREE_DQUOTS)/100) * sysctl_vfs_cache_pressure;
>  }
>  
>  static struct shrinker dqcache_shrinker = {
> @@ -712,10 +715,7 @@ void dqput(struct dquot *dquot)
>  		BUG();
>  	}
>  #endif
> -	
> -	spin_lock(&dq_list_lock);
> -	dqstats.drops++;
> -	spin_unlock(&dq_list_lock);
> +	dqstats_inc(DQST_DROPS);
>  we_slept:
>  	spin_lock(&dq_list_lock);
>  	if (atomic_read(&dquot->dq_count) > 1) {
> @@ -832,15 +832,16 @@ we_slept:
>  		put_inuse(dquot);
>  		/* hash it first so it can be found */
>  		insert_dquot_hash(dquot);
> -		dqstats.lookups++;
>  		spin_unlock(&dq_list_lock);
> +		dqstats_inc(DQST_LOOKUPS);
> +
>  	} else {
>  		if (!atomic_read(&dquot->dq_count))
>  			remove_free_dquot(dquot);
>  		atomic_inc(&dquot->dq_count);
> -		dqstats.cache_hits++;
> -		dqstats.lookups++;
>  		spin_unlock(&dq_list_lock);
> +		dqstats_inc(DQST_CACHE_HITS);
> +		dqstats_inc(DQST_LOOKUPS);
>  	}
>  	/* Wait for dq_lock - after this we know that either dquot_release() is
>  	 * already finished or it will be canceled due to dq_count > 1 test */
> @@ -2474,68 +2475,112 @@ const struct quotactl_ops vfs_quotactl_ops = {
>  	.set_dqblk	= vfs_set_dqblk
>  };
>  
> +void dqstats_inc(unsigned int type)
> +{
> +#ifdef CONFIG_SMP
> +	per_cpu_ptr(dqstats_pcpu, smp_processor_id())->stat[type]++;
> +#else
> +	dqstats[type]++;
> +#endif
> +}
> +
> +void dqstats_dec(unsigned int type)
> +{
> +#ifdef CONFIG_SMP
> +	per_cpu_ptr(dqstats_pcpu, smp_processor_id())->stat[type]--;
> +#else
> +	dqstats[type]--;
> +#endif
> +}
> +
> +int dqstats_read(unsigned int type)
> +{
> +	int count = 0;
> +	int cpu;
> +#ifdef CONFIG_SMP
> +	for_each_possible_cpu(cpu)
> +		count += per_cpu_ptr(dqstats_pcpu, cpu)->stat[type];
> +	/* Statistics reading is racy, but absolute accuracy isn't required */
> +	if (count < 0)
> +		count = 0;
> +#else
> +	count = dqstats[type];
> +#endif
> +	return count;
> +}
> +
> +static int do_proc_dqstats(struct ctl_table *table, int write,
> +		     void __user *buffer, size_t *lenp, loff_t *ppos)
> +{
> +#ifdef CONFIG_SMP
> +	/* Update global table */
> +	unsigned int type = (int *)table->data - dqstats.stat;
> +	dqstats.stat[type] = dqstats_read(type);
> +#endif
> +	return proc_dointvec(table, write, buffer, lenp, ppos);
> +}
> +
>  static ctl_table fs_dqstats_table[] = {
>  	{
>  		.procname	= "lookups",
> -		.data		= &dqstats.lookups,
> +		.data		= &dqstats.stat[DQST_LOOKUPS],
>  		.maxlen		= sizeof(int),
>  		.mode		= 0444,
> -		.proc_handler	= proc_dointvec,
> +		.proc_handler	= do_proc_dqstats,
>  	},
>  	{
>  		.procname	= "drops",
> -		.data		= &dqstats.drops,
> +		.data		= &dqstats.stat[DQST_DROPS],
>  		.maxlen		= sizeof(int),
>  		.mode		= 0444,
> -		.proc_handler	= proc_dointvec,
> +		.proc_handler	= do_proc_dqstats,
>  	},
>  	{
>  		.procname	= "reads",
> -		.data		= &dqstats.reads,
> +		.data		= &dqstats.stat[DQST_READS],
>  		.maxlen		= sizeof(int),
>  		.mode		= 0444,
> -		.proc_handler	= proc_dointvec,
> +		.proc_handler	= do_proc_dqstats,
>  	},
>  	{
>  		.procname	= "writes",
> -		.data		= &dqstats.writes,
> +		.data		= &dqstats.stat[DQST_WRITES],
>  		.maxlen		= sizeof(int),
>  		.mode		= 0444,
> -		.proc_handler	= proc_dointvec,
> +		.proc_handler	= do_proc_dqstats,
>  	},
>  	{
>  		.procname	= "cache_hits",
> -		.data		= &dqstats.cache_hits,
> +		.data		= &dqstats.stat[DQST_CACHE_HITS],
>  		.maxlen		= sizeof(int),
>  		.mode		= 0444,
> -		.proc_handler	= proc_dointvec,
> +		.proc_handler	= do_proc_dqstats,
>  	},
>  	{
>  		.procname	= "allocated_dquots",
> -		.data		= &dqstats.allocated_dquots,
> +		.data		= &dqstats.stat[DQST_ALLOC_DQUOTS],
>  		.maxlen		= sizeof(int),
>  		.mode		= 0444,
> -		.proc_handler	= proc_dointvec,
> +		.proc_handler	= do_proc_dqstats,
>  	},
>  	{
>  		.procname	= "free_dquots",
> -		.data		= &dqstats.free_dquots,
> +		.data		= &dqstats.stat[DQST_FREE_DQUOTS],
>  		.maxlen		= sizeof(int),
>  		.mode		= 0444,
> -		.proc_handler	= proc_dointvec,
> +		.proc_handler	= do_proc_dqstats,
>  	},
>  	{
>  		.procname	= "syncs",
> -		.data		= &dqstats.syncs,
> +		.data		= &dqstats.stat[DQST_SYNCS],
>  		.maxlen		= sizeof(int),
>  		.mode		= 0444,
> -		.proc_handler	= proc_dointvec,
> +		.proc_handler	= do_proc_dqstats,
>  	},
>  #ifdef CONFIG_PRINT_QUOTA_WARNING
>  	{
>  		.procname	= "warnings",
>  		.data		= &flag_print_warnings,
> -		.maxlen		= sizeof(int),
>  		.mode		= 0644,
>  		.proc_handler	= proc_dointvec,
>  	},
> @@ -2581,6 +2626,13 @@ static int __init dquot_init(void)
>  	if (!dquot_hash)
>  		panic("Cannot create dquot hash table");
>  
> +#ifdef CONFIG_SMP
> +	dqstats_pcpu = alloc_percpu(struct dqstats);
> +	if (!dqstats_pcpu)
> +		panic("Cannot create dquot stats table");
> +#endif
> +	memset(&dqstats, 0, sizeof(struct dqstats));
> +
>  	/* Find power-of-two hlist_heads which can fit into allocation */
>  	nr_hash = (1UL << order) * PAGE_SIZE / sizeof(struct hlist_head);
>  	dq_hash_bits = 0;
> diff --git a/fs/quota/quota_tree.c b/fs/quota/quota_tree.c
> index f81f4bc..5b7f741 100644
> --- a/fs/quota/quota_tree.c
> +++ b/fs/quota/quota_tree.c
> @@ -384,7 +384,7 @@ int qtree_write_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot)
>  	} else {
>  		ret = 0;
>  	}
> -	dqstats.writes++;
> +	dqstats_inc(DQST_WRITES);
>  	kfree(ddquot);
>  
>  	return ret;
> @@ -634,7 +634,7 @@ int qtree_read_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot)
>  	spin_unlock(&dq_data_lock);
>  	kfree(ddquot);
>  out:
> -	dqstats.reads++;
> +	dqstats_inc(DQST_READS);
>  	return ret;
>  }
>  EXPORT_SYMBOL(qtree_read_dquot);
> diff --git a/fs/quota/quota_v1.c b/fs/quota/quota_v1.c
> index 2ae757e..4af344c 100644
> --- a/fs/quota/quota_v1.c
> +++ b/fs/quota/quota_v1.c
> @@ -71,7 +71,7 @@ static int v1_read_dqblk(struct dquot *dquot)
>  	    dquot->dq_dqb.dqb_ihardlimit == 0 &&
>  	    dquot->dq_dqb.dqb_isoftlimit == 0)
>  		set_bit(DQ_FAKE_B, &dquot->dq_flags);
> -	dqstats.reads++;
> +	dqstats_inc(DQST_READS);
>  
>  	return 0;
>  }
> @@ -104,7 +104,7 @@ static int v1_commit_dqblk(struct dquot *dquot)
>  	ret = 0;
>  
>  out:
> -	dqstats.writes++;
> +	dqstats_inc(DQST_WRITES);
>  
>  	return ret;
>  }
> diff --git a/include/linux/quota.h b/include/linux/quota.h
> index b462916..40e0157 100644
> --- a/include/linux/quota.h
> +++ b/include/linux/quota.h
> @@ -237,19 +237,23 @@ static inline int info_dirty(struct mem_dqinfo *info)
>  {
>  	return test_bit(DQF_INFO_DIRTY_B, &info->dqi_flags);
>  }
> -
> +enum {
> +	DQST_LOOKUPS,
> +	DQST_DROPS,
> +	DQST_READS,
> +	DQST_WRITES,
> +	DQST_CACHE_HITS,
> +	DQST_ALLOC_DQUOTS,
> +	DQST_FREE_DQUOTS,
> +	DQST_SYNCS,
> +	_DQST_DQSTAT_LAST
> +};
>  struct dqstats {
> -	int lookups;
> -	int drops;
> -	int reads;
> -	int writes;
> -	int cache_hits;
> -	int allocated_dquots;
> -	int free_dquots;
> -	int syncs;
> +	int stat[_DQST_DQSTAT_LAST];
>  };
> -
> -extern struct dqstats dqstats;
> +extern void dqstats_inc(unsigned int type);
> +extern void dqstats_dec(unsigned int type);
> +extern int dqstats_read(unsigned int type);
>  
>  #define DQ_MOD_B	0	/* dquot modified since read */
>  #define DQ_BLKS_B	1	/* uid/gid has been warned about blk limit */
> -- 
> 1.6.6.1
> 

-- 
Jan Kara <jack@xxxxxxx>
SUSE Labs, CR
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [Samba]     [Device Mapper]     [CEPH Development]
  Powered by Linux