[PATCH 2/3] xfs: dquot refcounting by atomics

From: Dave Chinner <dchinner@xxxxxxxxxx>

On concurrent workloads, the dquot lock completely serialises
processing. One contributor to this is that taking a reference count
on a dquot requires taking the dquot lock. If we make the reference
count atomic, we no longer need to take the lock just to bump the
count.
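
The hold fast path illustrates the change directly (this mirrors the
xfs_qm_dqhold() hunk in the diff below):

	/* before: need the dquot lock just to bump the reference count */
	xfs_dqlock(dqp);
	dqp->q_nrefs++;
	xfs_dqunlock(dqp);

	/* after: lockless reference bump */
	atomic_inc(&dqp->q_nrefs);

The drop side pairs with it: xfs_qm_dqput() replaces the locked
"--dqp->q_nrefs == 0" check with atomic_dec_and_test(&dqp->q_nrefs),
so dropping the last reference is still detected atomically.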

Profiles showed that the reference count locking really hurt:

-   5.02%  [kernel]  [k] __mutex_lock_slowpath
   - __mutex_lock_slowpath
      - 99.66% mutex_lock
         - 31.04% xfs_qm_vop_create_dqattach
         - 30.03% xfs_qm_vop_dqalloc
         - 20.56% xfs_qm_dqrele
         - 9.16% xfs_trans_dqresv
         - 7.31% xfs_trans_dqlockedjoin

The contention is primarily in xfs_qm_vop_create_dqattach() and
xfs_qm_vop_dqalloc(). Baseline performance looked like this:

FSUse%        Count         Size    Files/sec     App Overhead
     0      1600000            0      17666.5         15377143
     0      3200000            0      17018.6         15922906
     0      4800000            0      17373.5         16149660
     0      6400000            0      16564.9         17234139
     0      8000000            0      17022.4         15987230
     0      9600000            0      16684.2         14834507
     0     11200000            0      16770.3         27330353
     0     12800000            0      15921.4         18935868


So: convert the refcount to an atomic and slightly rearrange the dquot
structure to separate the read-mostly fields from the contended fields
(a sketch of the layout idea follows the results below). The profile
changes drastically:

-   5.54%  [kernel]  [k] __mutex_lock_slowpath
   - __mutex_lock_slowpath
      - 99.67% mutex_lock
         - 45.15% xfs_trans_dqlockedjoin
         - 44.71% xfs_trans_dqresv
         - 8.23% xfs_qm_dqrele

The reference count locking is gone completely and now all
contention is within the transaction subsystem. The result:

FSUse%        Count         Size    Files/sec     App Overhead
     0      1600000            0      17559.3         15606077
     0      3200000            0      18738.9         14026009
     0      4800000            0      18960.0         14381162
     0      6400000            0      19026.5         14422024
     0      8000000            0      18456.6         15369059
     0      9600000            0      17828.4         21075613
     0     11200000            0      17903.9         16474615
     0     12800000            0      17546.0         13919798

That is roughly a 10% improvement in performance.
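
For reference, the layout idea behind the structure rearrangement looks
roughly like this (a sketch only, using the field names from the
xfs_dquot hunk below; the patch only reorders fields and adds no
explicit alignment annotations):

	struct xfs_dquot {
		/* read-mostly: set up at dquot init, rarely written after */
		struct xfs_mount	*q_mount;
		xfs_daddr_t		q_blkno;
		int			q_bufoffset;
		xfs_fileoff_t		q_fileoffset;
		/* reservation counts and prealloc watermarks follow */

		/* contended: dirtied on every hold/put, lock, pin, LRU move */
		atomic_t		q_nrefs;
		struct mutex		q_qlock;
		atomic_t		q_pincount;
		struct list_head	q_lru;
		/* remaining fields omitted here, see the diff */
	};

Keeping the frequently written fields together at the tail means that
concurrent hold/put and lock traffic tends to bounce a different cache
line from the one the read-mostly lookup fields sit on.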

Signed-off-by: Dave Chinner <dchinner@xxxxxxxxxx>
---
 fs/xfs/xfs_dquot.c | 16 ++++++++++------
 fs/xfs/xfs_dquot.h | 16 +++++++---------
 fs/xfs/xfs_qm.c    |  6 +++---
 fs/xfs/xfs_trace.h |  2 +-
 4 files changed, 21 insertions(+), 19 deletions(-)

diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index 4ce4984..975a46c 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -748,7 +748,7 @@ restart:
 			goto restart;
 		}
 
-		dqp->q_nrefs++;
+		atomic_inc(&dqp->q_nrefs);
 		mutex_unlock(&qi->qi_tree_lock);
 
 		trace_xfs_dqget_hit(dqp);
@@ -799,6 +799,12 @@ restart:
 		}
 	}
 
+	/*
+	 * set the reference count before we insert the dquot into the tree
+	 * so it is safe from reclaim by default.
+	 */
+	atomic_set(&dqp->q_nrefs, 1);
+
 	mutex_lock(&qi->qi_tree_lock);
 	error = -radix_tree_insert(tree, id, dqp);
 	if (unlikely(error)) {
@@ -816,11 +822,9 @@ restart:
 	}
 
 	/*
-	 * We return a locked dquot to the caller, with a reference taken
+	 * We return a locked, referenced dquot to the caller.
 	 */
 	xfs_dqlock(dqp);
-	dqp->q_nrefs = 1;
-
 	qi->qi_dquots++;
 	mutex_unlock(&qi->qi_tree_lock);
 
@@ -841,12 +845,12 @@ void
 xfs_qm_dqput(
 	struct xfs_dquot	*dqp)
 {
-	ASSERT(dqp->q_nrefs > 0);
+	ASSERT(atomic_read(&dqp->q_nrefs) > 0);
 	ASSERT(XFS_DQ_IS_LOCKED(dqp));
 
 	trace_xfs_dqput(dqp);
 
-	if (--dqp->q_nrefs == 0) {
+	if (atomic_dec_and_test(&dqp->q_nrefs)) {
 		struct xfs_quotainfo	*qi = dqp->q_mount->m_quotainfo;
 		trace_xfs_dqput_free(dqp);
 
diff --git a/fs/xfs/xfs_dquot.h b/fs/xfs/xfs_dquot.h
index 68a68f7..949a47b 100644
--- a/fs/xfs/xfs_dquot.h
+++ b/fs/xfs/xfs_dquot.h
@@ -44,26 +44,26 @@ enum {
  */
 typedef struct xfs_dquot {
 	uint		 dq_flags;	/* various flags (XFS_DQ_*) */
-	struct list_head q_lru;		/* global free list of dquots */
 	struct xfs_mount*q_mount;	/* filesystem this relates to */
-	struct xfs_trans*q_transp;	/* trans this belongs to currently */
-	uint		 q_nrefs;	/* # active refs from inodes */
 	xfs_daddr_t	 q_blkno;	/* blkno of dquot buffer */
 	int		 q_bufoffset;	/* off of dq in buffer (# dquots) */
 	xfs_fileoff_t	 q_fileoffset;	/* offset in quotas file */
-
-	xfs_disk_dquot_t q_core;	/* actual usage & quotas */
-	xfs_dq_logitem_t q_logitem;	/* dquot log item */
 	xfs_qcnt_t	 q_res_bcount;	/* total regular nblks used+reserved */
 	xfs_qcnt_t	 q_res_icount;	/* total inos allocd+reserved */
 	xfs_qcnt_t	 q_res_rtbcount;/* total realtime blks used+reserved */
 	xfs_qcnt_t	 q_prealloc_lo_wmark;/* prealloc throttle wmark */
 	xfs_qcnt_t	 q_prealloc_hi_wmark;/* prealloc disabled wmark */
 	int64_t		 q_low_space[XFS_QLOWSP_MAX];
+
+	atomic_t	 q_nrefs;	/* # active refs from inodes */
+	xfs_disk_dquot_t q_core;	/* actual usage & quotas */
+	xfs_dq_logitem_t q_logitem;	/* dquot log item */
 	struct mutex	 q_qlock;	/* quota lock */
 	struct completion q_flush;	/* flush completion queue */
 	atomic_t          q_pincount;	/* dquot pin count */
 	wait_queue_head_t q_pinwait;	/* dquot pinning wait queue */
+	struct list_head q_lru;		/* global free list of dquots */
+	struct xfs_trans *q_transp;	/* trans this belongs to currently */
 } xfs_dquot_t;
 
 /*
@@ -164,9 +164,7 @@ extern void		xfs_dquot_set_prealloc_limits(struct xfs_dquot *);
 
 static inline struct xfs_dquot *xfs_qm_dqhold(struct xfs_dquot *dqp)
 {
-	xfs_dqlock(dqp);
-	dqp->q_nrefs++;
-	xfs_dqunlock(dqp);
+	atomic_inc(&dqp->q_nrefs);
 	return dqp;
 }
 
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index d31b88e..31c0f85 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -136,7 +136,7 @@ xfs_qm_dqpurge(
 	struct xfs_quotainfo	*qi = mp->m_quotainfo;
 
 	xfs_dqlock(dqp);
-	if ((dqp->dq_flags & XFS_DQ_FREEING) || dqp->q_nrefs != 0) {
+	if ((dqp->dq_flags & XFS_DQ_FREEING) || atomic_read(&dqp->q_nrefs)) {
 		xfs_dqunlock(dqp);
 		return EAGAIN;
 	}
@@ -540,7 +540,7 @@ xfs_qm_dquot_isolate(
 	 * This dquot has acquired a reference in the meantime remove it from
 	 * the freelist and try again.
 	 */
-	if (dqp->q_nrefs) {
+	if (atomic_read(&dqp->q_nrefs)) {
 		xfs_dqunlock(dqp);
 		XFS_STATS_INC(xs_qm_dqwants);
 
@@ -588,7 +588,7 @@ xfs_qm_dquot_isolate(
 	dqp->dq_flags |= XFS_DQ_FREEING;
 	xfs_dqunlock(dqp);
 
-	ASSERT(dqp->q_nrefs == 0);
+	ASSERT(atomic_read(&dqp->q_nrefs) == 0);
 	list_move_tail(&dqp->q_lru, &isol->dispose);
 	XFS_STATS_DEC(xs_qm_dquot_unused);
 	trace_xfs_dqreclaim_done(dqp);
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 425dfa4..051813c 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -769,7 +769,7 @@ DECLARE_EVENT_CLASS(xfs_dquot_class,
 		__entry->dev = dqp->q_mount->m_super->s_dev;
 		__entry->id = be32_to_cpu(dqp->q_core.d_id);
 		__entry->flags = dqp->dq_flags;
-		__entry->nrefs = dqp->q_nrefs;
+		__entry->nrefs = atomic_read(&dqp->q_nrefs);
 		__entry->res_bcount = dqp->q_res_bcount;
 		__entry->bcount = be64_to_cpu(dqp->q_core.d_bcount);
 		__entry->icount = be64_to_cpu(dqp->q_core.d_icount);
-- 
1.8.4.rc3
