[PATCH 6/6] xfs: make discard operations asynchronous

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Instead of waiting for each discard request keep the CIL context alive
until all of them are done, at which point we can tear it down completely
and remove the busy extents from the rbtree.

At this point I'm doing the I/O completion from IRQ context for simplicity,
but I'll benchmark it against a version that uses a workqueue.

Signed-off-by: Christoph Hellwig <hch@xxxxxx>

Index: xfs/fs/xfs/linux-2.6/xfs_discard.c
===================================================================
--- xfs.orig/fs/xfs/linux-2.6/xfs_discard.c	2011-03-22 15:58:10.301855813 +0100
+++ xfs/fs/xfs/linux-2.6/xfs_discard.c	2011-03-22 18:39:09.000000000 +0100
@@ -30,6 +30,7 @@
 #include "xfs_inode.h"
 #include "xfs_alloc.h"
 #include "xfs_error.h"
+#include "xfs_log_priv.h"
 #include "xfs_discard.h"
 #include "xfs_trace.h"
 
@@ -192,37 +193,119 @@ xfs_ioc_trim(
 	return 0;
 }
 
+void
+xfs_cil_discard_done(
+	struct xfs_cil_ctx	*ctx)
+{
+	if (atomic_dec_and_test(&ctx->discards)) {
+		struct xfs_busy_extent	*busyp, *n;
+
+		list_for_each_entry_safe(busyp, n, &ctx->busy_extents, list)
+			xfs_alloc_busy_clear(ctx->cil->xc_log->l_mp, busyp);
+		kmem_free(ctx);
+	}
+}
+
+STATIC void
+xfs_discard_end_io(
+	struct bio		*bio,
+	int			err)
+{
+	struct xfs_cil_ctx	*ctx = bio->bi_private;
+
+	if (err && err != -EOPNOTSUPP) {
+		xfs_info(ctx->cil->xc_log->l_mp,
+			 "I/O error during discard\n");
+	}
+
+	bio_put(bio);
+	xfs_cil_discard_done(ctx);
+}
+
+static int
+xfs_issue_discard(
+	struct block_device	*bdev,
+	sector_t		sector,
+	sector_t		nr_sects,
+	gfp_t			gfp_mask,
+	struct xfs_cil_ctx	*ctx)
+{
+	struct request_queue	*q = bdev_get_queue(bdev);
+	unsigned int		max_discard_sectors;
+	struct bio		*bio;
+	int			ret = 0;
+
+	if (!q)
+		return -ENXIO;
+
+	if (!blk_queue_discard(q))
+		return -EOPNOTSUPP;
+
+	/*
+	 * Ensure that max_discard_sectors is of the proper
+	 * granularity
+	 */
+	max_discard_sectors = min(q->limits.max_discard_sectors, UINT_MAX >> 9);
+	if (q->limits.discard_granularity) {
+		unsigned int disc_sects = q->limits.discard_granularity >> 9;
+
+		max_discard_sectors &= ~(disc_sects - 1);
+	}
+
+
+	while (nr_sects && !ret) {
+		bio = bio_alloc(gfp_mask, 1);
+		if (!bio) {
+			ret = -ENOMEM;
+			break;
+		}
+
+		bio->bi_sector = sector;
+		bio->bi_end_io = xfs_discard_end_io;
+		bio->bi_bdev = bdev;
+		bio->bi_private = ctx;
+
+		if (nr_sects > max_discard_sectors) {
+			bio->bi_size = max_discard_sectors << 9;
+			nr_sects -= max_discard_sectors;
+			sector += max_discard_sectors;
+		} else {
+			bio->bi_size = nr_sects << 9;
+			nr_sects = 0;
+		}
+
+		atomic_inc(&ctx->discards);
+		submit_bio(REQ_WRITE | REQ_DISCARD, bio);
+	}
+
+	return ret;
+}
+
 int
 xfs_discard_extent(
 	struct xfs_mount	*mp,
-	struct xfs_busy_extent	*busyp)
+	struct xfs_busy_extent	*busyp,
+	struct xfs_cil_ctx	*ctx)
 {
 	struct xfs_perag	*pag;
-	int			error = 0;
 	xfs_daddr_t		bno;
 	int64_t			len;
 	bool			done  = false;
 
-	if ((mp->m_flags & XFS_MOUNT_DISCARD) == 0)
-		return 0;
-
 	bno = XFS_AGB_TO_DADDR(mp, busyp->agno, busyp->bno);
 	len = XFS_FSB_TO_BB(mp, busyp->length);
 
 	pag = xfs_perag_get(mp, busyp->agno);
-	spin_lock(&pag->pagb_lock);
+	spin_lock_irq(&pag->pagb_lock);
 	if (!busyp->length)
 		done = true;
 	busyp->flags = XFS_ALLOC_BUSY_DISCARDED;
-	spin_unlock(&pag->pagb_lock);
+	spin_unlock_irq(&pag->pagb_lock);
 	xfs_perag_put(pag);
 
 	if (done)
 		return 0;
 
-	error = -blkdev_issue_discard(mp->m_ddev_targp->bt_bdev, bno, len,
-				      GFP_NOFS, 0);
-	if (error && error != EOPNOTSUPP)
-		xfs_info(mp, "discard failed, error %d", error);
-	return error;
+	return -xfs_issue_discard(mp->m_ddev_targp->bt_bdev,
+				  bno, len, GFP_NOFS, ctx);
 }
Index: xfs/fs/xfs/linux-2.6/xfs_discard.h
===================================================================
--- xfs.orig/fs/xfs/linux-2.6/xfs_discard.h	2011-03-22 15:58:10.313857879 +0100
+++ xfs/fs/xfs/linux-2.6/xfs_discard.h	2011-03-22 18:39:09.000000000 +0100
@@ -3,10 +3,13 @@
 
 struct fstrim_range;
 struct xfs_busy_extent;
+struct xfs_cil_ctx;
 
 extern int	xfs_ioc_trim(struct xfs_mount *, struct fstrim_range __user *);
 
 extern int	xfs_discard_extent(struct xfs_mount *,
-				   struct xfs_busy_extent *);
+				   struct xfs_busy_extent *,
+				   struct xfs_cil_ctx *);
+extern void	xfs_cil_discard_done(struct xfs_cil_ctx	*ctx);
 
 #endif /* XFS_DISCARD_H */
Index: xfs/fs/xfs/xfs_log_cil.c
===================================================================
--- xfs.orig/fs/xfs/xfs_log_cil.c	2011-03-22 15:58:10.329855977 +0100
+++ xfs/fs/xfs/xfs_log_cil.c	2011-03-22 18:39:09.000000000 +0100
@@ -68,6 +68,7 @@ xlog_cil_init(
 	INIT_LIST_HEAD(&ctx->busy_extents);
 	ctx->sequence = 1;
 	ctx->cil = cil;
+	atomic_set(&ctx->discards, 1);
 	cil->xc_ctx = ctx;
 	cil->xc_current_sequence = ctx->sequence;
 
@@ -364,14 +365,18 @@ xlog_cil_committed(
 	struct xfs_cil_ctx	*ctx = args;
 	struct xfs_mount	*mp = ctx->cil->xc_log->l_mp;
 	struct xfs_busy_extent	*busyp, *n;
+	bool			keep_alive = false;
 
 	xfs_trans_committed_bulk(ctx->cil->xc_log->l_ailp, ctx->lv_chain,
 					ctx->start_lsn, abort);
 
-	list_for_each_entry_safe(busyp, n, &ctx->busy_extents, list) {
-		if (!abort)
-			xfs_discard_extent(mp, busyp);
-		xfs_alloc_busy_clear(mp, busyp);
+	if (!(mp->m_flags & XFS_MOUNT_DISCARD) || abort) {
+		list_for_each_entry_safe(busyp, n, &ctx->busy_extents, list)
+			xfs_alloc_busy_clear(mp, busyp);
+	} else if (!list_empty(&ctx->busy_extents)) {
+		list_for_each_entry(busyp, &ctx->busy_extents, list)
+			xfs_discard_extent(mp, busyp, ctx);
+		keep_alive = true;
 	}
 
 	spin_lock(&ctx->cil->xc_cil_lock);
@@ -379,7 +384,10 @@ xlog_cil_committed(
 	spin_unlock(&ctx->cil->xc_cil_lock);
 
 	xlog_cil_free_logvec(ctx->lv_chain);
-	kmem_free(ctx);
+	if (keep_alive)
+		xfs_cil_discard_done(ctx);
+	else
+		kmem_free(ctx);
 }
 
 /*
@@ -490,6 +498,7 @@ xlog_cil_push(
 	INIT_LIST_HEAD(&new_ctx->busy_extents);
 	new_ctx->sequence = ctx->sequence + 1;
 	new_ctx->cil = cil;
+	atomic_set(&new_ctx->discards, 1);
 	cil->xc_ctx = new_ctx;
 
 	/*
Index: xfs/fs/xfs/xfs_alloc.c
===================================================================
--- xfs.orig/fs/xfs/xfs_alloc.c	2011-03-22 18:39:05.173855849 +0100
+++ xfs/fs/xfs/xfs_alloc.c	2011-03-22 18:39:09.000000000 +0100
@@ -2498,7 +2498,7 @@ xfs_alloc_busy_insert(
 	trace_xfs_alloc_busy(tp, agno, bno, len, 0);
 
 	pag = xfs_perag_get(tp->t_mountp, new->agno);
-	spin_lock(&pag->pagb_lock);
+	spin_lock_irq(&pag->pagb_lock);
 	rbp = &pag->pagb_tree.rb_node;
 	while (*rbp) {
 		parent = *rbp;
@@ -2521,7 +2521,7 @@ xfs_alloc_busy_insert(
 	rb_insert_color(&new->rb_node, &pag->pagb_tree);
 
 	list_add(&new->list, &tp->t_busy);
-	spin_unlock(&pag->pagb_lock);
+	spin_unlock_irq(&pag->pagb_lock);
 	xfs_perag_put(pag);
 }
 
@@ -2547,7 +2547,7 @@ xfs_alloc_busy_search(
 	int			match = 0;
 
 	pag = xfs_perag_get(mp, agno);
-	spin_lock(&pag->pagb_lock);
+	spin_lock_irq(&pag->pagb_lock);
 
 	rbp = pag->pagb_tree.rb_node;
 
@@ -2570,7 +2570,7 @@ xfs_alloc_busy_search(
 			break;
 		}
 	}
-	spin_unlock(&pag->pagb_lock);
+	spin_unlock_irq(&pag->pagb_lock);
 	trace_xfs_alloc_busysearch(mp, agno, bno, len, !!match);
 	xfs_perag_put(pag);
 	return match;
@@ -2706,7 +2706,7 @@ xfs_alloc_busy_reuse(
 
 	pag = xfs_perag_get(tp->t_mountp, agno);
 restart:
-	spin_lock(&pag->pagb_lock);
+	spin_lock_irq(&pag->pagb_lock);
 	rbp = pag->pagb_tree.rb_node;
 	while (rbp) {
 		struct xfs_busy_extent *busyp =
@@ -2727,7 +2727,7 @@ restart:
 		overlap = xfs_alloc_busy_try_reuse(pag, busyp,
 						   fbno, fbno + flen);
 		if (overlap == -1 || (overlap && userdata)) {
-			spin_unlock(&pag->pagb_lock);
+			spin_unlock_irq(&pag->pagb_lock);
 			xfs_log_force(tp->t_mountp, XFS_LOG_SYNC);
 			goto restart;
 		}
@@ -2743,7 +2743,7 @@ restart:
 		else
 			rbp = rbp->rb_right;
 	}
-	spin_unlock(&pag->pagb_lock);
+	spin_unlock_irq(&pag->pagb_lock);
 	xfs_perag_put(pag);
 }
 
@@ -2764,7 +2764,7 @@ xfs_alloc_busy_trim(
 	ASSERT(flen > 0);
 
 restart:
-	spin_lock(&args->pag->pagb_lock);
+	spin_lock_irq(&args->pag->pagb_lock);
 	rbp = args->pag->pagb_tree.rb_node;
 	while (rbp && flen >= args->minlen) {
 		struct xfs_busy_extent *busyp =
@@ -2789,7 +2789,7 @@ restart:
 			overlap = xfs_alloc_busy_try_reuse(args->pag, busyp,
 							   fbno, fbno + flen);
 			if (unlikely(overlap == -1)) {
-				spin_unlock(&args->pag->pagb_lock);
+				spin_unlock_irq(&args->pag->pagb_lock);
 				xfs_log_force(args->mp, XFS_LOG_SYNC);
 				goto restart;
 			}
@@ -2935,7 +2935,7 @@ restart:
 		flen = fend - fbno;
 	}
 out:
-	spin_unlock(&args->pag->pagb_lock);
+	spin_unlock_irq(&args->pag->pagb_lock);
 	*rbno = fbno;
 	*rlen = flen;
 	return;
@@ -2944,7 +2944,7 @@ fail:
 	 * Return a zero extent length as failure indications.  All callers
 	 * re-check if the trimmed extent satisfies the minlen requirement.
 	 */
-	spin_unlock(&args->pag->pagb_lock);
+	spin_unlock_irq(&args->pag->pagb_lock);
 	*rbno = fbno;
 	*rlen = 0;
 }
@@ -2955,6 +2955,7 @@ xfs_alloc_busy_clear(
 	struct xfs_busy_extent	*busyp)
 {
 	struct xfs_perag	*pag;
+	unsigned long		flags;
 
 	trace_xfs_alloc_unbusy(mp, busyp->agno, busyp->bno,
 						busyp->length);
@@ -2962,10 +2963,10 @@ xfs_alloc_busy_clear(
 	list_del_init(&busyp->list);
 
 	pag = xfs_perag_get(mp, busyp->agno);
-	spin_lock(&pag->pagb_lock);
+	spin_lock_irqsave(&pag->pagb_lock, flags);
 	if (busyp->length)
 		rb_erase(&busyp->rb_node, &pag->pagb_tree);
-	spin_unlock(&pag->pagb_lock);
+	spin_unlock_irqrestore(&pag->pagb_lock, flags);
 	xfs_perag_put(pag);
 
 	kmem_free(busyp);
Index: xfs/fs/xfs/xfs_log_priv.h
===================================================================
--- xfs.orig/fs/xfs/xfs_log_priv.h	2011-03-22 18:39:05.229883275 +0100
+++ xfs/fs/xfs/xfs_log_priv.h	2011-03-22 18:39:09.000000000 +0100
@@ -389,6 +389,7 @@ struct xfs_cil_ctx {
 	struct xfs_log_vec	*lv_chain;	/* logvecs being pushed */
 	xfs_log_callback_t	log_cb;		/* completion callback hook. */
 	struct list_head	committing;	/* ctx committing list */
+	atomic_t		discards;	/* no. of pending discards */
 };
 
 /*

_______________________________________________
xfs mailing list
xfs@xxxxxxxxxxx
http://oss.sgi.com/mailman/listinfo/xfs


[Index of Archives]     [Linux XFS Devel]     [Linux Filesystem Development]     [Filesystem Testing]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux