On Tue, Mar 22, 2011 at 03:55:56PM -0400, Christoph Hellwig wrote: > Instead of waiting for each discard request keep the CIL context alive > until all of them are done, at which point we can tear it down completly > and remove the busy extents from the rbtree. > > At this point I'm doing the I/O completion from IRQ context for simplicity, > but I'll benchmark it against a version that uses a workqueue. A workqueue is probably a good idea, because then the processing has some level of concurrency built into it. It also means we don't need to convert the locking to irq-safe variants and all the overhead that this introduces. > Signed-off-by: Christoph Hellwig <hch@xxxxxx> > > Index: xfs/fs/xfs/linux-2.6/xfs_discard.c > =================================================================== > --- xfs.orig/fs/xfs/linux-2.6/xfs_discard.c 2011-03-22 15:58:10.301855813 +0100 > +++ xfs/fs/xfs/linux-2.6/xfs_discard.c 2011-03-22 18:39:09.000000000 +0100 > @@ -30,6 +30,7 @@ > #include "xfs_inode.h" > #include "xfs_alloc.h" > #include "xfs_error.h" > +#include "xfs_log_priv.h" > #include "xfs_discard.h" > #include "xfs_trace.h" > > @@ -192,37 +193,119 @@ xfs_ioc_trim( > return 0; > } > > +void > +xfs_cil_discard_done( > + struct xfs_cil_ctx *ctx) > +{ > + if (atomic_dec_and_test(&ctx->discards)) { > + struct xfs_busy_extent *busyp, *n; > + > + list_for_each_entry_safe(busyp, n, &ctx->busy_extents, list) > + xfs_alloc_busy_clear(ctx->cil->xc_log->l_mp, busyp); > + kmem_free(ctx); > + } > +} > + > +STATIC void > +xfs_discard_end_io( > + struct bio *bio, > + int err) > +{ > + struct xfs_cil_ctx *ctx = bio->bi_private; > + > + if (err && err != -EOPNOTSUPP) { > + xfs_info(ctx->cil->xc_log->l_mp, > + "I/O error during discard\n"); > + } Same comment about the bno/len in the error message as the previous patch. 
> + > + bio_put(bio); > + xfs_cil_discard_done(ctx); > +} > + > +static int > +xfs_issue_discard( > + struct block_device *bdev, > + sector_t sector, > + sector_t nr_sects, > + gfp_t gfp_mask, > + struct xfs_cil_ctx *ctx) > +{ > + struct request_queue *q = bdev_get_queue(bdev); > + unsigned int max_discard_sectors; > + struct bio *bio; > + int ret = 0; > + > + if (!q) > + return -ENXIO; > + > + if (!blk_queue_discard(q)) > + return -EOPNOTSUPP; > + > + /* > + * Ensure that max_discard_sectors is of the proper > + * granularity > + */ > + max_discard_sectors = min(q->limits.max_discard_sectors, UINT_MAX >> 9); > + if (q->limits.discard_granularity) { > + unsigned int disc_sects = q->limits.discard_granularity >> 9; > + > + max_discard_sectors &= ~(disc_sects - 1); > + } This is asking for a helper function.... > + > + > + while (nr_sects && !ret) { no need to check ret here. > + bio = bio_alloc(gfp_mask, 1); > + if (!bio) { > + ret = -ENOMEM; > + break; > + } > + > + bio->bi_sector = sector; > + bio->bi_end_io = xfs_discard_end_io; > + bio->bi_bdev = bdev; > + bio->bi_private = ctx; > + > + if (nr_sects > max_discard_sectors) { > + bio->bi_size = max_discard_sectors << 9; > + nr_sects -= max_discard_sectors; > + sector += max_discard_sectors; > + } else { > + bio->bi_size = nr_sects << 9; > + nr_sects = 0; > + } > + > + atomic_inc(&ctx->discards); > + submit_bio(REQ_WRITE | REQ_DISCARD, bio); > + } > + > + return ret; > +} > + > int > xfs_discard_extent( > struct xfs_mount *mp, > - struct xfs_busy_extent *busyp) > + struct xfs_busy_extent *busyp, > + struct xfs_cil_ctx *ctx) > { > struct xfs_perag *pag; > - int error = 0; > xfs_daddr_t bno; > int64_t len; > bool done = false; > > - if ((mp->m_flags & XFS_MOUNT_DISCARD) == 0) > - return 0; > - > bno = XFS_AGB_TO_DADDR(mp, busyp->agno, busyp->bno); > len = XFS_FSB_TO_BB(mp, busyp->length); > > pag = xfs_perag_get(mp, busyp->agno); > - spin_lock(&pag->pagb_lock); > + spin_lock_irq(&pag->pagb_lock); > if 
(!busyp->length) > done = true; > busyp->flags = XFS_ALLOC_BUSY_DISCARDED; > - spin_unlock(&pag->pagb_lock); > + spin_unlock_irq(&pag->pagb_lock); Disabling/enabling interrupts on these locks could hurt quite a bit. They are taken quite frequently, and irq operations add quite a bit of overhead.... > xfs_perag_put(pag); > > if (done) > return 0; > > - error = -blkdev_issue_discard(mp->m_ddev_targp->bt_bdev, bno, len, > - GFP_NOFS, 0); > - if (error && error != EOPNOTSUPP) > - xfs_info(mp, "discard failed, error %d", error); > - return error; > + return -xfs_issue_discard(mp->m_ddev_targp->bt_bdev, > + bno, len, GFP_NOFS, ctx); > } > Index: xfs/fs/xfs/linux-2.6/xfs_discard.h > =================================================================== > --- xfs.orig/fs/xfs/linux-2.6/xfs_discard.h 2011-03-22 15:58:10.313857879 +0100 > +++ xfs/fs/xfs/linux-2.6/xfs_discard.h 2011-03-22 18:39:09.000000000 +0100 > @@ -3,10 +3,13 @@ > > struct fstrim_range; > struct xfs_busy_extent; > +struct xfs_cil_ctx; > > extern int xfs_ioc_trim(struct xfs_mount *, struct fstrim_range __user *); > > extern int xfs_discard_extent(struct xfs_mount *, > - struct xfs_busy_extent *); > + struct xfs_busy_extent *, > + struct xfs_cil_ctx *); > +extern void xfs_cil_discard_done(struct xfs_cil_ctx *ctx); > > #endif /* XFS_DISCARD_H */ > Index: xfs/fs/xfs/xfs_log_cil.c > =================================================================== > --- xfs.orig/fs/xfs/xfs_log_cil.c 2011-03-22 15:58:10.329855977 +0100 > +++ xfs/fs/xfs/xfs_log_cil.c 2011-03-22 18:39:09.000000000 +0100 > @@ -68,6 +68,7 @@ xlog_cil_init( > INIT_LIST_HEAD(&ctx->busy_extents); > ctx->sequence = 1; > ctx->cil = cil; > + atomic_set(&ctx->discards, 1); > cil->xc_ctx = ctx; > cil->xc_current_sequence = ctx->sequence; > > @@ -364,14 +365,18 @@ xlog_cil_committed( > struct xfs_cil_ctx *ctx = args; > struct xfs_mount *mp = ctx->cil->xc_log->l_mp; > struct xfs_busy_extent *busyp, *n; > + bool keep_alive = false; > > 
xfs_trans_committed_bulk(ctx->cil->xc_log->l_ailp, ctx->lv_chain, > ctx->start_lsn, abort); > > - list_for_each_entry_safe(busyp, n, &ctx->busy_extents, list) { > - if (!abort) > - xfs_discard_extent(mp, busyp); > - xfs_alloc_busy_clear(mp, busyp); > + if (!(mp->m_flags & XFS_MOUNT_DISCARD) || abort) { > + list_for_each_entry_safe(busyp, n, &ctx->busy_extents, list) > + xfs_alloc_busy_clear(mp, busyp); > + } else if (!list_empty(&ctx->busy_extents)) { > + list_for_each_entry(busyp, &ctx->busy_extents, list) > + xfs_discard_extent(mp, busyp, ctx); > + keep_alive = true; > } Oh, I see you've moved the XFS_MOUNT_DISCARD check into the loop here... > > spin_lock(&ctx->cil->xc_cil_lock); > @@ -379,7 +384,10 @@ xlog_cil_committed( > spin_unlock(&ctx->cil->xc_cil_lock); > > xlog_cil_free_logvec(ctx->lv_chain); > - kmem_free(ctx); > + if (keep_alive) > + xfs_cil_discard_done(ctx); > + else > + kmem_free(ctx); You could probably just call xfs_cil_discard_done(ctx) here as the busy extent list will be empty in the !keep_alive case and so it will simply do the kmem_free(ctx) call there. Cheers, Dave. -- Dave Chinner david@xxxxxxxxxxxxx _______________________________________________ xfs mailing list xfs@xxxxxxxxxxx http://oss.sgi.com/mailman/listinfo/xfs