On Tue, Mar 22, 2011 at 03:55:53PM -0400, Christoph Hellwig wrote:
> Update the extent tree in case we have to reuse a busy extent, so that it
> always is kept uptodate. This is done by replacing the busy list searches
> with a new xfs_alloc_busy_reuse helper, which updates the busy extent tree
> in case of a reuse. Also replace setting transactions to sync with forcing
> the log out in case we found a busy extent to reuse. This makes the code a
> lot more simple, and is required for discard support later on. While it
> will cause performance regressions with just this patch applied, the impact
> is more than mitigated by the next patch in the series.
>
> Signed-off-by: Christoph Hellwig <hch@xxxxxx>
>
> Index: xfs/fs/xfs/xfs_alloc.c
> ===================================================================
> --- xfs.orig/fs/xfs/xfs_alloc.c	2011-03-20 19:41:55.835479390 +0100
> +++ xfs/fs/xfs/xfs_alloc.c	2011-03-21 14:49:14.157973188 +0100
.....
> -	new->tid = xfs_log_get_trans_ident(tp);
> -
>  	INIT_LIST_HEAD(&new->list);
>
>  	/* trace before insert to be able to see failed inserts */
>  	trace_xfs_alloc_busy(tp, agno, bno, len, 0);
>
>  	pag = xfs_perag_get(tp->t_mountp, new->agno);
> -restart:
>  	spin_lock(&pag->pagb_lock);
>  	rbp = &pag->pagb_tree.rb_node;
> -	parent = NULL;
> -	busyp = NULL;
> -	match = 0;
> -	while (*rbp && match >= 0) {
> +	while (*rbp) {
>  		parent = *rbp;
>  		busyp = rb_entry(parent, struct xfs_busy_extent, rb_node);
>
>  		if (new->bno < busyp->bno) {
>  			/* may overlap, but exact start block is lower */
>  			rbp = &(*rbp)->rb_left;
> -			if (new->bno + new->length > busyp->bno)
> -				match = busyp->tid == new->tid ? 1 : -1;
> +			BUG_ON(new->bno + new->length > busyp->bno);

BUG_ON() inside a spinlock will effectively lock up the machine as other
CPUs try to take the spinlock held by the thread that went bad. It causes
the machine to die a horrible, undebuggable death rather than just killing
the thread that caused the oops and leaving everything else still
functional. If it were an ASSERT(), that would probably be OK because only
debug systems would have this problem, but the use of BUG()/BUG_ON() means
it will cause production systems to die as well.
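i.e. the same checks using XFS's existing debug-only ASSERT() macro
instead - an untested sketch, with the BUG_ON() conditions inverted to
suit ASSERT() semantics:

		if (new->bno < busyp->bno) {
			/* may overlap, but exact start block is lower */
			rbp = &(*rbp)->rb_left;
			/* overlap is a bug, but only kill debug kernels */
			ASSERT(new->bno + new->length <= busyp->bno);
		} else if (new->bno > busyp->bno) {
			/* may overlap, but exact start block is higher */
			rbp = &(*rbp)->rb_right;
			ASSERT(bno >= busyp->bno + busyp->length);
		} else {
			ASSERT(0);
		}

That way an overlap still trips CONFIG_XFS_DEBUG kernels immediately,
but a production machine just carries on with a corrupt-looking tree
rather than wedging every CPU behind pagb_lock.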
>  		} else if (new->bno > busyp->bno) {
>  			/* may overlap, but exact start block is higher */
>  			rbp = &(*rbp)->rb_right;
> -			if (bno < busyp->bno + busyp->length)
> -				match = busyp->tid == new->tid ? 1 : -1;
> +			BUG_ON(bno < busyp->bno + busyp->length);
>  		} else {
> -			match = busyp->tid == new->tid ? 1 : -1;
> -			break;
> +			BUG();
>  		}
>  	}
> -	if (match < 0) {
> -		/* overlap marked busy in different transaction */
> -		spin_unlock(&pag->pagb_lock);
> -		xfs_log_force(tp->t_mountp, XFS_LOG_SYNC);
> -		goto restart;
> -	}
> -	if (match > 0) {
> -		/*
> -		 * overlap marked busy in same transaction. Update if exact
> -		 * start block match, otherwise combine the busy extents into
> -		 * a single range.
> -		 */
> -		if (busyp->bno == new->bno) {
> -			busyp->length = max(busyp->length, new->length);
> -			spin_unlock(&pag->pagb_lock);
> -			ASSERT(tp->t_flags & XFS_TRANS_SYNC);
> -			xfs_perag_put(pag);
> -			kmem_free(new);
> -			return;
> -		}
> -		rb_erase(&busyp->rb_node, &pag->pagb_tree);
> -		new->length = max(busyp->bno + busyp->length,
> -				new->bno + new->length) -
> -				min(busyp->bno, new->bno);
> -		new->bno = min(busyp->bno, new->bno);
> -	} else
> -		busyp = NULL;
>
>  	rb_link_node(&new->rb_node, parent, rbp);
>  	rb_insert_color(&new->rb_node, &pag->pagb_tree);
> @@ -2650,7 +2518,6 @@ restart:
>  	list_add(&new->list, &tp->t_busy);
>  	spin_unlock(&pag->pagb_lock);
>  	xfs_perag_put(pag);
> -	kmem_free(busyp);
>  }
>
>  /*
> @@ -2704,6 +2571,173 @@ xfs_alloc_busy_search(
>  	return match;
>  }
>
> +STATIC int
> +xfs_alloc_busy_try_reuse(
> +	struct xfs_perag	*pag,
> +	struct xfs_busy_extent	*busyp,
> +	xfs_agblock_t		fbno,
> +	xfs_agblock_t		fend)
> +{
> +	xfs_agblock_t		bbno = busyp->bno;
> +	xfs_agblock_t		bend = bbno + busyp->length;
> +
> +	if (bbno < fbno && bend > fend) {
> +		/*
> +		 * Case 1:
> +		 *    bbno           bend
> +		 *    +BBBBBBBBBBBBBBBBB+
> +		 *        +---------+
> +		 *        fbno   fend
> +		 */
> +
> +		/*
> +		 * We would have to split the busy extent to be able
> +		 * to track it correct, which we cannot do because we
> +		 * would have to modify the list of busy extents
> +		 * attached to the transaction/CIL context, which
> +		 * is mutable.

immutable?

> +		 *
> +		 * Force out the log to clear the busy extents
> +		 * and retry the search.
> +		 */

BTW, these comments appear to wrap at 68 columns - any particular
reason?

> +		return -1;
> +	} else if (bbno >= fbno && bend <= fend) {
> +		/*
> +		 * Case 2:
> +		 *    bbno           bend
> +		 *    +BBBBBBBBBBBBBBBBB+
> +		 *    +-----------------+
> +		 *    fbno           fend
> +		 *
> +		 * Case 3:
> +		 *    bbno           bend
> +		 *    +BBBBBBBBBBBBBBBBB+
> +		 *    +--------------------------+
> +		 *    fbno                    fend
> +		 *
> +		 * Case 4:
> +		 *             bbno           bend
> +		 *             +BBBBBBBBBBBBBBBBB+
> +		 *    +--------------------------+
> +		 *    fbno                    fend
> +		 *
> +		 * Case 5:
> +		 *             bbno           bend
> +		 *             +BBBBBBBBBBBBBBBBB+
> +		 *    +-----------------------------------+
> +		 *    fbno                             fend
> +		 *
> +		 */
> +
> +		/*
> +		 * The busy extent is fully covered by the extent
> +		 * we are allocating, and can simply be removed from
> +		 * the rbtree. However we cannot remove it from the
> +		 * immutable list tracking busy extents in the
> +		 * transaction or CIL context, so set the length
> +		 * to zero to mark it invalid.
> +		 */
> +		rb_erase(&busyp->rb_node, &pag->pagb_tree);
> +		busyp->length = 0;
> +	} else if (bbno == fbno) {
> +		/*
> +		 * Case 6:
> +		 *    bbno           bend
> +		 *    +BBBBBBBBBBBBBBBBB+
> +		 *    +---------+
> +		 *    fbno   fend
> +		 *
> +		 * Case 7:
> +		 *    bbno           bend
> +		 *    +BBBBBBBBBBBBBBBBB+
> +		 *    +------------------+
> +		 *    fbno            fend
> +		 *
> +		 */
> +
> +		busyp->bno = fend;
> +	} else if (bend == fend) {
> +		/*
> +		 * Case 8:
> +		 *    bbno           bend
> +		 *    +BBBBBBBBBBBBBBBBB+
> +		 *        +-------------+
> +		 *        fbno       fend
> +		 *
> +		 * Case 9:
> +		 *    bbno           bend
> +		 *    +BBBBBBBBBBBBBBBBB+
> +		 *        +----------------------+
> +		 *        fbno                fend
> +		 */
> +
> +		busyp->length = fbno - busyp->bno;
> +	} else {
> +		BUG();
> +	}
> +
> +	return 1;
> +}
> +
> +
> +/*
> + * For a given extent [fbno, flen], make sure we can reuse it safely.
> + */
> +void
> +xfs_alloc_busy_reuse(
> +	struct xfs_trans	*tp,
> +	xfs_agnumber_t		agno,
> +	xfs_agblock_t		fbno,
> +	xfs_extlen_t		flen)
> +{
> +	struct xfs_perag	*pag;
> +	struct rb_node		*rbp;
> +
> +	ASSERT(flen > 0);
> +
> +	pag = xfs_perag_get(tp->t_mountp, agno);
> +restart:
> +	spin_lock(&pag->pagb_lock);
> +	rbp = pag->pagb_tree.rb_node;
> +	while (rbp) {
> +		struct xfs_busy_extent *busyp =
> +			rb_entry(rbp, struct xfs_busy_extent, rb_node);
> +		xfs_agblock_t	fend = fbno + flen;
> +		xfs_agblock_t	bbno = busyp->bno;
> +		xfs_agblock_t	bend = bbno + busyp->length;
> +		int		overlap;
> +
> +		if (fend <= bbno) {
> +			rbp = rbp->rb_left;
> +			continue;
> +		} else if (fbno >= bend) {
> +			rbp = rbp->rb_right;
> +			continue;
> +		}
> +
> +		overlap = xfs_alloc_busy_try_reuse(pag, busyp,
> +						   fbno, fbno + flen);
> +		if (overlap) {
> +			spin_unlock(&pag->pagb_lock);
> +			xfs_log_force(tp->t_mountp, XFS_LOG_SYNC);
> +			goto restart;
> +		}

xfs_alloc_busy_try_reuse() only ever returns -1 or 1, which means this
will always trigger a log force on overlap....
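Presumably only the -1 (unsplittable) case was meant to force the log
and retry, with +1 meaning the tree was updated in place and the walk
can continue - i.e. something like this untested sketch:

		overlap = xfs_alloc_busy_try_reuse(pag, busyp,
						   fbno, fbno + flen);
		if (overlap == -1) {
			/* can't split the busy extent - force and retry */
			spin_unlock(&pag->pagb_lock);
			xfs_log_force(tp->t_mountp, XFS_LOG_SYNC);
			goto restart;
		}

That would let the +1 case fall through to the code below.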
> +
> +		/*
> +		 * No more busy extents to search.
> +		 */
> +		if (bbno <= fbno && bend >= fend)
> +			break;
> +
> +		if (fbno < bbno)
> +			rbp = rbp->rb_left;
> +		else
> +			rbp = rbp->rb_right;

and this code is never executed.

Cheers,

Dave.
-- 
Dave Chinner
david@xxxxxxxxxxxxx