On Tue, Jun 05, 2012 at 10:46:54AM -0400, Christoph Hellwig wrote: > Both function contain the same basic loop over all AGs. Merge the two > by creating three passes in the loop instead of duplicating the code. > > Signed-off-by: Christoph Hellwig <hch@xxxxxx> > > --- > fs/xfs/xfs_ialloc.c | 179 +++++++++++++++------------------------------------- > 1 file changed, 55 insertions(+), 124 deletions(-) > > Index: xfs/fs/xfs/xfs_ialloc.c > =================================================================== > --- xfs.orig/fs/xfs/xfs_ialloc.c 2012-06-04 12:58:07.104263361 -0400 > +++ xfs/fs/xfs/xfs_ialloc.c 2012-06-04 13:11:52.512284497 -0400 > @@ -439,114 +439,6 @@ xfs_ialloc_next_ag( > } > > /* > - * Select an allocation group to look for a free inode in, based on the parent > - * inode and then mode. Return the allocation group buffer. > - */ > -STATIC xfs_agnumber_t > -xfs_ialloc_ag_select( > - xfs_trans_t *tp, /* transaction pointer */ > - xfs_ino_t parent, /* parent directory inode number */ > - umode_t mode, /* bits set to indicate file type */ > - int okalloc) /* ok to allocate more space */ > -{ > - xfs_agnumber_t agcount; /* number of ag's in the filesystem */ > - xfs_agnumber_t agno; /* current ag number */ > - int flags; /* alloc buffer locking flags */ > - xfs_extlen_t ineed; /* blocks needed for inode allocation */ > - xfs_extlen_t longest = 0; /* longest extent available */ > - xfs_mount_t *mp; /* mount point structure */ > - int needspace; /* file mode implies space allocated */ > - xfs_perag_t *pag; /* per allocation group data */ > - xfs_agnumber_t pagno; /* parent (starting) ag number */ > - int error; > - > - /* > - * Files of these types need at least one block if length > 0 > - * (and they won't fit in the inode, but that's hard to figure out). > - */ > - needspace = S_ISDIR(mode) || S_ISREG(mode) || S_ISLNK(mode); > - mp = tp->t_mountp; > - agcount = mp->m_maxagi; > - if (S_ISDIR(mode)) > - pagno = xfs_ialloc_next_ag(mp); > - else { > - pagno = XFS_INO_TO_AGNO(mp, parent); > - if (pagno >= agcount) > - pagno = 0; > - } > - > - ASSERT(pagno < agcount); > - > - /* > - * Loop through allocation groups, looking for one with a little > - * free space in it. Note we don't look for free inodes, exactly. > - * Instead, we include whether there is a need to allocate inodes > - * to mean that blocks must be allocated for them, > - * if none are currently free. > - */ > - agno = pagno; > - flags = XFS_ALLOC_FLAG_TRYLOCK; > - for (;;) { > - pag = xfs_perag_get(mp, agno); > - if (!pag->pagi_inodeok) { > - xfs_ialloc_next_ag(mp); > - goto nextag; > - } > - > - if (!pag->pagi_init) { > - error = xfs_ialloc_pagi_init(mp, tp, agno); > - if (error) > - goto nextag; > - } > - > - if (pag->pagi_freecount) { > - xfs_perag_put(pag); > - return agno; > - } > - > - if (!okalloc) > - goto nextag; > - > - if (!pag->pagf_init) { > - error = xfs_alloc_pagf_init(mp, tp, agno, flags); > - if (error) > - goto nextag; > - } > - > - /* > - * Is there enough free space for the file plus a block of > - * inodes? (if we need to allocate some)? > - */ > - ineed = XFS_IALLOC_BLOCKS(mp); > - longest = pag->pagf_longest; > - if (!longest) > - longest = pag->pagf_flcount > 0; > - > - if (pag->pagf_freeblks >= needspace + ineed && > - longest >= ineed) { > - xfs_perag_put(pag); > - return agno; > - } > -nextag: > - xfs_perag_put(pag); > - /* > - * No point in iterating over the rest, if we're shutting > - * down. > - */ > - if (XFS_FORCED_SHUTDOWN(mp)) > - return NULLAGNUMBER; > - agno++; > - if (agno >= agcount) > - agno = 0; > - if (agno == pagno) { > - if (flags == 0) > - return NULLAGNUMBER; > - flags = 0; > - } > - } > -} > - > -/* > * Try to retrieve the next record to the left/right from the current one. > */ > STATIC int > @@ -901,9 +793,9 @@ xfs_dialloc( > xfs_buf_t *agbp; /* allocation group header's buffer */ > xfs_agnumber_t agno; /* allocation group number */ > int error; /* error return value */ > - int ialloced; /* inode allocation status */ > int noroom = 0; /* no space for inode blk allocation */ > xfs_agnumber_t start_agno; /* starting allocation group number */ > + int pass; > struct xfs_perag *pag; > > if (*IO_agbp) { > @@ -917,16 +809,6 @@ xfs_dialloc( > } > > /* > - * We do not have an agbp, so select an initial allocation > - * group for inode allocation. > - */ > - start_agno = xfs_ialloc_ag_select(tp, parent, mode, okalloc); > - if (start_agno == NULLAGNUMBER) { > - *inop = NULLFSINO; > - return 0; > - } > - > - /* > * If we have already hit the ceiling of inode blocks then clear > * okalloc so we scan all available agi structures for a free > * inode. > @@ -938,12 +820,31 @@ xfs_dialloc( > } > > /* > - * Loop until we find an allocation group that either has free inodes > - * or in which we can allocate some inodes. Iterate through the > - * allocation groups upward, wrapping at the end. > + * For directories start with a new allocation groups, for other file > + * types aim to find an inode close to the parent. > */ > + if (S_ISDIR(mode)) { > + start_agno = xfs_ialloc_next_ag(mp); > + ASSERT(start_agno < mp->m_maxagi); > + } else { > + start_agno = XFS_INO_TO_AGNO(mp, parent); > + if (start_agno >= mp->m_maxagi) > + start_agno = 0; > + } > + > + /* > + * Loop through allocation groups, looking for one with a little > + * free space in it. Note we don't look for free inodes, exactly. > + * Instead, we include whether there is a need to allocate inodes > + * to mean that blocks must be allocated for them, if none are > + * currently free. > + */ > + *inop = NULLFSINO; > agno = start_agno; > + pass = 0; Do we even need multiple passes here? The first and second passes appear to be identical except for the XFS_ALLOC_FLAG_TRYLOCK flag on the xfs_alloc_pagf_init() call. After the first time we've allocated in an AG, pag->pagf_init will always be set, so this means pass = 0 and pass = 1 are identical for anything other than a freshly mounted filesystem. Hence I think you can just drop the trylock pass here. And further, I can't see why we need a second pass at all. I.e. what we used to do was: select ag: pass 1: nonblocking scan over all AGI/AGF buffers pass 2 on fail: blocking scan over all AGI/AGF buffers dialloc: if okalloc allocate inode chunk else pass 3: blocking scan over AGIs to find free inodes So the 3 passes were used to work around the blocking nature of AGI/AGF locks and minimising the allocation latency caused by waiting on busy AGI/AGF buffers. By moving to using the per-ag data, we avoid that latency problem altogether except for the initialisation cases, which onyl occur just after mount. Your logic now is: dialloc: pass 1 nonblocking scan over pagi/pagf if free inodes found, allocate else if pagf cannot be read, goto pass 2 else if no contiguous free space could be found goto pass 2 else allocate inode chunk and return pass 2 nonblocking scan over pagi/pagf if free inodes found, allocate else if no contiguous free space could be found goto pass 3 else allocate inode chunk and return pass 3: nonblocking scan over pagi/pagf if free inodes found, allocate else allocate inode chunk and return AFAICS, pass 3 will always fail if pass 2 failed - if there isn't enough contiguous free space in the AGF, we won't be able to allocate a new inode chunk and avoiding that check won't change anything. And given that pass 2 is completely redundant for a filesytem that has been active for a few minutes, I really can't see a need for multiple passes here at all... Cheers, Dave. -- Dave Chinner david@xxxxxxxxxxxxx _______________________________________________ xfs mailing list xfs@xxxxxxxxxxx http://oss.sgi.com/mailman/listinfo/xfs