From: Jie Liu <jeff.liu@xxxxxxxxxx>

XFS does the quota check at mount time with a single thread when it is
required, and this process must be done before the file system mount can
complete.  That is fine if the desired quota options were already enabled
while users were creating and removing files; however, if those options
were not previously enabled, the whole file system has to be traversed to
figure out the quota usage.  Hence, the mount procedure can stall for a
long time, depending on how many inodes reside on the storage as well as
on the disk I/O speed.

This patch implements a parallel quota check based on allocation groups:
the quota check is performed per AG via work queues combined with
completions.  In this way, I observed a significant speedup on faster
devices.

Signed-off-by: Jie Liu <jeff.liu@xxxxxxxxxx>
---
 fs/xfs/xfs_qm.c |  357 ++++++++++++++++++++++++++++++++++++++++++++++++++++---
 fs/xfs/xfs_qm.h |   18 +++
 2 files changed, 359 insertions(+), 16 deletions(-)

diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index 14a4996..110df7b 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -35,8 +35,11 @@
 #include "xfs_trans.h"
 #include "xfs_trans_space.h"
 #include "xfs_qm.h"
+#include "xfs_btree.h"
+#include "xfs_ialloc_btree.h"
 #include "xfs_trace.h"
 #include "xfs_icache.h"
+#include "xfs_inum.h"
 #include "xfs_cksum.h"
 #include "xfs_dinode.h"
 
@@ -51,6 +54,9 @@ STATIC int	xfs_qm_init_quotainfo(xfs_mount_t *);
 
 STATIC void	xfs_qm_dqfree_one(struct xfs_dquot *dqp);
 
+STATIC int	xfs_qm_dqusage_adjust(struct xfs_mount *mp, xfs_ino_t ino,
+				      int *res);
+
 /*
  * We use the batch lookup interface to iterate over the dquots as it
  * currently is the only interface into the radix tree code that allows
@@ -1349,9 +1355,6 @@ STATIC int
 xfs_qm_dqusage_adjust(
 	xfs_mount_t	*mp,		/* mount point for filesystem */
 	xfs_ino_t	ino,		/* inode number to get data for */
-	void		__user *buffer,	/* not used */
-	int		ubsize,		/* not used */
-	int		*ubused,	/* not used */
 	int		*res)		/* result code value */
 {
 	xfs_inode_t	*ip;
@@ -1439,6 +1442,337 @@ error0:
 	return error;
 }
 
+static int
+xfs_qm_dqusage_adjust_ichunk(
+	struct xfs_mount		*mp,
+	xfs_agnumber_t			agno,
+	struct xfs_inobt_rec_incore	*irbp,
+	xfs_ino_t			*lastinop)
+{
+	xfs_ino_t	lastino = *lastinop;
+	int		chunkidx, clustidx;
+	int		error = 0;
+	xfs_agino_t	agino;
+
+	for (agino = irbp->ir_startino, chunkidx = clustidx = 0;
+	     irbp->ir_freecount < XFS_INODES_PER_CHUNK;
+	     chunkidx++, clustidx++, agino++) {
+		xfs_ino_t	ino = XFS_AGINO_TO_INO(mp, agno, agino);
+		int		stat;
+
+		ASSERT(chunkidx < XFS_INODES_PER_CHUNK);
+
+		/* Skip if this inode is free */
+		if (XFS_INOBT_MASK(chunkidx) & irbp->ir_free) {
+			lastino = ino;
+			continue;
+		}
+
+		/*
+		 * Count used inodes as free so we can tell when the
+		 * chunk is used up.
+		 */
+		irbp->ir_freecount++;
+
+		error = xfs_qm_dqusage_adjust(mp, ino, &stat);
+		if (stat == BULKSTAT_RV_NOTHING) {
+			if (error && error != ENOENT && error != EINVAL)
+				break;
+
+			lastino = ino;
+			continue;
+		}
+		if (stat == BULKSTAT_RV_GIVEUP) {
+			ASSERT(error);
+			break;
+		}
+		lastino = ino;
+	}
+
+	*lastinop = lastino;
+	return error;
+}
+
+static int
+xfs_qm_dqusage_adjust_perag(
+	struct xfs_dq_adjuster	*qa)
+{
+	struct xfs_mount	*mp = qa->qa_mp;
+	xfs_agnumber_t		agno = qa->qa_agno;
+	xfs_inobt_rec_incore_t	*irbp;	/* current irec buffer pointer */
+	xfs_inobt_rec_incore_t	*irbuf;	/* start of irec buffer */
+	xfs_inobt_rec_incore_t	*irbufend; /* end of good irec buffer entries */
+	xfs_btree_cur_t		*cur;	/* btree cursor for ialloc btree */
+	xfs_ino_t		lastino;/* last inode # in question */
+	xfs_agino_t		agino;	/* inode # in allocation group */
+	size_t			irbsize; /* size of irec buffer in bytes */
+	int			nirbuf;	/* size of irbuf */
+	int			rval;	/* return value error code */
+	int			error;	/* error code */
+
+	irbuf = kmem_zalloc_greedy(&irbsize, PAGE_SIZE, PAGE_SIZE * 4);
+	if (!irbuf)
+		return ENOMEM;
+	nirbuf = irbsize / sizeof(*irbuf);
+
+	rval = 0;
+	agino = 0;
+	lastino = 0;
+
+	/*
+	 * Loop over the allocation groups, starting from the last
+	 * inode returned; 0 means start of the allocation group.
+	 */
+	do {
+		xfs_buf_t	*agbp;	/* agi header buffer */
+		xfs_agi_t	*agi;	/* agi header data */
+		int		stat;	/* result value from btree calls */
+		bool		end_of_ag = false;
+
+		cond_resched();
+
+		irbp = irbuf;
+		irbufend = irbuf + nirbuf;
+
+		error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp);
+		if (error) {
+			rval = error;
+			break;
+		}
+		agi = XFS_BUF_TO_AGI(agbp);
+
+		/* Allocate and initialize a btree cursor for ialloc btree */
+		cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno);
+		error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_GE, &stat);
+
+		/*
+		 * Loop through inode btree records in this ag until we run out
+		 * of inodes or space in the buffer.
+		 */
+		while (irbp < irbufend) {
+			xfs_inobt_rec_incore_t	r;
+
+			/* Loop as long as we're unable to read the inode btree */
+			while (error) {
+				agino += XFS_INODES_PER_CHUNK;
+				if (XFS_AGINO_TO_AGBNO(mp, agino) >=
+				    be32_to_cpu(agi->agi_length))
+					break;
+
+				error = xfs_inobt_lookup(cur, agino,
+							 XFS_LOOKUP_GE, &stat);
+				cond_resched();
+			}
+
+			/*
+			 * If ran off the end of the ag either with an error,
+			 * or the normal way, set end and stop collecting.
+			 */
+			if (error) {
+				end_of_ag = true;
+				break;
+			}
+
+			error = xfs_inobt_get_rec(cur, &r, &stat);
+			if (error || stat == 0) {
+				end_of_ag = true;
+				break;
+			}
+
+			/*
+			 * If this chunk has any allocated inodes, save it.
+			 * Also start read-ahead now for this chunk.
+			 */
+			if (r.ir_freecount < XFS_INODES_PER_CHUNK) {
+				struct blk_plug	plug;
+
+				blk_start_plug(&plug);
+				xfs_inobt_reada_chunk(mp, agno, &r);
+				blk_finish_plug(&plug);
+
+				irbp->ir_startino = r.ir_startino;
+				irbp->ir_freecount = r.ir_freecount;
+				irbp->ir_free = r.ir_free;
+				irbp++;
+			}
+
+			/* Set agino to after this chunk and bump the cursor */
+			agino = r.ir_startino + XFS_INODES_PER_CHUNK;
+			error = xfs_btree_increment(cur, 0, &stat);
+			cond_resched();
+		}
+
+		/*
+		 * Drop the btree buffers and the agi buffer. We can't hold
+		 * any of the locks these represent when calling iget.
+		 */
+		xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
+		xfs_buf_relse(agbp);
+
+		irbufend = irbp;
+		for (irbp = irbuf; irbp < irbufend; irbp++) {
+			error = xfs_qm_dqusage_adjust_ichunk(mp, agno, irbp, &lastino);
+			if (error)
+				rval = error;
+			cond_resched();
+		}
+
+		if (end_of_ag)
+			break;
+
+		/* Set up for the next loop iteration */
+		agino = XFS_INO_TO_AGINO(mp, lastino);
+	} while (1);
+
+	/* Done, we're either out of filesystem or space to put the data */
+	kmem_free(irbuf);
+
+	return rval;
+}
+
+/*
+ * Iterate thru the file system to fetch all the inodes in the given
+ * inode range and adjusting the corresponding dquot counters in core.
+ */
+STATIC void
+xfs_qm_dq_adjust_worker(
+	struct work_struct	*work)
+{
+	struct xfs_dq_adjuster	*qa = container_of(work,
+					struct xfs_dq_adjuster, qa_work);
+	int			error;
+
+	error = xfs_qm_dqusage_adjust_perag(qa);
+	complete(&qa->qa_complete);
+}
+
+STATIC int
+xfs_qm_init_quotacheck(
+	struct xfs_mount	*mp,
+	struct xfs_quotacheck	*qc)
+{
+	memset(qc, 0, sizeof(*qc));
+
+	INIT_LIST_HEAD(&qc->qc_adjusters);
+	spin_lock_init(&qc->qc_lock);
+	qc->qc_mp = mp;
+	qc->qc_wq = alloc_workqueue("xfs-dqcheck/%s", WQ_NON_REENTRANT,
+				    0, mp->m_fsname);
+	if (!qc->qc_wq) {
+		list_del(&qc->qc_adjusters);
+		return ENOMEM;
+	}
+
+	return 0;
+}
+
+STATIC void
+xfs_qm_destroy_quotacheck(
+	struct xfs_quotacheck	*qc)
+{
+	destroy_workqueue(qc->qc_wq);
+	spinlock_destroy(&qc->qc_lock);
+	list_del(&qc->qc_adjusters);
+}
+
+STATIC void
+xfs_qm_destroy_adjusters(
+	struct xfs_quotacheck	*qc)
+{
+	struct xfs_dq_adjuster	*qa, *tmp;
+
+	list_for_each_entry_safe(qa, tmp, &qc->qc_adjusters, qa_node) {
+		list_del(&qa->qa_node);
+		kfree(qa);
+	}
+}
+
+STATIC struct xfs_dq_adjuster *
+xfs_qm_alloc_adjuster(
+	struct xfs_quotacheck	*qc,
+	xfs_agnumber_t		agno)
+{
+	struct xfs_dq_adjuster	*qa;
+
+	qa = kzalloc(sizeof(*qa), GFP_NOFS);
+	if (!qa)
+		return NULL;
+
+	qa->qa_qc = qc;
+	qa->qa_mp = qc->qc_mp;
+	qa->qa_agno = agno;
+	INIT_LIST_HEAD(&qa->qa_node);
+	INIT_WORK(&qa->qa_work, xfs_qm_dq_adjust_worker);
+	init_completion(&qa->qa_complete);
+	list_add_tail(&qa->qa_node, &qc->qc_adjusters);
+
+	return qa;
+}
+
+STATIC int
+xfs_qm_alloc_queue_adjusters(
+	struct xfs_quotacheck	*qc)
+{
+	xfs_agnumber_t		agcount = qc->qc_mp->m_sb.sb_agcount;
+	int			i, error = 0;
+
+	for (i = 0; i < agcount; i++) {
+		struct xfs_dq_adjuster	*qa;
+
+		spin_lock(&qc->qc_lock);
+		qa = xfs_qm_alloc_adjuster(qc, i);
+		if (!qa) {
+			error = ENOMEM;
+			spin_unlock(&qc->qc_lock);
+			goto out_destroy_adjusters;
+		}
+		queue_work(qc->qc_wq, &qa->qa_work);
+		spin_unlock(&qc->qc_lock);
+	}
+
+	return error;
+
+out_destroy_adjusters:
+	xfs_qm_destroy_adjusters(qc);
+	return error;
+}
+
+STATIC void
+xfs_qm_wait_for_adjusters(
+	struct xfs_quotacheck	*qc)
+{
+	struct xfs_dq_adjuster	*qa;
+
+	list_for_each_entry(qa, &qc->qc_adjusters, qa_node)
+		wait_for_completion(&qa->qa_complete);
+}
+
+STATIC int
+xfs_qm_do_quotacheck(
+	struct xfs_mount	*mp)
+{
+	struct xfs_quotacheck	qc;
+	int			error;
+
+	error = xfs_qm_init_quotacheck(mp, &qc);
+	if (error)
+		return error;
+
+	/* Allocate and queue adjusters */
+	error = xfs_qm_alloc_queue_adjusters(&qc);
+	if (error)
+		goto out_destroy_quotacheck;
+
+	xfs_qm_wait_for_adjusters(&qc);
+
+	xfs_qm_destroy_adjusters(&qc);
+
+out_destroy_quotacheck:
+	xfs_qm_destroy_quotacheck(&qc);
+
+	return error;
+}
+
 STATIC int
 xfs_qm_flush_one(
 	struct xfs_dquot	*dqp,
@@ -1474,7 +1808,7 @@ int
 xfs_qm_quotacheck(
 	xfs_mount_t	*mp)
 {
-	int		done, count, error, error2;
+	int		count, error, error2;
 	xfs_ino_t	lastino;
 	size_t		structsz;
 	uint		flags;
@@ -1522,18 +1856,9 @@ xfs_qm_quotacheck(
 		flags |= XFS_PQUOTA_CHKD;
 	}
 
-	do {
-		/*
-		 * Iterate thru all the inodes in the file system,
-		 * adjusting the corresponding dquot counters in core.
-		 */
-		error = xfs_bulkstat(mp, &lastino, &count,
-				     xfs_qm_dqusage_adjust,
-				     structsz, NULL, &done);
-		if (error)
-			break;
-
-	} while (!done);
+	error = xfs_qm_do_quotacheck(mp);
+	if (error)
+		goto error_return;
 
 	/*
 	 * We've made all the changes that we need to make incore. Flush them
diff --git a/fs/xfs/xfs_qm.h b/fs/xfs/xfs_qm.h
index a788b66..c7e2e6d 100644
--- a/fs/xfs/xfs_qm.h
+++ b/fs/xfs/xfs_qm.h
@@ -26,6 +26,24 @@ struct xfs_inode;
 
 extern struct kmem_zone	*xfs_qm_dqtrxzone;
 
+struct xfs_dq_adjuster {
+	struct list_head	qa_node;
+	struct xfs_mount	*qa_mp;
+	struct xfs_quotacheck	*qa_qc;
+	xfs_agnumber_t		qa_agno;
+	int			qa_error;
+	struct work_struct	qa_work;
+	struct completion	qa_complete;
+};
+
+struct xfs_quotacheck {
+	struct list_head	qc_adjusters;
+	spinlock_t		qc_lock;
+	struct xfs_mount	*qc_mp;
+	int			qc_done;
+	struct workqueue_struct	*qc_wq;
+};
+
 /*
  * This defines the unit of allocation of dquots.
  * Currently, it is just one file system block, and a 4K blk contains 30
-- 
1.7.9.5
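
For reference, and not part of the patch: the fan-out/fan-in shape the patch
relies on (one work item per allocation group, the caller blocking on one
completion per worker) can be reduced to the minimal, self-contained sketch
below.  All names here (ag_work, ag_scan_worker, parallel_ag_scan) are
illustrative only and do not exist in the XFS code; error conventions follow
the usual negative-errno style rather than the positive errors used above.

/*
 * Sketch of the per-AG work queue + completion pattern:
 * queue one worker per AG, then wait for each worker's completion.
 */
#include <linux/kernel.h>
#include <linux/workqueue.h>
#include <linux/completion.h>
#include <linux/slab.h>

struct ag_work {
	struct work_struct	work;
	struct completion	done;
	unsigned int		agno;	/* allocation group to scan */
	int			error;	/* result reported back to the caller */
};

static void ag_scan_worker(struct work_struct *work)
{
	struct ag_work	*aw = container_of(work, struct ag_work, work);

	/* ... per-AG inode walk and dquot adjustment would go here ... */
	aw->error = 0;
	complete(&aw->done);		/* wake up the waiter for this AG */
}

static int parallel_ag_scan(unsigned int agcount)
{
	struct workqueue_struct	*wq;
	struct ag_work		*works;
	unsigned int		i;
	int			error = 0;

	wq = alloc_workqueue("ag-scan", WQ_UNBOUND, 0);
	if (!wq)
		return -ENOMEM;

	works = kcalloc(agcount, sizeof(*works), GFP_KERNEL);
	if (!works) {
		destroy_workqueue(wq);
		return -ENOMEM;
	}

	/* Fan out: queue one work item per allocation group. */
	for (i = 0; i < agcount; i++) {
		works[i].agno = i;
		init_completion(&works[i].done);
		INIT_WORK(&works[i].work, ag_scan_worker);
		queue_work(wq, &works[i].work);
	}

	/* Fan in: wait for every worker, keep the first reported error. */
	for (i = 0; i < agcount; i++) {
		wait_for_completion(&works[i].done);
		if (works[i].error && !error)
			error = works[i].error;
	}

	kfree(works);
	destroy_workqueue(wq);
	return error;
}

Waiting on one completion per AG, rather than flushing the whole workqueue,
also gives each worker a natural place to hand back a per-AG result, which
appears to be what the qa_error field in struct xfs_dq_adjuster is meant for.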