On Tue, Jan 23, 2018 at 10:10:21AM -0800, Darrick J. Wong wrote: > On Tue, Jan 23, 2018 at 04:55:43PM +0200, Nikolay Borisov wrote: > > > > > > On 23.01.2018 15:05, Carlos Maiolino wrote: > > >>> @@ -1025,17 +1016,19 @@ xfs_buf_do_callbacks( > > >>> lip->li_cb(bp, lip); > > >>> } > > >>> > > >>> - while ((lip = bp->b_li_list) != NULL) { > > >>> - bp->b_li_list = lip->li_bio_list; > > >>> - ASSERT(lip->li_cb != NULL); > > >>> + while(!list_empty(&bp->b_li_list)) > > >>> + { > > >> > > >> nit: Since you are iterating the list why not the canonical > > >> list_for_each_entry_safe it will also obviate the need for > > >> list_first_entry. > > > > > > Because the callback being called in lip->li_cb, can change the list internally, > > > so, even list_for_each_entry_safe here is not really safe, because the "safe" > > > pointer, can actually be gone before before the loop starts again > > > (see xfs_iflush_done()). > > > > But you invoke del_init before calling the callback, _safe suffix in > > list_for_each_entry just means it's safe to call list deletion functions > > without compromising list iteration. > > If I'm reading the inode item code correctly, xfs_iflush_done will skip > ahead in the list to find and process all inode items attached to the > buffer that just completed, which means that the 'n' argument to > list_for_each_entry_safe could be deleted by the time we finish the call > to li_cb, hence the while (!list_empty()) business. The list might be > totally empty. > Ditto. The xfs_buf_do_callbacks() will list_del_init() the item, but the callback can also delete the next item on the list, the one which will be saved by list_for_each_entry_safe(), which will result in an invalid pointer in the next loop iteraction. The only way we can safely remove items from the buffer's log item list in the callback, and keep the loop safe in xfs_buf_do_callbacks, is to not trust in any "next" pointer, and get the first item available after HEAD. > The comment above xfs_buf_do_callbacks states that callbacks are allowed > to play games like this (batch processing of iodone items) for the sake > of reducing ail lock contention, so while the above construction looks > funny, it seems to be the only way to accomodate this behavior. > > --D > > > > > > > > > >> > > >>> + lip = list_first_entry(&bp->b_li_list, struct xfs_log_item, > > >>> + li_bio_list); > > >>> + > > >>> /* > > >>> - * Clear the next pointer so we don't have any > > >>> + * Remove the item from the list, so we don't have any > > >>> * confusion if the item is added to another buf. > > >>> * Don't touch the log item after calling its > > >>> * callback, because it could have freed itself. > > >>> */ > > >>> - lip->li_bio_list = NULL; > > >>> - lip->li_cb(bp, lip); > > >>> + list_del_init(&lip->li_bio_list); > > >>> + lip->li_cb(bp,lip); > > >>> } > > >>> } > > >>> > > >>> @@ -1051,8 +1044,7 @@ STATIC void > > >>> xfs_buf_do_callbacks_fail( > > >>> struct xfs_buf *bp) > > >>> { > > >>> - struct xfs_log_item *lip = bp->b_li_list; > > >>> - struct xfs_log_item *next; > > >>> + struct xfs_log_item *lip; > > >>> struct xfs_ail *ailp; > > >>> > > >>> /* > > >>> @@ -1060,14 +1052,16 @@ xfs_buf_do_callbacks_fail( > > >>> * and xfs_buf_iodone_callback_error, and they have no IO error > > >>> * callbacks. Check only for items in b_li_list. > > >>> */ > > >>> - if (lip == NULL) > > >>> + if (list_empty(&bp->b_li_list)) { > > >>> return; > > >>> - else > > >>> + } else { > > >>> + lip = list_first_entry(&bp->b_li_list, struct xfs_log_item, > > >>> + li_bio_list); > > >>> ailp = lip->li_ailp; > > >>> + } > > >>> > > >>> spin_lock(&ailp->xa_lock); > > >>> - for (; lip; lip = next) { > > >>> - next = lip->li_bio_list; > > >>> + list_for_each_entry(lip, &bp->b_li_list, li_bio_list) { > > >>> if (lip->li_ops->iop_error) > > >>> lip->li_ops->iop_error(lip, bp); > > >>> } > > >>> @@ -1079,7 +1073,7 @@ xfs_buf_iodone_callback_error( > > >>> struct xfs_buf *bp) > > >>> { > > >>> struct xfs_buf_log_item *bip = bp->b_log_item; > > >>> - struct xfs_log_item *lip = bp->b_li_list; > > >>> + struct xfs_log_item *lip; > > >>> struct xfs_mount *mp; > > >>> static ulong lasttime; > > >>> static xfs_buftarg_t *lasttarg; > > >>> @@ -1089,7 +1083,8 @@ xfs_buf_iodone_callback_error( > > >>> * The failed buffer might not have a buf_log_item attached or the > > >>> * log_item list might be empty. Get the mp from the available xfs_log_item > > >>> */ > > >>> - if (bip == NULL) > > >>> + if ((lip = list_first_entry_or_null(&bp->b_li_list, struct xfs_log_item, > > >>> + li_bio_list))) > > >>> mp = lip->li_mountp; > > >>> else > > >>> mp = bip->bli_item.li_mountp; > > >>> @@ -1203,7 +1198,7 @@ xfs_buf_iodone_callbacks( > > >>> > > >>> xfs_buf_do_callbacks(bp); > > >>> bp->b_log_item = NULL; > > >>> - bp->b_li_list = NULL; > > >>> + list_del_init(&bp->b_li_list); > > >>> bp->b_iodone = NULL; > > >>> xfs_buf_ioend(bp); > > >>> } > > >>> @@ -1248,10 +1243,9 @@ xfs_buf_iodone( > > >>> bool > > >>> xfs_buf_resubmit_failed_buffers( > > >>> struct xfs_buf *bp, > > >>> - struct xfs_log_item *lip, > > >>> struct list_head *buffer_list) > > >>> { > > >>> - struct xfs_log_item *next; > > >>> + struct xfs_log_item *lip; > > >>> > > >>> /* > > >>> * Clear XFS_LI_FAILED flag from all items before resubmit > > >>> @@ -1259,10 +1253,8 @@ xfs_buf_resubmit_failed_buffers( > > >>> * XFS_LI_FAILED set/clear is protected by xa_lock, caller this > > >>> * function already have it acquired > > >>> */ > > >>> - for (; lip; lip = next) { > > >>> - next = lip->li_bio_list; > > >>> + list_for_each_entry(lip, &bp->b_li_list, li_bio_list) > > >>> xfs_clear_li_failed(lip); > > >>> - } > > >>> > > >>> /* Add this buffer back to the delayed write list */ > > >>> return xfs_buf_delwri_queue(bp, buffer_list); > > >>> diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h > > >>> index 0febfbbf6ba9..643f53dcfe51 100644 > > >>> --- a/fs/xfs/xfs_buf_item.h > > >>> +++ b/fs/xfs/xfs_buf_item.h > > >>> @@ -71,7 +71,6 @@ void xfs_buf_attach_iodone(struct xfs_buf *, > > >>> void xfs_buf_iodone_callbacks(struct xfs_buf *); > > >>> void xfs_buf_iodone(struct xfs_buf *, struct xfs_log_item *); > > >>> bool xfs_buf_resubmit_failed_buffers(struct xfs_buf *, > > >>> - struct xfs_log_item *, > > >>> struct list_head *); > > >>> > > >>> extern kmem_zone_t *xfs_buf_item_zone; > > >>> diff --git a/fs/xfs/xfs_dquot_item.c b/fs/xfs/xfs_dquot_item.c > > >>> index 664dea105e76..4a4539a4fad5 100644 > > >>> --- a/fs/xfs/xfs_dquot_item.c > > >>> +++ b/fs/xfs/xfs_dquot_item.c > > >>> @@ -179,7 +179,7 @@ xfs_qm_dquot_logitem_push( > > >>> if (!xfs_buf_trylock(bp)) > > >>> return XFS_ITEM_LOCKED; > > >>> > > >>> - if (!xfs_buf_resubmit_failed_buffers(bp, lip, buffer_list)) > > >>> + if (!xfs_buf_resubmit_failed_buffers(bp, buffer_list)) > > >>> rval = XFS_ITEM_FLUSHING; > > >>> > > >>> xfs_buf_unlock(bp); > > >>> diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c > > >>> index 0a4c2e48402f..8e26d3121be6 100644 > > >>> --- a/fs/xfs/xfs_inode.c > > >>> +++ b/fs/xfs/xfs_inode.c > > >>> @@ -2214,7 +2214,7 @@ xfs_ifree_cluster( > > >>> struct xfs_buf *bp; > > >>> xfs_inode_t *ip; > > >>> xfs_inode_log_item_t *iip; > > >>> - xfs_log_item_t *lip; > > >>> + struct xfs_log_item *lip; > > >>> struct xfs_perag *pag; > > >>> xfs_ino_t inum; > > >>> > > >>> @@ -2272,8 +2272,7 @@ xfs_ifree_cluster( > > >>> * stale first, we will not attempt to lock them in the loop > > >>> * below as the XFS_ISTALE flag will be set. > > >>> */ > > >>> - lip = bp->b_li_list; > > >>> - while (lip) { > > >>> + list_for_each_entry(lip, &bp->b_li_list, li_bio_list) { > > >>> if (lip->li_type == XFS_LI_INODE) { > > >>> iip = (xfs_inode_log_item_t *)lip; > > >>> ASSERT(iip->ili_logged == 1); > > >>> @@ -2283,7 +2282,6 @@ xfs_ifree_cluster( > > >>> &iip->ili_item.li_lsn); > > >>> xfs_iflags_set(iip->ili_inode, XFS_ISTALE); > > >>> } > > >>> - lip = lip->li_bio_list; > > >>> } > > >>> > > >>> > > >>> @@ -3649,7 +3647,7 @@ xfs_iflush_int( > > >>> /* generate the checksum. */ > > >>> xfs_dinode_calc_crc(mp, dip); > > >>> > > >>> - ASSERT(bp->b_li_list != NULL); > > >>> + ASSERT(!list_empty(&bp->b_li_list)); > > >>> ASSERT(bp->b_iodone != NULL); > > >>> return 0; > > >>> > > >>> diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c > > >>> index 993736032b4b..ddfc2c80af5e 100644 > > >>> --- a/fs/xfs/xfs_inode_item.c > > >>> +++ b/fs/xfs/xfs_inode_item.c > > >>> @@ -521,7 +521,7 @@ xfs_inode_item_push( > > >>> if (!xfs_buf_trylock(bp)) > > >>> return XFS_ITEM_LOCKED; > > >>> > > >>> - if (!xfs_buf_resubmit_failed_buffers(bp, lip, buffer_list)) > > >>> + if (!xfs_buf_resubmit_failed_buffers(bp, buffer_list)) > > >>> rval = XFS_ITEM_FLUSHING; > > >>> > > >>> xfs_buf_unlock(bp); > > >>> @@ -712,37 +712,23 @@ xfs_iflush_done( > > >>> struct xfs_log_item *lip) > > >>> { > > >>> struct xfs_inode_log_item *iip; > > >>> - struct xfs_log_item *blip; > > >>> - struct xfs_log_item *next; > > >>> - struct xfs_log_item *prev; > > >>> + struct xfs_log_item *blip, *n; > > >>> struct xfs_ail *ailp = lip->li_ailp; > > >>> int need_ail = 0; > > >>> + LIST_HEAD(tmp); > > >>> > > >>> /* > > >>> * Scan the buffer IO completions for other inodes being completed and > > >>> * attach them to the current inode log item. > > >>> */ > > >>> - blip = bp->b_li_list; > > >>> - prev = NULL; > > >>> - while (blip != NULL) { > > >>> - if (blip->li_cb != xfs_iflush_done) { > > >>> - prev = blip; > > >>> - blip = blip->li_bio_list; > > >>> - continue; > > >>> - } > > >>> > > >>> - /* remove from list */ > > >>> - next = blip->li_bio_list; > > >>> - if (!prev) { > > >>> - bp->b_li_list = next; > > >>> - } else { > > >>> - prev->li_bio_list = next; > > >>> - } > > >>> + list_add_tail(&lip->li_bio_list, &tmp); > > >>> > > >>> - /* add to current list */ > > >>> - blip->li_bio_list = lip->li_bio_list; > > >>> - lip->li_bio_list = blip; > > >>> + list_for_each_entry_safe(blip, n, &bp->b_li_list, li_bio_list) { > > >>> + if (lip->li_cb != xfs_iflush_done) > > >>> + continue; > > >>> > > >>> + list_move_tail(&blip->li_bio_list, &tmp); > > >>> /* > > >>> * while we have the item, do the unlocked check for needing > > >>> * the AIL lock. > > >>> @@ -751,8 +737,6 @@ xfs_iflush_done( > > >>> if ((iip->ili_logged && blip->li_lsn == iip->ili_flush_lsn) || > > >>> (blip->li_flags & XFS_LI_FAILED)) > > >>> need_ail++; > > >>> - > > >>> - blip = next; > > >>> } > > >>> > > >>> /* make sure we capture the state of the initial inode. */ > > >>> @@ -775,7 +759,7 @@ xfs_iflush_done( > > >>> > > >>> /* this is an opencoded batch version of xfs_trans_ail_delete */ > > >>> spin_lock(&ailp->xa_lock); > > >>> - for (blip = lip; blip; blip = blip->li_bio_list) { > > >>> + list_for_each_entry(blip, &tmp, li_bio_list) { > > >>> if (INODE_ITEM(blip)->ili_logged && > > >>> blip->li_lsn == INODE_ITEM(blip)->ili_flush_lsn) > > >>> mlip_changed |= xfs_ail_delete_one(ailp, blip); > > >>> @@ -801,15 +785,14 @@ xfs_iflush_done( > > >>> * ili_last_fields bits now that we know that the data corresponding to > > >>> * them is safely on disk. > > >>> */ > > >>> - for (blip = lip; blip; blip = next) { > > >>> - next = blip->li_bio_list; > > >>> - blip->li_bio_list = NULL; > > >>> - > > >>> + list_for_each_entry_safe(blip, n, &tmp, li_bio_list) { > > >>> + list_del_init(&blip->li_bio_list); > > >>> iip = INODE_ITEM(blip); > > >>> iip->ili_logged = 0; > > >>> iip->ili_last_fields = 0; > > >>> xfs_ifunlock(iip->ili_inode); > > >>> } > > >>> + list_del(&tmp); > > >>> } > > >>> > > >>> /* > > >>> diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c > > >>> index 861c84e1f674..1b5082aeb538 100644 > > >>> --- a/fs/xfs/xfs_log.c > > >>> +++ b/fs/xfs/xfs_log.c > > >>> @@ -1047,6 +1047,7 @@ xfs_log_item_init( > > >>> > > >>> INIT_LIST_HEAD(&item->li_ail); > > >>> INIT_LIST_HEAD(&item->li_cil); > > >>> + INIT_LIST_HEAD(&item->li_bio_list); > > >>> } > > >>> > > >>> /* > > >>> diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h > > >>> index 571fe499a48f..950cb9b4e36e 100644 > > >>> --- a/fs/xfs/xfs_trans.h > > >>> +++ b/fs/xfs/xfs_trans.h > > >>> @@ -50,7 +50,7 @@ typedef struct xfs_log_item { > > >>> uint li_type; /* item type */ > > >>> uint li_flags; /* misc flags */ > > >>> struct xfs_buf *li_buf; /* real buffer pointer */ > > >>> - struct xfs_log_item *li_bio_list; /* buffer item list */ > > >>> + struct list_head li_bio_list; /* buffer item list */ > > >>> void (*li_cb)(struct xfs_buf *, > > >>> struct xfs_log_item *); > > >>> /* buffer item iodone */ > > >>> > > > > > -- > > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in > > the body of a message to majordomo@xxxxxxxxxxxxxxx > > More majordomo info at http://vger.kernel.org/majordomo-info.html > -- > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in > the body of a message to majordomo@xxxxxxxxxxxxxxx > More majordomo info at http://vger.kernel.org/majordomo-info.html -- Carlos -- To unsubscribe from this list: send the line "unsubscribe linux-xfs" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html