Hi, Christoph.

On 2023/12/7 15:27, Christoph Hellwig wrote:
> The ->map_blocks interface returns a valid range for writeback, but we
> still call back into it for every block, which is a bit inefficient.
>
> Change iomap_writepage_map to use the valid range in the map until the
> end of the folio or the dirty range inside the folio instead of calling
> back into every block.
>
> Note that the range is not used over folio boundaries as we need to be
> able to check the mapping sequence count under the folio lock.
>
> Signed-off-by: Christoph Hellwig <hch@xxxxxx>
> ---
>  fs/iomap/buffered-io.c | 116 ++++++++++++++++++++++++++++-------------
>  include/linux/iomap.h  |   7 +++
>  2 files changed, 88 insertions(+), 35 deletions(-)
> [..]
> @@ -1738,29 +1775,41 @@ static int iomap_add_to_ioend(struct iomap_writepage_ctx *wpc,
>
>  static int iomap_writepage_map_blocks(struct iomap_writepage_ctx *wpc,
>  		struct writeback_control *wbc, struct folio *folio,
> -		struct inode *inode, u64 pos, unsigned *count)
> +		struct inode *inode, u64 pos, unsigned dirty_len,
> +		unsigned *count)
>  {
>  	int error;
>
> -	error = wpc->ops->map_blocks(wpc, inode, pos);
> -	if (error)
> -		goto fail;
> -	trace_iomap_writepage_map(inode, &wpc->iomap);
> -
> -	switch (wpc->iomap.type) {
> -	case IOMAP_INLINE:
> -		WARN_ON_ONCE(1);
> -		error = -EIO;
> -		break;
> -	case IOMAP_HOLE:
> -		break;
> -	default:
> -		error = iomap_add_to_ioend(wpc, wbc, folio, inode, pos);
> -		if (!error)
> -			(*count)++;
> -	}
> +	do {
> +		unsigned map_len;
> +
> +		error = wpc->ops->map_blocks(wpc, inode, pos);
> +		if (error)
> +			break;
> +		trace_iomap_writepage_map(inode, &wpc->iomap);
> +
> +		map_len = min_t(u64, dirty_len,
> +			wpc->iomap.offset + wpc->iomap.length - pos);
> +		WARN_ON_ONCE(!folio->private && map_len < dirty_len);

While I was debugging this series on ext4, I found it would be more
convenient to have map_len or dirty_len in this tracepoint, so I'd
suggest adding them (a rough, untested sketch of what I mean is at the
end of this mail, below the quoted patch).

> +
> +		switch (wpc->iomap.type) {
> +		case IOMAP_INLINE:
> +			WARN_ON_ONCE(1);
> +			error = -EIO;
> +			break;
> +		case IOMAP_HOLE:
> +			break;

BTW, I want to ask a question that is not directly related to this patch
series. Would you agree to add an IOMAP_DELALLOC case here that
re-dirties the folio?

The background is that on ext4, in data=ordered mode, the jbd2 thread
calls ext4_normal_submit_inode_data_buffers() to submit data blocks, but
it can only submit mapped blocks. Right now we skip unmapped blocks and
re-dirty folios in
ext4_do_writepages()->mpage_prepare_extent_to_map()->..->ext4_bio_write_folio().
We have to inherit this logic when converting ext4 to iomap, so I suppose
ext4's ->map_blocks() would return IOMAP_DELALLOC for this case, and
iomap would do something like:

+		case IOMAP_DELALLOC:
+			iomap_set_range_dirty(folio, offset_in_folio(folio, pos),
+					map_len);
+			folio_redirty_for_writepage(wbc, folio);
+			break;

Thanks,
Yi.

> +		default:
> +			error = iomap_add_to_ioend(wpc, wbc, folio, inode, pos,
> +					map_len);
> +			if (!error)
> +				(*count)++;
> +			break;
> +		}
> +		dirty_len -= map_len;
> +		pos += map_len;
> +	} while (dirty_len && !error);
>
> -fail:
>  	/*
>  	 * We cannot cancel the ioend directly here on error. We may have
>  	 * already set other pages under writeback and hence we have to run I/O
> @@ -1827,7 +1876,7 @@ static bool iomap_writepage_handle_eof(struct folio *folio, struct inode *inode,
>  		 * beyond i_size.
>  		 */
>  		folio_zero_segment(folio, poff, folio_size(folio));
> -		*end_pos = isize;
> +		*end_pos = round_up(isize, i_blocksize(inode));
>  	}
>
>  	return true;
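
P.S. Here is the rough, untested sketch of the tracepoint change I
mentioned above. It assumes iomap_writepage_map is promoted from the
shared iomap_class event in include/trace/events/iomap.h to a standalone
TRACE_EVENT so it can carry the extra arguments; the exact field names
and format string are just placeholders for illustration:

/*
 * Sketch for include/trace/events/iomap.h: a standalone event that also
 * records the current position and the remaining dirty length, so each
 * iteration of the mapping loop is visible in the trace.
 */
TRACE_EVENT(iomap_writepage_map,
	TP_PROTO(struct inode *inode, u64 pos, unsigned int dirty_len,
		 struct iomap *iomap),
	TP_ARGS(inode, pos, dirty_len, iomap),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(u64, ino)
		__field(u64, pos)
		__field(unsigned int, dirty_len)
		__field(u64, map_offset)
		__field(u64, map_length)
		__field(u16, map_type)
	),
	TP_fast_assign(
		__entry->dev = inode->i_sb->s_dev;
		__entry->ino = inode->i_ino;
		__entry->pos = pos;
		__entry->dirty_len = dirty_len;
		__entry->map_offset = iomap->offset;
		__entry->map_length = iomap->length;
		__entry->map_type = iomap->type;
	),
	TP_printk("dev %d:%d ino 0x%llx pos 0x%llx dirty_len 0x%x "
		  "map_offset 0x%llx map_length 0x%llx map_type %u",
		  MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino,
		  __entry->pos, __entry->dirty_len, __entry->map_offset,
		  __entry->map_length, __entry->map_type)
);

with the call site in iomap_writepage_map_blocks() updated to pass the
extra arguments:

		trace_iomap_writepage_map(inode, pos, dirty_len, &wpc->iomap);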