On Thu, Oct 31, 2024 at 10:04:47AM -0400, Brian Foster wrote:
> In preparation for special handling of subranges, lift the zeroed
> mapping logic from the iterator into the caller. Since this puts the
> pagecache dirty check and flushing in the same place, streamline the
> comments a bit as well.
>
> Signed-off-by: Brian Foster <bfoster@xxxxxxxxxx>
> ---
>  fs/iomap/buffered-io.c | 63 ++++++++++++++----------------------------
>  1 file changed, 21 insertions(+), 42 deletions(-)
>
> diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
> index aa587b2142e2..60386cb7b9ef 100644
> --- a/fs/iomap/buffered-io.c
> +++ b/fs/iomap/buffered-io.c
> @@ -1365,40 +1365,12 @@ static inline int iomap_zero_iter_flush_and_stale(struct iomap_iter *i)
>  	return filemap_write_and_wait_range(mapping, i->pos, end);
>  }
>
> -static loff_t iomap_zero_iter(struct iomap_iter *iter, bool *did_zero,
> -		bool *range_dirty)
> +static loff_t iomap_zero_iter(struct iomap_iter *iter, bool *did_zero)
>  {
> -	const struct iomap *srcmap = iomap_iter_srcmap(iter);
>  	loff_t pos = iter->pos;
>  	loff_t length = iomap_length(iter);
>  	loff_t written = 0;
>
> -	/*
> -	 * We must zero subranges of unwritten mappings that might be dirty in
> -	 * pagecache from previous writes. We only know whether the entire range
> -	 * was clean or not, however, and dirty folios may have been written
> -	 * back or reclaimed at any point after mapping lookup.
> -	 *
> -	 * The easiest way to deal with this is to flush pagecache to trigger
> -	 * any pending unwritten conversions and then grab the updated extents
> -	 * from the fs. The flush may change the current mapping, so mark it
> -	 * stale for the iterator to remap it for the next pass to handle
> -	 * properly.
> -	 *
> -	 * Note that holes are treated the same as unwritten because zero range
> -	 * is (ab)used for partial folio zeroing in some cases. 
Hole backed
> -	 * post-eof ranges can be dirtied via mapped write and the flush
> -	 * triggers writeback time post-eof zeroing.
> -	 */
> -	if (srcmap->type == IOMAP_HOLE || srcmap->type == IOMAP_UNWRITTEN) {
> -		if (*range_dirty) {
> -			*range_dirty = false;
> -			return iomap_zero_iter_flush_and_stale(iter);
> -		}
> -		/* range is clean and already zeroed, nothing to do */
> -		return length;
> -	}
> -
>  	do {
>  		struct folio *folio;
>  		int status;
> @@ -1448,24 +1420,31 @@ iomap_zero_range(struct inode *inode, loff_t pos, loff_t len, bool *did_zero,
>  	bool range_dirty;
>
>  	/*
> -	 * Zero range wants to skip pre-zeroed (i.e. unwritten) mappings, but
> -	 * pagecache must be flushed to ensure stale data from previous
> -	 * buffered writes is not exposed. A flush is only required for certain
> -	 * types of mappings, but checking pagecache after mapping lookup is
> -	 * racy with writeback and reclaim.
> +	 * Zero range can skip mappings that are zero on disk so long as
> +	 * pagecache is clean. If pagecache was dirty prior to zero range, the
> +	 * mapping converts on writeback completion and must be zeroed.
>  	 *
> -	 * Therefore, check the entire range first and pass along whether any
> -	 * part of it is dirty. If so and an underlying mapping warrants it,
> -	 * flush the cache at that point. This trades off the occasional false
> -	 * positive (and spurious flush, if the dirty data and mapping don't
> -	 * happen to overlap) for simplicity in handling a relatively uncommon
> -	 * situation.
> +	 * The simplest way to deal with this is to flush pagecache and process
> +	 * the updated mappings. To avoid an unconditional flush, check dirty
> +	 * state and defer the flush until a combination of dirty pagecache and
> +	 * at least one mapping that might convert on writeback is seen. 
>  	 */
>  	range_dirty = filemap_range_needs_writeback(inode->i_mapping,
>  					pos, pos + len - 1);
> +	while ((ret = iomap_iter(&iter, ops)) > 0) {
> +		const struct iomap *s = iomap_iter_srcmap(&iter);

Needs a blank line after the declaration, but other than picking nits
this looks ok to me.

--D

> +		if (s->type == IOMAP_HOLE || s->type == IOMAP_UNWRITTEN) {
> +			loff_t p = iomap_length(&iter);
> +			if (range_dirty) {
> +				range_dirty = false;
> +				p = iomap_zero_iter_flush_and_stale(&iter);
> +			}
> +			iter.processed = p;
> +			continue;
> +		}
>
> -	while ((ret = iomap_iter(&iter, ops)) > 0)
> -		iter.processed = iomap_zero_iter(&iter, did_zero, &range_dirty);
> +		iter.processed = iomap_zero_iter(&iter, did_zero);
> +	}
>  	return ret;
>  }
>  EXPORT_SYMBOL_GPL(iomap_zero_range);
> --
> 2.46.2
>
>