> > index 49938419fcc7..9f791db473e4 100644
> > --- a/fs/iomap/buffered-io.c
> > +++ b/fs/iomap/buffered-io.c
> > @@ -1990,6 +1990,12 @@ EXPORT_SYMBOL_GPL(iomap_writepages);
> >  static int __init iomap_init(void)
> >  {
> > +	int ret;
> > +
> > +	ret = iomap_dio_init();
> > +	if (ret)
> > +		return ret;
> > +
> >  	return bioset_init(&iomap_ioend_bioset, 4 * (PAGE_SIZE / SECTOR_SIZE),
> >  			offsetof(struct iomap_ioend, io_bio),
> >  			BIOSET_NEED_BVECS);
> 
> I suppose that it does not matter that zero_fs_block is leaked if this fails
> (or is it even leaked?), as I don't think that failing that bioset_init()
> call is handled at all.

If bioset_init() fails, then we have even bigger problems than just a leaked
64k of memory? ;)

Do you have something like this in mind?

diff --git a/fs/internal.h b/fs/internal.h
index 30217f0ff4c6..def96c7ed9ea 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -39,6 +39,7 @@ int __block_write_begin_int(struct folio *folio, loff_t pos, unsigned len,
  * iomap/direct-io.c
  */
 int iomap_dio_init(void);
+void iomap_dio_exit(void);
 
 /*
  * char_dev.c
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 9f791db473e4..8d8b9e62201f 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -1994,10 +1994,16 @@ static int __init iomap_init(void)
 
 	ret = iomap_dio_init();
 	if (ret)
-		return ret;
+		goto out;
 
-	return bioset_init(&iomap_ioend_bioset, 4 * (PAGE_SIZE / SECTOR_SIZE),
+	ret = bioset_init(&iomap_ioend_bioset, 4 * (PAGE_SIZE / SECTOR_SIZE),
 			offsetof(struct iomap_ioend, io_bio),
 			BIOSET_NEED_BVECS);
+	if (!ret)
+		goto out;
+
+	iomap_dio_exit();
+out:
+	return ret;
 }
 fs_initcall(iomap_init);
diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
index b95600b254a3..f4c9445ca50d 100644
--- a/fs/iomap/direct-io.c
+++ b/fs/iomap/direct-io.c
@@ -69,6 +69,12 @@ int iomap_dio_init(void)
 	return 0;
 }
 
+void iomap_dio_exit(void)
+{
+	__free_pages(zero_fs_block, ZERO_FSB_ORDER);
+
+}
+
 static struct bio *iomap_dio_alloc_bio(const struct iomap_iter *iter,
 		struct iomap_dio *dio, unsigned short nr_vecs, blk_opf_t opf)
 {

> > +
> >  static struct bio *iomap_dio_alloc_bio(const struct iomap_iter *iter,
> >  		struct iomap_dio *dio, unsigned short nr_vecs, blk_opf_t opf)
> >  {
> > @@ -236,17 +253,22 @@ static void iomap_dio_zero(const struct iomap_iter *iter, struct iomap_dio *dio,
> >  		loff_t pos, unsigned len)
> >  {
> >  	struct inode *inode = file_inode(dio->iocb->ki_filp);
> > -	struct page *page = ZERO_PAGE(0);
> >  	struct bio *bio;
> > +
> > +	/*
> > +	 * Max block size supported is 64k
> > +	 */
> > +	WARN_ON_ONCE(len > ZERO_FSB_SIZE);
> 
> JFYI, as mentioned in
> https://lore.kernel.org/linux-xfs/20240429174746.2132161-1-john.g.garry@xxxxxxxxxx/T/#m5354e2b2531a5552a8b8acd4a95342ed4d7500f2,
> we would like to support an arbitrary size. Maybe I will need to loop for
> zeroing sizes > 64K.

The initial patches were looping with ZERO_PAGE(0), but the initial feedback
was to use a huge zero page. However, when I discussed that at LSF, people
thought we would be using a lot of memory just for sub-block zeroing,
especially on architectures with a 64k base page size. So for now a good
tradeoff between memory usage and efficiency was to use a 64k buffer, as that
is the maximum FSB size we support. [1]

IIUC, you will also be using this function to zero out the extent and not
just an FSB? I think we could resort to looping until we have a way to
request arbitrarily sized zero folios without having to allocate one in
iomap_dio_alloc_bio() for every IO.
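
Just to illustrate what I mean by looping (untested sketch, not part of this
series): a hypothetical helper, say iomap_dio_zero_range(), could chop the
range into ZERO_FSB_SIZE chunks and go through the existing iomap_dio_zero(),
so every chunk fits in the preallocated zero_fs_block and nothing extra is
allocated per IO:

/*
 * Untested sketch: zero [pos, pos + len) in ZERO_FSB_SIZE (64k) chunks.
 * Each chunk stays within the preallocated zero buffer, so the
 * WARN_ON_ONCE() in iomap_dio_zero() never triggers.
 */
static void iomap_dio_zero_range(const struct iomap_iter *iter,
		struct iomap_dio *dio, loff_t pos, u64 len)
{
	while (len) {
		unsigned int chunk = min_t(u64, len, ZERO_FSB_SIZE);

		iomap_dio_zero(iter, dio, pos, chunk);
		pos += chunk;
		len -= chunk;
	}
}

The downside is one bio per 64k of zeroes for large extents, but that seems
an acceptable tradeoff until arbitrary zero folios are available.
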
[1] https://lore.kernel.org/linux-xfs/20240529134509.120826-8-kernel@xxxxxxxxxxxxxxxx/

--
Pankaj