On Tue, 10 Dec 2019 13:43:02 -0700 Jens Axboe <axboe@xxxxxxxxx> wrote: > If RWF_UNCACHED is set for io_uring (or pwritev2(2)), we'll drop the > cache instantiated for buffered writes. If new pages aren't > instantiated, we leave them alone. This provides similar semantics to > reads with RWF_UNCACHED set. > Would be nice to see a description of the proposed userspace API(s) for exploiting this feature. > --- a/include/linux/fs.h > +++ b/include/linux/fs.h > @@ -285,6 +285,7 @@ enum positive_aop_returns { > #define AOP_FLAG_NOFS 0x0002 /* used by filesystem to direct > * helper code (eg buffer layer) > * to clear GFP_FS from alloc */ > +#define AOP_FLAG_UNCACHED 0x0004 > > /* > * oh the beauties of C type declarations. > @@ -3106,6 +3107,10 @@ extern ssize_t generic_file_direct_write(struct kiocb *, struct iov_iter *); > extern ssize_t generic_perform_write(struct file *, struct iov_iter *, > struct kiocb *); > > +struct pagevec; > +extern void write_drop_cached_pages(struct pagevec *pvec, > + struct address_space *mapping); > + > ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos, > rwf_t flags); > ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos, > diff --git a/mm/filemap.c b/mm/filemap.c > index fe37bd2b2630..2e36129ebe38 100644 > --- a/mm/filemap.c > +++ b/mm/filemap.c > @@ -3287,10 +3287,12 @@ struct page *grab_cache_page_write_begin(struct address_space *mapping, > pgoff_t index, unsigned flags) > { > struct page *page; > - int fgp_flags = FGP_LOCK|FGP_WRITE|FGP_CREAT; > + int fgp_flags = FGP_LOCK|FGP_WRITE; > > if (flags & AOP_FLAG_NOFS) > fgp_flags |= FGP_NOFS; > + if (!(flags & AOP_FLAG_UNCACHED)) > + fgp_flags |= FGP_CREAT; > > page = pagecache_get_page(mapping, index, fgp_flags, > mapping_gfp_mask(mapping)); > @@ -3301,21 +3303,65 @@ struct page *grab_cache_page_write_begin(struct address_space *mapping, > } > EXPORT_SYMBOL(grab_cache_page_write_begin); > > +/* > + * Start writeback on the pages in 
pgs[], and then try and remove those pages > + * from the page cache. Used with RWF_UNCACHED. > + */ > +void write_drop_cached_pages(struct pagevec *pvec, > + struct address_space *mapping) > +{ > + loff_t start, end; > + int i; > + > + end = 0; > + start = LLONG_MAX; > + for (i = 0; i < pagevec_count(pvec); i++) { > + loff_t off = page_offset(pvec->pages[i]); > + if (off < start) > + start = off; > + if (off > end) > + end = off; > + } > + > + __filemap_fdatawrite_range(mapping, start, end, WB_SYNC_NONE); > + > + for (i = 0; i < pagevec_count(pvec); i++) { > + struct page *page = pvec->pages[i]; > + > + lock_page(page); > + if (page->mapping == mapping) { > + wait_on_page_writeback(page); > + if (!page_has_private(page) || > + try_to_release_page(page, 0)) > + remove_mapping(mapping, page); > + } > + unlock_page(page); > + } This is kinda invalidate_inode_pages2_range(), only much less so? Why doesn't this code need to do all the things which invalidate_inode_pages2_range() does? What happens if these pages are mmapped, faulted in? Not faulted in? > + pagevec_release(pvec); > +}