In order to make informed placement and reclaim decisions, page cache allocation requires the shadow information of refaulting pages. Every site that does a find_or_create()-style page cache allocation is converted to pass the shadow page found in the faulting slot of the radix tree to the page_cache_alloc() family of functions (__page_cache_alloc(), page_cache_alloc(), page_cache_alloc_cold() and page_cache_alloc_readahead()), where it can be used in subsequent patches to influence reclaim behavior. Allocation sites that have no looked-up slot to hand over, such as the lustre and ceph ones, pass NULL instead. Signed-off-by: Johannes Weiner <hannes@xxxxxxxxxxx> --- drivers/staging/lustre/lustre/llite/dir.c | 2 +- fs/btrfs/compression.c | 2 +- fs/cachefiles/rdwr.c | 13 +++++---- fs/ceph/xattr.c | 2 +- fs/logfs/readwrite.c | 6 ++-- fs/ntfs/file.c | 7 +++-- fs/splice.c | 6 ++-- include/linux/pagemap.h | 20 ++++++++------ mm/filemap.c | 46 +++++++++++++++++-------------- mm/readahead.c | 2 +- net/ceph/pagelist.c | 4 +-- net/ceph/pagevec.c | 2 +- 12 files changed, 61 insertions(+), 51 deletions(-) diff --git a/drivers/staging/lustre/lustre/llite/dir.c b/drivers/staging/lustre/lustre/llite/dir.c index 2ca8c45..ac63e4d 100644 --- a/drivers/staging/lustre/lustre/llite/dir.c +++ b/drivers/staging/lustre/lustre/llite/dir.c @@ -172,7 +172,7 @@ static int ll_dir_filler(void *_hash, struct page *page0) max_pages = 1; } for (npages = 1; npages < max_pages; npages++) { - page = page_cache_alloc_cold(inode->i_mapping); + page = page_cache_alloc_cold(inode->i_mapping, NULL); if (!page) break; page_pool[npages] = page; diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 5ce2c0f..f23bb17 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -483,7 +483,7 @@ static noinline int add_ra_bio_pages(struct inode *inode, } page = __page_cache_alloc(mapping_gfp_mask(mapping) & - ~__GFP_FS); + ~__GFP_FS, page); if (!page) break; diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c index ebaff36..1b34a42 100644 --- a/fs/cachefiles/rdwr.c +++ b/fs/cachefiles/rdwr.c @@ -254,13 +254,13 @@ static int cachefiles_read_backing_file_one(struct cachefiles_object *object, newpage = NULL; for (;;) { 
- backpage = find_get_page(bmapping, netpage->index); - if (backpage) + backpage = __find_get_page(bmapping, netpage->index); + if (backpage && !radix_tree_exceptional_entry(backpage)) goto backing_page_already_present; if (!newpage) { newpage = __page_cache_alloc(cachefiles_gfp | - __GFP_COLD); + __GFP_COLD, backpage); if (!newpage) goto nomem_monitor; } @@ -499,13 +499,14 @@ static int cachefiles_read_backing_file(struct cachefiles_object *object, } for (;;) { - backpage = find_get_page(bmapping, netpage->index); - if (backpage) + backpage = __find_get_page(bmapping, netpage->index); + if (backpage && !radix_tree_exceptional_entry(backpage)) goto backing_page_already_present; if (!newpage) { newpage = __page_cache_alloc(cachefiles_gfp | - __GFP_COLD); + __GFP_COLD, + backpage); if (!newpage) goto nomem; } diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index be661d8..a5d2b86 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c @@ -816,7 +816,7 @@ static int ceph_sync_setxattr(struct dentry *dentry, const char *name, return -ENOMEM; err = -ENOMEM; for (i = 0; i < nr_pages; i++) { - pages[i] = __page_cache_alloc(GFP_NOFS); + pages[i] = __page_cache_alloc(GFP_NOFS, NULL); if (!pages[i]) { nr_pages = i; goto out; diff --git a/fs/logfs/readwrite.c b/fs/logfs/readwrite.c index 9a59cba..67c669a 100644 --- a/fs/logfs/readwrite.c +++ b/fs/logfs/readwrite.c @@ -316,9 +316,9 @@ static struct page *logfs_get_write_page(struct inode *inode, u64 bix, int err; repeat: - page = find_get_page(mapping, index); - if (!page) { - page = __page_cache_alloc(GFP_NOFS); + page = __find_get_page(mapping, index); + if (!page || radix_tree_exceptional_entry(page)) { + page = __page_cache_alloc(GFP_NOFS, page); if (!page) return NULL; err = add_to_page_cache_lru(page, mapping, index, GFP_NOFS); diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c index c5670b8..7aee2d1 100644 --- a/fs/ntfs/file.c +++ b/fs/ntfs/file.c @@ -413,10 +413,11 @@ static inline int __ntfs_grab_cache_pages(struct address_space 
*mapping, BUG_ON(!nr_pages); err = nr = 0; do { - pages[nr] = find_lock_page(mapping, index); - if (!pages[nr]) { + pages[nr] = __find_lock_page(mapping, index); + if (!pages[nr] || radix_tree_exceptional_entry(pages[nr])) { if (!*cached_page) { - *cached_page = page_cache_alloc(mapping); + *cached_page = page_cache_alloc(mapping, + pages[nr]); if (unlikely(!*cached_page)) { err = -ENOMEM; goto err_out; diff --git a/fs/splice.c b/fs/splice.c index 3b7ee65..edc54ae 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -353,12 +353,12 @@ __generic_file_splice_read(struct file *in, loff_t *ppos, * Page could be there, find_get_pages_contig() breaks on * the first hole. */ - page = find_get_page(mapping, index); - if (!page) { + page = __find_get_page(mapping, index); + if (!page || radix_tree_exceptional_entry(page)) { /* * page didn't exist, allocate one. */ - page = page_cache_alloc_cold(mapping); + page = page_cache_alloc_cold(mapping, page); if (!page) break; diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index db3a78b..4b24236 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -228,28 +228,32 @@ static inline void page_unfreeze_refs(struct page *page, int count) } #ifdef CONFIG_NUMA -extern struct page *__page_cache_alloc(gfp_t gfp); +extern struct page *__page_cache_alloc(gfp_t gfp, struct page *shadow); #else -static inline struct page *__page_cache_alloc(gfp_t gfp) +static inline struct page *__page_cache_alloc(gfp_t gfp, struct page *shadow) { return alloc_pages(gfp, 0); } #endif -static inline struct page *page_cache_alloc(struct address_space *x) +static inline struct page *page_cache_alloc(struct address_space *x, + struct page *shadow) { - return __page_cache_alloc(mapping_gfp_mask(x)); + return __page_cache_alloc(mapping_gfp_mask(x), shadow); } -static inline struct page *page_cache_alloc_cold(struct address_space *x) +static inline struct page *page_cache_alloc_cold(struct address_space *x, + struct page *shadow) { - return 
__page_cache_alloc(mapping_gfp_mask(x)|__GFP_COLD); + return __page_cache_alloc(mapping_gfp_mask(x)|__GFP_COLD, shadow); } -static inline struct page *page_cache_alloc_readahead(struct address_space *x) +static inline struct page *page_cache_alloc_readahead(struct address_space *x, + struct page *shadow) { return __page_cache_alloc(mapping_gfp_mask(x) | - __GFP_COLD | __GFP_NORETRY | __GFP_NOWARN); + __GFP_COLD | __GFP_NORETRY | __GFP_NOWARN, + shadow); } typedef int filler_t(void *, struct page *); diff --git a/mm/filemap.c b/mm/filemap.c index 34b2f0b..d3e5578 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -538,7 +538,7 @@ int add_to_page_cache_lru(struct page *page, struct address_space *mapping, EXPORT_SYMBOL_GPL(add_to_page_cache_lru); #ifdef CONFIG_NUMA -struct page *__page_cache_alloc(gfp_t gfp) +struct page *__page_cache_alloc(gfp_t gfp, struct page *shadow) { int n; struct page *page; @@ -917,9 +917,9 @@ struct page *find_or_create_page(struct address_space *mapping, struct page *page; int err; repeat: - page = find_lock_page(mapping, index); - if (!page) { - page = __page_cache_alloc(gfp_mask); + page = __find_lock_page(mapping, index); + if (!page || radix_tree_exceptional_entry(page)) { + page = __page_cache_alloc(gfp_mask, page); if (!page) return NULL; /* @@ -1222,15 +1222,16 @@ EXPORT_SYMBOL(find_get_pages_tag); struct page * grab_cache_page_nowait(struct address_space *mapping, pgoff_t index) { - struct page *page = find_get_page(mapping, index); + struct page *page = __find_get_page(mapping, index); - if (page) { + if (page && !radix_tree_exceptional_entry(page)) { if (trylock_page(page)) return page; page_cache_release(page); return NULL; } - page = __page_cache_alloc(mapping_gfp_mask(mapping) & ~__GFP_FS); + page = __page_cache_alloc(mapping_gfp_mask(mapping) & ~__GFP_FS, + page); if (page && add_to_page_cache_lru(page, mapping, index, GFP_NOFS)) { page_cache_release(page); page = NULL; @@ -1304,8 +1305,9 @@ find_page: 
page_cache_sync_readahead(mapping, ra, filp, index, last_index - index); - page = find_get_page(mapping, index); - if (unlikely(page == NULL)) + page = __find_get_page(mapping, index); + if (unlikely(page == NULL || + radix_tree_exceptional_entry(page))) goto no_cached_page; } if (PageReadahead(page)) { @@ -1464,7 +1466,7 @@ no_cached_page: * Ok, it wasn't cached, so we need to create a new * page.. */ - page = page_cache_alloc_cold(mapping); + page = page_cache_alloc_cold(mapping, page); if (!page) { desc->error = -ENOMEM; goto out; @@ -1673,18 +1675,20 @@ EXPORT_SYMBOL(generic_file_aio_read); * page_cache_read - adds requested page to the page cache if not already there * @file: file to read * @offset: page index + * @shadow: shadow page of the page to be added * * This adds the requested page to the page cache if it isn't already there, * and schedules an I/O to read in its contents from disk. */ -static int page_cache_read(struct file *file, pgoff_t offset) +static int page_cache_read(struct file *file, pgoff_t offset, + struct page *shadow) { struct address_space *mapping = file->f_mapping; struct page *page; int ret; do { - page = page_cache_alloc_cold(mapping); + page = page_cache_alloc_cold(mapping, shadow); if (!page) return -ENOMEM; @@ -1815,8 +1819,8 @@ int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT); ret = VM_FAULT_MAJOR; retry_find: - page = find_get_page(mapping, offset); - if (!page) + page = __find_get_page(mapping, offset); + if (!page || radix_tree_exceptional_entry(page)) goto no_cached_page; } @@ -1859,7 +1863,7 @@ no_cached_page: * We're only likely to ever get here if MADV_RANDOM is in * effect. */ - error = page_cache_read(file, offset); + error = page_cache_read(file, offset, page); /* * The page we want has now been added to the page cache. 
@@ -1981,9 +1985,9 @@ static struct page *__read_cache_page(struct address_space *mapping, struct page *page; int err; repeat: - page = find_get_page(mapping, index); - if (!page) { - page = __page_cache_alloc(gfp | __GFP_COLD); + page = __find_get_page(mapping, index); + if (!page || radix_tree_exceptional_entry(page)) { + page = __page_cache_alloc(gfp | __GFP_COLD, page); if (!page) return ERR_PTR(-ENOMEM); err = add_to_page_cache_lru(page, mapping, index, gfp); @@ -2454,11 +2458,11 @@ struct page *grab_cache_page_write_begin(struct address_space *mapping, if (flags & AOP_FLAG_NOFS) gfp_notmask = __GFP_FS; repeat: - page = find_lock_page(mapping, index); - if (page) + page = __find_lock_page(mapping, index); + if (page && !radix_tree_exceptional_entry(page)) goto found; - page = __page_cache_alloc(gfp_mask & ~gfp_notmask); + page = __page_cache_alloc(gfp_mask & ~gfp_notmask, page); if (!page) return NULL; status = add_to_page_cache_lru(page, mapping, index, diff --git a/mm/readahead.c b/mm/readahead.c index 0f85996..58142ef 100644 --- a/mm/readahead.c +++ b/mm/readahead.c @@ -182,7 +182,7 @@ __do_page_cache_readahead(struct address_space *mapping, struct file *filp, if (page && !radix_tree_exceptional_entry(page)) continue; - page = page_cache_alloc_readahead(mapping); + page = page_cache_alloc_readahead(mapping, page); if (!page) break; page->index = page_offset; diff --git a/net/ceph/pagelist.c b/net/ceph/pagelist.c index 92866be..83fb56e 100644 --- a/net/ceph/pagelist.c +++ b/net/ceph/pagelist.c @@ -32,7 +32,7 @@ static int ceph_pagelist_addpage(struct ceph_pagelist *pl) struct page *page; if (!pl->num_pages_free) { - page = __page_cache_alloc(GFP_NOFS); + page = __page_cache_alloc(GFP_NOFS, NULL); } else { page = list_first_entry(&pl->free_list, struct page, lru); list_del(&page->lru); @@ -83,7 +83,7 @@ int ceph_pagelist_reserve(struct ceph_pagelist *pl, size_t space) space = (space + PAGE_SIZE - 1) >> PAGE_SHIFT; /* conv to num pages */ while (space > 
pl->num_pages_free) { - struct page *page = __page_cache_alloc(GFP_NOFS); + struct page *page = __page_cache_alloc(GFP_NOFS, NULL); if (!page) return -ENOMEM; list_add_tail(&page->lru, &pl->free_list); diff --git a/net/ceph/pagevec.c b/net/ceph/pagevec.c index 815a224..ff76422 100644 --- a/net/ceph/pagevec.c +++ b/net/ceph/pagevec.c @@ -79,7 +79,7 @@ struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags) if (!pages) return ERR_PTR(-ENOMEM); for (i = 0; i < num_pages; i++) { - pages[i] = __page_cache_alloc(flags); + pages[i] = __page_cache_alloc(flags, NULL); if (pages[i] == NULL) { ceph_release_page_vector(pages, i); return ERR_PTR(-ENOMEM); -- 1.8.3.2 -- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html