If FAULT_FLAG_AROUND is set, filemap_fault() will use find_get_pages()
for a batched page lookup.

Pages returned by find_get_pages() are handled differently: the page
with index vmf->pgoff takes the normal filemap_fault() code path. For
all other pages we do not retry locking or wait for the page to become
up-to-date; we just give up and move on to the next page.

I'm not sure how we should deal with readahead() here. For now I just
call do_async_mmap_readahead(). It probably breaks the readahead
heuristics: interleaved access looks sequential.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@xxxxxxxxxxxxxxx>
---
 mm/filemap.c | 77 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 73 insertions(+), 4 deletions(-)

diff --git a/mm/filemap.c b/mm/filemap.c
index d56d3c145b9f..4d00fc0094f6 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1593,6 +1593,64 @@ static void do_async_mmap_readahead(struct vm_area_struct *vma,
 					   page, offset, ra->ra_pages);
 }
 
+static struct page *lock_secondary_pages(struct vm_area_struct *vma,
+		struct vm_fault *vmf)
+{
+	struct file *file = vma->vm_file;
+	struct address_space *mapping = file->f_mapping;
+	struct page *primary_page = NULL, **pages = vmf->pages;
+	pgoff_t size;
+	int i;
+
+	for (i = 0; i < vmf->nr_pages; i++) {
+		if (!pages[i])
+			continue;
+		if (pages[i]->index == vmf->pgoff) {
+			primary_page = pages[i];
+			pages[i] = NULL;
+			continue;
+		}
+		if (pages[i]->index > vmf->max)
+			goto put;
+		do_async_mmap_readahead(vma, &file->f_ra, file,
+				pages[i], pages[i]->index);
+		if (!trylock_page(pages[i]))
+			goto put;
+		/* Truncated? */
+		if (unlikely(pages[i]->mapping != mapping))
+			goto unlock;
+		if (unlikely(!PageUptodate(pages[i])))
+			goto unlock;
+		size = (i_size_read(mapping->host) + PAGE_CACHE_SIZE - 1)
+			>> PAGE_CACHE_SHIFT;
+		if (unlikely(pages[i]->index >= size))
+			goto unlock;
+		continue;
+unlock:
+		unlock_page(pages[i]);
+put:
+		put_page(pages[i]);
+		pages[i] = NULL;
+	}
+
+	return primary_page;
+}
+
+static void unlock_and_put_secondary_pages(struct vm_fault *vmf)
+{
+	int i;
+
+	if (!(vmf->flags & FAULT_FLAG_AROUND))
+		return;
+	for (i = 0; i < vmf->nr_pages; i++) {
+		if (!vmf->pages[i])
+			continue;
+		unlock_page(vmf->pages[i]);
+		page_cache_release(vmf->pages[i]);
+		vmf->pages[i] = NULL;
+	}
+}
+
 /**
  * filemap_fault - read in file data for page fault handling
  * @vma:	vma in which the fault was taken
@@ -1624,7 +1682,15 @@ int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	/*
 	 * Do we have something in the page cache already?
 	 */
-	page = find_get_page(mapping, offset);
+	if (vmf->flags & FAULT_FLAG_AROUND) {
+		ret = find_get_pages(mapping, vmf->min, vmf->nr_pages,
+				vmf->pages);
+		memset(vmf->pages + ret, 0,
+				sizeof(struct page *) * (vmf->nr_pages - ret));
+		page = lock_secondary_pages(vma, vmf);
+		ret = VM_FAULT_AROUND;
+	} else
+		page = find_get_page(mapping, offset);
 	if (likely(page) && !(vmf->flags & FAULT_FLAG_TRIED)) {
 		/*
 		 * We found the page, so try async readahead before
@@ -1636,7 +1702,7 @@ int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 		do_sync_mmap_readahead(vma, ra, file, offset);
 		count_vm_event(PGMAJFAULT);
 		mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT);
-		ret = VM_FAULT_MAJOR;
+		ret |= VM_FAULT_MAJOR;
 retry_find:
 		page = find_get_page(mapping, offset);
 		if (!page)
@@ -1644,12 +1710,14 @@ retry_find:
 	}
 
 	if (!lock_page_or_retry(page, vma->vm_mm, vmf->flags)) {
+		unlock_and_put_secondary_pages(vmf);
 		page_cache_release(page);
-		return ret | VM_FAULT_RETRY;
+		return (ret & ~VM_FAULT_AROUND) | VM_FAULT_RETRY;
 	}
 
 	/* Did it get truncated? */
 	if (unlikely(page->mapping != mapping)) {
+		unlock_and_put_secondary_pages(vmf);
 		unlock_page(page);
 		put_page(page);
 		goto retry_find;
@@ -1691,7 +1759,7 @@ no_cached_page:
 	 */
 	if (error >= 0)
 		goto retry_find;
-
+	unlock_and_put_secondary_pages(vmf);
 	/*
 	 * An error return from page_cache_read can result if the
 	 * system is low on memory, or a problem occurs while trying
@@ -1719,6 +1787,7 @@ page_not_uptodate:
 	if (!error || error == AOP_TRUNCATED_PAGE)
 		goto retry_find;
 
+	unlock_and_put_secondary_pages(vmf);
 	/* Things didn't work out. Return zero to tell the mm layer so. */
 	shrink_readahead_size_eio(file, ra);
 	return VM_FAULT_SIGBUS;
-- 
1.8.5.2
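
For reviewers, a rough sketch of the calling convention filemap_fault()
relies on here. Nothing below is part of this patch: do_fault_around()
and FAULT_AROUND_PAGES are made up for illustration, and the new
vm_fault fields (min, max, nr_pages, pages) together with
FAULT_FLAG_AROUND/VM_FAULT_AROUND are assumed to be introduced by
earlier patches in the series. The caller is expected to size pages[]
to nr_pages and to pick a window [min, max] that contains pgoff and
stays inside the VMA:

/* Illustrative only -- a hypothetical caller, not part of this patch. */
#define FAULT_AROUND_PAGES	16

static int do_fault_around(struct vm_area_struct *vma, unsigned long address,
			   pgoff_t pgoff, unsigned int flags)
{
	struct page *pages[FAULT_AROUND_PAGES];
	struct vm_fault vmf;
	pgoff_t vma_last = vma->vm_pgoff +
		((vma->vm_end - vma->vm_start) >> PAGE_SHIFT) - 1;

	vmf.virtual_address = (void __user *)(address & PAGE_MASK);
	vmf.pgoff = pgoff;
	vmf.flags = flags | FAULT_FLAG_AROUND;

	/* Centre the window on the faulting page, clamped to the VMA. */
	vmf.min = pgoff - min_t(pgoff_t, pgoff - vma->vm_pgoff,
				FAULT_AROUND_PAGES / 2);
	vmf.max = min_t(pgoff_t, vmf.min + FAULT_AROUND_PAGES - 1, vma_last);
	vmf.nr_pages = vmf.max - vmf.min + 1;
	vmf.pages = pages;

	/*
	 * On return, vmf.page is the primary page as before, and every
	 * non-NULL entry in pages[] is a locked, uptodate secondary page
	 * the caller can map and must then unlock and release.
	 */
	return vma->vm_ops->fault(vma, &vmf);
}

The point of the trylock-and-give-up policy in lock_secondary_pages()
is that such a caller never sleeps for a secondary page: only the page
at vmf->pgoff is worth blocking on, everything else is opportunistic.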