If FAULT_FLAG_AROUND is set in flags, the core page fault handler asks
->fault to fill the ->pages array with up to ->nr_pages pages, if they
are ready to map. If a page is not ready to be mapped, there is no need
to wait for it: skip to the next one. It's okay to have some (or all)
elements of the array set to NULL.

Page indexes must be in the range between ->min and ->max, inclusive.
The array must not contain the page with index ->pgoff; it should be
returned in ->page as usual.

->fault must set the VM_FAULT_AROUND bit in its return code if it fills
the array. Pages must be returned locked.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@xxxxxxxxxxxxxxx>
---
 include/linux/mm.h |   24 +++++++++++++++++++++
 mm/memory.c        |   61 ++++++++++++++++++++++++++++++++++++++++++++++++------
 2 files changed, 79 insertions(+), 6 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index f28f46eade6a..fe5629bc9e5b 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -191,6 +191,7 @@ extern pgprot_t protection_map[16];
 #define FAULT_FLAG_KILLABLE	0x20	/* The fault task is in SIGKILL killable region */
 #define FAULT_FLAG_TRIED	0x40	/* second try */
 #define FAULT_FLAG_USER	0x80	/* The fault originated in userspace */
+#define FAULT_FLAG_AROUND	0x100	/* Try to get a few pages at a time */
 
 /*
  * vm_fault is filled by the the pagefault handler and passed to the vma's
@@ -210,6 +211,28 @@ struct vm_fault {
 					 * is set (which is also implied by
 					 * VM_FAULT_ERROR).
 					 */
+
+	/*
+	 * If FAULT_FLAG_AROUND is set in ->flags, the core fault handler asks
+	 * ->fault to fill ->pages with up to ->nr_pages pages ready to map.
+	 *
+	 * If a page is not ready to be mapped, there is no need to wait for
+	 * it: skip to the next one.
+	 *
+	 * It's okay to have some (or all) elements of the array set to NULL.
+	 *
+	 * Page indexes must be in the range between ->min and ->max,
+	 * inclusive. The array must not contain the page with index ->pgoff;
+	 * it should be returned in ->page as usual.
+	 *
+	 * ->fault must set the VM_FAULT_AROUND bit in its return code if it
+	 * fills the array.
+	 *
+	 * Pages must be returned locked.
+	 */
+	int nr_pages;
+	pgoff_t min, max;
+	struct page **pages;
 };
 
 /*
@@ -1004,6 +1027,7 @@ static inline int page_mapped(struct page *page)
 #define VM_FAULT_LOCKED	0x0200	/* ->fault locked the returned page */
 #define VM_FAULT_RETRY	0x0400	/* ->fault blocked, must retry */
 #define VM_FAULT_FALLBACK 0x0800	/* huge page fault failed, fall back to small */
+#define VM_FAULT_AROUND 0x1000	/* ->pages is filled */
 
 #define VM_FAULT_HWPOISON_LARGE_MASK 0xf000 /* encodes hpage index for large hwpoison */
 
diff --git a/mm/memory.c b/mm/memory.c
index 68c3dc141059..47ab9d6e1666 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3287,27 +3287,52 @@ oom:
 }
 
 static int __do_fault(struct vm_area_struct *vma, unsigned long address,
-		pgoff_t pgoff, unsigned int flags, struct page **page)
+		pgoff_t pgoff, unsigned int flags, struct page **page,
+		struct page **pages, int nr_pages)
 {
 	struct vm_fault vmf;
-	int ret;
+	int i, ret;
 
 	vmf.virtual_address = (void __user *)(address & PAGE_MASK);
 	vmf.pgoff = pgoff;
 	vmf.flags = flags;
 	vmf.page = NULL;
 
+	if (flags & FAULT_FLAG_AROUND) {
+		vmf.pages = pages;
+		vmf.nr_pages = nr_pages;
+
+		/*
+		 * From the faulting address aligned down to a FAULT_AROUND_PAGES
+		 * boundary, to the end of the page table.
+		 */
+		vmf.min = pgoff - ((address >> PAGE_SHIFT) & (nr_pages - 1));
+		vmf.min = min(pgoff, vmf.min);	/* guard against underflow */
+		vmf.max = pgoff + PTRS_PER_PTE - 1 -
+			((address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1));
+		/* Both should be inside the vma */
+		vmf.min = max(vma->vm_pgoff, vmf.min);
+		vmf.max = min(vma_pages(vma) + vma->vm_pgoff - 1, vmf.max);
+	}
+
 	ret = vma->vm_ops->fault(vma, &vmf);
 	if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY)))
 		return ret;
 
 	if (unlikely(PageHWPoison(vmf.page))) {
+		for (i = 0; (ret & VM_FAULT_AROUND) && i < nr_pages; i++) {
+			if (!pages[i])
+				continue;
+			unlock_page(pages[i]);
+			page_cache_release(pages[i]);
+		}
 		if (ret & VM_FAULT_LOCKED)
 			unlock_page(vmf.page);
 		page_cache_release(vmf.page);
 		return VM_FAULT_HWPOISON;
 	}
 
+	/* Pages in ->pages are always returned locked */
 	if (unlikely(!(ret & VM_FAULT_LOCKED)))
 		lock_page(vmf.page);
 	else
@@ -3341,16 +3366,21 @@ static void do_set_pte(struct vm_area_struct *vma, unsigned long address,
 	update_mmu_cache(vma, address, pte);
 }
 
+#define FAULT_AROUND_PAGES 32
 static int do_read_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 		unsigned long address, pmd_t *pmd,
 		pgoff_t pgoff, unsigned int flags, pte_t orig_pte)
 {
 	struct page *fault_page;
+	struct page *pages[FAULT_AROUND_PAGES];
 	spinlock_t *ptl;
 	pte_t *pte;
-	int ret;
+	int i, ret;
 
-	ret = __do_fault(vma, address, pgoff, flags, &fault_page);
+	if (!(flags & FAULT_FLAG_NONLINEAR))
+		flags |= FAULT_FLAG_AROUND;
+	ret = __do_fault(vma, address, pgoff, flags, &fault_page,
+			pages, ARRAY_SIZE(pages));
 	if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY)))
 		return ret;
 
@@ -3362,6 +3392,25 @@ static int do_read_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 		return ret;
 	}
 	do_set_pte(vma, address, fault_page, pte, false, false);
+	for (i = 0; (ret & VM_FAULT_AROUND) && i < ARRAY_SIZE(pages); i++) {
+		pte_t *_pte;
+		unsigned long addr;
+		if (!pages[i])
+			continue;
+		VM_BUG_ON_PAGE(!PageLocked(pages[i]), pages[i]);
+		if (PageHWPoison(pages[i]))
+			goto skip;
+		_pte = pte + pages[i]->index - pgoff;
+		if (!pte_none(*_pte))
+			goto skip;
+		addr = address + PAGE_SIZE * (pages[i]->index - pgoff);
+		do_set_pte(vma, addr, pages[i], _pte, false, false);
+		unlock_page(pages[i]);
+		continue;
+skip:
+		unlock_page(pages[i]);
+		put_page(pages[i]);
+	}
 	pte_unmap_unlock(pte, ptl);
 	unlock_page(fault_page);
 	return ret;
@@ -3388,7 +3437,7 @@ static int do_cow_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 		return VM_FAULT_OOM;
 	}
 
-	ret = __do_fault(vma, address, pgoff, flags, &fault_page);
+	ret = __do_fault(vma, address, pgoff, flags, &fault_page, NULL, 0);
 	if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY)))
 		goto uncharge_out;
 
@@ -3423,7 +3472,7 @@ static int do_shared_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 	int dirtied = 0;
 	int ret, tmp;
 
-	ret = __do_fault(vma, address, pgoff, flags, &fault_page);
+	ret = __do_fault(vma, address, pgoff, flags, &fault_page, NULL, 0);
 	if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY)))
 		return ret;
 
-- 
1.8.5.2
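For illustration only (not part of the patch): below is a rough sketch of how
a filesystem's ->fault handler could fill vmf->pages when FAULT_FLAG_AROUND is
set. The helper name fill_fault_around() and the exact readiness checks are
made up for this example; the point is that the handler only takes pages that
are already in the page cache and immediately lockable, and never sleeps for
the others.

	/*
	 * Illustrative sketch only -- fill_fault_around() is a hypothetical
	 * helper, not part of this patch.  It collects pages around the
	 * faulting index that are ready to map, skipping anything that
	 * would require waiting.
	 */
	static void fill_fault_around(struct address_space *mapping,
				      struct vm_fault *vmf)
	{
		pgoff_t index;
		int i = 0;

		/* Unused slots must be NULL; the caller does not pre-clear them. */
		memset(vmf->pages, 0, vmf->nr_pages * sizeof(struct page *));

		for (index = vmf->min;
		     index <= vmf->max && i < vmf->nr_pages; index++) {
			struct page *page;

			/* The faulting page itself is returned in ->page. */
			if (index == vmf->pgoff)
				continue;

			page = find_get_page(mapping, index);
			if (!page)
				continue;

			/* Not ready?  Don't wait, just skip to the next index. */
			if (!trylock_page(page)) {
				page_cache_release(page);
				continue;
			}
			if (!PageUptodate(page) || page->mapping != mapping) {
				unlock_page(page);
				page_cache_release(page);
				continue;
			}

			/* Hand the page over locked, with its reference held. */
			vmf->pages[i++] = page;
		}
	}

A ->fault implementation would call something like this once its own page is
ready and then set VM_FAULT_AROUND in its return value, so that
do_read_fault() can map whatever was collected without any extra waiting.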