On Thu, Jan 26, 2017 at 02:57:43PM +0300, Kirill A. Shutemov wrote: > +++ b/include/linux/pagemap.h > @@ -332,6 +332,15 @@ static inline struct page *grab_cache_page_nowait(struct address_space *mapping, > mapping_gfp_mask(mapping)); > } > > +static inline struct page *find_subpage(struct page *page, pgoff_t offset) > +{ > + VM_BUG_ON_PAGE(PageTail(page), page); > + VM_BUG_ON_PAGE(page->index > offset, page); > + VM_BUG_ON_PAGE(page->index + (1 << compound_order(page)) < offset, > + page); > + return page - page->index + offset; > +} What would you think to: static inline void check_page_index(struct page *page, pgoff_t offset) { VM_BUG_ON_PAGE(PageTail(page), page); VM_BUG_ON_PAGE(page->index > offset, page); VM_BUG_ON_PAGE(page->index + (1 << compound_order(page)) <= offset, page); } (I think I fixed an off-by-one error up there ... if index + (1 << order) == offset, this is also a bug, right? because offset would then refer to the next page, not this page) static inline struct page *find_subpage(struct page *page, pgoff_t offset) { check_page_index(page, offset); return page + (offset - page->index); } ... then you can use check_page_index down ... > @@ -1250,7 +1233,6 @@ struct page *find_lock_entry(struct address_space *mapping, pgoff_t offset) > put_page(page); > goto repeat; > } > - VM_BUG_ON_PAGE(page_to_pgoff(page) != offset, page); ... here? > @@ -1472,25 +1451,35 @@ unsigned find_get_pages(struct address_space *mapping, pgoff_t start, > goto repeat; > } > > + /* For multi-order entries, find relevant subpage */ > + if (PageTransHuge(page)) { > + VM_BUG_ON(index - page->index < 0); > + VM_BUG_ON(index - page->index >= HPAGE_PMD_NR); > + page += index - page->index; > + } Use find_subpage() here? > pages[ret] = page; > if (++ret == nr_pages) > break; > + if (!PageTransCompound(page)) > + continue; > + for (refs = 0; ret < nr_pages && > + (index + 1) % HPAGE_PMD_NR; > + ret++, refs++, index++) > + pages[ret] = ++page; > + if (refs) > + page_ref_add(compound_head(page), refs); > + if (ret == nr_pages) > + break; Can we avoid referencing huge pages specifically in the page cache? I'd like us to get to the point where we can put arbitrary compound pages into the page cache. For example, I think this can be written as: if (!PageCompound(page)) continue; for (refs = 0; ret < nr_pages; refs++, index++) { if (index > page->index + (1 << compound_order(page))) break; pages[ret++] = ++page; } if (refs) page_ref_add(compound_head(page), refs); if (ret == nr_pages) break; > @@ -1541,19 +1533,12 @@ unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t index, > > + /* For multi-order entries, find relevant subpage */ > + if (PageTransHuge(page)) { > + VM_BUG_ON(index - page->index < 0); > + VM_BUG_ON(index - page->index >= HPAGE_PMD_NR); > + page += index - page->index; > + } > + > pages[ret] = page; > if (++ret == nr_pages) > break; > + if (!PageTransCompound(page)) > + continue; > + for (refs = 0; ret < nr_pages && > + (index + 1) % HPAGE_PMD_NR; > + ret++, refs++, index++) > + pages[ret] = ++page; > + if (refs) > + page_ref_add(compound_head(page), refs); > + if (ret == nr_pages) > + break; > } > rcu_read_unlock(); > return ret; Ugh, the same code again. Hmm ... we only need to modify 'ret' as a result of this ... so could we split it out like this? static unsigned populate_pages(struct page **pages, unsigned i, unsigned max, struct page *page) { unsigned refs = 0; for (;;) { pages[i++] = page; if (i == max) break; if (PageHead(page + 1)) break; page++; refs++; } if (refs) page_ref_add(compound_head(page), refs); return i; } > +unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *indexp, > int tag, unsigned int nr_pages, struct page **pages) > break; > + if (!PageTransCompound(page)) > + continue; > + for (refs = 0; ret < nr_pages && > + (index + 1) % HPAGE_PMD_NR; > + ret++, refs++, index++) > + pages[ret] = ++page; > + if (refs) > + page_ref_add(compound_head(page), refs); > + if (ret == nr_pages) > + break; > } ... and again! > @@ -2326,25 +2337,26 @@ void filemap_map_pages(struct vm_fault *vmf, > + /* For multi-order entries, find relevant subpage */ > + if (PageTransHuge(page)) { > + VM_BUG_ON(index - page->index < 0); > + VM_BUG_ON(index - page->index >= HPAGE_PMD_NR); > + page += index - page->index; > + } > + > if (!PageUptodate(page) || > PageReadahead(page) || Readahead is PF_NO_COMPOUND ... so I don't see why this works? > @@ -2378,8 +2390,14 @@ void filemap_map_pages(struct vm_fault *vmf, > /* Huge page is mapped? No need to proceed. */ > if (pmd_trans_huge(*vmf->pmd)) > break; > - if (iter.index == end_pgoff) > + if (index == end_pgoff) > break; > + if (page && PageTransCompound(page) && > + (index & (HPAGE_PMD_NR - 1)) != > + HPAGE_PMD_NR - 1) { > + index++; > + goto repeat; Do we really have to go all the way back to the beginning of the loop? It'd be nice to be able to insert all of the relevant PTEs in a shorter loop here. We'd need to bump the reference count N more times, and I think we do need to check HWPoison for each subpage. -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@xxxxxxxxx. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@xxxxxxxxx"> email@xxxxxxxxx </a>