On Tue, Dec 7, 2010 at 1:48 PM, Hugh Dickins <hughd@xxxxxxxxxx> wrote: > On Mon, 6 Dec 2010, Minchan Kim wrote: > >> Now zap_pte_range alwayas activates pages which are pte_young && >> !VM_SequentialReadHint(vma). But in case of calling MADV_DONTNEED, >> it's unnecessary since the page wouldn't use any more. >> >> Signed-off-by: Minchan Kim <minchan.kim@xxxxxxxxx> >> Acked-by: Rik van Riel <riel@xxxxxxxxxx> >> Reviewed-by: KOSAKI Motohiro <kosaki.motohiro@xxxxxxxxxxxxxx> >> Cc: KOSAKI Motohiro <kosaki.motohiro@xxxxxxxxxxxxxx> >> Cc: Johannes Weiner <hannes@xxxxxxxxxxx> >> Cc: Nick Piggin <npiggin@xxxxxxxxx> >> Cc: Mel Gorman <mel@xxxxxxxxx> >> Cc: Wu Fengguang <fengguang.wu@xxxxxxxxx> >> Cc: Hugh Dickins <hughd@xxxxxxxxxx> >> >> Changelog since v3: >> - Change variable name - suggested by Johannes >> - Union ignore_references with zap_details - suggested by Hugh >> >> Changelog since v2: >> - remove unnecessary description >> >> Changelog since v1: >> - change word from promote to activate >> - add activate argument to zap_pte_range and family function >> --- >> include/linux/mm.h | 4 +++- >> mm/madvise.c | 6 +++--- >> mm/memory.c | 5 ++++- >> 3 files changed, 10 insertions(+), 5 deletions(-) >> >> diff --git a/include/linux/mm.h b/include/linux/mm.h >> index 6522ae4..e57190f 100644 >> --- a/include/linux/mm.h >> +++ b/include/linux/mm.h >> @@ -771,12 +771,14 @@ struct zap_details { >> pgoff_t last_index; /* Highest page->index to unmap */ >> spinlock_t *i_mmap_lock; /* For unmap_mapping_range: */ >> unsigned long truncate_count; /* Compare vm_truncate_count */ >> + bool ignore_references; /* For page activation */ >> }; >> >> #define __ZAP_DETAILS_INITIALIZER(name) \ >> { .nonlinear_vma = NULL \ >> , .check_mapping = NULL \ >> - , .i_mmap_lock = NULL } >> + , .i_mmap_lock = NULL \ >> + , .ignore_references = false } > > Okay. > >> >> #define DEFINE_ZAP_DETAILS(name) \ >> struct zap_details name = __ZAP_DETAILS_INITIALIZER(name) >> diff --git a/mm/madvise.c b/mm/madvise.c >> index bfa17aa..8e7aba3 100644 >> --- a/mm/madvise.c >> +++ b/mm/madvise.c >> @@ -163,6 +163,7 @@ static long madvise_dontneed(struct vm_area_struct * vma, >> unsigned long start, unsigned long end) >> { >> DEFINE_ZAP_DETAILS(details); >> + details.ignore_references = true; >> >> *prev = vma; >> if (vma->vm_flags & (VM_LOCKED|VM_HUGETLB|VM_PFNMAP)) >> @@ -173,10 +174,9 @@ static long madvise_dontneed(struct vm_area_struct * vma, >> details.last_index = ULONG_MAX; >> >> zap_page_range(vma, start, end - start, &details); >> - } else { >> - >> + } else >> zap_page_range(vma, start, end - start, &details); >> - } >> + > > As in the previous patch, you have the same in the if {} and the else. > >> return 0; >> } >> >> diff --git a/mm/memory.c b/mm/memory.c >> index c0879bb..44d87e1 100644 >> --- a/mm/memory.c >> +++ b/mm/memory.c >> @@ -897,6 +897,7 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb, >> pte_t *pte; >> spinlock_t *ptl; >> int rss[NR_MM_COUNTERS]; >> + bool ignore_references = details->ignore_references; >> >> init_rss_vec(rss); >> >> @@ -952,7 +953,8 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb, >> if (pte_dirty(ptent)) >> set_page_dirty(page); >> if (pte_young(ptent) && >> - likely(!VM_SequentialReadHint(vma))) >> + likely(!VM_SequentialReadHint(vma)) && >> + !ignore_references) > > I think ignore_references is about as likely as VM_SequentialReadHint: > I'd probably just omit that "likely()" nowadays, but you might prefer > to put your "|| !ignore_references" inside. > > Hmm, actually it would probably be better to say something like > > mark_accessed = true; > if (VM_SequentialReadHint(vma) || > (details && details->ignore_references)) > mark_accessed = false; > > on entry to zap_pte_range(). > >> mark_page_accessed(page); >> rss[MM_FILEPAGES]--; >> } >> @@ -1218,6 +1220,7 @@ int zap_vma_ptes(struct vm_area_struct *vma, unsigned long address, >> unsigned long size) >> { >> DEFINE_ZAP_DETAILS(details); >> + details.ignore_references = true; >> if (address < vma->vm_start || address + size > vma->vm_end || >> !(vma->vm_flags & VM_PFNMAP)) >> return -1; >> -- > > Unnecessary here (would make more sense in the truncation case, > but not necessary there either): zap_vma_ptes() is only being used on > GRU's un-cowable VM_PFNMAP area, so vm_normal_page() won't even give > you a non-NULL page to mark. Thanks for the notice. How about this? Although it doesn't remove null dependency, it meet my goal without big overhead. It's just quick patch. If you agree, I will resend this version as formal patch. (If you suffered from seeing below word-wrapped source, see the attachment. I asked to google two time to support text-plain mode in gmail web but I can't receive any response until now. ;(. Lots of kernel developer in google. Please support this mode for us who can't use SMTP although it's a very small VOC) diff --git a/include/linux/mm.h b/include/linux/mm.h index e097df6..14ae918 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -771,6 +771,7 @@ struct zap_details { pgoff_t last_index; /* Highest page->index to unmap */ spinlock_t *i_mmap_lock; /* For unmap_mapping_range: */ unsigned long truncate_count; /* Compare vm_truncate_count */ + int ignore_reference; }; struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr, diff --git a/mm/madvise.c b/mm/madvise.c index 319528b..fdb0253 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -162,18 +162,22 @@ static long madvise_dontneed(struct vm_area_struct * vma, struct vm_area_struct ** prev, unsigned long start, unsigned long end) { + struct zap_details details ; + *prev = vma; if (vma->vm_flags & (VM_LOCKED|VM_HUGETLB|VM_PFNMAP)) return -EINVAL; if (unlikely(vma->vm_flags & VM_NONLINEAR)) { - struct zap_details details = { - .nonlinear_vma = vma, - .last_index = ULONG_MAX, - }; - zap_page_range(vma, start, end - start, &details); - } else - zap_page_range(vma, start, end - start, NULL); + details.nonlinear_vma = vma; + details.last_index = ULONG_MAX; + } else { + details.nonlinear_vma = NULL; + details.last_index = NULL; + } + + details.ignore_references = true; + zap_page_range(vma, start, end - start, &details); return 0; } diff --git a/mm/memory.c b/mm/memory.c index ebfeedf..d46ac42 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -897,9 +897,15 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb, pte_t *pte; spinlock_t *ptl; int rss[NR_MM_COUNTERS]; - + bool ignore_reference = false; init_rss_vec(rss); + if (details && ((!details->check_mapping && !details->nonlinear_vma) + || !details->ignore_reference)) + details = NULL; + pte = pte_offset_map_lock(mm, pmd, addr, &ptl); arch_enter_lazy_mmu_mode(); do { @@ -949,7 +955,8 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb, if (pte_dirty(ptent)) set_page_dirty(page); if (pte_young(ptent) && - likely(!VM_SequentialReadHint(vma))) + likely(!VM_SequentialReadHint(vma)) && + likely(!ignore_reference)) mark_page_accessed(page); rss[MM_FILEPAGES]--; } @@ -1038,8 +1045,6 @@ static unsigned long unmap_page_range(struct mmu_gather *tlb, pgd_t *pgd; unsigned long next; - if (details && !details->check_mapping && !details->nonlinear_vma) - details = NULL; BUG_ON(addr >= end); mem_cgroup_uncharge_start(); @@ -1102,7 +1107,8 @@ unsigned long unmap_vmas(struct mmu_gather **tlbp, unsigned long tlb_start = 0; /* For tlb_finish_mmu */ int tlb_start_valid = 0; unsigned long start = start_addr; - spinlock_t *i_mmap_lock = details? details->i_mmap_lock: NULL; + spinlock_t *i_mmap_lock = details ? + (detais->check_mapping ? details->i_mmap_lock: NULL) : NULL; int fullmm = (*tlbp)->fullmm; struct mm_struct *mm = vma->vm_mm; > > Hugh > -- Kind regards, Minchan Kim
diff --git a/include/linux/mm.h b/include/linux/mm.h index e097df6..14ae918 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -771,6 +771,7 @@ struct zap_details { pgoff_t last_index; /* Highest page->index to unmap */ spinlock_t *i_mmap_lock; /* For unmap_mapping_range: */ unsigned long truncate_count; /* Compare vm_truncate_count */ + int ignore_reference; }; struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr, diff --git a/mm/madvise.c b/mm/madvise.c index 319528b..fdb0253 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -162,18 +162,22 @@ static long madvise_dontneed(struct vm_area_struct * vma, struct vm_area_struct ** prev, unsigned long start, unsigned long end) { + struct zap_details details ; + *prev = vma; if (vma->vm_flags & (VM_LOCKED|VM_HUGETLB|VM_PFNMAP)) return -EINVAL; if (unlikely(vma->vm_flags & VM_NONLINEAR)) { - struct zap_details details = { - .nonlinear_vma = vma, - .last_index = ULONG_MAX, - }; - zap_page_range(vma, start, end - start, &details); - } else - zap_page_range(vma, start, end - start, NULL); + details.nonlinear_vma = vma; + details.last_index = ULONG_MAX; + } else { + details.nonlinear_vma = NULL; + details.last_index = NULL; + } + + details.ignore_reference = true; + zap_page_range(vma, start, end - start, &details); return 0; } diff --git a/mm/memory.c b/mm/memory.c index ebfeedf..8aa0190 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -897,9 +897,13 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb, pte_t *pte; spinlock_t *ptl; int rss[NR_MM_COUNTERS]; - + bool ignore_reference = false; init_rss_vec(rss); + if (details && ((!details->check_mapping && !details->nonlinear_vma) + || !details->ignore_reference)) + details = NULL; + pte = pte_offset_map_lock(mm, pmd, addr, &ptl); arch_enter_lazy_mmu_mode(); do { @@ -949,7 +953,8 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb, if (pte_dirty(ptent)) set_page_dirty(page); if (pte_young(ptent) && - likely(!VM_SequentialReadHint(vma))) + likely(!VM_SequentialReadHint(vma)) && + likely(!ignore_reference)) mark_page_accessed(page); rss[MM_FILEPAGES]--; } @@ -1038,8 +1043,6 @@ static unsigned long unmap_page_range(struct mmu_gather *tlb, pgd_t *pgd; unsigned long next; - if (details && !details->check_mapping && !details->nonlinear_vma) - details = NULL; BUG_ON(addr >= end); mem_cgroup_uncharge_start(); @@ -1102,7 +1105,8 @@ unsigned long unmap_vmas(struct mmu_gather **tlbp, unsigned long tlb_start = 0; /* For tlb_finish_mmu */ int tlb_start_valid = 0; unsigned long start = start_addr; - spinlock_t *i_mmap_lock = details? details->i_mmap_lock: NULL; + spinlock_t *i_mmap_lock = details ? + (detais->check_mapping ? details->i_mmap_lock: NULL) : NULL; int fullmm = (*tlbp)->fullmm; struct mm_struct *mm = vma->vm_mm;