On Mon, Mar 5, 2012 at 3:15 AM, Aneesh Kumar K.V <aneesh.kumar@xxxxxxxxxxxxxxxxxx> wrote:
> On Thu, 1 Mar 2012 14:40:29 -0800, Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> wrote:
>> I haven't begun to get my head around this yet, but I'd like to draw
>> your attention to https://lkml.org/lkml/2012/2/15/548.
>
> Hmm, that's a really serious bug.
>
>> That fix has
>> been hanging around for a while, but I haven't done anything with it
>> yet because I don't like its additional blurring of the separation
>> between hugetlb core code and hugetlbfs. I want to find time to sit
>> down and see if the fix can be better architected but haven't got
>> around to that yet.
>>
>> I expect that your patches will conflict at least mechanically with
>> David's, which is not a big issue. But I wonder whether your patches
>> will copy the same bug into other places, and whether you can think of
>> a tidier way of addressing the bug which David is seeing?
>>
>
> I will go through the implementation again and make sure the problem
> explained by David doesn't happen in the new code path added by the
> patch series.
>

Hi Aneesh,

When you tackle that problem, please also take the following approach
into account. It is only a draft: on the assumption that the problem is
caused by an extra reference count, the quota hand-back is simply
eliminated from the huge-page free path, and each get_quota is instead
carefully paired with a put_quota for the newly allocated page. That is
all; feel free to correct me. (A simplified sketch of the intended
pairing discipline follows the patch.)

Best Regards
-hd

--- a/mm/hugetlb.c	Mon Mar 5 20:20:34 2012
+++ b/mm/hugetlb.c	Mon Mar 5 21:20:14 2012
@@ -533,9 +533,7 @@ static void free_huge_page(struct page *
 	 */
 	struct hstate *h = page_hstate(page);
 	int nid = page_to_nid(page);
-	struct address_space *mapping;
 
-	mapping = (struct address_space *) page_private(page);
 	set_page_private(page, 0);
 	page->mapping = NULL;
 	BUG_ON(page_count(page));
@@ -551,8 +549,6 @@ static void free_huge_page(struct page *
 		enqueue_huge_page(h, page);
 	}
 	spin_unlock(&hugetlb_lock);
-	if (mapping)
-		hugetlb_put_quota(mapping, 1);
 }
 
 static void prep_new_huge_page(struct hstate *h, struct page *page, int nid)
@@ -1021,7 +1017,8 @@ static void vma_commit_reservation(struc
 }
 
 static struct page *alloc_huge_page(struct vm_area_struct *vma,
-				    unsigned long addr, int avoid_reserve)
+				    unsigned long addr, int avoid_reserve,
+				    long *quota)
 {
 	struct hstate *h = hstate_vma(vma);
 	struct page *page;
@@ -1050,7 +1047,8 @@ static struct page *alloc_huge_page(stru
 	if (!page) {
 		page = alloc_buddy_huge_page(h, NUMA_NO_NODE);
 		if (!page) {
-			hugetlb_put_quota(inode->i_mapping, chg);
+			if (chg)
+				hugetlb_put_quota(inode->i_mapping, chg);
 			return ERR_PTR(-VM_FAULT_SIGBUS);
 		}
 	}
@@ -1058,6 +1056,8 @@ static struct page *alloc_huge_page(stru
 	set_page_private(page, (unsigned long) mapping);
 
 	vma_commit_reservation(h, vma, addr);
+	if (quota)
+		*quota = chg;
 
 	return page;
 }
@@ -2365,6 +2365,7 @@ static int hugetlb_cow(struct mm_struct
 	struct page *old_page, *new_page;
 	int avoidcopy;
 	int outside_reserve = 0;
+	long quota = 0;
 
 	old_page = pte_page(pte);
 
@@ -2397,7 +2398,8 @@ retry_avoidcopy:
 
 	/* Drop page_table_lock as buddy allocator may be called */
 	spin_unlock(&mm->page_table_lock);
-	new_page = alloc_huge_page(vma, address, outside_reserve);
+	quota = 0;
+	new_page = alloc_huge_page(vma, address, outside_reserve, &quota);
 
 	if (IS_ERR(new_page)) {
 		page_cache_release(old_page);
@@ -2439,6 +2441,8 @@ retry_avoidcopy:
 	if (unlikely(anon_vma_prepare(vma))) {
 		page_cache_release(new_page);
 		page_cache_release(old_page);
+		if (quota)
+			hugetlb_put_quota(vma->vm_file->f_mapping, quota);
 		/* Caller expects lock to be held */
 		spin_lock(&mm->page_table_lock);
 		return VM_FAULT_OOM;
@@ -2470,6 +2474,8 @@ retry_avoidcopy:
 				address & huge_page_mask(h),
 				(address & huge_page_mask(h)) + huge_page_size(h));
 	}
+	else if (quota)
+		hugetlb_put_quota(vma->vm_file->f_mapping, quota);
 	page_cache_release(new_page);
 	page_cache_release(old_page);
 	return 0;
@@ -2519,6 +2525,7 @@ static int hugetlb_no_page(struct mm_str
 	struct page *page;
 	struct address_space *mapping;
 	pte_t new_pte;
+	long quota = 0;
 
 	/*
 	 * Currently, we are forced to kill the process in the event the
@@ -2540,12 +2547,13 @@ static int hugetlb_no_page(struct mm_str
 	 * before we get page_table_lock.
 	 */
 retry:
+	quota = 0;
 	page = find_lock_page(mapping, idx);
 	if (!page) {
 		size = i_size_read(mapping->host) >> huge_page_shift(h);
 		if (idx >= size)
 			goto out;
-		page = alloc_huge_page(vma, address, 0);
+		page = alloc_huge_page(vma, address, 0, &quota);
 		if (IS_ERR(page)) {
 			ret = -PTR_ERR(page);
 			goto out;
@@ -2560,6 +2568,8 @@ retry:
 		err = add_to_page_cache(page, mapping, idx, GFP_KERNEL);
 		if (err) {
 			put_page(page);
+			if (quota)
+				hugetlb_put_quota(mapping, quota);
 			if (err == -EEXIST)
 				goto retry;
 			goto out;
@@ -2633,6 +2643,8 @@ backout:
 backout_unlocked:
 	unlock_page(page);
 	put_page(page);
+	if (quota)
+		hugetlb_put_quota(mapping, quota);
 	goto out;
 }
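
To make the intent of the pairing concrete, here is a minimal,
self-contained user-space sketch of the discipline the draft follows:
whoever takes a quota charge when allocating a page owns that charge,
and every failure path returns exactly what was taken. All names here
(quota_charge, quota_uncharge, alloc_page_charged) are illustrative
stand-ins of my own, not the kernel's hugetlb_get_quota() /
hugetlb_put_quota() API.

#include <stdio.h>
#include <stdlib.h>

static long quota_free = 2;		/* stand-in for the hugetlbfs quota pool */

static int quota_charge(long n)		/* analogue of hugetlb_get_quota() */
{
	if (quota_free < n)
		return -1;
	quota_free -= n;
	return 0;
}

static void quota_uncharge(long n)	/* analogue of hugetlb_put_quota() */
{
	quota_free += n;
}

/*
 * Allocate a "page" and report through *quota how much charge the
 * caller now owns and must hand back on any later failure path.
 */
static void *alloc_page_charged(long *quota)
{
	long chg = 1;	/* real code charges only when no reservation covers it */
	void *page;

	if (quota_charge(chg))
		return NULL;
	page = malloc(4096);
	if (!page) {
		quota_uncharge(chg);	/* allocation failed: give the charge back */
		return NULL;
	}
	if (quota)
		*quota = chg;		/* success: caller owns the charge now */
	return page;
}

int main(void)
{
	long quota = 0;
	void *page = alloc_page_charged(&quota);

	if (!page)
		return 1;

	/*
	 * Pretend a later step (say, inserting the page into the page
	 * cache) failed, so we take the backout path: drop the page AND
	 * return the charge, mirroring the backout_unlocked: hunk above.
	 */
	free(page);
	if (quota)
		quota_uncharge(quota);

	printf("quota_free = %ld\n", quota_free);	/* prints 2: fully balanced */
	return 0;
}

The reason for returning the charge through the *quota out-parameter
rather than recomputing it in the caller is that only the allocation
path knows whether an existing reservation already covered the page
(chg == 0), which is exactly why the draft threads long *quota through
alloc_huge_page().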