Re: [PATCH 1/1] mm: mempolicy: fix mbind_range() && vma_adjust() interaction

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 07/09, Oleg Nesterov wrote:
>
> I can be easily wrong, but to me vma_adjust() and its usage looks a bit
> overcomplicated. Perhaps it makes sense to distinguish mmapped/hole cases.
> mbind_range/madvise/etc need vma_join(vma, ...), not prev/anon_vma/file.
> Perhaps. not sure.

And I am just curious if something like below makes any sense...

Suppose we add the new helper, vma_update(vma, new). Note that "new" is
the fake vma, it just describes how do want to change vma.

static bool
can_merge_vma(struct vm_area_struct *prev, struct vm_area_struct *next)
{
	if (prev->vm_end != next->vm_start)
		return false;
	if (prev->vm_pgoff + vma_pages(prev) != next->vm_pgoff)
		return false;

	if (prev->vm_file != next->vm_file)
		return false;
	if (prev->vm_flags != next->vm_flags)
		return false;
	if (!mpol_equal(vma_policy(prev), vma_policy(next)))
		return false;

	if (prev->anon_vma != next->anon_vma)	/* WRONG, FIXME !!! */
		return false;

	if (next->vm_ops && next->vm_ops->close)
		return false;

	return true;
}

struct vm_area_struct *
vma_update(struct vm_area_struct *vma, struct vm_area_struct *new)
{
	struct vm_area_struct *prev = vma->vm_prev;
	struct vm_area_struct *next = vma->vm_next;
	struct vm_area_struct *new_ret;
	unsigned long new_end;
	int err;

	/* prev/next != vma means we can merge with prev/next */
	if (!prev || !can_merge_vma(prev, new))
		prev = vma;
	if (!next || !can_merge_vma(new, next))
		next = vma;

	if (new->vm_start > vma->vm_start) {	/* prev == vma */
		if (next != vma) {
			/* vma shrinks, next grows, case 4 */
			new_end = new->vm_start;
			new_ret = next;
			goto adjust;
		}
		err = split_vma(vma->vm_mm, vma, new->vm_start, 1);
		if (err)
			return ERR_PTR(err);
	}

	if (new->vm_end < vma->vm_end) {	/* next == vma */
		if (prev != vma) {
			/* prev grows, vma shrinks, case 5 */
			new_end = new->vm_end;
			new_ret = vma;
			goto adjust;
		}
		err = split_vma(vma->vm_mm, vma, new->vm_end, 0);
		if (err)
			return ERR_PTR(err);
	}

	if (prev == next)	/* true if split_vma() was called */
		return vma;

	new_end = next->vm_end;
	new_ret = prev;
adjust:
	err = vma_adjust(prev, prev->vm_start, new_end, prev->vm_pgoff, NULL);
	if (err)
		return ERR_PTR(err);
	khugepaged_enter_vma_merge(new_ret);
	return new_ret;
}

Now we can change madvise_behavior() and other similar users as below.

As for mmap_region() we can add another helper which simply tries to
expand prev/next (case 1-3).

Oleg.

--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -46,10 +46,9 @@ static long madvise_behavior(struct vm_area_struct * vma,
 		     struct vm_area_struct **prev,
 		     unsigned long start, unsigned long end, int behavior)
 {
-	struct mm_struct * mm = vma->vm_mm;
-	int error = 0;
-	pgoff_t pgoff;
 	unsigned long new_flags = vma->vm_flags;
+	struct vm_area_struct new;
+	int error = 0;
 
 	switch (behavior) {
 	case MADV_NORMAL:
@@ -100,34 +99,21 @@ static long madvise_behavior(struct vm_area_struct * vma,
 		goto out;
 	}
 
-	pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
-	*prev = vma_merge(mm, *prev, start, end, new_flags, vma->anon_vma,
-				vma->vm_file, pgoff, vma_policy(vma));
-	if (*prev) {
-		vma = *prev;
-		goto success;
-	}
-
-	*prev = vma;
+	new = *vma;
+	new.vm_flags = new_flags;
+	new.vm_start = start;
+	new.vm_end = end;
+	vma = vma_update(vma, &new);
 
-	if (start != vma->vm_start) {
-		error = split_vma(mm, vma, start, 1);
-		if (error)
-			goto out;
-	}
-
-	if (end != vma->vm_end) {
-		error = split_vma(mm, vma, end, 0);
-		if (error)
-			goto out;
+	if (IS_ERR(vma)) {
+		error = PTR_ERR(vma);
+		goto out;
 	}
-
-success:
 	/*
 	 * vm_flags is protected by the mmap_sem held in write mode.
 	 */
 	vma->vm_flags = new_flags;
-
+	*prev = vma;
 out:
 	if (error == -ENOMEM)
 		error = -EAGAIN;

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@xxxxxxxxx.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@xxxxxxxxx";> email@xxxxxxxxx </a>




[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Annouce]     [Bugtraq]     [Linux]     [Linux OMAP]     [Linux MIPS]     [ECOS]     [Asterisk Internet PBX]     [Linux API]