The patch titled

     mm: tracking shared dirty pages

has been added to the -mm tree.  Its filename is

     mm-tracking-shared-dirty-pages.patch

See http://www.zip.com.au/~akpm/linux/patches/stuff/added-to-mm.txt to find
out what to do about this

------------------------------------------------------
Subject: mm: tracking shared dirty pages
From: Peter Zijlstra <a.p.zijlstra@xxxxxxxxx>

Signed-off-by: Peter Zijlstra <a.p.zijlstra@xxxxxxxxx>
Signed-off-by: Andrew Morton <akpm@xxxxxxxx>
---

 fs/buffer.c          |    2 -
 include/linux/mm.h   |    6 +++
 include/linux/rmap.h |    8 ++++
 mm/memory.c          |   31 +++++++++++++++---
 mm/mmap.c            |   34 ++++++++++++++++----
 mm/mprotect.c        |    7 +++-
 mm/page-writeback.c  |    2 +
 mm/rmap.c            |   68 +++++++++++++++++++++++++++++++++++++++++
 8 files changed, 144 insertions(+), 14 deletions(-)

diff -puN fs/buffer.c~mm-tracking-shared-dirty-pages fs/buffer.c
--- a/fs/buffer.c~mm-tracking-shared-dirty-pages
+++ a/fs/buffer.c
@@ -2983,6 +2983,7 @@ int try_to_free_buffers(struct page *pag
 
 	spin_lock(&mapping->private_lock);
 	ret = drop_buffers(page, &buffers_to_free);
+	spin_unlock(&mapping->private_lock);
 	if (ret) {
 		/*
 		 * If the filesystem writes its buffers by hand (eg ext3)
@@ -2994,7 +2995,6 @@ int try_to_free_buffers(struct page *pag
 		 */
 		clear_page_dirty(page);
 	}
-	spin_unlock(&mapping->private_lock);
 out:
 	if (buffers_to_free) {
 		struct buffer_head *bh = buffers_to_free;
diff -puN include/linux/mm.h~mm-tracking-shared-dirty-pages include/linux/mm.h
--- a/include/linux/mm.h~mm-tracking-shared-dirty-pages
+++ a/include/linux/mm.h
@@ -181,6 +181,12 @@ extern unsigned int kobjsize(const void
 #define VM_SequentialReadHint(v)	((v)->vm_flags & VM_SEQ_READ)
 #define VM_RandomReadHint(v)		((v)->vm_flags & VM_RAND_READ)
 
+static inline int is_shared_writable(unsigned int flags)
+{
+	return (flags & (VM_SHARED|VM_WRITE|VM_PFNMAP)) ==
+		(VM_SHARED|VM_WRITE);
+}
+
 /*
  * mapping from the currently active vm_flags protection bits (the
  * low four bits) to a page protection mask..
diff -puN include/linux/rmap.h~mm-tracking-shared-dirty-pages include/linux/rmap.h
--- a/include/linux/rmap.h~mm-tracking-shared-dirty-pages
+++ a/include/linux/rmap.h
@@ -103,6 +103,14 @@ pte_t *page_check_address(struct page *,
  */
unsigned long page_address_in_vma(struct page *, struct vm_area_struct *);
 
+/*
+ * Cleans the PTEs of shared mappings.
+ * (and since clean PTEs should also be readonly, write protects them too)
+ *
+ * returns the number of cleaned PTEs.
+ */
+int page_mkclean(struct page *);
+
 #else	/* !CONFIG_MMU */
 
 #define anon_vma_init()		do {} while (0)
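
The new page_mkclean() is driven from writeback; the clear_page_dirty_for_io()
hunk further down shows the real call site.  As a rough sketch of the intended
calling convention (the surrounding function below is made up for illustration
and is not part of the patch):

/*
 * Illustrative only, not in the patch.  page_mkclean() must be called with
 * the page locked; it visits every shared mapping of the page, cleans (and,
 * for dirty-accounted mappings, write protects) the PTEs, and returns the
 * number of PTEs it cleaned.
 */
static int example_prepare_writeout(struct page *page)
{
	BUG_ON(!PageLocked(page));

	if (TestClearPageDirty(page)) {
		/*
		 * Hand the dirty state over to writeback; the next store
		 * through a shared mapping faults and re-dirties the page.
		 */
		page_mkclean(page);
		return 1;	/* page must be written out */
	}
	return 0;		/* page was already clean */
}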
diff -puN mm/memory.c~mm-tracking-shared-dirty-pages mm/memory.c
--- a/mm/memory.c~mm-tracking-shared-dirty-pages
+++ a/mm/memory.c
@@ -937,6 +937,12 @@ struct page *follow_page(struct vm_area_
 	pte = *ptep;
 	if (!pte_present(pte))
 		goto unlock;
+	/*
+	 * This is not fully correct in the light of trapping write faults
+	 * for writable shared mappings. However since we're going to mark
+	 * the page dirty anyway some few lines downward, we might as well
+	 * take the write fault now.
+	 */
 	if ((flags & FOLL_WRITE) && !pte_write(pte))
 		goto unlock;
 	page = vm_normal_page(vma, address, pte);
@@ -1457,14 +1463,15 @@ static int do_wp_page(struct mm_struct *
 {
 	struct page *old_page, *new_page;
 	pte_t entry;
-	int reuse, ret = VM_FAULT_MINOR;
+	int reuse = 0;
+	int ret = VM_FAULT_MINOR;
+	struct page *dirty_page = NULL;
 
 	old_page = vm_normal_page(vma, address, orig_pte);
 	if (!old_page)
 		goto gotten;
 
-	if (unlikely((vma->vm_flags & (VM_SHARED|VM_WRITE)) ==
-				(VM_SHARED|VM_WRITE))) {
+	if (unlikely(is_shared_writable(vma->vm_flags))) {
 		if (vma->vm_ops && vma->vm_ops->page_mkwrite) {
 			/*
 			 * Notify the address space that the page is about to
@@ -1493,13 +1500,12 @@ static int do_wp_page(struct mm_struct *
 			if (!pte_same(*page_table, orig_pte))
 				goto unlock;
 		}
-
+		dirty_page = old_page;
+		get_page(dirty_page);
 		reuse = 1;
 	} else if (PageAnon(old_page) && !TestSetPageLocked(old_page)) {
 		reuse = can_share_swap_page(old_page);
 		unlock_page(old_page);
-	} else {
-		reuse = 0;
 	}
 
 	if (reuse) {
@@ -1565,6 +1571,10 @@ gotten:
 		page_cache_release(old_page);
 unlock:
 	pte_unmap_unlock(page_table, ptl);
+	if (dirty_page) {
+		set_page_dirty(dirty_page);
+		put_page(dirty_page);
+	}
 	return ret;
 oom:
 	if (old_page)
@@ -2094,6 +2104,7 @@ static int do_no_page(struct mm_struct *
 	unsigned int sequence = 0;
 	int ret = VM_FAULT_MINOR;
 	int anon = 0;
+	struct page *dirty_page = NULL;
 
 	pte_unmap(page_table);
 	BUG_ON(vma->vm_flags & VM_PFNMAP);
@@ -2188,6 +2199,10 @@ retry:
 		} else {
 			inc_mm_counter(mm, file_rss);
 			page_add_file_rmap(new_page);
+			if (write_access) {
+				dirty_page = new_page;
+				get_page(dirty_page);
+			}
 		}
 	} else {
 		/* One of our sibling threads was faster, back out. */
@@ -2200,6 +2215,10 @@ retry:
 	lazy_mmu_prot_update(entry);
 unlock:
 	pte_unmap_unlock(page_table, ptl);
+	if (dirty_page) {
+		set_page_dirty(dirty_page);
+		put_page(dirty_page);
+	}
 	return ret;
 oom:
 	page_cache_release(new_page);
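
For context, a minimal userspace exercise of the new fault path might look
like the following (hypothetical test program, not part of the patch; the
file name is arbitrary).  With the write protection installed at mmap() time,
the first store into each page of a MAP_SHARED file mapping takes a
write-protect fault, which is where do_wp_page() now marks the page dirty:

#include <fcntl.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	int fd = open("testfile", O_RDWR | O_CREAT, 0644);
	char *map;

	if (fd < 0 || ftruncate(fd, 4096) < 0)
		return 1;

	map = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (map == MAP_FAILED)
		return 1;

	memset(map, 'x', 4096);		/* first store: wp fault, page marked dirty */
	msync(map, 4096, MS_SYNC);	/* writeback cleans and write protects the PTE */
	map[0] = 'y';			/* faults and dirties the page again */

	munmap(map, 4096);
	close(fd);
	return 0;
}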
diff -puN mm/mmap.c~mm-tracking-shared-dirty-pages mm/mmap.c
--- a/mm/mmap.c~mm-tracking-shared-dirty-pages
+++ a/mm/mmap.c
@@ -25,6 +25,7 @@
 #include <linux/mount.h>
 #include <linux/mempolicy.h>
 #include <linux/rmap.h>
+#include <linux/backing-dev.h>
 
 #include <asm/uaccess.h>
 #include <asm/cacheflush.h>
@@ -888,6 +889,7 @@ unsigned long do_mmap_pgoff(struct file
 	struct rb_node ** rb_link, * rb_parent;
 	int accountable = 1;
 	unsigned long charged = 0, reqprot = prot;
+	struct address_space *mapping = NULL;
 
 	if (file) {
 		if (is_file_hugepages(file))
@@ -1084,18 +1086,13 @@ munmap_back:
 		error = file->f_op->mmap(file, vma);
 		if (error)
 			goto unmap_and_free_vma;
+
 	} else if (vm_flags & VM_SHARED) {
 		error = shmem_zero_setup(vma);
 		if (error)
 			goto free_vma;
 	}
 
-	/* Don't make the VMA automatically writable if it's shared, but the
-	 * backer wishes to know when pages are first written to */
-	if (vma->vm_ops && vma->vm_ops->page_mkwrite)
-		vma->vm_page_prot =
-			protection_map[vm_flags & (VM_READ|VM_WRITE|VM_EXEC)];
-
 	/* We set VM_ACCOUNT in a shared mapping's vm_flags, to inform
 	 * shmem_zero_setup (perhaps called through /dev/zero's ->mmap)
 	 * that memory reservation must be checked; but that reservation
@@ -1113,6 +1110,31 @@ munmap_back:
 	pgoff = vma->vm_pgoff;
 	vm_flags = vma->vm_flags;
 
+	/*
+	 * Tracking of dirty pages for shared writable mappings. Do this by
+	 * write protecting writable pages, and mark dirty in the write fault.
+	 *
+	 * Modify vma->vm_page_prot (the default protection for new pages)
+	 * to this effect.
+	 *
+	 * Cannot do before because the condition depends on:
+	 *  - backing_dev_info having the right capabilities
+	 *    (set by f_op->open())
+	 *  - vma->vm_flags being fully set
+	 *    (finished in f_op->mmap(), which could call remap_pfn_range())
+	 *
+	 * Also, cannot reset vma->vm_page_prot from vma->vm_flags because
+	 * f_op->mmap() can modify it.
+	 */
+	if (is_shared_writable(vm_flags) && vma->vm_file)
+		mapping = vma->vm_file->f_mapping;
+	if ((mapping && mapping_cap_account_dirty(mapping)) ||
+			(vma->vm_ops && vma->vm_ops->page_mkwrite))
+		vma->vm_page_prot =
+			__pgprot(pte_val
+				(pte_wrprotect
+				 (__pte(pgprot_val(vma->vm_page_prot)))));
+
 	if (!file || !vma_merge(mm, prev, addr, vma->vm_end, vma->vm_flags,
 			NULL, file, pgoff, vma_policy(vma))) {
 		file = vma->vm_file;
diff -puN mm/mprotect.c~mm-tracking-shared-dirty-pages mm/mprotect.c
--- a/mm/mprotect.c~mm-tracking-shared-dirty-pages
+++ a/mm/mprotect.c
@@ -21,6 +21,7 @@
 #include <linux/syscalls.h>
 #include <linux/swap.h>
 #include <linux/swapops.h>
+#include <linux/backing-dev.h>
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
 #include <asm/cacheflush.h>
@@ -124,6 +125,7 @@ mprotect_fixup(struct vm_area_struct *vm
 	long nrpages = (end - start) >> PAGE_SHIFT;
 	unsigned long charged = 0;
 	unsigned int mask;
+	struct address_space *mapping = NULL;
 	pgprot_t newprot;
 	pgoff_t pgoff;
 	int error;
@@ -179,7 +181,10 @@ success:
 	/* Don't make the VMA automatically writable if it's shared, but the
 	 * backer wishes to know when pages are first written to */
 	mask = VM_READ|VM_WRITE|VM_EXEC|VM_SHARED;
-	if (vma->vm_ops && vma->vm_ops->page_mkwrite)
+	if (is_shared_writable(newflags) && vma->vm_file)
+		mapping = vma->vm_file->f_mapping;
+	if ((mapping && mapping_cap_account_dirty(mapping)) ||
+			(vma->vm_ops && vma->vm_ops->page_mkwrite))
 		mask &= ~VM_SHARED;
 
 	newprot = protection_map[newflags & mask];
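
Note that do_mmap_pgoff() and mprotect_fixup() above open-code the same test
for whether first writes to a shared mapping must be trapped.  As a sketch,
the shared condition amounts to the following (hypothetical helper, not part
of the patch):

/*
 * A mapping needs write notification if its backing device accounts dirty
 * pages, or if the backer asked for ->page_mkwrite() callbacks.
 */
static int example_wants_write_notify(struct vm_area_struct *vma,
				      unsigned int flags)
{
	struct address_space *mapping = NULL;

	if (is_shared_writable(flags) && vma->vm_file)
		mapping = vma->vm_file->f_mapping;

	return (mapping && mapping_cap_account_dirty(mapping)) ||
		(vma->vm_ops && vma->vm_ops->page_mkwrite);
}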
diff -puN mm/page-writeback.c~mm-tracking-shared-dirty-pages mm/page-writeback.c
--- a/mm/page-writeback.c~mm-tracking-shared-dirty-pages
+++ a/mm/page-writeback.c
@@ -29,6 +29,7 @@
 #include <linux/sysctl.h>
 #include <linux/cpu.h>
 #include <linux/syscalls.h>
+#include <linux/rmap.h>
 
 /*
  * The maximum number of pages to writeout in a single bdflush/kupdate
@@ -759,6 +760,7 @@ int clear_page_dirty_for_io(struct page
 
 	if (mapping) {
 		if (TestClearPageDirty(page)) {
+			page_mkclean(page);
 			if (mapping_cap_account_dirty(mapping))
 				dec_page_state(nr_dirty);
 			return 1;
diff -puN mm/rmap.c~mm-tracking-shared-dirty-pages mm/rmap.c
--- a/mm/rmap.c~mm-tracking-shared-dirty-pages
+++ a/mm/rmap.c
@@ -53,6 +53,7 @@
 #include <linux/rmap.h>
 #include <linux/rcupdate.h>
 #include <linux/module.h>
+#include <linux/backing-dev.h>
 
 #include <asm/tlbflush.h>
 
@@ -434,6 +435,73 @@ int page_referenced(struct page *page, i
 	return referenced;
 }
 
+static int page_mkclean_one(struct page *page, struct vm_area_struct *vma, int protect)
+{
+	struct mm_struct *mm = vma->vm_mm;
+	unsigned long address;
+	pte_t *pte, entry;
+	spinlock_t *ptl;
+	int ret = 0;
+
+	address = vma_address(page, vma);
+	if (address == -EFAULT)
+		goto out;
+
+	pte = page_check_address(page, mm, address, &ptl);
+	if (!pte)
+		goto out;
+
+	if (!(pte_dirty(*pte) || (protect && pte_write(*pte))))
+		goto unlock;
+
+	entry = ptep_get_and_clear(mm, address, pte);
+	entry = pte_mkclean(entry);
+	if (protect)
+		entry = pte_wrprotect(entry);
+	ptep_establish(vma, address, pte, entry);
+	lazy_mmu_prot_update(entry);
+	ret = 1;
+
+unlock:
+	pte_unmap_unlock(pte, ptl);
+out:
+	return ret;
+}
+
+static int page_mkclean_file(struct address_space *mapping, struct page *page)
+{
+	pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
+	struct vm_area_struct *vma;
+	struct prio_tree_iter iter;
+	int ret = 0;
+
+	BUG_ON(PageAnon(page));
+
+	spin_lock(&mapping->i_mmap_lock);
+	vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
+		int protect = mapping_cap_account_dirty(mapping) &&
+			is_shared_writable(vma->vm_flags);
+		ret += page_mkclean_one(page, vma, protect);
+	}
+	spin_unlock(&mapping->i_mmap_lock);
+	return ret;
+}
+
+int page_mkclean(struct page *page)
+{
+	int ret = 0;
+
+	BUG_ON(!PageLocked(page));
+
+	if (page_mapped(page)) {
+		struct address_space *mapping = page_mapping(page);
+		if (mapping)
+			ret = page_mkclean_file(mapping, page);
+	}
+
+	return ret;
+}
+
 /**
  * page_set_anon_rmap - setup new anonymous rmap
  * @page:	the page to add the mapping to
_

Patches currently in -mm which might be from a.p.zijlstra@xxxxxxxxx are

buglet-in-radix_tree_tag_set.patch
add-page_mkwrite-vm_operations-method-fix.patch
mm-tracking-shared-dirty-pages.patch
mm-balance-dirty-pages.patch
mm-msync-cleanup.patch
mm-optimize-the-new-mprotect-code-a-bit.patch
mm-small-cleanup-of-install_page.patch
mm-remove-some-update_mmu_cache-calls.patch

-
To unsubscribe from this list: send the line "unsubscribe mm-commits" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html