[PATCH/RFC 12/14] Shared Policy: mapped file policy persistence model

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Shared Policy Infrastructure - define mapped file policy persistence model

This patch starts the process of supporting optional shared policy on
shared memory mapped files.

Mapped file policy applies to a range of a linearly memory mapped file
mmap()ed with the MAP_SHARED flag.  The mapping serves as a linear
window onto the mapped range.  Retain the shared policy until the last
shared mapping is removed, so that cached files do not retain policies
installed by defunct applications.

Use RCU deferred free to close a possible race between the last shared
mapper removing the shared policy and non-mmap page cache access.

Shmem segments [including SHM_HUGETLB segments] look like shared
mapped files to the shared policy infrastructure.  The policy
persistence model for shmem segments is that once a shared policy
is applied, it remains as long as the segment exists.  To retain this
behavior, define a shared policy persistence flag--SPOL_F_PERSIST--and
set this flag when allocating a shared policy for a shmem segment.

Now we can push the freeing of any persistent shmem/hugetlbfs shared
policy, when the segment is deleted, down into the fs-independent inode
cleanup path.


Signed-off-by: Lee Schermerhorn <lee.schermerhorn@xxxxxx>

 fs/hugetlbfs/inode.c          |    1 
 fs/inode.c                    |    7 ++++
 include/linux/shared_policy.h |   11 ++++--
 mm/mempolicy.c                |   70 ++++++++++++++++++++++++++++++++----------
 mm/mmap.c                     |   11 ++++++
 mm/shmem.c                    |    5 ---
 6 files changed, 81 insertions(+), 24 deletions(-)

Index: linux-2.6.36-mmotm-101103-1217/fs/hugetlbfs/inode.c
===================================================================
--- linux-2.6.36-mmotm-101103-1217.orig/fs/hugetlbfs/inode.c
+++ linux-2.6.36-mmotm-101103-1217/fs/hugetlbfs/inode.c
@@ -663,7 +663,6 @@ static struct inode *hugetlbfs_alloc_ino
 static void hugetlbfs_destroy_inode(struct inode *inode)
 {
 	hugetlbfs_inc_free_inodes(HUGETLBFS_SB(inode->i_sb));
-	mpol_free_shared_policy(inode->i_mapping);
 	kmem_cache_free(hugetlbfs_inode_cachep, HUGETLBFS_I(inode));
 }
 
Index: linux-2.6.36-mmotm-101103-1217/fs/inode.c
===================================================================
--- linux-2.6.36-mmotm-101103-1217.orig/fs/inode.c
+++ linux-2.6.36-mmotm-101103-1217/fs/inode.c
@@ -25,6 +25,7 @@
 #include <linux/async.h>
 #include <linux/posix_acl.h>
 #include <linux/ima.h>
+#include <linux/shared_policy.h>
 
 /*
  * This is needed for the following functions:
@@ -305,6 +306,12 @@ void inode_init_once(struct inode *inode
 #ifdef CONFIG_FSNOTIFY
 	INIT_HLIST_HEAD(&inode->i_fsnotify_marks);
 #endif
+	/*
+	 * free any shared policy
+	 */
+	if ((inode->i_mode & S_IFMT) == S_IFREG)
+		mpol_free_shared_policy(inode->i_mapping);
+
 }
 EXPORT_SYMBOL(inode_init_once);
 
Index: linux-2.6.36-mmotm-101103-1217/mm/shmem.c
===================================================================
--- linux-2.6.36-mmotm-101103-1217.orig/mm/shmem.c
+++ linux-2.6.36-mmotm-101103-1217/mm/shmem.c
@@ -1516,6 +1516,7 @@ int shmem_set_policy(struct vm_area_stru
 		if (IS_ERR(sp))
 			return PTR_ERR(sp);
 	}
+	sp->sp_flags |= SPOL_F_PERSIST;
 	return mpol_set_shared_policy(sp, vma_mpol_pgoff(vma, start),
 					(end - start) >> PAGE_SHIFT, new);
 }
@@ -2417,10 +2418,6 @@ static struct inode *shmem_alloc_inode(s
 
 static void shmem_destroy_inode(struct inode *inode)
 {
-	if ((inode->i_mode & S_IFMT) == S_IFREG) {
-		/* only struct inode is valid if it's an inline symlink */
-		mpol_free_shared_policy(inode->i_mapping);
-	}
 	kmem_cache_free(shmem_inode_cachep, SHMEM_I(inode));
 }
 
Index: linux-2.6.36-mmotm-101103-1217/mm/mmap.c
===================================================================
--- linux-2.6.36-mmotm-101103-1217.orig/mm/mmap.c
+++ linux-2.6.36-mmotm-101103-1217/mm/mmap.c
@@ -198,6 +198,17 @@ static void __remove_shared_vm_struct(st
 	if (vma->vm_flags & VM_SHARED)
 		mapping->i_mmap_writable--;
 
+	if (!mapping->i_mmap_writable) {
+		/*
+		 * shared mmap()ed file policy persistence model:
+		 * remove policy when removing last shared mapping,
+		 * unless marked as persistent--e.g., shmem
+		 */
+		struct shared_policy *sp = mapping_shared_policy(mapping);
+		if (sp && !(sp->sp_flags & SPOL_F_PERSIST))
+			mpol_free_shared_policy(mapping);
+	}
+
 	flush_dcache_mmap_lock(mapping);
 	if (unlikely(vma->vm_flags & VM_NONLINEAR))
 		list_del_init(&vma->shared.vm_set.list);
Index: linux-2.6.36-mmotm-101103-1217/include/linux/shared_policy.h
===================================================================
--- linux-2.6.36-mmotm-101103-1217.orig/include/linux/shared_policy.h
+++ linux-2.6.36-mmotm-101103-1217/include/linux/shared_policy.h
@@ -4,6 +4,7 @@
 #include <linux/fs.h>
 #include <linux/spinlock.h>
 #include <linux/rbtree.h>
+#include <linux/rcupdate.h>
 
 /*
  * Tree of shared policies for a shared memory regions and memory
@@ -25,11 +26,15 @@ struct sp_node {
 };
 
 struct shared_policy {
-	struct rb_root root;
-	spinlock_t     lock;		/* protects rb tree */
-	int            nr_sp_nodes;	/* for numa_maps */
+	struct rb_root  root;
+	spinlock_t      lock;		/* protects rb tree, nr_sp_nodes */
+	int             nr_sp_nodes;	/* for numa_maps */
+	int             sp_flags;	/* persistence, ... */
+	struct rcu_head sp_rcu;		/* deferred reclaim */
 };
 
+#define SPOL_F_PERSIST	0x01		/* for shmem use */
+
 extern int shared_file_policy_default;
 
 extern struct shared_policy *mpol_shared_policy_new(
Index: linux-2.6.36-mmotm-101103-1217/mm/mempolicy.c
===================================================================
--- linux-2.6.36-mmotm-101103-1217.orig/mm/mempolicy.c
+++ linux-2.6.36-mmotm-101103-1217/mm/mempolicy.c
@@ -1572,13 +1572,17 @@ asmlinkage long compat_sys_mbind(compat_
  */
 struct mempolicy *get_file_policy(struct address_space *mapping, pgoff_t pgoff)
 {
-	struct shared_policy *sp = mapping->spolicy;
+	struct shared_policy *sp;
 	struct mempolicy *pol = NULL;
 
+	rcu_read_lock();
+	sp = rcu_dereference(mapping->spolicy);
 	if (unlikely(sp))
 		pol = mpol_shared_policy_lookup(sp, pgoff);
 	else if (likely(current))
 		pol = current->mempolicy;
+	rcu_read_unlock();
+
 	if (likely(!pol))
 		pol = &default_policy;
 	return pol;
@@ -2291,6 +2295,10 @@ restart:
  * On entry, the current task has a reference on a non-NULL @mpol.
  * This must be released on exit.
  * This is called at get_inode() calls and we can use GFP_KERNEL.
+ *
+ * Locking:  mapping->spolicy stabilized by current->mm->mmap_sem.
+ * Can't remove last shared mapping while we hold the sem; can't
+ * remove inode/shared policy while inode is mmap()ed shared.
  */
 struct shared_policy *mpol_shared_policy_new(struct address_space *mapping,
 						struct mempolicy *mpol)
@@ -2349,9 +2357,10 @@ put_free:
 	 */
 	spin_lock(&mapping->i_mmap_lock);
 	spx = mapping->spolicy;
-	if (!spx && !err)
-		mapping->spolicy = spx = sp;
-	else
+	if (!spx && !err) {
+		spx = sp;
+		rcu_assign_pointer(mapping->spolicy, sp);
+	} else
 		err = !0;
 	spin_unlock(&mapping->i_mmap_lock);
 	if (err)
@@ -2367,6 +2376,9 @@ put_free:
  * @sz:  size of range [bytes] to which mempolicy applies
  * @mpol:  the mempolicy to install
  *
+ * Locking:  mapping->spolicy stabilized by current->mm->mmap_sem.
+ * Can't remove last shared mapping while we hold the sem; can't
+ * remove inode/shared policy while inode is mmap()ed shared.
  */
 int mpol_set_shared_policy(struct shared_policy *sp,
 				pgoff_t pgoff, unsigned long sz,
@@ -2394,37 +2406,63 @@ int mpol_set_shared_policy(struct shared
 
 /**
  * mpol_free_shared_policy() - Free a backing policy store on inode delete.
- * @mapping - address_space struct containing pointer to shared policy to be freed.
+ * @mapping - address_space struct containing pointer to shared policy to be
+ * freed.
  *
  * Frees the shared policy red-black tree, if any, before freeing the
  * shared policy struct itself, if any.
+ *
+ * Locking:  only free shared policy on inode deletion [shmem] or
+ * removal of last shared mmap()ing.  Can only delete inode when no
+ * more references.  Removal of last shared mmap()ing protected by
+ * mmap_sem [and mapping->i_mmap_lock].  Still a potential race with
+ * shared policy lookups from page cache on behalf of file descriptor
+ * access to pages.  Use deferred RCU to protect readers [in get_file_policy()]
+ * from shared policy free on removal of last shared mmap()ing.
  */
-void mpol_free_shared_policy(struct address_space *mapping)
+static void __mpol_free_shared_policy(struct rcu_head *rhp)
 {
-	struct shared_policy *sp = mapping->spolicy;
-	struct sp_node *n;
+	struct shared_policy *sp = container_of(rhp, struct shared_policy,
+						sp_rcu);
 	struct rb_node *next;
 
-	if (!sp)
-  		return;
-
-	mapping->spolicy = NULL;
-
+	/*
+	 * Now, we can safely tear down the shared policy tree, if any
+	 */
 	if (sp->root.rb_node) {
-		spin_lock(&sp->lock);
 		next = rb_first(&sp->root);
 		while (next) {
-			n = rb_entry(next, struct sp_node, nd);
+			struct sp_node *n = rb_entry(next, struct sp_node, nd);
 			next = rb_next(&n->nd);
 			rb_erase(&n->nd, &sp->root);
 			mpol_put(n->policy);
 			kmem_cache_free(sn_cache, n);
 		}
-		spin_unlock(&sp->lock);
 	}
 	kmem_cache_free(sp_cache, sp);
 }
 
+void mpol_free_shared_policy(struct address_space *mapping)
+{
+	struct shared_policy *sp = mapping->spolicy;
+
+	if (!sp)
+		return;
+
+	rcu_assign_pointer(mapping->spolicy, NULL);
+
+	/*
+	 * Presence of SPOL_F_PERSIST flag means we're freeing the
+	 * shared policy in the inode destruction path.  No
+	 * need for RCU synchronization.
+	 */
+	if (sp->sp_flags & SPOL_F_PERSIST)
+		__mpol_free_shared_policy(&sp->sp_rcu);
+	else
+		call_rcu(&sp->sp_rcu, __mpol_free_shared_policy);
+
+}
+
 /* assumes fs == KERNEL_DS */
 void __init numa_policy_init(void)
 {
--
To unsubscribe from this list: send the line "unsubscribe linux-numa" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [Linux Kernel]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux SCSI]     [Devices]

  Powered by Linux