Shared Policy Infrastructure - move shared policy to inode/mapping This patch starts the process of cleaning up the shmem shared [mem]policy infrastructure: + to eliminate use of on-stack pseudo-vmas for shmem allocations, which will simplify several shmem internal APIs and functions. + for use with hugetlb shmem segments and + eventually, I hope, for use with generic mmap()ed files. This patch makes the shared policy struct in the shmem and hugetlbfs extended inodes reachable via a pointer in the generic address space struct, where it will be available to any file type, and fixes up the existing code to accommodate this change. Details: 1) create a shared_policy.h header and move the shared policy support from mempolicy.h to shared_policy.h. 2) add a struct shared_policy pointer to struct address_space. This effectively adds it to each inode in i_data. get_policy vma ops will locate this via vma->vm_file->f_mapping->spolicy. Modify [temporarily] mpol_shared_policy_init() to initialize via a shared policy pointer arg. A subsequent patch will replace this with a pointer to a dynamically allocated mempolicy and will make the pointer dependent on CONFIG_NUMA. Then, all accesses to spolicy will also be made dependent on CONFIG_NUMA via wrappers. 3) modify mpol_shared_policy_lookup() to return NULL if the spolicy pointer is NULL. get_vma_policy() will substitute the process policy, if any, else the default policy. 4) modify shmem, the only existing user of shared policy infrastructure, to work with the changes above. At this point, just use the shared_policy embedded in the shmem inode info struct. A later patch will dynamically allocate the struct when needed. Actually, hugetlbfs inodes also contain a shared policy, but the vma's get|set_policy ops are not hooked up. This patch modifies hugetlbfs_get_inode() to initialize the shared policy struct embedded in its info struct via the i_mapping's spolicy pointer. A later patch will "hook up" hugetlb mappings to the get|set_policy ops. 
Signed-off-by: Lee Schermerhorn <lee.schermerhorn@xxxxxx> fs/hugetlbfs/inode.c | 3 + include/linux/fs.h | 2 + include/linux/mempolicy.h | 53 --------------------------------- include/linux/shared_policy.h | 66 ++++++++++++++++++++++++++++++++++++++++++ mm/mempolicy.c | 2 - mm/shmem.c | 37 ++++++++++++----------- 6 files changed, 92 insertions(+), 71 deletions(-) Index: linux-2.6.36-mmotm-101103-1217/include/linux/fs.h =================================================================== --- linux-2.6.36-mmotm-101103-1217.orig/include/linux/fs.h +++ linux-2.6.36-mmotm-101103-1217/include/linux/fs.h @@ -646,6 +646,8 @@ struct address_space { spinlock_t private_lock; /* for use by the address_space */ struct list_head private_list; /* ditto */ struct address_space *assoc_mapping; /* ditto */ + + struct shared_policy *spolicy; } __attribute__((aligned(sizeof(long)))); /* * On most architectures that alignment is already the case; but Index: linux-2.6.36-mmotm-101103-1217/include/linux/mempolicy.h =================================================================== --- linux-2.6.36-mmotm-101103-1217.orig/include/linux/mempolicy.h +++ linux-2.6.36-mmotm-101103-1217/include/linux/mempolicy.h @@ -64,10 +64,9 @@ enum mpol_rebind_step { #include <linux/mmzone.h> #include <linux/slab.h> -#include <linux/rbtree.h> -#include <linux/spinlock.h> #include <linux/nodemask.h> #include <linux/pagemap.h> +#include <linux/shared_policy.h> struct mm_struct; @@ -172,32 +171,6 @@ static inline int mpol_equal(struct memp return __mpol_equal(a, b); } -/* - * Tree of shared policies for a shared memory region. - * Maintain the policies in a pseudo mm that contains vmas. The vmas - * carry the policy. As a special twist the pseudo mm is indexed in pages, not - * bytes, so that we can work with shared memory segments bigger than - * unsigned long. 
- */ - -struct sp_node { - struct rb_node nd; - unsigned long start, end; - struct mempolicy *policy; -}; - -struct shared_policy { - struct rb_root root; - spinlock_t lock; -}; - -void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol); -int mpol_set_shared_policy(struct shared_policy *sp, - struct vm_area_struct *vma, - struct mempolicy *new); -void mpol_free_shared_policy(struct shared_policy *sp); -struct mempolicy *mpol_shared_policy_lookup(struct shared_policy *sp, - unsigned long idx); extern void numa_default_policy(void); extern void numa_policy_init(void); @@ -281,30 +254,6 @@ static inline struct mempolicy *mpol_dup { return NULL; } - -struct shared_policy {}; - -static inline int mpol_set_shared_policy(struct shared_policy *sp, - struct vm_area_struct *vma, - struct mempolicy *new) -{ - return -EINVAL; -} - -static inline void mpol_shared_policy_init(struct shared_policy *sp, - struct mempolicy *mpol) -{ -} - -static inline void mpol_free_shared_policy(struct shared_policy *sp) -{ -} - -static inline struct mempolicy * -mpol_shared_policy_lookup(struct shared_policy *sp, unsigned long idx) -{ - return NULL; -} #define vma_policy(vma) NULL #define vma_set_policy(vma, pol) do {} while(0) Index: linux-2.6.36-mmotm-101103-1217/include/linux/shared_policy.h =================================================================== --- /dev/null +++ linux-2.6.36-mmotm-101103-1217/include/linux/shared_policy.h @@ -0,0 +1,66 @@ +#ifndef _LINUX_SHARED_POLICY_H +#define _LINUX_SHARED_POLICY_H 1 + +#include <linux/spinlock.h> +#include <linux/rbtree.h> + +/* + * Tree of shared policies for a shared memory regions and memory + * mapped files. +TODO: wean the low level shared policies from the notion of vmas. + just use inode, offset, length + * Maintain the policies in a pseudo mm that contains vmas. The vmas + * carry the policy. 
As a special twist the pseudo mm is indexed in pages, not + * bytes, so that we can work with shared memory segments bigger than + * unsigned long. + */ + +#ifdef CONFIG_NUMA + +struct sp_node { + struct rb_node nd; + unsigned long start, end; + struct mempolicy *policy; +}; + +struct shared_policy { + struct rb_root root; + spinlock_t lock; /* protects rb tree */ +}; + +void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol); +int mpol_set_shared_policy(struct shared_policy *, + struct vm_area_struct *, + struct mempolicy *); +void mpol_free_shared_policy(struct shared_policy *); +struct mempolicy *mpol_shared_policy_lookup(struct shared_policy *, + unsigned long); + +#else /* !NUMA */ + +struct shared_policy {}; + +static inline int mpol_set_shared_policy(struct shared_policy *info, + struct vm_area_struct *vma, + struct mempolicy *new) +{ + return -EINVAL; +} + +static inline void mpol_shared_policy_init(struct shared_policy *sp, + struct mempolicy *mpol) +{ +} + +static inline void mpol_free_shared_policy(struct shared_policy *p) +{ +} + +static inline struct mempolicy * +mpol_shared_policy_lookup(struct shared_policy *sp, unsigned long idx) +{ + return NULL; +} +#endif + +#endif /* _LINUX_SHARED_POLICY_H */ Index: linux-2.6.36-mmotm-101103-1217/mm/mempolicy.c =================================================================== --- linux-2.6.36-mmotm-101103-1217.orig/mm/mempolicy.c +++ linux-2.6.36-mmotm-101103-1217/mm/mempolicy.c @@ -2053,7 +2053,7 @@ mpol_shared_policy_lookup(struct shared_ struct mempolicy *pol = NULL; struct sp_node *sn; - if (!sp->root.rb_node) + if (!sp || !sp->root.rb_node) return NULL; spin_lock(&sp->lock); sn = sp_lookup(sp, idx, idx+1); Index: linux-2.6.36-mmotm-101103-1217/mm/shmem.c =================================================================== --- linux-2.6.36-mmotm-101103-1217.orig/mm/shmem.c +++ linux-2.6.36-mmotm-101103-1217/mm/shmem.c @@ -1146,15 +1146,14 @@ static struct mempolicy *shmem_get_sbmpo } 
#endif /* CONFIG_TMPFS */ -static struct page *shmem_swapin(swp_entry_t entry, gfp_t gfp, - struct shmem_inode_info *info, unsigned long idx) +struct page *shmem_swapin(swp_entry_t entry, gfp_t gfp, + struct shared_policy *sp, unsigned long idx) { struct mempolicy mpol, *spol; struct vm_area_struct pvma; struct page *page; - spol = mpol_cond_copy(&mpol, - mpol_shared_policy_lookup(&info->policy, idx)); + spol = mpol_cond_copy(&mpol, mpol_shared_policy_lookup(sp, idx)); /* Create a pseudo vma that just contains the policy */ pvma.vm_start = 0; @@ -1165,8 +1164,8 @@ static struct page *shmem_swapin(swp_ent return page; } -static struct page *shmem_alloc_page(gfp_t gfp, - struct shmem_inode_info *info, unsigned long idx) +static struct page *shmem_alloc_page(gfp_t gfp, struct shared_policy *sp, + unsigned long idx) { struct vm_area_struct pvma; @@ -1174,7 +1173,7 @@ static struct page *shmem_alloc_page(gfp pvma.vm_start = 0; pvma.vm_pgoff = idx; pvma.vm_ops = NULL; - pvma.vm_policy = mpol_shared_policy_lookup(&info->policy, idx); + pvma.vm_policy = mpol_shared_policy_lookup(sp, idx); /* * alloc_page_vma() will drop the shared policy reference @@ -1188,14 +1187,14 @@ static inline void shmem_show_mpol(struc } #endif /* CONFIG_TMPFS */ -static inline struct page *shmem_swapin(swp_entry_t entry, gfp_t gfp, - struct shmem_inode_info *info, unsigned long idx) +static inline struct page *shmem_swapin(swp_entry_t entry, gfp_t gfp, void *sp, + unsigned long idx) { return swapin_readahead(entry, gfp, NULL, 0); } -static inline struct page *shmem_alloc_page(gfp_t gfp, - struct shmem_inode_info *info, unsigned long idx) +static inline struct page *shmem_alloc_page(gfp_t gfp, void *sp, + unsigned long idx) { return alloc_page(gfp); } @@ -1260,7 +1259,7 @@ repeat: radix_tree_preload_end(); if (sgp != SGP_READ && !prealloc_page) { /* We don't care if this fails */ - prealloc_page = shmem_alloc_page(gfp, info, idx); + prealloc_page = shmem_alloc_page(gfp, mapping->spolicy, idx); if 
(prealloc_page) { if (mem_cgroup_cache_charge(prealloc_page, current->mm, GFP_KERNEL)) { @@ -1293,7 +1292,8 @@ repeat: *type |= VM_FAULT_MAJOR; } spin_unlock(&info->lock); - swappage = shmem_swapin(swap, gfp, info, idx); + swappage = shmem_swapin(swap, gfp, mapping->spolicy, + idx); if (!swappage) { spin_lock(&info->lock); entry = shmem_swp_alloc(info, idx, sgp); @@ -1420,7 +1420,7 @@ repeat: if (!prealloc_page) { spin_unlock(&info->lock); - filepage = shmem_alloc_page(gfp, info, idx); + filepage = shmem_alloc_page(gfp, mapping->spolicy, idx); if (!filepage) { shmem_unacct_blocks(info->flags, 1); shmem_free_blocks(inode, 1); @@ -1608,7 +1608,8 @@ static struct inode *shmem_get_inode(str inode->i_mapping->a_ops = &shmem_aops; inode->i_op = &shmem_inode_operations; inode->i_fop = &shmem_file_operations; - mpol_shared_policy_init(&info->policy, + inode->i_mapping->spolicy = &info->policy; + mpol_shared_policy_init(inode->i_mapping->spolicy, shmem_get_sbmpol(sbinfo)); break; case S_IFDIR: @@ -1623,7 +1624,9 @@ static struct inode *shmem_get_inode(str * Must not load anything in the rbtree, * mpol_free_shared_policy will not be called. 
*/ - mpol_shared_policy_init(&info->policy, NULL); + inode->i_mapping->spolicy = &info->policy; + mpol_shared_policy_init(inode->i_mapping->spolicy, + NULL); break; } } else @@ -2419,7 +2422,7 @@ static void shmem_destroy_inode(struct i { if ((inode->i_mode & S_IFMT) == S_IFREG) { /* only struct inode is valid if it's an inline symlink */ - mpol_free_shared_policy(&SHMEM_I(inode)->policy); + mpol_free_shared_policy(inode->i_mapping->spolicy); } kmem_cache_free(shmem_inode_cachep, SHMEM_I(inode)); } Index: linux-2.6.36-mmotm-101103-1217/fs/hugetlbfs/inode.c =================================================================== --- linux-2.6.36-mmotm-101103-1217.orig/fs/hugetlbfs/inode.c +++ linux-2.6.36-mmotm-101103-1217/fs/hugetlbfs/inode.c @@ -472,7 +472,8 @@ static struct inode *hugetlbfs_get_inode * call mpol_free_shared_policy() it will just return because * the rb tree will still be empty. */ - mpol_shared_policy_init(&info->policy, NULL); + inode->i_mapping->spolicy = &info->policy; + mpol_shared_policy_init(inode->i_mapping->spolicy, NULL); switch (mode & S_IFMT) { default: init_special_inode(inode, mode, dev); -- To unsubscribe from this list: send the line "unsubscribe linux-numa" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html