On Wed, 18 Jan 2012 10:51:02 -0800 Arun Sharma <asharma@xxxxxx> wrote:

> This enables malloc optimizations where we might
> madvise(..,MADV_DONTNEED) a page only to fault it
> back at a different virtual address.
>
> To ensure that we don't leak sensitive data to
> unprivileged processes, we enable this optimization
> only for pages that are reused within a memory
> cgroup.
>
> The idea is to make this opt-in both at the mmap()
> level and cgroup level so the default behavior is
> unchanged after the patch.
>
> TODO: Ask for a VM_UNINITIALIZED bit
> TODO: Implement a cgroup level opt-in flag

Hmm, then:

1. A new task that jumps into this cgroup can see any uncleared data...
2. If a memcg pointer is reused, the information will be leaked.
3. If VM_UNINITIALIZED is set, the process can see any data freed by
   another process that doesn't know about VM_UNINITIALIZED at all.
4. The process will be able to see file cache data it has no access
   rights to, once that cache has been touched from within the memcg.

3 & 4 seem too dangerous.

Isn't it better to make this per-task rather than per-memcg, and only
allow a task to reuse pages that it freed itself?
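Something like the following rough sketch (illustrative only, not part
of the patch; page_free_mm() is a hypothetical helper that would have
to remember which mm_struct last freed the page):

/* Illustrative sketch only, NOT from the patch under review.
 * Assumes a hypothetical page_free_mm(page) helper recording the
 * mm_struct that last freed this page. */
static inline bool page_needs_clearing_per_task(struct page *page,
                                                struct vm_area_struct *vma)
{
        if (!vma_requests_uninitialized(vma))
                return true;    /* default: newly faulted pages are cleared */

        /* Skip clearing only when the page is coming back to the
         * same address space that freed it. */
        return page_free_mm(page) != vma->vm_mm;
}

That way the reuse window never crosses an address-space boundary,
which would sidestep points 3 and 4 entirely.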
Thanks,
-Kame

> To: linux-kernel@xxxxxxxxxxxxxxx
> Cc: linux-mm@xxxxxxxxx
> Cc: Balbir Singh <bsingharora@xxxxxxxxx>
> Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@xxxxxxxxxxxxxx>
> Cc: akpm@xxxxxxxxxxxxxxxxxxxx
> Signed-off-by: Arun Sharma <asharma@xxxxxx>
> ---
>  include/asm-generic/mman-common.h |    6 +-----
>  include/linux/highmem.h           |    6 ++++++
>  include/linux/mm.h                |    2 ++
>  include/linux/mman.h              |    1 +
>  include/linux/page_cgroup.h       |   29 +++++++++++++++++++++++++++++
>  init/Kconfig                      |    2 +-
>  mm/mempolicy.c                    |   29 +++++++++++++++++++++++------
>  7 files changed, 63 insertions(+), 12 deletions(-)
>
> diff --git a/include/asm-generic/mman-common.h b/include/asm-generic/mman-common.h
> index 787abbb..71e079f 100644
> --- a/include/asm-generic/mman-common.h
> +++ b/include/asm-generic/mman-common.h
> @@ -19,11 +19,7 @@
>  #define MAP_TYPE        0x0f            /* Mask for type of mapping */
>  #define MAP_FIXED       0x10            /* Interpret addr exactly */
>  #define MAP_ANONYMOUS   0x20            /* don't use a file */
> -#ifdef CONFIG_MMAP_ALLOW_UNINITIALIZED
> -# define MAP_UNINITIALIZED 0x4000000    /* For anonymous mmap, memory could be uninitialized */
> -#else
> -# define MAP_UNINITIALIZED 0x0          /* Don't support this flag */
> -#endif
> +#define MAP_UNINITIALIZED 0x4000000     /* For anonymous mmap, memory could be uninitialized */
>
>  #define MS_ASYNC        1               /* sync memory asynchronously */
>  #define MS_INVALIDATE   2               /* invalidate the caches */
> diff --git a/include/linux/highmem.h b/include/linux/highmem.h
> index 3a93f73..caae922 100644
> --- a/include/linux/highmem.h
> +++ b/include/linux/highmem.h
> @@ -4,6 +4,7 @@
>  #include <linux/fs.h>
>  #include <linux/kernel.h>
>  #include <linux/mm.h>
> +#include <linux/page_cgroup.h>
>  #include <linux/uaccess.h>
>  #include <linux/hardirq.h>
>
> @@ -156,6 +157,11 @@ __alloc_zeroed_user_highpage(gfp_t movableflags,
>  	struct page *page = alloc_page_vma(GFP_HIGHUSER | movableflags,
>  			vma, vaddr);
>
> +#ifdef CONFIG_MMAP_ALLOW_UNINITIALIZED
> +	if (!page_needs_clearing(page, vma))
> +		return page;
> +#endif
> +
>  	if (page)
>  		clear_user_highpage(page, vaddr);
>
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index 4baadd1..c6bab01 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -118,6 +118,8 @@ extern unsigned int kobjsize(const void *objp);
>  #define VM_SAO          0x20000000      /* Strong Access Ordering (powerpc) */
>  #define VM_PFN_AT_MMAP  0x40000000      /* PFNMAP vma that is fully mapped at mmap time */
>  #define VM_MERGEABLE    0x80000000      /* KSM may merge identical pages */
> +#define VM_UNINITIALIZED VM_SAO         /* Steal a powerpc bit for now, since we're out
> +                                           of bits for 32 bit archs */
>
>  /* Bits set in the VMA until the stack is in its final location */
>  #define VM_STACK_INCOMPLETE_SETUP (VM_RAND_READ | VM_SEQ_READ)
> diff --git a/include/linux/mman.h b/include/linux/mman.h
> index 8b74e9b..9bef6c9 100644
> --- a/include/linux/mman.h
> +++ b/include/linux/mman.h
> @@ -87,6 +87,7 @@ calc_vm_flag_bits(unsigned long flags)
>  	return _calc_vm_trans(flags, MAP_GROWSDOWN,  VM_GROWSDOWN ) |
>  	       _calc_vm_trans(flags, MAP_DENYWRITE,  VM_DENYWRITE ) |
>  	       _calc_vm_trans(flags, MAP_EXECUTABLE, VM_EXECUTABLE) |
> +	       _calc_vm_trans(flags, MAP_UNINITIALIZED, VM_UNINITIALIZED) |
>  	       _calc_vm_trans(flags, MAP_LOCKED,     VM_LOCKED    );
>  }
>  #endif /* __KERNEL__ */
> diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h
> index 961ecc7..e959869 100644
> --- a/include/linux/page_cgroup.h
> +++ b/include/linux/page_cgroup.h
> @@ -155,6 +155,17 @@ static inline unsigned long page_cgroup_array_id(struct page_cgroup *pc)
>  	return (pc->flags >> PCG_ARRAYID_SHIFT) & PCG_ARRAYID_MASK;
>  }
>
> +static int mm_match_cgroup(const struct mm_struct *mm,
> +			   const struct mem_cgroup *cgroup);
> +static inline bool page_seen_by_cgroup(struct page *page,
> +				       const struct mm_struct *mm)
> +{
> +	struct page_cgroup *pcg = lookup_page_cgroup(page);
> +	if (pcg == NULL)
> +		return false;
> +	return mm_match_cgroup(mm, pcg->mem_cgroup);
> +}
> +
>  #else /* CONFIG_CGROUP_MEM_RES_CTLR */
>  struct page_cgroup;
>
> @@ -175,8 +186,26 @@ static inline void __init page_cgroup_init_flatmem(void)
>  {
>  }
>
> +static inline bool page_seen_by_cgroup(struct page *page,
> +				       const struct mm_struct *mm)
> +{
> +	return false;
> +}
> +
>  #endif /* CONFIG_CGROUP_MEM_RES_CTLR */
>
> +static inline bool vma_requests_uninitialized(struct vm_area_struct *vma)
> +{
> +	return vma && !vma->vm_file && vma->vm_flags & VM_UNINITIALIZED;
> +}
> +
> +static inline bool page_needs_clearing(struct page *page,
> +				       struct vm_area_struct *vma)
> +{
> +	return !(vma_requests_uninitialized(vma)
> +		 && page_seen_by_cgroup(page, vma->vm_mm));
> +}
> +
>  #include <linux/swap.h>
>
>  #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
> diff --git a/init/Kconfig b/init/Kconfig
> index 43298f9..428e047 100644
> --- a/init/Kconfig
> +++ b/init/Kconfig
> @@ -1259,7 +1259,7 @@ endchoice
>
>  config MMAP_ALLOW_UNINITIALIZED
>  	bool "Allow mmapped anonymous memory to be uninitialized"
> -	depends on EXPERT && !MMU
> +	depends on EXPERT
>  	default n
>  	help
>  	  Normally, and according to the Linux spec, anonymous memory obtained
> diff --git a/mm/mempolicy.c b/mm/mempolicy.c
> index c3fdbcb..7c9ab68 100644
> --- a/mm/mempolicy.c
> +++ b/mm/mempolicy.c
> @@ -90,6 +90,7 @@
>  #include <linux/syscalls.h>
>  #include <linux/ctype.h>
>  #include <linux/mm_inline.h>
> +#include <linux/page_cgroup.h>
>
>  #include <asm/tlbflush.h>
>  #include <asm/uaccess.h>
> @@ -1847,6 +1848,11 @@ alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
>  	struct zonelist *zl;
>  	struct page *page;
>
> +#ifdef CONFIG_MMAP_ALLOW_UNINITIALIZED
> +	if (vma_requests_uninitialized(vma))
> +		gfp &= ~__GFP_ZERO;
> +#endif
> +
>  	get_mems_allowed();
>  	if (unlikely(pol->mode == MPOL_INTERLEAVE)) {
>  		unsigned nid;
>
> @@ -1854,25 +1860,36 @@ alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
>  		nid = interleave_nid(pol, vma, addr, PAGE_SHIFT + order);
>  		mpol_cond_put(pol);
>  		page = alloc_page_interleave(gfp, order, nid);
> -		put_mems_allowed();
> -		return page;
> +		goto out;
>  	}
>  	zl = policy_zonelist(gfp, pol, node);
>  	if (unlikely(mpol_needs_cond_ref(pol))) {
>  		/*
>  		 * slow path: ref counted shared policy
>  		 */
> -		struct page *page = __alloc_pages_nodemask(gfp, order,
> -					zl, policy_nodemask(gfp, pol));
> +		page = __alloc_pages_nodemask(gfp, order,
> +				zl, policy_nodemask(gfp, pol));
>  		__mpol_put(pol);
> -		put_mems_allowed();
> -		return page;
> +		goto out;
>  	}
> +
>  	/*
>  	 * fast path: default or task policy
>  	 */
>  	page = __alloc_pages_nodemask(gfp, order, zl,
>  			policy_nodemask(gfp, pol));
> +
> +out:
> +#ifdef CONFIG_MMAP_ALLOW_UNINITIALIZED
> +	if (page_needs_clearing(page, vma)) {
> +		int i;
> +		for (i = 0; i < (1 << order); i++) {
> +			void *kaddr = kmap_atomic(page + i, KM_USER0);
> +			clear_page(kaddr);
> +			kunmap_atomic(kaddr, KM_USER0);
> +		}
> +	}
> +#endif
>  	put_mems_allowed();
>  	return page;
>  }
> --
> 1.7.4
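For reference, the userspace side of the opt-in described in the
changelog would look roughly like this (a minimal sketch: the
MAP_UNINITIALIZED value is the one defined in the patch above, and
actually skipping the clear would additionally require the
still-unimplemented cgroup-level opt-in):

/* Minimal sketch of the mmap()-level opt-in.  Illustrative only:
 * whether the faulted-back page really skips clearing depends on
 * this patch plus the TODO cgroup-level flag. */
#include <stdio.h>
#include <sys/mman.h>

#ifndef MAP_UNINITIALIZED
#define MAP_UNINITIALIZED 0x4000000     /* value from the patch above */
#endif

int main(void)
{
        size_t len = 1 << 20;

        /* Opt in: pages of this anonymous mapping may come back
         * uninitialized after being returned to the kernel. */
        char *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
                       MAP_PRIVATE | MAP_ANONYMOUS | MAP_UNINITIALIZED,
                       -1, 0);
        if (p == MAP_FAILED) {
                perror("mmap");
                return 1;
        }

        p[0] = 42;

        /* The allocator "frees" the range back to the kernel... */
        madvise(p, len, MADV_DONTNEED);

        /* ...and the next touch faults a page back in.  Normally that
         * page is guaranteed to be zero-filled; with the opt-in it may
         * contain stale data previously used within the same memcg. */
        printf("p[0] = %d\n", p[0]);
        return 0;
}

Whether p[0] still reads 42 afterwards is exactly the behavior change
under discussion.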