Re: [PATCH RFC] mm: add MAP_EXCLUSIVE to create exclusive user mappings

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 27.10.19 11:17, Mike Rapoport wrote:
From: Mike Rapoport <rppt@xxxxxxxxxxxxx>

The mappings created with MAP_EXCLUSIVE are visible only in the context of
the owning process and can be used by applications to store secret
information that is hidden not only from other processes but from the
kernel as well.

The pages in these mappings are removed from the kernel direct map and
marked with PG_user_exclusive flag. When the exclusive area is unmapped,
the pages are mapped back into the direct map.

The MAP_EXCLUSIVE flag implies MAP_POPULATE and MAP_LOCKED.

Signed-off-by: Mike Rapoport <rppt@xxxxxxxxxxxxx>
---
  arch/x86/mm/fault.c                    | 14 ++++++++++
  fs/proc/task_mmu.c                     |  1 +
  include/linux/mm.h                     |  9 +++++++
  include/linux/page-flags.h             |  7 +++++
  include/linux/page_excl.h              | 49 ++++++++++++++++++++++++++++++++++
  include/trace/events/mmflags.h         |  9 ++++++-
  include/uapi/asm-generic/mman-common.h |  1 +
  kernel/fork.c                          |  3 ++-
  mm/Kconfig                             |  3 +++
  mm/gup.c                               |  8 ++++++
  mm/memory.c                            |  3 +++
  mm/mmap.c                              | 16 +++++++++++
  mm/page_alloc.c                        |  5 ++++
  13 files changed, 126 insertions(+), 2 deletions(-)
  create mode 100644 include/linux/page_excl.h

diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 9ceacd1..8f73a75 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -17,6 +17,7 @@
  #include <linux/context_tracking.h>	/* exception_enter(), ...	*/
  #include <linux/uaccess.h>		/* faulthandler_disabled()	*/
  #include <linux/efi.h>			/* efi_recover_from_page_fault()*/
+#include <linux/page_excl.h>		/* page_is_user_exclusive()	*/
  #include <linux/mm_types.h>
#include <asm/cpufeature.h> /* boot_cpu_has, ... */
@@ -1218,6 +1219,13 @@ static int fault_in_kernel_space(unsigned long address)
  	return address >= TASK_SIZE_MAX;
  }
+static bool fault_in_user_exclusive_page(unsigned long address)
+{
+	struct page *page = virt_to_page(address);
+
+	return page_is_user_exclusive(page);
+}
+
  /*
   * Called for all faults where 'address' is part of the kernel address
   * space.  Might get called for faults that originate from *code* that
@@ -1261,6 +1269,12 @@ do_kern_addr_fault(struct pt_regs *regs, unsigned long hw_error_code,
  	if (spurious_kernel_fault(hw_error_code, address))
  		return;
+	/* FIXME: warn and handle gracefully */
+	if (unlikely(fault_in_user_exclusive_page(address))) {
+		pr_err("page fault in user exclusive page at %lx", address);
+		force_sig_fault(SIGSEGV, SEGV_MAPERR, (void __user *)address);
+	}
+
  	/* kprobes don't want to hook the spurious faults: */
  	if (kprobe_page_fault(regs, X86_TRAP_PF))
  		return;
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 9442631..99e14d1 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -655,6 +655,7 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma)
  #ifdef CONFIG_X86_INTEL_MPX
  		[ilog2(VM_MPX)]		= "mp",
  #endif
+		[ilog2(VM_EXCLUSIVE)]	= "xl",
  		[ilog2(VM_LOCKED)]	= "lo",
  		[ilog2(VM_IO)]		= "io",
  		[ilog2(VM_SEQ_READ)]	= "sr",
diff --git a/include/linux/mm.h b/include/linux/mm.h
index cc29227..9c43375 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -298,11 +298,13 @@ extern unsigned int kobjsize(const void *objp);
  #define VM_HIGH_ARCH_BIT_2	34	/* bit only usable on 64-bit architectures */
  #define VM_HIGH_ARCH_BIT_3	35	/* bit only usable on 64-bit architectures */
  #define VM_HIGH_ARCH_BIT_4	36	/* bit only usable on 64-bit architectures */
+#define VM_HIGH_ARCH_BIT_5	37	/* bit only usable on 64-bit architectures */
  #define VM_HIGH_ARCH_0	BIT(VM_HIGH_ARCH_BIT_0)
  #define VM_HIGH_ARCH_1	BIT(VM_HIGH_ARCH_BIT_1)
  #define VM_HIGH_ARCH_2	BIT(VM_HIGH_ARCH_BIT_2)
  #define VM_HIGH_ARCH_3	BIT(VM_HIGH_ARCH_BIT_3)
  #define VM_HIGH_ARCH_4	BIT(VM_HIGH_ARCH_BIT_4)
+#define VM_HIGH_ARCH_5	BIT(VM_HIGH_ARCH_BIT_5)
  #endif /* CONFIG_ARCH_USES_HIGH_VMA_FLAGS */
#ifdef CONFIG_ARCH_HAS_PKEYS
@@ -340,6 +342,12 @@ extern unsigned int kobjsize(const void *objp);
  # define VM_MPX		VM_NONE
  #endif
+#ifdef CONFIG_ARCH_USES_HIGH_VMA_FLAGS
+# define VM_EXCLUSIVE	VM_HIGH_ARCH_5
+#else
+# define VM_EXCLUSIVE	VM_NONE
+#endif
+
  #ifndef VM_GROWSUP
  # define VM_GROWSUP	VM_NONE
  #endif
@@ -2594,6 +2602,7 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
  #define FOLL_ANON	0x8000	/* don't do file mappings */
  #define FOLL_LONGTERM	0x10000	/* mapping lifetime is indefinite: see below */
  #define FOLL_SPLIT_PMD	0x20000	/* split huge pmd before returning */
+#define FOLL_EXCLUSIVE	0x40000	/* mapping is exclusive to owning mm */
/*
   * NOTE on FOLL_LONGTERM:
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index f91cb88..32d0aee 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -131,6 +131,9 @@ enum pageflags {
  	PG_young,
  	PG_idle,
  #endif
+#if defined(CONFIG_EXCLUSIVE_USER_PAGES)
+	PG_user_exclusive,
+#endif

The last time I tried to introduce a new page flag, I learned that this is very much frowned upon. The best you can usually do is reuse another flag, if that is valid in that context.

--

Thanks,

David / dhildenb





[Index of Archives]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux