On Fri, Oct 02, 2020 at 02:09:17PM +0300, Topi Miettinen wrote: > Writing a new value of 3 to /proc/sys/kernel/randomize_va_space > enables full randomization of memory mappings created with mmap(NULL, > ...). With 2, the base of the VMA used for such mappings is random, > but the mappings are created in predictable places within the VMA and > in sequential order. With 3, new VMAs are created to fully randomize > the mappings. Also mremap(..., MREMAP_MAYMOVE) will move the mappings > even if not necessary. > > On 32 bit systems this may cause problems due to increased VM > fragmentation if the address space gets crowded. > > In this example, with value of 2, ld.so.cache, libc, an anonymous mmap > and locale-archive are located close to each other: > $ strace /bin/sync > ... > openat(AT_FDCWD, "/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = 3 > fstat(3, {st_mode=S_IFREG|0644, st_size=189096, ...}) = 0 > mmap(NULL, 189096, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7d9c1e7f2000 > ... > openat(AT_FDCWD, "/lib/x86_64-linux-gnu/libc.so.6", O_RDONLY|O_CLOEXEC) = 3 > read(3, "\177ELF\2\1\1\3\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\0n\2\0\0\0\0\0"..., 832) = 832 > fstat(3, {st_mode=S_IFREG|0755, st_size=1839792, ...}) = 0 > mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7d9c1e7f0000 > mmap(NULL, 1852680, PROT_READ, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7d9c1e62b000 > ... > openat(AT_FDCWD, "/usr/lib/locale/locale-archive", O_RDONLY|O_CLOEXEC) = 3 > fstat(3, {st_mode=S_IFREG|0644, st_size=5642592, ...}) = 0 > mmap(NULL, 5642592, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7d9c1e0c9000 > > With 3, they are located in unrelated addresses: > $ echo 3 > /proc/sys/kernel/randomize_va_space > $ /bin/sync > ... > openat(AT_FDCWD, "/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = 3 > fstat(3, {st_mode=S_IFREG|0644, st_size=189096, ...}) = 0 > mmap(NULL, 189096, PROT_READ, MAP_PRIVATE, 3, 0) = 0xeda4fbea000 > ... 
> openat(AT_FDCWD, "/lib/x86_64-linux-gnu/libc.so.6", O_RDONLY|O_CLOEXEC) = 3 > read(3, "\177ELF\2\1\1\3\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\0n\2\0\0\0\0\0"..., 832) = 832 > fstat(3, {st_mode=S_IFREG|0755, st_size=1839792, ...}) = 0 > mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0xb8fb9c1d000 > mmap(NULL, 1852680, PROT_READ, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0xaabd8598000 > ... > openat(AT_FDCWD, "/usr/lib/locale/locale-archive", O_RDONLY|O_CLOEXEC) = 3 > fstat(3, {st_mode=S_IFREG|0644, st_size=5642592, ...}) = 0 > mmap(NULL, 5642592, PROT_READ, MAP_PRIVATE, 3, 0) = 0xbe351ab8000 Nit: this would be better illustrated by comparing the contents of /proc/$pid/maps for the two settings. > Signed-off-by: Topi Miettinen <toiwoton@xxxxxxxxx> > --- > v2: also randomize mremap(..., MREMAP_MAYMOVE) > --- > Documentation/admin-guide/hw-vuln/spectre.rst | 6 +++--- > Documentation/admin-guide/sysctl/kernel.rst | 11 +++++++++++ > init/Kconfig | 2 +- > mm/mmap.c | 7 ++++++- > mm/mremap.c | 15 +++++++++++++++ > 5 files changed, 36 insertions(+), 5 deletions(-) > > diff --git a/Documentation/admin-guide/hw-vuln/spectre.rst b/Documentation/admin-guide/hw-vuln/spectre.rst > index e05e581af5cf..9ea250522077 100644 > --- a/Documentation/admin-guide/hw-vuln/spectre.rst > +++ b/Documentation/admin-guide/hw-vuln/spectre.rst > @@ -254,7 +254,7 @@ Spectre variant 2 > left by the previous process will also be cleared. > > User programs should use address space randomization to make attacks > - more difficult (Set /proc/sys/kernel/randomize_va_space = 1 or 2). > + more difficult (Set /proc/sys/kernel/randomize_va_space = 1, 2 or 3). > > 3. A virtualized guest attacking the host > ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ > @@ -499,8 +499,8 @@ Spectre variant 2 > more overhead and run slower. > > User programs should use address space randomization > - (/proc/sys/kernel/randomize_va_space = 1 or 2) to make attacks more > - difficult. 
> + (/proc/sys/kernel/randomize_va_space = 1, 2 or 3) to make attacks > + more difficult. > > 3. VM mitigation > ^^^^^^^^^^^^^^^^ > diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst > index d4b32cc32bb7..acd0612155d9 100644 > --- a/Documentation/admin-guide/sysctl/kernel.rst > +++ b/Documentation/admin-guide/sysctl/kernel.rst > @@ -1060,6 +1060,17 @@ that support this feature. > Systems with ancient and/or broken binaries should be configured > with ``CONFIG_COMPAT_BRK`` enabled, which excludes the heap from process > address space randomization. > + > +3 Additionally enable full randomization of memory mappings created > + with mmap(NULL, ...). With 2, the base of the VMA used for such > + mappings is random, but the mappings are created in predictable > + places within the VMA and in sequential order. With 3, new VMAs > + are created to fully randomize the mappings. Also mremap(..., > + MREMAP_MAYMOVE) will move the mappings even if not necessary. > + > + On 32 bit systems this may cause problems due to increased VM > + fragmentation if the address space gets crowded. > + > == =========================================================================== > > > diff --git a/init/Kconfig b/init/Kconfig > index d6a0b31b13dc..c5ea2e694f6a 100644 > --- a/init/Kconfig > +++ b/init/Kconfig > @@ -1859,7 +1859,7 @@ config COMPAT_BRK > also breaks ancient binaries (including anything libc5 based). > This option changes the bootup default to heap randomization > disabled, and can be overridden at runtime by setting > - /proc/sys/kernel/randomize_va_space to 2. > + /proc/sys/kernel/randomize_va_space to 2 or 3. > > On non-ancient distros (post-2000 ones) N is usually a safe choice. 
> > diff --git a/mm/mmap.c b/mm/mmap.c > index 40248d84ad5f..489368f43af1 100644 > --- a/mm/mmap.c > +++ b/mm/mmap.c > @@ -47,6 +47,7 @@ > #include <linux/pkeys.h> > #include <linux/oom.h> > #include <linux/sched/mm.h> > +#include <linux/elf-randomize.h> > > #include <linux/uaccess.h> > #include <asm/cacheflush.h> > @@ -206,7 +207,7 @@ SYSCALL_DEFINE1(brk, unsigned long, brk) > #ifdef CONFIG_COMPAT_BRK > /* > * CONFIG_COMPAT_BRK can still be overridden by setting > - * randomize_va_space to 2, which will still cause mm->start_brk > + * randomize_va_space to >= 2, which will still cause mm->start_brk > * to be arbitrarily shifted > */ > if (current->brk_randomized) > @@ -1407,6 +1408,10 @@ unsigned long do_mmap(struct file *file, unsigned long addr, > if (mm->map_count > sysctl_max_map_count) > return -ENOMEM; > > + /* Pick a random address even outside current VMAs? */ > + if (!addr && randomize_va_space >= 3) > + addr = arch_mmap_rnd(); > + > /* Obtain the address to map to. we verify (or select) it and ensure > * that it represents a valid section of the address space. > */ > diff --git a/mm/mremap.c b/mm/mremap.c > index 138abbae4f75..c7fd1ab5fb5f 100644 > --- a/mm/mremap.c > +++ b/mm/mremap.c > @@ -24,6 +24,7 @@ > #include <linux/uaccess.h> > #include <linux/mm-arch-hooks.h> > #include <linux/userfaultfd_k.h> > +#include <linux/elf-randomize.h> > > #include <asm/cacheflush.h> > #include <asm/tlbflush.h> > @@ -720,6 +721,20 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len, > goto out; > } > > + if ((flags & MREMAP_MAYMOVE) && randomize_va_space >= 3) { > + /* > + * Caller is happy with a different address, so let's > + * move even if not necessary! > + */ > + new_addr = arch_mmap_rnd(); > + > + ret = mremap_to(addr, old_len, new_addr, new_len, > + &locked, flags, &uf, &uf_unmap_early, > + &uf_unmap); > + goto out; > + } > + > + > /* > * Always allow a shrinking remap: that just unmaps > * the unnecessary pages.. 
> -- > 2.28.0 > > -- Sincerely yours, Mike.