Hi David, Linus, Below is what I understand the suggestions about the UX to be. The full commit is in https://git.zx2c4.com/linux-rng/log/ but here's the part we've been discussing. I've held off on David's suggestion changing "DROPPABLE" to "VOLATILE" to give Linus some time to wake up on the west coast and voice his preference for "DROPPABLE". But the rest is in place. Jason diff --git a/include/uapi/linux/mman.h b/include/uapi/linux/mman.h index a246e11988d5..e89d00528f2f 100644 --- a/include/uapi/linux/mman.h +++ b/include/uapi/linux/mman.h @@ -17,6 +17,7 @@ #define MAP_SHARED 0x01 /* Share changes */ #define MAP_PRIVATE 0x02 /* Changes are private */ #define MAP_SHARED_VALIDATE 0x03 /* share + validate extension flags */ +#define MAP_DROPPABLE 0x08 /* Zero memory under memory pressure. */ /* * Huge page size encoding when MAP_HUGETLB is specified, and a huge page diff --git a/mm/madvise.c b/mm/madvise.c index a77893462b92..cba5bc652fc4 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -1068,13 +1068,16 @@ static int madvise_vma_behavior(struct vm_area_struct *vma, new_flags |= VM_WIPEONFORK; break; case MADV_KEEPONFORK: + if (vma->vm_flags & VM_DROPPABLE) + return -EINVAL; new_flags &= ~VM_WIPEONFORK; break; case MADV_DONTDUMP: new_flags |= VM_DONTDUMP; break; case MADV_DODUMP: - if (!is_vm_hugetlb_page(vma) && new_flags & VM_SPECIAL) + if ((!is_vm_hugetlb_page(vma) && new_flags & VM_SPECIAL) || + (vma->vm_flags & VM_DROPPABLE)) return -EINVAL; new_flags &= ~VM_DONTDUMP; break; diff --git a/mm/mlock.c b/mm/mlock.c index 30b51cdea89d..b87b3d8cc9cc 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -485,7 +485,7 @@ static int mlock_fixup(struct vma_iterator *vmi, struct vm_area_struct *vma, if (newflags == oldflags || (oldflags & VM_SPECIAL) || is_vm_hugetlb_page(vma) || vma == get_gate_vma(current->mm) || - vma_is_dax(vma) || vma_is_secretmem(vma)) + vma_is_dax(vma) || vma_is_secretmem(vma) || (oldflags & VM_DROPPABLE)) /* don't set VM_LOCKED or VM_LOCKONFAULT and don't count */ goto out; diff --git a/mm/mmap.c b/mm/mmap.c index 83b4682ec85c..b3d38179dd42 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1369,6 +1369,34 @@ unsigned long do_mmap(struct file *file, unsigned long addr, pgoff = 0; vm_flags |= VM_SHARED | VM_MAYSHARE; break; + case MAP_DROPPABLE: + /* + * A locked or stack area makes no sense to be droppable. + * + * Also, since droppable pages can just go away at any time + * it makes no sense to copy them on fork or dump them. + * + * And don't attempt to combine with hugetlb for now. + */ + if (flags & (MAP_LOCKED | MAP_HUGETLB)) + return -EINVAL; + if (vm_flags & (VM_GROWSDOWN | VM_GROWSUP)) + return -EINVAL; + + vm_flags |= VM_DROPPABLE; + + /* + * If the pages can be dropped, then it doesn't make + * sense to reserve them. + */ + vm_flags |= VM_NORESERVE; + + /* + * Likewise, they're volatile enough that they + * shouldn't survive forks or coredumps. + */ + vm_flags |= VM_WIPEONFORK | VM_DONTDUMP; + fallthrough; case MAP_PRIVATE: /* * Set pgoff according to addr for anon_vma.