David Hildenbrand <david@xxxxxxxxxx> writes: > On 12.04.23 20:41, David Hildenbrand wrote: >> [...] >>> That will work. >>>> work? IOW, not exporting ksm_add_mm() and not passing a flag to __ksm_enter() -- >>>> it would simply set MMF_VM_MERGEABLE ? >>>> >>> >>> ksm_add_mm() is also used in prctl (kernel/sys.c). Do you want to make a >>> similar change there? >> Yes. >> >>>>> + * >>>>> + * @vma: Pointer to vma >>>>> + */ >>>>> +void ksm_add_vma(struct vm_area_struct *vma) >>>>> +{ >>>>> + struct mm_struct *mm = vma->vm_mm; >>>>> + >>>>> + if (test_bit(MMF_VM_MERGE_ANY, &mm->flags)) >>>>> + __ksm_add_vma(vma); >>>>> +} >>>>> + >>>>> +/** >>>>> + * ksm_add_vmas - Mark all vma's of a process as mergeable >>>>> + * >>>>> + * @mm: Pointer to mm >>>>> + */ >>>>> +void ksm_add_vmas(struct mm_struct *mm) >>>> >>>> I'd suggest calling this >>>> >>> I guess you forgot your name suggestion? >> Yeah, I reconsidered because the first idea I had was not particularly >> good. Maybe >> ksm_enable_for_all_vmas() >> But not so sure. If you think the "add" terminology is a good fit, keep >> it like that. >> Thanks for bearing with me :) >> > > I briefly played with your patch to see how much it can be simplified. > Always enabling ksm (setting MMF_VM_MERGEABLE) before setting > MMF_VM_MERGE_ANY might simplify things. ksm_enable_merge_any() [or however it should > be called] and ksm_fork() contain the interesting bits. > > > Feel free to incorporate what you consider valuable (uncompiled, > untested). > I added most of it. The only change is that I kept ksm_add_vmas as a static function, otherwise I need to define the VMA_ITERATOR at the top of the function. 
> > diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c > index 5a716bdcba05..5b2eef31398e 100644 > --- a/arch/s390/mm/gmap.c > +++ b/arch/s390/mm/gmap.c > @@ -2591,6 +2591,12 @@ int gmap_mark_unmergeable(void) > int ret; > VMA_ITERATOR(vmi, mm, 0); > + /* > + * Make sure to disable KSM (if enabled for the whole process or > + * individual VMAs). Note that nothing currently hinders user space > + * from re-enabling it. > + */ > + clear_bit(MMF_VM_MERGE_ANY, &mm->flags); > for_each_vma(vmi, vma) { > /* Copy vm_flags to avoid partial modifications in ksm_madvise */ > vm_flags = vma->vm_flags; > diff --git a/include/linux/ksm.h b/include/linux/ksm.h > index 7e232ba59b86..c638b034d586 100644 > --- a/include/linux/ksm.h > +++ b/include/linux/ksm.h > @@ -18,13 +18,24 @@ > #ifdef CONFIG_KSM > int ksm_madvise(struct vm_area_struct *vma, unsigned long start, > unsigned long end, int advice, unsigned long *vm_flags); > + > +void ksm_add_vma(struct vm_area_struct *vma); > +int ksm_enable_merge_any(struct mm_struct *mm); > + > int __ksm_enter(struct mm_struct *mm); > void __ksm_exit(struct mm_struct *mm); > static inline int ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm) > { > - if (test_bit(MMF_VM_MERGEABLE, &oldmm->flags)) > - return __ksm_enter(mm); > + int ret; > + > + if (test_bit(MMF_VM_MERGEABLE, &oldmm->flags)) { > + ret = __ksm_enter(mm); > + if (ret) > + return ret; > + } > + if (test_bit(MMF_VM_MERGE_ANY, &oldmm->flags)) > + set_bit(MMF_VM_MERGE_ANY, &mm->flags); > return 0; > } > @@ -53,6 +64,10 @@ void folio_migrate_ksm(struct folio *newfolio, struct folio > *folio); > #else /* !CONFIG_KSM */ > +static inline void ksm_add_vma(struct vm_area_struct *vma) > +{ > +} > + > static inline int ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm) > { > return 0; > diff --git a/include/linux/sched/coredump.h b/include/linux/sched/coredump.h > index 0e17ae7fbfd3..0ee96ea7a0e9 100644 > --- a/include/linux/sched/coredump.h > +++ b/include/linux/sched/coredump.h 
> @@ -90,4 +90,5 @@ static inline int get_dumpable(struct mm_struct *mm) > #define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK |\ > MMF_DISABLE_THP_MASK | MMF_HAS_MDWE_MASK) > +#define MMF_VM_MERGE_ANY 29 > #endif /* _LINUX_SCHED_COREDUMP_H */ > diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h > index 1312a137f7fb..759b3f53e53f 100644 > --- a/include/uapi/linux/prctl.h > +++ b/include/uapi/linux/prctl.h > @@ -290,4 +290,6 @@ struct prctl_mm_map { > #define PR_SET_VMA 0x53564d41 > # define PR_SET_VMA_ANON_NAME 0 > +#define PR_SET_MEMORY_MERGE 67 > +#define PR_GET_MEMORY_MERGE 68 > #endif /* _LINUX_PRCTL_H */ > diff --git a/kernel/sys.c b/kernel/sys.c > index 495cd87d9bf4..8c2e50edeb18 100644 > --- a/kernel/sys.c > +++ b/kernel/sys.c > @@ -15,6 +15,7 @@ > #include <linux/highuid.h> > #include <linux/fs.h> > #include <linux/kmod.h> > +#include <linux/ksm.h> > #include <linux/perf_event.h> > #include <linux/resource.h> > #include <linux/kernel.h> > @@ -2661,6 +2662,30 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, > case PR_SET_VMA: > error = prctl_set_vma(arg2, arg3, arg4, arg5); > break; > +#ifdef CONFIG_KSM > + case PR_SET_MEMORY_MERGE: > + if (mmap_write_lock_killable(me->mm)) > + return -EINTR; > + > + if (arg2) { > + error = ksm_enable_merge_any(me->mm); > + } else { > + /* > + * TODO: we might want disable KSM on all VMAs and > + * trigger unsharing to completely disable KSM. 
> + */ > + clear_bit(MMF_VM_MERGE_ANY, &me->mm->flags); > + error = 0; > + } > + mmap_write_unlock(me->mm); > + break; > + case PR_GET_MEMORY_MERGE: > + if (arg2 || arg3 || arg4 || arg5) > + return -EINVAL; > + > + error = !!test_bit(MMF_VM_MERGE_ANY, &me->mm->flags); > + break; > +#endif > default: > error = -EINVAL; > break; > diff --git a/mm/ksm.c b/mm/ksm.c > index 2b8d30068cbb..76ceec35395c 100644 > --- a/mm/ksm.c > +++ b/mm/ksm.c > @@ -512,6 +512,28 @@ static int break_ksm(struct vm_area_struct *vma, unsigned long addr) > return (ret & VM_FAULT_OOM) ? -ENOMEM : 0; > } > +static bool vma_ksm_compatible(struct vm_area_struct *vma) > +{ > + if (vma->vm_flags & (VM_SHARED | VM_MAYSHARE | VM_PFNMAP | > + VM_IO | VM_DONTEXPAND | VM_HUGETLB | > + VM_MIXEDMAP)) > + return false; /* just ignore the advice */ > + > + if (vma_is_dax(vma)) > + return false; > + > +#ifdef VM_SAO > + if (vma->vm_flags & VM_SAO) > + return false; > +#endif > +#ifdef VM_SPARC_ADI > + if (vma->vm_flags & VM_SPARC_ADI) > + return false; > +#endif > + > + return true; > +} > + > static struct vm_area_struct *find_mergeable_vma(struct mm_struct *mm, > unsigned long addr) > { > @@ -1020,6 +1042,7 @@ static int unmerge_and_remove_all_rmap_items(void) > mm_slot_free(mm_slot_cache, mm_slot); > clear_bit(MMF_VM_MERGEABLE, &mm->flags); > + clear_bit(MMF_VM_MERGE_ANY, &mm->flags); > mmdrop(mm); > } else > spin_unlock(&ksm_mmlist_lock); > @@ -2395,6 +2418,7 @@ static struct ksm_rmap_item *scan_get_next_rmap_item(struct page **page) > mm_slot_free(mm_slot_cache, mm_slot); > clear_bit(MMF_VM_MERGEABLE, &mm->flags); > + clear_bit(MMF_VM_MERGE_ANY, &mm->flags); > mmap_read_unlock(mm); > mmdrop(mm); > } else { > @@ -2471,6 +2495,52 @@ static int ksm_scan_thread(void *nothing) > return 0; > } > +static void __ksm_add_vma(struct vm_area_struct *vma) > +{ > + unsigned long vm_flags = vma->vm_flags; > + > + if (vm_flags & VM_MERGEABLE) > + return; > + > + if (vma_ksm_compatible(vma)) { > + vm_flags |= 
VM_MERGEABLE; > + vm_flags_reset(vma, vm_flags); > + } > +} > + > +/** > + * ksm_add_vma - Mark vma as mergeable > + * > + * @vma: Pointer to vma > + */ > +void ksm_add_vma(struct vm_area_struct *vma) > +{ > + struct mm_struct *mm = vma->vm_mm; > + > + if (test_bit(MMF_VM_MERGE_ANY, &mm->flags)) > + __ksm_add_vma(vma); > +} > + > +int ksm_enable_merge_any(struct mm_struct *mm) > +{ > + struct vm_area_struct *vma; > + int ret; > + > + if (test_bit(MMF_VM_MERGE_ANY, &mm->flags)) > + return 0; > + > + if (!test_bit(MMF_VM_MERGEABLE, &mm->flags)) { > + ret = __ksm_enter(mm); > + if (ret) > + return ret; > + } > + set_bit(MMF_VM_MERGE_ANY, &mm->flags); > + > + VMA_ITERATOR(vmi, mm, 0); > + for_each_vma(vmi, vma) > + __ksm_add_vma(vma); > + > + return 0; > +} > + > int ksm_madvise(struct vm_area_struct *vma, unsigned long start, > unsigned long end, int advice, unsigned long *vm_flags) > { > @@ -2479,25 +2549,10 @@ int ksm_madvise(struct vm_area_struct *vma, unsigned long start, > switch (advice) { > case MADV_MERGEABLE: > - /* > - * Be somewhat over-protective for now! 
> - */ > - if (*vm_flags & (VM_MERGEABLE | VM_SHARED | VM_MAYSHARE | > - VM_PFNMAP | VM_IO | VM_DONTEXPAND | > - VM_HUGETLB | VM_MIXEDMAP)) > - return 0; /* just ignore the advice */ > - > - if (vma_is_dax(vma)) > + if (vma->vm_flags & VM_MERGEABLE) > return 0; > - > -#ifdef VM_SAO > - if (*vm_flags & VM_SAO) > - return 0; > -#endif > -#ifdef VM_SPARC_ADI > - if (*vm_flags & VM_SPARC_ADI) > + if (!vma_ksm_compatible(vma)) > return 0; > -#endif > if (!test_bit(MMF_VM_MERGEABLE, &mm->flags)) { > err = __ksm_enter(mm); > @@ -2601,6 +2656,7 @@ void __ksm_exit(struct mm_struct *mm) > if (easy_to_free) { > mm_slot_free(mm_slot_cache, mm_slot); > clear_bit(MMF_VM_MERGEABLE, &mm->flags); > + clear_bit(MMF_VM_MERGE_ANY, &mm->flags); > mmdrop(mm); > } else if (mm_slot) { > mmap_write_lock(mm); > diff --git a/mm/mmap.c b/mm/mmap.c > index ff68a67a2a7c..1f8619ff58ca 100644 > --- a/mm/mmap.c > +++ b/mm/mmap.c > @@ -46,6 +46,7 @@ > #include <linux/pkeys.h> > #include <linux/oom.h> > #include <linux/sched/mm.h> > +#include <linux/ksm.h> > #include <linux/uaccess.h> > #include <asm/cacheflush.h> > @@ -2659,6 +2660,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr, > if (file && vm_flags & VM_SHARED) > mapping_unmap_writable(file->f_mapping); > file = vma->vm_file; > + ksm_add_vma(vma); > expanded: > perf_event_mmap(vma); > @@ -2931,6 +2933,7 @@ static int do_brk_flags(struct vma_iterator *vmi, struct > vm_area_struct *vma, > goto mas_store_fail; > mm->map_count++; > + ksm_add_vma(vma); > out: > perf_event_mmap(vma); > mm->total_vm += len >> PAGE_SHIFT; > -- > 2.39.2