* Fares Mehanna <faresx@xxxxxxxxx> [240911 10:36]: > To make sure the kernel mm-local mapping is untouched by the user, we will seal > the VMA before changing the protection to be used by the kernel. > > This will guarantee that userspace can't unmap or alter this VMA while it is > being used by the kernel. > > After the kernel is done with the secret memory, it will unseal the VMA to be > able to unmap and free it. > > Unseal operation is not exposed to userspace. We can't use the mseal feature for this; it is supposed to be a one way transition. Willy describes the feature best here [1]. It is not clear from the change log above or the cover letter as to why you need to go this route instead of using the mmap lock. [1] https://lore.kernel.org/lkml/ZS%2F3GCKvNn5qzhC4@xxxxxxxxxxxxxxxxxxxx/ > > Signed-off-by: Fares Mehanna <faresx@xxxxxxxxx> > Signed-off-by: Roman Kagan <rkagan@xxxxxxxxx> > --- > mm/internal.h | 7 +++++ > mm/mseal.c | 81 ++++++++++++++++++++++++++++++++------------------- > 2 files changed, 58 insertions(+), 30 deletions(-) > > diff --git a/mm/internal.h b/mm/internal.h > index b4d86436565b..cf7280d101e9 100644 > --- a/mm/internal.h > +++ b/mm/internal.h > @@ -1501,6 +1501,8 @@ bool can_modify_mm(struct mm_struct *mm, unsigned long start, > unsigned long end); > bool can_modify_mm_madv(struct mm_struct *mm, unsigned long start, > unsigned long end, int behavior); > +/* mm's mmap write lock must be taken before seal/unseal operation */ > +int do_mseal(unsigned long start, unsigned long end, bool seal); > #else > static inline int can_do_mseal(unsigned long flags) > { > @@ -1518,6 +1520,11 @@ static inline bool can_modify_mm_madv(struct mm_struct *mm, unsigned long start, > { > return true; > } > + > +static inline int do_mseal(unsigned long start, unsigned long end, bool seal) > +{ > + return -EINVAL; > +} > #endif > > #ifdef CONFIG_SHRINKER_DEBUG > diff --git a/mm/mseal.c b/mm/mseal.c > index 15bba28acc00..aac9399ffd5d 100644 > --- a/mm/mseal.c > +++ b/mm/mseal.c > @@ -26,6 +26,11 @@ static inline void set_vma_sealed(struct vm_area_struct *vma) > vm_flags_set(vma, VM_SEALED); > } > > +static inline void clear_vma_sealed(struct vm_area_struct *vma) > +{ > + vm_flags_clear(vma, VM_SEALED); > +} > + > /* > * check if a vma is sealed for modification. > * return true, if modification is allowed. > @@ -117,7 +122,7 @@ bool can_modify_mm_madv(struct mm_struct *mm, unsigned long start, unsigned long > > static int mseal_fixup(struct vma_iterator *vmi, struct vm_area_struct *vma, > struct vm_area_struct **prev, unsigned long start, > - unsigned long end, vm_flags_t newflags) > + unsigned long end, vm_flags_t newflags, bool seal) > { > int ret = 0; > vm_flags_t oldflags = vma->vm_flags; > @@ -131,7 +136,10 @@ static int mseal_fixup(struct vma_iterator *vmi, struct vm_area_struct *vma, > goto out; > } > > - set_vma_sealed(vma); > + if (seal) > + set_vma_sealed(vma); > + else > + clear_vma_sealed(vma); > out: > *prev = vma; > return ret; > @@ -167,9 +175,9 @@ static int check_mm_seal(unsigned long start, unsigned long end) > } > > /* > - * Apply sealing. > + * Apply sealing / unsealing. > */ > -static int apply_mm_seal(unsigned long start, unsigned long end) > +static int apply_mm_seal(unsigned long start, unsigned long end, bool seal) > { > unsigned long nstart; > struct vm_area_struct *vma, *prev; > @@ -191,11 +199,14 @@ static int apply_mm_seal(unsigned long start, unsigned long end) > unsigned long tmp; > vm_flags_t newflags; > > - newflags = vma->vm_flags | VM_SEALED; > + if (seal) > + newflags = vma->vm_flags | VM_SEALED; > + else > + newflags = vma->vm_flags & ~(VM_SEALED); > tmp = vma->vm_end; > if (tmp > end) > tmp = end; > - error = mseal_fixup(&vmi, vma, &prev, nstart, tmp, newflags); > + error = mseal_fixup(&vmi, vma, &prev, nstart, tmp, newflags, seal); > if (error) > return error; > nstart = vma_iter_end(&vmi); > @@ -204,6 +215,37 @@ static int apply_mm_seal(unsigned long start, unsigned long end) > return 0; > } > > +int do_mseal(unsigned long start, unsigned long end, bool seal) > +{ > + int ret; > + > + if (end < start) > + return -EINVAL; > + > + if (end == start) > + return 0; > + > + /* > + * First pass, this helps to avoid > + * partial sealing in case of error in input address range, > + * e.g. ENOMEM error. > + */ > + ret = check_mm_seal(start, end); > + if (ret) > + goto out; > + > + /* > + * Second pass, this should success, unless there are errors > + * from vma_modify_flags, e.g. merge/split error, or process > + * reaching the max supported VMAs, however, those cases shall > + * be rare. > + */ > + ret = apply_mm_seal(start, end, seal); > + > +out: > + return ret; > +} > + > /* > * mseal(2) seals the VM's meta data from > * selected syscalls. > @@ -256,7 +298,7 @@ static int apply_mm_seal(unsigned long start, unsigned long end) > * > * unseal() is not supported. > */ > -static int do_mseal(unsigned long start, size_t len_in, unsigned long flags) > +static int __do_mseal(unsigned long start, size_t len_in, unsigned long flags) > { > size_t len; > int ret = 0; > @@ -277,33 +319,12 @@ static int do_mseal(unsigned long start, size_t len_in, unsigned long flags) > return -EINVAL; > > end = start + len; > - if (end < start) > - return -EINVAL; > - > - if (end == start) > - return 0; > > if (mmap_write_lock_killable(mm)) > return -EINTR; > > - /* > - * First pass, this helps to avoid > - * partial sealing in case of error in input address range, > - * e.g. ENOMEM error. > - */ > - ret = check_mm_seal(start, end); > - if (ret) > - goto out; > - > - /* > - * Second pass, this should success, unless there are errors > - * from vma_modify_flags, e.g. merge/split error, or process > - * reaching the max supported VMAs, however, those cases shall > - * be rare. > - */ > - ret = apply_mm_seal(start, end); > + ret = do_mseal(start, end, true); > > -out: > mmap_write_unlock(current->mm); > return ret; > } > @@ -311,5 +332,5 @@ static int do_mseal(unsigned long start, size_t len_in, unsigned long flags) > SYSCALL_DEFINE3(mseal, unsigned long, start, size_t, len, unsigned long, > flags) > { > - return do_mseal(start, len, flags); > + return __do_mseal(start, len, flags); > } > -- > 2.40.1 > > > > > Amazon Web Services Development Center Germany GmbH > Krausenstr. 38 > 10117 Berlin > Geschaeftsfuehrung: Christian Schlaeger, Jonathan Weiss > Eingetragen am Amtsgericht Charlottenburg unter HRB 257764 B > Sitz: Berlin > Ust-ID: DE 365 538 597 > >