On Mon, Apr 18, 2016 at 7:23 AM, Dmitry Safonov <dsafonov@xxxxxxxxxxxxx> wrote: > Add possibility for userspace 32-bit applications to move > vdso mapping. Previously, when userspace app called > mremap for vdso, in return path it would land on previous > address of vdso page, resulting in segmentation violation. > Now it lands fine and returns to userspace with remapped vdso. > This will also fix context.vdso pointer for 64-bit, which does not > affect the user of vdso after mremap by now, but this may change. > > As suggested by Andy, return EINVAL for mremap that splits vdso image. > > Renamed and moved text_mapping structure declaration inside > map_vdso, as it used only there and now it complement > vvar_mapping variable. > > There is still problem for remapping vdso in glibc applications: > linker relocates addresses for syscalls on vdso page, so > you need to relink with the new addresses. Or the next syscall > through glibc may fail: > Program received signal SIGSEGV, Segmentation fault. > #0 0xf7fd9b80 in __kernel_vsyscall () > #1 0xf7ec8238 in _exit () from /usr/lib32/libc.so.6 > > Signed-off-by: Dmitry Safonov <dsafonov@xxxxxxxxxxxxx> > --- > v7: that's just not my day: add new_vma parameter to vdso_fix_landing > sorry for the noise > v6: moved vdso_image_32 check and fixup code into vdso_fix_landing function > with ifdefs around > v5: as Andy suggested, add a check that new_vma->vm_mm and current->mm are > the same, also check not only in_ia32_syscall() but image == &vdso_image_32 > v4: drop __maybe_unused & use image from mm->context instead vdso_image_32 > v3: as Andy suggested, return EINVAL in case of splitting vdso blob on mremap; > used is_ia32_task instead of ifdefs > v2: added __maybe_unused for pt_regs in vdso_mremap > > arch/x86/entry/vdso/vma.c | 50 ++++++++++++++++++++++++++++++++++++++++++----- > include/linux/mm_types.h | 3 +++ > mm/mmap.c | 10 ++++++++++ > 3 files changed, 58 insertions(+), 5 deletions(-) > > diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c > index 10f704584922..d94291a19b6e 100644 > --- a/arch/x86/entry/vdso/vma.c > +++ b/arch/x86/entry/vdso/vma.c > @@ -12,6 +12,7 @@ > #include <linux/random.h> > #include <linux/elf.h> > #include <linux/cpu.h> > +#include <linux/ptrace.h> > #include <asm/pvclock.h> > #include <asm/vgtod.h> > #include <asm/proto.h> > @@ -98,10 +99,43 @@ static int vdso_fault(const struct vm_special_mapping *sm, > return 0; > } > > -static const struct vm_special_mapping text_mapping = { > - .name = "[vdso]", > - .fault = vdso_fault, > -}; > +#if defined CONFIG_X86_32 || defined CONFIG_COMPAT I think you mean defined(CONFIG_IA32_EMULATION). IIRC CONFIG_COMPAT is set for X32, !IA32 kernels, too. > +static void vdso_fix_landing(const struct vdso_image *image, > + struct vm_area_struct *new_vma) > +{ > + if (in_ia32_syscall() && image == &vdso_image_32) { > + struct pt_regs *regs = current_pt_regs(); > + unsigned long vdso_land = image->sym_int80_landing_pad; > + unsigned long old_land_addr = vdso_land + > + (unsigned long)current->mm->context.vdso; > + > + /* Fixing userspace landing - look at do_fast_syscall_32 */ > + if (regs->ip == old_land_addr) > + regs->ip = new_vma->vm_start + vdso_land; > + } > +} > +#else > +static void vdso_fix_landing(const struct vdso_image *image, > + struct vm_area_struct *new_vma) {} > +#endif You could also define the function regardless and simply ifdef out the body, thus saving a few lines of code. > + > +static int vdso_mremap(const struct vm_special_mapping *sm, > + struct vm_area_struct *new_vma) > +{ > + unsigned long new_size = new_vma->vm_end - new_vma->vm_start; > + const struct vdso_image *image = current->mm->context.vdso_image; > + > + if (image->size != new_size) > + return -EINVAL; > + > + if (current->mm != new_vma->vm_mm) > + return -EFAULT; Can you make that if (WARN_ON_ONCE(current->mm != new_vma->vm_mm))? It should be impossible. > + > + vdso_fix_landing(image, new_vma); > + current->mm->context.vdso = (void __user *)new_vma->vm_start; > + > + return 0; > +} > > static int vvar_fault(const struct vm_special_mapping *sm, > struct vm_area_struct *vma, struct vm_fault *vmf) > @@ -162,6 +196,12 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr) > struct vm_area_struct *vma; > unsigned long addr, text_start; > int ret = 0; > + > + static const struct vm_special_mapping vdso_mapping = { > + .name = "[vdso]", > + .fault = vdso_fault, > + .mremap = vdso_mremap, > + }; > static const struct vm_special_mapping vvar_mapping = { > .name = "[vvar]", > .fault = vvar_fault, > @@ -195,7 +235,7 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr) > image->size, > VM_READ|VM_EXEC| > VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, > - &text_mapping); > + &vdso_mapping); > > if (IS_ERR(vma)) { > ret = PTR_ERR(vma); > diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h > index c2d75b4fa86c..4d16ab9287af 100644 > --- a/include/linux/mm_types.h > +++ b/include/linux/mm_types.h > @@ -586,6 +586,9 @@ struct vm_special_mapping { > int (*fault)(const struct vm_special_mapping *sm, > struct vm_area_struct *vma, > struct vm_fault *vmf); > + > + int (*mremap)(const struct vm_special_mapping *sm, > + struct vm_area_struct *new_vma); > }; > > enum tlb_flush_reason { > diff --git a/mm/mmap.c b/mm/mmap.c > index bd2e1a533bc1..ba71658dd1a1 100644 > --- a/mm/mmap.c > +++ b/mm/mmap.c > @@ -2930,9 +2930,19 @@ static const char *special_mapping_name(struct vm_area_struct *vma) > return ((struct vm_special_mapping *)vma->vm_private_data)->name; > } > > +static int special_mapping_mremap(struct vm_area_struct *new_vma) > +{ > + struct vm_special_mapping *sm = new_vma->vm_private_data; > + > + if (sm->mremap) > + return sm->mremap(sm, new_vma); > + return 0; > +} > + > static const struct vm_operations_struct special_mapping_vmops = { > .close = special_mapping_close, > .fault = special_mapping_fault, > + .mremap = special_mapping_mremap, > .name = special_mapping_name, > }; > > -- > 2.8.0 > -- Andy Lutomirski AMA Capital Management, LLC -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@xxxxxxxxx. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@xxxxxxxxx"> email@xxxxxxxxx </a>