On Tue, Feb 16, 2016 at 3:14 PM, tip-bot for Dave Hansen <tipbot@xxxxxxxxx> wrote:
> Commit-ID:  1e9877902dc7e11d2be038371c6fbf2dfcd469d7
> Gitweb:     http://git.kernel.org/tip/1e9877902dc7e11d2be038371c6fbf2dfcd469d7
> Author:     Dave Hansen <dave.hansen@xxxxxxxxxxxxxxx>
> AuthorDate: Fri, 12 Feb 2016 13:01:54 -0800
> Committer:  Ingo Molnar <mingo@xxxxxxxxxx>
> CommitDate: Tue, 16 Feb 2016 10:04:09 +0100
>
> mm/gup: Introduce get_user_pages_remote()
>
> For protection keys, we need to understand whether protections
> should be enforced in software or not. In general, we enforce
> protections when working on our own task, but not when on others.
> We call these "current" and "remote" operations.
>
> This patch introduces a new get_user_pages() variant:
>
>	get_user_pages_remote()
>
> Which is a replacement for when get_user_pages() is called on
> non-current tsk/mm.

As I see it, the task_struct argument can be NULL here, just as in the
old API. Its only use is updating task->maj_flt/min_flt. Maybe just
remove the argument and always account major/minor faults to the
current task - the counters are plain unsigned longs, so remote access
could corrupt them.
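
Something along these lines, perhaps (an untested sketch only; the
__get_user_pages_locked() call just mirrors the new mm/gup.c code
quoted below, and dropping 'tsk' means fault accounting always goes to
current):

long get_user_pages_remote(struct mm_struct *mm,
		unsigned long start, unsigned long nr_pages,
		int write, int force, struct page **pages,
		struct vm_area_struct **vmas)
{
	/* always account faults to the calling task */
	return __get_user_pages_locked(current, mm, start, nr_pages,
				       write, force, pages, vmas, NULL,
				       false, FOLL_TOUCH | FOLL_REMOTE);
}
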
>
> We also introduce a new gup flag: FOLL_REMOTE which can be used
> for the "__" gup variants to get this new behavior.
>
> The uprobes is_trap_at_addr() location holds mmap_sem and
> calls get_user_pages(current->mm) on an instruction address. This
> makes it a pretty unique gup caller. Being an instruction access
> and also really originating from the kernel (vs. the app), I opted
> to consider this a 'remote' access where protection keys will not
> be enforced.
>
> Without protection keys, this patch should not change any behavior.
>
> Signed-off-by: Dave Hansen <dave.hansen@xxxxxxxxxxxxxxx>
> Reviewed-by: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
> Cc: Andrea Arcangeli <aarcange@xxxxxxxxxx>
> Cc: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
> Cc: Andy Lutomirski <luto@xxxxxxxxxxxxxx>
> Cc: Borislav Petkov <bp@xxxxxxxxx>
> Cc: Brian Gerst <brgerst@xxxxxxxxx>
> Cc: Dave Hansen <dave@xxxxxxxx>
> Cc: Denys Vlasenko <dvlasenk@xxxxxxxxxx>
> Cc: H. Peter Anvin <hpa@xxxxxxxxx>
> Cc: Kirill A. Shutemov <kirill.shutemov@xxxxxxxxxxxxxxx>
> Cc: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
> Cc: Naoya Horiguchi <n-horiguchi@xxxxxxxxxxxxx>
> Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
> Cc: Rik van Riel <riel@xxxxxxxxxx>
> Cc: Srikar Dronamraju <srikar@xxxxxxxxxxxxxxxxxx>
> Cc: Vlastimil Babka <vbabka@xxxxxxx>
> Cc: jack@xxxxxxx
> Cc: linux-mm@xxxxxxxxx
> Link: http://lkml.kernel.org/r/20160212210154.3F0E51EA@xxxxxxxxxxxxxxxxxx
> Signed-off-by: Ingo Molnar <mingo@xxxxxxxxxx>
> ---
>  drivers/gpu/drm/etnaviv/etnaviv_gem.c   |  6 +++---
>  drivers/gpu/drm/i915/i915_gem_userptr.c | 10 +++++-----
>  drivers/infiniband/core/umem_odp.c      |  8 ++++----
>  fs/exec.c                               |  8 ++++++--
>  include/linux/mm.h                      |  5 +++++
>  kernel/events/uprobes.c                 | 10 ++++++++--
>  mm/gup.c                                | 27 ++++++++++++++++++++++-----
>  mm/memory.c                             |  2 +-
>  mm/process_vm_access.c                  | 11 ++++++++---
>  security/tomoyo/domain.c                |  9 ++++++++-
>  virt/kvm/async_pf.c                     |  8 +++++++-
>  11 files changed, 77 insertions(+), 27 deletions(-)
>
> diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.c b/drivers/gpu/drm/etnaviv/etnaviv_gem.c
> index 4b519e4..97d4457 100644
> --- a/drivers/gpu/drm/etnaviv/etnaviv_gem.c
> +++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.c
> @@ -753,9 +753,9 @@ static struct page **etnaviv_gem_userptr_do_get_pages(
>
>  	down_read(&mm->mmap_sem);
>  	while (pinned < npages) {
> -		ret = get_user_pages(task, mm, ptr, npages - pinned,
> -				     !etnaviv_obj->userptr.ro, 0,
> -				     pvec + pinned, NULL);
> +		ret = get_user_pages_remote(task, mm, ptr, npages - pinned,
> +					    !etnaviv_obj->userptr.ro, 0,
> +					    pvec + pinned, NULL);
>  		if (ret < 0)
>  			break;
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
> index 59e45b3..90dbf81 100644
> --- a/drivers/gpu/drm/i915/i915_gem_userptr.c
> +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
> @@ -584,11 +584,11 @@ __i915_gem_userptr_get_pages_worker(struct work_struct *_work)
>
>  	down_read(&mm->mmap_sem);
>  	while (pinned < npages) {
> -		ret = get_user_pages(work->task, mm,
> -				     obj->userptr.ptr + pinned * PAGE_SIZE,
> -				     npages - pinned,
> -				     !obj->userptr.read_only, 0,
> -				     pvec + pinned, NULL);
> +		ret = get_user_pages_remote(work->task, mm,
> +				obj->userptr.ptr + pinned * PAGE_SIZE,
> +				npages - pinned,
> +				!obj->userptr.read_only, 0,
> +				pvec + pinned, NULL);
>  		if (ret < 0)
>  			break;
>
> diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c
> index e69bf26..75077a0 100644
> --- a/drivers/infiniband/core/umem_odp.c
> +++ b/drivers/infiniband/core/umem_odp.c
> @@ -572,10 +572,10 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt,
>  		 * complex (and doesn't gain us much performance in most use
>  		 * cases).
>  		 */
> -		npages = get_user_pages(owning_process, owning_mm, user_virt,
> -					gup_num_pages,
> -					access_mask & ODP_WRITE_ALLOWED_BIT, 0,
> -					local_page_list, NULL);
> +		npages = get_user_pages_remote(owning_process, owning_mm,
> +				user_virt, gup_num_pages,
> +				access_mask & ODP_WRITE_ALLOWED_BIT,
> +				0, local_page_list, NULL);
>  		up_read(&owning_mm->mmap_sem);
>
>  		if (npages < 0)
> diff --git a/fs/exec.c b/fs/exec.c
> index dcd4ac7..d885b98 100644
> --- a/fs/exec.c
> +++ b/fs/exec.c
> @@ -198,8 +198,12 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
>  		return NULL;
>  	}
>  #endif
> -	ret = get_user_pages(current, bprm->mm, pos,
> -			1, write, 1, &page, NULL);
> +	/*
> +	 * We are doing an exec(). 'current' is the process
> +	 * doing the exec and bprm->mm is the new process's mm.
> +	 */
> +	ret = get_user_pages_remote(current, bprm->mm, pos, 1, write,
> +			1, &page, NULL);
>  	if (ret <= 0)
>  		return NULL;
>
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index b1d4b8c..faf3b70 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -1225,6 +1225,10 @@ long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
>  		      unsigned long start, unsigned long nr_pages,
>  		      unsigned int foll_flags, struct page **pages,
>  		      struct vm_area_struct **vmas, int *nonblocking);
> +long get_user_pages_remote(struct task_struct *tsk, struct mm_struct *mm,
> +		      unsigned long start, unsigned long nr_pages,
> +		      int write, int force, struct page **pages,
> +		      struct vm_area_struct **vmas);
>  long get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
>  		      unsigned long start, unsigned long nr_pages,
>  		      int write, int force, struct page **pages,
> @@ -2170,6 +2174,7 @@ static inline struct page *follow_page(struct vm_area_struct *vma,
>  #define FOLL_MIGRATION	0x400	/* wait for page to replace migration entry */
>  #define FOLL_TRIED	0x800	/* a retry, previous pass started an IO */
>  #define FOLL_MLOCK	0x1000	/* lock present pages */
> +#define FOLL_REMOTE	0x2000	/* we are working on non-current tsk/mm */
>
>  typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr,
>  			void *data);
> diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
> index 0167679..8eef5f5 100644
> --- a/kernel/events/uprobes.c
> +++ b/kernel/events/uprobes.c
> @@ -299,7 +299,7 @@ int uprobe_write_opcode(struct mm_struct *mm, unsigned long vaddr,
>
>  retry:
>  	/* Read the page with vaddr into memory */
> -	ret = get_user_pages(NULL, mm, vaddr, 1, 0, 1, &old_page, &vma);
> +	ret = get_user_pages_remote(NULL, mm, vaddr, 1, 0, 1, &old_page, &vma);
>  	if (ret <= 0)
>  		return ret;
>
> @@ -1700,7 +1700,13 @@ static int is_trap_at_addr(struct mm_struct *mm, unsigned long vaddr)
>  	if (likely(result == 0))
>  		goto out;
>
> -	result = get_user_pages(NULL, mm, vaddr, 1, 0, 1, &page, NULL);
> +	/*
> +	 * The NULL 'tsk' here ensures that any faults that occur here
> +	 * will not be accounted to the task. 'mm' *is* current->mm,
> +	 * but we treat this as a 'remote' access since it is
> +	 * essentially a kernel access to the memory.
> +	 */
> +	result = get_user_pages_remote(NULL, mm, vaddr, 1, 0, 1, &page, NULL);
>  	if (result < 0)
>  		return result;
>
> diff --git a/mm/gup.c b/mm/gup.c
> index 7bf19ff..36ca850 100644
> --- a/mm/gup.c
> +++ b/mm/gup.c
> @@ -870,7 +870,7 @@ long get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm,
>  EXPORT_SYMBOL(get_user_pages_unlocked);
>
>  /*
> - * get_user_pages() - pin user pages in memory
> + * get_user_pages_remote() - pin user pages in memory
>   * @tsk:	the task_struct to use for page fault accounting, or
>   *		NULL if faults are not to be recorded.
>   * @mm:		mm_struct of target mm
> @@ -924,12 +924,29 @@ EXPORT_SYMBOL(get_user_pages_unlocked);
>   * should use get_user_pages because it cannot pass
>   * FAULT_FLAG_ALLOW_RETRY to handle_mm_fault.
>   */
> -long get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
> -		unsigned long start, unsigned long nr_pages, int write,
> -		int force, struct page **pages, struct vm_area_struct **vmas)
> +long get_user_pages_remote(struct task_struct *tsk, struct mm_struct *mm,
> +		unsigned long start, unsigned long nr_pages,
> +		int write, int force, struct page **pages,
> +		struct vm_area_struct **vmas)
>  {
>  	return __get_user_pages_locked(tsk, mm, start, nr_pages, write, force,
> -				       pages, vmas, NULL, false, FOLL_TOUCH);
> +				       pages, vmas, NULL, false,
> +				       FOLL_TOUCH | FOLL_REMOTE);
> +}
> +EXPORT_SYMBOL(get_user_pages_remote);
> +
> +/*
> + * This is the same as get_user_pages_remote() for the time
> + * being.
> + */
> +long get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
> +		unsigned long start, unsigned long nr_pages,
> +		int write, int force, struct page **pages,
> +		struct vm_area_struct **vmas)
> +{
> +	return __get_user_pages_locked(tsk, mm, start, nr_pages,
> +			write, force, pages, vmas, NULL, false,
> +			FOLL_TOUCH);
>  }
>  EXPORT_SYMBOL(get_user_pages);
>
> diff --git a/mm/memory.c b/mm/memory.c
> index 38090ca..8bfbad0 100644
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -3685,7 +3685,7 @@ static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm,
>  		void *maddr;
>  		struct page *page = NULL;
>
> -		ret = get_user_pages(tsk, mm, addr, 1,
> +		ret = get_user_pages_remote(tsk, mm, addr, 1,
>  				write, 1, &page, &vma);
>  		if (ret <= 0) {
>  #ifndef CONFIG_HAVE_IOREMAP_PROT
> diff --git a/mm/process_vm_access.c b/mm/process_vm_access.c
> index 5d453e5..07514d4 100644
> --- a/mm/process_vm_access.c
> +++ b/mm/process_vm_access.c
> @@ -98,9 +98,14 @@ static int process_vm_rw_single_vec(unsigned long addr,
>  		int pages = min(nr_pages, max_pages_per_loop);
>  		size_t bytes;
>
> -		/* Get the pages we're interested in */
> -		pages = get_user_pages_unlocked(task, mm, pa, pages,
> -						vm_write, 0, process_pages);
> +		/*
> +		 * Get the pages we're interested in. We must
> +		 * add FOLL_REMOTE because task/mm might not
> +		 * current/current->mm
> +		 */
> +		pages = __get_user_pages_unlocked(task, mm, pa, pages,
> +						  vm_write, 0, process_pages,
> +						  FOLL_REMOTE);
>  		if (pages <= 0)
>  			return -EFAULT;
>
> diff --git a/security/tomoyo/domain.c b/security/tomoyo/domain.c
> index 3865145..ade7c6c 100644
> --- a/security/tomoyo/domain.c
> +++ b/security/tomoyo/domain.c
> @@ -874,7 +874,14 @@ bool tomoyo_dump_page(struct linux_binprm *bprm, unsigned long pos,
>  	}
>  	/* Same with get_arg_page(bprm, pos, 0) in fs/exec.c */
>  #ifdef CONFIG_MMU
> -	if (get_user_pages(current, bprm->mm, pos, 1, 0, 1, &page, NULL) <= 0)
> +	/*
> +	 * This is called at execve() time in order to dig around
> +	 * in the argv/environment of the new proceess
> +	 * (represented by bprm). 'current' is the process doing
> +	 * the execve().
> +	 */
> +	if (get_user_pages_remote(current, bprm->mm, pos, 1,
> +				0, 1, &page, NULL) <= 0)
>  		return false;
>  #else
>  	page = bprm->page[pos / PAGE_SIZE];
> diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c
> index 3531599..d604e87 100644
> --- a/virt/kvm/async_pf.c
> +++ b/virt/kvm/async_pf.c
> @@ -79,7 +79,13 @@ static void async_pf_execute(struct work_struct *work)
>
>  	might_sleep();
>
> -	get_user_pages_unlocked(NULL, mm, addr, 1, 1, 0, NULL);
> +	/*
> +	 * This work is run asynchromously to the task which owns
> +	 * mm and might be done in another context, so we must
> +	 * use FOLL_REMOTE.
> +	 */
> +	__get_user_pages_unlocked(NULL, mm, addr, 1, 1, 0, NULL, FOLL_REMOTE);
> +
>  	kvm_async_page_present_sync(vcpu, apf);
>
>  	spin_lock(&vcpu->async_pf.lock);
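
P.S. The maj_flt/min_flt accounting mentioned at the top is roughly
this (quoting mm/gup.c:faultin_page() from memory, so treat it as
illustrative rather than exact):

	if (tsk) {
		if (ret & VM_FAULT_MAJOR)
			tsk->maj_flt++;
		else
			tsk->min_flt++;
	}

These are plain non-atomic increments, so a remote gup user bumping
another task's counters can race both with that task faulting on its
own behalf and with other remote users.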