Re: [RFC perf/core 05/11] uprobes: Add mapping for optimized uprobe trampolines

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Tue, Nov 5, 2024 at 5:35 AM Jiri Olsa <jolsa@xxxxxxxxxx> wrote:
>
> Adding interface to add special mapping for user space page that will be
> used as place holder for uprobe trampoline in following changes.
>
> The get_tramp_area(vaddr) function either finds 'callable' page or create
> new one.  The 'callable' means it's reachable by call instruction (from
> vaddr argument) and is decided by each arch via new arch_uprobe_is_callable
> function.
>
> The put_tramp_area function either drops refcount or destroys the special
> mapping and all the maps are clean up when the process goes down.
>
> Signed-off-by: Jiri Olsa <jolsa@xxxxxxxxxx>
> ---
>  include/linux/uprobes.h |  12 ++++
>  kernel/events/uprobes.c | 141 ++++++++++++++++++++++++++++++++++++++++
>  kernel/fork.c           |   2 +
>  3 files changed, 155 insertions(+)
>
> diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h
> index be306028ed59..222d8e82cee2 100644
> --- a/include/linux/uprobes.h
> +++ b/include/linux/uprobes.h
> @@ -172,6 +172,15 @@ struct xol_area;
>
>  struct uprobes_state {
>         struct xol_area         *xol_area;
> +       struct hlist_head       tramp_head;
> +       struct mutex            tramp_mutex;
> +};
> +
> +struct tramp_area {
> +       unsigned long           vaddr;
> +       struct page             *page;
> +       struct hlist_node       node;
> +       refcount_t              ref;

nit: any reason we are unnecessarily trying to save 4 bytes on
refcount (and we don't actually, due to padding)

>  };
>
>  extern void __init uprobes_init(void);
> @@ -219,6 +228,9 @@ extern int uprobe_verify_opcode(struct page *page, unsigned long vaddr, uprobe_o
>  extern int arch_uprobe_verify_opcode(struct page *page, unsigned long vaddr,
>                                      uprobe_opcode_t *new_opcode, void *data);
>  extern bool arch_uprobe_is_register(uprobe_opcode_t *insn, int len, void *data);
> +struct tramp_area *get_tramp_area(unsigned long vaddr);

uprobe_get_tramp_area() to make it clear this is uprobe specific,
given this is exposed function?

and add that extern like we do for other functions?

> +void put_tramp_area(struct tramp_area *area);

uprobe_put_tramp_area() ?

> +bool arch_uprobe_is_callable(unsigned long vtramp, unsigned long vaddr);
>  #else /* !CONFIG_UPROBES */
>  struct uprobes_state {
>  };
> diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
> index 944d9df1f081..a44305c559a4 100644
> --- a/kernel/events/uprobes.c
> +++ b/kernel/events/uprobes.c
> @@ -616,6 +616,145 @@ set_orig_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long v
>                         (uprobe_opcode_t *)&auprobe->insn, UPROBE_SWBP_INSN_SIZE, NULL);
>  }
>
> +bool __weak arch_uprobe_is_callable(unsigned long vtramp, unsigned long vaddr)
> +{
> +       return false;
> +}
> +
> +static unsigned long find_nearest_page(unsigned long vaddr)
> +{
> +       struct mm_struct *mm = current->mm;
> +       struct vm_area_struct *vma, *prev;
> +       VMA_ITERATOR(vmi, mm, 0);
> +
> +       prev = vma_next(&vmi);
> +       vma = vma_next(&vmi);
> +       while (vma) {
> +               if (vma->vm_start - prev->vm_end  >= PAGE_SIZE &&
> +                   arch_uprobe_is_callable(prev->vm_end, vaddr))
> +                       return prev->vm_end;

shouldn't we try both `prev->vm_end` and `vma->vm_start - PAGE_SIZE`
as two possible places

> +
> +               prev = vma;
> +               vma = vma_next(&vmi);
> +       }
> +
> +       return 0;
> +}
> +
> +static vm_fault_t tramp_fault(const struct vm_special_mapping *sm,
> +                             struct vm_area_struct *vma, struct vm_fault *vmf)
> +{
> +       struct hlist_head *head = &vma->vm_mm->uprobes_state.tramp_head;
> +       struct tramp_area *area;
> +
> +       hlist_for_each_entry(area, head, node) {
> +               if (vma->vm_start == area->vaddr) {
> +                       vmf->page = area->page;
> +                       get_page(vmf->page);
> +                       return 0;
> +               }
> +       }
> +
> +       return -EINVAL;
> +}
> +
> +static int tramp_mremap(const struct vm_special_mapping *sm, struct vm_area_struct *new_vma)
> +{
> +       return -EPERM;
> +}
> +
> +static const struct vm_special_mapping tramp_mapping = {
> +       .name = "[uprobes-trampoline]",
> +       .fault = tramp_fault,
> +       .mremap = tramp_mremap,
> +};
> +
> +static struct tramp_area *create_tramp_area(unsigned long vaddr)
> +{
> +       struct mm_struct *mm = current->mm;
> +       struct vm_area_struct *vma;
> +       struct tramp_area *area;
> +
> +       vaddr = find_nearest_page(vaddr);
> +       if (!vaddr)
> +               return NULL;
> +
> +       area = kzalloc(sizeof(*area), GFP_KERNEL);
> +       if (unlikely(!area))
> +               return NULL;
> +
> +       area->page = alloc_page(GFP_HIGHUSER);
> +       if (!area->page)
> +               goto free_area;
> +
> +       refcount_set(&area->ref, 1);
> +       area->vaddr = vaddr;
> +
> +       vma = _install_special_mapping(mm, area->vaddr, PAGE_SIZE,
> +                               VM_READ|VM_EXEC|VM_MAYEXEC|VM_MAYREAD|VM_DONTCOPY|VM_IO,
> +                               &tramp_mapping);
> +       if (!IS_ERR(vma))
> +               return area;

please keep a pattern, it's less surprising that way

    if (IS_ERR(vma))
        goto free_page;

    return area;

free_page:

> +
> +       __free_page(area->page);
> + free_area:
> +       kfree(area);
> +       return NULL;
> +}
> +
> +struct tramp_area *get_tramp_area(unsigned long vaddr)
> +{
> +       struct uprobes_state *state = &current->mm->uprobes_state;
> +       struct tramp_area *area = NULL;
> +
> +       mutex_lock(&state->tramp_mutex);
> +       hlist_for_each_entry(area, &state->tramp_head, node) {
> +               if (arch_uprobe_is_callable(area->vaddr, vaddr)) {
> +                       refcount_inc(&area->ref);
> +                       goto unlock;
> +               }
> +       }
> +
> +       area = create_tramp_area(vaddr);
> +       if (area)
> +               hlist_add_head(&area->node, &state->tramp_head);
> +
> +unlock:
> +       mutex_unlock(&state->tramp_mutex);
> +       return area;
> +}
> +
> +static void destroy_tramp_area(struct tramp_area *area)
> +{
> +       hlist_del(&area->node);
> +       put_page(area->page);
> +       kfree(area);
> +}
> +
> +void put_tramp_area(struct tramp_area *area)
> +{
> +       struct mm_struct *mm = current->mm;
> +       struct uprobes_state *state = &mm->uprobes_state;
> +
> +       if (area == NULL)

nit: !area

> +               return;
> +
> +       mutex_lock(&state->tramp_mutex);
> +       if (refcount_dec_and_test(&area->ref))
> +               destroy_tramp_area(area);
> +       mutex_unlock(&state->tramp_mutex);
> +}
> +

[...]





[Index of Archives]     [Linux Samsung SoC]     [Linux Rockchip SoC]     [Linux Actions SoC]     [Linux for Synopsys ARC Processors]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]


  Powered by Linux