Conversion is straightforward, mmap_sem is used within the the same function context most of the time. No change in semantics. Signed-off-by: Davidlohr Bueso <dbueso@xxxxxxx> --- kernel/acct.c | 5 +++-- kernel/bpf/stackmap.c | 7 +++++-- kernel/events/core.c | 5 +++-- kernel/events/uprobes.c | 20 ++++++++++++-------- kernel/exit.c | 9 +++++---- kernel/fork.c | 16 ++++++++++------ kernel/futex.c | 5 +++-- kernel/sched/fair.c | 5 +++-- kernel/sys.c | 22 +++++++++++++--------- kernel/trace/trace_output.c | 5 +++-- 10 files changed, 60 insertions(+), 39 deletions(-) diff --git a/kernel/acct.c b/kernel/acct.c index 81f9831a7859..2bbcecbd78ef 100644 --- a/kernel/acct.c +++ b/kernel/acct.c @@ -538,14 +538,15 @@ void acct_collect(long exitcode, int group_dead) if (group_dead && current->mm) { struct vm_area_struct *vma; + DEFINE_RANGE_LOCK_FULL(mmrange); - down_read(¤t->mm->mmap_sem); + mm_read_lock(current->mm, &mmrange); vma = current->mm->mmap; while (vma) { vsize += vma->vm_end - vma->vm_start; vma = vma->vm_next; } - up_read(¤t->mm->mmap_sem); + mm_read_unlock(current->mm, &mmrange); } spin_lock_irq(¤t->sighand->siglock); diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c index 950ab2f28922..fdb352bea7e8 100644 --- a/kernel/bpf/stackmap.c +++ b/kernel/bpf/stackmap.c @@ -37,6 +37,7 @@ struct bpf_stack_map { struct stack_map_irq_work { struct irq_work irq_work; struct rw_semaphore *sem; + struct range_lock *mmrange; }; static void do_up_read(struct irq_work *entry) @@ -291,6 +292,7 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs, struct vm_area_struct *vma; bool irq_work_busy = false; struct stack_map_irq_work *work = NULL; + DEFINE_RANGE_LOCK_FULL(mmrange); if (in_nmi()) { work = this_cpu_ptr(&up_read_work); @@ -309,7 +311,7 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs, * with build_id. */ if (!user || !current || !current->mm || irq_work_busy || - down_read_trylock(¤t->mm->mmap_sem) == 0) { + mm_read_trylock(current->mm, &mmrange) == 0) { /* cannot access current->mm, fall back to ips */ for (i = 0; i < trace_nr; i++) { id_offs[i].status = BPF_STACK_BUILD_ID_IP; @@ -334,9 +336,10 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs, } if (!work) { - up_read(¤t->mm->mmap_sem); + mm_read_unlock(current->mm, &mmrange); } else { work->sem = ¤t->mm->mmap_sem; + work->mmrange = &mmrange; irq_work_queue(&work->irq_work); /* * The irq_work will release the mmap_sem with diff --git a/kernel/events/core.c b/kernel/events/core.c index abbd4b3b96c2..3b43cfe63b54 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -9079,6 +9079,7 @@ static void perf_event_addr_filters_apply(struct perf_event *event) struct mm_struct *mm = NULL; unsigned int count = 0; unsigned long flags; + DEFINE_RANGE_LOCK_FULL(mmrange); /* * We may observe TASK_TOMBSTONE, which means that the event tear-down @@ -9092,7 +9093,7 @@ static void perf_event_addr_filters_apply(struct perf_event *event) if (!mm) goto restart; - down_read(&mm->mmap_sem); + mm_read_lock(mm, &mmrange); } raw_spin_lock_irqsave(&ifh->lock, flags); @@ -9118,7 +9119,7 @@ static void perf_event_addr_filters_apply(struct perf_event *event) raw_spin_unlock_irqrestore(&ifh->lock, flags); if (ifh->nr_file_filters) { - up_read(&mm->mmap_sem); + mm_read_unlock(mm, &mmrange); mmput(mm); } diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 3689eceb8d0c..6779c237799a 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -997,6 +997,7 @@ register_for_each_vma(struct uprobe *uprobe, struct uprobe_consumer *new) bool is_register = !!new; struct map_info *info; int err = 0; + DEFINE_RANGE_LOCK_FULL(mmrange); percpu_down_write(&dup_mmap_sem); info = build_map_info(uprobe->inode->i_mapping, @@ -1013,7 +1014,7 @@ register_for_each_vma(struct uprobe *uprobe, struct uprobe_consumer *new) if (err && is_register) goto free; - down_write(&mm->mmap_sem); + mm_write_lock(mm, &mmrange); vma = find_vma(mm, info->vaddr); if (!vma || !valid_vma(vma, is_register) || file_inode(vma->vm_file) != uprobe->inode) @@ -1035,7 +1036,7 @@ register_for_each_vma(struct uprobe *uprobe, struct uprobe_consumer *new) } unlock: - up_write(&mm->mmap_sem); + mm_write_unlock(mm, &mmrange); free: mmput(mm); info = free_map_info(info); @@ -1189,8 +1190,9 @@ static int unapply_uprobe(struct uprobe *uprobe, struct mm_struct *mm) { struct vm_area_struct *vma; int err = 0; + DEFINE_RANGE_LOCK_FULL(mmrange); - down_read(&mm->mmap_sem); + mm_read_lock(mm, &mmrange); for (vma = mm->mmap; vma; vma = vma->vm_next) { unsigned long vaddr; loff_t offset; @@ -1207,7 +1209,7 @@ static int unapply_uprobe(struct uprobe *uprobe, struct mm_struct *mm) vaddr = offset_to_vaddr(vma, uprobe->offset); err |= remove_breakpoint(uprobe, mm, vaddr); } - up_read(&mm->mmap_sem); + mm_read_unlock(mm, &mmrange); return err; } @@ -1391,10 +1393,11 @@ void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned lon /* Slot allocation for XOL */ static int xol_add_vma(struct mm_struct *mm, struct xol_area *area) { + DEFINE_RANGE_LOCK_FULL(mmrange); struct vm_area_struct *vma; int ret; - if (down_write_killable(&mm->mmap_sem)) + if (mm_write_lock_killable(mm, &mmrange)) return -EINTR; if (mm->uprobes_state.xol_area) { @@ -1424,7 +1427,7 @@ static int xol_add_vma(struct mm_struct *mm, struct xol_area *area) /* pairs with get_xol_area() */ smp_store_release(&mm->uprobes_state.xol_area, area); /* ^^^ */ fail: - up_write(&mm->mmap_sem); + mm_write_unlock(mm, &mmrange); return ret; } @@ -1993,8 +1996,9 @@ static struct uprobe *find_active_uprobe(unsigned long bp_vaddr, int *is_swbp) struct mm_struct *mm = current->mm; struct uprobe *uprobe = NULL; struct vm_area_struct *vma; + DEFINE_RANGE_LOCK_FULL(mmrange); - down_read(&mm->mmap_sem); + mm_read_lock(mm, &mmrange); vma = find_vma(mm, bp_vaddr); if (vma && vma->vm_start <= bp_vaddr) { if (valid_vma(vma, false)) { @@ -2012,7 +2016,7 @@ static struct uprobe *find_active_uprobe(unsigned long bp_vaddr, int *is_swbp) if (!uprobe && test_and_clear_bit(MMF_RECALC_UPROBES, &mm->flags)) mmf_recalc_uprobes(mm); - up_read(&mm->mmap_sem); + mm_read_unlock(mm, &mmrange); return uprobe; } diff --git a/kernel/exit.c b/kernel/exit.c index 8361a560cd1d..79bc5ec20694 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -497,6 +497,7 @@ static void exit_mm(void) { struct mm_struct *mm = current->mm; struct core_state *core_state; + DEFINE_RANGE_LOCK_FULL(mmrange); mm_release(current, mm); if (!mm) @@ -509,12 +510,12 @@ static void exit_mm(void) * will increment ->nr_threads for each thread in the * group with ->mm != NULL. */ - down_read(&mm->mmap_sem); + mm_read_lock(mm, &mmrange); core_state = mm->core_state; if (core_state) { struct core_thread self; - up_read(&mm->mmap_sem); + mm_read_unlock(mm, &mmrange); self.task = current; self.next = xchg(&core_state->dumper.next, &self); @@ -532,14 +533,14 @@ static void exit_mm(void) freezable_schedule(); } __set_current_state(TASK_RUNNING); - down_read(&mm->mmap_sem); + mm_read_lock(mm, &mmrange); } mmgrab(mm); BUG_ON(mm != current->active_mm); /* more a memory barrier than a real lock */ task_lock(current); current->mm = NULL; - up_read(&mm->mmap_sem); + mm_read_unlock(mm, &mmrange); enter_lazy_tlb(mm, current); task_unlock(current); mm_update_next_owner(mm); diff --git a/kernel/fork.c b/kernel/fork.c index 45fde571c5dd..cc24e3690532 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -468,10 +468,12 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm, struct rb_node **rb_link, *rb_parent; int retval; unsigned long charge; + DEFINE_RANGE_LOCK_FULL(old_mmrange); + DEFINE_RANGE_LOCK_FULL(mmrange); LIST_HEAD(uf); uprobe_start_dup_mmap(); - if (down_write_killable(&oldmm->mmap_sem)) { + if (mm_write_lock_killable(oldmm, &old_mmrange)) { retval = -EINTR; goto fail_uprobe_end; } @@ -480,7 +482,7 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm, /* * Not linked in yet - no deadlock potential: */ - down_write_nested(&mm->mmap_sem, SINGLE_DEPTH_NESTING); + mm_write_lock_nested(mm, &mmrange, SINGLE_DEPTH_NESTING); /* No ordering required: file already has been exposed. */ RCU_INIT_POINTER(mm->exe_file, get_mm_exe_file(oldmm)); @@ -595,9 +597,9 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm, /* a new mm has just been created */ retval = arch_dup_mmap(oldmm, mm); out: - up_write(&mm->mmap_sem); + mm_write_unlock(mm, &mmrange); flush_tlb_mm(oldmm); - up_write(&oldmm->mmap_sem); + mm_write_unlock(oldmm, &old_mmrange); dup_userfaultfd_complete(&uf); fail_uprobe_end: uprobe_end_dup_mmap(); @@ -627,9 +629,11 @@ static inline void mm_free_pgd(struct mm_struct *mm) #else static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) { - down_write(&oldmm->mmap_sem); + DEFINE_RANGE_LOCK_FULL(mmrange); + + mm_write_lock(oldmm, &mmrange); RCU_INIT_POINTER(mm->exe_file, get_mm_exe_file(oldmm)); - up_write(&oldmm->mmap_sem); + mm_write_unlock(oldmm, &mmrange); return 0; } #define mm_alloc_pgd(mm) (0) diff --git a/kernel/futex.c b/kernel/futex.c index 4615f9371a6f..53829040791b 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -730,11 +730,12 @@ static int fault_in_user_writeable(u32 __user *uaddr) { struct mm_struct *mm = current->mm; int ret; + DEFINE_RANGE_LOCK_FULL(mmrange); - down_read(&mm->mmap_sem); + mm_read_lock(mm, &mmrange); ret = fixup_user_fault(current, mm, (unsigned long)uaddr, FAULT_FLAG_WRITE, NULL, NULL); - up_read(&mm->mmap_sem); + mm_read_unlock(mm, &mmrange); return ret < 0 ? ret : 0; } diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index f35930f5e528..222b554bf928 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -2461,6 +2461,7 @@ void task_numa_work(struct callback_head *work) struct vm_area_struct *vma; unsigned long start, end; unsigned long nr_pte_updates = 0; + DEFINE_RANGE_LOCK_FULL(mmrange); long pages, virtpages; SCHED_WARN_ON(p != container_of(work, struct task_struct, numa_work)); @@ -2512,7 +2513,7 @@ void task_numa_work(struct callback_head *work) return; - if (!down_read_trylock(&mm->mmap_sem)) + if (!mm_read_trylock(mm, &mmrange)) return; vma = find_vma(mm, start); if (!vma) { @@ -2580,7 +2581,7 @@ void task_numa_work(struct callback_head *work) mm->numa_scan_offset = start; else reset_ptenuma_scan(p); - up_read(&mm->mmap_sem); + mm_read_unlock(mm, &mmrange); /* * Make sure tasks use at least 32x as much time to run other code diff --git a/kernel/sys.c b/kernel/sys.c index bdbfe8d37418..c769293f8a79 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1825,6 +1825,7 @@ static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd) struct file *old_exe, *exe_file; struct inode *inode; int err; + DEFINE_RANGE_LOCK_FULL(mmrange); exe = fdget(fd); if (!exe.file) @@ -1853,7 +1854,7 @@ static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd) if (exe_file) { struct vm_area_struct *vma; - down_read(&mm->mmap_sem); + mm_read_lock(mm, &mmrange); for (vma = mm->mmap; vma; vma = vma->vm_next) { if (!vma->vm_file) continue; @@ -1862,7 +1863,7 @@ static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd) goto exit_err; } - up_read(&mm->mmap_sem); + mm_read_unlock(mm, &mmrange); fput(exe_file); } @@ -1876,7 +1877,7 @@ static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd) fdput(exe); return err; exit_err: - up_read(&mm->mmap_sem); + mm_read_unlock(mm, &mmrange); fput(exe_file); goto exit; } @@ -1979,6 +1980,7 @@ static int prctl_set_mm_map(int opt, const void __user *addr, unsigned long data unsigned long user_auxv[AT_VECTOR_SIZE]; struct mm_struct *mm = current->mm; int error; + DEFINE_RANGE_LOCK_FULL(mmrange); BUILD_BUG_ON(sizeof(user_auxv) != sizeof(mm->saved_auxv)); BUILD_BUG_ON(sizeof(struct prctl_mm_map) > 256); @@ -2019,7 +2021,7 @@ static int prctl_set_mm_map(int opt, const void __user *addr, unsigned long data * arg_lock protects concurent updates but we still need mmap_sem for * read to exclude races with sys_brk. */ - down_read(&mm->mmap_sem); + mm_read_lock(mm, &mmrange); /* * We don't validate if these members are pointing to @@ -2058,7 +2060,7 @@ static int prctl_set_mm_map(int opt, const void __user *addr, unsigned long data if (prctl_map.auxv_size) memcpy(mm->saved_auxv, user_auxv, sizeof(user_auxv)); - up_read(&mm->mmap_sem); + mm_read_unlock(mm, &mmrange); return 0; } #endif /* CONFIG_CHECKPOINT_RESTORE */ @@ -2100,6 +2102,7 @@ static int prctl_set_mm(int opt, unsigned long addr, struct prctl_mm_map prctl_map; struct vm_area_struct *vma; int error; + DEFINE_RANGE_LOCK_FULL(mmrange); if (arg5 || (arg4 && (opt != PR_SET_MM_AUXV && opt != PR_SET_MM_MAP && @@ -2125,7 +2128,7 @@ static int prctl_set_mm(int opt, unsigned long addr, error = -EINVAL; - down_write(&mm->mmap_sem); + mm_write_lock(mm, &mmrange); vma = find_vma(mm, addr); prctl_map.start_code = mm->start_code; @@ -2218,7 +2221,7 @@ static int prctl_set_mm(int opt, unsigned long addr, error = 0; out: - up_write(&mm->mmap_sem); + mm_write_unlock(mm, &mmrange); return error; } @@ -2266,6 +2269,7 @@ int __weak arch_prctl_spec_ctrl_set(struct task_struct *t, unsigned long which, SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, unsigned long, arg4, unsigned long, arg5) { + DEFINE_RANGE_LOCK_FULL(mmrange); struct task_struct *me = current; unsigned char comm[sizeof(me->comm)]; long error; @@ -2441,13 +2445,13 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, case PR_SET_THP_DISABLE: if (arg3 || arg4 || arg5) return -EINVAL; - if (down_write_killable(&me->mm->mmap_sem)) + if (mm_write_lock_killable(me->mm, &mmrange)) return -EINTR; if (arg2) set_bit(MMF_DISABLE_THP, &me->mm->flags); else clear_bit(MMF_DISABLE_THP, &me->mm->flags); - up_write(&me->mm->mmap_sem); + mm_write_unlock(me->mm, &mmrange); break; case PR_MPX_ENABLE_MANAGEMENT: if (arg2 || arg3 || arg4 || arg5) diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index 54373d93e251..0dbdab621f17 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -377,8 +377,9 @@ static int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm, if (mm) { const struct vm_area_struct *vma; + DEFINE_RANGE_LOCK_FULL(mmrange); - down_read(&mm->mmap_sem); + mm_read_lock(mm, &mmrange); vma = find_vma(mm, ip); if (vma) { file = vma->vm_file; @@ -390,7 +391,7 @@ static int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm, trace_seq_printf(s, "[+0x%lx]", ip - vmstart); } - up_read(&mm->mmap_sem); + mm_read_unlock(mm, &mmrange); } if (ret && ((sym_flags & TRACE_ITER_SYM_ADDR) || !file)) trace_seq_printf(s, " <" IP_FMT ">", ip); -- 2.16.4