From: Leon Romanovsky <leonro@xxxxxxxxxxxx> After the ucontext is released, __mmu_notifier_release will be called again in the exit_mmap path. However, by that time the driver ucontext (mlx5_ib_ucontext) will already have been freed, which causes a use-after-free error due to improper use of the mmu_notifier API. Convert UMEM ODP to use the mmu_notifier unregister flow with delayed freeing of the memory resource. ================================================================== [ 335.696162] BUG: KASAN: use-after-free in __mmu_notifier_release+0x13f/0x450 [ 335.696818] Read of size 8 at addr ffff8801218b9bd0 by task a.out/387 [ 335.697358] [ 335.697461] CPU: 2 PID: 387 Comm: a.out Not tainted 4.19.0-rc1+ #137 [ 335.697844] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.11.0-0-g63451fca13-prebuilt.qemu-project.org 04/01/2014 [ 335.698939] Call Trace: [ 335.699183] dump_stack+0xf0/0x19b [ 335.700798] print_address_description+0x73/0x280 [ 335.702129] kasan_report+0x258/0x380 [ 335.702572] __mmu_notifier_release+0x13f/0x450 [ 335.708154] exit_mmap+0x241/0x280 [ 335.710134] mmput+0x133/0x330 [ 335.714691] do_exit+0xf5e/0x1350 [ 335.728976] do_group_exit+0xe0/0x1c0 [ 335.729911] get_signal+0x447/0xde0 [ 335.732720] do_signal+0x96/0xb50 [ 335.738739] exit_to_usermode_loop+0x163/0x1b0 [ 335.741891] do_syscall_64+0x35c/0x370 [ 335.744658] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 335.745638] RIP: 0033:0x7fa8e124adf9 [ 335.745909] Code: Bad RIP value. 
[ 335.746187] RSP: 002b:00007fa8e1949e98 EFLAGS: 00000293 ORIG_RAX: 0000000000000001 [ 335.746736] RAX: 0000000000000038 RBX: 0000000000000000 RCX: 00007fa8e124adf9 [ 335.747122] RDX: 0000000000000038 RSI: 00000000200000c0 RDI: 0000000000000003 [ 335.747377] RBP: 00007fa8e1949ec0 R08: 0000000000000000 R09: 0000000000000000 [ 335.749405] R10: 0000000000000000 R11: 0000000000000293 R12: 00007ffda1662cde [ 335.749700] R13: 00007ffda1662cdf R14: 00007ffda1662d70 R15: 00007ffda1662d70 [ 335.749974] [ 335.750077] Allocated by task 387: [ 335.750221] kasan_kmalloc+0xa0/0xd0 [ 335.750374] kmem_cache_alloc_trace+0x134/0x2c0 [ 335.752228] mlx5_ib_alloc_ucontext+0x501/0x1530 [mlx5_ib] [ 335.752402] ib_uverbs_get_context+0x240/0x840 [ib_uverbs] [ 335.752565] ib_uverbs_write+0x57c/0x930 [ib_uverbs] [ 335.752723] __vfs_write+0xc4/0x3c0 [ 335.753128] vfs_write+0xff/0x250 [ 335.753394] ksys_write+0xb6/0x140 [ 335.753804] do_syscall_64+0x105/0x370 [ 335.753924] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 335.754637] [ 335.754724] Freed by task 387: [ 335.754892] __kasan_slab_free+0x12e/0x180 [ 335.755155] kfree+0x121/0x2e0 [ 335.755432] mlx5_ib_dealloc_ucontext+0x94/0xa0 [mlx5_ib] [ 335.755879] uverbs_destroy_ufile_hw+0x22b/0x410 [ib_uverbs] [ 335.756788] ib_uverbs_close+0xd9/0x260 [ib_uverbs] [ 335.756953] __fput+0x210/0x3d0 [ 335.757075] task_work_run+0x13d/0x1a0 [ 335.757484] exit_to_usermode_loop+0x198/0x1b0 [ 335.757647] do_syscall_64+0x35c/0x370 [ 335.757770] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 335.758650] [ 335.758747] The buggy address belongs to the object at ffff8801218b9ae8 [ 335.758747] which belongs to the cache kmalloc-1024 of size 1024 [ 335.759437] The buggy address is located 232 bytes inside of [ 335.759437] 1024-byte region [ffff8801218b9ae8, ffff8801218b9ee8) [ 335.760087] The buggy address belongs to the page: [ 335.760398] page:ffffea0004862e00 count:1 mapcount:0 mapping:ffff880122c0ef00 index:0x0 compound_mapcount: 0 [ 335.761552] flags: 
0x8000000000008100(slab|head) [ 335.761713] raw: 8000000000008100 ffffea000487dc08 ffffea0004895408 ffff880122c0ef00 [ 335.762657] raw: 0000000000000000 0000000000170017 00000001ffffffff 0000000000000000 [ 335.762891] page dumped because: kasan: bad access detected [ 335.763057] [ 335.763140] Memory state around the buggy address: [ 335.763742] ffff8801218b9a80: fc fc fc fc fc fc fc fc fc fc fc fc fc fb fb fb [ 335.764272] ffff8801218b9b00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 335.764930] >ffff8801218b9b80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 335.765177] ^ [ 335.765513] ffff8801218b9c00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 335.765755] ffff8801218b9c80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 335.767364] ================================================================== Cc: <stable@xxxxxxxxxxxxxxx> # 3.19 Cc: syzkaller <syzkaller@xxxxxxxxxxxxxxxx> Reported-by: Noa Osherovich <noaos@xxxxxxxxxxxx> Fixes: 882214e2b128 ("IB/core: Implement support for MMU notifiers regarding on demand paging regions") Signed-off-by: Leon Romanovsky <leonro@xxxxxxxxxxxx> --- drivers/infiniband/core/umem_odp.c | 2 +- drivers/infiniband/hw/mlx5/main.c | 15 ++++++++++++++- drivers/infiniband/hw/mlx5/mlx5_ib.h | 4 ++++ 3 files changed, 19 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c index 29e34e6a6420..81db03c69c69 100644 --- a/drivers/infiniband/core/umem_odp.c +++ b/drivers/infiniband/core/umem_odp.c @@ -513,7 +513,7 @@ void ib_umem_odp_release(struct ib_umem *umem) * removed already. 
*/ goto out_put_task; - mmu_notifier_unregister(&context->mn, owning_mm); + mmu_notifier_unregister_no_release(&context->mn, owning_mm); mmput(owning_mm); diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 547fd4f50bd4..1d2b5ee16910 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1884,6 +1884,16 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, return ERR_PTR(err); } +#if IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING) +static void mlx5_ib_free_ucontext_delayed(struct rcu_head *rcu) +{ + struct mlx5_ib_ucontext *context = + container_of(rcu, struct mlx5_ib_ucontext, rcu); + + kfree(context); +} +#endif + static int mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext) { struct mlx5_ib_ucontext *context = to_mucontext(ibcontext); @@ -1899,8 +1909,11 @@ static int mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext) deallocate_uars(dev, context); kfree(bfregi->sys_pages); kfree(bfregi->count); +#if IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING) + mmu_notifier_call_srcu(&context->rcu, &mlx5_ib_free_ucontext_delayed); +#else kfree(context); - +#endif return 0; } diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 99c853c56d31..93fe372c6086 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -143,6 +143,10 @@ struct mlx5_ib_ucontext { u16 devx_uid; /* For RoCE LAG TX affinity */ atomic_t tx_port_affinity; +#if IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING) + /* Need for delayed kfree of mlx5_ib_ucontext for mmu_notifier */ + struct rcu_head rcu; +#endif }; static inline struct mlx5_ib_ucontext *to_mucontext(struct ib_ucontext *ibucontext) -- 2.14.4