I'll look at it again.

-----Original Message-----
From: lizhijian@xxxxxxxxxxx <lizhijian@xxxxxxxxxxx>
Sent: Wednesday, August 3, 2022 9:43 PM
To: Bob Pearson <rpearsonhpe@xxxxxxxxx>; jg@xxxxxxxxxx; zyjzyj2000@xxxxxxxxx; Hack, Jenny (Ft. Collins) <jhack@xxxxxxx>; linux-rdma@xxxxxxxxxxxxxxx
Subject: Re: [PATCH v3 for-next] RDMA/rxe: Fix error paths in MR alloc routines

On 04/08/2022 06:41, Bob Pearson wrote:
> Currently the rxe driver has incorrect code in error paths for
> allocating MR objects. The PD and umem are always freed in
> rxe_mr_cleanup() but in some error paths they are already freed or
> never set. This patch makes sure that the PD is always set and checks
> to see if umem is set before freeing it in rxe_mr_cleanup(). umem and
> maps are freed if an error occurs in an allocate MR call. Setting
> mr->ibmr.pd is removed since this should always take place in
> rdma-core.
>
> Reported-by: Li Zhijian <lizhijian@xxxxxxxxxxx>
> Link: https://lore.kernel.org/linux-rdma/11dafa5f-c52d-16c1-fe37-2cd45ab20474@xxxxxxxxxxx/

I haven't reviewed the details, but it still causes a kernel crash after force-injecting an error:

diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c
index 56940cf70fe0..bf72b9834575 100644
--- a/drivers/infiniband/sw/rxe/rxe_mr.c
+++ b/drivers/infiniband/sw/rxe/rxe_mr.c
@@ -127,7 +127,7 @@ int rxe_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, u64 iova,
 
 	rxe_mr_init(access, mr);
 
-	err = rxe_mr_alloc(mr, num_buf);
+	err = -ENOMEM; // rxe_mr_alloc(mr, num_buf);
 	if (err) {
 		ib_umem_release(umem);
 		return err;

> Fixes: 3902b429ca14 ("Implement invalidate MW operations")
> Signed-off-by: Bob Pearson <rpearsonhpe@xxxxxxxxx>
> ---
> v3:
>   rebased to apply to current for-next after
>   Revert "RDMA/rxe: Create duplicate mapping tables for FMRs"
> v2:
>   Moved setting mr->umem until after checks to avoid sending
>   an ERR_PTR to ib_umem_release().
>   Cleaned up umem and map sets if errors occur in alloc mr calls.
>   Rebased to current for-next.
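For context, the cleanup-side change the commit message describes (drop the PD ref unconditionally now that the PD is always set, but test umem before releasing it) is not part of the hunks quoted below. A minimal sketch of that idea follows; the rxe_pool_elem plumbing and the mr_pd()/rxe_put() helpers are assumed from the rxe driver of this era, so treat this as an illustration rather than the applied code:

```c
/* Sketch only, per the commit message, not a quoted hunk.
 * mr->umem is set solely by rxe_mr_init_user(), so it stays NULL
 * for DMA and fast-reg MRs and on early error paths and must be
 * checked; the PD ref is guaranteed taken, so always drop it.
 */
void rxe_mr_cleanup(struct rxe_pool_elem *elem)
{
	struct rxe_mr *mr = container_of(elem, typeof(*mr), elem);

	rxe_put(mr_pd(mr));		/* PD is always set by this patch */

	if (mr->umem)			/* may never have been set */
		ib_umem_release(mr->umem);
}
```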
> ---
>  drivers/infiniband/sw/rxe/rxe_loc.h   |  6 +-
>  drivers/infiniband/sw/rxe/rxe_mr.c    | 91 +++++++++++----------
>  drivers/infiniband/sw/rxe/rxe_verbs.c | 53 ++++++----------
>  3 files changed, 57 insertions(+), 93 deletions(-)
>
> diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h
> index 22f6cc31d1d6..c2a5c8814a48 100644
> --- a/drivers/infiniband/sw/rxe/rxe_loc.h
> +++ b/drivers/infiniband/sw/rxe/rxe_loc.h
> @@ -64,10 +64,10 @@ int rxe_mmap(struct ib_ucontext *context, struct vm_area_struct *vma);
>
>  /* rxe_mr.c */
>  u8 rxe_get_next_key(u32 last_key);
> -void rxe_mr_init_dma(struct rxe_pd *pd, int access, struct rxe_mr *mr);
> -int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova,
> +void rxe_mr_init_dma(int access, struct rxe_mr *mr);
> +int rxe_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, u64 iova,
>  		     int access, struct rxe_mr *mr);
> -int rxe_mr_init_fast(struct rxe_pd *pd, int max_pages, struct rxe_mr *mr);
> +int rxe_mr_init_fast(int max_pages, struct rxe_mr *mr);
>  int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length,
>  		enum rxe_mr_copy_dir dir);
>  int copy_data(struct rxe_pd *pd, int access, struct rxe_dma_info *dma,
> diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c
> index 850b80f5ad8b..56940cf70fe0 100644
> --- a/drivers/infiniband/sw/rxe/rxe_mr.c
> +++ b/drivers/infiniband/sw/rxe/rxe_mr.c
> @@ -67,20 +67,24 @@ static void rxe_mr_init(int access, struct rxe_mr *mr)
>
>  static int rxe_mr_alloc(struct rxe_mr *mr, int num_buf)
>  {
> -	int i;
> -	int num_map;
>  	struct rxe_map **map = mr->map;
> +	int num_map;
> +	int i;
>
>  	num_map = (num_buf + RXE_BUF_PER_MAP - 1) / RXE_BUF_PER_MAP;
>
>  	mr->map = kmalloc_array(num_map, sizeof(*map), GFP_KERNEL);
>  	if (!mr->map)
> -		goto err1;
> +		return -ENOMEM;
>
>  	for (i = 0; i < num_map; i++) {
>  		mr->map[i] = kmalloc(sizeof(**map), GFP_KERNEL);
> -		if (!mr->map[i])
> -			goto err2;
> +		if (!mr->map[i]) {
> +			for (i--; i >= 0; i--)
> +				kfree(mr->map[i]);
> +			kfree(mr->map);
> +			return -ENOMEM;
> +		}
>  	}
>
>  	BUILD_BUG_ON(!is_power_of_2(RXE_BUF_PER_MAP));
> @@ -93,45 +97,31 @@ static int rxe_mr_alloc(struct rxe_mr *mr, int num_buf)
>  	mr->max_buf = num_map * RXE_BUF_PER_MAP;
>
>  	return 0;
> -
> -err2:
> -	for (i--; i >= 0; i--)
> -		kfree(mr->map[i]);
> -
> -	kfree(mr->map);
> -err1:
> -	return -ENOMEM;
>  }
>
> -void rxe_mr_init_dma(struct rxe_pd *pd, int access, struct rxe_mr *mr)
> +void rxe_mr_init_dma(int access, struct rxe_mr *mr)
>  {
>  	rxe_mr_init(access, mr);
>
> -	mr->ibmr.pd = &pd->ibpd;
>  	mr->access = access;
>  	mr->state = RXE_MR_STATE_VALID;
>  	mr->type = IB_MR_TYPE_DMA;
>  }
>
> -int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova,
> +int rxe_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, u64 iova,
>  		     int access, struct rxe_mr *mr)
>  {
> -	struct rxe_map **map;
> -	struct rxe_phys_buf *buf = NULL;
> -	struct ib_umem *umem;
> -	struct sg_page_iter sg_iter;
> -	int num_buf;
> -	void *vaddr;
> +	struct rxe_phys_buf *buf = NULL;
> +	struct sg_page_iter sg_iter;
> +	struct rxe_map **map;
> +	struct ib_umem *umem;
> +	int num_buf;
> +	void *vaddr;
>  	int err;
> -	int i;
>
> -	umem = ib_umem_get(pd->ibpd.device, start, length, access);
> -	if (IS_ERR(umem)) {
> -		pr_warn("%s: Unable to pin memory region err = %d\n",
> -			__func__, (int)PTR_ERR(umem));
> -		err = PTR_ERR(umem);
> -		goto err_out;
> -	}
> +	umem = ib_umem_get(&rxe->ib_dev, start, length, access);
> +	if (IS_ERR(umem))
> +		return PTR_ERR(umem);
>
>  	num_buf = ib_umem_num_pages(umem);
>
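The inlined unwind in rxe_mr_alloc() above is the usual partial-allocation pattern: when element i fails, walk back over elements 0..i-1, then free the table itself, so the caller never sees a half-built map. A standalone illustration of the same idiom, in plain C with hypothetical names rather than rxe code:

```c
#include <stdlib.h>

/* Build a table of n heap blocks; on any failure free the blocks
 * already allocated, then the table itself, mirroring how
 * rxe_mr_alloc() now unwinds without the goto labels. */
static int alloc_table(void ***out, int n, size_t elem_size)
{
	void **tab = calloc(n, sizeof(*tab));
	int i;

	if (!tab)
		return -1;

	for (i = 0; i < n; i++) {
		tab[i] = malloc(elem_size);
		if (!tab[i]) {
			for (i--; i >= 0; i--)	/* unwind 0..i-1 */
				free(tab[i]);
			free(tab);
			return -1;
		}
	}

	*out = tab;
	return 0;
}
```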
> @@ -139,9 +129,8 @@ int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova,
>
>  	err = rxe_mr_alloc(mr, num_buf);
>  	if (err) {
> -		pr_warn("%s: Unable to allocate memory for map\n",
> -			__func__);
> -		goto err_release_umem;
> +		ib_umem_release(umem);
> +		return err;
>  	}
>
>  	mr->page_shift = PAGE_SHIFT;
> @@ -152,7 +141,7 @@ int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova,
>  	if (length > 0) {
>  		buf = map[0]->buf;
>
> -		for_each_sgtable_page (&umem->sgt_append.sgt, &sg_iter, 0) {
> +		for_each_sgtable_page(&umem->sgt_append.sgt, &sg_iter, 0) {
>  			if (num_buf >= RXE_BUF_PER_MAP) {
>  				map++;
>  				buf = map[0]->buf;
> @@ -161,21 +150,22 @@ int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova,
>
>  			vaddr = page_address(sg_page_iter_page(&sg_iter));
>  			if (!vaddr) {
> -				pr_warn("%s: Unable to get virtual address\n",
> -					__func__);
> -				err = -ENOMEM;
> -				goto err_cleanup_map;
> +				int i;
> +
> +				for (i = 0; i < mr->num_map; i++)
> +					kfree(mr->map[i]);
> +				kfree(mr->map);
> +				ib_umem_release(umem);
> +				return -ENOMEM;
>  			}
>
>  			buf->addr = (uintptr_t)vaddr;
>  			buf->size = PAGE_SIZE;
>  			num_buf++;
>  			buf++;
> -
>  		}
>  	}
>
> -	mr->ibmr.pd = &pd->ibpd;
>  	mr->umem = umem;
>  	mr->access = access;
>  	mr->length = length;
> @@ -186,18 +176,9 @@ int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova,
>  	mr->type = IB_MR_TYPE_USER;
>
>  	return 0;
> -
> -err_cleanup_map:
> -	for (i = 0; i < mr->num_map; i++)
> -		kfree(mr->map[i]);
> -	kfree(mr->map);
> -err_release_umem:
> -	ib_umem_release(umem);
> -err_out:

Could you keep this goto label? I have WIP code which needs such an error path.

Thanks
Zhijian

> base-commit: 6b822d408b58c3c4f26dae93245c6b7d8b39e0f9
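Zhijian's request refers to the kernel's conventional centralized-unwind style: one label per acquired resource, jumped to in reverse acquisition order, so a later patch can insert a step by reusing the labels instead of duplicating cleanup at every failure site. A self-contained sketch of that shape, in ordinary C with made-up resources rather than the rxe code:

```c
#include <errno.h>
#include <stdlib.h>

static void *res_a, *res_b;	/* stand-ins for umem, map, etc. */

/* Label-per-resource unwind: each failure jumps to the label that
 * releases everything acquired before it, in reverse order. */
static int setup(void)
{
	int err = -ENOMEM;

	res_a = malloc(64);
	if (!res_a)
		goto err_out;

	res_b = malloc(64);
	if (!res_b)
		goto err_free_a;

	return 0;

err_free_a:
	free(res_a);
err_out:
	return err;
}
```

A follow-up that acquires a third resource only needs one new allocation and one new label, which is presumably why WIP code built on top of rxe_mr_init_user() benefits from keeping err_cleanup_map/err_release_umem/err_out in place.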