From: wenglianfa <wenglianfa@xxxxxxxxxx>

When a mailbox for resource (QP/CQ/MR/SRQ) destruction fails, the driver
is unable to notify HW of the destruction. In this case the driver still
frees the CTX while HW may still access it, leading to a UAF.

Introduce a delay-destruction mechanism: when a mailbox for resource
destruction fails, the related buffer is not freed in the normal
destruction flow. Instead, the resource node is linked to a list, and
all buffers on the list are freed when the device is uninited.

Fixes: b0969f83890b ("RDMA/hns: Do not destroy QP resources in the hw resetting phase")
Signed-off-by: wenglianfa <wenglianfa@xxxxxxxxxx>
Signed-off-by: Junxian Huang <huangjunxian6@xxxxxxxxxxxxx>
---
A short userspace sketch of the deferred-free pattern is appended after
the patch for illustration.

 drivers/infiniband/hw/hns/hns_roce_cq.c     | 12 ++++++--
 drivers/infiniband/hw/hns/hns_roce_device.h | 26 +++++++++++++++++
 drivers/infiniband/hw/hns/hns_roce_hw_v2.c  |  4 ++-
 drivers/infiniband/hw/hns/hns_roce_main.c   |  7 +++++
 drivers/infiniband/hw/hns/hns_roce_mr.c     | 32 +++++++++++++++++++--
 drivers/infiniband/hw/hns/hns_roce_qp.c     |  8 +++++-
 drivers/infiniband/hw/hns/hns_roce_srq.c    | 17 ++++++++---
 7 files changed, 94 insertions(+), 12 deletions(-)

diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c
index 236ee3fefe16..dc49d35ec4ec 100644
--- a/drivers/infiniband/hw/hns/hns_roce_cq.c
+++ b/drivers/infiniband/hw/hns/hns_roce_cq.c
@@ -178,9 +178,11 @@ static void free_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq)
 
 	ret = hns_roce_destroy_hw_ctx(hr_dev, HNS_ROCE_CMD_DESTROY_CQC,
 				      hr_cq->cqn);
-	if (ret)
+	if (ret) {
+		hr_cq->delayed_destroy_flag = true;
 		dev_err_ratelimited(dev, "DESTROY_CQ failed (%d) for CQN %06lx\n",
 				    ret, hr_cq->cqn);
+	}
 
 	xa_erase_irq(&cq_table->array, hr_cq->cqn);
 
@@ -192,7 +194,8 @@
 	complete(&hr_cq->free);
 	wait_for_completion(&hr_cq->free);
 
-	hns_roce_table_put(hr_dev, &cq_table->table, hr_cq->cqn);
+	if (!hr_cq->delayed_destroy_flag)
+		hns_roce_table_put(hr_dev, &cq_table->table, hr_cq->cqn);
 }
 
 static int alloc_cq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq,
@@ -220,7 +223,10 @@ static int alloc_cq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq,
 
 static void free_cq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq)
 {
-	hns_roce_mtr_destroy(hr_dev, hr_cq->mtr);
+	if (hr_cq->delayed_destroy_flag)
+		hns_roce_add_unfree_mtr(hr_dev, hr_cq->mtr);
+	else
+		hns_roce_mtr_destroy(hr_dev, hr_cq->mtr);
 }
 
 static int alloc_cq_db(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq,
diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
index ed0fa29f0cff..e010fb3230a2 100644
--- a/drivers/infiniband/hw/hns/hns_roce_device.h
+++ b/drivers/infiniband/hw/hns/hns_roce_device.h
@@ -314,6 +314,7 @@ struct hns_roce_mtr {
 	struct ib_umem		*umem; /* user space buffer */
 	struct hns_roce_buf	*kmem; /* kernel space buffer */
 	struct hns_roce_hem_cfg  hem_cfg; /* config for hardware addressing */
+	struct list_head	node; /* list node for delay-destruction */
 };
 
 struct hns_roce_mw {
@@ -339,6 +340,11 @@ struct hns_roce_mr {
 	struct hns_roce_mtr	*pbl_mtr;
 	u32			npages;
 	dma_addr_t		*page_list;
+	/* When this is true, the free and destruction of the related
+	 * resources will be delayed until the device is uninited, ensuring
+	 * no memory leak.
+	 */
+	bool			delayed_destroy_flag;
 };
 
 struct hns_roce_mr_table {
@@ -442,6 +448,11 @@ struct hns_roce_cq {
 	struct list_head	rq_list; /* all qps on this recv cq */
 	int			is_armed; /* cq is armed */
 	struct list_head	node; /* all armed cqs are on a list */
+	/* When this is true, the free and destruction of the related
+	 * resources will be delayed until the device is uninited, ensuring
+	 * no memory leak.
+	 */
+	bool			delayed_destroy_flag;
 };
 
 struct hns_roce_idx_que {
@@ -475,6 +486,11 @@ struct hns_roce_srq {
 	void (*event)(struct hns_roce_srq *srq, enum hns_roce_event event);
 	struct hns_roce_db	rdb;
 	u32			cap_flags;
+	/* When this is true, the free and destruction of the related
+	 * resources will be delayed until the device is uninited, ensuring
+	 * no memory leak.
+	 */
+	bool			delayed_destroy_flag;
 };
 
 struct hns_roce_uar_table {
@@ -642,6 +658,11 @@ struct hns_roce_qp {
 
 	/* 0: flush needed, 1: unneeded */
 	unsigned long		flush_flag;
+	/* When this is true, the free and destruction of the related
+	 * resources will be delayed until the device is uninited, ensuring
+	 * no memory leak.
+	 */
+	bool			delayed_destroy_flag;
 	struct hns_roce_work	flush_work;
 	struct list_head	node; /* all qps are on a list */
 	struct list_head	rq_node; /* all recv qps are on a list */
@@ -1025,6 +1046,8 @@ struct hns_roce_dev {
 	u64 dwqe_page;
 	struct hns_roce_dev_debugfs dbgfs;
 	atomic64_t *dfx_cnt;
+	struct list_head mtr_unfree_list; /* list of unfree mtr on this dev */
+	struct mutex mtr_unfree_list_mutex; /* protect mtr_unfree_list */
 };
 
 static inline struct hns_roce_dev *to_hr_dev(struct ib_device *ib_dev)
@@ -1296,6 +1319,9 @@ int hns_roce_fill_res_qp_entry(struct sk_buff *msg, struct ib_qp *ib_qp);
 int hns_roce_fill_res_qp_entry_raw(struct sk_buff *msg, struct ib_qp *ib_qp);
 int hns_roce_fill_res_mr_entry(struct sk_buff *msg, struct ib_mr *ib_mr);
 int hns_roce_fill_res_mr_entry_raw(struct sk_buff *msg, struct ib_mr *ib_mr);
+void hns_roce_add_unfree_mtr(struct hns_roce_dev *hr_dev,
+			     struct hns_roce_mtr *mtr);
+void hns_roce_free_unfree_mtr(struct hns_roce_dev *hr_dev);
 int hns_roce_fill_res_srq_entry(struct sk_buff *msg, struct ib_srq *ib_srq);
 int hns_roce_fill_res_srq_entry_raw(struct sk_buff *msg, struct ib_srq *ib_srq);
 struct hns_user_mmap_entry *
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index d60ca0a306e9..86d6a8f2a26d 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -5587,10 +5587,12 @@ static int hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev,
 		/* Modify qp to reset before destroying qp */
 		ret = hns_roce_v2_modify_qp(&hr_qp->ibqp, NULL, 0,
 					    hr_qp->state, IB_QPS_RESET, udata);
-		if (ret)
+		if (ret) {
+			hr_qp->delayed_destroy_flag = true;
 			ibdev_err_ratelimited(ibdev,
 					      "failed to modify QP to RST, ret = %d.\n",
 					      ret);
+		}
 	}
 
 	send_cq = hr_qp->ibqp.send_cq ? to_hr_cq(hr_qp->ibqp.send_cq) : NULL;
diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c
index ae24c81c9812..b5ece1bedc11 100644
--- a/drivers/infiniband/hw/hns/hns_roce_main.c
+++ b/drivers/infiniband/hw/hns/hns_roce_main.c
@@ -952,6 +952,8 @@ static void hns_roce_teardown_hca(struct hns_roce_dev *hr_dev)
 	if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_CQ_RECORD_DB ||
 	    hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_RECORD_DB)
 		mutex_destroy(&hr_dev->pgdir_mutex);
+
+	mutex_destroy(&hr_dev->mtr_unfree_list_mutex);
 }
 
 /**
@@ -966,6 +968,9 @@ static int hns_roce_setup_hca(struct hns_roce_dev *hr_dev)
 
 	spin_lock_init(&hr_dev->sm_lock);
 
+	INIT_LIST_HEAD(&hr_dev->mtr_unfree_list);
+	mutex_init(&hr_dev->mtr_unfree_list_mutex);
+
 	if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_CQ_RECORD_DB ||
 	    hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_RECORD_DB) {
 		INIT_LIST_HEAD(&hr_dev->pgdir_list);
@@ -1005,6 +1010,7 @@
 	if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_CQ_RECORD_DB ||
 	    hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_RECORD_DB)
 		mutex_destroy(&hr_dev->pgdir_mutex);
+	mutex_destroy(&hr_dev->mtr_unfree_list_mutex);
 
 	return ret;
 }
@@ -1179,6 +1185,7 @@ void hns_roce_exit(struct hns_roce_dev *hr_dev)
 
 	if (hr_dev->hw->hw_exit)
 		hr_dev->hw->hw_exit(hr_dev);
+	hns_roce_free_unfree_mtr(hr_dev);
 	hns_roce_teardown_hca(hr_dev);
 	hns_roce_cleanup_hem(hr_dev);
 
diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c
index 3902243cac96..228a3512e1a0 100644
--- a/drivers/infiniband/hw/hns/hns_roce_mr.c
+++ b/drivers/infiniband/hw/hns/hns_roce_mr.c
@@ -83,7 +83,8 @@ static void free_mr_key(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr)
 {
 	unsigned long obj = key_to_hw_index(mr->key);
 
-	hns_roce_table_put(hr_dev, &hr_dev->mr_table.mtpt_table, obj);
+	if (!mr->delayed_destroy_flag)
+		hns_roce_table_put(hr_dev, &hr_dev->mr_table.mtpt_table, obj);
 	ida_free(&hr_dev->mr_table.mtpt_ida.ida, (int)obj);
 }
 
@@ -125,7 +126,10 @@ static int alloc_mr_pbl(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr,
 
 static void free_mr_pbl(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr)
 {
-	hns_roce_mtr_destroy(hr_dev, mr->pbl_mtr);
+	if (mr->delayed_destroy_flag && mr->type != MR_TYPE_DMA)
+		hns_roce_add_unfree_mtr(hr_dev, mr->pbl_mtr);
+	else
+		hns_roce_mtr_destroy(hr_dev, mr->pbl_mtr);
 }
 
 static void hns_roce_mr_free(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr)
@@ -137,9 +141,11 @@ static void hns_roce_mr_free(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr)
 		ret = hns_roce_destroy_hw_ctx(hr_dev, HNS_ROCE_CMD_DESTROY_MPT,
 					      key_to_hw_index(mr->key) &
 					      (hr_dev->caps.num_mtpts - 1));
-		if (ret)
+		if (ret) {
+			mr->delayed_destroy_flag = true;
 			ibdev_warn_ratelimited(ibdev, "failed to destroy mpt, ret = %d.\n",
 					       ret);
+		}
 	}
 
 	free_mr_pbl(hr_dev, mr);
@@ -1220,3 +1226,23 @@ void hns_roce_mtr_destroy(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr)
 	mtr_free_bufs(hr_dev, mtr);
 	kvfree(mtr);
 }
+
+void hns_roce_add_unfree_mtr(struct hns_roce_dev *hr_dev,
+			     struct hns_roce_mtr *mtr)
+{
+	mutex_lock(&hr_dev->mtr_unfree_list_mutex);
+	list_add_tail(&mtr->node, &hr_dev->mtr_unfree_list);
+	mutex_unlock(&hr_dev->mtr_unfree_list_mutex);
+}
+
+void hns_roce_free_unfree_mtr(struct hns_roce_dev *hr_dev)
+{
+	struct hns_roce_mtr *mtr, *next;
+
+	mutex_lock(&hr_dev->mtr_unfree_list_mutex);
+	list_for_each_entry_safe(mtr, next, &hr_dev->mtr_unfree_list, node) {
+		list_del(&mtr->node);
+		hns_roce_mtr_destroy(hr_dev, mtr);
+	}
+	mutex_unlock(&hr_dev->mtr_unfree_list_mutex);
+}
diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c
index 62fc9a3c784e..91c605f67dca 100644
--- a/drivers/infiniband/hw/hns/hns_roce_qp.c
+++ b/drivers/infiniband/hw/hns/hns_roce_qp.c
@@ -410,6 +410,9 @@ static void free_qpc(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp)
 {
 	struct hns_roce_qp_table *qp_table = &hr_dev->qp_table;
 
+	if (hr_qp->delayed_destroy_flag)
+		return;
+
 	if (hr_dev->caps.trrl_entry_sz)
 		hns_roce_table_put(hr_dev, &qp_table->trrl_table, hr_qp->qpn);
 	hns_roce_table_put(hr_dev, &qp_table->irrl_table, hr_qp->qpn);
@@ -801,7 +804,10 @@ static int alloc_qp_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
 
 static void free_qp_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp)
 {
-	hns_roce_mtr_destroy(hr_dev, hr_qp->mtr);
+	if (hr_qp->delayed_destroy_flag)
+		hns_roce_add_unfree_mtr(hr_dev, hr_qp->mtr);
+	else
+		hns_roce_mtr_destroy(hr_dev, hr_qp->mtr);
 }
 
 static inline bool user_qp_has_sdb(struct hns_roce_dev *hr_dev,
diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c
index 6685e5a1afd2..848a82395185 100644
--- a/drivers/infiniband/hw/hns/hns_roce_srq.c
+++ b/drivers/infiniband/hw/hns/hns_roce_srq.c
@@ -150,9 +150,11 @@ static void free_srqc(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq)
 
 	ret = hns_roce_destroy_hw_ctx(hr_dev, HNS_ROCE_CMD_DESTROY_SRQ,
 				      srq->srqn);
-	if (ret)
+	if (ret) {
+		srq->delayed_destroy_flag = true;
 		dev_err_ratelimited(hr_dev->dev, "DESTROY_SRQ failed (%d) for SRQN %06lx\n",
 				    ret, srq->srqn);
+	}
 
 	xa_erase_irq(&srq_table->xa, srq->srqn);
 
@@ -160,7 +162,8 @@
 	complete(&srq->free);
 	wait_for_completion(&srq->free);
 
-	hns_roce_table_put(hr_dev, &srq_table->table, srq->srqn);
+	if (!srq->delayed_destroy_flag)
+		hns_roce_table_put(hr_dev, &srq_table->table, srq->srqn);
 }
 
 static int alloc_srq_idx(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq,
@@ -213,7 +216,10 @@ static void free_srq_idx(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq)
 	bitmap_free(idx_que->bitmap);
 	idx_que->bitmap = NULL;
 
-	hns_roce_mtr_destroy(hr_dev, idx_que->mtr);
+	if (srq->delayed_destroy_flag)
+		hns_roce_add_unfree_mtr(hr_dev, idx_que->mtr);
+	else
+		hns_roce_mtr_destroy(hr_dev, idx_que->mtr);
 }
 
 static int alloc_srq_wqe_buf(struct hns_roce_dev *hr_dev,
@@ -248,7 +254,10 @@ static int alloc_srq_wqe_buf(struct hns_roce_dev *hr_dev,
 static void free_srq_wqe_buf(struct hns_roce_dev *hr_dev,
 			     struct hns_roce_srq *srq)
 {
-	hns_roce_mtr_destroy(hr_dev, srq->buf_mtr);
+	if (srq->delayed_destroy_flag)
+		hns_roce_add_unfree_mtr(hr_dev, srq->buf_mtr);
+	else
+		hns_roce_mtr_destroy(hr_dev, srq->buf_mtr);
 }
 
 static int alloc_srq_wrid(struct hns_roce_srq *srq)
-- 
2.33.0
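
For illustration, here is a minimal userspace sketch of the deferred-free
pattern the patch implements. Every identifier in it (unfree_node,
unfree_list, unfree_mutex, fake_destroy_cmd) is a hypothetical stand-in,
not the driver's API; the in-kernel version uses struct list_head,
struct mutex, and the hns_roce_add_unfree_mtr()/hns_roce_free_unfree_mtr()
helpers exactly as the diff shows.

/* Minimal userspace sketch of the delay-destruction pattern.
 * All names here are illustrative, not the driver's actual API.
 */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct unfree_node {
	struct unfree_node *next;
	void *buf;			/* buffer HW might still reference */
};

static struct unfree_node *unfree_list;	/* deferred-free list head */
static pthread_mutex_t unfree_mutex = PTHREAD_MUTEX_INITIALIZER;

/* Stand-in for the destroy mailbox command: pretend it fails. */
static int fake_destroy_cmd(void)
{
	return -1;
}

/* On mailbox failure, defer the free instead of freeing immediately. */
static void add_unfree(struct unfree_node *node)
{
	pthread_mutex_lock(&unfree_mutex);
	node->next = unfree_list;
	unfree_list = node;
	pthread_mutex_unlock(&unfree_mutex);
}

/* At "device uninit", HW can no longer touch the buffers: drain the list. */
static void free_unfree(void)
{
	pthread_mutex_lock(&unfree_mutex);
	while (unfree_list) {
		struct unfree_node *node = unfree_list;

		unfree_list = node->next;
		free(node->buf);
		free(node);
	}
	pthread_mutex_unlock(&unfree_mutex);
}

int main(void)
{
	struct unfree_node *node = calloc(1, sizeof(*node));

	if (!node)
		return 1;
	node->buf = malloc(4096);
	if (fake_destroy_cmd())		/* destroy mailbox failed */
		add_unfree(node);	/* keep buf alive: no UAF */
	free_unfree();			/* freed only at uninit: no leak */
	printf("deferred buffers freed at uninit\n");
	return 0;
}

The ordering argument is the same as in the kernel version: buffers are
only pushed onto the list while HW may still hold references, and drained
at device uninit once HW access has stopped. A mutex (rather than a
spinlock) suffices here presumably because both the destroy verbs and
hns_roce_exit() run in process context.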