On Mon, Jul 17, 2023 at 02:03:40PM +0800, Junxian Huang wrote: > Currently, RoCE driver gets function resource values from firmware > without validity check. Kernel trusts devices underneath, otherwise why should we stop with capabilities? Let's check all PCI transactions and verify any response from FW too. > As these resources are mostly related to memory, > an invalid value may lead to serious consequence such as kernel panic. > > This patch adds check for these resource values and adjusts the invalid > ones. These are FW bugs which should be fixed. Thanks > > Signed-off-by: Junxian Huang <huangjunxian6@xxxxxxxxxxxxx> > --- > drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 115 ++++++++++++++++++++- > drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 37 +++++++ > 2 files changed, 148 insertions(+), 4 deletions(-) > > diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c > index c4b92d8bd98a..f5649fd25042 100644 > --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c > +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c > @@ -1650,6 +1650,97 @@ static int hns_roce_config_global_param(struct hns_roce_dev *hr_dev) > return hns_roce_cmq_send(hr_dev, &desc, 1); > } > > +static const struct hns_roce_bt_num { > + u32 res_offset; > + u32 min; > + u32 max; > + enum hns_roce_res_invalid_flag invalid_flag; > + enum hns_roce_res_revision revision; > + bool vf_support; > +} bt_num_table[] = { > + {RES_OFFSET_IN_CAPS(qpc_bt_num), 1, > + MAX_QPC_BT_NUM, QPC_BT_NUM_INVALID_FLAG, RES_FOR_ALL, true}, > + {RES_OFFSET_IN_CAPS(srqc_bt_num), 1, > + MAX_SRQC_BT_NUM, SRQC_BT_NUM_INVALID_FLAG, RES_FOR_ALL, true}, > + {RES_OFFSET_IN_CAPS(cqc_bt_num), 1, > + MAX_CQC_BT_NUM, CQC_BT_NUM_INVALID_FLAG, RES_FOR_ALL, true}, > + {RES_OFFSET_IN_CAPS(mpt_bt_num), 1, > + MAX_MPT_BT_NUM, MPT_BT_NUM_INVALID_FLAG, RES_FOR_ALL, true}, > + {RES_OFFSET_IN_CAPS(sl_num), 1, > + MAX_SL_NUM, QID_NUM_INVALID_FLAG, RES_FOR_ALL, true}, > + {RES_OFFSET_IN_CAPS(sccc_bt_num), 1, > + MAX_SCCC_BT_NUM, SCCC_BT_NUM_INVALID_FLAG, RES_FOR_ALL, true}, > + {RES_OFFSET_IN_CAPS(qpc_timer_bt_num), 1, > + MAX_QPC_TIMER_BT_NUM, QPC_TIMER_BT_NUM_INVALID_FLAG, > + RES_FOR_ALL, false}, > + {RES_OFFSET_IN_CAPS(cqc_timer_bt_num), 1, > + MAX_CQC_TIMER_BT_NUM, CQC_TIMER_BT_NUM_INVALID_FLAG, > + RES_FOR_ALL, false}, > + {RES_OFFSET_IN_CAPS(gmv_bt_num), 1, > + MAX_GMV_BT_NUM, GMV_BT_NUM_INVALID_FLAG, > + RES_FOR_HIP09, true}, > + {RES_OFFSET_IN_CAPS(smac_bt_num), 1, > + MAX_SMAC_BT_NUM, SMAC_BT_NUM_INVALID_FLAG, > + RES_FOR_HIP08, true}, > + {RES_OFFSET_IN_CAPS(sgid_bt_num), 1, > + MAX_SGID_BT_NUM, SGID_BT_NUM_INVALID_FLAG, > + RES_FOR_HIP08, true}, > +}; > + > +static bool check_res_is_supported(struct hns_roce_dev *hr_dev, > + struct hns_roce_bt_num *bt_num_entry) > +{ > + if (!bt_num_entry->vf_support && hr_dev->is_vf) > + return false; > + > + if (bt_num_entry->revision == RES_FOR_HIP09 && > + hr_dev->pci_dev->revision <= PCI_REVISION_ID_HIP08) > + return false; > + > + if (bt_num_entry->revision == RES_FOR_HIP08 && > + hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) > + return false; > + > + return true; > +} > + > +static void adjust_eqc_bt_num(struct hns_roce_caps *caps, u16 *invalid_flag) > +{ > + if (caps->eqc_bt_num < caps->num_comp_vectors + caps->num_aeq_vectors || > + caps->eqc_bt_num > MAX_EQC_BT_NUM) { > + caps->eqc_bt_num = caps->eqc_bt_num > MAX_EQC_BT_NUM ? > + MAX_EQC_BT_NUM : caps->num_comp_vectors + > + caps->num_aeq_vectors; > + *invalid_flag |= 1 << EQC_BT_NUM_INVALID_FLAG; > + } > +} > + > +static u16 adjust_res_caps(struct hns_roce_dev *hr_dev) > +{ > + struct hns_roce_caps *caps = &hr_dev->caps; > + u16 invalid_flag = 0; > + u32 min, max; > + u32 *res; > + int i; > + > + for (i = 0; i < ARRAY_SIZE(bt_num_table); i++) { > + if (!check_res_is_supported(hr_dev, &bt_num_table[i])) > + continue; > + > + res = (u32 *)((void *)caps + bt_num_table[i].res_offset); > + min = bt_num_table[i].min; > + max = bt_num_table[i].max; > + if (*res < min || *res > max) { > + *res = *res < min ? min : max; > + invalid_flag |= 1 << bt_num_table[i].invalid_flag; > + } > + } > + > + adjust_eqc_bt_num(caps, &invalid_flag); > + > + return invalid_flag; > +} > + > static int load_func_res_caps(struct hns_roce_dev *hr_dev, bool is_vf) > { > struct hns_roce_cmq_desc desc[2]; > @@ -1730,11 +1821,19 @@ static int hns_roce_query_pf_resource(struct hns_roce_dev *hr_dev) > } > > ret = load_pf_timer_res_caps(hr_dev); > - if (ret) > + if (ret) { > dev_err(dev, "failed to load pf timer resource, ret = %d.\n", > ret); > + return ret; > + } > > - return ret; > + ret = adjust_res_caps(hr_dev); > + if (ret) > + dev_warn(dev, > + "invalid resource values have been adjusted, invalid_flag = 0x%x.\n", > + ret); > + > + return 0; > } > > static int hns_roce_query_vf_resource(struct hns_roce_dev *hr_dev) > @@ -1743,10 +1842,18 @@ static int hns_roce_query_vf_resource(struct hns_roce_dev *hr_dev) > int ret; > > ret = load_func_res_caps(hr_dev, true); > - if (ret) > + if (ret) { > dev_err(dev, "failed to load vf res caps, ret = %d.\n", ret); > + return ret; > + } > > - return ret; > + ret = adjust_res_caps(hr_dev); > + if (ret) > + dev_warn(dev, > + "invalid resource values have been adjusted, invalid_flag = 0x%x.\n", > + ret); > + > + return 0; > } > > static int __hns_roce_set_vf_switch_param(struct hns_roce_dev *hr_dev, > diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h > index d9693f6cc802..c2d46383c88c 100644 > --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h > +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h > @@ -972,6 +972,43 @@ struct hns_roce_func_clear { > #define CFG_GLOBAL_PARAM_1US_CYCLES CMQ_REQ_FIELD_LOC(9, 0) > #define CFG_GLOBAL_PARAM_UDP_PORT CMQ_REQ_FIELD_LOC(31, 16) > > +enum hns_roce_res_invalid_flag { > + QPC_BT_NUM_INVALID_FLAG, > + SRQC_BT_NUM_INVALID_FLAG, > + CQC_BT_NUM_INVALID_FLAG, > + MPT_BT_NUM_INVALID_FLAG, > + EQC_BT_NUM_INVALID_FLAG, > + SMAC_BT_NUM_INVALID_FLAG, > + SGID_BT_NUM_INVALID_FLAG, > + QID_NUM_INVALID_FLAG, > + SCCC_BT_NUM_INVALID_FLAG, > + GMV_BT_NUM_INVALID_FLAG, > + QPC_TIMER_BT_NUM_INVALID_FLAG, > + CQC_TIMER_BT_NUM_INVALID_FLAG, > +}; > + > +enum hns_roce_res_revision { > + RES_FOR_HIP08, > + RES_FOR_HIP09, > + RES_FOR_ALL, > +}; > + > +#define RES_OFFSET_IN_CAPS(res) \ > + (offsetof(struct hns_roce_caps, res)) > + > +#define MAX_QPC_BT_NUM 2048 > +#define MAX_SRQC_BT_NUM 512 > +#define MAX_CQC_BT_NUM 512 > +#define MAX_MPT_BT_NUM 512 > +#define MAX_EQC_BT_NUM 512 > +#define MAX_SMAC_BT_NUM 256 > +#define MAX_SGID_BT_NUM 256 > +#define MAX_SL_NUM 8 > +#define MAX_SCCC_BT_NUM 512 > +#define MAX_GMV_BT_NUM 256 > +#define MAX_QPC_TIMER_BT_NUM 1728 > +#define MAX_CQC_TIMER_BT_NUM 1600 > + > /* > * Fields of HNS_ROCE_OPC_QUERY_PF_RES, HNS_ROCE_OPC_QUERY_VF_RES > * and HNS_ROCE_OPC_ALLOC_VF_RES > -- > 2.30.0 >