From: Tomer Tayar <ttayar@xxxxxxxxx>

Part of the undefined opcode data is updated in
gaudi2_handle_qman_err_generic() and some in
handle_lower_qman_data_on_err().
However, the 'write_enable' flag is checked only in
gaudi2_handle_qman_err_generic(), and information of more than a single
error can be mixed there.
Moreover, handle_lower_qman_data_on_err() is called only for the lower
QMAN, so for an error in the upper QMAN there is only a partial info.

Move all the data update to be done in a single place, protected by the
'write_enable' flag.
As mainly the lower QMAN's info is interesting, avoid saving the partial
info for the upper QMAN.

Signed-off-by: Tomer Tayar <ttayar@xxxxxxxxx>
Reviewed-by: Oded Gabbay <ogabbay@xxxxxxxxxx>
Signed-off-by: Oded Gabbay <ogabbay@xxxxxxxxxx>
---
 drivers/accel/habanalabs/gaudi2/gaudi2.c | 40 +++++++++++-------------
 1 file changed, 19 insertions(+), 21 deletions(-)

diff --git a/drivers/accel/habanalabs/gaudi2/gaudi2.c b/drivers/accel/habanalabs/gaudi2/gaudi2.c
index f81b57649b00..e0e5615ef9b0 100644
--- a/drivers/accel/habanalabs/gaudi2/gaudi2.c
+++ b/drivers/accel/habanalabs/gaudi2/gaudi2.c
@@ -7858,10 +7858,11 @@ static bool gaudi2_handle_ecc_event(struct hl_device *hdev, u16 event_type,
 	return !!ecc_data->is_critical;
 }
 
-static void handle_lower_qman_data_on_err(struct hl_device *hdev, u64 qman_base, u64 event_mask)
+static void handle_lower_qman_data_on_err(struct hl_device *hdev, u64 qman_base, u32 engine_id)
 {
-	u32 lo, hi, cq_ptr_size, cp_sts;
+	struct undefined_opcode_info *undef_opcode = &hdev->captured_err_info.undef_opcode;
 	u64 cq_ptr, cp_current_inst;
+	u32 lo, hi, cq_size, cp_sts;
 	bool is_arc_cq;
 
 	cp_sts = RREG32(qman_base + QM_CP_STS_4_OFFSET);
@@ -7871,12 +7872,12 @@ static void handle_lower_qman_data_on_err(struct hl_device *hdev, u64 qman_base,
 		lo = RREG32(qman_base + QM_ARC_CQ_PTR_LO_STS_OFFSET);
 		hi = RREG32(qman_base + QM_ARC_CQ_PTR_HI_STS_OFFSET);
 		cq_ptr = ((u64) hi) << 32 | lo;
-		cq_ptr_size = RREG32(qman_base + QM_ARC_CQ_TSIZE_STS_OFFSET);
+		cq_size = RREG32(qman_base + QM_ARC_CQ_TSIZE_STS_OFFSET);
 	} else {
 		lo = RREG32(qman_base + QM_CQ_PTR_LO_STS_4_OFFSET);
 		hi = RREG32(qman_base + QM_CQ_PTR_HI_STS_4_OFFSET);
 		cq_ptr = ((u64) hi) << 32 | lo;
-		cq_ptr_size = RREG32(qman_base + QM_CQ_TSIZE_STS_4_OFFSET);
+		cq_size = RREG32(qman_base + QM_CQ_TSIZE_STS_4_OFFSET);
 	}
 
 	lo = RREG32(qman_base + QM_CP_CURRENT_INST_LO_4_OFFSET);
@@ -7885,12 +7886,16 @@ static void handle_lower_qman_data_on_err(struct hl_device *hdev, u64 qman_base,
 
 	dev_info(hdev->dev,
 		"LowerQM. %sCQ: {ptr %#llx, size %u}, CP: {instruction %#018llx}\n",
-		is_arc_cq ? "ARC_" : "", cq_ptr, cq_ptr_size, cp_current_inst);
+		is_arc_cq ? "ARC_" : "", cq_ptr, cq_size, cp_current_inst);
 
-	if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) {
-		hdev->captured_err_info.undef_opcode.cq_addr = cq_ptr;
-		hdev->captured_err_info.undef_opcode.cq_size = cq_ptr_size;
-		hdev->captured_err_info.undef_opcode.stream_id = QMAN_STREAMS;
+	if (undef_opcode->write_enable) {
+		memset(undef_opcode, 0, sizeof(*undef_opcode));
+		undef_opcode->timestamp = ktime_get();
+		undef_opcode->cq_addr = cq_ptr;
+		undef_opcode->cq_size = cq_size;
+		undef_opcode->engine_id = engine_id;
+		undef_opcode->stream_id = QMAN_STREAMS;
+		undef_opcode->write_enable = 0;
 	}
 }
 
@@ -7929,19 +7934,12 @@ static int gaudi2_handle_qman_err_generic(struct hl_device *hdev, u16 event_type
 				error_count++;
 			}
 
-		/* check for undefined opcode */
-		if (glbl_sts_val & PDMA0_QM_GLBL_ERR_STS_CP_UNDEF_CMD_ERR_MASK) {
+		/* Check for undefined opcode error in lower QM */
+		if ((i == QMAN_STREAMS) &&
+				(glbl_sts_val & PDMA0_QM_GLBL_ERR_STS_CP_UNDEF_CMD_ERR_MASK)) {
+			handle_lower_qman_data_on_err(hdev, qman_base,
+							gaudi2_queue_id_to_engine_id[qid_base]);
 			*event_mask |= HL_NOTIFIER_EVENT_UNDEFINED_OPCODE;
-			if (hdev->captured_err_info.undef_opcode.write_enable) {
-				memset(&hdev->captured_err_info.undef_opcode, 0,
-						sizeof(hdev->captured_err_info.undef_opcode));
-				hdev->captured_err_info.undef_opcode.timestamp = ktime_get();
-				hdev->captured_err_info.undef_opcode.engine_id =
-							gaudi2_queue_id_to_engine_id[qid_base];
-			}
-
-			if (i == QMAN_STREAMS)
-				handle_lower_qman_data_on_err(hdev, qman_base, *event_mask);
 		}
 	}
 
-- 
2.34.1