[AMD Official Use Only - General] > -----Original Message----- > From: Kuehling, Felix <Felix.Kuehling@xxxxxxx> > Sent: Monday, March 4, 2024 6:47 PM > To: Dhume, Samir <Samir.Dhume@xxxxxxx>; amd-gfx@xxxxxxxxxxxxxxxxxxxxx > Cc: Lazar, Lijo <Lijo.Lazar@xxxxxxx>; Wan, Gavin <Gavin.Wan@xxxxxxx>; > Liu, Leo <Leo.Liu@xxxxxxx>; Deucher, Alexander > <Alexander.Deucher@xxxxxxx> > Subject: Re: [PATCH 2/3] drm/amdgpu: sdma support for sriov cpx mode > > > On 2024-03-04 10:19, Samir Dhume wrote: > > Signed-off-by: Samir Dhume <samir.dhume@xxxxxxx> > > Please add a meaningful commit description to all the patches in the series. > See one more comment below. Right! > > > > --- > > drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c | 34 > +++++++++++++++++++----- > > 1 file changed, 27 insertions(+), 7 deletions(-) > > > > diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c > b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c > > index fec5a3d1c4bc..f666ececbe7d 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c > > +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c > > @@ -82,17 +82,37 @@ static unsigned sdma_v4_4_2_seq_to_irq_id(int > seq_num) > > } > > } > > > > -static int sdma_v4_4_2_irq_id_to_seq(unsigned client_id) > > +static int sdma_v4_4_2_irq_id_to_seq(struct amdgpu_device *adev, > unsigned client_id) > > { > > + > > + struct amdgpu_xcp_mgr *xcp_mgr = adev->xcp_mgr; > > + bool sriov_cpx_odd = false; > > + int mode; > > + > > + if (amdgpu_sriov_vf(adev)) { > > + mode = xcp_mgr->funcs->query_partition_mode(xcp_mgr); > > This queries an MMIO register for the current mode. Is that really > necessary to do in the interrupt handler? Could we use the partition > mode stored in xcp_mgr->mode instead? The design appears to be that even when the host sets the mode to DPX/QPX/CPX, each guest sets itself to be in the SPX mode and xcp_mgr->mode is set to SPX. But I can use a new field in xcp_mgr to reflect the system mode set by the host and remove the MMIO access from the interrupt handler. Thanks, samir > > Regards, > Felix > > > > + > > + if (mode == AMDGPU_CPX_PARTITION_MODE) { > > + if (adev->gfx.funcs->get_xcc_id(adev, 0) & 0x1) > > + sriov_cpx_odd = true; > > + } > > + } > > + > > switch (client_id) { > > case SOC15_IH_CLIENTID_SDMA0: > > return 0; > > case SOC15_IH_CLIENTID_SDMA1: > > return 1; > > case SOC15_IH_CLIENTID_SDMA2: > > - return 2; > > + if (sriov_cpx_odd) > > + return 0; > > + else > > + return 2; > > case SOC15_IH_CLIENTID_SDMA3: > > - return 3; > > + if (sriov_cpx_odd) > > + return 1; > > + else > > + return 3; > > default: > > return -EINVAL; > > } > > @@ -1541,7 +1561,7 @@ static int sdma_v4_4_2_process_trap_irq(struct > amdgpu_device *adev, > > uint32_t instance, i; > > > > DRM_DEBUG("IH: SDMA trap\n"); > > - instance = sdma_v4_4_2_irq_id_to_seq(entry->client_id); > > + instance = sdma_v4_4_2_irq_id_to_seq(adev, entry->client_id); > > > > /* Client id gives the SDMA instance in AID. To know the exact SDMA > > * instance, interrupt entry gives the node id which corresponds to the > AID instance. > > @@ -1584,7 +1604,7 @@ static int > sdma_v4_4_2_process_ras_data_cb(struct amdgpu_device *adev, > > if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA)) > > goto out; > > > > - instance = sdma_v4_4_2_irq_id_to_seq(entry->client_id); > > + instance = sdma_v4_4_2_irq_id_to_seq(adev, entry->client_id); > > if (instance < 0) > > goto out; > > > > @@ -1603,7 +1623,7 @@ static int > sdma_v4_4_2_process_illegal_inst_irq(struct amdgpu_device *adev, > > > > DRM_ERROR("Illegal instruction in SDMA command stream\n"); > > > > - instance = sdma_v4_4_2_irq_id_to_seq(entry->client_id); > > + instance = sdma_v4_4_2_irq_id_to_seq(adev, entry->client_id); > > if (instance < 0) > > return 0; > > > > @@ -1647,7 +1667,7 @@ static int sdma_v4_4_2_print_iv_entry(struct > amdgpu_device *adev, > > struct amdgpu_task_info task_info; > > u64 addr; > > > > - instance = sdma_v4_4_2_irq_id_to_seq(entry->client_id); > > + instance = sdma_v4_4_2_irq_id_to_seq(adev, entry->client_id); > > if (instance < 0 || instance >= adev->sdma.num_instances) { > > dev_err(adev->dev, "sdma instance invalid %d\n", instance); > > return -EINVAL;