On 8/28/20 10:03 PM, Luben Tuikov wrote:
On 2020-08-26 10:46, Andrey Grodzovsky wrote:At this point the ASIC is already post reset by the HW/PSP so the HW not in proper state to be configured for suspension, some bloks might be even gated and so best is to avoid touching it."blocks"Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@xxxxxxx> --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 37 ++++++++++++++++++++++++++---- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 6 +++++ drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 6 +++++ drivers/gpu/drm/amd/amdgpu/atom.c | 1 + drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 18 +++++++++------ drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 3 +++ drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c | 3 +++ 8 files changed, 64 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 3399242..3489622 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -992,6 +992,7 @@ struct amdgpu_device { atomic_t throttling_logging_enabled; struct ratelimit_state throttling_logging_rs; uint32_t ras_features; + bool in_dpc; };This is very cryptic and if we do that, in 10 years, the code will be full of cryptic variables and comments. Better to add a comment bool in_dpc; /* PCI Downstream Port Containment */
Per Nirmoy's comment I already modified the naming to a descriptive one in V2 patchset
static inline struct amdgpu_device *drm_to_adev(struct drm_device *ddev)diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 84f8d14..e1bbefd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -319,6 +319,9 @@ uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg, { uint32_t ret;+ if (adev->in_dpc)+ return 0; + if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev)) return amdgpu_kiq_rreg(adev, reg);@@ -350,8 +353,10 @@ uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg,* * Returns the 8 bit value from the offset specified. */ -uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset) -{ +uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset) { + + if (adev->in_dpc) + return 0;if (offset < adev->rmmio_size)return (readb(adev->rmmio + offset)); @@ -373,8 +378,10 @@ uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset) * * Writes the value specified to the offset specified. */ -void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value) -{ +void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value) { + + if (adev->in_dpc) + return;if (offset < adev->rmmio_size)writeb(value, adev->rmmio + offset); @@ -384,6 +391,8 @@ void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value)void static inline amdgpu_mm_wreg_mmio(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint32_t acc_flags){ + if (adev->in_dpc) + return;trace_amdgpu_mm_wreg(adev->pdev->device, reg, v); @@ -412,6 +421,8 @@ void static inline amdgpu_mm_wreg_mmio(struct amdgpu_device *adev, uint32_t reg,void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint32_t acc_flags) { + if (adev->in_dpc) + return;if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))return amdgpu_kiq_wreg(adev, reg, v); @@ -427,6 +438,9 @@ void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint32_t acc_flags) { + if (adev->in_dpc) + return; + if (amdgpu_sriov_fullaccess(adev) && adev->gfx.rlc.funcs && adev->gfx.rlc.funcs->is_rlcg_access_range) { @@ -448,6 +462,9 @@ void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev, uint32_t reg, uint32_t */ u32 amdgpu_io_rreg(struct amdgpu_device *adev, u32 reg) { + if (adev->in_dpc) + return 0; + if ((reg * 4) < adev->rio_mem_size) return ioread32(adev->rio_mem + (reg * 4)); else { @@ -467,6 +484,8 @@ u32 amdgpu_io_rreg(struct amdgpu_device *adev, u32 reg) */ void amdgpu_io_wreg(struct amdgpu_device *adev, u32 reg, u32 v) { + if (adev->in_dpc) + return;if ((reg * 4) < adev->rio_mem_size)iowrite32(v, adev->rio_mem + (reg * 4)); @@ -487,6 +506,8 @@ void amdgpu_io_wreg(struct amdgpu_device *adev, u32 reg, u32 v) */ u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index) { + if (adev->in_dpc) + return 0;if (index < adev->doorbell.num_doorbells) {return readl(adev->doorbell.ptr + index); @@ -508,6 +529,8 @@ u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index) */ void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v) { + if (adev->in_dpc) + return;if (index < adev->doorbell.num_doorbells) {writel(v, adev->doorbell.ptr + index); @@ -527,6 +550,8 @@ void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v) */ u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index) { + if (adev->in_dpc) + return 0;if (index < adev->doorbell.num_doorbells) {return atomic64_read((atomic64_t *)(adev->doorbell.ptr + index)); @@ -548,6 +573,8 @@ u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index) */ void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v) { + if (adev->in_dpc) + return;if (index < adev->doorbell.num_doorbells) {atomic64_set((atomic64_t *)(adev->doorbell.ptr + index), v); @@ -4790,7 +4817,9 @@ pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)pci_restore_state(pdev); + adev->in_dpc = true;r = amdgpu_device_ip_suspend(adev); + adev->in_dpc = false; if (r) goto out;diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.cindex d698142..50c42c9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -693,6 +693,9 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg) struct amdgpu_kiq *kiq = &adev->gfx.kiq; struct amdgpu_ring *ring = &kiq->ring;+ if (adev->in_dpc)+ return 0; + BUG_ON(!ring->funcs->emit_rreg);spin_lock_irqsave(&kiq->ring_lock, flags);@@ -757,6 +760,9 @@ void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)BUG_ON(!ring->funcs->emit_wreg); + if (adev->in_dpc)+ return; +Don't know how I feel about such "architecture", where we introduce a flag-type variable and check it in a slew of places. If we did this a few times over, we'd have a few flags which we check almost everywhere, and it would make the code difficult to analyze. For instance, which flag to check first? Or, how are flags related to each other. There has to be a better way, where we check that flag only once before proceeding. A more unified approach.
I fail to see any other approach besides this one currently, there are a few different functions for r/w register access and unless there would be a single register access generic function where I could place this check (which I don't think is a good idea) I don't see how to do it in once place. On top of that, this variable is being checked in wait for HW response use cases such as psp_wait_for where you expect specific value to be returned from the driver/psp interfacing registers which you should simply avoid expecting in this case. All in all I don't see how u can make this a generic check in once place that solves
all the use cases. Andrey
spin_lock_irqsave(&kiq->ring_lock, flags); amdgpu_ring_alloc(ring, 32); amdgpu_ring_emit_wreg(ring, reg, v); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index d6c38e2..555ef59 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -219,6 +219,9 @@ int psp_wait_for(struct psp_context *psp, uint32_t reg_index, int i; struct amdgpu_device *adev = psp->adev;+ if (psp->adev->in_dpc)+ return 0; + for (i = 0; i < adev->usec_timeout; i++) { val = RREG32(reg_index); if (check_changed) { @@ -245,6 +248,9 @@ psp_cmd_submit_buf(struct psp_context *psp, bool ras_intr = false; bool skip_unsupport = false;+ if (psp->adev->in_dpc)+ return 0; + mutex_lock(&psp->mutex);memset(psp->cmd_buf_mem, 0, PSP_CMD_BUFFER_SIZE);diff --git a/drivers/gpu/drm/amd/amdgpu/atom.c b/drivers/gpu/drm/amd/amdgpu/atom.c index 4cfc786..613dac1 100644 --- a/drivers/gpu/drm/amd/amdgpu/atom.c +++ b/drivers/gpu/drm/amd/amdgpu/atom.c @@ -750,6 +750,7 @@ static void atom_op_jump(atom_exec_context *ctx, int *ptr, int arg) DRM_ERROR("atombios stuck in loop for more than %dsecs aborting\n", ATOM_CMD_TIMEOUT_SEC); ctx->abort = true; + dump_stack(); } } else { /* jiffies wrap around we will just wait a little longer */ diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 2db195e..86f268e 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -6980,15 +6980,19 @@ static int gfx_v10_0_hw_fini(void *handle)amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); + + if (!adev->in_dpc) { #ifndef BRING_UP_DEBUG - if (amdgpu_async_gfx_ring) { - r = gfx_v10_0_kiq_disable_kgq(adev); - if (r) - DRM_ERROR("KGQ disable failed\n"); - } + if (amdgpu_async_gfx_ring) { + r = gfx_v10_0_kiq_disable_kgq(adev); + if (r) + DRM_ERROR("KGQ disable failed\n"); + } #endif - if (amdgpu_gfx_disable_kcq(adev)) - DRM_ERROR("KCQ disable failed\n"); + if (amdgpu_gfx_disable_kcq(adev)) + DRM_ERROR("KCQ disable failed\n"); + } + if (amdgpu_sriov_vf(adev)) { gfx_v10_0_cp_gfx_enable(adev, false); /* Program KIQ position of RLC_CP_SCHEDULERS during destroy */ diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index 8462b30..306461d 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -224,9 +224,12 @@ int smu_dpm_set_power_gate(struct smu_context *smu, uint32_t block_type, { int ret = 0;+if (!smu->pm_enabled || !smu->adev->pm.dpm_enabled) return -EOPNOTSUPP;++Unnecessary white space changes. Regards, Lubenswitch (block_type) { /* * Some legacy code of amdgpu_vcn.c and vcn_v2*.c still uses diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c index a58ea08..02cf55e 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c @@ -112,6 +112,9 @@ int smu_cmn_send_smc_msg_with_param(struct smu_context *smu, struct amdgpu_device *adev = smu->adev; int ret = 0, index = 0;+ if (smu->adev->in_dpc)+ return 0; + index = smu_cmn_to_asic_specific_index(smu, CMN2ASIC_MAPPING_MSG, msg);
_______________________________________________ amd-gfx mailing list amd-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/amd-gfx