> -----Original Message----- > From: Gavin Wan [mailto:Gavin.Wan at amd.com] > Sent: Friday, June 23, 2017 5:33 PM > To: dl.gcr.gpu-virtual; brahma_sw_dev; amd-gfx at lists.freedesktop.org > Cc: Wan, Gavin > Subject: [PATCH] drm/amdgpu: Support passing amdgpu critical error to host > via GPU Mailbox. > > This feature works for SRIOV environment. For non-SRIOV environment, the > trans_error function does nothing. > > The error information includes error_code (16bit), error_flags(16bit) > and error_data(64bit). Since there are not many errors, we keep the > errors in an array and transfer all errors to Host before amdgpu > initialization function (amdgpu_device_init) exits. > > Change-Id: Ib20156130553b3c47046e6ca967e2c6c02b8e9ff > Signed-off-by: Gavin Wan <Gavin.Wan at amd.com> I noticed a few more minor issues noted below. Alex > --- > drivers/gpu/drm/amd/amdgpu/Makefile | 2 +- > drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c | 2 + > drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c | 2 + > drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 18 ++- > drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c | 3 + > drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 2 + > drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 6 +- > drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 9 ++ > drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c | 6 + > drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c | 5 + > drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 4 + > drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c | 6 + > drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h | 1 + > drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 11 ++ > drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 2 + > drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c | 46 ++++--- > drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h | 4 +- > drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c | 1 + > drivers/gpu/drm/amd/amdgpu/mxgpu_vi.h | 4 +- > drivers/gpu/drm/amd/amdgpu/psp_v3_1.c | 2 + > drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c | 4 + > drivers/gpu/drm/amd/amdgpu/vce_v4_0.c | 2 + > drivers/gpu/drm/amd/amdgpu/vf_error.c | 210 > +++++++++++++++++++++++++++++ > 
drivers/gpu/drm/amd/amdgpu/vf_error.h | 120 +++++++++++++++++ > 24 files changed, 446 insertions(+), 26 deletions(-) > create mode 100644 drivers/gpu/drm/amd/amdgpu/vf_error.c > create mode 100644 drivers/gpu/drm/amd/amdgpu/vf_error.h > > diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile > b/drivers/gpu/drm/amd/amdgpu/Makefile > index 2062127b03a8..0e45a5c7b0f0 100644 > --- a/drivers/gpu/drm/amd/amdgpu/Makefile > +++ b/drivers/gpu/drm/amd/amdgpu/Makefile > @@ -31,7 +31,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \ > amdgpu_prime.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \ > amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \ > amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o > amdgpu_atomfirmware.o \ > - amdgpu_queue_mgr.o > + amdgpu_queue_mgr.o vf_error.o > > # add asic specific block > amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o > kv_dpm.o \ > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c > index 0e512fa1e9ae..ac2062106ae0 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c > @@ -34,6 +34,7 @@ > #include "amdgpu_acp.h" > > #include "acp_gfx_if.h" > +#include "vf_error.h" > > #define ACP_TILE_ON_MASK 0x03 > #define ACP_TILE_OFF_MASK 0x02 > @@ -395,6 +396,7 @@ static int acp_hw_init(void *handle) > r = pm_genpd_add_device(&adev->acp.acp_genpd- > >gpd, dev); > if (r) { > dev_err(dev, "Failed to add dev to > genpd\n"); > + > amdgpu_vf_error_put(MDGIM_ERROR_VF_ADD_DEV_TO_GENPD_ > FAIL, 0, 0); > return r; > } > } > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c > index 365e735f6647..b6e6342f322e 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c > @@ -31,6 +31,7 @@ > > #include <linux/slab.h> > #include <linux/acpi.h> > +#include "vf_error.h" > /* > * BIOS. 
> */ > @@ -452,6 +453,7 @@ bool amdgpu_get_bios(struct amdgpu_device *adev) > goto success; > > DRM_ERROR("Unable to locate a BIOS ROM\n"); > + > amdgpu_vf_error_put(AMDGIM_ERROR_VF_UNLOCATE_BIOS_RO > M, 0, 0); > return false; > > success: > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > index 91133a132fb6..af886f10f556 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > @@ -54,6 +54,7 @@ > #include "bif/bif_4_1_d.h" > #include <linux/pci.h> > #include <linux/firmware.h> > +#include "vf_error.h" > > MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin"); > MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin"); > @@ -2176,6 +2177,7 @@ int amdgpu_device_init(struct amdgpu_device > *adev, > r = amdgpu_atombios_init(adev); > if (r) { > dev_err(adev->dev, "amdgpu_atombios_init failed\n"); > + > amdgpu_vf_error_put(AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, > 0, 0); > goto failed; > } > > @@ -2186,6 +2188,7 @@ int amdgpu_device_init(struct amdgpu_device > *adev, > if (amdgpu_vpost_needed(adev)) { > if (!adev->bios) { > dev_err(adev->dev, "no vBIOS found\n"); > + > amdgpu_vf_error_put(AMDGIM_ERROR_VF_NO_VBIOS, 0, 0); > r = -EINVAL; > goto failed; > } > @@ -2193,6 +2196,7 @@ int amdgpu_device_init(struct amdgpu_device > *adev, > r = amdgpu_atom_asic_init(adev- > >mode_info.atom_context); > if (r) { > dev_err(adev->dev, "gpu post error!\n"); > + > amdgpu_vf_error_put(AMDGIM_ERROR_VF_GPU_POST_ERROR, 0, > 0); > goto failed; > } > } else { > @@ -2204,6 +2208,7 @@ int amdgpu_device_init(struct amdgpu_device > *adev, > r = amdgpu_atombios_get_clock_info(adev); > if (r) { > dev_err(adev->dev, > "amdgpu_atombios_get_clock_info failed\n"); > + > amdgpu_vf_error_put(AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOC > K_FAIL, 0, 0); > return r; > } > /* init i2c buses */ > @@ -2215,6 +2220,7 @@ int amdgpu_device_init(struct amdgpu_device > *adev, > r = amdgpu_fence_driver_init(adev); > if (r) { > 
dev_err(adev->dev, "amdgpu_fence_driver_init failed\n"); > + > amdgpu_vf_error_put(AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, > 0); > goto failed; > } > > @@ -2224,6 +2230,7 @@ int amdgpu_device_init(struct amdgpu_device > *adev, > r = amdgpu_init(adev); > if (r) { > dev_err(adev->dev, "amdgpu_init failed\n"); > + > amdgpu_vf_error_put(AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, > 0, 0); > amdgpu_fini(adev); > goto failed; > } > @@ -2287,12 +2294,15 @@ int amdgpu_device_init(struct amdgpu_device > *adev, > r = amdgpu_late_init(adev); > if (r) { > dev_err(adev->dev, "amdgpu_late_init failed\n"); > + > amdgpu_vf_error_put(AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_ > FAIL, 0, r); > goto failed; > } > > + amdgpu_vf_error_trans_all(adev); > return 0; > > failed: > + amdgpu_vf_error_trans_all(adev); > if (runtime) > vga_switcheroo_fini_domain_pm_ops(adev->dev); > return r; > @@ -3002,6 +3012,7 @@ int amdgpu_gpu_reset(struct amdgpu_device > *adev) > } > } else { > dev_err(adev->dev, "asic resume failed (%d).\n", r); > + > amdgpu_vf_error_put(AMDGIM_ERROR_VF_ASIC_RESUME_FAIL, 0, > r); > for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { > if (adev->rings[i] && adev->rings[i]->sched.thread) { > kthread_unpark(adev->rings[i]- > >sched.thread); > @@ -3016,11 +3027,14 @@ int amdgpu_gpu_reset(struct amdgpu_device > *adev) > drm_helper_resume_force_mode(adev->ddev); > > ttm_bo_unlock_delayed_workqueue(&adev->mman.bdev, > resched); > - if (r) > + if (r) { > /* bad news, how to tell it to userspace ? 
*/ > dev_info(adev->dev, "GPU reset failed\n"); > - else > + > amdgpu_vf_error_put(AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, > r); > + } > + else { > dev_info(adev->dev, "GPU reset successed!\n"); > + } > > return r; > } > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c > index 3ab4c65ecc8b..1ff65d2a5bee 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c > @@ -25,6 +25,7 @@ > #include "amdgpu.h" > #include "amdgpu_ih.h" > #include "amdgpu_amdkfd.h" > +#include "vf_error.h" > > /** > * amdgpu_ih_ring_alloc - allocate memory for the IH ring > @@ -95,6 +96,7 @@ int amdgpu_ih_ring_init(struct amdgpu_device *adev, > unsigned ring_size, > r = amdgpu_wb_get(adev, &adev->irq.ih.wptr_offs); > if (r) { > dev_err(adev->dev, "(%d) ih wptr_offs wb alloc > failed\n", r); > + > amdgpu_vf_error_put(AMDGIM_ERROR_VF_IH_WB_ALLOC_FAIL, > 0, r); > return r; > } > > @@ -102,6 +104,7 @@ int amdgpu_ih_ring_init(struct amdgpu_device > *adev, unsigned ring_size, > if (r) { > amdgpu_wb_free(adev, adev->irq.ih.wptr_offs); > dev_err(adev->dev, "(%d) ih rptr_offs wb alloc > failed\n", r); > + > amdgpu_vf_error_put(AMDGIM_ERROR_VF_IH_WB_ALLOC_FAIL, > 0, r); > return r; > } > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c > index 00ef2fc8c30f..c10977c302c1 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c > @@ -35,6 +35,7 @@ > #include <linux/slab.h> > #include <linux/pm_runtime.h> > #include "amdgpu_amdkfd.h" > +#include "vf_error.h" > > /** > * amdgpu_driver_unload_kms - Main unload function for KMS. 
> @@ -144,6 +145,7 @@ int amdgpu_driver_load_kms(struct drm_device > *dev, unsigned long flags) > r = amdgpu_device_init(adev, dev, dev->pdev, flags); > if (r) { > dev_err(&dev->pdev->dev, "Fatal error during GPU init\n"); > + > amdgpu_vf_error_put(AMDGIM_ERROR_VF_GPU_INIT_FATAL_FAI > L, 0, 0); > goto out; > } > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c > index 8ee69652be8c..3c33f4019a6a 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c > @@ -36,7 +36,7 @@ > #include <drm/drm_cache.h> > #include "amdgpu.h" > #include "amdgpu_trace.h" > - > +#include "vf_error.h" > > > static u64 amdgpu_get_vis_part_size(struct amdgpu_device *adev, > @@ -246,18 +246,21 @@ int amdgpu_bo_create_kernel(struct > amdgpu_device *adev, > NULL, NULL, bo_ptr); > if (r) { > dev_err(adev->dev, "(%d) failed to allocate kernel bo\n", r); > + > amdgpu_vf_error_put(AMDGIM_ERROR_VF_BO_ALLOC_K_FAIL, 0, > r); > return r; > } > > r = amdgpu_bo_reserve(*bo_ptr, false); > if (r) { > dev_err(adev->dev, "(%d) failed to reserve kernel bo\n", r); > + > amdgpu_vf_error_put(AMDGIM_ERROR_VF_BO_RESERVE_FAIL, 0, > r); > goto error_free; > } > > r = amdgpu_bo_pin(*bo_ptr, domain, gpu_addr); > if (r) { > dev_err(adev->dev, "(%d) kernel bo pin failed\n", r); > + amdgpu_vf_error_put(AMDGIM_ERROR_VF_BO_PIN_FAIL, > 0, r); > goto error_unreserve; > } > > @@ -265,6 +268,7 @@ int amdgpu_bo_create_kernel(struct amdgpu_device > *adev, > r = amdgpu_bo_kmap(*bo_ptr, cpu_addr); > if (r) { > dev_err(adev->dev, "(%d) kernel bo map failed\n", > r); > + > amdgpu_vf_error_put(AMDGIM_ERROR_VF_BO_MAP_FAIL, 0, r); > goto error_unreserve; > } > } > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c > index 75165e07b1cd..92e0eadfa4b2 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c > @@ -33,6 +33,7 @@ > #include 
<drm/amdgpu_drm.h> > #include "amdgpu.h" > #include "atom.h" > +#include "vf_error.h" > > /* > * Rings > @@ -188,12 +189,14 @@ int amdgpu_ring_init(struct amdgpu_device *adev, > struct amdgpu_ring *ring, > r = amdgpu_wb_get_64bit(adev, &ring->rptr_offs); > if (r) { > dev_err(adev->dev, "(%d) ring rptr_offs wb alloc > failed\n", r); > + > amdgpu_vf_error_put(AMDGIM_ERROR_VF_RING_R_WB_ALLOC_F > AIL, 0, r); > return r; > } > > r = amdgpu_wb_get_64bit(adev, &ring->wptr_offs); > if (r) { > dev_err(adev->dev, "(%d) ring wptr_offs wb alloc > failed\n", r); > + > amdgpu_vf_error_put(AMDGIM_ERROR_VF_RING_W_WB_ALLOC_ > FAIL, 0, r); > return r; > } > > @@ -201,12 +204,14 @@ int amdgpu_ring_init(struct amdgpu_device *adev, > struct amdgpu_ring *ring, > r = amdgpu_wb_get(adev, &ring->rptr_offs); > if (r) { > dev_err(adev->dev, "(%d) ring rptr_offs wb alloc > failed\n", r); > + > amdgpu_vf_error_put(AMDGIM_ERROR_VF_RING_R_WB_ALLOC_F > AIL, 0, r); > return r; > } > > r = amdgpu_wb_get(adev, &ring->wptr_offs); > if (r) { > dev_err(adev->dev, "(%d) ring wptr_offs wb alloc > failed\n", r); > + > amdgpu_vf_error_put(AMDGIM_ERROR_VF_RING_W_WB_ALLOC_ > FAIL, 0, r); > return r; > } > > @@ -215,12 +220,14 @@ int amdgpu_ring_init(struct amdgpu_device *adev, > struct amdgpu_ring *ring, > r = amdgpu_wb_get(adev, &ring->fence_offs); > if (r) { > dev_err(adev->dev, "(%d) ring fence_offs wb alloc failed\n", > r); > + > amdgpu_vf_error_put(AMDGIM_ERROR_VF_RING_F_WB_ALLOC_F > AIL, 0, r); > return r; > } > > r = amdgpu_wb_get(adev, &ring->cond_exe_offs); > if (r) { > dev_err(adev->dev, "(%d) ring cond_exec_polling wb alloc > failed\n", r); > + > amdgpu_vf_error_put(AMDGIM_ERROR_VF_RING_C_WB_ALLOC_F > AIL, 0, r); > return r; > } > ring->cond_exe_gpu_addr = adev->wb.gpu_addr + (ring- > >cond_exe_offs * 4); > @@ -231,6 +238,7 @@ int amdgpu_ring_init(struct amdgpu_device *adev, > struct amdgpu_ring *ring, > r = amdgpu_fence_driver_start_ring(ring, irq_src, irq_type); > if (r) { > dev_err(adev->dev, 
"failed initializing fences (%d).\n", r); > + > amdgpu_vf_error_put(AMDGIM_ERROR_VF_INIT_FENCE_FAIL, 0, > r); > return r; > } > > @@ -249,6 +257,7 @@ int amdgpu_ring_init(struct amdgpu_device *adev, > struct amdgpu_ring *ring, > (void **)&ring->ring); > if (r) { > dev_err(adev->dev, "(%d) ring create failed\n", r); > + > amdgpu_vf_error_put(AMDGIM_ERROR_VF_RING_CREATE_FAIL, 0, > r); > return r; > } > amdgpu_ring_clear_ring(ring); > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c > index 5ca75a456ad2..e2a08d7c2a7d 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c > @@ -43,6 +43,7 @@ > */ > #include <drm/drmP.h> > #include "amdgpu.h" > +#include "vf_error.h" > > static void amdgpu_sa_bo_remove_locked(struct amdgpu_sa_bo *sa_bo); > static void amdgpu_sa_bo_try_free(struct amdgpu_sa_manager > *sa_manager); > @@ -67,6 +68,7 @@ int amdgpu_sa_bo_manager_init(struct > amdgpu_device *adev, > 0, NULL, NULL, &sa_manager->bo); > if (r) { > dev_err(adev->dev, "(%d) failed to allocate bo for > manager\n", r); > + > amdgpu_vf_error_put(AMDGIM_ERROR_VF_BO_ALLOC_M_FAIL, 0, > r); > return r; > } > > @@ -99,6 +101,7 @@ int amdgpu_sa_bo_manager_start(struct > amdgpu_device *adev, > > if (sa_manager->bo == NULL) { > dev_err(adev->dev, "no bo for sa manager\n"); > + > amdgpu_vf_error_put(AMDGIM_ERROR_VF_NO_BO_FOR_SA, 0, 0); > return -EINVAL; > } > > @@ -106,12 +109,14 @@ int amdgpu_sa_bo_manager_start(struct > amdgpu_device *adev, > r = amdgpu_bo_reserve(sa_manager->bo, false); > if (r) { > dev_err(adev->dev, "(%d) failed to reserve manager bo\n", > r); > + > amdgpu_vf_error_put(AMDGIM_ERROR_VF_BO_RESERVE_FAIL, 0, > r); > return r; > } > r = amdgpu_bo_pin(sa_manager->bo, sa_manager->domain, > &sa_manager->gpu_addr); > if (r) { > amdgpu_bo_unreserve(sa_manager->bo); > dev_err(adev->dev, "(%d) failed to pin manager bo\n", r); > + amdgpu_vf_error_put(AMDGIM_ERROR_VF_BO_PIN_FAIL, > 0, r); > 
return r; > } > r = amdgpu_bo_kmap(sa_manager->bo, &sa_manager->cpu_ptr); > @@ -127,6 +132,7 @@ int amdgpu_sa_bo_manager_suspend(struct > amdgpu_device *adev, > > if (sa_manager->bo == NULL) { > dev_err(adev->dev, "no bo for sa manager\n"); > + > amdgpu_vf_error_put(AMDGIM_ERROR_VF_NO_BO_FOR_SA, 0, 0); > return -EINVAL; > } > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c > index 4f50eeb65855..039b1eefb115 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c > @@ -27,6 +27,7 @@ > #include <drm/drmP.h> > #include "amdgpu.h" > #include "amdgpu_ucode.h" > +#include "vf_error.h" > > static void amdgpu_ucode_print_common_hdr(const struct > common_firmware_header *hdr) > { > @@ -383,12 +384,14 @@ int amdgpu_ucode_init_bo(struct amdgpu_device > *adev) > NULL, NULL, bo); > if (err) { > dev_err(adev->dev, "(%d) Firmware buffer allocate > failed\n", err); > + > amdgpu_vf_error_put(AMDGIM_ERROR_VF_FW_ALLOC_FAIL, 0, > err); > goto failed; > } > > err = amdgpu_bo_reserve(*bo, false); > if (err) { > dev_err(adev->dev, "(%d) Firmware buffer reserve > failed\n", err); > + > amdgpu_vf_error_put(AMDGIM_ERROR_VF_FW_RESERVE_FAIL, 0, > err); > goto failed_reserve; > } > > @@ -396,12 +399,14 @@ int amdgpu_ucode_init_bo(struct amdgpu_device > *adev) > &fw_mc_addr); > if (err) { > dev_err(adev->dev, "(%d) Firmware buffer pin failed\n", > err); > + amdgpu_vf_error_put(AMDGIM_ERROR_VF_FW_PIN_FAIL, > 0, err); > goto failed_pin; > } > > err = amdgpu_bo_kmap(*bo, &fw_buf_ptr); > if (err) { > dev_err(adev->dev, "(%d) Firmware buffer kmap failed\n", > err); > + > amdgpu_vf_error_put(AMDGIM_ERROR_VF_FW_KMAP_FAIL, 0, > err); > goto failed_kmap; > } > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c > index 2ca09f111f08..cf41b308f72e 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c > @@ 
-38,6 +38,7 @@ > #include "amdgpu_uvd.h" > #include "cikd.h" > #include "uvd/uvd_4_2_d.h" > +#include "vf_error.h" > > /* 1 second timeout */ > #define UVD_IDLE_TIMEOUT msecs_to_jiffies(1000) > @@ -175,6 +176,7 @@ int amdgpu_uvd_sw_init(struct amdgpu_device > *adev) > if (r) { > dev_err(adev->dev, "amdgpu_uvd: Can't load firmware > \"%s\"\n", > fw_name); > + > amdgpu_vf_error_put(AMDGIM_ERROR_VF_UVD_NOT_LOAD_FW, > 0, 0); > return r; > } > > @@ -182,6 +184,7 @@ int amdgpu_uvd_sw_init(struct amdgpu_device > *adev) > if (r) { > dev_err(adev->dev, "amdgpu_uvd: Can't validate firmware > \"%s\"\n", > fw_name); > + > amdgpu_vf_error_put(AMDGIM_ERROR_VF_UVD_NOT_VALIDATE_ > FW, 0, 0); > release_firmware(adev->uvd.fw); > adev->uvd.fw = NULL; > return r; > @@ -226,6 +229,7 @@ int amdgpu_uvd_sw_init(struct amdgpu_device > *adev) > &adev->uvd.gpu_addr, &adev- > >uvd.cpu_addr); > if (r) { > dev_err(adev->dev, "(%d) failed to allocate UVD bo\n", r); > + > amdgpu_vf_error_put(AMDGIM_ERROR_VF_ALLOC_UVD_BO_FAIL, > 0, 0); > return r; > } > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c > index b692ad402252..a6d9a0bdd762 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c > @@ -34,6 +34,7 @@ > #include "amdgpu_pm.h" > #include "amdgpu_vce.h" > #include "cikd.h" > +#include "vf_error.h" > > /* 1 second timeout */ > #define VCE_IDLE_TIMEOUT msecs_to_jiffies(1000) > @@ -142,6 +143,7 @@ int amdgpu_vce_sw_init(struct amdgpu_device > *adev, unsigned long size) > if (r) { > dev_err(adev->dev, "amdgpu_vce: Can't load firmware > \"%s\"\n", > fw_name); > + > amdgpu_vf_error_put(AMDGIM_ERROR_VF_VCE_NOT_LOAD_FW, > 0, 0); > return r; > } > > @@ -149,6 +151,7 @@ int amdgpu_vce_sw_init(struct amdgpu_device > *adev, unsigned long size) > if (r) { > dev_err(adev->dev, "amdgpu_vce: Can't validate firmware > \"%s\"\n", > fw_name); > + > amdgpu_vf_error_put(AMDGIM_ERROR_VF_VCE_NOT_VALIDATE_ > FW, 0, 0); 
> release_firmware(adev->vce.fw); > adev->vce.fw = NULL; > return r; > @@ -170,6 +173,7 @@ int amdgpu_vce_sw_init(struct amdgpu_device > *adev, unsigned long size) > &adev->vce.gpu_addr, &adev- > >vce.cpu_addr); > if (r) { > dev_err(adev->dev, "(%d) failed to allocate VCE bo\n", r); > + > amdgpu_vf_error_put(AMDGIM_ERROR_VF_ALLOC_VCE_BO_FAIL, > 0, 0); > return r; > } > > @@ -265,6 +269,7 @@ int amdgpu_vce_resume(struct amdgpu_device > *adev) > r = amdgpu_bo_reserve(adev->vce.vcpu_bo, false); > if (r) { > dev_err(adev->dev, "(%d) failed to reserve VCE bo\n", r); > + > amdgpu_vf_error_put(AMDGIM_ERROR_VF_VCE_RESERVE_FAIL, 0, > r); > return r; > } > > @@ -272,6 +277,7 @@ int amdgpu_vce_resume(struct amdgpu_device > *adev) > if (r) { > amdgpu_bo_unreserve(adev->vce.vcpu_bo); > dev_err(adev->dev, "(%d) VCE map failed\n", r); > + > amdgpu_vf_error_put(AMDGIM_ERROR_VF_VCE_KMAP_FAIL, 0, r); > return r; > } > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h > b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h > index 9e1062edb76e..e5b1baf387c1 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h > @@ -43,6 +43,7 @@ struct amdgpu_virt_ops { > int (*req_full_gpu)(struct amdgpu_device *adev, bool init); > int (*rel_full_gpu)(struct amdgpu_device *adev, bool init); > int (*reset_gpu)(struct amdgpu_device *adev); > + void (*trans_msg)(struct amdgpu_device *adev, u32 req, u32 data1, > u32 data2, u32 data3); > }; > > /* GPU virtualization */ > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c > b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c > index d479a627f03e..7d43758b60fb 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c > @@ -36,6 +36,7 @@ > #include "soc15_common.h" > #include "clearstate_gfx9.h" > #include "v9_structs.h" > +#include "vf_error.h" > > #define GFX9_NUM_GFX_RINGS 1 > #define GFX9_MEC_HPD_SIZE 2048 > @@ -548,6 +549,7 @@ static int gfx_v9_0_init_microcode(struct > 
amdgpu_device *adev) > dev_err(adev->dev, > "gfx9: Failed to load firmware \"%s\"\n", > fw_name); > + > amdgpu_vf_error_put(AMDGIM_ERROR_VF_GFX_LOAD_FW_FAIL, > 0, 0); > release_firmware(adev->gfx.pfp_fw); > adev->gfx.pfp_fw = NULL; > release_firmware(adev->gfx.me_fw); > @@ -1081,6 +1083,7 @@ static int gfx_v9_0_ngg_create_buf(struct > amdgpu_device *adev, > > if (size_se < 0) { > dev_err(adev->dev, "Buffer size is invalid: %d\n", size_se); > + > amdgpu_vf_error_put(AMDGIM_ERROR_VF_BUFL_SIZE_INVALID, > 0, size_se); > return -EINVAL; > } > size_se = size_se ? size_se : default_size_se; > @@ -1093,6 +1096,7 @@ static int gfx_v9_0_ngg_create_buf(struct > amdgpu_device *adev, > NULL); > if (r) { > dev_err(adev->dev, "(%d) failed to create NGG buffer\n", r); > + > amdgpu_vf_error_put(AMDGIM_ERROR_VF_NGG_CREATE_BUF_F > AIL, 0, r); > return r; > } > ngg_buf->bo_size = amdgpu_bo_size(ngg_buf->bo); > @@ -1137,6 +1141,7 @@ static int gfx_v9_0_ngg_init(struct amdgpu_device > *adev) > 64 * 1024); > if (r) { > dev_err(adev->dev, "Failed to create Primitive Buffer\n"); > + > amdgpu_vf_error_put(AMDGIM_ERROR_VF_NGG_CREATE_PR_BU > F_FAIL, 0, r); > goto err; > } > > @@ -1146,6 +1151,7 @@ static int gfx_v9_0_ngg_init(struct amdgpu_device > *adev) > 256 * 1024); > if (r) { > dev_err(adev->dev, "Failed to create Position Buffer\n"); > + > amdgpu_vf_error_put(AMDGIM_ERROR_VF_NGG_CREATE_PO_BU > F_FAIL, 0, r); > goto err; > } > > @@ -1155,6 +1161,7 @@ static int gfx_v9_0_ngg_init(struct amdgpu_device > *adev) > 256); > if (r) { > dev_err(adev->dev, "Failed to create Control Sideband > Buffer\n"); > + > amdgpu_vf_error_put(AMDGIM_ERROR_VF_NGG_CREATE_CS_BUF > _FAIL, 0, r); > goto err; > } > > @@ -1167,6 +1174,7 @@ static int gfx_v9_0_ngg_init(struct amdgpu_device > *adev) > 512 * 1024); > if (r) { > dev_err(adev->dev, "Failed to create Parameter Cache\n"); > + > amdgpu_vf_error_put(AMDGIM_ERROR_VF_NGG_CREATE_PC_BU > F_FAIL, 0, r); > goto err; > } > > @@ -1349,18 +1357,21 @@ static int 
gfx_v9_0_sw_init(void *handle) > r = gfx_v9_0_init_microcode(adev); > if (r) { > DRM_ERROR("Failed to load gfx firmware!\n"); > + > amdgpu_vf_error_put(AMDGIM_ERROR_VF_LOAD_GFX_FIRMWAR > E_FAIL, 0, 0); > return r; > } > > r = gfx_v9_0_rlc_init(adev); > if (r) { > DRM_ERROR("Failed to init rlc BOs!\n"); > + > amdgpu_vf_error_put(AMDGIM_ERROR_VF_RLC_BO_INIT_FAIL, 0, > 0); > return r; > } > > r = gfx_v9_0_mec_init(adev); > if (r) { > DRM_ERROR("Failed to init MEC BOs!\n"); > + > amdgpu_vf_error_put(AMDGIM_ERROR_VF_MEC_BO_INIT_FAIL, 0, > 0); > return r; > } > > diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c > b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c > index 175ba5f9691c..91ad658dac13 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c > @@ -36,6 +36,7 @@ > #include "nbio_v7_0.h" > #include "gfxhub_v1_0.h" > #include "mmhub_v1_0.h" > +#include "vf_error.h" > > #define mmDF_CS_AON0_DramBaseAddress0 > 0x0044 > #define mmDF_CS_AON0_DramBaseAddress0_BASE_IDX > 0 > @@ -683,6 +684,7 @@ static int gmc_v9_0_gart_enable(struct > amdgpu_device *adev) > > if (adev->gart.robj == NULL) { > dev_err(adev->dev, "No VRAM object for PCIE GART.\n"); > + > amdgpu_vf_error_put(AMDGIM_ERROR_VF_NO_VRAM_FOR_GAR > T, 0, 0); > return -EINVAL; > } > r = amdgpu_gart_table_vram_pin(adev); > diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c > b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c > index bde3ca3c21c1..2812d88a8bdd 100644 > --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c > +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c > @@ -72,21 +72,6 @@ static void xgpu_ai_mailbox_set_valid(struct > amdgpu_device *adev, bool val) > reg); > } > > -static void xgpu_ai_mailbox_trans_msg(struct amdgpu_device *adev, > - enum idh_request req) > -{ > - u32 reg; > - > - reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, > - > mmBIF_BX_PF0_MAILBOX_MSGBUF_TRN_DW0)); > - reg = REG_SET_FIELD(reg, > BIF_BX_PF0_MAILBOX_MSGBUF_TRN_DW0, > - MSGBUF_DATA, req); > - 
WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, > mmBIF_BX_PF0_MAILBOX_MSGBUF_TRN_DW0), > - reg); > - > - xgpu_ai_mailbox_set_valid(adev, true); > -} > - > static int xgpu_ai_mailbox_rcv_msg(struct amdgpu_device *adev, > enum idh_event event) > { > @@ -154,13 +139,25 @@ static int xgpu_ai_poll_msg(struct amdgpu_device > *adev, enum idh_event event) > return r; > } > > - > -static int xgpu_ai_send_access_requests(struct amdgpu_device *adev, > - enum idh_request req) > -{ > +static void xgpu_ai_mailbox_trans_msg (struct amdgpu_device *adev, > + enum idh_request req, u32 data1, u32 data2, u32 data3) { > + u32 reg; > int r; > > - xgpu_ai_mailbox_trans_msg(adev, req); > + reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, > + > mmBIF_BX_PF0_MAILBOX_MSGBUF_TRN_DW0)); > + reg = REG_SET_FIELD(reg, > BIF_BX_PF0_MAILBOX_MSGBUF_TRN_DW0, > + MSGBUF_DATA, req); > + WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, > mmBIF_BX_PF0_MAILBOX_MSGBUF_TRN_DW0), > + reg); > + WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, > mmBIF_BX_PF0_MAILBOX_MSGBUF_TRN_DW1), > + data1); > + WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, > mmBIF_BX_PF0_MAILBOX_MSGBUF_TRN_DW2), > + data2); > + WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, > mmBIF_BX_PF0_MAILBOX_MSGBUF_TRN_DW3), > + data3); > + > + xgpu_ai_mailbox_set_valid(adev, true); > > /* start to poll ack */ > r = xgpu_ai_poll_ack(adev); > @@ -168,6 +165,14 @@ static int xgpu_ai_send_access_requests(struct > amdgpu_device *adev, > pr_err("Doesn't get ack from pf, continue\n"); > > xgpu_ai_mailbox_set_valid(adev, false); > +} > + > +static int xgpu_ai_send_access_requests(struct amdgpu_device *adev, > + enum idh_request req) > +{ > + int r; > + > + xgpu_ai_mailbox_trans_msg(adev, req, 0, 0, 0); > > /* start to check msg if request is idh_req_gpu_init_access */ > if (req == IDH_REQ_GPU_INIT_ACCESS || > @@ -342,4 +347,5 @@ const struct amdgpu_virt_ops xgpu_ai_virt_ops = { > .req_full_gpu = xgpu_ai_request_full_gpu_access, > .rel_full_gpu = xgpu_ai_release_full_gpu_access, > .reset_gpu = 
xgpu_ai_request_reset, > + .trans_msg = xgpu_ai_mailbox_trans_msg, > }; > diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h > b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h > index 9aefc44d2c34..1e91b9a1c591 100644 > --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h > +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h > @@ -31,7 +31,9 @@ enum idh_request { > IDH_REL_GPU_INIT_ACCESS, > IDH_REQ_GPU_FINI_ACCESS, > IDH_REL_GPU_FINI_ACCESS, > - IDH_REQ_GPU_RESET_ACCESS > + IDH_REQ_GPU_RESET_ACCESS, > + > + IDH_LOG_VF_ERROR = 200, > }; > > enum idh_event { > diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c > b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c > index 171a658135b5..c25a831f94ec 100644 > --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c > +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c > @@ -613,4 +613,5 @@ const struct amdgpu_virt_ops xgpu_vi_virt_ops = { > .req_full_gpu = xgpu_vi_request_full_gpu_access, > .rel_full_gpu = xgpu_vi_release_full_gpu_access, > .reset_gpu = xgpu_vi_request_reset, > + .trans_msg = NULL, /* Does not need to trans VF errors > to host. 
*/ > }; > diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.h > b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.h > index 2db741131bc6..c791d73d2d54 100644 > --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.h > +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.h > @@ -32,7 +32,9 @@ enum idh_request { > IDH_REL_GPU_INIT_ACCESS, > IDH_REQ_GPU_FINI_ACCESS, > IDH_REL_GPU_FINI_ACCESS, > - IDH_REQ_GPU_RESET_ACCESS > + IDH_REQ_GPU_RESET_ACCESS, > + > + IDH_LOG_VF_ERROR = 200, > }; > > /* VI mailbox messages data */ > diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c > b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c > index 58ba3966f070..02a79ef6329c 100644 > --- a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c > +++ b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c > @@ -37,6 +37,7 @@ > #include "vega10/GC/gc_9_0_offset.h" > #include "vega10/SDMA0/sdma0_4_0_offset.h" > #include "vega10/NBIO/nbio_6_1_offset.h" > +#include "vf_error.h" > > MODULE_FIRMWARE("amdgpu/vega10_sos.bin"); > MODULE_FIRMWARE("amdgpu/vega10_asd.bin"); > @@ -153,6 +154,7 @@ int psp_v3_1_init_microcode(struct psp_context > *psp) > dev_err(adev->dev, > "psp v3.1: Failed to load firmware \"%s\"\n", > fw_name); > + > amdgpu_vf_error_put(AMDGIM_ERROR_VF_PSP_LOAD_FW_FAIL, > 0, 0); > release_firmware(adev->psp.sos_fw); > adev->psp.sos_fw = NULL; > release_firmware(adev->psp.asd_fw); > diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c > b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c > index 987b958368ac..8eef64f16085 100644 > --- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c > @@ -39,6 +39,7 @@ > #include "vega10/HDP/hdp_4_0_offset.h" > #include "vega10/MMHUB/mmhub_1_0_offset.h" > #include "vega10/MMHUB/mmhub_1_0_sh_mask.h" > +#include "vf_error.h" > > static void uvd_v7_0_set_ring_funcs(struct amdgpu_device *adev); > static void uvd_v7_0_set_enc_ring_funcs(struct amdgpu_device *adev); > @@ -700,6 +701,7 @@ static int uvd_v7_0_mmsch_start(struct > amdgpu_device *adev, > > if (!loop) { > dev_err(adev->dev, "failed to init MMSCH, 
> mmVCE_MMSCH_VF_MAILBOX_RESP = %x\n", data); > + > amdgpu_vf_error_put(AMDGIM_ERROR_VF_MMSCH_INIT_FAIL, 0, > data); > return -EBUSY; > } > WDOORBELL32(adev->uvd.ring_enc[0].doorbell_index, 0); > @@ -1001,6 +1003,7 @@ static int uvd_v7_0_start(struct amdgpu_device > *adev) > break; > > DRM_ERROR("UVD not responding, trying to reset the > VCPU!!!\n"); > + > amdgpu_vf_error_put(AMDGIM_ERROR_VF_UVD_NORESP_RESET, > 0, 0); > WREG32_P(SOC15_REG_OFFSET(UVD, 0, > mmUVD_SOFT_RESET), > > UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK, > > ~UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK); > @@ -1013,6 +1016,7 @@ static int uvd_v7_0_start(struct amdgpu_device > *adev) > > if (r) { > DRM_ERROR("UVD not responding, giving up!!!\n"); > + > amdgpu_vf_error_put(AMDGIM_ERROR_VF_UVD_NORESP_GIVEU > P, 0, 0); > return r; > } > /* enable master interrupt */ > diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c > b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c > index 1ecd6bb90c1f..5d5aa1cbd140 100644 > --- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c > @@ -38,6 +38,7 @@ > #include "vega10/VCE/vce_4_0_sh_mask.h" > #include "vega10/MMHUB/mmhub_1_0_offset.h" > #include "vega10/MMHUB/mmhub_1_0_sh_mask.h" > +#include "vf_error.h" > > #define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK 0x02 > > @@ -188,6 +189,7 @@ static int vce_v4_0_mmsch_start(struct > amdgpu_device *adev, > > if (!loop) { > dev_err(adev->dev, "failed to init MMSCH, > mmVCE_MMSCH_VF_MAILBOX_RESP = %x\n", data); > + > amdgpu_vf_error_put(AMDGIM_ERROR_VF_INIT_MMSCH_FAIL, 0, > data); > return -EBUSY; > } > WDOORBELL32(adev->vce.ring[0].doorbell_index, 0); > diff --git a/drivers/gpu/drm/amd/amdgpu/vf_error.c > b/drivers/gpu/drm/amd/amdgpu/vf_error.c > new file mode 100644 > index 000000000000..9ae58f3237d1 > --- /dev/null > +++ b/drivers/gpu/drm/amd/amdgpu/vf_error.c > @@ -0,0 +1,210 @@ > +/* > + * Copyright 2017 Advanced Micro Devices, Inc. 
> + * > + * Permission is hereby granted, free of charge, to any person obtaining a > + * copy of this software and associated documentation files (the > "Software"), > + * to deal in the Software without restriction, including without limitation > + * the rights to use, copy, modify, merge, publish, distribute, sublicense, > + * and/or sell copies of the Software, and to permit persons to whom the > + * Software is furnished to do so, subject to the following conditions: > + * > + * The above copyright notice and this permission notice shall be included in > + * all copies or substantial portions of the Software. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, > EXPRESS OR > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF > MERCHANTABILITY, > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO > EVENT SHALL > + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, > DAMAGES OR > + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR > OTHERWISE, > + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR > THE USE OR > + * OTHER DEALINGS IN THE SOFTWARE. > + * > + */ > + > +#include "amdgpu.h" > +#include "vf_error.h" > +/* Needs enum IDH_LOG_VF_ERROR, it is defined in both mxgpu_ai.h and > mxgpu_vi.h. */ > +#include "mxgpu_ai.h" > + > +#define AMDGPU_VF_ERROR_ENTRY_SIZE 32 > + > +/* struct error_entry - amdgpu VF error information. */ > +struct amdgpu_vf_error_buffer { > + int read_count; > + int write_count; > + uint16_t code[AMDGPU_VF_ERROR_ENTRY_SIZE]; > + uint16_t flags[AMDGPU_VF_ERROR_ENTRY_SIZE]; > + uint64_t data[AMDGPU_VF_ERROR_ENTRY_SIZE]; > +}; > + > +struct amdgpu_vf_error_buffer admgpu_vf_errors; > + > +#if 0 /* amdgpu driver does not need following code, but we should keep > them same as the code of AMD GIM driver */ You can probably drop these big chunks of commented out code. Likely someone will send a patch to drop them eventually. 
> +struct error_text > +{ > + uint8_t arg_type; > + char* text; > +}; > + > +enum error_data_type > +{ > + ERROR_DATA_ARG_NONE = 0, // No error data > + ERROR_DATA_ARG_64, // 64-bit > + ERROR_DATA_ARG_32_32, // 32bit 32bit > + ERROR_DATA_ARG_16_16_32, // 16bit 16bit 32bit > +}; > + > +static const struct error_text amdgim_error_vf > [AMDGIM_ERROR_VF_MAX] = > +{ > + /* AMDGIM_ERROR_VF_GPU_INIT_FATAL_FAIL */ > {ERROR_DATA_ARG_NONE, "Fatal error during GPU init."}, > + /* AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL */ > {ERROR_DATA_ARG_NONE, "amdgpu_atombios_init failed."}, > + /* AMDGIM_ERROR_VF_UNLOCATE_BIOS_ROM */ > {ERROR_DATA_ARG_NONE, "Unable to locate a BIOS ROM."}, > + /* AMDGIM_ERROR_VF_NO_VBIOS */ > {ERROR_DATA_ARG_NONE, "no vBIOS found"}, > + /* AMDGIM_ERROR_VF_GPU_POST_ERROR */ > {ERROR_DATA_ARG_NONE, "gpu post error."}, > + > + /* AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL */ > {ERROR_DATA_ARG_NONE, "amdgpu_atombios_get_clock_info failed."}, > + /* AMDGIM_ERROR_VF_FENCE_INIT_FAIL */ > {ERROR_DATA_ARG_NONE, "amdgpu_fence_driver_init failed."}, > + /* AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL */ > {ERROR_DATA_ARG_NONE, "amdgpu_init failed."}, > + /* AMDGIM_ERROR_VF_IB_INIT_FAIL */ > {ERROR_DATA_ARG_64, "IB initialization failed (%d)."}, > + /* AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL */ > {ERROR_DATA_ARG_NONE, "amdgpu_late_init failed."}, > + > + /* AMDGIM_ERROR_VF_ASIC_RESUME_FAIL */ > {ERROR_DATA_ARG_64, "asic resume failed (%d)."}, > + /* AMDGIM_ERROR_VF_GPU_RESET_FAIL */ > {ERROR_DATA_ARG_NONE, "GPU reset failed."}, > + /* AMDGIM_ERROR_VF_MMSCH_INIT_FAIL */ > {ERROR_DATA_ARG_64, "failed to init MMSCH, > mmVCE_MMSCH_VF_MAILBOX_RESP = 0x%x."}, > + /* AMDGIM_ERROR_VF_UVD_NORESP_GIVEUP */ > {ERROR_DATA_ARG_NONE, "UVD not responding, giving up."}, > + /* AMDGIM_ERROR_VF_UVD_NORESP_RESET */ > {ERROR_DATA_ARG_NONE, "UVD not responding, trying to reset the > VCPU."}, > + > + /* AMDGIM_ERROR_VF_LOAD_GFX_FIRMWARE_FAIL */ > {ERROR_DATA_ARG_NONE, "Failed to load gfx firmware."}, > + /* 
AMDGIM_ERROR_VF_MEC_BO_INIT_FAIL */ > {ERROR_DATA_ARG_NONE, "Failed to init MEC BOs."}, > + /* AMDGIM_ERROR_VF_ADD_DEV_TO_GENPD_FAIL */ > {ERROR_DATA_ARG_NONE, "Failed to add dev to genpd."}, > + /* AMDGIM_ERROR_VF_IH_WB_ALLOC_FAIL */ > {ERROR_DATA_ARG_64, "IH wptr_offs wb alloc failed (%d)."}, > + /* AMDGIM_ERROR_VF_BO_ALLOC_K_FAIL */ > {ERROR_DATA_ARG_64, "Failed to allocate kernel bo (%d)."}, > + > + /* AMDGIM_ERROR_VF_BO_RESERVE_FAIL */ > {ERROR_DATA_ARG_64, "Failed to reserve kernel bo (%d)."}, > + /* AMDGIM_ERROR_VF_BO_PIN_FAIL */ > {ERROR_DATA_ARG_64, "Kernel bo pin failed (%d)."}, > + /* AMDGIM_ERROR_VF_BO_MAP_FAIL */ > {ERROR_DATA_ARG_64, "Kernel bo map failed (%d)."}, > + /* AMDGIM_ERROR_VF_RING_R_WB_ALLOC_FAIL */ > {ERROR_DATA_ARG_64, "Ring rptr_offs wb alloc failed (%d)."}, > + /* AMDGIM_ERROR_VF_RING_W_WB_ALLOC_FAIL */ > {ERROR_DATA_ARG_64, "Ring wptr_offs wb alloc failed (%d)."}, > + > + /* AMDGIM_ERROR_VF_RING_F_WB_ALLOC_FAIL */ > {ERROR_DATA_ARG_64, "Ring fence_offs wb alloc failed (%d)."}, > + /* AMDGIM_ERROR_VF_RING_C_WB_ALLOC_FAIL */ > {ERROR_DATA_ARG_64, "Ring cond_exec_polling wb alloc failed (%d)."}, > + /* AMDGIM_ERROR_VF_INIT_FENCE_FAIL */ > {ERROR_DATA_ARG_64, "failed initializing fences (%d)."}, > + /* AMDGIM_ERROR_VF_RING_CREATE_FAIL */ > {ERROR_DATA_ARG_64, "Ring create failed (%d)."}, > + /* AMDGIM_ERROR_VF_BO_ALLOC_M_FAIL */ > {ERROR_DATA_ARG_64, "Failed to allocate bo for manager (%d)."}, > + > + /* AMDGIM_ERROR_VF_NO_BO_FOR_SA */ > {ERROR_DATA_ARG_NONE, "No bo for sa manager."}, > + /* AMDGIM_ERROR_VF_FW_ALLOC_FAIL */ > {ERROR_DATA_ARG_64, "Firmware buffer allocate failed (%d)."}, > + /* AMDGIM_ERROR_VF_FW_RESERVE_FAIL */ > {ERROR_DATA_ARG_64, "Firmware buffer reserve failed (%d)."}, > + /* AMDGIM_ERROR_VF_FW_PIN_FAIL */ > {ERROR_DATA_ARG_64, "Firmware buffer pin failed (%d)."}, > + /* AMDGIM_ERROR_VF_FW_KMAP_FAIL */ > {ERROR_DATA_ARG_64, "Firmware buffer kmap failed (%d)."}, > + > + /* AMDGIM_ERROR_VF_UVD_NOT_LOAD_FW */ > 
{ERROR_DATA_ARG_NONE, "amdgpu_uvd: Can't load firmware."}, > + /* AMDGIM_ERROR_VF_UVD_NOT_VALIDATE_FW */ > {ERROR_DATA_ARG_NONE, "amdgpu_uvd: Can't validate firmware."}, > + /* AMDGIM_ERROR_VF_ALLOC_UVD_BO_FAIL */ > {ERROR_DATA_ARG_64, "Failed to allocate UVD bo (%d)."}, > + /* AMDGIM_ERROR_VF_VCE_NOT_LOAD_FW */ > {ERROR_DATA_ARG_NONE, "amdgpu_vce: Can't load firmware."}, > + /* AMDGIM_ERROR_VF_VCE_NOT_VALIDATE_FW */ > {ERROR_DATA_ARG_NONE, "amdgpu_vce: Can't validate firmware."}, > + > + /* AMDGIM_ERROR_VF_ALLOC_VCE_BO_FAIL */ > {ERROR_DATA_ARG_64, "Failed to allocate VCE bo (%d)."}, > + /* AMDGIM_ERROR_VF_VCE_RESERVE_FAIL */ > {ERROR_DATA_ARG_64, "Failed to reserve VCE bo (%d)."}, > + /* AMDGIM_ERROR_VF_VCE_KMAP_FAIL */ > {ERROR_DATA_ARG_64, "VCE kmap failed (%d)."}, > + /* AMDGIM_ERROR_VF_NO_VRAM_FOR_GART */ > {ERROR_DATA_ARG_NONE, "No VRAM object for PCIE GART."}, > + /* AMDGIM_ERROR_VF_PSP_LOAD_FW_FAIL */ > {ERROR_DATA_ARG_NONE, "PSP: Failed to load firmware."}, > + > + /* AMDGIM_ERROR_VF_INIT_MMSCH_FAIL */ > {ERROR_DATA_ARG_64, "failed to init MMSCH, > mmVCE_MMSCH_VF_MAILBOX_RESP = %x."}, > + /* AMDGIM_ERROR_VF_GFX_LOAD_FW_FAIL */ > {ERROR_DATA_ARG_NONE, "gfx: Failed to load firmware."}, > + /* AMDGIM_ERROR_VF_NGG_CREATE_BUF_FAIL */ > {ERROR_DATA_ARG_64, "Failed to create NGG buffer (%d)."}, > + /* AMDGIM_ERROR_VF_NGG_CREATE_PR_BUF_FAIL */ > {ERROR_DATA_ARG_64, "Failed to create Primitive Buffer (%d)."}, > + /* AMDGIM_ERROR_VF_NGG_CREATE_PO_BUF_FAIL */ > {ERROR_DATA_ARG_64, "Failed to create Position Buffer (%d)."}, > + > + /* AMDGIM_ERROR_VF_NGG_CREATE_CS_BUF_FAIL */ > {ERROR_DATA_ARG_64, "Failed to create Control Sideband Buffer (%d)."}, > + /* AMDGIM_ERROR_VF_NGG_CREATE_PC_BUF_FAIL */ > {ERROR_DATA_ARG_64, "Failed to create Parameter Cache (%d)."}, > + /* AMDGIM_ERROR_VF_BUFL_SIZE_INVALID */ > {ERROR_DATA_ARG_64, "Buffer size is invalid: %d"}, > + /* AMDGIM_ERROR_VF_RLC_BO_INIT_FAIL */ > {ERROR_DATA_ARG_64, "Failed to init rlc BOs (%d)"}, > + > + /* 
AMDGIM_ERROR_VF_TEST */ {ERROR_DATA_ARG_64, > "This is error log collect test for VF component (test count %llu)."} > +}; > + > +int get_vf_error_text (uint32_t error_code, uint64_t error_data, char* > error_msg, int buf_size) > +{ > + int error_catagory = AMDGIM_ERROR_CATAGORY(error_code); > + int error_sub_code = AMDGIM_ERROR_SUBCODE(error_code); > + > + if (AMDGIM_ERROR_CATEGORY_VF != error_catagory) { > + return 0; > + } > + if (error_sub_code >= AMDGIM_ERROR_VF_MAX) { > + return 0; > + } > + switch (amdgim_error_vf[error_sub_code].arg_type) { > + case ERROR_DATA_ARG_NONE: > + snprintf (error_msg, buf_size - 1, > amdgim_error_vf[error_sub_code].text); > + break; > + case ERROR_DATA_ARG_64: > + snprintf (error_msg, buf_size - 1, > amdgim_error_vf[error_sub_code].text, > + error_data); > + break; > + case ERROR_DATA_ARG_32_32: > + snprintf (error_msg, buf_size - 1, > amdgim_error_vf[error_sub_code].text, > + (uint32_t)(error_data >> 32), > + (uint32_t)(error_data & 0xFFFFFFFF)); > + break; > + case ERROR_DATA_ARG_16_16_32: > + snprintf (error_msg, buf_size - 1, > amdgim_error_vf[error_sub_code].text, > + (uint16_t)(error_data >> 48), > + (uint16_t)((error_data >> 32) & 0xFFFF), > + (uint32_t)(error_data & 0xFFFFFFFF)); > + break; > + default: > + return 0; > + break; > + } > + return strlen (error_msg); > +} > + > +#endif > + > +void amdgpu_vf_error_put(uint16_t sub_error_code, uint16_t error_flags, > uint64_t error_data) > +{ > + int index; > + uint16_t error_code = > AMDGIM_ERROR_CODE(AMDGIM_ERROR_CATEGORY_VF, > sub_error_code); > + > + index = admgpu_vf_errors.write_count % > AMDGPU_VF_ERROR_ENTRY_SIZE; > + admgpu_vf_errors.code [index] = error_code; > + admgpu_vf_errors.flags [index] = error_flags; > + admgpu_vf_errors.data [index] = error_data; > + admgpu_vf_errors.write_count ++; > +} > + > + > +void amdgpu_vf_error_trans_all(struct amdgpu_device *adev) > +{ > + //u32 pf2vf_flags = 0; Please use C comments (/* */) to comment out code > + u32 data1, data2, 
data3; > + int index; > + > + if ((NULL == adev) || (!amdgpu_sriov_vf(adev)) || (!adev->virt.ops) > || (!adev->virt.ops->trans_msg)){ > + return; > + } > +/* > + TODO: Enable these code when pv2vf_info is merged > + AMDGPU_FW_VRAM_PF2VF_READ (adev, feature_flags, > &pf2vf_flags); > + if (!(pf2vf_flags & AMDGIM_FEATURE_ERROR_LOG_COLLECT)) > + { > + return; > + } Sorry I missed this before; either move the { onto the same line as the if, or drop the braces. > +*/ > + /* The errors are overlay of array, correct read_count as full. */ > + if (admgpu_vf_errors.write_count - admgpu_vf_errors.read_count > > AMDGPU_VF_ERROR_ENTRY_SIZE) { > + admgpu_vf_errors.read_count = > admgpu_vf_errors.write_count - AMDGPU_VF_ERROR_ENTRY_SIZE; > + } > + > + while (admgpu_vf_errors.read_count < > admgpu_vf_errors.write_count) { > + index =admgpu_vf_errors.read_count % > AMDGPU_VF_ERROR_ENTRY_SIZE; > + data1 = AMDGIM_ERROR_CODE_FLAGS_TO_MAILBOX > (admgpu_vf_errors.code[index], admgpu_vf_errors.flags[index]); > + data2 = admgpu_vf_errors.data[index] & 0xFFFFFFFF; > + data3 = (admgpu_vf_errors.data[index] >> 32) & 0xFFFFFFFF; > + > + adev->virt.ops->trans_msg(adev, IDH_LOG_VF_ERROR, > data1, data2, data3); > + admgpu_vf_errors.read_count ++; > + } > +} > diff --git a/drivers/gpu/drm/amd/amdgpu/vf_error.h > b/drivers/gpu/drm/amd/amdgpu/vf_error.h > new file mode 100644 > index 000000000000..612ed246aa7c > --- /dev/null > +++ b/drivers/gpu/drm/amd/amdgpu/vf_error.h > @@ -0,0 +1,120 @@ > +/* > + * Copyright 2017 Advanced Micro Devices, Inc. 
> + * > + * Permission is hereby granted, free of charge, to any person obtaining a > + * copy of this software and associated documentation files (the > "Software"), > + * to deal in the Software without restriction, including without limitation > + * the rights to use, copy, modify, merge, publish, distribute, sublicense, > + * and/or sell copies of the Software, and to permit persons to whom the > + * Software is furnished to do so, subject to the following conditions: > + * > + * The above copyright notice and this permission notice shall be included in > + * all copies or substantial portions of the Software. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, > EXPRESS OR > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF > MERCHANTABILITY, > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO > EVENT SHALL > + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, > DAMAGES OR > + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR > OTHERWISE, > + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR > THE USE OR > + * OTHER DEALINGS IN THE SOFTWARE. > + * > + */ > + > +#ifndef __VF_ERROR_H__ > +#define __VF_ERROR_H__ > + > +#define AMDGIM_ERROR_CODE_FLAGS_TO_MAILBOX(c,f) (((c & 0xFFFF) > << 16) | (f & 0xFFFF)) > +#define AMDGIM_ERROR_CODE(t,c) (((t&0xF)<<12)|(c&0xFFF)) > +#define AMDGIM_ERROR_CATAGORY(c) ((c>>12) & 0xF) > +#define AMDGIM_ERROR_SUBCODE(c) (c&0xFFF) > +#define AMDGIM_GPU_ERROR_MSG_SIZE 256 /* The length of > error text should be less than this number. */ > +#define AMDGIM_GPU_ERROR_BUF_SAFE_SIZE > (AMDGIM_GPU_ERROR_MSG_SIZE + 120) /* The safe length of printing out > the full error message to buffer. */ > + > + > +/* Please keep enum same as AMD GIM driver */ > +enum { Please use named enums. Some compilers complain about anonymous enums. 
> + AMDGIM_ERROR_VF_GPU_INIT_FATAL_FAIL = 0, > + AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, > + AMDGIM_ERROR_VF_UNLOCATE_BIOS_ROM, > + AMDGIM_ERROR_VF_NO_VBIOS, > + AMDGIM_ERROR_VF_GPU_POST_ERROR, > + > + AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, > + AMDGIM_ERROR_VF_FENCE_INIT_FAIL, > + AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, > + AMDGIM_ERROR_VF_IB_INIT_FAIL, > + AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, > + > + AMDGIM_ERROR_VF_ASIC_RESUME_FAIL, > + AMDGIM_ERROR_VF_GPU_RESET_FAIL, > + AMDGIM_ERROR_VF_MMSCH_INIT_FAIL, > + AMDGIM_ERROR_VF_UVD_NORESP_GIVEUP, > + AMDGIM_ERROR_VF_UVD_NORESP_RESET, > + > + AMDGIM_ERROR_VF_LOAD_GFX_FIRMWARE_FAIL, > + AMDGIM_ERROR_VF_MEC_BO_INIT_FAIL, > + AMDGIM_ERROR_VF_ADD_DEV_TO_GENPD_FAIL, > + AMDGIM_ERROR_VF_IH_WB_ALLOC_FAIL, > + AMDGIM_ERROR_VF_BO_ALLOC_K_FAIL, > + > + AMDGIM_ERROR_VF_BO_RESERVE_FAIL, > + AMDGIM_ERROR_VF_BO_PIN_FAIL, > + AMDGIM_ERROR_VF_BO_MAP_FAIL, > + AMDGIM_ERROR_VF_RING_R_WB_ALLOC_FAIL, > + AMDGIM_ERROR_VF_RING_W_WB_ALLOC_FAIL, > + > + AMDGIM_ERROR_VF_RING_F_WB_ALLOC_FAIL, > + AMDGIM_ERROR_VF_RING_C_WB_ALLOC_FAIL, > + AMDGIM_ERROR_VF_INIT_FENCE_FAIL, > + AMDGIM_ERROR_VF_RING_CREATE_FAIL, > + AMDGIM_ERROR_VF_BO_ALLOC_M_FAIL, > + > + AMDGIM_ERROR_VF_NO_BO_FOR_SA, > + AMDGIM_ERROR_VF_FW_ALLOC_FAIL, > + AMDGIM_ERROR_VF_FW_RESERVE_FAIL, > + AMDGIM_ERROR_VF_FW_PIN_FAIL, > + AMDGIM_ERROR_VF_FW_KMAP_FAIL, > + > + AMDGIM_ERROR_VF_UVD_NOT_LOAD_FW, > + AMDGIM_ERROR_VF_UVD_NOT_VALIDATE_FW, > + AMDGIM_ERROR_VF_ALLOC_UVD_BO_FAIL, > + AMDGIM_ERROR_VF_VCE_NOT_LOAD_FW, > + AMDGIM_ERROR_VF_VCE_NOT_VALIDATE_FW, > + > + AMDGIM_ERROR_VF_ALLOC_VCE_BO_FAIL, > + AMDGIM_ERROR_VF_VCE_RESERVE_FAIL, > + AMDGIM_ERROR_VF_VCE_KMAP_FAIL, > + AMDGIM_ERROR_VF_NO_VRAM_FOR_GART, > + AMDGIM_ERROR_VF_PSP_LOAD_FW_FAIL, > + > + AMDGIM_ERROR_VF_INIT_MMSCH_FAIL, > + AMDGIM_ERROR_VF_GFX_LOAD_FW_FAIL, > + AMDGIM_ERROR_VF_NGG_CREATE_BUF_FAIL, > + AMDGIM_ERROR_VF_NGG_CREATE_PR_BUF_FAIL, > + AMDGIM_ERROR_VF_NGG_CREATE_PO_BUF_FAIL, > + > + 
AMDGIM_ERROR_VF_NGG_CREATE_CS_BUF_FAIL, > + AMDGIM_ERROR_VF_NGG_CREATE_PC_BUF_FAIL, > + AMDGIM_ERROR_VF_BUFL_SIZE_INVALID, > + AMDGIM_ERROR_VF_RLC_BO_INIT_FAIL, > + > + AMDGIM_ERROR_VF_TEST, > + AMDGIM_ERROR_VF_MAX > +}; > + > +enum { Same here. > + AMDGIM_ERROR_CATEGORY_NON_USED = 0, > + AMDGIM_ERROR_CATEGORY_GIM, > + AMDGIM_ERROR_CATEGORY_PF, > + AMDGIM_ERROR_CATEGORY_VF, > + AMDGIM_ERROR_CATEGORY_VBIOS, > + AMDGIM_ERROR_CATEGORY_MONITOR, > + > + AMDGIM_ERROR_CATEGORY_MAX > +}; > + > +void amdgpu_vf_error_put(uint16_t sub_error_code, uint16_t error_flags, > uint64_t error_data); > +void amdgpu_vf_error_trans_all (struct amdgpu_device *adev); > + > +#endif /* __VF_ERROR_H__ */ > -- > 2.11.0