> -----Original Message-----
> From: amd-gfx <amd-gfx-bounces@xxxxxxxxxxxxxxxxxxxxx> On Behalf Of Tony Yi
> Sent: Thursday, February 27, 2025 11:12 PM
> To: Yi, Tony <Tony.Yi@xxxxxxx>; Skvortsov, Victor <Victor.Skvortsov@xxxxxxx>; amd-gfx@xxxxxxxxxxxxxxxxxxxxx; Zhang, Hawking <Hawking.Zhang@xxxxxxx>; Luo, Zhigang <Zhigang.Luo@xxxxxxx>
> Cc: Yi, Tony <Tony.Yi@xxxxxxx>
> Subject: [PATCH 2/2] drm/amdgpu: Add support for CPERs on virtualization
>
> Add support for CPERs on VFs.
>
> VFs do not receive PMFW messages directly; as such, they need to query them
> from the host. To avoid hitting the host event guard, CPER queries need to be
> rate limited. CPER queries share the same RAS telemetry buffer as the error
> count query, so a mutex protecting the shared buffer was added as well.
>
> For readability, amdgpu_detect_virtualization was refactored into multiple
> individual functions.
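The guarded-query pattern the commit message describes (consult the rate limiter first, then serialize access to the shared telemetry buffer) distills to roughly the sketch below. The names host_query_rs, telemetry_lock, and do_host_query() are illustrative stand-ins, not symbols from this patch:

#include <linux/ratelimit.h>
#include <linux/mutex.h>

static DEFINE_RATELIMIT_STATE(host_query_rs, 5 * HZ, 1); /* at most 1 query per 5s */
static DEFINE_MUTEX(telemetry_lock); /* guards the shared RAS telemetry buffer */

static int do_host_query(void); /* hypothetical: issues the actual host request */

static int guarded_host_query(bool force_update)
{
	int ret = 0;

	/* __ratelimit() returns nonzero when this call is allowed through */
	if (__ratelimit(&host_query_rs) || force_update) {
		mutex_lock(&telemetry_lock);
		ret = do_host_query();
		mutex_unlock(&telemetry_lock);
	}

	return ret;
}

Note that a forced update bypasses the rate limiter but still takes the mutex, matching how the error count and CPER paths in the patch both serialize their use of fw_reserve.ras_telemetry.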
>
> Signed-off-by: Tony Yi <Tony.Yi@xxxxxxx>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |   7 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c   |  31 ++++-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c   | 138 ++++++++++++++++++++-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h   |  18 ++-
>  drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c      |  14 +++
>  5 files changed, 195 insertions(+), 13 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index 5e1d8f0039d0..198d29faa754 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -3099,7 +3099,8 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
>
>  	amdgpu_fru_get_product_info(adev);
>
> -	r = amdgpu_cper_init(adev);
> +	if (!amdgpu_sriov_vf(adev) || amdgpu_sriov_ras_cper_en(adev))
> +		r = amdgpu_cper_init(adev);
>
>  init_failed:
>
> @@ -4335,10 +4336,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
>  	 * for throttling interrupt) = 60 seconds.
>  	 */
>  	ratelimit_state_init(&adev->throttling_logging_rs, (60 - 1) * HZ, 1);
> -	ratelimit_state_init(&adev->virt.ras_telemetry_rs, 5 * HZ, 1);
>
>  	ratelimit_set_flags(&adev->throttling_logging_rs, RATELIMIT_MSG_ON_RELEASE);
> -	ratelimit_set_flags(&adev->virt.ras_telemetry_rs, RATELIMIT_MSG_ON_RELEASE);
>
>  	/* Registers mapping */
>  	/* TODO: block userspace mapping of io register */
> @@ -4370,7 +4369,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
>  		return -ENOMEM;
>
>  	/* detect hw virtualization here */
> -	amdgpu_detect_virtualization(adev);
> +	amdgpu_virt_init(adev);
>
>  	amdgpu_device_get_pcie_info(adev);
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
> index 81a7d4faac9c..d55c8b7fdb59 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
> @@ -578,12 +578,32 @@ static ssize_t amdgpu_debugfs_ring_read(struct file *f, char __user *buf,
>  	return result;
>  }
>
> +static ssize_t amdgpu_debugfs_virt_ring_read(struct file *f, char __user *buf,
> +					     size_t size, loff_t *pos)
> +{
> +	struct amdgpu_ring *ring = file_inode(f)->i_private;
> +
> +	if (*pos & 3 || size & 3)
> +		return -EINVAL;
> +
> +	if (ring->funcs->type == AMDGPU_RING_TYPE_CPER)
> +		amdgpu_virt_req_ras_cper_dump(ring->adev, false);
> +
> +	return amdgpu_debugfs_ring_read(f, buf, size, pos);
> +}
> +
>  static const struct file_operations amdgpu_debugfs_ring_fops = {
>  	.owner = THIS_MODULE,
>  	.read = amdgpu_debugfs_ring_read,
>  	.llseek = default_llseek
>  };
>
> +static const struct file_operations amdgpu_debugfs_virt_ring_fops = {
> +	.owner = THIS_MODULE,
> +	.read = amdgpu_debugfs_virt_ring_read,
> +	.llseek = default_llseek
> +};
> +
>  static ssize_t amdgpu_debugfs_mqd_read(struct file *f, char __user *buf,
>  				       size_t size, loff_t *pos)
>  {
> @@ -671,9 +691,14 @@ void amdgpu_debugfs_ring_init(struct amdgpu_device *adev,
>  	char name[32];
>
>  	sprintf(name, "amdgpu_ring_%s", ring->name);
> -	debugfs_create_file_size(name, S_IFREG | 0444, root, ring,
> -				 &amdgpu_debugfs_ring_fops,
> -				 ring->ring_size + 12);
> +	if (amdgpu_sriov_vf(adev))
> +		debugfs_create_file_size(name, S_IFREG | 0444, root, ring,
> +					 &amdgpu_debugfs_virt_ring_fops,
> +					 ring->ring_size + 12);
> +	else
> +		debugfs_create_file_size(name, S_IFREG | 0444, root, ring,
> +					 &amdgpu_debugfs_ring_fops,
> +					 ring->ring_size + 12);
>
>  	if (ring->mqd_obj) {
>  		sprintf(name, "amdgpu_mqd_%s", ring->name);
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> index e6f0152e5b08..3832513ec7bf 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> @@ -739,7 +739,7 @@ void amdgpu_virt_exchange_data(struct amdgpu_device *adev)
>  	}
>  }
>
> -void amdgpu_detect_virtualization(struct amdgpu_device *adev)
> +static u32 amdgpu_virt_init_detect_asic(struct amdgpu_device *adev)
>  {
>  	uint32_t reg;
>
> @@ -775,8 +775,17 @@ void amdgpu_detect_virtualization(struct amdgpu_device *adev)
>  		adev->virt.caps |= AMDGPU_PASSTHROUGH_MODE;
>  	}
>
> +	return reg;
> +}
> +
> +static bool amdgpu_virt_init_req_data(struct amdgpu_device *adev, u32 reg)
> +{
> +	bool is_sriov = false;
> +
>  	/* we have the ability to check now */
>  	if (amdgpu_sriov_vf(adev)) {
> +		is_sriov = true;
> +
>  		switch (adev->asic_type) {
>  		case CHIP_TONGA:
>  		case CHIP_FIJI:
> @@ -805,10 +814,39 @@ void amdgpu_detect_virtualization(struct amdgpu_device *adev)
>  			amdgpu_virt_request_init_data(adev);
>  			break;
>  		default: /* other chip doesn't support SRIOV */
> +			is_sriov = false;
>  			DRM_ERROR("Unknown asic type: %d!\n", adev->asic_type);
>  			break;
>  		}
>  	}
> +
> +	return is_sriov;
> +}
> +
> +static void amdgpu_virt_init_ras(struct amdgpu_device *adev)
> +{
> +	ratelimit_state_init(&adev->virt.ras.ras_error_cnt_rs, 5 * HZ, 1);
> +	ratelimit_state_init(&adev->virt.ras.ras_cper_dump_rs, 5 * HZ, 1);
> +
> +	ratelimit_set_flags(&adev->virt.ras.ras_error_cnt_rs,
> +			    RATELIMIT_MSG_ON_RELEASE);
> +	ratelimit_set_flags(&adev->virt.ras.ras_cper_dump_rs,
> +			    RATELIMIT_MSG_ON_RELEASE);
> +
> +	mutex_init(&adev->virt.ras.ras_telemetry_mutex);
> +
> +	adev->virt.ras.cper_rptr = 0;
> +}
> +
> +void amdgpu_virt_init(struct amdgpu_device *adev)
> +{
> +	bool is_sriov = false;
> +	uint32_t reg = amdgpu_virt_init_detect_asic(adev);
> +
> +	is_sriov = amdgpu_virt_init_req_data(adev, reg);
> +
> +	if (is_sriov)
> +		amdgpu_virt_init_ras(adev);
>  }
>
>  static bool amdgpu_virt_access_debugfs_is_mmio(struct amdgpu_device *adev)
> @@ -1288,10 +1326,12 @@ static int amdgpu_virt_req_ras_err_count_internal(struct amdgpu_device *adev, bo
>  	 * will ignore incoming guest messages. Ratelimit the guest messages to
>  	 * prevent guest self DOS.
>  	 */
> -	if (__ratelimit(&adev->virt.ras_telemetry_rs) || force_update) {
> +	if (__ratelimit(&virt->ras.ras_error_cnt_rs) || force_update) {
> +		mutex_lock(&virt->ras.ras_telemetry_mutex);
>  		if (!virt->ops->req_ras_err_count(adev))
>  			amdgpu_virt_cache_host_error_counts(adev,
> -				adev->virt.fw_reserve.ras_telemetry);
> +				virt->fw_reserve.ras_telemetry);
> +		mutex_unlock(&virt->ras.ras_telemetry_mutex);
>  	}
>
>  	return 0;
> @@ -1322,6 +1362,98 @@ int amdgpu_virt_req_ras_err_count(struct amdgpu_device *adev, enum amdgpu_ras_bl
>  	return 0;
>  }
>
> +static int
> +amdgpu_virt_write_cpers_to_ring(struct amdgpu_device *adev,
> +				struct amdsriov_ras_telemetry *host_telemetry,
> +				u32 *more)
> +{
> +	struct amd_sriov_ras_cper_dump *cper_dump = NULL;
> +	struct cper_hdr *entry = NULL;
> +	struct amdgpu_ring *ring = &adev->cper.ring_buf;
> +	uint32_t checksum, used_size, i, j;
> +	int ret = 0;
> +
> +	checksum = host_telemetry->header.checksum;
> +	used_size = host_telemetry->header.used_size;
> +
> +	if (used_size > (AMD_SRIOV_RAS_TELEMETRY_SIZE_KB << 10))
> +		return 0;
> +
> +	cper_dump = kmemdup(&host_telemetry->body.cper_dump, used_size, GFP_KERNEL);
> +	if (!cper_dump)
> +		return -ENOMEM;
> +
> +	if (checksum != amd_sriov_msg_checksum(cper_dump, used_size, 0, 0))
> +		goto out;
> +
> +	*more = cper_dump->more;
> +
> +	if (cper_dump->wptr < adev->virt.ras.cper_rptr) {
> +		dev_warn(
> +			adev->dev,
> +			"guest specified rptr that was too high! guest rptr: 0x%llx, host rptr: 0x%llx\n",
> +			adev->virt.ras.cper_rptr, cper_dump->wptr);
> +
> +		adev->virt.ras.cper_rptr = cper_dump->wptr;
> +		goto out;
> +	}
> +
> +	entry = (struct cper_hdr *)&cper_dump->buf[0];
> +
> +	for (i = 0; i < cper_dump->count; i++) {
> +		amdgpu_cper_ring_write(ring, entry, entry->record_length);
> +		entry = (struct cper_hdr *)((char *)entry +
> +					    entry->record_length);
> +	}
> +
> +	if (cper_dump->overflow_count)
> +		dev_warn(adev->dev,
> +			 "host reported CPER overflow of 0x%llx entries!\n",
> +			 cper_dump->overflow_count);
> +
> +	adev->virt.ras.cper_rptr = cper_dump->wptr;
> +out:
> +	kfree(cper_dump);
> +
> +	return ret;
> +}
> +
> +static int amdgpu_virt_req_ras_cper_dump_internal(struct amdgpu_device *adev)
> +{
> +	struct amdgpu_virt *virt = &adev->virt;
> +	int ret = 0;
> +	uint32_t more = 0;
> +
> +	if (!amdgpu_sriov_ras_cper_en(adev))
> +		return -EOPNOTSUPP;
> +
> +	do {
> +		if (!virt->ops->req_ras_cper_dump(adev, virt->ras.cper_rptr))
> +			ret = amdgpu_virt_write_cpers_to_ring(
> +				adev, virt->fw_reserve.ras_telemetry, &more);
> +		else
> +			ret = 0;
> +	} while (more);
> +
> +	return ret;
> +}
> +
> +int amdgpu_virt_req_ras_cper_dump(struct amdgpu_device *adev, bool force_update)
> +{
> +	struct amdgpu_virt *virt = &adev->virt;
> +	int ret = 0;
> +
> +	if ((__ratelimit(&virt->ras.ras_cper_dump_rs) || force_update) &&
> +	    down_read_trylock(&adev->reset_domain->sem)) {
> +		mutex_lock(&virt->ras.ras_telemetry_mutex);
> +		ret = amdgpu_virt_req_ras_cper_dump_internal(adev);
> +		mutex_unlock(&virt->ras.ras_telemetry_mutex);
> +		up_read(&adev->reset_domain->sem);
> +	}
> +
> +	return ret;
> +}
> +
>  int amdgpu_virt_ras_telemetry_post_reset(struct amdgpu_device *adev)
>  {
>  	unsigned long ue_count, ce_count;
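For context on the copy loop in amdgpu_virt_write_cpers_to_ring() above: the host hands back a packed run of variable-length CPER records, and each record's header stores its own total length, so advancing by record_length reaches the next header. A minimal illustration of that traversal follows; struct cper_rec_hdr here is a trimmed stand-in for the real CPER header (which has more fields and a fixed UEFI layout), and walk_cper_records()/emit() are hypothetical names:

#include <linux/types.h>

/* trimmed stand-in: only the field the walk needs */
struct cper_rec_hdr {
	u32 record_length; /* bytes, header plus payload */
};

static void walk_cper_records(const void *buf, u32 count,
			      void (*emit)(const void *rec, u32 len))
{
	const char *p = buf;
	u32 i;

	for (i = 0; i < count; i++) {
		const struct cper_rec_hdr *hdr = (const void *)p;

		emit(hdr, hdr->record_length); /* hand one whole record on */
		p += hdr->record_length;       /* jump to the next header */
	}
}

The patch validates used_size and the buffer checksum before this walk runs, so the record lengths come from a verified snapshot of the shared telemetry buffer.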
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
> index 0f3ccae5c1ab..9f65487e60f5 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
> @@ -96,6 +96,7 @@ struct amdgpu_virt_ops {
>  			     enum amdgpu_ras_block block);
>  	bool (*rcvd_ras_intr)(struct amdgpu_device *adev);
>  	int (*req_ras_err_count)(struct amdgpu_device *adev);
> +	int (*req_ras_cper_dump)(struct amdgpu_device *adev, u64 vf_rptr);
>  };
>
>  /*
> @@ -140,6 +141,7 @@ enum AMDGIM_FEATURE_FLAG {
>  	AMDGIM_FEATURE_MES_INFO_ENABLE = (1 << 8),
>  	AMDGIM_FEATURE_RAS_CAPS = (1 << 9),
>  	AMDGIM_FEATURE_RAS_TELEMETRY = (1 << 10),
> +	AMDGIM_FEATURE_RAS_CPER = (1 << 11),
>  };
>
>  enum AMDGIM_REG_ACCESS_FLAG {
> @@ -242,6 +244,13 @@ struct amdgpu_virt_ras_err_handler_data {
>  	int last_reserved;
>  };
>
> +struct amdgpu_virt_ras {
> +	struct ratelimit_state ras_error_cnt_rs;
> +	struct ratelimit_state ras_cper_dump_rs;
> +	struct mutex ras_telemetry_mutex;
> +	uint64_t cper_rptr;
> +};
> +
>  /* GPU virtualization */
>  struct amdgpu_virt {
>  	uint32_t caps;
> @@ -284,8 +293,7 @@ struct amdgpu_virt {
>
>  	union amd_sriov_ras_caps ras_en_caps;
>  	union amd_sriov_ras_caps ras_telemetry_en_caps;
> -
> -	struct ratelimit_state ras_telemetry_rs;
> +	struct amdgpu_virt_ras ras;
>  	struct amd_sriov_ras_telemetry_error_count count_cache;
>  };
>
> @@ -340,6 +348,9 @@ struct amdgpu_video_codec_info;
>  #define amdgpu_sriov_ras_telemetry_block_en(adev, sriov_blk) \
>  	(amdgpu_sriov_ras_telemetry_en((adev)) && (adev)->virt.ras_telemetry_en_caps.all & BIT(sriov_blk))
>
> +#define amdgpu_sriov_ras_cper_en(adev) \
> +	((adev)->virt.gim_feature & AMDGIM_FEATURE_RAS_CPER)
> +
>  static inline bool is_virtual_machine(void)
>  {
>  #if defined(CONFIG_X86)
> @@ -378,7 +389,7 @@ void amdgpu_virt_release_ras_err_handler_data(struct amdgpu_device *adev);
>  void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev);
>  void amdgpu_virt_exchange_data(struct amdgpu_device *adev);
>  void amdgpu_virt_fini_data_exchange(struct amdgpu_device *adev);
> -void amdgpu_detect_virtualization(struct amdgpu_device *adev);
> +void amdgpu_virt_init(struct amdgpu_device *adev);
>
>  bool amdgpu_virt_can_access_debugfs(struct amdgpu_device *adev);
>  int amdgpu_virt_enable_access_debugfs(struct amdgpu_device *adev);
> @@ -406,6 +417,7 @@ u32 amdgpu_virt_rlcg_reg_rw(struct amdgpu_device *adev, u32 offset, u32 v, u32 f
>  bool amdgpu_virt_get_ras_capability(struct amdgpu_device *adev);
>  int amdgpu_virt_req_ras_err_count(struct amdgpu_device *adev, enum amdgpu_ras_block block,
>  				  struct ras_err_data *err_data);
> +int amdgpu_virt_req_ras_cper_dump(struct amdgpu_device *adev, bool force_update);
>  int amdgpu_virt_ras_telemetry_post_reset(struct amdgpu_device *adev);
>  bool amdgpu_virt_ras_telemetry_block_en(struct amdgpu_device *adev,
>  					enum amdgpu_ras_block block);
> diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
> index 4dcb72d1bdda..5aadf24cb202 100644
> --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
> +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
> @@ -184,6 +184,9 @@ static int xgpu_nv_send_access_requests_with_param(struct amdgpu_device *adev,
>  	case IDH_REQ_RAS_ERROR_COUNT:
>  		event = IDH_RAS_ERROR_COUNT_READY;
>  		break;
> +	case IDH_REQ_RAS_CPER_DUMP:
> +		event = IDH_RAS_CPER_DUMP_READY;
> +		break;
>  	default:
>  		break;
>  	}
> @@ -467,6 +470,16 @@ static int xgpu_nv_req_ras_err_count(struct amdgpu_device *adev)
>  	return xgpu_nv_send_access_requests(adev, IDH_REQ_RAS_ERROR_COUNT);
>  }
>
> +static int xgpu_nv_req_ras_cper_dump(struct amdgpu_device *adev, u64 vf_rptr)
> +{
> +	uint32_t vf_rptr_hi, vf_rptr_lo;
> +
> +	vf_rptr_hi = (uint32_t)(vf_rptr >> 32);
> +	vf_rptr_lo = (uint32_t)(vf_rptr & 0xFFFFFFFF);
> +	return xgpu_nv_send_access_requests_with_param(
> +		adev, IDH_REQ_RAS_CPER_DUMP, vf_rptr_hi, vf_rptr_lo, 0);

[Tao] So the CPER info will be sent to the host? Can the host handle CPER work by itself?

> +}
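Since the mailbox transport above only carries 32-bit parameters, the 64-bit guest read pointer travels as a hi/lo word pair, and the receiving side reassembles it in mirror order. A quick standalone illustration of the round trip (plain C, names illustrative):

#include <stdint.h>
#include <assert.h>

int main(void)
{
	uint64_t rptr = 0x123456789abcdef0ULL;

	/* split into two 32-bit mailbox words, as xgpu_nv_req_ras_cper_dump() does */
	uint32_t hi = (uint32_t)(rptr >> 32);
	uint32_t lo = (uint32_t)(rptr & 0xFFFFFFFF);

	/* receiver side: reassemble the original 64-bit pointer */
	uint64_t back = ((uint64_t)hi << 32) | lo;

	assert(back == rptr);
	return 0;
}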
> +
>  const struct amdgpu_virt_ops xgpu_nv_virt_ops = {
>  	.req_full_gpu = xgpu_nv_request_full_gpu_access,
>  	.rel_full_gpu = xgpu_nv_release_full_gpu_access,
> @@ -478,4 +491,5 @@ const struct amdgpu_virt_ops xgpu_nv_virt_ops = {
>  	.ras_poison_handler = xgpu_nv_ras_poison_handler,
>  	.rcvd_ras_intr = xgpu_nv_rcvd_ras_intr,
>  	.req_ras_err_count = xgpu_nv_req_ras_err_count,
> +	.req_ras_cper_dump = xgpu_nv_req_ras_cper_dump,
>  };
> --
> 2.34.1