Typo in the patch description. s/interafce/interface/ On 5/9/19 10:24 AM, Alex Deucher wrote: > On Thu, May 9, 2019 at 6:31 AM Pan, Xinhui <Xinhui.Pan@xxxxxxx> wrote: >> add badpages node. >> it will output badpages list in format >> gpu pfn : gpu page size : flags >> >> example >> 0x00000000 : 0x00001000 : R >> 0x00000001 : 0x00001000 : R >> 0x00000002 : 0x00001000 : R >> 0x00000003 : 0x00001000 : R >> 0x00000004 : 0x00001000 : R >> 0x00000005 : 0x00001000 : R >> 0x00000006 : 0x00001000 : R >> 0x00000007 : 0x00001000 : P >> 0x00000008 : 0x00001000 : P >> 0x00000009 : 0x00001000 : P >> >> flags can be one of below characters >> R: reserved. >> P: pending for reserve. >> F: failed to reserve for some reasons. >> >> Signed-off-by: xinhui pan <xinhui.pan@xxxxxxx> > Reviewed-by: Alex Deucher <alexander.deucher@xxxxxxx> > >> --- >> drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 146 ++++++++++++++++++++++++ >> drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 1 + >> 2 files changed, 147 insertions(+) >> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c >> index c60d5f813801..c9e24f60938e 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c >> @@ -90,6 +90,12 @@ struct ras_manager { >> struct ras_err_data err_data; >> }; >> >> +struct ras_badpage { >> + unsigned int bp; >> + unsigned int size; >> + unsigned int flags; >> +}; >> + >> const char *ras_error_string[] = { >> "none", >> "parity", >> @@ -710,6 +716,77 @@ int amdgpu_ras_query_error_count(struct amdgpu_device *adev, >> >> /* sysfs begin */ >> >> +static int amdgpu_ras_badpages_read(struct amdgpu_device *adev, >> + struct ras_badpage **bps, unsigned int *count); >> + >> +static char *amdgpu_ras_badpage_flags_str(unsigned int flags) >> +{ >> + switch (flags) { >> + case 0: >> + return "R"; >> + case 1: >> + return "P"; >> + case 2: >> + default: >> + return "F"; >> + }; >> +} >> + >> +/* >> + * DOC: ras sysfs gpu_vram_bad_pages interface >> + * >> + * It allows user to read the bad pages of vram on the gpu through >> + * /sys/class/drm/card[0/1/2...]/device/ras/gpu_vram_bad_pages >> + * >> + * It outputs multiple lines, and each line stands for one gpu page. >> + * >> + * The format of one line is below, >> + * gpu pfn : gpu page size : flags >> + * >> + * gpu pfn and gpu page size are printed in hex format. >> + * flags can be one of below character, >> + * R: reserved, this gpu page is reserved and not able to use. >> + * P: pending for reserve, this gpu page is marked as bad, will be reserved >> + * in next window of page_reserve. >> + * F: unable to reserve. this gpu page can't be reserved due to some reasons. >> + * >> + * examples: >> + * 0x00000001 : 0x00001000 : R >> + * 0x00000002 : 0x00001000 : P >> + */ >> + >> +static ssize_t amdgpu_ras_sysfs_badpages_read(struct file *f, >> + struct kobject *kobj, struct bin_attribute *attr, >> + char *buf, loff_t ppos, size_t count) >> +{ >> + struct amdgpu_ras *con = >> + container_of(attr, struct amdgpu_ras, badpages_attr); >> + struct amdgpu_device *adev = con->adev; >> + const unsigned int element_size = >> + sizeof("0xabcdabcd : 0x12345678 : R\n") - 1; >> + unsigned int start = (ppos + element_size - 1) / element_size; >> + unsigned int end = (ppos + count - 1) / element_size; >> + ssize_t s = 0; >> + struct ras_badpage *bps = NULL; >> + unsigned int bps_count = 0; >> + >> + memset(buf, 0, count); >> + >> + if (amdgpu_ras_badpages_read(adev, &bps, &bps_count)) >> + return 0; >> + >> + for (; start < end && start < bps_count; start++) >> + s += scnprintf(&buf[s], element_size + 1, >> + "0x%08x : 0x%08x : %1s\n", >> + bps[start].bp, >> + bps[start].size, >> + amdgpu_ras_badpage_flags_str(bps[start].flags)); >> + >> + kfree(bps); >> + >> + return s; >> +} >> + >> static ssize_t amdgpu_ras_sysfs_features_read(struct device *dev, >> struct device_attribute *attr, char *buf) >> { >> @@ -750,9 +827,14 @@ static int amdgpu_ras_sysfs_create_feature_node(struct amdgpu_device *adev) >> &con->features_attr.attr, >> NULL >> }; >> + struct bin_attribute *bin_attrs[] = { >> + &con->badpages_attr, >> + NULL >> + }; >> struct attribute_group group = { >> .name = "ras", >> .attrs = attrs, >> + .bin_attrs = bin_attrs, >> }; >> >> con->features_attr = (struct device_attribute) { >> @@ -762,7 +844,19 @@ static int amdgpu_ras_sysfs_create_feature_node(struct amdgpu_device *adev) >> }, >> .show = amdgpu_ras_sysfs_features_read, >> }; >> + >> + con->badpages_attr = (struct bin_attribute) { >> + .attr = { >> + .name = "gpu_vram_bad_pages", >> + .mode = S_IRUGO, >> + }, >> + .size = 0, >> + .private = NULL, >> + .read = amdgpu_ras_sysfs_badpages_read, >> + }; >> + >> sysfs_attr_init(attrs[0]); >> + sysfs_bin_attr_init(bin_attrs[0]); >> >> return sysfs_create_group(&adev->dev->kobj, &group); >> } >> @@ -774,9 +868,14 @@ static int amdgpu_ras_sysfs_remove_feature_node(struct amdgpu_device *adev) >> &con->features_attr.attr, >> NULL >> }; >> + struct bin_attribute *bin_attrs[] = { >> + &con->badpages_attr, >> + NULL >> + }; >> struct attribute_group group = { >> .name = "ras", >> .attrs = attrs, >> + .bin_attrs = bin_attrs, >> }; >> >> sysfs_remove_group(&adev->dev->kobj, &group); >> @@ -1108,6 +1207,53 @@ static int amdgpu_ras_interrupt_remove_all(struct amdgpu_device *adev) >> /* ih end */ >> >> /* recovery begin */ >> + >> +/* return 0 on success. >> + * caller need free bps. >> + */ >> +static int amdgpu_ras_badpages_read(struct amdgpu_device *adev, >> + struct ras_badpage **bps, unsigned int *count) >> +{ >> + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); >> + struct ras_err_handler_data *data; >> + int i = 0; >> + int ret = 0; >> + >> + if (!con || !con->eh_data || !bps || !count) >> + return -EINVAL; >> + >> + mutex_lock(&con->recovery_lock); >> + data = con->eh_data; >> + if (!data || data->count == 0) { >> + *bps = NULL; >> + goto out; >> + } >> + >> + *bps = kmalloc(sizeof(struct ras_badpage) * data->count, GFP_KERNEL); >> + if (!*bps) { >> + ret = -ENOMEM; >> + goto out; >> + } >> + >> + for (; i < data->count; i++) { >> + (*bps)[i] = (struct ras_badpage){ >> + .bp = data->bps[i].bp, >> + .size = AMDGPU_GPU_PAGE_SIZE, >> + .flags = 0, >> + }; >> + >> + if (data->last_reserved <= i) >> + (*bps)[i].flags = 1; >> + else if (data->bps[i].bo == NULL) >> + (*bps)[i].flags = 2; >> + } >> + >> + *count = data->count; >> +out: >> + mutex_unlock(&con->recovery_lock); >> + return ret; >> +} >> + >> static void amdgpu_ras_do_recovery(struct work_struct *work) >> { >> struct amdgpu_ras *ras = >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h >> index 065c66baf947..e2dff00b8d1c 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h >> @@ -93,6 +93,7 @@ struct amdgpu_ras { >> struct dentry *ent; >> /* sysfs */ >> struct device_attribute features_attr; >> + struct bin_attribute badpages_attr; >> /* block array */ >> struct ras_manager *objs; >> >> -- >> 2.17.1 >> >> _______________________________________________ >> amd-gfx mailing list >> amd-gfx@xxxxxxxxxxxxxxxxxxxxx >> https://nam04.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Famd-gfx&data=02%7C01%7C%7Cae69f0d4b0df49f576e008d6d4926c9c%7C84df9e7fe9f640afb435aaaaaaaaaaaa%7C1%7C0%7C636930122677737006&sdata=2kIXvUHQfN4G%2BJ0b7FqX4A1x7qXyTIlv3WMErXUtpgY%3D&reserved=0 > _______________________________________________ > amd-gfx mailing list > amd-gfx@xxxxxxxxxxxxxxxxxxxxx > https://nam04.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Famd-gfx&data=02%7C01%7C%7Cae69f0d4b0df49f576e008d6d4926c9c%7C84df9e7fe9f640afb435aaaaaaaaaaaa%7C1%7C0%7C636930122677747017&sdata=Evfrt6pS%2B3mEGF7jH6PCzkc0Y8UArseY4GJuhr702Rs%3D&reserved=0 _______________________________________________ amd-gfx mailing list amd-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/amd-gfx