[AMD Official Use Only - Internal Distribution Only] From: Alex Deucher <alexdeucher@xxxxxxxxx>
Sent: Friday, May 22, 2020 11:16 PM To: Wang, Kevin(Yang) <Kevin1.Wang@xxxxxxx> Cc: amd-gfx list <amd-gfx@xxxxxxxxxxxxxxxxxxxxx>; Deucher, Alexander <Alexander.Deucher@xxxxxxx>; Zhang, Hawking <Hawking.Zhang@xxxxxxx> Subject: Re: [PATCH] drm/amdgpu: fix device attribute node create failed with multi gpu On Fri, May 22, 2020 at 10:57 AM Kevin Wang <kevin1.wang@xxxxxxx> wrote:
> > the original design will use variable of "attr->states" to save node > supported states on current gpu device, but for multi gpu device, when > probe second gpu device, the driver will check attribute node states > from previous gpu device whether to create attribute node. > it will cause other gpu device create attribute node failed. > > 1. add member attr_list into amdgpu_device to link supported device attribute node. > 2. add new structure "struct amdgpu_device_attr_entry{}" to track device attribute state. > > fix: > drm/amdgpu: optimize amdgpu device attribute code > > Signed-off-by: Kevin Wang <kevin1.wang@xxxxxxx> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + > drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c | 75 +++++++++++++++----------- > drivers/gpu/drm/amd/amdgpu/amdgpu_pm.h | 13 +++-- > 3 files changed, 52 insertions(+), 37 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > index bfce0931f9c1..b84146339ea3 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > @@ -995,6 +995,7 @@ struct amdgpu_device { > char serial[16]; > > struct amdgpu_autodump autodump; > + struct list_head attr_list; Might want to call this pm_attr_list or even move this to the amdgpu_pm struct, but either way, assuming you've tested this with multiple GPUs: Reviewed-by: Alex Deucher <alexander.deucher@xxxxxxx> [kevin]: thanks, the patch v2 will fix it.
and this patch was tested and passed locally with multi gpu.
> };
> > static inline struct amdgpu_device *amdgpu_ttm_adev(struct ttm_bo_device *bdev) > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c > index 55815b899942..ac99aa0a16a8 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c > @@ -1725,7 +1725,7 @@ static struct amdgpu_device_attr amdgpu_device_attrs[] = { > }; > > static int default_attr_update(struct amdgpu_device *adev, struct amdgpu_device_attr *attr, > - uint32_t mask) > + uint32_t mask, enum amdgpu_device_attr_states *states) > { > struct device_attribute *dev_attr = &attr->dev_attr; > const char *attr_name = dev_attr->attr.name; > @@ -1733,7 +1733,7 @@ static int default_attr_update(struct amdgpu_device *adev, struct amdgpu_device_ > enum amd_asic_type asic_type = adev->asic_type; > > if (!(attr->flags & mask)) { > - attr->states = ATTR_STATE_UNSUPPORTED; > + *states = ATTR_STATE_UNSUPPORTED; > return 0; > } > > @@ -1741,34 +1741,34 @@ static int default_attr_update(struct amdgpu_device *adev, struct amdgpu_device_ > > if (DEVICE_ATTR_IS(pp_dpm_socclk)) { > if (asic_type <= CHIP_VEGA10) > - attr->states = ATTR_STATE_UNSUPPORTED; > + *states = ATTR_STATE_UNSUPPORTED; > } else if (DEVICE_ATTR_IS(pp_dpm_dcefclk)) { > if (asic_type <= CHIP_VEGA10 || asic_type == CHIP_ARCTURUS) > - attr->states = ATTR_STATE_UNSUPPORTED; > + *states = ATTR_STATE_UNSUPPORTED; > } else if (DEVICE_ATTR_IS(pp_dpm_fclk)) { > if (asic_type < CHIP_VEGA20) > - attr->states = ATTR_STATE_UNSUPPORTED; > + *states = ATTR_STATE_UNSUPPORTED; > } else if (DEVICE_ATTR_IS(pp_dpm_pcie)) { > if (asic_type == CHIP_ARCTURUS) > - attr->states = ATTR_STATE_UNSUPPORTED; > + *states = ATTR_STATE_UNSUPPORTED; > } else if (DEVICE_ATTR_IS(pp_od_clk_voltage)) { > - attr->states = ATTR_STATE_UNSUPPORTED; > + *states = ATTR_STATE_UNSUPPORTED; > if ((is_support_sw_smu(adev) && adev->smu.od_enabled) || > (!is_support_sw_smu(adev) && hwmgr->od_enabled)) > - attr->states = 
ATTR_STATE_UNSUPPORTED; > + *states = ATTR_STATE_UNSUPPORTED; > } else if (DEVICE_ATTR_IS(mem_busy_percent)) { > if (adev->flags & AMD_IS_APU || asic_type == CHIP_VEGA10) > - attr->states = ATTR_STATE_UNSUPPORTED; > + *states = ATTR_STATE_UNSUPPORTED; > } else if (DEVICE_ATTR_IS(pcie_bw)) { > /* PCIe Perf counters won't work on APU nodes */ > if (adev->flags & AMD_IS_APU) > - attr->states = ATTR_STATE_UNSUPPORTED; > + *states = ATTR_STATE_UNSUPPORTED; > } else if (DEVICE_ATTR_IS(unique_id)) { > if (!adev->unique_id) > - attr->states = ATTR_STATE_UNSUPPORTED; > + *states = ATTR_STATE_UNSUPPORTED; > } else if (DEVICE_ATTR_IS(pp_features)) { > if (adev->flags & AMD_IS_APU || asic_type <= CHIP_VEGA10) > - attr->states = ATTR_STATE_UNSUPPORTED; > + *states = ATTR_STATE_UNSUPPORTED; > } > > if (asic_type == CHIP_ARCTURUS) { > @@ -1794,22 +1794,24 @@ static int amdgpu_device_attr_create(struct amdgpu_device *adev, > int ret = 0; > struct device_attribute *dev_attr = &attr->dev_attr; > const char *name = dev_attr->attr.name; > + enum amdgpu_device_attr_states attr_states = ATTR_STATE_SUPPORTED; > + struct amdgpu_device_attr_entry *attr_entry; > + > int (*attr_update)(struct amdgpu_device *adev, struct amdgpu_device_attr *attr, > - uint32_t mask) = default_attr_update; > + uint32_t mask, enum amdgpu_device_attr_states *states) = default_attr_update; > > BUG_ON(!attr); > > attr_update = attr->attr_update ? 
attr_update : default_attr_update; > > - ret = attr_update(adev, attr, mask); > + ret = attr_update(adev, attr, mask, &attr_states); > if (ret) { > dev_err(adev->dev, "failed to update device file %s, ret = %d\n", > name, ret); > return ret; > } > > - /* the attr->states maybe changed after call attr->attr_update function */ > - if (attr->states == ATTR_STATE_UNSUPPORTED) > + if (attr_states == ATTR_STATE_UNSUPPORTED) > return 0; > > ret = device_create_file(adev->dev, dev_attr); > @@ -1818,7 +1820,14 @@ static int amdgpu_device_attr_create(struct amdgpu_device *adev, > name, ret); > } > > - attr->states = ATTR_STATE_SUPPORTED; > + attr_entry = kmalloc(sizeof(*attr_entry), GFP_KERNEL); > + if (!attr_entry) > + return -ENOMEM; > + > + attr_entry->attr = attr; > + INIT_LIST_HEAD(&attr_entry->entry); > + > + list_add_tail(&attr_entry->entry, &adev->attr_list); > > return ret; > } > @@ -1827,14 +1836,12 @@ static void amdgpu_device_attr_remove(struct amdgpu_device *adev, struct amdgpu_ > { > struct device_attribute *dev_attr = &attr->dev_attr; > > - if (attr->states == ATTR_STATE_UNSUPPORTED) > - return; > - > device_remove_file(adev->dev, dev_attr); > - > - attr->states = ATTR_STATE_UNSUPPORTED; > } > > +static void amdgpu_device_attr_remove_groups(struct amdgpu_device *adev, > + struct list_head *attr_list); > + > static int amdgpu_device_attr_create_groups(struct amdgpu_device *adev, > struct amdgpu_device_attr *attrs, > uint32_t counts, > @@ -1852,20 +1859,24 @@ static int amdgpu_device_attr_create_groups(struct amdgpu_device *adev, > return 0; > > failed: > - while (i--) > - amdgpu_device_attr_remove(adev, &attrs[i]); > + amdgpu_device_attr_remove_groups(adev, &adev->attr_list); > > return ret; > } > > static void amdgpu_device_attr_remove_groups(struct amdgpu_device *adev, > - struct amdgpu_device_attr *attrs, > - uint32_t counts) > + struct list_head *attr_list) > { > - uint32_t i = 0; > + struct amdgpu_device_attr_entry *entry, *entry_tmp; > > - for (i = 0; i < 
counts; i++) > - amdgpu_device_attr_remove(adev, &attrs[i]); > + if (list_empty(&adev->attr_list)) > + return ; > + > + list_for_each_entry_safe(entry, entry_tmp, attr_list, entry) { > + amdgpu_device_attr_remove(adev, entry->attr); > + list_del(&entry->entry); > + kfree(entry); > + } > } > > static ssize_t amdgpu_hwmon_show_temp(struct device *dev, > @@ -3276,6 +3287,8 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev) > if (adev->pm.dpm_enabled == 0) > return 0; > > + INIT_LIST_HEAD(&adev->attr_list); > + > adev->pm.int_hwmon_dev = hwmon_device_register_with_groups(adev->dev, > DRIVER_NAME, adev, > hwmon_groups); > @@ -3319,9 +3332,7 @@ void amdgpu_pm_sysfs_fini(struct amdgpu_device *adev) > if (adev->pm.int_hwmon_dev) > hwmon_device_unregister(adev->pm.int_hwmon_dev); > > - amdgpu_device_attr_remove_groups(adev, > - amdgpu_device_attrs, > - ARRAY_SIZE(amdgpu_device_attrs)); > + amdgpu_device_attr_remove_groups(adev, &adev->attr_list); > } > > void amdgpu_pm_compute_clocks(struct amdgpu_device *adev) > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.h > index 48e8086baf33..d9ae2b49a402 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.h > @@ -47,10 +47,14 @@ enum amdgpu_device_attr_states { > struct amdgpu_device_attr { > struct device_attribute dev_attr; > enum amdgpu_device_attr_flags flags; > - enum amdgpu_device_attr_states states; > - int (*attr_update)(struct amdgpu_device *adev, > - struct amdgpu_device_attr* attr, > - uint32_t mask); > + int (*attr_update)(struct amdgpu_device *adev, struct amdgpu_device_attr *attr, > + uint32_t mask, enum amdgpu_device_attr_states *states); > + > +}; > + > +struct amdgpu_device_attr_entry { > + struct list_head entry; > + struct amdgpu_device_attr *attr; > }; > > #define to_amdgpu_device_attr(_dev_attr) \ > @@ -59,7 +63,6 @@ struct amdgpu_device_attr { > #define __AMDGPU_DEVICE_ATTR(_name, _mode, _show, _store, _flags, ...) 
\ > { .dev_attr = __ATTR(_name, _mode, _show, _store), \ > .flags = _flags, \ > - .states = ATTR_STATE_SUPPORTED, \ > ##__VA_ARGS__, } > > #define AMDGPU_DEVICE_ATTR(_name, _mode, _flags, ...) \ > -- > 2.17.1 > > _______________________________________________ > amd-gfx mailing list > amd-gfx@xxxxxxxxxxxxxxxxxxxxx > https://nam11.safelinks.protection.outlook.com/?url=""> |
_______________________________________________ amd-gfx mailing list amd-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/amd-gfx