On Fri, Oct 28, 2022 at 6:48 PM Brian Norris <briannorris@xxxxxxxxxxxx> wrote:
>
> If there are multiple amdgpu devices, this list processing can be racy.
>
> We're really treating this like a per-device list, so make that explicit
> and remove the global list.
>
> Signed-off-by: Brian Norris <briannorris@xxxxxxxxxxxx>

@Kuehling, Felix @Kim, Jonathan can you take a look at this patch?

Thanks,

Alex

> ---
>
>  drivers/gpu/drm/amd/amdgpu/amdgpu.h | 4 ++++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c | 12 +++++-------
>  2 files changed, 9 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 0e6ddf05c23c..e968b7f2417c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -1063,6 +1063,10 @@ struct amdgpu_device {
>  	struct work_struct reset_work;
>
>  	bool job_hang;
> +
> +#if IS_ENABLED(CONFIG_PERF_EVENTS)
> +	struct list_head pmu_list;
> +#endif
>  };
>
>  static inline struct amdgpu_device *drm_to_adev(struct drm_device *ddev)
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c
> index 71ee361d0972..24f2055a2f23 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c
> @@ -23,6 +23,7 @@
>
>  #include <linux/perf_event.h>
>  #include <linux/init.h>
> +#include <linux/list.h>
>  #include "amdgpu.h"
>  #include "amdgpu_pmu.h"
>
> @@ -72,9 +73,6 @@ static ssize_t amdgpu_pmu_event_show(struct device *dev,
>  			amdgpu_pmu_attr->event_str, amdgpu_pmu_attr->type);
>  }
>
> -static LIST_HEAD(amdgpu_pmu_list);
> -
> -
>  struct amdgpu_pmu_attr {
>  	const char *name;
>  	const char *config;
> @@ -558,7 +556,7 @@ static int init_pmu_entry_by_type_and_add(struct amdgpu_pmu_entry *pmu_entry,
>  	pr_info("Detected AMDGPU %d Perf Events.\n", total_num_events);
>
>
> -	list_add_tail(&pmu_entry->entry, &amdgpu_pmu_list);
> +	list_add_tail(&pmu_entry->entry, &pmu_entry->adev->pmu_list);
>
>  	return 0;
>  err_register:
> @@ -579,9 +577,7 @@ void amdgpu_pmu_fini(struct amdgpu_device *adev)
>  {
>  	struct amdgpu_pmu_entry *pe, *temp;
>
> -	list_for_each_entry_safe(pe, temp, &amdgpu_pmu_list, entry) {
> -		if (pe->adev != adev)
> -			continue;
> +	list_for_each_entry_safe(pe, temp, &adev->pmu_list, entry) {
>  		list_del(&pe->entry);
>  		perf_pmu_unregister(&pe->pmu);
>  		kfree(pe->pmu.attr_groups);
> @@ -623,6 +619,8 @@ int amdgpu_pmu_init(struct amdgpu_device *adev)
>  	int ret = 0;
>  	struct amdgpu_pmu_entry *pmu_entry, *pmu_entry_df;
>
> +	INIT_LIST_HEAD(&adev->pmu_list);
> +
>  	switch (adev->asic_type) {
>  	case CHIP_VEGA20:
>  		pmu_entry_df = create_pmu_entry(adev, AMDGPU_PMU_PERF_TYPE_DF,
> --
> 2.38.1.273.g43a17bfeac-goog
>