On Sat, Dec 9, 2017 at 6:09 AM, Felix Kuehling <Felix.Kuehling at amd.com> wrote: > From: Amber Lin <Amber.Lin at amd.com> > > For hardware blocks whose performance counters are accessed via MMIO > registers, KFD provides the support for those privileged blocks. IOMMU is > one of those privileged blocks. Most performance counter properties > required by Thunk are available at /sys/bus/event_source/devices/amd_iommu. > This patch adds properties to topology in KFD sysfs for information not > available in /sys/bus/event_source/devices/amd_iommu. They are shown at > /sys/devices/virtual/kfd/kfd/topology/nodes/0/perf/iommu/ formatted as > /sys/devices/virtual/kfd/kfd/topology/nodes/0/perf/<block>/<property>, i.e. > /sys/devices/virtual/kfd/kfd/topology/nodes/0/perf/iommu/max_concurrent. > For dGPUs, who don't have IOMMU, nothing appears under > /sys/devices/virtual/kfd/kfd/topology/nodes/0/perf. I don't feel comfortable with this patch. It seems to me you didn't have anywhere to put these counters so you just stuck them in a place the thunk already reads because it was "convenient" for you to do it. But, as you point out in a comment later, these counters have nothing to do with topology. So this just feels wrong and I would like to: a. get additional opinions on it. Christian ? Alex ? What do you think ? How the GPU's GFX counters are exposed ? b. Ask why not use IOCTL to get the counters ? btw, I tried to search for other drivers that do this (expose perf counters in sysfs) and didn't find any (it wasn't an exhaustive search so I may have missed). Thanks, Oded > > Signed-off-by: Amber Lin <Amber.Lin at amd.com> > Signed-off-by: Kent Russell <kent.russell at amd.com> > Signed-off-by: Felix Kuehling <Felix.Kuehling at amd.com> > --- > drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 116 +++++++++++++++++++++++++++++- > drivers/gpu/drm/amd/amdkfd/kfd_topology.h | 13 ++++ > 2 files changed, 127 insertions(+), 2 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c > index 7fe7ee0..52d20f5 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c > @@ -104,6 +104,7 @@ static void kfd_release_topology_device(struct kfd_topology_device *dev) > struct kfd_mem_properties *mem; > struct kfd_cache_properties *cache; > struct kfd_iolink_properties *iolink; > + struct kfd_perf_properties *perf; > > list_del(&dev->list); > > @@ -128,6 +129,13 @@ static void kfd_release_topology_device(struct kfd_topology_device *dev) > kfree(iolink); > } > > + while (dev->perf_props.next != &dev->perf_props) { > + perf = container_of(dev->perf_props.next, > + struct kfd_perf_properties, list); > + list_del(&perf->list); > + kfree(perf); > + } > + > kfree(dev); > } > > @@ -162,6 +170,7 @@ struct kfd_topology_device *kfd_create_topology_device( > INIT_LIST_HEAD(&dev->mem_props); > INIT_LIST_HEAD(&dev->cache_props); > INIT_LIST_HEAD(&dev->io_link_props); > + INIT_LIST_HEAD(&dev->perf_props); > > list_add_tail(&dev->list, device_list); > > @@ -328,6 +337,39 @@ static struct kobj_type cache_type = { > .sysfs_ops = &cache_ops, > }; > > +/****** Sysfs of Performance Counters ******/ > + > +struct kfd_perf_attr { > + struct kobj_attribute attr; > + uint32_t data; > +}; > + > +static ssize_t perf_show(struct kobject *kobj, struct kobj_attribute *attrs, > + char *buf) > +{ > + struct kfd_perf_attr *attr; > + > + buf[0] = 0; > + attr = container_of(attrs, struct kfd_perf_attr, attr); > + if (!attr->data) /* invalid data for PMC */ > + return 0; > + else > + return sysfs_show_32bit_val(buf, attr->data); > +} > + > +#define KFD_PERF_DESC(_name, _data) \ > +{ \ > + .attr = __ATTR(_name, 0444, perf_show, NULL), \ > + .data = _data, \ > +} > + > +static struct kfd_perf_attr perf_attr_iommu[] = { > + KFD_PERF_DESC(max_concurrent, 0), > + KFD_PERF_DESC(num_counters, 0), > + KFD_PERF_DESC(counter_ids, 0), > +}; > +/****************************************/ > + > static ssize_t node_show(struct kobject *kobj, struct attribute *attr, > char *buffer) > { > @@ -452,6 +494,7 @@ static void kfd_remove_sysfs_node_entry(struct kfd_topology_device *dev) > struct kfd_iolink_properties *iolink; > struct kfd_cache_properties *cache; > struct kfd_mem_properties *mem; > + struct kfd_perf_properties *perf; > > if (dev->kobj_iolink) { > list_for_each_entry(iolink, &dev->io_link_props, list) > @@ -488,6 +531,16 @@ static void kfd_remove_sysfs_node_entry(struct kfd_topology_device *dev) > dev->kobj_mem = NULL; > } > > + if (dev->kobj_perf) { > + list_for_each_entry(perf, &dev->perf_props, list) { > + kfree(perf->attr_group); > + perf->attr_group = NULL; > + } > + kobject_del(dev->kobj_perf); > + kobject_put(dev->kobj_perf); > + dev->kobj_perf = NULL; > + } > + > if (dev->kobj_node) { > sysfs_remove_file(dev->kobj_node, &dev->attr_gpuid); > sysfs_remove_file(dev->kobj_node, &dev->attr_name); > @@ -504,8 +557,10 @@ static int kfd_build_sysfs_node_entry(struct kfd_topology_device *dev, > struct kfd_iolink_properties *iolink; > struct kfd_cache_properties *cache; > struct kfd_mem_properties *mem; > + struct kfd_perf_properties *perf; > int ret; > - uint32_t i; > + uint32_t i, num_attrs; > + struct attribute **attrs; > > if (WARN_ON(dev->kobj_node)) > return -EEXIST; > @@ -534,6 +589,10 @@ static int kfd_build_sysfs_node_entry(struct kfd_topology_device *dev, > if (!dev->kobj_iolink) > return -ENOMEM; > > + dev->kobj_perf = kobject_create_and_add("perf", dev->kobj_node); > + if (!dev->kobj_perf) > + return -ENOMEM; > + > /* > * Creating sysfs files for node properties > */ > @@ -611,7 +670,33 @@ static int kfd_build_sysfs_node_entry(struct kfd_topology_device *dev, > if (ret < 0) > return ret; > i++; > -} > + } > + > + /* All hardware blocks have the same number of attributes. */ > + num_attrs = sizeof(perf_attr_iommu)/sizeof(struct kfd_perf_attr); > + list_for_each_entry(perf, &dev->perf_props, list) { > + perf->attr_group = kzalloc(sizeof(struct kfd_perf_attr) > + * num_attrs + sizeof(struct attribute_group), > + GFP_KERNEL); > + if (!perf->attr_group) > + return -ENOMEM; > + > + attrs = (struct attribute **)(perf->attr_group + 1); > + if (!strcmp(perf->block_name, "iommu")) { > + /* Information of IOMMU's num_counters and counter_ids is shown > + * under /sys/bus/event_source/devices/amd_iommu. We don't > + * duplicate here. > + */ > + perf_attr_iommu[0].data = perf->max_concurrent; > + for (i = 0; i < num_attrs; i++) > + attrs[i] = &perf_attr_iommu[i].attr.attr; > + } > + perf->attr_group->name = perf->block_name; > + perf->attr_group->attrs = attrs; > + ret = sysfs_create_group(dev->kobj_perf, perf->attr_group); > + if (ret < 0) > + return ret; > + } > > return 0; > } > @@ -778,6 +863,29 @@ static void find_system_memory(const struct dmi_header *dm, > } > } > } > + > +/* > + * Performance counters information is not part of CRAT but we would like to > + * put them in the sysfs under topology directory for Thunk to get the data. > + * This function is called before updating the sysfs. > + */ > +static int kfd_add_perf_to_topology(struct kfd_topology_device *kdev) > +{ > + struct kfd_perf_properties *props; > + > + if (amd_iommu_pc_supported()) { > + props = kfd_alloc_struct(props); > + if (!props) > + return -ENOMEM; > + strcpy(props->block_name, "iommu"); > + props->max_concurrent = amd_iommu_pc_get_max_banks(0) * > + amd_iommu_pc_get_max_counters(0); /* assume one iommu */ > + list_add_tail(&props->list, &kdev->perf_props); > + } > + > + return 0; > +} > + > /* kfd_add_non_crat_information - Add information that is not currently > * defined in CRAT but is necessary for KFD topology > * @dev - topology device to which addition info is added > @@ -860,6 +968,10 @@ int kfd_topology_init(void) > } > } > > + kdev = list_first_entry(&temp_topology_device_list, > + struct kfd_topology_device, list); > + kfd_add_perf_to_topology(kdev); > + > down_write(&topology_lock); > kfd_topology_update_device_list(&temp_topology_device_list, > &topology_device_list); > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h > index 55de56f..b9f3142 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h > @@ -134,6 +134,13 @@ struct kfd_iolink_properties { > struct attribute attr; > }; > > +struct kfd_perf_properties { > + struct list_head list; > + char block_name[16]; > + uint32_t max_concurrent; > + struct attribute_group *attr_group; > +}; > + > struct kfd_topology_device { > struct list_head list; > uint32_t gpu_id; > @@ -144,11 +151,13 @@ struct kfd_topology_device { > struct list_head cache_props; > uint32_t io_link_count; > struct list_head io_link_props; > + struct list_head perf_props; > struct kfd_dev *gpu; > struct kobject *kobj_node; > struct kobject *kobj_mem; > struct kobject *kobj_cache; > struct kobject *kobj_iolink; > + struct kobject *kobj_perf; > struct attribute attr_gpuid; > struct attribute attr_name; > struct attribute attr_props; > @@ -173,4 +182,8 @@ struct kfd_topology_device *kfd_create_topology_device( > struct list_head *device_list); > void kfd_release_topology_device_list(struct list_head *device_list); > > +extern bool amd_iommu_pc_supported(void); > +extern u8 amd_iommu_pc_get_max_banks(u16 devid); > +extern u8 amd_iommu_pc_get_max_counters(u16 devid); > + > #endif /* __KFD_TOPOLOGY_H__ */ > -- > 2.7.4 >