Hi,

On Thu, Jun 09, 2016 at 06:24:51PM -0700, Tai Nguyen wrote:
> +#include <linux/acpi.h>
> +#include <linux/efi.h>
> +#include <linux/module.h>
> +#include <linux/io.h>
> +#include <linux/clk.h>
> +#include <linux/of_platform.h>
> +#include <linux/of_address.h>
> +#include <linux/of_fdt.h>
> +#include <linux/of_irq.h>
> +#include <linux/interrupt.h>
> +#include <linux/platform_device.h>
> +#include <linux/perf_event.h>
> +#include <linux/module.h>
> +#include <linux/cpumask.h>
> +#include <linux/slab.h>
> +#include <linux/mfd/syscon.h>
> +#include <linux/regmap.h>

Minor nit: please sort these alphabetically (it makes scanning them by
eye easier and can help when there are conflicts). Sorting would also
have made the duplicate <linux/module.h> include stand out.

[...]

> +static struct dev_ext_attribute l3c_pmu_format_attrs[] = {
> +        PMU_FORMAT_EXT_ATTR(l3c_eventid, "config:0-7"),
> +        PMU_FORMAT_EXT_ATTR(l3c_agentid, "config1:0-9"),
> +};
> +
> +static struct dev_ext_attribute iob_pmu_format_attrs[] = {
> +        PMU_FORMAT_EXT_ATTR(iob_eventid, "config:0-7"),
> +        PMU_FORMAT_EXT_ATTR(iob_agentid, "config1:0-63"),
> +};
> +
> +static struct dev_ext_attribute mcb_pmu_format_attrs[] = {
> +        PMU_FORMAT_EXT_ATTR(mcb_eventid, "config:0-5"),
> +        PMU_FORMAT_EXT_ATTR(mcb_agentid, "config1:0-9"),
> +};
> +
> +static struct dev_ext_attribute mc_pmu_format_attrs[] = {
> +        PMU_FORMAT_EXT_ATTR(mc_eventid, "config:0-28"),
> +};

Please make these structures const.

> +static struct attribute_group pmu_format_attr_group = {
> +        .name = "format",
> +        .attrs = NULL,          /* Filled in xgene_pmu_alloc_attrs */
> +};

I think you should just have a (static const) format_attr_group for
each class, which you reuse and share across instances, rather than
bothering with dynamic allocation (which I think is broken -- more on
that below).

To save some pain, I think you can use a compound literal to
initialise each of those in one go:

static const struct attribute_group l3c_pmu_format_attr_group = {
        .name = "format",
        .attrs = (const struct dev_ext_attribute[]) {
                PMU_FORMAT_EXT_ATTR(l3c_eventid, "config:0-7"),
                PMU_FORMAT_EXT_ATTR(l3c_agentid, "config1:0-9"),
        },
};

You'll need a full pmu attr group for each class also, but you don't
lose much by doing so.
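That is, each class gets its own static array of groups, along these
lines (completely untested sketch; l3c_pmu_events_attr_group is assumed
to be built statically the same way as the format group above, and the
cpumask group shared across classes):

static const struct attribute_group *l3c_pmu_attr_groups[] = {
        &l3c_pmu_format_attr_group,
        &l3c_pmu_events_attr_group,
        &pmu_cpumask_attr_group,
        NULL,
};

Then xgene_pmu_dev_add() only has to pick the right array in its
switch statement, with no allocation or copying at all.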
> +
> +/*
> + * sysfs event attributes
> + */
> +static ssize_t _xgene_pmu_event_show(struct device *dev,
> +                                     struct device_attribute *attr, char *buf)
> +{
> +        struct dev_ext_attribute *eattr;
> +
> +        eattr = container_of(attr, struct dev_ext_attribute, attr);
> +        return sprintf(buf, "config=0x%lx\n", (unsigned long)eattr->var);
> +}
> +
> +#define PMU_EVENT_EXT_ATTR(_name, _config) \
> +        PMU_EXT_ATTR(_name, _xgene_pmu_event_show, (unsigned long)_config)
> +
> +static struct dev_ext_attribute l3c_pmu_events_attrs[] = {
> +        PMU_EVENT_EXT_ATTR(cycle-count, 0x00),
> +        PMU_EVENT_EXT_ATTR(cycle-count-div-64, 0x01),
> +        PMU_EVENT_EXT_ATTR(read-hit, 0x02),
> +        PMU_EVENT_EXT_ATTR(read-miss, 0x03),
> +        PMU_EVENT_EXT_ATTR(write-need-replacement, 0x06),
> +        PMU_EVENT_EXT_ATTR(write-not-need-replacement, 0x07),
> +        PMU_EVENT_EXT_ATTR(tq-full, 0x08),
> +        PMU_EVENT_EXT_ATTR(ackq-full, 0x09),
> +        PMU_EVENT_EXT_ATTR(wdb-full, 0x0a),
> +        PMU_EVENT_EXT_ATTR(bank-fifo-full, 0x0b),
> +        PMU_EVENT_EXT_ATTR(odb-full, 0x0c),
> +        PMU_EVENT_EXT_ATTR(wbq-full, 0x0d),
> +        PMU_EVENT_EXT_ATTR(bank-conflict-fifo-issue, 0x0e),
> +        PMU_EVENT_EXT_ATTR(bank-fifo-issue, 0x0f),
> +};
> +
> +static struct dev_ext_attribute iob_pmu_events_attrs[] = {
> +        PMU_EVENT_EXT_ATTR(cycle-count, 0x00),
> +        PMU_EVENT_EXT_ATTR(cycle-count-div-64, 0x01),
> +        PMU_EVENT_EXT_ATTR(axi0-read, 0x02),
> +        PMU_EVENT_EXT_ATTR(axi0-read-partial, 0x03),
> +        PMU_EVENT_EXT_ATTR(axi1-read, 0x04),
> +        PMU_EVENT_EXT_ATTR(axi1-read-partial, 0x05),
> +        PMU_EVENT_EXT_ATTR(csw-read-block, 0x06),
> +        PMU_EVENT_EXT_ATTR(csw-read-partial, 0x07),
> +        PMU_EVENT_EXT_ATTR(axi0-write, 0x10),
> +        PMU_EVENT_EXT_ATTR(axi0-write-partial, 0x11),
> +        PMU_EVENT_EXT_ATTR(axi1-write, 0x13),
> +        PMU_EVENT_EXT_ATTR(axi1-write-partial, 0x14),
> +        PMU_EVENT_EXT_ATTR(csw-inbound-dirty, 0x16),
> +};
> +
> +static struct dev_ext_attribute mcb_pmu_events_attrs[] = {
> +        PMU_EVENT_EXT_ATTR(cycle-count, 0x00),
> +        PMU_EVENT_EXT_ATTR(cycle-count-div-64, 0x01),
> +        PMU_EVENT_EXT_ATTR(csw-read, 0x02),
> +        PMU_EVENT_EXT_ATTR(csw-write-request, 0x03),
> +        PMU_EVENT_EXT_ATTR(mcb-csw-stall, 0x04),
> +        PMU_EVENT_EXT_ATTR(cancel-read-gack, 0x05),
> +};
> +
> +static struct dev_ext_attribute mc_pmu_events_attrs[] = {
> +        PMU_EVENT_EXT_ATTR(cycle-count, 0x00),
> +        PMU_EVENT_EXT_ATTR(cycle-count-div-64, 0x01),
> +        PMU_EVENT_EXT_ATTR(act-cmd-sent, 0x02),
> +        PMU_EVENT_EXT_ATTR(pre-cmd-sent, 0x03),
> +        PMU_EVENT_EXT_ATTR(rd-cmd-sent, 0x04),
> +        PMU_EVENT_EXT_ATTR(rda-cmd-sent, 0x05),
> +        PMU_EVENT_EXT_ATTR(wr-cmd-sent, 0x06),
> +        PMU_EVENT_EXT_ATTR(wra-cmd-sent, 0x07),
> +        PMU_EVENT_EXT_ATTR(pde-cmd-sent, 0x08),
> +        PMU_EVENT_EXT_ATTR(sre-cmd-sent, 0x09),
> +        PMU_EVENT_EXT_ATTR(prea-cmd-sent, 0x0a),
> +        PMU_EVENT_EXT_ATTR(ref-cmd-sent, 0x0b),
> +        PMU_EVENT_EXT_ATTR(rd-rda-cmd-sent, 0x0c),
> +        PMU_EVENT_EXT_ATTR(wr-wra-cmd-sent, 0x0d),
> +        PMU_EVENT_EXT_ATTR(in-rd-collision, 0x0e),
> +        PMU_EVENT_EXT_ATTR(in-wr-collision, 0x0f),
> +        PMU_EVENT_EXT_ATTR(collision-queue-not-empty, 0x10),
> +        PMU_EVENT_EXT_ATTR(collision-queue-full, 0x11),
> +        PMU_EVENT_EXT_ATTR(mcu-request, 0x12),
> +        PMU_EVENT_EXT_ATTR(mcu-rd-request, 0x13),
> +        PMU_EVENT_EXT_ATTR(mcu-hp-rd-request, 0x14),
> +        PMU_EVENT_EXT_ATTR(mcu-wr-request, 0x15),
> +        PMU_EVENT_EXT_ATTR(mcu-rd-proceed-all, 0x16),
> +        PMU_EVENT_EXT_ATTR(mcu-rd-proceed-cancel, 0x17),
> +        PMU_EVENT_EXT_ATTR(mcu-rd-response, 0x18),
> +        PMU_EVENT_EXT_ATTR(mcu-rd-proceed-speculative-all, 0x19),
> +        PMU_EVENT_EXT_ATTR(mcu-rd-proceed-speculative-cancel, 0x1a),
> +        PMU_EVENT_EXT_ATTR(mcu-wr-proceed-all, 0x1b),
> +        PMU_EVENT_EXT_ATTR(mcu-wr-proceed-cancel, 0x1c),
> +};
> +
> +static struct attribute_group pmu_events_attr_group = {
> +        .name = "events",
> +        .attrs = NULL,          /* Filled in xgene_pmu_alloc_attrs */
> +};

My comments for the format groups apply here too.

> +
> +/*
> + * sysfs cpumask attributes
> + */
> +static ssize_t xgene_pmu_cpumask_show(struct device *dev,
> +                                      struct device_attribute *attr, char *buf)
> +{
> +        struct xgene_pmu_dev *pmu_dev = to_pmu_dev(dev_get_drvdata(dev));
> +
> +        return cpumap_print_to_pagebuf(true, buf, &pmu_dev->parent->cpu);
> +}
> +static DEVICE_ATTR(cpumask, S_IRUGO, xgene_pmu_cpumask_show, NULL);
> +
> +static struct attribute *xgene_pmu_cpumask_attrs[] = {
> +        &dev_attr_cpumask.attr,
> +        NULL,
> +};
> +
> +static struct attribute_group pmu_cpumask_attr_group = {
> +        .attrs = xgene_pmu_cpumask_attrs,
> +};

Please make these const.

> +
> +static const struct attribute_group *pmu_attr_groups[] = {
> +        &pmu_format_attr_group,
> +        &pmu_cpumask_attr_group,
> +        &pmu_events_attr_group,
> +        NULL
> +};

As mentioned earlier, you'll need one of these per class.

[...]

> +static int xgene_perf_event_init(struct perf_event *event)
> +{
> +        struct xgene_pmu_dev *pmu_dev = to_pmu_dev(event->pmu);
> +        struct hw_perf_event *hwc = &event->hw;
> +        u64 config, config1;
> +
> +        /* Test the event attr type check for PMU enumeration */
> +        if (event->attr.type != event->pmu->type)
> +                return -ENOENT;
> +
> +        /*
> +         * SOC PMU counters are shared across all cores.
> +         * Therefore, it does not support per-process mode.
> +         * Also, it does not support event sampling mode.
> +         */
> +        if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK)
> +                return -EINVAL;
> +
> +        /* SOC counters do not have usr/os/guest/host bits */
> +        if (event->attr.exclude_user || event->attr.exclude_kernel ||
> +            event->attr.exclude_host || event->attr.exclude_guest)
> +                return -EINVAL;
> +
> +        if (event->cpu < 0)
> +                return -EINVAL;
> +        /*
> +         * Many perf core operations (eg. events rotation) operate on a
> +         * single CPU context. This is obvious for CPU PMUs, where one
> +         * expects the same sets of events being observed on all CPUs,
> +         * but can lead to issues for off-core PMUs, where each
> +         * event could be theoretically assigned to a different CPU. To
> +         * mitigate this, we enforce CPU assignment to one, selected
> +         * processor (the one described in the "cpumask" attribute).
> +         */
> +        event->cpu = cpumask_first(&pmu_dev->parent->cpu);

I didn't spot a perf_pmu_migrate_context call anywhere. Do you not plan
to handle hotplug?
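For that you'd want a CPU notifier that moves the events and the IRQ
over to another CPU when the current one goes down, as we do in
arm-ccn. A completely untested sketch of the shape of it -- the cpu_nb
and irq members of struct xgene_pmu, and the pmus list of
xgene_pmu_dev_ctx entries used to reach each registered PMU instance,
are all assumptions for illustration:

static int xgene_pmu_cpu_notifier(struct notifier_block *nb,
                                  unsigned long action, void *hcpu)
{
        struct xgene_pmu *xgene_pmu = container_of(nb, struct xgene_pmu,
                                                   cpu_nb);
        unsigned int cpu = (long)hcpu;
        struct xgene_pmu_dev_ctx *ctx;
        unsigned int target;

        switch (action & ~CPU_TASKS_FROZEN) {
        case CPU_DOWN_PREPARE:
                if (!cpumask_test_and_clear_cpu(cpu, &xgene_pmu->cpu))
                        break;
                target = cpumask_any_but(cpu_online_mask, cpu);
                if (target >= nr_cpu_ids)
                        break;
                /* migrate the events of every registered PMU instance */
                list_for_each_entry(ctx, &xgene_pmu->pmus, next)
                        perf_pmu_migrate_context(&ctx->pmu_dev->pmu,
                                                 cpu, target);
                cpumask_set_cpu(target, &xgene_pmu->cpu);
                /* keep overflow interrupts on the CPU handling events */
                WARN_ON(irq_set_affinity(xgene_pmu->irq, &xgene_pmu->cpu));
                break;
        }

        return NOTIFY_OK;
}

...registered with register_cpu_notifier() at probe time.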
> +
> +        config = event->attr.config;
> +        config1 = event->attr.config1;
> +
> +        hwc->config = config;
> +        /*
> +         * Each bit of the config1 field represents an agent from which the
> +         * request of the event come. The event is counted only if it's caused
> +         * by a request of an agent has the bit set.
> +         * By default, the event is counted for all agents.
> +         */
> +        if (config1)
> +                hwc->extra_reg.config = config1;
> +        else
> +                hwc->extra_reg.config = 0xFFFFFFFFFFFFFFFFULL;
> +

If you treated the bits as having the opposite meaning (i.e. a bit set
means ignore an agent), then the zero default would not have to be
special-cased here. Currently 0 and 0xFFFFFFFFFFFFFFFFULL mean the same
thing, which is a little odd.

You also need to verify the event grouping, e.g. avoiding cross-pmu
groups. See what we do in the arm-ccn driver.

[...]
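For reference, the arm-ccn check in event_init looks roughly like the
below (untested when transplanted here; sibling is a local
struct perf_event *):

        /*
         * Groups mixing events from different hardware PMUs cannot be
         * scheduled as a unit, so reject them. Software events are
         * fine, as they're always schedulable.
         */
        if (event->group_leader->pmu != event->pmu &&
            !is_software_event(event->group_leader))
                return -EINVAL;

        list_for_each_entry(sibling, &event->group_leader->sibling_list,
                            group_entry)
                if (sibling->pmu != event->pmu &&
                    !is_software_event(sibling))
                        return -EINVAL;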
> +        if (local64_cmpxchg(&hwc->prev_count, prev_raw_count, count)
> +            != prev_raw_count)
> +                return;

Please leave the '!=' dangling on the first line, i.e.

        if (local64_cmpxchg(&hwc->prev_count, prev_raw_count, count) !=
            prev_raw_count)
                return;

It makes it clearer that there is more to the expression.

[...]

> +static struct attribute **alloc_attrs(struct device *dev, u32 n,
> +                                      struct dev_ext_attribute *dev_attr)
> +{
> +        struct attribute **attrs;
> +        int i;
> +
> +        /* Alloc n + 1 (for terminating NULL) */
> +        attrs = devm_kcalloc(dev, n + 1, sizeof(struct attribute *),
> +                             GFP_KERNEL);
> +        if (!attrs)
> +                return attrs;
> +
> +        for (i = 0; i < n; i++)
> +                attrs[i] = &dev_attr[i].attr.attr;
> +        return attrs;
> +}
> +
> +static int
> +xgene_pmu_alloc_attrs(struct device *dev, struct xgene_pmu_dev *pmu_dev)
> +{
> +        struct attribute **attrs;
> +
> +        if (pmu_dev->nformat_attrs) {
> +                attrs = alloc_attrs(dev, pmu_dev->nformat_attrs,
> +                                    pmu_dev->format_attr);
> +                if (!attrs)
> +                        return -ENOMEM;
> +                pmu_format_attr_group.attrs = attrs;
> +        }
> +
> +        if (pmu_dev->nevents_attrs) {
> +                attrs = alloc_attrs(dev, pmu_dev->nevents_attrs,
> +                                    pmu_dev->events_attr);
> +                if (!attrs)
> +                        return -ENOMEM;
> +                pmu_events_attr_group.attrs = attrs;
> +        }
> +
> +        pmu_dev->attr_groups = pmu_attr_groups;
> +
> +        return 0;
> +}

I don't see why we need to dynamically allocate anything, given we make
a full copy of each of the existing arrays without modification. We
should just be able to use them as-is. As I mentioned above, please
just statically allocate the structures you need and share them.

[...]

> +static int
> +xgene_pmu_dev_add(struct xgene_pmu *xgene_pmu, struct xgene_pmu_dev_ctx *ctx)
> +{
> +        struct device *dev = xgene_pmu->dev;
> +        struct xgene_pmu_dev *pmu;
> +        int rc;
> +
> +        pmu = devm_kzalloc(dev, sizeof(*pmu), GFP_KERNEL);
> +        if (!pmu)
> +                return -ENOMEM;
> +        pmu->parent = xgene_pmu;
> +        pmu->inf = &ctx->inf;
> +        ctx->pmu_dev = pmu;
> +
> +        switch (pmu->inf->type) {
> +        case PMU_TYPE_L3C:
> +                pmu->format_attr = l3c_pmu_format_attrs;
> +                pmu->nformat_attrs = ARRAY_SIZE(l3c_pmu_format_attrs);
> +                pmu->events_attr = l3c_pmu_events_attrs;
> +                pmu->nevents_attrs = ARRAY_SIZE(l3c_pmu_events_attrs);
> +                break;
> +        case PMU_TYPE_IOB:
> +                pmu->format_attr = iob_pmu_format_attrs;
> +                pmu->nformat_attrs = ARRAY_SIZE(iob_pmu_format_attrs);
> +                pmu->events_attr = iob_pmu_events_attrs;
> +                pmu->nevents_attrs = ARRAY_SIZE(iob_pmu_events_attrs);
> +                break;
> +        case PMU_TYPE_MCB:
> +                if (!(xgene_pmu->mcb_active_mask & pmu->inf->enable_mask))
> +                        goto dev_err;
> +                pmu->format_attr = mcb_pmu_format_attrs;
> +                pmu->nformat_attrs = ARRAY_SIZE(mcb_pmu_format_attrs);
> +                pmu->events_attr = mcb_pmu_events_attrs;
> +                pmu->nevents_attrs = ARRAY_SIZE(mcb_pmu_events_attrs);
> +                break;
> +        case PMU_TYPE_MC:
> +                if (!(xgene_pmu->mc_active_mask & pmu->inf->enable_mask))
> +                        goto dev_err;
> +                pmu->format_attr = mc_pmu_format_attrs;
> +                pmu->nformat_attrs = ARRAY_SIZE(mc_pmu_format_attrs);
> +                pmu->events_attr = mc_pmu_events_attrs;
> +                pmu->nevents_attrs = ARRAY_SIZE(mc_pmu_events_attrs);
> +                break;
> +        default:
> +                return -EINVAL;
> +        }
> +
> +        rc = xgene_pmu_alloc_attrs(dev, pmu);
> +        if (rc) {
> +                dev_err(dev, "%s PMU: Failed to alloc attributes\n", ctx->name);
> +                goto dev_err;
> +        }
> +
> +        rc = xgene_init_perf(pmu, ctx->name);
> +        if (rc) {
> +                dev_err(dev, "%s PMU: Failed to init perf driver\n", ctx->name);
> +                goto dev_err;
> +        }
> +
> +        dev_info(dev, "%s PMU registered\n", ctx->name);
> +
> +        /* All attribute allocations can be free'd after perf_register_pmu */
> +        deallocate_attrs(dev, pmu);

As far as I can see, that is not true. The core perf code doesn't make
a copy of the attrs; it just continues to use them as-is. So this looks
to be broken and very unsafe. Please get rid of the dynamic allocation
and copying.

[...]

> +static irqreturn_t _xgene_pmu_isr(int irq, struct xgene_pmu_dev *pmu_dev)
> +{
> +        struct perf_event *event = NULL;
> +        struct perf_sample_data data;

You never sample, so don't need perf_sample_data.

> +        struct xgene_pmu *xgene_pmu;
> +        struct hw_perf_event *hwc;
> +        int idx;
> +        u32 val;
> +
> +        /* Get interrupt counter source */
> +        val = readl(pmu_dev->inf->csr + PMU_PMOVSR);
> +        idx = ffs(val) - 1;

bit 0 is never set? What if multiple counters overflow? You should
iterate over all the counters which have overflown (as we do in the
arm-ccn driver).
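Something like the below, completely untested, with PMU_MAX_COUNTERS
standing in for however many counters each PMU instance has:

        unsigned long overflow = val & PMU_OVERFLOW_MASK;

        /* handle every counter that has overflowed, not just the first */
        for_each_set_bit(idx, &overflow, PMU_MAX_COUNTERS) {
                struct perf_event *event = pmu_dev->pmu_counter_event[idx];
                struct hw_perf_event *hwc;

                /* skip overflow bits with no event installed */
                if (!event)
                        continue;

                hwc = &event->hw;
                xgene_perf_event_update(event, hwc, idx);
                xgene_perf_event_set_period(event, hwc, idx);
        }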
> +        if (!(val & PMU_OVERFLOW_MASK))
> +                goto out;
> +        event = pmu_dev->pmu_counter_event[idx];
> +
> +        /* Ignore if we don't have an event. */
> +        if (!event)
> +                goto out;
> +
> +        hwc = &event->hw;
> +
> +        xgene_perf_event_update(event, hwc, idx);
> +        perf_sample_data_init(&data, 0, hwc->last_period);

You aren't sampling, so don't need this.

> +        if (!xgene_perf_event_set_period(event, hwc, idx))
> +                goto out;
> +
> +out:
> +        /* Clear interrupt flag */
> +        xgene_pmu = pmu_dev->parent;
> +        if (xgene_pmu->version == 1)

It would be better to consistently use the PCP_PMU_V{1,2} defines.

[...]

> +        version = (dev_id == PCP_PMU_V1) ? 1 : 2;

As above, why not set version to the dev_id, and use the PCP_PMU_V*
defines consistently when comparing xgene_pmu::version?

> +        irq = platform_get_irq(pdev, 0);
> +        if (irq < 0) {
> +                dev_err(&pdev->dev, "No IRQ resource\n");
> +                rc = -EINVAL;
> +                goto err;
> +        }
> +        rc = devm_request_irq(&pdev->dev, irq, xgene_pmu_isr, IRQF_SHARED,
> +                              dev_name(&pdev->dev), xgene_pmu);
> +        if (rc) {
> +                dev_err(&pdev->dev, "Could not request IRQ %d\n", irq);
> +                goto err;
> +        }
> +        /* Pick one core to use for cpumask attributes */
> +        cpumask_set_cpu(smp_processor_id(), &xgene_pmu->cpu);

You also need to ensure that the IRQ occurs on this CPU.
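For example (untested), right after picking the CPU:

        /* make sure overflow interrupts land on the CPU handling events */
        rc = irq_set_affinity(irq, &xgene_pmu->cpu);
        if (rc) {
                dev_err(&pdev->dev, "Failed to set IRQ affinity\n");
                goto err;
        }

...and the affinity will need updating again if that CPU is ever
hotplugged off (see the notifier sketch above).

Thanks,
Mark.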