Enable NVLINK-C2C port filtering to distinguish traffic from different GPUs connected to NVLINK-C2C. Signed-off-by: Besar Wicaksono <bwicaksono@xxxxxxxxxx> --- Documentation/admin-guide/perf/nvidia-pmu.rst | 32 +++++++++++++++++++ drivers/perf/arm_cspmu/nvidia_cspmu.c | 7 ++-- 2 files changed, 36 insertions(+), 3 deletions(-) diff --git a/Documentation/admin-guide/perf/nvidia-pmu.rst b/Documentation/admin-guide/perf/nvidia-pmu.rst index 2e0d47cfe7ea..6d1d3206b4ad 100644 --- a/Documentation/admin-guide/perf/nvidia-pmu.rst +++ b/Documentation/admin-guide/perf/nvidia-pmu.rst @@ -86,6 +86,22 @@ Example usage: perf stat -a -e nvidia_nvlink_c2c0_pmu_3/event=0x0/ +The NVLink-C2C has two ports that can be connected to one GPU (occupying both +ports) or to two GPUs (one GPU per port). The user can use "port" bitmap +parameter to select the port(s) to monitor. Each bit represents the port number, +e.g. "port=0x1" corresponds to port 0 and "port=0x3" is for port 0 and 1. The +PMU will monitor both ports by default if not specified. + +Example for port filtering: + +* Count event id 0x0 from the GPU connected with socket 0 on port 0:: + + perf stat -a -e nvidia_nvlink_c2c0_pmu_0/event=0x0,port=0x1/ + +* Count event id 0x0 from the GPUs connected with socket 0 on port 0 and port 1:: + + perf stat -a -e nvidia_nvlink_c2c0_pmu_0/event=0x0,port=0x3/ + NVLink-C2C1 PMU ------------------- @@ -116,6 +132,22 @@ Example usage: perf stat -a -e nvidia_nvlink_c2c1_pmu_3/event=0x0/ +The NVLink-C2C has two ports that can be connected to one GPU (occupying both +ports) or to two GPUs (one GPU per port). The user can use "port" bitmap +parameter to select the port(s) to monitor. Each bit represents the port number, +e.g. "port=0x1" corresponds to port 0 and "port=0x3" is for port 0 and 1. The +PMU will monitor both ports by default if not specified. + +Example for port filtering: + +* Count event id 0x0 from the GPU connected with socket 0 on port 0:: + + perf stat -a -e nvidia_nvlink_c2c1_pmu_0/event=0x0,port=0x1/ + +* Count event id 0x0 from the GPUs connected with socket 0 on port 0 and port 1:: + + perf stat -a -e nvidia_nvlink_c2c1_pmu_0/event=0x0,port=0x3/ + CNVLink PMU --------------- diff --git a/drivers/perf/arm_cspmu/nvidia_cspmu.c b/drivers/perf/arm_cspmu/nvidia_cspmu.c index d1cd9975e71a..cd51177347e5 100644 --- a/drivers/perf/arm_cspmu/nvidia_cspmu.c +++ b/drivers/perf/arm_cspmu/nvidia_cspmu.c @@ -149,6 +149,7 @@ static struct attribute *pcie_pmu_format_attrs[] = { static struct attribute *nvlink_c2c_pmu_format_attrs[] = { ARM_CSPMU_FORMAT_EVENT_ATTR, + ARM_CSPMU_FORMAT_ATTR(port, "config1:0-1"), NULL, }; @@ -193,7 +194,7 @@ static u32 nv_cspmu_event_filter(const struct perf_event *event) const struct nv_cspmu_ctx *ctx = to_nv_cspmu_ctx(to_arm_cspmu(event->pmu)); - if (ctx->filter_mask == 0) + if (ctx->filter_mask == 0 || event->attr.config1 == 0) return ctx->filter_default_val; return event->attr.config1 & ctx->filter_mask; @@ -229,7 +230,7 @@ static const struct nv_cspmu_match nv_cspmu_match[] = { { .prodid = 0x104, .prodid_mask = NV_PRODID_MASK, - .filter_mask = 0x0, + .filter_mask = NV_NVL_C2C_FILTER_ID_MASK, .filter_default_val = NV_NVL_C2C_FILTER_ID_MASK, .name_pattern = "nvidia_nvlink_c2c1_pmu_%u", .name_fmt = NAME_FMT_SOCKET, @@ -239,7 +240,7 @@ static const struct nv_cspmu_match nv_cspmu_match[] = { { .prodid = 0x105, .prodid_mask = NV_PRODID_MASK, - .filter_mask = 0x0, + .filter_mask = NV_NVL_C2C_FILTER_ID_MASK, .filter_default_val = NV_NVL_C2C_FILTER_ID_MASK, .name_pattern = "nvidia_nvlink_c2c0_pmu_%u", .name_fmt = NAME_FMT_SOCKET, -- 2.34.1