On Tue, Jan 8, 2019 at 11:02 AM Kuehling, Felix <Felix.Kuehling@xxxxxxx> wrote: > > > On 2019-01-08 6:28 a.m., Russell, Kent wrote: > > Add a sysfs file that reports the number of bytes transmitted and > > received in the last second. This can be used to approximate the PCIe > > bandwidth usage over the last second. > > > > v2: Clarify use of mps as estimation of bandwidth > > I think you only updated the code comment, but not the output in sysfs. > > More generally, most amdgpu sysfs files report a single value, or a set > of values separated by spaces. That makes it easier to parse by external > tools. It's also supposed to be a stable API. They don't usually include > human readable text such as "bytes received:". It would be up to a tool > such as rocm-smi to turn this into human-readable text. > > In keeping with that convention I'd suggest reporting the PCIe bandwidth > as three numbers: > > <recv count> <send count> <mps> > > Then let the tool calculate the bytes estimate with the appropriate > warnings about how accurate that is. > > Alex does this match your understanding regarding sysfs conventions? Yes, agreed. Alex > > Regards, > Felix > > > v3: Don't make the file on APUs > > > > Signed-off-by: Kent Russell <kent.russell@xxxxxxx> > > --- > > drivers/gpu/drm/amd/amdgpu/amdgpu.h | 4 ++++ > > drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c | 38 +++++++++++++++++++++++++++++ > > drivers/gpu/drm/amd/amdgpu/cik.c | 41 +++++++++++++++++++++++++++++++ > > drivers/gpu/drm/amd/amdgpu/si.c | 41 +++++++++++++++++++++++++++++++ > > drivers/gpu/drm/amd/amdgpu/soc15.c | 44 ++++++++++++++++++++++++++++++++++ > > drivers/gpu/drm/amd/amdgpu/vi.c | 41 +++++++++++++++++++++++++++++++ > > 6 files changed, 209 insertions(+) > > > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > > index e1b2c64..512b124 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > > @@ -542,6 +542,9 @@ struct amdgpu_asic_funcs { > > bool (*need_full_reset)(struct amdgpu_device *adev); > > /* initialize doorbell layout for specific asic*/ > > void (*init_doorbell_index)(struct amdgpu_device *adev); > > + /* PCIe bandwidth usage */ > > + void (*get_pcie_usage)(struct amdgpu_device *adev, uint64_t *count0, > > + uint64_t *count1); > > }; > > > > /* > > @@ -1045,6 +1048,7 @@ int emu_soc_asic_init(struct amdgpu_device *adev); > > #define amdgpu_asic_invalidate_hdp(adev, r) (adev)->asic_funcs->invalidate_hdp((adev), (r)) > > #define amdgpu_asic_need_full_reset(adev) (adev)->asic_funcs->need_full_reset((adev)) > > #define amdgpu_asic_init_doorbell_index(adev) (adev)->asic_funcs->init_doorbell_index((adev)) > > +#define amdgpu_asic_get_pcie_usage(adev, cnt0, cnt1) ((adev)->asic_funcs->get_pcie_usage((adev), (cnt0), (cnt1))) > > > > /* Common functions */ > > bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev); > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c > > index 6896dec..d2b29fd 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c > > @@ -990,6 +990,33 @@ static ssize_t amdgpu_get_busy_percent(struct device *dev, > > return snprintf(buf, PAGE_SIZE, "%d\n", value); > > } > > > > +/** > > + * DOC: pcie_bw > > + * > > + * The amdgpu driver provides a sysfs API for reading how much data > > + * has been sent and received by the GPU in the last second through PCIe. > > + * The file pcie_bw is used for this. > > + * The Perf counters calculate this and return the number of sent and received > > + * messages, which we multiply by the maxsize of our PCIe packets (mps). > > + * Note that it is not possible to easily and quickly obtain the size of each > > + * packet transmitted, so we use the max payload size (mps) to estimate the > > + * PCIe bandwidth usage > > + */ > > +static ssize_t amdgpu_get_pcie_bw(struct device *dev, > > + struct device_attribute *attr, > > + char *buf) > > +{ > > + struct drm_device *ddev = dev_get_drvdata(dev); > > + struct amdgpu_device *adev = ddev->dev_private; > > + uint64_t mps = pcie_get_mps(adev->pdev); > > + uint64_t count0, count1; > > + > > + amdgpu_asic_get_pcie_usage(adev, &count0, &count1); > > + return snprintf(buf, PAGE_SIZE, > > + "Bytes received: %llu\nBytes sent: %llu\n", > > + count0 * mps, count1 * mps); > > +} > > + > > static DEVICE_ATTR(power_dpm_state, S_IRUGO | S_IWUSR, amdgpu_get_dpm_state, amdgpu_set_dpm_state); > > static DEVICE_ATTR(power_dpm_force_performance_level, S_IRUGO | S_IWUSR, > > amdgpu_get_dpm_forced_performance_level, > > @@ -1025,6 +1052,7 @@ static DEVICE_ATTR(pp_od_clk_voltage, S_IRUGO | S_IWUSR, > > amdgpu_set_pp_od_clk_voltage); > > static DEVICE_ATTR(gpu_busy_percent, S_IRUGO, > > amdgpu_get_busy_percent, NULL); > > +static DEVICE_ATTR(pcie_bw, S_IRUGO, amdgpu_get_pcie_bw, NULL); > > > > static ssize_t amdgpu_hwmon_show_temp(struct device *dev, > > struct device_attribute *attr, > > @@ -2108,6 +2136,14 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev) > > "gpu_busy_level\n"); > > return ret; > > } > > + /* PCIe Perf counters won't work on APU nodes */ > > + if (adev->flags & !AMD_IS_APU) { > > + ret = device_create_file(adev->dev, &dev_attr_pcie_bw); > > + if (ret) { > > + DRM_ERROR("failed to create device file pcie_bw\n"); > > + return ret; > > + } > > + } > > ret = amdgpu_debugfs_pm_init(adev); > > if (ret) { > > DRM_ERROR("Failed to register debugfs file for dpm!\n"); > > @@ -2147,6 +2183,8 @@ void amdgpu_pm_sysfs_fini(struct amdgpu_device *adev) > > device_remove_file(adev->dev, > > &dev_attr_pp_od_clk_voltage); > > device_remove_file(adev->dev, &dev_attr_gpu_busy_percent); > > + if (adev->flags & !AMD_IS_APU) > > + device_remove_file(adev->dev, &dev_attr_pcie_bw); > > } > > > > void amdgpu_pm_compute_clocks(struct amdgpu_device *adev) > > diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c > > index 71c50d8..8681744 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/cik.c > > +++ b/drivers/gpu/drm/amd/amdgpu/cik.c > > @@ -1741,6 +1741,46 @@ static bool cik_need_full_reset(struct amdgpu_device *adev) > > return true; > > } > > > > +static void cik_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0, > > + uint64_t *count1) > > +{ > > + uint32_t perfctr = 0; > > + uint64_t cnt0_of, cnt1_of; > > + int tmp; > > + > > + /* Set the 2 events that we wish to watch, defined above */ > > + /* Reg 40 is # received msgs, Reg 104 is # of posted requests sent */ > > + perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, EVENT0_SEL, 40); > > + perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, EVENT1_SEL, 104); > > + > > + /* Write to enable desired perf counters */ > > + WREG32_PCIE(ixPCIE_PERF_CNTL_TXCLK, perfctr); > > + /* Zero out and enable the perf counters > > + * Write 0x5: > > + * Bit 0 = Start all counters(1) > > + * Bit 2 = Global counter reset enable(1) > > + */ > > + WREG32_PCIE(ixPCIE_PERF_COUNT_CNTL, 0x00000005); > > + > > + msleep(1000); > > + > > + /* Load the shadow and disable the perf counters > > + * Write 0x2: > > + * Bit 0 = Stop counters(0) > > + * Bit 1 = Load the shadow counters(1) > > + */ > > + WREG32_PCIE(ixPCIE_PERF_COUNT_CNTL, 0x00000002); > > + > > + /* Read register values to get any >32bit overflow */ > > + tmp = RREG32_PCIE(ixPCIE_PERF_CNTL_TXCLK); > > + cnt0_of = REG_GET_FIELD(tmp, PCIE_PERF_CNTL_TXCLK, COUNTER0_UPPER); > > + cnt1_of = REG_GET_FIELD(tmp, PCIE_PERF_CNTL_TXCLK, COUNTER1_UPPER); > > + > > + /* Get the values and add the overflow */ > > + *count0 = RREG32_PCIE(ixPCIE_PERF_COUNT0_TXCLK) | (cnt0_of << 32); > > + *count1 = RREG32_PCIE(ixPCIE_PERF_COUNT1_TXCLK) | (cnt1_of << 32); > > +} > > + > > static const struct amdgpu_asic_funcs cik_asic_funcs = > > { > > .read_disabled_bios = &cik_read_disabled_bios, > > @@ -1756,6 +1796,7 @@ static const struct amdgpu_asic_funcs cik_asic_funcs = > > .invalidate_hdp = &cik_invalidate_hdp, > > .need_full_reset = &cik_need_full_reset, > > .init_doorbell_index = &legacy_doorbell_index_init, > > + .get_pcie_usage = &cik_get_pcie_usage, > > }; > > > > static int cik_common_early_init(void *handle) > > diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c > > index f8408f8..2b3eb0e 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/si.c > > +++ b/drivers/gpu/drm/amd/amdgpu/si.c > > @@ -1323,6 +1323,46 @@ static void si_set_pcie_lanes(struct amdgpu_device *adev, int lanes) > > WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, link_width_cntl); > > } > > > > +static void si_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0, > > + uint64_t *count1) > > +{ > > + uint32_t perfctr = 0; > > + uint64_t cnt0_of, cnt1_of; > > + int tmp; > > + > > + /* Set the 2 events that we wish to watch, defined above */ > > + /* Reg 40 is # received msgs, Reg 104 is # of posted requests sent */ > > + perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, EVENT0_SEL, 40); > > + perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, EVENT1_SEL, 104); > > + > > + /* Write to enable desired perf counters */ > > + WREG32_PCIE(ixPCIE_PERF_CNTL_TXCLK, perfctr); > > + /* Zero out and enable the perf counters > > + * Write 0x5: > > + * Bit 0 = Start all counters(1) > > + * Bit 2 = Global counter reset enable(1) > > + */ > > + WREG32_PCIE(ixPCIE_PERF_COUNT_CNTL, 0x00000005); > > + > > + msleep(1000); > > + > > + /* Load the shadow and disable the perf counters > > + * Write 0x2: > > + * Bit 0 = Stop counters(0) > > + * Bit 1 = Load the shadow counters(1) > > + */ > > + WREG32_PCIE(ixPCIE_PERF_COUNT_CNTL, 0x00000002); > > + > > + /* Read register values to get any >32bit overflow */ > > + tmp = RREG32_PCIE(ixPCIE_PERF_CNTL_TXCLK); > > + cnt0_of = REG_GET_FIELD(tmp, PCIE_PERF_CNTL_TXCLK, COUNTER0_UPPER); > > + cnt1_of = REG_GET_FIELD(tmp, PCIE_PERF_CNTL_TXCLK, COUNTER1_UPPER); > > + > > + /* Get the values and add the overflow */ > > + *count0 = RREG32_PCIE(ixPCIE_PERF_COUNT0_TXCLK) | (cnt0_of << 32); > > + *count1 = RREG32_PCIE(ixPCIE_PERF_COUNT1_TXCLK) | (cnt1_of << 32); > > +} > > + > > static const struct amdgpu_asic_funcs si_asic_funcs = > > { > > .read_disabled_bios = &si_read_disabled_bios, > > @@ -1339,6 +1379,7 @@ static const struct amdgpu_asic_funcs si_asic_funcs = > > .flush_hdp = &si_flush_hdp, > > .invalidate_hdp = &si_invalidate_hdp, > > .need_full_reset = &si_need_full_reset, > > + .get_pcie_usage = &si_get_pcie_usage, > > }; > > > > static uint32_t si_get_rev_id(struct amdgpu_device *adev) > > diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c > > index 8849b74..afd8391 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/soc15.c > > +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c > > @@ -43,6 +43,9 @@ > > #include "hdp/hdp_4_0_sh_mask.h" > > #include "smuio/smuio_9_0_offset.h" > > #include "smuio/smuio_9_0_sh_mask.h" > > +#include "nbio/nbio_7_0_default.h" > > +#include "nbio/nbio_7_0_sh_mask.h" > > +#include "nbio/nbio_7_0_smn.h" > > > > #include "soc15.h" > > #include "soc15_common.h" > > @@ -601,6 +604,45 @@ static bool soc15_need_full_reset(struct amdgpu_device *adev) > > /* change this when we implement soft reset */ > > return true; > > } > > +static void soc15_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0, > > + uint64_t *count1) > > +{ > > + uint32_t perfctr = 0; > > + uint64_t cnt0_of, cnt1_of; > > + int tmp; > > + > > + /* Set the 2 events that we wish to watch, defined above */ > > + /* Reg 40 is # received msgs, Reg 104 is # of posted requests sent */ > > + perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, EVENT0_SEL, 40); > > + perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, EVENT1_SEL, 104); > > + > > + /* Write to enable desired perf counters */ > > + WREG32_PCIE(smnPCIE_PERF_CNTL_TXCLK, perfctr); > > + /* Zero out and enable the perf counters > > + * Write 0x5: > > + * Bit 0 = Start all counters(1) > > + * Bit 2 = Global counter reset enable(1) > > + */ > > + WREG32_PCIE(smnPCIE_PERF_COUNT_CNTL, 0x00000005); > > + > > + msleep(1000); > > + > > + /* Load the shadow and disable the perf counters > > + * Write 0x2: > > + * Bit 0 = Stop counters(0) > > + * Bit 1 = Load the shadow counters(1) > > + */ > > + WREG32_PCIE(smnPCIE_PERF_COUNT_CNTL, 0x00000002); > > + > > + /* Read register values to get any >32bit overflow */ > > + tmp = RREG32_PCIE(smnPCIE_PERF_CNTL_TXCLK); > > + cnt0_of = REG_GET_FIELD(tmp, PCIE_PERF_CNTL_TXCLK, COUNTER0_UPPER); > > + cnt1_of = REG_GET_FIELD(tmp, PCIE_PERF_CNTL_TXCLK, COUNTER1_UPPER); > > + > > + /* Get the values and add the overflow */ > > + *count0 = RREG32_PCIE(smnPCIE_PERF_COUNT0_TXCLK) | (cnt0_of << 32); > > + *count1 = RREG32_PCIE(smnPCIE_PERF_COUNT1_TXCLK) | (cnt1_of << 32); > > +} > > > > static const struct amdgpu_asic_funcs soc15_asic_funcs = > > { > > @@ -617,6 +659,7 @@ static const struct amdgpu_asic_funcs soc15_asic_funcs = > > .invalidate_hdp = &soc15_invalidate_hdp, > > .need_full_reset = &soc15_need_full_reset, > > .init_doorbell_index = &vega10_doorbell_index_init, > > + .get_pcie_usage = &soc15_get_pcie_usage, > > }; > > > > static const struct amdgpu_asic_funcs vega20_asic_funcs = > > @@ -634,6 +677,7 @@ static const struct amdgpu_asic_funcs vega20_asic_funcs = > > .invalidate_hdp = &soc15_invalidate_hdp, > > .need_full_reset = &soc15_need_full_reset, > > .init_doorbell_index = &vega20_doorbell_index_init, > > + .get_pcie_usage = &soc15_get_pcie_usage, > > }; > > > > static int soc15_common_early_init(void *handle) > > diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c > > index 03e7be5..f785565 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/vi.c > > +++ b/drivers/gpu/drm/amd/amdgpu/vi.c > > @@ -941,6 +941,46 @@ static bool vi_need_full_reset(struct amdgpu_device *adev) > > } > > } > > > > +static void vi_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0, > > + uint64_t *count1) > > +{ > > + uint32_t perfctr = 0; > > + uint64_t cnt0_of, cnt1_of; > > + int tmp; > > + > > + /* Set the 2 events that we wish to watch, defined above */ > > + /* Reg 40 is # received msgs, Reg 104 is # of posted requests sent */ > > + perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, EVENT0_SEL, 40); > > + perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, EVENT1_SEL, 104); > > + > > + /* Write to enable desired perf counters */ > > + WREG32_PCIE(ixPCIE_PERF_CNTL_TXCLK, perfctr); > > + /* Zero out and enable the perf counters > > + * Write 0x5: > > + * Bit 0 = Start all counters(1) > > + * Bit 2 = Global counter reset enable(1) > > + */ > > + WREG32_PCIE(ixPCIE_PERF_COUNT_CNTL, 0x00000005); > > + > > + msleep(1000); > > + > > + /* Load the shadow and disable the perf counters > > + * Write 0x2: > > + * Bit 0 = Stop counters(0) > > + * Bit 1 = Load the shadow counters(1) > > + */ > > + WREG32_PCIE(ixPCIE_PERF_COUNT_CNTL, 0x00000002); > > + > > + /* Read register values to get any >32bit overflow */ > > + tmp = RREG32_PCIE(ixPCIE_PERF_CNTL_TXCLK); > > + cnt0_of = REG_GET_FIELD(tmp, PCIE_PERF_CNTL_TXCLK, COUNTER0_UPPER); > > + cnt1_of = REG_GET_FIELD(tmp, PCIE_PERF_CNTL_TXCLK, COUNTER1_UPPER); > > + > > + /* Get the values and add the overflow */ > > + *count0 = RREG32_PCIE(ixPCIE_PERF_COUNT0_TXCLK) | (cnt0_of << 32); > > + *count1 = RREG32_PCIE(ixPCIE_PERF_COUNT1_TXCLK) | (cnt1_of << 32); > > +} > > + > > static const struct amdgpu_asic_funcs vi_asic_funcs = > > { > > .read_disabled_bios = &vi_read_disabled_bios, > > @@ -956,6 +996,7 @@ static const struct amdgpu_asic_funcs vi_asic_funcs = > > .invalidate_hdp = &vi_invalidate_hdp, > > .need_full_reset = &vi_need_full_reset, > > .init_doorbell_index = &legacy_doorbell_index_init, > > + .get_pcie_usage = &vi_get_pcie_usage, > > }; > > > > #define CZ_REV_BRISTOL(rev) \ > _______________________________________________ > amd-gfx mailing list > amd-gfx@xxxxxxxxxxxxxxxxxxxxx > https://lists.freedesktop.org/mailman/listinfo/amd-gfx _______________________________________________ amd-gfx mailing list amd-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/amd-gfx