> -----Original Message----- > From: Kuehling, Felix > Sent: Monday, January 07, 2019 2:54 PM > To: Russell, Kent <Kent.Russell@xxxxxxx>; amd-gfx@xxxxxxxxxxxxxxxxxxxxx > Subject: Re: [PATCH 2/2] drm/amdgpu: Add sysfs file for PCIe usage > > On 2019-01-04 7:35 a.m., Russell, Kent wrote: > > Add a sysfs file that reports the number of bytes transmitted and > > received in the last second. This can be used to approximate the PCIe > > bandwidth usage over the last second. > > > > Change-Id: I6c82fe1cb17726825736512990fa64f5c1489861 > > Signed-off-by: Kent Russell <kent.russell@xxxxxxx> > > --- > > drivers/gpu/drm/amd/amdgpu/amdgpu.h | 4 ++++ > > drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c | 31 > ++++++++++++++++++++++++ > > drivers/gpu/drm/amd/amdgpu/cik.c | 41 > +++++++++++++++++++++++++++++++ > > drivers/gpu/drm/amd/amdgpu/si.c | 41 > +++++++++++++++++++++++++++++++ > > drivers/gpu/drm/amd/amdgpu/soc15.c | 44 > ++++++++++++++++++++++++++++++++++ > > drivers/gpu/drm/amd/amdgpu/vi.c | 41 > +++++++++++++++++++++++++++++++ > > 6 files changed, 202 insertions(+) > > > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h > > b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > > index e1b2c64..512b124 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > > @@ -542,6 +542,9 @@ struct amdgpu_asic_funcs { > > bool (*need_full_reset)(struct amdgpu_device *adev); > > /* initialize doorbell layout for specific asic*/ > > void (*init_doorbell_index)(struct amdgpu_device *adev); > > + /* PCIe bandwidth usage */ > > + void (*get_pcie_usage)(struct amdgpu_device *adev, uint64_t > *count0, > > + uint64_t *count1); > > }; > > > > /* > > @@ -1045,6 +1048,7 @@ int emu_soc_asic_init(struct amdgpu_device > > *adev); #define amdgpu_asic_invalidate_hdp(adev, r) > > (adev)->asic_funcs->invalidate_hdp((adev), (r)) #define > > amdgpu_asic_need_full_reset(adev) > > (adev)->asic_funcs->need_full_reset((adev)) > > #define amdgpu_asic_init_doorbell_index(adev) > > (adev)->asic_funcs->init_doorbell_index((adev)) > > +#define amdgpu_asic_get_pcie_usage(adev, cnt0, cnt1) > > +((adev)->asic_funcs->get_pcie_usage((adev), (cnt0), (cnt1))) > > > > /* Common functions */ > > bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev); > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c > > b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c > > index 1f61ed9..051307e 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c > > @@ -990,6 +990,30 @@ static ssize_t amdgpu_get_busy_percent(struct > device *dev, > > return snprintf(buf, PAGE_SIZE, "%d\n", value); } > > > > +/** > > + * DOC: pcie_bw > > + * > > + * The amdgpu driver provides a sysfs API for reading how much data > > + * has been sent and received by the GPU in the last second through PCIe. > > + * The file pcie_bw is used for this. > > + * The Perf counters calculate this and return the number of sent and > > +received > > + * messages, which we multiply by the size of our PCIe packets (mps). > > + */ > > +static ssize_t amdgpu_get_pcie_bw(struct device *dev, > > + struct device_attribute *attr, > > + char *buf) > > +{ > > + struct drm_device *ddev = dev_get_drvdata(dev); > > + struct amdgpu_device *adev = ddev->dev_private; > > + uint64_t mps = pcie_get_mps(adev->pdev); > > + uint64_t count0, count1; > > + > > + amdgpu_asic_get_pcie_usage(adev, &count0, &count1); > > + return snprintf(buf, PAGE_SIZE, > > + "Bytes received: %llu\nBytes sent: %llu\n", > > + count0 * mps, count1 * mps); > > This quietly assumes that all transfers use the maximum payload size. > > We should either state the assumption or report the count rather than a > number in bytes. > > Regards, > Felix Agreed, I have expanded upon the comment to clarify this in the upcoming v2 patch > > > > +} > > + > > static DEVICE_ATTR(power_dpm_state, S_IRUGO | S_IWUSR, > > amdgpu_get_dpm_state, amdgpu_set_dpm_state); static > DEVICE_ATTR(power_dpm_force_performance_level, S_IRUGO | S_IWUSR, > > amdgpu_get_dpm_forced_performance_level, > > @@ -1025,6 +1049,7 @@ static DEVICE_ATTR(pp_od_clk_voltage, S_IRUGO > | S_IWUSR, > > amdgpu_set_pp_od_clk_voltage); > > static DEVICE_ATTR(gpu_busy_percent, S_IRUGO, > > amdgpu_get_busy_percent, NULL); > > +static DEVICE_ATTR(pcie_bw, S_IRUGO, amdgpu_get_pcie_bw, NULL); > > > > static ssize_t amdgpu_hwmon_show_temp(struct device *dev, > > struct device_attribute *attr, @@ -2105,6 > +2130,11 @@ int > > amdgpu_pm_sysfs_init(struct amdgpu_device *adev) > > "gpu_busy_level\n"); > > return ret; > > } > > + ret = device_create_file(adev->dev, &dev_attr_pcie_bw); > > + if (ret) { > > + DRM_ERROR("failed to create device file pcie_bw\n"); > > + return ret; > > + } > > ret = amdgpu_debugfs_pm_init(adev); > > if (ret) { > > DRM_ERROR("Failed to register debugfs file for dpm!\n"); > @@ -2141,6 > > +2171,7 @@ void amdgpu_pm_sysfs_fini(struct amdgpu_device *adev) > > device_remove_file(adev->dev, > > &dev_attr_pp_od_clk_voltage); > > device_remove_file(adev->dev, &dev_attr_gpu_busy_percent); > > + device_remove_file(adev->dev, &dev_attr_pcie_bw); > > } > > > > void amdgpu_pm_compute_clocks(struct amdgpu_device *adev) diff --git > > a/drivers/gpu/drm/amd/amdgpu/cik.c > b/drivers/gpu/drm/amd/amdgpu/cik.c > > index 71c50d8..8681744 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/cik.c > > +++ b/drivers/gpu/drm/amd/amdgpu/cik.c > > @@ -1741,6 +1741,46 @@ static bool cik_need_full_reset(struct > amdgpu_device *adev) > > return true; > > } > > > > +static void cik_get_pcie_usage(struct amdgpu_device *adev, uint64_t > *count0, > > + uint64_t *count1) > > +{ > > + uint32_t perfctr = 0; > > + uint64_t cnt0_of, cnt1_of; > > + int tmp; > > + > > + /* Set the 2 events that we wish to watch, defined above */ > > + /* Reg 40 is # received msgs, Reg 104 is # of posted requests sent */ > > + perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, > EVENT0_SEL, 40); > > + perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, > EVENT1_SEL, > > +104); > > + > > + /* Write to enable desired perf counters */ > > + WREG32_PCIE(ixPCIE_PERF_CNTL_TXCLK, perfctr); > > + /* Zero out and enable the perf counters > > + * Write 0x5: > > + * Bit 0 = Start all counters(1) > > + * Bit 2 = Global counter reset enable(1) > > + */ > > + WREG32_PCIE(ixPCIE_PERF_COUNT_CNTL, 0x00000005); > > + > > + msleep(1000); > > + > > + /* Load the shadow and disable the perf counters > > + * Write 0x2: > > + * Bit 0 = Stop counters(0) > > + * Bit 1 = Load the shadow counters(1) > > + */ > > + WREG32_PCIE(ixPCIE_PERF_COUNT_CNTL, 0x00000002); > > + > > + /* Read register values to get any >32bit overflow */ > > + tmp = RREG32_PCIE(ixPCIE_PERF_CNTL_TXCLK); > > + cnt0_of = REG_GET_FIELD(tmp, PCIE_PERF_CNTL_TXCLK, > COUNTER0_UPPER); > > + cnt1_of = REG_GET_FIELD(tmp, PCIE_PERF_CNTL_TXCLK, > COUNTER1_UPPER); > > + > > + /* Get the values and add the overflow */ > > + *count0 = RREG32_PCIE(ixPCIE_PERF_COUNT0_TXCLK) | (cnt0_of << > 32); > > + *count1 = RREG32_PCIE(ixPCIE_PERF_COUNT1_TXCLK) | (cnt1_of << > 32); } > > + > > static const struct amdgpu_asic_funcs cik_asic_funcs = { > > .read_disabled_bios = &cik_read_disabled_bios, @@ -1756,6 +1796,7 > @@ > > static const struct amdgpu_asic_funcs cik_asic_funcs = > > .invalidate_hdp = &cik_invalidate_hdp, > > .need_full_reset = &cik_need_full_reset, > > .init_doorbell_index = &legacy_doorbell_index_init, > > + .get_pcie_usage = &cik_get_pcie_usage, > > }; > > > > static int cik_common_early_init(void *handle) diff --git > > a/drivers/gpu/drm/amd/amdgpu/si.c > b/drivers/gpu/drm/amd/amdgpu/si.c > > index f8408f8..2b3eb0e 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/si.c > > +++ b/drivers/gpu/drm/amd/amdgpu/si.c > > @@ -1323,6 +1323,46 @@ static void si_set_pcie_lanes(struct > amdgpu_device *adev, int lanes) > > WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, link_width_cntl); > } > > > > +static void si_get_pcie_usage(struct amdgpu_device *adev, uint64_t > *count0, > > + uint64_t *count1) > > +{ > > + uint32_t perfctr = 0; > > + uint64_t cnt0_of, cnt1_of; > > + int tmp; > > + > > + /* Set the 2 events that we wish to watch, defined above */ > > + /* Reg 40 is # received msgs, Reg 104 is # of posted requests sent */ > > + perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, > EVENT0_SEL, 40); > > + perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, > EVENT1_SEL, > > +104); > > + > > + /* Write to enable desired perf counters */ > > + WREG32_PCIE(ixPCIE_PERF_CNTL_TXCLK, perfctr); > > + /* Zero out and enable the perf counters > > + * Write 0x5: > > + * Bit 0 = Start all counters(1) > > + * Bit 2 = Global counter reset enable(1) > > + */ > > + WREG32_PCIE(ixPCIE_PERF_COUNT_CNTL, 0x00000005); > > + > > + msleep(1000); > > + > > + /* Load the shadow and disable the perf counters > > + * Write 0x2: > > + * Bit 0 = Stop counters(0) > > + * Bit 1 = Load the shadow counters(1) > > + */ > > + WREG32_PCIE(ixPCIE_PERF_COUNT_CNTL, 0x00000002); > > + > > + /* Read register values to get any >32bit overflow */ > > + tmp = RREG32_PCIE(ixPCIE_PERF_CNTL_TXCLK); > > + cnt0_of = REG_GET_FIELD(tmp, PCIE_PERF_CNTL_TXCLK, > COUNTER0_UPPER); > > + cnt1_of = REG_GET_FIELD(tmp, PCIE_PERF_CNTL_TXCLK, > COUNTER1_UPPER); > > + > > + /* Get the values and add the overflow */ > > + *count0 = RREG32_PCIE(ixPCIE_PERF_COUNT0_TXCLK) | (cnt0_of << > 32); > > + *count1 = RREG32_PCIE(ixPCIE_PERF_COUNT1_TXCLK) | (cnt1_of << > 32); } > > + > > static const struct amdgpu_asic_funcs si_asic_funcs = { > > .read_disabled_bios = &si_read_disabled_bios, @@ -1339,6 +1379,7 > @@ > > static const struct amdgpu_asic_funcs si_asic_funcs = > > .flush_hdp = &si_flush_hdp, > > .invalidate_hdp = &si_invalidate_hdp, > > .need_full_reset = &si_need_full_reset, > > + .get_pcie_usage = &si_get_pcie_usage, > > }; > > > > static uint32_t si_get_rev_id(struct amdgpu_device *adev) diff --git > > a/drivers/gpu/drm/amd/amdgpu/soc15.c > > b/drivers/gpu/drm/amd/amdgpu/soc15.c > > index 8849b74..afd8391 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/soc15.c > > +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c > > @@ -43,6 +43,9 @@ > > #include "hdp/hdp_4_0_sh_mask.h" > > #include "smuio/smuio_9_0_offset.h" > > #include "smuio/smuio_9_0_sh_mask.h" > > +#include "nbio/nbio_7_0_default.h" > > +#include "nbio/nbio_7_0_sh_mask.h" > > +#include "nbio/nbio_7_0_smn.h" > > > > #include "soc15.h" > > #include "soc15_common.h" > > @@ -601,6 +604,45 @@ static bool soc15_need_full_reset(struct > amdgpu_device *adev) > > /* change this when we implement soft reset */ > > return true; > > } > > +static void soc15_get_pcie_usage(struct amdgpu_device *adev, uint64_t > *count0, > > + uint64_t *count1) > > +{ > > + uint32_t perfctr = 0; > > + uint64_t cnt0_of, cnt1_of; > > + int tmp; > > + > > + /* Set the 2 events that we wish to watch, defined above */ > > + /* Reg 40 is # received msgs, Reg 104 is # of posted requests sent */ > > + perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, > EVENT0_SEL, 40); > > + perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, > EVENT1_SEL, > > +104); > > + > > + /* Write to enable desired perf counters */ > > + WREG32_PCIE(smnPCIE_PERF_CNTL_TXCLK, perfctr); > > + /* Zero out and enable the perf counters > > + * Write 0x5: > > + * Bit 0 = Start all counters(1) > > + * Bit 2 = Global counter reset enable(1) > > + */ > > + WREG32_PCIE(smnPCIE_PERF_COUNT_CNTL, 0x00000005); > > + > > + msleep(1000); > > + > > + /* Load the shadow and disable the perf counters > > + * Write 0x2: > > + * Bit 0 = Stop counters(0) > > + * Bit 1 = Load the shadow counters(1) > > + */ > > + WREG32_PCIE(smnPCIE_PERF_COUNT_CNTL, 0x00000002); > > + > > + /* Read register values to get any >32bit overflow */ > > + tmp = RREG32_PCIE(smnPCIE_PERF_CNTL_TXCLK); > > + cnt0_of = REG_GET_FIELD(tmp, PCIE_PERF_CNTL_TXCLK, > COUNTER0_UPPER); > > + cnt1_of = REG_GET_FIELD(tmp, PCIE_PERF_CNTL_TXCLK, > COUNTER1_UPPER); > > + > > + /* Get the values and add the overflow */ > > + *count0 = RREG32_PCIE(smnPCIE_PERF_COUNT0_TXCLK) | (cnt0_of > << 32); > > + *count1 = RREG32_PCIE(smnPCIE_PERF_COUNT1_TXCLK) | (cnt1_of > << 32); > > +} > > > > static const struct amdgpu_asic_funcs soc15_asic_funcs = { @@ -617,6 > > +659,7 @@ static const struct amdgpu_asic_funcs soc15_asic_funcs = > > .invalidate_hdp = &soc15_invalidate_hdp, > > .need_full_reset = &soc15_need_full_reset, > > .init_doorbell_index = &vega10_doorbell_index_init, > > + .get_pcie_usage = &soc15_get_pcie_usage, > > }; > > > > static const struct amdgpu_asic_funcs vega20_asic_funcs = @@ -634,6 > > +677,7 @@ static const struct amdgpu_asic_funcs vega20_asic_funcs = > > .invalidate_hdp = &soc15_invalidate_hdp, > > .need_full_reset = &soc15_need_full_reset, > > .init_doorbell_index = &vega20_doorbell_index_init, > > + .get_pcie_usage = &soc15_get_pcie_usage, > > }; > > > > static int soc15_common_early_init(void *handle) diff --git > > a/drivers/gpu/drm/amd/amdgpu/vi.c > b/drivers/gpu/drm/amd/amdgpu/vi.c > > index 03e7be5..f785565 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/vi.c > > +++ b/drivers/gpu/drm/amd/amdgpu/vi.c > > @@ -941,6 +941,46 @@ static bool vi_need_full_reset(struct > amdgpu_device *adev) > > } > > } > > > > +static void vi_get_pcie_usage(struct amdgpu_device *adev, uint64_t > *count0, > > + uint64_t *count1) > > +{ > > + uint32_t perfctr = 0; > > + uint64_t cnt0_of, cnt1_of; > > + int tmp; > > + > > + /* Set the 2 events that we wish to watch, defined above */ > > + /* Reg 40 is # received msgs, Reg 104 is # of posted requests sent */ > > + perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, > EVENT0_SEL, 40); > > + perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, > EVENT1_SEL, > > +104); > > + > > + /* Write to enable desired perf counters */ > > + WREG32_PCIE(ixPCIE_PERF_CNTL_TXCLK, perfctr); > > + /* Zero out and enable the perf counters > > + * Write 0x5: > > + * Bit 0 = Start all counters(1) > > + * Bit 2 = Global counter reset enable(1) > > + */ > > + WREG32_PCIE(ixPCIE_PERF_COUNT_CNTL, 0x00000005); > > + > > + msleep(1000); > > + > > + /* Load the shadow and disable the perf counters > > + * Write 0x2: > > + * Bit 0 = Stop counters(0) > > + * Bit 1 = Load the shadow counters(1) > > + */ > > + WREG32_PCIE(ixPCIE_PERF_COUNT_CNTL, 0x00000002); > > + > > + /* Read register values to get any >32bit overflow */ > > + tmp = RREG32_PCIE(ixPCIE_PERF_CNTL_TXCLK); > > + cnt0_of = REG_GET_FIELD(tmp, PCIE_PERF_CNTL_TXCLK, > COUNTER0_UPPER); > > + cnt1_of = REG_GET_FIELD(tmp, PCIE_PERF_CNTL_TXCLK, > COUNTER1_UPPER); > > + > > + /* Get the values and add the overflow */ > > + *count0 = RREG32_PCIE(ixPCIE_PERF_COUNT0_TXCLK) | (cnt0_of << > 32); > > + *count1 = RREG32_PCIE(ixPCIE_PERF_COUNT1_TXCLK) | (cnt1_of << > 32); } > > + > > static const struct amdgpu_asic_funcs vi_asic_funcs = { > > .read_disabled_bios = &vi_read_disabled_bios, @@ -956,6 +996,7 > @@ > > static const struct amdgpu_asic_funcs vi_asic_funcs = > > .invalidate_hdp = &vi_invalidate_hdp, > > .need_full_reset = &vi_need_full_reset, > > .init_doorbell_index = &legacy_doorbell_index_init, > > + .get_pcie_usage = &vi_get_pcie_usage, > > }; > > > > #define CZ_REV_BRISTOL(rev) \ _______________________________________________ amd-gfx mailing list amd-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/amd-gfx