On Fri, Jan 4, 2019 at 7:35 AM Russell, Kent <Kent.Russell@xxxxxxx> wrote: > > Add a sysfs file that reports the number of bytes transmitted and > received in the last second. This can be used to approximate the PCIe > bandwidth usage over the last second. > > Change-Id: I6c82fe1cb17726825736512990fa64f5c1489861 > Signed-off-by: Kent Russell <kent.russell@xxxxxxx> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu.h | 4 ++++ > drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c | 31 ++++++++++++++++++++++++ > drivers/gpu/drm/amd/amdgpu/cik.c | 41 +++++++++++++++++++++++++++++++ > drivers/gpu/drm/amd/amdgpu/si.c | 41 +++++++++++++++++++++++++++++++ > drivers/gpu/drm/amd/amdgpu/soc15.c | 44 ++++++++++++++++++++++++++++++++++ > drivers/gpu/drm/amd/amdgpu/vi.c | 41 +++++++++++++++++++++++++++++++ > 6 files changed, 202 insertions(+) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > index e1b2c64..512b124 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > @@ -542,6 +542,9 @@ struct amdgpu_asic_funcs { > bool (*need_full_reset)(struct amdgpu_device *adev); > /* initialize doorbell layout for specific asic*/ > void (*init_doorbell_index)(struct amdgpu_device *adev); > + /* PCIe bandwidth usage */ > + void (*get_pcie_usage)(struct amdgpu_device *adev, uint64_t *count0, > + uint64_t *count1); > }; > > /* > @@ -1045,6 +1048,7 @@ int emu_soc_asic_init(struct amdgpu_device *adev); > #define amdgpu_asic_invalidate_hdp(adev, r) (adev)->asic_funcs->invalidate_hdp((adev), (r)) > #define amdgpu_asic_need_full_reset(adev) (adev)->asic_funcs->need_full_reset((adev)) > #define amdgpu_asic_init_doorbell_index(adev) (adev)->asic_funcs->init_doorbell_index((adev)) > +#define amdgpu_asic_get_pcie_usage(adev, cnt0, cnt1) ((adev)->asic_funcs->get_pcie_usage((adev), (cnt0), (cnt1))) > > /* Common functions */ > bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev); > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c > index 1f61ed9..051307e 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c > @@ -990,6 +990,30 @@ static ssize_t amdgpu_get_busy_percent(struct device *dev, > return snprintf(buf, PAGE_SIZE, "%d\n", value); > } > > +/** > + * DOC: pcie_bw > + * > + * The amdgpu driver provides a sysfs API for reading how much data > + * has been sent and received by the GPU in the last second through PCIe. > + * The file pcie_bw is used for this. > + * The Perf counters calculate this and return the number of sent and received > + * messages, which we multiply by the size of our PCIe packets (mps). > + */ > +static ssize_t amdgpu_get_pcie_bw(struct device *dev, > + struct device_attribute *attr, > + char *buf) > +{ > + struct drm_device *ddev = dev_get_drvdata(dev); > + struct amdgpu_device *adev = ddev->dev_private; > + uint64_t mps = pcie_get_mps(adev->pdev); > + uint64_t count0, count1; > + > + amdgpu_asic_get_pcie_usage(adev, &count0, &count1); > + return snprintf(buf, PAGE_SIZE, > + "Bytes received: %llu\nBytes sent: %llu\n", > + count0 * mps, count1 * mps); > +} > + > static DEVICE_ATTR(power_dpm_state, S_IRUGO | S_IWUSR, amdgpu_get_dpm_state, amdgpu_set_dpm_state); > static DEVICE_ATTR(power_dpm_force_performance_level, S_IRUGO | S_IWUSR, > amdgpu_get_dpm_forced_performance_level, > @@ -1025,6 +1049,7 @@ static DEVICE_ATTR(pp_od_clk_voltage, S_IRUGO | S_IWUSR, > amdgpu_set_pp_od_clk_voltage); > static DEVICE_ATTR(gpu_busy_percent, S_IRUGO, > amdgpu_get_busy_percent, NULL); > +static DEVICE_ATTR(pcie_bw, S_IRUGO, amdgpu_get_pcie_bw, NULL); > > static ssize_t amdgpu_hwmon_show_temp(struct device *dev, > struct device_attribute *attr, > @@ -2105,6 +2130,11 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev) > "gpu_busy_level\n"); > return ret; > } > + ret = device_create_file(adev->dev, &dev_attr_pcie_bw); > + if (ret) { > + DRM_ERROR("failed to create device file pcie_bw\n"); > + return ret; > + } Should this file be added for APUs too or just dGPUs? > ret = amdgpu_debugfs_pm_init(adev); > if (ret) { > DRM_ERROR("Failed to register debugfs file for dpm!\n"); > @@ -2141,6 +2171,7 @@ void amdgpu_pm_sysfs_fini(struct amdgpu_device *adev) > device_remove_file(adev->dev, > &dev_attr_pp_od_clk_voltage); > device_remove_file(adev->dev, &dev_attr_gpu_busy_percent); > + device_remove_file(adev->dev, &dev_attr_pcie_bw); > } > > void amdgpu_pm_compute_clocks(struct amdgpu_device *adev) > diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c > index 71c50d8..8681744 100644 > --- a/drivers/gpu/drm/amd/amdgpu/cik.c > +++ b/drivers/gpu/drm/amd/amdgpu/cik.c > @@ -1741,6 +1741,46 @@ static bool cik_need_full_reset(struct amdgpu_device *adev) > return true; > } > > +static void cik_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0, > + uint64_t *count1) > +{ > + uint32_t perfctr = 0; > + uint64_t cnt0_of, cnt1_of; > + int tmp; > + > + /* Set the 2 events that we wish to watch, defined above */ > + /* Reg 40 is # received msgs, Reg 104 is # of posted requests sent */ > + perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, EVENT0_SEL, 40); > + perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, EVENT1_SEL, 104); > + > + /* Write to enable desired perf counters */ > + WREG32_PCIE(ixPCIE_PERF_CNTL_TXCLK, perfctr); > + /* Zero out and enable the perf counters > + * Write 0x5: > + * Bit 0 = Start all counters(1) > + * Bit 2 = Global counter reset enable(1) > + */ > + WREG32_PCIE(ixPCIE_PERF_COUNT_CNTL, 0x00000005); > + > + msleep(1000); > + > + /* Load the shadow and disable the perf counters > + * Write 0x2: > + * Bit 0 = Stop counters(0) > + * Bit 1 = Load the shadow counters(1) > + */ > + WREG32_PCIE(ixPCIE_PERF_COUNT_CNTL, 0x00000002); > + > + /* Read register values to get any >32bit overflow */ > + tmp = RREG32_PCIE(ixPCIE_PERF_CNTL_TXCLK); > + cnt0_of = REG_GET_FIELD(tmp, PCIE_PERF_CNTL_TXCLK, COUNTER0_UPPER); > + cnt1_of = REG_GET_FIELD(tmp, PCIE_PERF_CNTL_TXCLK, COUNTER1_UPPER); > + > + /* Get the values and add the overflow */ > + *count0 = RREG32_PCIE(ixPCIE_PERF_COUNT0_TXCLK) | (cnt0_of << 32); > + *count1 = RREG32_PCIE(ixPCIE_PERF_COUNT1_TXCLK) | (cnt1_of << 32); > +} Does this work properly on APUs as well or just dGPUs? Same comment for VI and SOC15. Thanks, Alex > + > static const struct amdgpu_asic_funcs cik_asic_funcs = > { > .read_disabled_bios = &cik_read_disabled_bios, > @@ -1756,6 +1796,7 @@ static const struct amdgpu_asic_funcs cik_asic_funcs = > .invalidate_hdp = &cik_invalidate_hdp, > .need_full_reset = &cik_need_full_reset, > .init_doorbell_index = &legacy_doorbell_index_init, > + .get_pcie_usage = &cik_get_pcie_usage, > }; > > static int cik_common_early_init(void *handle) > diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c > index f8408f8..2b3eb0e 100644 > --- a/drivers/gpu/drm/amd/amdgpu/si.c > +++ b/drivers/gpu/drm/amd/amdgpu/si.c > @@ -1323,6 +1323,46 @@ static void si_set_pcie_lanes(struct amdgpu_device *adev, int lanes) > WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, link_width_cntl); > } > > +static void si_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0, > + uint64_t *count1) > +{ > + uint32_t perfctr = 0; > + uint64_t cnt0_of, cnt1_of; > + int tmp; > + > + /* Set the 2 events that we wish to watch, defined above */ > + /* Reg 40 is # received msgs, Reg 104 is # of posted requests sent */ > + perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, EVENT0_SEL, 40); > + perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, EVENT1_SEL, 104); > + > + /* Write to enable desired perf counters */ > + WREG32_PCIE(ixPCIE_PERF_CNTL_TXCLK, perfctr); > + /* Zero out and enable the perf counters > + * Write 0x5: > + * Bit 0 = Start all counters(1) > + * Bit 2 = Global counter reset enable(1) > + */ > + WREG32_PCIE(ixPCIE_PERF_COUNT_CNTL, 0x00000005); > + > + msleep(1000); > + > + /* Load the shadow and disable the perf counters > + * Write 0x2: > + * Bit 0 = Stop counters(0) > + * Bit 1 = Load the shadow counters(1) > + */ > + WREG32_PCIE(ixPCIE_PERF_COUNT_CNTL, 0x00000002); > + > + /* Read register values to get any >32bit overflow */ > + tmp = RREG32_PCIE(ixPCIE_PERF_CNTL_TXCLK); > + cnt0_of = REG_GET_FIELD(tmp, PCIE_PERF_CNTL_TXCLK, COUNTER0_UPPER); > + cnt1_of = REG_GET_FIELD(tmp, PCIE_PERF_CNTL_TXCLK, COUNTER1_UPPER); > + > + /* Get the values and add the overflow */ > + *count0 = RREG32_PCIE(ixPCIE_PERF_COUNT0_TXCLK) | (cnt0_of << 32); > + *count1 = RREG32_PCIE(ixPCIE_PERF_COUNT1_TXCLK) | (cnt1_of << 32); > +} > + > static const struct amdgpu_asic_funcs si_asic_funcs = > { > .read_disabled_bios = &si_read_disabled_bios, > @@ -1339,6 +1379,7 @@ static const struct amdgpu_asic_funcs si_asic_funcs = > .flush_hdp = &si_flush_hdp, > .invalidate_hdp = &si_invalidate_hdp, > .need_full_reset = &si_need_full_reset, > + .get_pcie_usage = &si_get_pcie_usage, > }; > > static uint32_t si_get_rev_id(struct amdgpu_device *adev) > diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c > index 8849b74..afd8391 100644 > --- a/drivers/gpu/drm/amd/amdgpu/soc15.c > +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c > @@ -43,6 +43,9 @@ > #include "hdp/hdp_4_0_sh_mask.h" > #include "smuio/smuio_9_0_offset.h" > #include "smuio/smuio_9_0_sh_mask.h" > +#include "nbio/nbio_7_0_default.h" > +#include "nbio/nbio_7_0_sh_mask.h" > +#include "nbio/nbio_7_0_smn.h" > > #include "soc15.h" > #include "soc15_common.h" > @@ -601,6 +604,45 @@ static bool soc15_need_full_reset(struct amdgpu_device *adev) > /* change this when we implement soft reset */ > return true; > } > +static void soc15_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0, > + uint64_t *count1) > +{ > + uint32_t perfctr = 0; > + uint64_t cnt0_of, cnt1_of; > + int tmp; > + > + /* Set the 2 events that we wish to watch, defined above */ > + /* Reg 40 is # received msgs, Reg 104 is # of posted requests sent */ > + perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, EVENT0_SEL, 40); > + perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, EVENT1_SEL, 104); > + > + /* Write to enable desired perf counters */ > + WREG32_PCIE(smnPCIE_PERF_CNTL_TXCLK, perfctr); > + /* Zero out and enable the perf counters > + * Write 0x5: > + * Bit 0 = Start all counters(1) > + * Bit 2 = Global counter reset enable(1) > + */ > + WREG32_PCIE(smnPCIE_PERF_COUNT_CNTL, 0x00000005); > + > + msleep(1000); > + > + /* Load the shadow and disable the perf counters > + * Write 0x2: > + * Bit 0 = Stop counters(0) > + * Bit 1 = Load the shadow counters(1) > + */ > + WREG32_PCIE(smnPCIE_PERF_COUNT_CNTL, 0x00000002); > + > + /* Read register values to get any >32bit overflow */ > + tmp = RREG32_PCIE(smnPCIE_PERF_CNTL_TXCLK); > + cnt0_of = REG_GET_FIELD(tmp, PCIE_PERF_CNTL_TXCLK, COUNTER0_UPPER); > + cnt1_of = REG_GET_FIELD(tmp, PCIE_PERF_CNTL_TXCLK, COUNTER1_UPPER); > + > + /* Get the values and add the overflow */ > + *count0 = RREG32_PCIE(smnPCIE_PERF_COUNT0_TXCLK) | (cnt0_of << 32); > + *count1 = RREG32_PCIE(smnPCIE_PERF_COUNT1_TXCLK) | (cnt1_of << 32); > +} > > static const struct amdgpu_asic_funcs soc15_asic_funcs = > { > @@ -617,6 +659,7 @@ static const struct amdgpu_asic_funcs soc15_asic_funcs = > .invalidate_hdp = &soc15_invalidate_hdp, > .need_full_reset = &soc15_need_full_reset, > .init_doorbell_index = &vega10_doorbell_index_init, > + .get_pcie_usage = &soc15_get_pcie_usage, > }; > > static const struct amdgpu_asic_funcs vega20_asic_funcs = > @@ -634,6 +677,7 @@ static const struct amdgpu_asic_funcs vega20_asic_funcs = > .invalidate_hdp = &soc15_invalidate_hdp, > .need_full_reset = &soc15_need_full_reset, > .init_doorbell_index = &vega20_doorbell_index_init, > + .get_pcie_usage = &soc15_get_pcie_usage, > }; > > static int soc15_common_early_init(void *handle) > diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c > index 03e7be5..f785565 100644 > --- a/drivers/gpu/drm/amd/amdgpu/vi.c > +++ b/drivers/gpu/drm/amd/amdgpu/vi.c > @@ -941,6 +941,46 @@ static bool vi_need_full_reset(struct amdgpu_device *adev) > } > } > > +static void vi_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0, > + uint64_t *count1) > +{ > + uint32_t perfctr = 0; > + uint64_t cnt0_of, cnt1_of; > + int tmp; > + > + /* Set the 2 events that we wish to watch, defined above */ > + /* Reg 40 is # received msgs, Reg 104 is # of posted requests sent */ > + perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, EVENT0_SEL, 40); > + perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, EVENT1_SEL, 104); > + > + /* Write to enable desired perf counters */ > + WREG32_PCIE(ixPCIE_PERF_CNTL_TXCLK, perfctr); > + /* Zero out and enable the perf counters > + * Write 0x5: > + * Bit 0 = Start all counters(1) > + * Bit 2 = Global counter reset enable(1) > + */ > + WREG32_PCIE(ixPCIE_PERF_COUNT_CNTL, 0x00000005); > + > + msleep(1000); > + > + /* Load the shadow and disable the perf counters > + * Write 0x2: > + * Bit 0 = Stop counters(0) > + * Bit 1 = Load the shadow counters(1) > + */ > + WREG32_PCIE(ixPCIE_PERF_COUNT_CNTL, 0x00000002); > + > + /* Read register values to get any >32bit overflow */ > + tmp = RREG32_PCIE(ixPCIE_PERF_CNTL_TXCLK); > + cnt0_of = REG_GET_FIELD(tmp, PCIE_PERF_CNTL_TXCLK, COUNTER0_UPPER); > + cnt1_of = REG_GET_FIELD(tmp, PCIE_PERF_CNTL_TXCLK, COUNTER1_UPPER); > + > + /* Get the values and add the overflow */ > + *count0 = RREG32_PCIE(ixPCIE_PERF_COUNT0_TXCLK) | (cnt0_of << 32); > + *count1 = RREG32_PCIE(ixPCIE_PERF_COUNT1_TXCLK) | (cnt1_of << 32); > +} > + > static const struct amdgpu_asic_funcs vi_asic_funcs = > { > .read_disabled_bios = &vi_read_disabled_bios, > @@ -956,6 +996,7 @@ static const struct amdgpu_asic_funcs vi_asic_funcs = > .invalidate_hdp = &vi_invalidate_hdp, > .need_full_reset = &vi_need_full_reset, > .init_doorbell_index = &legacy_doorbell_index_init, > + .get_pcie_usage = &vi_get_pcie_usage, > }; > > #define CZ_REV_BRISTOL(rev) \ > -- > 2.7.4 > > _______________________________________________ > amd-gfx mailing list > amd-gfx@xxxxxxxxxxxxxxxxxxxxx > https://lists.freedesktop.org/mailman/listinfo/amd-gfx _______________________________________________ amd-gfx mailing list amd-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/amd-gfx