Re: [PATCH 2/4] platform/x86/amd/hsmp: add support for metrics tbl

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 




On 8/21/2023 4:55 PM, Ilpo Järvinen wrote:
Caution: This message originated from an External Source. Use proper caution when opening attachments, clicking links, or responding.


On Fri, 18 Aug 2023, Suma Hegde wrote:

From: Suma Hegde <suma.hegde@xxxxxxx>

AMD MI300 MCM provides GET_METRICS_TABLE message with which
all the system management information from SMU can be retrieved in just
one message.

The metrics table is available as hexadecimal sysfs binary file in
/sys/devices/platform/amd_hsmp/socket%d_metrics_bin
Metrics table definitions will be documented as part of PPR which
available in public domain. The same is defined in the amd_hsmp.h header
file as well.

Signed-off-by: Suma Hegde <suma.hegde@xxxxxxx>
Reviewed-by: Naveen Krishna Chatradhi <nchatrad@xxxxxxx>
---
  arch/x86/include/uapi/asm/amd_hsmp.h | 109 +++++++++++++++++++
  drivers/platform/x86/amd/hsmp.c      | 155 ++++++++++++++++++++++++++-
  2 files changed, 259 insertions(+), 5 deletions(-)

diff --git a/arch/x86/include/uapi/asm/amd_hsmp.h b/arch/x86/include/uapi/asm/amd_hsmp.h
index 769b939444ae..832701b41c7d 100644
--- a/arch/x86/include/uapi/asm/amd_hsmp.h
+++ b/arch/x86/include/uapi/asm/amd_hsmp.h
@@ -47,6 +47,9 @@ enum hsmp_message_ids {
       HSMP_SET_PCI_RATE,              /* 20h Control link rate on PCIe devices */
       HSMP_SET_POWER_MODE,            /* 21h Select power efficiency profile policy */
       HSMP_SET_PSTATE_MAX_MIN,        /* 22h Set the max and min DF P-State  */
+     HSMP_GET_METRIC_TABLE_VER,      /* 23h Get metrics table version  */
+     HSMP_GET_METRIC_TABLE,          /* 24h Get metrics table  */
extra spaces before */
Ok, will remove it.
+     HSMP_GET_METRIC_TABLE_DRAM_ADDR,/* 25h Get metrics table dram address */
       HSMP_MSG_ID_MAX,
  };

@@ -64,6 +67,14 @@ enum hsmp_msg_type {
       HSMP_GET  = 1,
  };

+enum hsmp_proto_versions {
+     HSMP_PROTO_VER2 = 2,
+     HSMP_PROTO_VER3,
+     HSMP_PROTO_VER4,
+     HSMP_PROTO_VER5,
+     HSMP_PROTO_VER6
+};
+
  struct hsmp_msg_desc {
       int num_args;
       int response_sz;
@@ -295,6 +306,104 @@ static const struct hsmp_msg_desc hsmp_msg_desc_table[] = {
        * input: args[0] = min df pstate[15:8] + max df pstate[7:0]
        */
       {1, 0, HSMP_SET},
+
+     /*
+      * HSMP_GET_METRIC_TABLE_VER, num_args = 0, response_sz = 1
+      * output: args[0] = metrics table version
+      */
+     {0, 1, HSMP_GET},
+
+     /*
+      * HSMP_GET_METRIC_TABLE, num_args = 0, response_sz = 0
+      */
+     {0, 0, HSMP_GET},
+
+     /*
+      * HSMP_GET_METRIC_TABLE_DRAM_ADDR, num_args = 0, response_sz = 2
+      * output: args[0] = lower 32 bits of the address
+      * output: args[1] = upper 32 bits of the address
+      */
+     {0, 2, HSMP_GET},
+};
+
+/* Metrics table for EPYC socket(supported only from proto version 6) */
+struct hsmp_metric_table {
+     __u32 accumulation_counter;
+
+     //TEMPERATURE
+     __u32 max_socket_temperature;
+     __u32 max_vr_temperature;
+     __u32 max_hbm_temperature;
+     __u64 max_socket_temperature_acc;
+     __u64 max_vr_temperature_acc;
+     __u64 max_hbm_temperature_acc;
+
+     //POWER
+     __u32 socket_power_limit;
+     __u32 max_socket_power_limit;
+     __u32 socket_power;
+
+     //ENERGY
+     __u64 timestamp;
+     __u64 socket_energy_acc;
+     __u64 ccd_energy_acc;
+     __u64 xcd_energy_acc;
+     __u64 aid_energy_acc;
+     __u64 hbm_energy_acc;
+
+     //FREQUENCY
+     __u32 cclk_frequency_limit;
+     __u32 gfxclk_frequency_limit;
+     __u32 fclk_frequency;
+     __u32 uclk_frequency;
+     __u32 socclk_frequency[4];
+     __u32 vclk_frequency[4];
+     __u32 dclk_frequency[4];
+     __u32 lclk_frequency[4];
+     __u64 gfxclk_frequency_acc[8];
+     __u64 cclk_frequency_acc[96];
+
+     //FREQUENCY RANGE
+     __u32 max_cclk_frequency;
+     __u32 min_cclk_frequency;
+     __u32 max_gfxclk_frequency;
+     __u32 min_gfxclk_frequency;
+     __u32 fclk_frequency_table[4];
+     __u32 uclk_frequency_table[4];
+     __u32 socclk_frequency_table[4];
+     __u32 vclk_frequency_table[4];
+     __u32 dclk_frequency_table[4];
+     __u32 lclk_frequency_table[4];
+     __u32 max_lclk_dpm_range;
+     __u32 min_lclk_dpm_range;
+
+     //XGMI
+     __u32 xgmi_width;
+     __u32 xgmi_bitrate;
+     __u64 xgmi_read_bandwidth_acc[8];
+     __u64 xgmi_write_bandwidth_acc[8];
+
+     //ACTIVITY
+     __u32 socket_c0_residency;
+     __u32 socket_gfx_busy;
+     __u32 dram_bandwidth_utilization;
+     __u64 socket_c0_residency_acc;
+     __u64 socket_gfx_busy_acc;
+     __u64 dram_bandwidth_acc;
+     __u32 max_dram_bandwidth;
+     __u64 dram_bandwidth_utilization_acc;
+     __u64 pcie_bandwidth_acc[4];
+
+     //THROTTLERS
+     __u32 prochot_residency_acc;
+     __u32 ppt_residency_acc;
+     __u32 socket_thm_residency_acc;
+     __u32 vr_thm_residency_acc;
+     __u32 hbm_thm_residency_acc;
+     __u32 spare;
+
+     // New Items at end to maintain driver compatibility
+     __u32 gfxclk_frequency[8];
  };

  /* Reset to default packing */
diff --git a/drivers/platform/x86/amd/hsmp.c b/drivers/platform/x86/amd/hsmp.c
index 699c22f7cd42..cab59750cde2 100644
--- a/drivers/platform/x86/amd/hsmp.c
+++ b/drivers/platform/x86/amd/hsmp.c
@@ -20,7 +20,7 @@
  #include <linux/semaphore.h>

  #define DRIVER_NAME          "amd_hsmp"
-#define DRIVER_VERSION               "1.0"
+#define DRIVER_VERSION               "2.0"

  /* HSMP Status / Error codes */
  #define HSMP_STATUS_NOT_READY        0x00
@@ -47,7 +47,12 @@
  #define HSMP_INDEX_REG               0xc4
  #define HSMP_DATA_REG                0xc8

+#define HSMP_CDEV_NAME               "hsmp_cdev"
+#define HSMP_DEVNODE_NAME    "hsmp"
+
  struct hsmp_socket {
+     struct bin_attribute hsmp_attr;
+     void __iomem *metric_tbl_addr;
       struct semaphore hsmp_sem;
       u16 sock_ind;
  };
@@ -56,6 +61,7 @@ struct hsmp_plat_device {
       struct miscdevice hsmp_device;
       struct hsmp_socket *sock;
       struct device *dev;
+     u32 proto_ver;
  };

  static u16 num_sockets;
@@ -328,9 +334,122 @@ static const struct file_operations hsmp_fops = {
       .compat_ioctl   = hsmp_ioctl,
  };

+static ssize_t hsmp_metric_tbl_read(struct file *filp, struct kobject *kobj,
+                                 struct bin_attribute *bin_attr, char *buf,
+                                 loff_t off, size_t count)
+{
+     struct hsmp_socket *sock = bin_attr->private;
+     struct hsmp_message msg = { 0 };
+     int ret;
+
+     if (count == 0 || count > sizeof(struct hsmp_metric_table))
!count
Ok.

+             return 0;
+
+     msg.msg_id      = HSMP_GET_METRIC_TABLE;
+     msg.sock_ind    = sock->sock_ind;
+
+     ret = hsmp_send_message(&msg);
+     if (ret)
+             return ret;
+     memcpy(buf, sock->metric_tbl_addr, count);
+
+     return count;
+}
+
+#define HSMP_ATRR_NAME_SIZE 25
ATTR (typo)?
Yes, its a typo. will correct it.

+static int hsmp_create_metric_tbl_sysfs_file(int sock_ind)
+{
+     struct bin_attribute *hattr = &plat_dev.sock[sock_ind].hsmp_attr;
+     char *name;
+
+     sysfs_attr_init(&plat_dev.sock[sock_ind].hsmp_attr);
+     name = devm_kzalloc(plat_dev.dev, HSMP_ATRR_NAME_SIZE, GFP_KERNEL);
+     if (!name)
+             return -ENOMEM;
+     sprintf(name, "socket%d_metrics_bin", sock_ind);
snprintf()
Ok. will change it.

+     hattr->attr.name        = name;
+     hattr->attr.mode        = 0444;
+     hattr->read             = hsmp_metric_tbl_read;
+     hattr->size             = sizeof(struct hsmp_metric_table);
+     hattr->private          = &plat_dev.sock[sock_ind];
+
+     return device_create_bin_file(plat_dev.dev, hattr);
+}
+
+static int hsmp_get_tbl_dram_base(u16 sock_ind)
+{
+     struct hsmp_socket *sock = &plat_dev.sock[sock_ind];
+     struct hsmp_message msg = { 0 };
+     phys_addr_t dram_addr;
+     int ret;
+
+     msg.sock_ind    = sock_ind;
+     msg.response_sz = hsmp_msg_desc_table[HSMP_GET_METRIC_TABLE_DRAM_ADDR].response_sz;
+     msg.msg_id      = HSMP_GET_METRIC_TABLE_DRAM_ADDR;
+
+     ret = hsmp_send_message(&msg);
+     if (ret)
+             return ret;
+
+     /*
+      * calculate the metric table DRAM address from lower and upper 32 bits
+      * sent from SMU and ioremap it to virtual address.
+      */
+     dram_addr = msg.args[0] | ((u64)(msg.args[1]) << 32);
+     if (!dram_addr)
+             return -ENOMEM;
+     sock->metric_tbl_addr = devm_ioremap(plat_dev.dev, dram_addr,
+                                          sizeof(struct hsmp_metric_table));
+     if (!sock->metric_tbl_addr) {
+             dev_err(plat_dev.dev, "Unable to ioremap metric table addr\n");
+             return -ENOMEM;
+     }
+     return 0;
+}
+
+static int hsmp_create_sysfs_file(void)
+{
+     int ret, i;
+
+     for (i = 0; i < num_sockets; i++) {
+             ret = hsmp_get_tbl_dram_base(i);
+             if (ret)
+                     goto cleanup;
+
+             ret = hsmp_create_metric_tbl_sysfs_file(i);
+             if (ret) {
+                     dev_err(plat_dev.dev, "Unable to create sysfs file for metric table\n");
+                     goto cleanup;
+             }
+     }
+
+     return 0;
+
+cleanup:
+     while (i > 0)
+             device_remove_bin_file(plat_dev.dev, &plat_dev.sock[--i].hsmp_attr);
+     return ret;
+}
+
+static int hsmp_cache_proto_ver(void)
+{
+     struct hsmp_message msg = { 0 };
+     int ret;
+
+     msg.msg_id      = HSMP_GET_PROTO_VER;
+     msg.sock_ind    = 0;
+     msg.response_sz = hsmp_msg_desc_table[HSMP_GET_PROTO_VER].response_sz;
+
+     ret = hsmp_send_message(&msg);
+     if (!ret)
+             plat_dev.proto_ver = msg.args[0];
+
+     return ret;
+}
+
  static int hsmp_pltdrv_probe(struct platform_device *pdev)
  {
-     int i;
+     int ret, i;

       plat_dev.sock = devm_kzalloc(&pdev->dev,
                                    (num_sockets * sizeof(struct hsmp_socket)),
@@ -344,18 +463,44 @@ static int hsmp_pltdrv_probe(struct platform_device *pdev)
               plat_dev.sock[i].sock_ind = i;
       }

-     plat_dev.hsmp_device.name       = "hsmp_cdev";
+     plat_dev.hsmp_device.name       = HSMP_CDEV_NAME;
       plat_dev.hsmp_device.minor      = MISC_DYNAMIC_MINOR;
       plat_dev.hsmp_device.fops       = &hsmp_fops;
       plat_dev.hsmp_device.parent     = &pdev->dev;
-     plat_dev.hsmp_device.nodename   = "hsmp";
+     plat_dev.hsmp_device.nodename   = HSMP_DEVNODE_NAME;
       plat_dev.hsmp_device.mode       = 0644;

-     return misc_register(&plat_dev.hsmp_device);
+     ret = misc_register(&plat_dev.hsmp_device);
+     if (ret)
+             return ret;
+
+     ret = hsmp_cache_proto_ver();
+     if (ret) {
+             dev_err(plat_dev.dev, "Failed to read HSMP protocol version\n");
+             goto cleanup;
+     }
+
+     /* metrics table is supported only from proto ver6 EPYCs */
+     if (plat_dev.proto_ver >= HSMP_PROTO_VER6) {
+             ret = hsmp_create_sysfs_file();
+             if (ret)
+                     goto cleanup;
+     }
+     return 0;
+
+cleanup:
Instead naming labels as "cleanup", try to name them according to what you
cleanup, so e.g. here deregister:
Ok, will rename it.
+     misc_deregister(&plat_dev.hsmp_device);
+     return ret;
  }

  static void hsmp_pltdrv_remove(struct platform_device *pdev)
  {
+     int i;
+
+     if (plat_dev.proto_ver >= HSMP_PROTO_VER6) {
+             for (i = 0; i < num_sockets; i++)
+                     device_remove_bin_file(plat_dev.dev, &plat_dev.sock[i].hsmp_attr);
+     }
       misc_deregister(&plat_dev.hsmp_device);
  }


--
  i.

Thanks and Regards,

Suma




[Index of Archives]     [Linux Kernel Development]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux