Re: [PATCH v3 2/3] platform/x86/amd/hsmp: add support for metrics tbl

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Tue, 19 Sep 2023, Suma Hegde wrote:

> AMD MI300 MCM provides GET_METRICS_TABLE message to retrieve
> all the system management information from SMU.
> 
> The metrics table is made available as hexadecimal sysfs binary file
> under per socket sysfs directory created at
> /sys/devices/platform/amd_hsmp/socket%d/metrics_bin
> 
> Metrics table definitions will be documented as part of Public PPR.
> The same is defined in the amd_hsmp.h header.
> 
> Signed-off-by: Suma Hegde <suma.hegde@xxxxxxx>
> Reviewed-by: Naveen Krishna Chatradhi <nchatrad@xxxxxxx>
> ---
> Changes since v1:
> 1. Remove HSMP_DEVNODE_NAME and HSMP_CDEV_NAME macro definitions in
> this patch
> 2. Remove extra space in comments for HSMP_GET_METRIC_TABLE_VER,
>    HSMP_GET_METRIC_TABLE and HSMP_GET_METRIC_TABLE_DRAM_ADDR enum
>    definition in amd_hsmp.h files
> 3. Change check, count == 0 to !count in hsmp_metric_tbl_read() function
> 4. Add hsmp_metric_table_visible() function 
> 5. hsmp_create_metric_tbl_sysfs_file() is renamed as hsmp_init_metric_tbl_bin_attr()
>    and code is also modified slightly
> 6. Modify hsmp_create_sysfs_file() to use devm_device_add_groups()
> 7. Change from cleanup label to deregister label
> 8. Add dev_err print in hsmp_get_tbl_dram_base()
> 9. Reword "Unable to Failed" in hsmp_get_tbl_dram_base()
> 10. Add HSMP_GRP_NAME_SIZE and NUM_ATTRS macros
> 11. Remove sysfs cleanup code in hsmp_pltdrv_remove()
> 12. Correct ATRR typo error
> 13. Change sprintf to snprintf
> 14. Check metrics table support only against HSMP_PROTO_VER6
> Changes since v2:
> 1. squash documentation patch into this patch
> 2. change from num_sockets to plat_dev.num_sockets
> 
>  Documentation/arch/x86/amd_hsmp.rst  |  16 +++
>  arch/x86/include/uapi/asm/amd_hsmp.h | 109 ++++++++++++++++
>  drivers/platform/x86/amd/hsmp.c      | 180 ++++++++++++++++++++++++++-
>  3 files changed, 302 insertions(+), 3 deletions(-)
> 
> diff --git a/Documentation/arch/x86/amd_hsmp.rst b/Documentation/arch/x86/amd_hsmp.rst
> index 440e4b645a1c..a4c308784818 100644
> --- a/Documentation/arch/x86/amd_hsmp.rst
> +++ b/Documentation/arch/x86/amd_hsmp.rst
> @@ -41,6 +41,22 @@ In-kernel integration:
>   * Locking across callers is taken care by the driver.
>  
>  
> +HSMP sysfs interface
> +====================
> +
> +1. Metrics table binary sysfs
> +
> +AMD MI300A MCM provides GET_METRICS_TABLE message to retrieve
> +all the system management information from SMU.
> +
> +The metrics table is made available as hexadecimal sysfs binary file
> +under per socket sysfs directory created at
> +/sys/devices/platform/amd_hsmp/socket%d/metrics_bin
> +
> +Metrics table definitions will be documented as part of Public PPR.
> +The same is defined in the amd_hsmp.h header.
> +
> +
>  An example
>  ==========
>  

I'd have expected to have the sysfs documentation appear under 
Documentation/ABI/testing/sysfs-...


The code change itself seems pretty okay now.

-- 
 i.

> diff --git a/arch/x86/include/uapi/asm/amd_hsmp.h b/arch/x86/include/uapi/asm/amd_hsmp.h
> index 769b939444ae..f3479b768fb9 100644
> --- a/arch/x86/include/uapi/asm/amd_hsmp.h
> +++ b/arch/x86/include/uapi/asm/amd_hsmp.h
> @@ -47,6 +47,9 @@ enum hsmp_message_ids {
>  	HSMP_SET_PCI_RATE,		/* 20h Control link rate on PCIe devices */
>  	HSMP_SET_POWER_MODE,		/* 21h Select power efficiency profile policy */
>  	HSMP_SET_PSTATE_MAX_MIN,	/* 22h Set the max and min DF P-State  */
> +	HSMP_GET_METRIC_TABLE_VER,	/* 23h Get metrics table version */
> +	HSMP_GET_METRIC_TABLE,		/* 24h Get metrics table */
> +	HSMP_GET_METRIC_TABLE_DRAM_ADDR,/* 25h Get metrics table dram address */
>  	HSMP_MSG_ID_MAX,
>  };
>  
> @@ -64,6 +67,14 @@ enum hsmp_msg_type {
>  	HSMP_GET  = 1,
>  };
>  
> +enum hsmp_proto_versions {
> +	HSMP_PROTO_VER2	= 2,
> +	HSMP_PROTO_VER3,
> +	HSMP_PROTO_VER4,
> +	HSMP_PROTO_VER5,
> +	HSMP_PROTO_VER6
> +};
> +
>  struct hsmp_msg_desc {
>  	int num_args;
>  	int response_sz;
> @@ -295,6 +306,104 @@ static const struct hsmp_msg_desc hsmp_msg_desc_table[] = {
>  	 * input: args[0] = min df pstate[15:8] + max df pstate[7:0]
>  	 */
>  	{1, 0, HSMP_SET},
> +
> +	/*
> +	 * HSMP_GET_METRIC_TABLE_VER, num_args = 0, response_sz = 1
> +	 * output: args[0] = metrics table version
> +	 */
> +	{0, 1, HSMP_GET},
> +
> +	/*
> +	 * HSMP_GET_METRIC_TABLE, num_args = 0, response_sz = 0
> +	 */
> +	{0, 0, HSMP_GET},
> +
> +	/*
> +	 * HSMP_GET_METRIC_TABLE_DRAM_ADDR, num_args = 0, response_sz = 2
> +	 * output: args[0] = lower 32 bits of the address
> +	 * output: args[1] = upper 32 bits of the address
> +	 */
> +	{0, 2, HSMP_GET},
> +};
> +
> +/* Metrics table for EPYC socket(supported only from proto version 6) */
> +struct hsmp_metric_table {
> +	__u32 accumulation_counter;
> +
> +	//TEMPERATURE
> +	__u32 max_socket_temperature;
> +	__u32 max_vr_temperature;
> +	__u32 max_hbm_temperature;
> +	__u64 max_socket_temperature_acc;
> +	__u64 max_vr_temperature_acc;
> +	__u64 max_hbm_temperature_acc;
> +
> +	//POWER
> +	__u32 socket_power_limit;
> +	__u32 max_socket_power_limit;
> +	__u32 socket_power;
> +
> +	//ENERGY
> +	__u64 timestamp;
> +	__u64 socket_energy_acc;
> +	__u64 ccd_energy_acc;
> +	__u64 xcd_energy_acc;
> +	__u64 aid_energy_acc;
> +	__u64 hbm_energy_acc;
> +
> +	//FREQUENCY
> +	__u32 cclk_frequency_limit;
> +	__u32 gfxclk_frequency_limit;
> +	__u32 fclk_frequency;
> +	__u32 uclk_frequency;
> +	__u32 socclk_frequency[4];
> +	__u32 vclk_frequency[4];
> +	__u32 dclk_frequency[4];
> +	__u32 lclk_frequency[4];
> +	__u64 gfxclk_frequency_acc[8];
> +	__u64 cclk_frequency_acc[96];
> +
> +	//FREQUENCY RANGE
> +	__u32 max_cclk_frequency;
> +	__u32 min_cclk_frequency;
> +	__u32 max_gfxclk_frequency;
> +	__u32 min_gfxclk_frequency;
> +	__u32 fclk_frequency_table[4];
> +	__u32 uclk_frequency_table[4];
> +	__u32 socclk_frequency_table[4];
> +	__u32 vclk_frequency_table[4];
> +	__u32 dclk_frequency_table[4];
> +	__u32 lclk_frequency_table[4];
> +	__u32 max_lclk_dpm_range;
> +	__u32 min_lclk_dpm_range;
> +
> +	//XGMI
> +	__u32 xgmi_width;
> +	__u32 xgmi_bitrate;
> +	__u64 xgmi_read_bandwidth_acc[8];
> +	__u64 xgmi_write_bandwidth_acc[8];
> +
> +	//ACTIVITY
> +	__u32 socket_c0_residency;
> +	__u32 socket_gfx_busy;
> +	__u32 dram_bandwidth_utilization;
> +	__u64 socket_c0_residency_acc;
> +	__u64 socket_gfx_busy_acc;
> +	__u64 dram_bandwidth_acc;
> +	__u32 max_dram_bandwidth;
> +	__u64 dram_bandwidth_utilization_acc;
> +	__u64 pcie_bandwidth_acc[4];
> +
> +	//THROTTLERS
> +	__u32 prochot_residency_acc;
> +	__u32 ppt_residency_acc;
> +	__u32 socket_thm_residency_acc;
> +	__u32 vr_thm_residency_acc;
> +	__u32 hbm_thm_residency_acc;
> +	__u32 spare;
> +
> +	// New Items at end to maintain driver compatibility
> +	__u32 gfxclk_frequency[8];
>  };
>  
>  /* Reset to default packing */
> diff --git a/drivers/platform/x86/amd/hsmp.c b/drivers/platform/x86/amd/hsmp.c
> index 99727cd705cf..fc6fba18844e 100644
> --- a/drivers/platform/x86/amd/hsmp.c
> +++ b/drivers/platform/x86/amd/hsmp.c
> @@ -20,7 +20,7 @@
>  #include <linux/semaphore.h>
>  
>  #define DRIVER_NAME		"amd_hsmp"
> -#define DRIVER_VERSION		"1.0"
> +#define DRIVER_VERSION		"2.0"
>  
>  /* HSMP Status / Error codes */
>  #define HSMP_STATUS_NOT_READY	0x00
> @@ -51,6 +51,8 @@
>  #define HSMP_DEVNODE_NAME	"hsmp"
>  
>  struct hsmp_socket {
> +	struct bin_attribute hsmp_attr;
> +	void __iomem *metric_tbl_addr;
>  	struct semaphore hsmp_sem;
>  	u16 sock_ind;
>  };
> @@ -59,6 +61,7 @@ struct hsmp_plat_device {
>  	struct miscdevice hsmp_device;
>  	struct hsmp_socket *sock;
>  	struct device *dev;
> +	u32 proto_ver;
>  	u16 num_sockets;
>  };
>  
> @@ -330,9 +333,160 @@ static const struct file_operations hsmp_fops = {
>  	.compat_ioctl	= hsmp_ioctl,
>  };
>  
> +static ssize_t hsmp_metric_tbl_read(struct file *filp, struct kobject *kobj,
> +				    struct bin_attribute *bin_attr, char *buf,
> +				    loff_t off, size_t count)
> +{
> +	struct hsmp_socket *sock = bin_attr->private;
> +	struct hsmp_message msg = { 0 };
> +	int ret;
> +
> +	if (!count || count > sizeof(struct hsmp_metric_table))
> +		return 0;
> +
> +	msg.msg_id	= HSMP_GET_METRIC_TABLE;
> +	msg.sock_ind	= sock->sock_ind;
> +
> +	ret = hsmp_send_message(&msg);
> +	if (ret)
> +		return ret;
> +	memcpy(buf, sock->metric_tbl_addr, count);
> +
> +	return count;
> +}
> +
> +static int hsmp_get_tbl_dram_base(u16 sock_ind)
> +{
> +	struct hsmp_socket *sock = &plat_dev.sock[sock_ind];
> +	struct hsmp_message msg = { 0 };
> +	phys_addr_t dram_addr;
> +	int ret;
> +
> +	msg.sock_ind	= sock_ind;
> +	msg.response_sz	= hsmp_msg_desc_table[HSMP_GET_METRIC_TABLE_DRAM_ADDR].response_sz;
> +	msg.msg_id	= HSMP_GET_METRIC_TABLE_DRAM_ADDR;
> +
> +	ret = hsmp_send_message(&msg);
> +	if (ret)
> +		return ret;
> +
> +	/*
> +	 * calculate the metric table DRAM address from lower and upper 32 bits
> +	 * sent from SMU and ioremap it to virtual address.
> +	 */
> +	dram_addr = msg.args[0] | ((u64)(msg.args[1]) << 32);
> +	if (!dram_addr) {
> +		dev_err(plat_dev.dev, "Invalid dram address for metric table\n");
> +		return -ENOMEM;
> +	}
> +	sock->metric_tbl_addr = devm_ioremap(plat_dev.dev, dram_addr,
> +					     sizeof(struct hsmp_metric_table));
> +	if (!sock->metric_tbl_addr) {
> +		dev_err(plat_dev.dev, "Failed to ioremap metric table addr\n");
> +		return -ENOMEM;
> +	}
> +	return 0;
> +}
> +
> +static umode_t hsmp_metric_table_visible(struct kobject *kobj, struct bin_attribute *battr, int id)
> +{
> +	struct hsmp_socket *sock = battr->private;
> +	int ret;
> +
> +	if (plat_dev.proto_ver == HSMP_PROTO_VER6) {
> +		ret = hsmp_get_tbl_dram_base(sock->sock_ind);
> +		if (ret)
> +			return 0;
> +		return battr->attr.mode;
> +	} else {
> +		return 0;
> +	}
> +}
> +
> +#define HSMP_ATTR_NAME_SIZE	25
> +#define HSMP_GRP_NAME_SIZE	15
> +#define NUM_ATTRS		1
> +static int hsmp_init_metric_tbl_bin_attr(struct bin_attribute **hattrs, int sock_ind)
> +{
> +	struct bin_attribute *hattr = &plat_dev.sock[sock_ind].hsmp_attr;
> +	char *name;
> +
> +	sysfs_attr_init(&plat_dev.sock[sock_ind].hsmp_attr);
> +	name = devm_kzalloc(plat_dev.dev, HSMP_ATTR_NAME_SIZE, GFP_KERNEL);
> +	if (!name)
> +		return -ENOMEM;
> +	snprintf(name, HSMP_ATTR_NAME_SIZE, "metrics_bin");
> +	hattr->attr.name	= name;
> +	hattr->attr.mode	= 0444;
> +	hattr->read		= hsmp_metric_tbl_read;
> +	hattr->size		= sizeof(struct hsmp_metric_table);
> +	hattr->private		= &plat_dev.sock[sock_ind];
> +	hattrs[0]		= hattr;
> +
> +	return 0;
> +}
> +
> +static int hsmp_create_sysfs_file(void)
> +{
> +	const struct attribute_group **hsmp_attr_grps;
> +	struct bin_attribute **hsmp_bin_attrs;
> +	struct attribute_group *attr_grp;
> +	int ret, i;
> +
> +	hsmp_attr_grps = devm_kzalloc(plat_dev.dev, sizeof(struct attribute_group *) *
> +				      (plat_dev.num_sockets + 1), GFP_KERNEL);
> +	if (!hsmp_attr_grps)
> +		return -ENOMEM;
> +
> +	for (i = 0; i < plat_dev.num_sockets; i++) {
> +		attr_grp = devm_kzalloc(plat_dev.dev, sizeof(struct attribute_group), GFP_KERNEL);
> +		if (!attr_grp)
> +			return -ENOMEM;
> +
> +		attr_grp->name = devm_kzalloc(plat_dev.dev, HSMP_GRP_NAME_SIZE, GFP_KERNEL);
> +		if (!attr_grp->name)
> +			return -ENOMEM;
> +		snprintf((char *)attr_grp->name, HSMP_GRP_NAME_SIZE, "socket%d", i);
> +
> +		hsmp_bin_attrs = devm_kzalloc(plat_dev.dev, sizeof(struct bin_attribute *) *
> +					      (NUM_ATTRS + 1), GFP_KERNEL);
> +		if (!hsmp_bin_attrs)
> +			return -ENOMEM;
> +
> +		attr_grp->bin_attrs		= hsmp_bin_attrs;
> +		attr_grp->is_bin_visible	= hsmp_metric_table_visible;
> +		hsmp_attr_grps[i]		= attr_grp;
> +
> +		ret = hsmp_init_metric_tbl_bin_attr(hsmp_bin_attrs, i);
> +		if (ret)
> +			return ret;
> +	}
> +	ret = devm_device_add_groups(plat_dev.dev, hsmp_attr_grps);
> +	if (ret)
> +		return ret;
> +
> +	return 0;
> +}
> +
> +static int hsmp_cache_proto_ver(void)
> +{
> +	struct hsmp_message msg = { 0 };
> +	int ret;
> +
> +	msg.msg_id	= HSMP_GET_PROTO_VER;
> +	msg.sock_ind	= 0;
> +	msg.response_sz = hsmp_msg_desc_table[HSMP_GET_PROTO_VER].response_sz;
> +
> +	ret = hsmp_send_message(&msg);
> +	if (!ret)
> +		plat_dev.proto_ver = msg.args[0];
> +
> +	return ret;
> +}
> +
>  static int hsmp_pltdrv_probe(struct platform_device *pdev)
>  {
> -	int i;
> +	int ret, i;
>  
>  	plat_dev.sock = devm_kzalloc(&pdev->dev,
>  				     (plat_dev.num_sockets * sizeof(struct hsmp_socket)),
> @@ -353,7 +507,27 @@ static int hsmp_pltdrv_probe(struct platform_device *pdev)
>  	plat_dev.hsmp_device.nodename	= HSMP_DEVNODE_NAME;
>  	plat_dev.hsmp_device.mode	= 0644;
>  
> -	return misc_register(&plat_dev.hsmp_device);
> +	ret = misc_register(&plat_dev.hsmp_device);
> +	if (ret)
> +		return ret;
> +
> +	ret = hsmp_cache_proto_ver();
> +	if (ret) {
> +		dev_err(plat_dev.dev, "Failed to read HSMP protocol version\n");
> +		goto deregister;
> +	}
> +
> +	ret = hsmp_create_sysfs_file();
> +	if (ret) {
> +		dev_err(plat_dev.dev, "Failed to create sysfs file\n");
> +		goto deregister;
> +	}
> +
> +	return 0;
> +
> +deregister:
> +	misc_deregister(&plat_dev.hsmp_device);
> +	return ret;
>  }
>  
>  static void hsmp_pltdrv_remove(struct platform_device *pdev)
> 



[Index of Archives]     [Linux Kernel Development]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux