Re: [PATCH v2 3/3] EDAC, skx_edac: Add address translation for non-volatile DIMMs

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Thu, Oct 11, 2018 at 02:12:36PM -0700, Tony Luck wrote:
> From: Qiuxu Zhuo <qiuxu.zhuo@xxxxxxxxx>
> 
> Current skx_edac driver doesn't support address translation for
> non-volatile DIMMs.
> 
> The ACPI ADXL DSM method support address translation for both
> volatile DIMMs and non-volatile DIMMs. So switch skx_edac to use
> the wrapped ACPI DSM methods, if they are supported and there are
> non-volatile DIMMs populated on the system.
> 
> Signed-off-by: Qiuxu Zhuo <qiuxu.zhuo@xxxxxxxxx>
> Signed-off-by: Tony Luck <tony.luck@xxxxxxxxx>
> ---
>  drivers/edac/Kconfig    |   1 +
>  drivers/edac/skx_edac.c | 191 +++++++++++++++++++++++++++++++++++++---
>  2 files changed, 179 insertions(+), 13 deletions(-)
> 
> diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
> index 57304b2e989f..ffd349c12479 100644
> --- a/drivers/edac/Kconfig
> +++ b/drivers/edac/Kconfig
> @@ -234,6 +234,7 @@ config EDAC_SKX
>  	depends on PCI && X86_64 && X86_MCE_INTEL && PCI_MMCONFIG
>  	depends on ACPI_NFIT || !ACPI_NFIT # if ACPI_NFIT=m, EDAC_SKX can't be y
>  	select DMI
> +	select ACPI_ADXL
>  	help
>  	  Support for error detection and correction the Intel
>  	  Skylake server Integrated Memory Controllers. If your
> diff --git a/drivers/edac/skx_edac.c b/drivers/edac/skx_edac.c
> index a710169abdbc..598564f9c4b2 100644
> --- a/drivers/edac/skx_edac.c
> +++ b/drivers/edac/skx_edac.c
> @@ -26,6 +26,7 @@
>  #include <linux/bitmap.h>
>  #include <linux/math64.h>
>  #include <linux/mod_devicetable.h>
> +#include <linux/adxl.h>
>  #include <acpi/nfit.h>
>  #include <asm/cpu_device_id.h>
>  #include <asm/intel-family.h>
> @@ -35,6 +36,7 @@
>  #include "edac_module.h"
>  
>  #define EDAC_MOD_STR    "skx_edac"
> +#define MSG_SIZE	1024
>  
>  /*
>   * Debug macros
> @@ -54,6 +56,26 @@
>  static LIST_HEAD(skx_edac_list);
>  
>  static u64 skx_tolm, skx_tohm;
> +static int nvdimm_count;
> +static u64 *adxl_values;
> +
> +enum {
> +	INDEX_SOCKET,
> +	INDEX_MEMCTRL,
> +	INDEX_CHANNEL,
> +	INDEX_DIMM,
> +	INDEX_MAX
> +};
> +static char component_names[][32] = {
> +	[INDEX_SOCKET]	= "ProcessorSocketId",
> +	[INDEX_MEMCTRL]	= "MemoryControllerId",
> +	[INDEX_CHANNEL]	= "ChannelId",
> +	[INDEX_DIMM]	= "DimmSlotId",
> +};
> +
> +static int component_indices[ARRAY_SIZE(component_names)];
> +static int adxl_component_count;
> +static const char * const *adxl_component_names;
>  
>  #define NUM_IMC			2	/* memory controllers per socket */
>  #define NUM_CHANNELS		3	/* channels per memory controller */
> @@ -102,11 +124,11 @@ struct decoded_addr {
>  	u64	addr;
>  	int	socket;
>  	int	imc;
> -	int	channel;
> +	u64	channel;
>  	u64	chan_addr;
>  	int	sktways;
>  	int	chanways;
> -	int	dimm;
> +	u64	dimm;
>  	int	rank;
>  	int	channel_rank;
>  	u64	rank_address;
> @@ -393,6 +415,8 @@ static int get_nvdimm_info(struct dimm_info *dimm, struct skx_imc *imc,
>  	u16 flags;
>  	u64 size = 0;
>  
> +	nvdimm_count++;
> +
>  	dev_handle = ACPI_NFIT_BUILD_DEVICE_HANDLE(dimmno, chan, imc->lmc,
>  						   imc->src_id, 0);
>  
> @@ -682,7 +706,7 @@ static bool skx_sad_decode(struct decoded_addr *res)
>  	res->imc = GET_BITFIELD(d->mcroute, lchan * 3, lchan * 3 + 2);
>  	res->channel = GET_BITFIELD(d->mcroute, lchan * 2 + 18, lchan * 2 + 19);
>  
> -	edac_dbg(2, "%llx: socket=%d imc=%d channel=%d\n",
> +	edac_dbg(2, "%llx: socket=%d imc=%d channel=%llu\n",

Prepend hex formatting with "0x". Check the whole patch pls.

>  		 res->addr, res->socket, res->imc, res->channel);
>  	return true;
>  }
> @@ -818,7 +842,7 @@ static bool skx_rir_decode(struct decoded_addr *res)
>  	res->dimm = chan_rank / 4;
>  	res->rank = chan_rank % 4;
>  
> -	edac_dbg(2, "%llx: dimm=%d rank=%d chan_rank=%d rank_addr=%llx\n",
> +	edac_dbg(2, "%llx: dimm=%llu rank=%d chan_rank=%d rank_addr=%llx\n",
>  		 res->addr, res->dimm, res->rank,
>  		 res->channel_rank, res->rank_address);
>  	return true;
> @@ -894,12 +918,48 @@ static bool skx_decode(struct decoded_addr *res)
>  		skx_rir_decode(res) && skx_mad_decode(res);
>  }
>  
> +static bool skx_dsm_decode(u64 addr, char *msg, int msglen,
> +			   u64 *sock, u64 *imc,
> +			   u64 *chan, u64 *dimm)
> +
> +{
> +	int i, len = 0;
> +
> +	if (addr >= skx_tohm || (addr >= skx_tolm && addr < BIT_ULL(32))) {
> +		edac_dbg(0, "Address %llx out of range\n", addr);
> +		return false;
> +	}
> +
> +	if (adxl_decode(addr, adxl_values)) {
> +		edac_dbg(0, "Failed to decode 0x%llx\n", addr);
> +		return false;
> +	}
> +
> +	*sock = adxl_values[component_indices[INDEX_SOCKET]];
> +	*imc = adxl_values[component_indices[INDEX_MEMCTRL]];

Align all '=' vertically.

> +	*chan = adxl_values[component_indices[INDEX_CHANNEL]];
> +	*dimm = adxl_values[component_indices[INDEX_DIMM]];
> +
> +	for (i = 0; i < adxl_component_count; i++) {
> +		if (adxl_values[i] == ~0x0ull)
> +			continue;
> +		len += snprintf(msg + len, msglen, " %s:0x%llx",
> +				adxl_component_names[i], adxl_values[i]);
> +
> +		if (msglen - len <= 0)
> +			break;
> +	}
> +
> +	return true;
> +}
> +
>  static void skx_mce_output_error(struct mem_ctl_info *mci,
>  				 const struct mce *m,
> -				 struct decoded_addr *res)
> +				 struct decoded_addr *res,
> +				 char *msg)
>  {
>  	enum hw_event_mc_err_type tp_event;
> -	char *type, *optype, msg[256];
> +	char *type, *optype;
>  	bool ripv = GET_BITFIELD(m->mcgstatus, 0, 0);
>  	bool overflow = GET_BITFIELD(m->status, 62, 62);
>  	bool uncorrected_error = GET_BITFIELD(m->status, 61, 61);
> @@ -960,13 +1020,11 @@ static void skx_mce_output_error(struct mem_ctl_info *mci,
>  		}
>  	}
>  
> -	snprintf(msg, sizeof(msg),
> -		 "%s%s err_code:%04x:%04x socket:%d imc:%d rank:%d bg:%d ba:%d row:%x col:%x",
> +	snprintf(msg, MSG_SIZE,
> +		 "%s%s err_code:%04x:%04x %s",
>  		 overflow ? " OVERFLOW" : "",
>  		 (uncorrected_error && recoverable) ? " recoverable" : "",
> -		 mscod, errcode,
> -		 res->socket, res->imc, res->rank,
> -		 res->bank_group, res->bank_address, res->row, res->column);
> +		 mscod, errcode, msg + MSG_SIZE);
>  
>  	edac_dbg(0, "%s\n", msg);
>  
> @@ -977,13 +1035,33 @@ static void skx_mce_output_error(struct mem_ctl_info *mci,
>  			     optype, msg);
>  }
>  
> +static struct mem_ctl_info *get_mci(u64 src_id, u64 lmc)
> +{
> +	struct skx_dev *d;
> +
> +	if (lmc > NUM_IMC - 1) {
> +		skx_printk(KERN_ERR, "Bad mc# 0x%llx\n", lmc);
> +		return NULL;
> +	}
> +
> +	list_for_each_entry(d, &skx_edac_list, list) {
> +		if (d->imc[0].src_id == src_id)
> +			return d->imc[lmc].mci;
> +	}
> +
> +	skx_printk(KERN_ERR, "No mci for src_id %llx lmc %llx\n", src_id, lmc);
> +
> +	return NULL;
> +}
> +
>  static int skx_mce_check_error(struct notifier_block *nb, unsigned long val,
>  			       void *data)
>  {
>  	struct mce *mce = (struct mce *)data;
>  	struct decoded_addr res;
>  	struct mem_ctl_info *mci;
> -	char *type;
> +	u64 socket, memctrl;
> +	char *type, *msg;
>  
>  	if (edac_get_report_status() == EDAC_REPORTING_DISABLED)
>  		return NOTIFY_DONE;
> @@ -992,11 +1070,35 @@ static int skx_mce_check_error(struct notifier_block *nb, unsigned long val,
>  	if ((mce->status & 0xefff) >> 7 != 1 || !(mce->status & MCI_STATUS_ADDRV))
>  		return NOTIFY_DONE;
>  
> +	msg = kzalloc(MSG_SIZE * 2, GFP_KERNEL);

You allocate this on every notifier call. Why not allocate it once on
driver init?

Also, what's that magic "* 2" thing?

> +	if (!msg)
> +		return NOTIFY_DONE;
> +
> +	if (adxl_component_count)
> +		goto dsm_decoding;
> +
>  	res.addr = mce->addr;
>  	if (!skx_decode(&res))
>  		return NOTIFY_DONE;
>  	mci = res.dev->imc[res.imc].mci;
>  
> +	snprintf(msg + MSG_SIZE, MSG_SIZE,

That's jumping over the first MSG_SIZE. Why?

> +		 "socket:%d imc:%d chan:%llu dimm:%llu rank:%d bg:%d ba:%d row:%x col:%x",
> +		 res.socket, res.imc, res.channel, res.dimm, res.rank,
> +		 res.bank_group, res.bank_address, res.row, res.column);
> +
> +	goto decoded;
> +
> +dsm_decoding:
> +	if (!skx_dsm_decode(mce->addr, msg + MSG_SIZE, MSG_SIZE,
> +			    &socket, &memctrl, &res.channel, &res.dimm))
> +		goto out;
> +
> +	mci = get_mci(socket, memctrl);
> +	if (!mci)
> +		goto out;
> +
> +decoded:
>  	if (mce->mcgstatus & MCG_STATUS_MCIP)
>  		type = "Exception";
>  	else
> @@ -1015,8 +1117,10 @@ static int skx_mce_check_error(struct notifier_block *nb, unsigned long val,
>  			  "%u APIC %x\n", mce->cpuvendor, mce->cpuid,
>  			  mce->time, mce->socketid, mce->apicid);
>  
> -	skx_mce_output_error(mci, mce, &res);
> +	skx_mce_output_error(mci, mce, &res, msg);

Ah, there it is - this function writes the first 1024 bytes. So if it is
a short first errors, there'll be a bunch of '\0' in between.

Why isn't the above snprintf() simply happening in skx_mce_output_error()?

This whole code needs a reorg - like skx_mce_check_error() does a *lot*
of other stuff too.

>  
> +out:
> +	kfree(msg);
>  	return NOTIFY_DONE;
>  }
>  
> @@ -1090,6 +1194,54 @@ static void skx_remove(void)
>  	}
>  }
>  
> +static void __init skx_dsm_get(void)
> +{
> +	const char * const *names;
> +	int i, j;
> +
> +	names = adxl_get_component_names();
> +	if (!names) {
> +		skx_printk(KERN_NOTICE, "No firmware support for address translation.");
> +		skx_printk(KERN_CONT, " Only decoding DDR4 address!\n");
> +		return;
> +	}
> +
> +	for (i = 0; i < INDEX_MAX; i++) {
> +		for (j = 0; names[j]; j++) {
> +			if (!strcmp(component_names[i], names[j])) {
> +				component_indices[i] = j;
> +				break;
> +			}
> +		}
> +
> +		if (!names[j])
> +			goto err;
> +	}
> +
> +	adxl_component_names = names;
> +	while (*names++)
> +		adxl_component_count++;
> +
> +	adxl_values = kcalloc(adxl_component_count, sizeof(*adxl_values), GFP_KERNEL);
> +	if (!adxl_values) {
> +		adxl_component_count = 0;
> +		edac_dbg(0, "No memory for adxl_decode()\n");
> +	}
> +
> +	return;
> +err:
> +	skx_printk(KERN_ERR, "'%s' is not matched from DSM parameters: ",
> +		   component_names[i]);
> +	for (j = 0; names[j]; j++)
> +		skx_printk(KERN_CONT, "%s ", names[j]);
> +	skx_printk(KERN_CONT, "\n");
> +}
> +
> +static void __exit skx_dsm_put(void)
> +{
> +	kfree(adxl_values);
> +}
> +
>  /*
>   * skx_init:
>   *	make sure we are running on the correct cpu model
> @@ -1154,6 +1306,9 @@ static int __init skx_init(void)
>  		}
>  	}
>  
> +	if (nvdimm_count)
> +		skx_dsm_get();
> +
>  	/* Ensure that the OPSTATE is set correctly for POLL or NMI */
>  	opstate_init();
>  
> @@ -1170,6 +1325,8 @@ static int __init skx_init(void)
>  static void __exit skx_exit(void)
>  {
>  	edac_dbg(2, "\n");
> +	if (nvdimm_count)
> +		skx_dsm_put();
>  	mce_unregister_decode_chain(&skx_mce_dec);
>  	skx_remove();
>  	teardown_skx_debug();
> @@ -1180,6 +1337,14 @@ module_exit(skx_exit);
>  
>  module_param(edac_op_state, int, 0444);
>  MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");
> +module_param_string(sock, component_names[0], sizeof(component_names[0]), 0644);
> +MODULE_PARM_DESC(sock, "String to get socket ID");
> +module_param_string(imc, component_names[1], sizeof(component_names[1]), 0644);
> +MODULE_PARM_DESC(imc, "String to get IMC ID");
> +module_param_string(chan, component_names[2], sizeof(component_names[2]), 0644);
> +MODULE_PARM_DESC(chan, "String to get channel ID");
> +module_param_string(dimm, component_names[3], sizeof(component_names[3]), 0644);
> +MODULE_PARM_DESC(dimm, "String to get DIMM ID");

So this is not really user-friendly - do you really expect people
to modprobe this with 4 module params maximum when the firmware has
changed?

adxl_get_component_names() already gives the component names and you
control what gets written in there so why does the driver need those at
all?

Worst case you can do some trivial pattern matching but letting the user
figure it out is simply wrong, IMO.

-- 
Regards/Gruss,
    Boris.

Good mailing practices for 400: avoid top-posting and trim the reply.



[Index of Archives]     [Linux IBM ACPI]     [Linux Power Management]     [Linux Kernel]     [Linux Laptop]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Samba]     [Video 4 Linux]     [Device Mapper]     [Linux Resources]

  Powered by Linux