Re: [PATCH v2 3/4] platform/x86/intel/ifs: Add SBAF test support

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Wed, 10 Jul 2024, Kuppuswamy Sathyanarayanan wrote:

> From: Jithu Joseph <jithu.joseph@xxxxxxxxx>
> 
> In a core, the SBAF test engine is shared between sibling CPUs.
> 
> An SBAF test image contains multiple bundles. Each bundle is further
> composed of subunits called programs. When a SBAF test (for a particular
> core) is triggered by the user, each SBAF bundle from the loaded test
> image is executed sequentially on all the threads on the core using
> the stop_core_cpuslocked mechanism. Each bundle execution is initiated by
> writing to MSR_ACTIVATE_SBAF.
> 
> SBAF test bundle execution may be aborted when an interrupt occurs or
> if the CPU does not have enough power budget for the test. In these
> cases the kernel restarts the test from the aborted bundle. SBAF
> execution is not retried if the test fails or if the test makes no
> forward progress after 5 retries.
> 
> Reviewed-by: Ashok Raj <ashok.raj@xxxxxxxxx>
> Reviewed-by: Tony Luck <tony.luck@xxxxxxxxx>
> Signed-off-by: Jithu Joseph <jithu.joseph@xxxxxxxxx>
> Signed-off-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@xxxxxxxxxxxxxxx>
> ---
>  drivers/platform/x86/intel/ifs/ifs.h     |  30 +++
>  drivers/platform/x86/intel/ifs/runtest.c | 234 +++++++++++++++++++++++
>  2 files changed, 264 insertions(+)
> 
> diff --git a/drivers/platform/x86/intel/ifs/ifs.h b/drivers/platform/x86/intel/ifs/ifs.h
> index 600bb8a1b285..b261be46bce8 100644
> --- a/drivers/platform/x86/intel/ifs/ifs.h
> +++ b/drivers/platform/x86/intel/ifs/ifs.h
> @@ -157,6 +157,8 @@
>  #define MSR_SBAF_HASHES_STATUS			0x000002b9
>  #define MSR_AUTHENTICATE_AND_COPY_SBAF_CHUNK	0x000002ba
>  #define MSR_SBAF_CHUNKS_AUTHENTICATION_STATUS	0x000002bb
> +#define MSR_ACTIVATE_SBAF			0x000002bc
> +#define MSR_SBAF_STATUS				0x000002bd
>  
>  #define MSR_COPY_SCAN_HASHES			0x000002c2
>  #define MSR_SCAN_HASHES_STATUS			0x000002c3
> @@ -283,6 +285,34 @@ union ifs_array {
>  	};
>  };
>  
> +/* MSR_ACTIVATE_SBAF bit fields */
> +union ifs_sbaf {
> +	u64	data;
> +	struct {
> +		u32	bundle_idx	:9;
> +		u32	rsvd1		:5;
> +		u32	pgm_idx		:2;
> +		u32	rsvd2		:16;
> +		u32	delay		:31;
> +		u32	sigmce		:1;
> +	};
> +};
> +
> +/* MSR_SBAF_STATUS bit fields */
> +union ifs_sbaf_status {
> +	u64	data;
> +	struct {
> +		u32	bundle_idx	:9;
> +		u32	rsvd1		:5;
> +		u32	pgm_idx		:2;
> +		u32	rsvd2		:16;
> +		u32	error_code	:8;
> +		u32	rsvd3		:21;
> +		u32	test_fail	:1;
> +		u32	sbaf_status	:2;
> +	};
> +};
> +
>  /*
>   * Driver populated error-codes
>   * 0xFD: Test timed out before completing all the chunks.
> diff --git a/drivers/platform/x86/intel/ifs/runtest.c b/drivers/platform/x86/intel/ifs/runtest.c
> index 282e4bfe30da..a812ec7761b9 100644
> --- a/drivers/platform/x86/intel/ifs/runtest.c
> +++ b/drivers/platform/x86/intel/ifs/runtest.c
> @@ -29,6 +29,13 @@ struct run_params {
>  	union ifs_status status;
>  };
>  
> +struct sbaf_run_params {
> +	struct ifs_data *ifsd;
> +	int *retry_cnt;
> +	union ifs_sbaf *activate;
> +	union ifs_sbaf_status status;
> +};
> +
>  /*
>   * Number of TSC cycles that a logical CPU will wait for the other
>   * logical CPU on the core in the WRMSR(ACTIVATE_SCAN).
> @@ -146,6 +153,7 @@ static bool can_restart(union ifs_status status)
>  #define SPINUNIT 100 /* 100 nsec */
>  static atomic_t array_cpus_in;
>  static atomic_t scan_cpus_in;
> +static atomic_t sbaf_cpus_in;
>  
>  /*
>   * Simplified cpu sibling rendezvous loop based on microcode loader __wait_for_cpus()
> @@ -387,6 +395,226 @@ static void ifs_array_test_gen1(int cpu, struct device *dev)
>  		ifsd->status = SCAN_TEST_PASS;
>  }
>  
> +#define SBAF_STATUS_PASS			0
> +#define SBAF_STATUS_SIGN_FAIL			1
> +#define SBAF_STATUS_INTR			2
> +#define SBAF_STATUS_TEST_FAIL			3
> +
> +enum sbaf_status_err_code {
> +	IFS_SBAF_NO_ERROR				= 0,
> +	IFS_SBAF_OTHER_THREAD_COULD_NOT_JOIN		= 1,
> +	IFS_SBAF_INTERRUPTED_BEFORE_RENDEZVOUS		= 2,
> +	IFS_SBAF_UNASSIGNED_ERROR_CODE3			= 3,
> +	IFS_SBAF_INVALID_BUNDLE_INDEX			= 4,
> +	IFS_SBAF_MISMATCH_ARGS_BETWEEN_THREADS		= 5,
> +	IFS_SBAF_CORE_NOT_CAPABLE_CURRENTLY		= 6,
> +	IFS_SBAF_UNASSIGNED_ERROR_CODE7			= 7,
> +	IFS_SBAF_EXCEED_NUMBER_OF_THREADS_CONCURRENT	= 8,
> +	IFS_SBAF_INTERRUPTED_DURING_EXECUTION		= 9,
> +	IFS_SBAF_INVALID_PROGRAM_INDEX			= 0xA,
> +	IFS_SBAF_CORRUPTED_CHUNK			= 0xB,
> +	IFS_SBAF_DID_NOT_START				= 0xC,
> +};
> +
> +static const char * const sbaf_test_status[] = {
> +	[IFS_SBAF_NO_ERROR] = "SBAF no error",
> +	[IFS_SBAF_OTHER_THREAD_COULD_NOT_JOIN] = "Other thread could not join.",
> +	[IFS_SBAF_INTERRUPTED_BEFORE_RENDEZVOUS] = "Interrupt occurred prior to SBAF coordination.",
> +	[IFS_SBAF_UNASSIGNED_ERROR_CODE3] = "Unassigned error code 0x3",
> +	[IFS_SBAF_INVALID_BUNDLE_INDEX] = "Non valid sbaf bundles. Reload test image",

Non-valid SBAF

...but given your define is named "INVALID", why not use just Invalid 
SBAF?

> +	[IFS_SBAF_MISMATCH_ARGS_BETWEEN_THREADS] = "Mismatch in arguments between threads T0/T1.",
> +	[IFS_SBAF_CORE_NOT_CAPABLE_CURRENTLY] = "Core not capable of performing SBAF currently",
> +	[IFS_SBAF_UNASSIGNED_ERROR_CODE7] = "Unassigned error code 0x7",
> +	[IFS_SBAF_EXCEED_NUMBER_OF_THREADS_CONCURRENT] = "Exceeded number of Logical Processors (LP) allowed to run Scan-At-Field concurrently",
> +	[IFS_SBAF_INTERRUPTED_DURING_EXECUTION] = "Interrupt occurred prior to SBAF start",
> +	[IFS_SBAF_INVALID_PROGRAM_INDEX] = "SBAF program index not valid",
> +	[IFS_SBAF_CORRUPTED_CHUNK] = "SBAF operation aborted due to corrupted chunk",
> +	[IFS_SBAF_DID_NOT_START] = "SBAF operation did not start",
> +};
> +
> +static void sbaf_message_not_tested(struct device *dev, int cpu, u64 status_data)
> +{
> +	union ifs_sbaf_status status = (union ifs_sbaf_status)status_data;
> +
> +	if (status.error_code < ARRAY_SIZE(sbaf_test_status)) {
> +		dev_info(dev, "CPU(s) %*pbl: SBAF operation did not start. %s\n",
> +			 cpumask_pr_args(cpu_smt_mask(cpu)),
> +			 sbaf_test_status[status.error_code]);
> +	} else if (status.error_code == IFS_SW_TIMEOUT) {
> +		dev_info(dev, "CPU(s) %*pbl: software timeout during scan\n",
> +			 cpumask_pr_args(cpu_smt_mask(cpu)));
> +	} else if (status.error_code == IFS_SW_PARTIAL_COMPLETION) {
> +		dev_info(dev, "CPU(s) %*pbl: %s\n",
> +			 cpumask_pr_args(cpu_smt_mask(cpu)),
> +			 "Not all SBAF bundles executed. Maximum forward progress retries exceeded");
> +	} else {
> +		dev_info(dev, "CPU(s) %*pbl: SBAF unknown status %llx\n",
> +			 cpumask_pr_args(cpu_smt_mask(cpu)), status.data);
> +	}
> +}
> +
> +static void sbaf_message_fail(struct device *dev, int cpu, union ifs_sbaf_status status)
> +{
> +	/* Failed signature check is set when SBAF signature did not match the expected value */
> +	if (status.sbaf_status == SBAF_STATUS_SIGN_FAIL) {
> +		dev_err(dev, "CPU(s) %*pbl: Failed signature check\n",
> +			cpumask_pr_args(cpu_smt_mask(cpu)));
> +	}
> +
> +	/* Failed to reach end of test */
> +	if (status.sbaf_status == SBAF_STATUS_TEST_FAIL) {
> +		dev_err(dev, "CPU(s) %*pbl: Failed to complete test\n",
> +			cpumask_pr_args(cpu_smt_mask(cpu)));
> +	}
> +}
> +
> +static bool sbaf_bundle_completed(union ifs_sbaf_status status)
> +{
> +	if (status.sbaf_status || status.error_code)
> +		return false;
> +	return true;

This is same as:

	return !status.sbaf_status && !status.error_code;

> +}
> +
> +static bool sbaf_can_restart(union ifs_sbaf_status status)
> +{
> +	enum sbaf_status_err_code err_code = status.error_code;
> +
> +	/* Signature for chunk is bad, or scan test failed */
> +	if (status.sbaf_status == SBAF_STATUS_SIGN_FAIL ||
> +	    status.sbaf_status == SBAF_STATUS_TEST_FAIL)
> +		return false;
> +
> +	switch (err_code) {
> +	case IFS_SBAF_NO_ERROR:
> +	case IFS_SBAF_OTHER_THREAD_COULD_NOT_JOIN:
> +	case IFS_SBAF_INTERRUPTED_BEFORE_RENDEZVOUS:
> +	case IFS_SBAF_EXCEED_NUMBER_OF_THREADS_CONCURRENT:
> +	case IFS_SBAF_INTERRUPTED_DURING_EXECUTION:
> +		return true;
> +	case IFS_SBAF_UNASSIGNED_ERROR_CODE3:
> +	case IFS_SBAF_INVALID_BUNDLE_INDEX:
> +	case IFS_SBAF_MISMATCH_ARGS_BETWEEN_THREADS:
> +	case IFS_SBAF_CORE_NOT_CAPABLE_CURRENTLY:
> +	case IFS_SBAF_UNASSIGNED_ERROR_CODE7:
> +	case IFS_SBAF_INVALID_PROGRAM_INDEX:
> +	case IFS_SBAF_CORRUPTED_CHUNK:
> +	case IFS_SBAF_DID_NOT_START:
> +		break;
> +	}
> +	return false;
> +}
> +
> +/*
> + * Execute the SBAF test. Called "simultaneously" on all threads of a core
> + * at high priority using the stop_cpus mechanism.
> + */
> +static int dosbaf(void *data)
> +{
> +	struct sbaf_run_params *run_params = data;
> +	int cpu = smp_processor_id();
> +	union ifs_sbaf_status status;
> +	struct ifs_data *ifsd;
> +	int first;
> +
> +	ifsd = run_params->ifsd;
> +
> +	/* Only the first logical CPU on a core reports result */
> +	first = cpumask_first(cpu_smt_mask(cpu));
> +	wait_for_sibling_cpu(&sbaf_cpus_in, NSEC_PER_SEC);
> +
> +	/*
> +	 * This WRMSR will wait for other HT threads to also write
> +	 * to this MSR (at most for activate.delay cycles). Then it
> +	 * starts scan of each requested bundle. The core test happens
> +	 * during the "execution" of the WRMSR.
> +	 */
> +	wrmsrl(MSR_ACTIVATE_SBAF, run_params->activate->data);
> +	rdmsrl(MSR_SBAF_STATUS, status.data);
> +
> +	/* Pass back the result of the test */
> +	if (cpu == first)
> +		run_params->status = status;
> +
> +	return 0;
> +}
> +
> +static void ifs_sbaf_test_core(int cpu, struct device *dev)
> +{
> +	struct sbaf_run_params run_params;
> +	union ifs_sbaf_status status = {};
> +	union ifs_sbaf activate;
> +	unsigned long timeout;
> +	struct ifs_data *ifsd;
> +	int stop_bundle;
> +	int retries;
> +
> +	ifsd = ifs_get_data(dev);
> +
> +	activate.data = 0;
> +	activate.delay = IFS_THREAD_WAIT;
> +
> +	timeout = jiffies + (2 * HZ);

Unnecessary parenthesis.

Other than those things mentioned above,

Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@xxxxxxxxxxxxxxx>


-- 
 i.

[Index of Archives]     [Linux Kernel Development]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux