Re: [PATCH v2 3/4] platform/x86/intel/ifs: Add SBAF test support

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 7/11/24 2:44 AM, Ilpo Järvinen wrote:
> On Wed, 10 Jul 2024, Kuppuswamy Sathyanarayanan wrote:
>
>> From: Jithu Joseph <jithu.joseph@xxxxxxxxx>
>>
>> In a core, the SBAF test engine is shared between sibling CPUs.
>>
>> An SBAF test image contains multiple bundles. Each bundle is further
>> composed of subunits called programs. When a SBAF test (for a particular
>> core) is triggered by the user, each SBAF bundle from the loaded test
>> image is executed sequentially on all the threads on the core using
>> the stop_core_cpuslocked mechanism. Each bundle execution is initiated by
>> writing to MSR_ACTIVATE_SBAF.
>>
>> SBAF test bundle execution may be aborted when an interrupt occurs or
>> if the CPU does not have enough power budget for the test. In these
>> cases the kernel restarts the test from the aborted bundle. SBAF
>> execution is not retried if the test fails or if the test makes no
>> forward progress after 5 retries.
>>
>> Reviewed-by: Ashok Raj <ashok.raj@xxxxxxxxx>
>> Reviewed-by: Tony Luck <tony.luck@xxxxxxxxx>
>> Signed-off-by: Jithu Joseph <jithu.joseph@xxxxxxxxx>
>> Signed-off-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@xxxxxxxxxxxxxxx>
>> ---
>>  drivers/platform/x86/intel/ifs/ifs.h     |  30 +++
>>  drivers/platform/x86/intel/ifs/runtest.c | 234 +++++++++++++++++++++++
>>  2 files changed, 264 insertions(+)
>>
>> diff --git a/drivers/platform/x86/intel/ifs/ifs.h b/drivers/platform/x86/intel/ifs/ifs.h
>> index 600bb8a1b285..b261be46bce8 100644
>> --- a/drivers/platform/x86/intel/ifs/ifs.h
>> +++ b/drivers/platform/x86/intel/ifs/ifs.h
>> @@ -157,6 +157,8 @@
>>  #define MSR_SBAF_HASHES_STATUS			0x000002b9
>>  #define MSR_AUTHENTICATE_AND_COPY_SBAF_CHUNK	0x000002ba
>>  #define MSR_SBAF_CHUNKS_AUTHENTICATION_STATUS	0x000002bb
>> +#define MSR_ACTIVATE_SBAF			0x000002bc
>> +#define MSR_SBAF_STATUS				0x000002bd
>>  
>>  #define MSR_COPY_SCAN_HASHES			0x000002c2
>>  #define MSR_SCAN_HASHES_STATUS			0x000002c3
>> @@ -283,6 +285,34 @@ union ifs_array {
>>  	};
>>  };
>>  
>> +/* MSR_ACTIVATE_SBAF bit fields */
>> +union ifs_sbaf {
>> +	u64	data;
>> +	struct {
>> +		u32	bundle_idx	:9;
>> +		u32	rsvd1		:5;
>> +		u32	pgm_idx		:2;
>> +		u32	rsvd2		:16;
>> +		u32	delay		:31;
>> +		u32	sigmce		:1;
>> +	};
>> +};
>> +
>> +/* MSR_SBAF_STATUS bit fields */
>> +union ifs_sbaf_status {
>> +	u64	data;
>> +	struct {
>> +		u32	bundle_idx	:9;
>> +		u32	rsvd1		:5;
>> +		u32	pgm_idx		:2;
>> +		u32	rsvd2		:16;
>> +		u32	error_code	:8;
>> +		u32	rsvd3		:21;
>> +		u32	test_fail	:1;
>> +		u32	sbaf_status	:2;
>> +	};
>> +};
>> +
>>  /*
>>   * Driver populated error-codes
>>   * 0xFD: Test timed out before completing all the chunks.
>> diff --git a/drivers/platform/x86/intel/ifs/runtest.c b/drivers/platform/x86/intel/ifs/runtest.c
>> index 282e4bfe30da..a812ec7761b9 100644
>> --- a/drivers/platform/x86/intel/ifs/runtest.c
>> +++ b/drivers/platform/x86/intel/ifs/runtest.c
>> @@ -29,6 +29,13 @@ struct run_params {
>>  	union ifs_status status;
>>  };
>>  
>> +struct sbaf_run_params {
>> +	struct ifs_data *ifsd;
>> +	int *retry_cnt;
>> +	union ifs_sbaf *activate;
>> +	union ifs_sbaf_status status;
>> +};
>> +
>>  /*
>>   * Number of TSC cycles that a logical CPU will wait for the other
>>   * logical CPU on the core in the WRMSR(ACTIVATE_SCAN).
>> @@ -146,6 +153,7 @@ static bool can_restart(union ifs_status status)
>>  #define SPINUNIT 100 /* 100 nsec */
>>  static atomic_t array_cpus_in;
>>  static atomic_t scan_cpus_in;
>> +static atomic_t sbaf_cpus_in;
>>  
>>  /*
>>   * Simplified cpu sibling rendezvous loop based on microcode loader __wait_for_cpus()
>> @@ -387,6 +395,226 @@ static void ifs_array_test_gen1(int cpu, struct device *dev)
>>  		ifsd->status = SCAN_TEST_PASS;
>>  }
>>  
>> +#define SBAF_STATUS_PASS			0
>> +#define SBAF_STATUS_SIGN_FAIL			1
>> +#define SBAF_STATUS_INTR			2
>> +#define SBAF_STATUS_TEST_FAIL			3
>> +
>> +enum sbaf_status_err_code {
>> +	IFS_SBAF_NO_ERROR				= 0,
>> +	IFS_SBAF_OTHER_THREAD_COULD_NOT_JOIN		= 1,
>> +	IFS_SBAF_INTERRUPTED_BEFORE_RENDEZVOUS		= 2,
>> +	IFS_SBAF_UNASSIGNED_ERROR_CODE3			= 3,
>> +	IFS_SBAF_INVALID_BUNDLE_INDEX			= 4,
>> +	IFS_SBAF_MISMATCH_ARGS_BETWEEN_THREADS		= 5,
>> +	IFS_SBAF_CORE_NOT_CAPABLE_CURRENTLY		= 6,
>> +	IFS_SBAF_UNASSIGNED_ERROR_CODE7			= 7,
>> +	IFS_SBAF_EXCEED_NUMBER_OF_THREADS_CONCURRENT	= 8,
>> +	IFS_SBAF_INTERRUPTED_DURING_EXECUTION		= 9,
>> +	IFS_SBAF_INVALID_PROGRAM_INDEX			= 0xA,
>> +	IFS_SBAF_CORRUPTED_CHUNK			= 0xB,
>> +	IFS_SBAF_DID_NOT_START				= 0xC,
>> +};
>> +
>> +static const char * const sbaf_test_status[] = {
>> +	[IFS_SBAF_NO_ERROR] = "SBAF no error",
>> +	[IFS_SBAF_OTHER_THREAD_COULD_NOT_JOIN] = "Other thread could not join.",
>> +	[IFS_SBAF_INTERRUPTED_BEFORE_RENDEZVOUS] = "Interrupt occurred prior to SBAF coordination.",
>> +	[IFS_SBAF_UNASSIGNED_ERROR_CODE3] = "Unassigned error code 0x3",
>> +	[IFS_SBAF_INVALID_BUNDLE_INDEX] = "Non valid sbaf bundles. Reload test image",
> Non-valid SBAF
>
> ...but given your define is named "INVALID", why not use just Invalid 
> SBAF?

Above string is from the specification document.But I think it is ok to use
"Invalid" or "Non-valid".

Jithu, any concerns?

>> +	[IFS_SBAF_MISMATCH_ARGS_BETWEEN_THREADS] = "Mismatch in arguments between threads T0/T1.",
>> +	[IFS_SBAF_CORE_NOT_CAPABLE_CURRENTLY] = "Core not capable of performing SBAF currently",
>> +	[IFS_SBAF_UNASSIGNED_ERROR_CODE7] = "Unassigned error code 0x7",
>> +	[IFS_SBAF_EXCEED_NUMBER_OF_THREADS_CONCURRENT] = "Exceeded number of Logical Processors (LP) allowed to run Scan-At-Field concurrently",
>> +	[IFS_SBAF_INTERRUPTED_DURING_EXECUTION] = "Interrupt occurred prior to SBAF start",
>> +	[IFS_SBAF_INVALID_PROGRAM_INDEX] = "SBAF program index not valid",
>> +	[IFS_SBAF_CORRUPTED_CHUNK] = "SBAF operation aborted due to corrupted chunk",
>> +	[IFS_SBAF_DID_NOT_START] = "SBAF operation did not start",
>> +};
>> +
>> +static void sbaf_message_not_tested(struct device *dev, int cpu, u64 status_data)
>> +{
>> +	union ifs_sbaf_status status = (union ifs_sbaf_status)status_data;
>> +
>> +	if (status.error_code < ARRAY_SIZE(sbaf_test_status)) {
>> +		dev_info(dev, "CPU(s) %*pbl: SBAF operation did not start. %s\n",
>> +			 cpumask_pr_args(cpu_smt_mask(cpu)),
>> +			 sbaf_test_status[status.error_code]);
>> +	} else if (status.error_code == IFS_SW_TIMEOUT) {
>> +		dev_info(dev, "CPU(s) %*pbl: software timeout during scan\n",
>> +			 cpumask_pr_args(cpu_smt_mask(cpu)));
>> +	} else if (status.error_code == IFS_SW_PARTIAL_COMPLETION) {
>> +		dev_info(dev, "CPU(s) %*pbl: %s\n",
>> +			 cpumask_pr_args(cpu_smt_mask(cpu)),
>> +			 "Not all SBAF bundles executed. Maximum forward progress retries exceeded");
>> +	} else {
>> +		dev_info(dev, "CPU(s) %*pbl: SBAF unknown status %llx\n",
>> +			 cpumask_pr_args(cpu_smt_mask(cpu)), status.data);
>> +	}
>> +}
>> +
>> +static void sbaf_message_fail(struct device *dev, int cpu, union ifs_sbaf_status status)
>> +{
>> +	/* Failed signature check is set when SBAF signature did not match the expected value */
>> +	if (status.sbaf_status == SBAF_STATUS_SIGN_FAIL) {
>> +		dev_err(dev, "CPU(s) %*pbl: Failed signature check\n",
>> +			cpumask_pr_args(cpu_smt_mask(cpu)));
>> +	}
>> +
>> +	/* Failed to reach end of test */
>> +	if (status.sbaf_status == SBAF_STATUS_TEST_FAIL) {
>> +		dev_err(dev, "CPU(s) %*pbl: Failed to complete test\n",
>> +			cpumask_pr_args(cpu_smt_mask(cpu)));
>> +	}
>> +}
>> +
>> +static bool sbaf_bundle_completed(union ifs_sbaf_status status)
>> +{
>> +	if (status.sbaf_status || status.error_code)
>> +		return false;
>> +	return true;
> This is same as:
>
> 	return !status.sbaf_status && !status.error_code;

Yes. Your version looks good. Do you want me to send a version with
this change or you can include it when merging it?

>> +}
>> +
>> +static bool sbaf_can_restart(union ifs_sbaf_status status)
>> +{
>> +	enum sbaf_status_err_code err_code = status.error_code;
>> +
>> +	/* Signature for chunk is bad, or scan test failed */
>> +	if (status.sbaf_status == SBAF_STATUS_SIGN_FAIL ||
>> +	    status.sbaf_status == SBAF_STATUS_TEST_FAIL)
>> +		return false;
>> +
>> +	switch (err_code) {
>> +	case IFS_SBAF_NO_ERROR:
>> +	case IFS_SBAF_OTHER_THREAD_COULD_NOT_JOIN:
>> +	case IFS_SBAF_INTERRUPTED_BEFORE_RENDEZVOUS:
>> +	case IFS_SBAF_EXCEED_NUMBER_OF_THREADS_CONCURRENT:
>> +	case IFS_SBAF_INTERRUPTED_DURING_EXECUTION:
>> +		return true;
>> +	case IFS_SBAF_UNASSIGNED_ERROR_CODE3:
>> +	case IFS_SBAF_INVALID_BUNDLE_INDEX:
>> +	case IFS_SBAF_MISMATCH_ARGS_BETWEEN_THREADS:
>> +	case IFS_SBAF_CORE_NOT_CAPABLE_CURRENTLY:
>> +	case IFS_SBAF_UNASSIGNED_ERROR_CODE7:
>> +	case IFS_SBAF_INVALID_PROGRAM_INDEX:
>> +	case IFS_SBAF_CORRUPTED_CHUNK:
>> +	case IFS_SBAF_DID_NOT_START:
>> +		break;
>> +	}
>> +	return false;
>> +}
>> +
>> +/*
>> + * Execute the SBAF test. Called "simultaneously" on all threads of a core
>> + * at high priority using the stop_cpus mechanism.
>> + */
>> +static int dosbaf(void *data)
>> +{
>> +	struct sbaf_run_params *run_params = data;
>> +	int cpu = smp_processor_id();
>> +	union ifs_sbaf_status status;
>> +	struct ifs_data *ifsd;
>> +	int first;
>> +
>> +	ifsd = run_params->ifsd;
>> +
>> +	/* Only the first logical CPU on a core reports result */
>> +	first = cpumask_first(cpu_smt_mask(cpu));
>> +	wait_for_sibling_cpu(&sbaf_cpus_in, NSEC_PER_SEC);
>> +
>> +	/*
>> +	 * This WRMSR will wait for other HT threads to also write
>> +	 * to this MSR (at most for activate.delay cycles). Then it
>> +	 * starts scan of each requested bundle. The core test happens
>> +	 * during the "execution" of the WRMSR.
>> +	 */
>> +	wrmsrl(MSR_ACTIVATE_SBAF, run_params->activate->data);
>> +	rdmsrl(MSR_SBAF_STATUS, status.data);
>> +
>> +	/* Pass back the result of the test */
>> +	if (cpu == first)
>> +		run_params->status = status;
>> +
>> +	return 0;
>> +}
>> +
>> +static void ifs_sbaf_test_core(int cpu, struct device *dev)
>> +{
>> +	struct sbaf_run_params run_params;
>> +	union ifs_sbaf_status status = {};
>> +	union ifs_sbaf activate;
>> +	unsigned long timeout;
>> +	struct ifs_data *ifsd;
>> +	int stop_bundle;
>> +	int retries;
>> +
>> +	ifsd = ifs_get_data(dev);
>> +
>> +	activate.data = 0;
>> +	activate.delay = IFS_THREAD_WAIT;
>> +
>> +	timeout = jiffies + (2 * HZ);
> Unnecessary parenthesis.
>
> Other than those things mentioned above,
>
> Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@xxxxxxxxxxxxxxx>
>
>
-- 
Sathyanarayanan Kuppuswamy
Linux Kernel Developer





[Index of Archives]     [Linux Kernel Development]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux