On Wed, 10 Jul 2024, Kuppuswamy Sathyanarayanan wrote: > From: Jithu Joseph <jithu.joseph@xxxxxxxxx> > > In a core, the SBAF test engine is shared between sibling CPUs. > > An SBAF test image contains multiple bundles. Each bundle is further > composed of subunits called programs. When a SBAF test (for a particular > core) is triggered by the user, each SBAF bundle from the loaded test > image is executed sequentially on all the threads on the core using > the stop_core_cpuslocked mechanism. Each bundle execution is initiated by > writing to MSR_ACTIVATE_SBAF. > > SBAF test bundle execution may be aborted when an interrupt occurs or > if the CPU does not have enough power budget for the test. In these > cases the kernel restarts the test from the aborted bundle. SBAF > execution is not retried if the test fails or if the test makes no > forward progress after 5 retries. > > Reviewed-by: Ashok Raj <ashok.raj@xxxxxxxxx> > Reviewed-by: Tony Luck <tony.luck@xxxxxxxxx> > Signed-off-by: Jithu Joseph <jithu.joseph@xxxxxxxxx> > Signed-off-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@xxxxxxxxxxxxxxx> > --- > drivers/platform/x86/intel/ifs/ifs.h | 30 +++ > drivers/platform/x86/intel/ifs/runtest.c | 234 +++++++++++++++++++++++ > 2 files changed, 264 insertions(+) > > diff --git a/drivers/platform/x86/intel/ifs/ifs.h b/drivers/platform/x86/intel/ifs/ifs.h > index 600bb8a1b285..b261be46bce8 100644 > --- a/drivers/platform/x86/intel/ifs/ifs.h > +++ b/drivers/platform/x86/intel/ifs/ifs.h > @@ -157,6 +157,8 @@ > #define MSR_SBAF_HASHES_STATUS 0x000002b9 > #define MSR_AUTHENTICATE_AND_COPY_SBAF_CHUNK 0x000002ba > #define MSR_SBAF_CHUNKS_AUTHENTICATION_STATUS 0x000002bb > +#define MSR_ACTIVATE_SBAF 0x000002bc > +#define MSR_SBAF_STATUS 0x000002bd > > #define MSR_COPY_SCAN_HASHES 0x000002c2 > #define MSR_SCAN_HASHES_STATUS 0x000002c3 > @@ -283,6 +285,34 @@ union ifs_array { > }; > }; > > +/* MSR_ACTIVATE_SBAF bit fields */ > +union ifs_sbaf { > + u64 data; > + struct { > + u32 bundle_idx :9; > + u32 rsvd1 :5; > + u32 pgm_idx :2; > + u32 rsvd2 :16; > + u32 delay :31; > + u32 sigmce :1; > + }; > +}; > + > +/* MSR_SBAF_STATUS bit fields */ > +union ifs_sbaf_status { > + u64 data; > + struct { > + u32 bundle_idx :9; > + u32 rsvd1 :5; > + u32 pgm_idx :2; > + u32 rsvd2 :16; > + u32 error_code :8; > + u32 rsvd3 :21; > + u32 test_fail :1; > + u32 sbaf_status :2; > + }; > +}; > + > /* > * Driver populated error-codes > * 0xFD: Test timed out before completing all the chunks. > diff --git a/drivers/platform/x86/intel/ifs/runtest.c b/drivers/platform/x86/intel/ifs/runtest.c > index 282e4bfe30da..a812ec7761b9 100644 > --- a/drivers/platform/x86/intel/ifs/runtest.c > +++ b/drivers/platform/x86/intel/ifs/runtest.c > @@ -29,6 +29,13 @@ struct run_params { > union ifs_status status; > }; > > +struct sbaf_run_params { > + struct ifs_data *ifsd; > + int *retry_cnt; > + union ifs_sbaf *activate; > + union ifs_sbaf_status status; > +}; > + > /* > * Number of TSC cycles that a logical CPU will wait for the other > * logical CPU on the core in the WRMSR(ACTIVATE_SCAN). > @@ -146,6 +153,7 @@ static bool can_restart(union ifs_status status) > #define SPINUNIT 100 /* 100 nsec */ > static atomic_t array_cpus_in; > static atomic_t scan_cpus_in; > +static atomic_t sbaf_cpus_in; > > /* > * Simplified cpu sibling rendezvous loop based on microcode loader __wait_for_cpus() > @@ -387,6 +395,226 @@ static void ifs_array_test_gen1(int cpu, struct device *dev) > ifsd->status = SCAN_TEST_PASS; > } > > +#define SBAF_STATUS_PASS 0 > +#define SBAF_STATUS_SIGN_FAIL 1 > +#define SBAF_STATUS_INTR 2 > +#define SBAF_STATUS_TEST_FAIL 3 > + > +enum sbaf_status_err_code { > + IFS_SBAF_NO_ERROR = 0, > + IFS_SBAF_OTHER_THREAD_COULD_NOT_JOIN = 1, > + IFS_SBAF_INTERRUPTED_BEFORE_RENDEZVOUS = 2, > + IFS_SBAF_UNASSIGNED_ERROR_CODE3 = 3, > + IFS_SBAF_INVALID_BUNDLE_INDEX = 4, > + IFS_SBAF_MISMATCH_ARGS_BETWEEN_THREADS = 5, > + IFS_SBAF_CORE_NOT_CAPABLE_CURRENTLY = 6, > + IFS_SBAF_UNASSIGNED_ERROR_CODE7 = 7, > + IFS_SBAF_EXCEED_NUMBER_OF_THREADS_CONCURRENT = 8, > + IFS_SBAF_INTERRUPTED_DURING_EXECUTION = 9, > + IFS_SBAF_INVALID_PROGRAM_INDEX = 0xA, > + IFS_SBAF_CORRUPTED_CHUNK = 0xB, > + IFS_SBAF_DID_NOT_START = 0xC, > +}; > + > +static const char * const sbaf_test_status[] = { > + [IFS_SBAF_NO_ERROR] = "SBAF no error", > + [IFS_SBAF_OTHER_THREAD_COULD_NOT_JOIN] = "Other thread could not join.", > + [IFS_SBAF_INTERRUPTED_BEFORE_RENDEZVOUS] = "Interrupt occurred prior to SBAF coordination.", > + [IFS_SBAF_UNASSIGNED_ERROR_CODE3] = "Unassigned error code 0x3", > + [IFS_SBAF_INVALID_BUNDLE_INDEX] = "Non valid sbaf bundles. Reload test image", Non-valid SBAF ...but given your define is named "INVALID", why not use just Invalid SBAF? > + [IFS_SBAF_MISMATCH_ARGS_BETWEEN_THREADS] = "Mismatch in arguments between threads T0/T1.", > + [IFS_SBAF_CORE_NOT_CAPABLE_CURRENTLY] = "Core not capable of performing SBAF currently", > + [IFS_SBAF_UNASSIGNED_ERROR_CODE7] = "Unassigned error code 0x7", > + [IFS_SBAF_EXCEED_NUMBER_OF_THREADS_CONCURRENT] = "Exceeded number of Logical Processors (LP) allowed to run Scan-At-Field concurrently", > + [IFS_SBAF_INTERRUPTED_DURING_EXECUTION] = "Interrupt occurred prior to SBAF start", > + [IFS_SBAF_INVALID_PROGRAM_INDEX] = "SBAF program index not valid", > + [IFS_SBAF_CORRUPTED_CHUNK] = "SBAF operation aborted due to corrupted chunk", > + [IFS_SBAF_DID_NOT_START] = "SBAF operation did not start", > +}; > + > +static void sbaf_message_not_tested(struct device *dev, int cpu, u64 status_data) > +{ > + union ifs_sbaf_status status = (union ifs_sbaf_status)status_data; > + > + if (status.error_code < ARRAY_SIZE(sbaf_test_status)) { > + dev_info(dev, "CPU(s) %*pbl: SBAF operation did not start. %s\n", > + cpumask_pr_args(cpu_smt_mask(cpu)), > + sbaf_test_status[status.error_code]); > + } else if (status.error_code == IFS_SW_TIMEOUT) { > + dev_info(dev, "CPU(s) %*pbl: software timeout during scan\n", > + cpumask_pr_args(cpu_smt_mask(cpu))); > + } else if (status.error_code == IFS_SW_PARTIAL_COMPLETION) { > + dev_info(dev, "CPU(s) %*pbl: %s\n", > + cpumask_pr_args(cpu_smt_mask(cpu)), > + "Not all SBAF bundles executed. Maximum forward progress retries exceeded"); > + } else { > + dev_info(dev, "CPU(s) %*pbl: SBAF unknown status %llx\n", > + cpumask_pr_args(cpu_smt_mask(cpu)), status.data); > + } > +} > + > +static void sbaf_message_fail(struct device *dev, int cpu, union ifs_sbaf_status status) > +{ > + /* Failed signature check is set when SBAF signature did not match the expected value */ > + if (status.sbaf_status == SBAF_STATUS_SIGN_FAIL) { > + dev_err(dev, "CPU(s) %*pbl: Failed signature check\n", > + cpumask_pr_args(cpu_smt_mask(cpu))); > + } > + > + /* Failed to reach end of test */ > + if (status.sbaf_status == SBAF_STATUS_TEST_FAIL) { > + dev_err(dev, "CPU(s) %*pbl: Failed to complete test\n", > + cpumask_pr_args(cpu_smt_mask(cpu))); > + } > +} > + > +static bool sbaf_bundle_completed(union ifs_sbaf_status status) > +{ > + if (status.sbaf_status || status.error_code) > + return false; > + return true; This is same as: return !status.sbaf_status && !status.error_code; > +} > + > +static bool sbaf_can_restart(union ifs_sbaf_status status) > +{ > + enum sbaf_status_err_code err_code = status.error_code; > + > + /* Signature for chunk is bad, or scan test failed */ > + if (status.sbaf_status == SBAF_STATUS_SIGN_FAIL || > + status.sbaf_status == SBAF_STATUS_TEST_FAIL) > + return false; > + > + switch (err_code) { > + case IFS_SBAF_NO_ERROR: > + case IFS_SBAF_OTHER_THREAD_COULD_NOT_JOIN: > + case IFS_SBAF_INTERRUPTED_BEFORE_RENDEZVOUS: > + case IFS_SBAF_EXCEED_NUMBER_OF_THREADS_CONCURRENT: > + case IFS_SBAF_INTERRUPTED_DURING_EXECUTION: > + return true; > + case IFS_SBAF_UNASSIGNED_ERROR_CODE3: > + case IFS_SBAF_INVALID_BUNDLE_INDEX: > + case IFS_SBAF_MISMATCH_ARGS_BETWEEN_THREADS: > + case IFS_SBAF_CORE_NOT_CAPABLE_CURRENTLY: > + case IFS_SBAF_UNASSIGNED_ERROR_CODE7: > + case IFS_SBAF_INVALID_PROGRAM_INDEX: > + case IFS_SBAF_CORRUPTED_CHUNK: > + case IFS_SBAF_DID_NOT_START: > + break; > + } > + return false; > +} > + > +/* > + * Execute the SBAF test. Called "simultaneously" on all threads of a core > + * at high priority using the stop_cpus mechanism. > + */ > +static int dosbaf(void *data) > +{ > + struct sbaf_run_params *run_params = data; > + int cpu = smp_processor_id(); > + union ifs_sbaf_status status; > + struct ifs_data *ifsd; > + int first; > + > + ifsd = run_params->ifsd; > + > + /* Only the first logical CPU on a core reports result */ > + first = cpumask_first(cpu_smt_mask(cpu)); > + wait_for_sibling_cpu(&sbaf_cpus_in, NSEC_PER_SEC); > + > + /* > + * This WRMSR will wait for other HT threads to also write > + * to this MSR (at most for activate.delay cycles). Then it > + * starts scan of each requested bundle. The core test happens > + * during the "execution" of the WRMSR. > + */ > + wrmsrl(MSR_ACTIVATE_SBAF, run_params->activate->data); > + rdmsrl(MSR_SBAF_STATUS, status.data); > + > + /* Pass back the result of the test */ > + if (cpu == first) > + run_params->status = status; > + > + return 0; > +} > + > +static void ifs_sbaf_test_core(int cpu, struct device *dev) > +{ > + struct sbaf_run_params run_params; > + union ifs_sbaf_status status = {}; > + union ifs_sbaf activate; > + unsigned long timeout; > + struct ifs_data *ifsd; > + int stop_bundle; > + int retries; > + > + ifsd = ifs_get_data(dev); > + > + activate.data = 0; > + activate.delay = IFS_THREAD_WAIT; > + > + timeout = jiffies + (2 * HZ); Unnecessary parenthesis. Other than those things mentioned above, Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@xxxxxxxxxxxxxxx> -- i.