On Tue, 2022-03-01 at 11:54 -0800, Jithu Joseph wrote: > Create scan kthreads for online logical cpus. Once scan test is triggered, > it wakes up the corresponding thread and its sibling threads to execute > the test. Once the scan test is done, the threads go back to thread wait > for next signal to start a new scan. > > In a core, the scan engine is shared between siblings. When a scan test > is triggered on a core, all the siblings rendezvous before the test execution. > The scan results are same for all siblings. > > Scan may be aborted by some reasons. Scan test will be aborted in certain > circumstances such as when interrupt occurred or cpu does not have enough > power budget for scan. In this case, the kernel restart scan from the chunk > where it stopped. Scan will also be aborted when the test is failed. In > this case, the test is immediately stopped without retry. This needs to explain why existing kernel facilities for running code on a given target CPU are insufficient and a new open-coded thread-pool needed to be created. For example, what's wrong with queue_work_on()? 
> > Originally-by: Kyung Min Park <kyung.min.park@xxxxxxxxx> > Signed-off-by: Jithu Joseph <jithu.joseph@xxxxxxxxx> > Reviewed-by: Ashok Raj <ashok.raj@xxxxxxxxx> > Reviewed-by: Tony Luck <tony.luck@xxxxxxxxx> > --- > drivers/platform/x86/intel/ifs/core.c | 317 ++++++++++++++++++++++++++ > drivers/platform/x86/intel/ifs/ifs.h | 91 ++++++++ > 2 files changed, 408 insertions(+) > > diff --git a/drivers/platform/x86/intel/ifs/core.c b/drivers/platform/x86/intel/ifs/core.c > index 765d9a2c4683..6747b523587a 100644 > --- a/drivers/platform/x86/intel/ifs/core.c > +++ b/drivers/platform/x86/intel/ifs/core.c > @@ -4,11 +4,38 @@ > * Author: Jithu Joseph <jithu.joseph@xxxxxxxxx> > */ > > +#include <linux/cpu.h> > +#include <linux/delay.h> > +#include <linux/kthread.h> > #include <linux/module.h> > +#include <linux/nmi.h> > #include <asm/cpu_device_id.h> > > #include "ifs.h" > + > +static enum cpuhp_state cpuhp_scan_state; > struct ifs_params ifs_params; > +int cpu_sibl_ct; > +atomic_t siblings_in; /* sibling count for joining rendezvous.*/ > +atomic_t siblings_out; /* sibling count for exiting rendezvous.*/ > +struct completion test_thread_done; /* set when scan are done for all siblings threads.*/ > + > +DEFINE_PER_CPU(struct ifs_state, ifs_state); > + > +static int ifs_retry_set(const char *val, const struct kernel_param *kp); > +static const struct kernel_param_ops ifs_retry_ops = { > + .set = ifs_retry_set, > + .get = param_get_int, > +}; > + > +static int retry = 5; > +module_param_cb(retry, &ifs_retry_ops, &retry, 0644); > + > +MODULE_PARM_DESC(retry, "Maximum retry count when the test is not executed"); Why does this retry need to be in the kernel? Can't the test runner retry the test if they want? If it stays in the kernel, why a module parameter and not a sysfs attribute? > + > +static bool noint = 1; > +module_param(noint, bool, 0644); > +MODULE_PARM_DESC(noint, "Option to enable/disable interrupt during test"); Same sysfs vs module parameter question... 
> > #define X86_MATCH(model) \ > X86_MATCH_VENDOR_FAM_MODEL_FEATURE(INTEL, 6, \ > @@ -21,6 +48,273 @@ static const struct x86_cpu_id ifs_cpu_ids[] __initconst = { > > MODULE_DEVICE_TABLE(x86cpu, ifs_cpu_ids); > > +static int ifs_retry_set(const char *val, const struct kernel_param *kp) > +{ > + int var = 0; > + > + if (kstrtoint(val, 0, &var)) { > + pr_err("unable to parse retry\n"); > + return -EINVAL; > + } > + > + /* validate retry value for sanity */ > + if (var < 1 || var > 20) { > + pr_err("retry parameter should be between 1 and 20\n"); > + return -EINVAL; > + } > + > + return param_set_int(val, kp); > +} > + > +static unsigned long msec_to_tsc(unsigned long msec) > +{ > + return tsc_khz * 1000 * msec / MSEC_PER_SEC; > +} > + > +static const char * const scan_test_status[] = { > + "SCAN no error", > + "Other thread could not join.", > + "Interrupt occurred prior to SCAN coordination.", > + "Core Abort SCAN Response due to power management condition.", > + "Non valid chunks in the range", > + "Mismatch in arguments between threads T0/T1.", > + "Core not capable of performing SCAN currently", > + "Unassigned error code 0x7", > + "Exceeded number of Logical Processors (LP) allowed to run Scan-At-Field concurrently", > + "Interrupt occurred prior to SCAN start", This looks unmaintainable... /me finds large comment block around IFS_* error codes and suggests killing 2 birds with one stone, i.e. delete that comment and make this self documenting: static const char * const scan_test_status[] = { [IFS_NO_ERROR] = "SCAN no error", [IFS_OTHER_THREAD_DID_NOT_JOIN] = "Other thread could not join.", ...etc... }; Btw, which is it, "did not join" or "could not join"? If the symbol name is going to be that long, you might as well make it match the log message verbatim. That way you can add / delete error codes without wondering if you managed to match the code number to the right position in the array.
Even better would be to kick this out of the kernel and let the user tool translate the error codes to test result log messages. > +}; > + > +static void message_not_tested(int cpu, union ifs_status status) > +{ > + if (status.error_code < ARRAY_SIZE(scan_test_status)) > + pr_warn("CPU %d: SCAN operation did not start. %s\n", cpu, > + scan_test_status[status.error_code]); > + else if (status.error_code == IFS_SW_TIMEOUT) > + pr_warn("CPU %d: software timeout during scan\n", cpu); > + else if (status.error_code == IFS_SW_PARTIAL_COMPLETION) > + pr_warn("CPU %d: %s\n", cpu, > + "Not all scan chunks were executed. Maximum forward progress retries exceeded"); Why are these codes not in the scan_test_status set? I see that IFS_SW_PARTIAL_COMPLETION and IFS_SW_TIMEOUT are defined with larger values, but why? > + else > + pr_warn("CPU %d: SCAN unknown status %llx\n", cpu, status.data); > +} > + > +static void message_fail(int cpu, union ifs_status status) > +{ > + if (status.control_error) { > + pr_err("CPU %d: scan failed. %s\n", cpu, > + "Suggest reload scan file: # echo 1 > /sys/devices/system/cpu/ifs/reload"); > + } > + if (status.signature_error) { > + pr_err("CPU %d: test signature incorrect. %s\n", cpu, > + "Suggest retry scan to check if problem is transient"); > + } This looks and feels more like tools/testing/selftests/ style code printing information for a kernel developer to read. For a production capability I would expect these debug messages to be elided and have an ifs user tool that knows when to "Suggest reload scan file". Otherwise, it's not scalable to use the kernel log buffer like a utility's stdout. 
> +} > + > +static bool can_restart(union ifs_status status) > +{ > + /* Signature for chunk is bad, or scan test failed */ > + if (status.signature_error || status.control_error) > + return false; > + > + switch (status.error_code) { > + case IFS_POWER_MGMT_INADEQUATE_FOR_SCAN: > + mdelay(1); > + fallthrough; > + case IFS_NO_ERROR: > + case IFS_OTHER_THREAD_DID_NOT_JOIN: > + case IFS_INTERRUPTED_BEFORE_RENDEZVOUS: > + case IFS_EXCEED_NUMBER_OF_THREADS_CONCURRENT: > + case IFS_INTERRUPTED_DURING_EXECUTION: > + return true; If the error codes were an enum, the compiler would be able to check here that all possible codes were handled; as is, this needs manual review which is a maintenance tax. > + } > + return false; > +} > + > +static bool wait_for_siblings(atomic_t *t, long long timeout) > +{ > + atomic_inc(t); > + while (atomic_read(t) < cpu_sibl_ct) { > + if (timeout < SPINUNIT) { > + pr_err("Timeout while waiting for CPUs rendezvous, remaining: %d\n", > + cpu_sibl_ct - atomic_read(t)); > + return false; > + } > + > + ndelay(SPINUNIT); > + timeout -= SPINUNIT; > + > + touch_nmi_watchdog(); Live spin-waiting for threads to rendezvous with active defeat of the lockup detector? Is this attempting to replicate an open-coded SMM entry? > + } > + > + return true; > +} > + > +/* > + * Scan test kthreads bound with each logical cpu. > + * Wait for the sibling thread to join before the execution. > + * Execute the scan test by running wrmsr(MSR_ACTIVATE_SCAN).
> + */ > +static int scan_test_worker(void *info) > +{ > + int cpu = smp_processor_id(); > + union ifs_scan activate; > + union ifs_status status; > + unsigned long timeout; > + int retries; > + u32 first; > + > + activate.rsvd = 0; > + activate.delay = msec_to_tsc(THREAD_WAIT); > + activate.sigmce = 0; > + > + while (1) { > + /* wait event until cpumask set from user */ > + wait_event_interruptible(per_cpu(ifs_state, cpu).scan_wq, > + (cpumask_test_cpu(cpu, &per_cpu(ifs_state, cpu).mask) || > + kthread_should_stop())); > + > + if (kthread_should_stop()) > + break; > + > + /* > + * Need to get (and keep) the threads on this core executing close together > + * so that the writes to MSR_ACTIVATE_SCAN below will succeed in entering > + * IFS test mode on this core. Interrupts on each thread are expected to be > + * brief. But preemption would be a problem. What is this requirement to try to synchronize CPU execution? Comments should explain the "why", the code usually explains the "what". > + */ > + preempt_disable(); > + > + /* wait for the sibling threads to join */ > + first = cpumask_first(topology_sibling_cpumask(cpu)); > + if (!wait_for_siblings(&siblings_in, NSEC_PER_SEC)) { > + preempt_enable(); > + return -1; > + } > + > + activate.start = 0; > + activate.stop = ifs_params.valid_chunks - 1; > + timeout = jiffies + HZ / 2; > + retries = retry; > + > + while (activate.start <= activate.stop) { > + if (time_after(jiffies, timeout)) { > + status.error_code = IFS_SW_TIMEOUT; > + break; > + } > + > + if (noint) > + local_irq_disable(); > + /* scan start */ > + wrmsrl(MSR_ACTIVATE_SCAN, activate.data); > + > + if (noint) > + local_irq_enable(); > + > + /* > + * All logical CPUs on this core are now running IFS test. When it completes > + * execution or is interrupted, the following RDMSR gets the scan status. 
> + */ > + > + rdmsrl(MSR_SCAN_STATUS, status.data); > + > + /* Some cases can be retried, give up for others */ > + if (!can_restart(status)) > + break; > + > + if (status.chunk_num == activate.start) { > + /* Check for forward progress */ > + if (retries-- == 0) { > + if (status.error_code == IFS_NO_ERROR) > + status.error_code = IFS_SW_PARTIAL_COMPLETION; > + break; > + } > + } else { > + retries = retry; > + activate.start = status.chunk_num; > + } > + } > + > + preempt_enable(); > + > + /* Update status for this core */ > + per_cpu(ifs_state, cpu).scan_details = status.data; > + > + if (status.control_error || status.signature_error) { > + per_cpu(ifs_state, cpu).status = SCAN_TEST_FAIL; > + cpumask_set_cpu(cpu, &ifs_params.fail_mask); > + cpumask_clear_cpu(cpu, &ifs_params.not_tested_mask); > + cpumask_clear_cpu(cpu, &ifs_params.pass_mask); > + message_fail(cpu, status); > + } else if (status.error_code) { > + per_cpu(ifs_state, cpu).status = SCAN_NOT_TESTED; > + cpumask_set_cpu(cpu, &ifs_params.not_tested_mask); > + cpumask_clear_cpu(cpu, &ifs_params.fail_mask); > + cpumask_clear_cpu(cpu, &ifs_params.pass_mask); > + message_not_tested(cpu, status); > + } else { > + per_cpu(ifs_state, cpu).status = SCAN_TEST_PASS; > + cpumask_set_cpu(cpu, &ifs_params.pass_mask); > + cpumask_clear_cpu(cpu, &ifs_params.not_tested_mask); > + cpumask_clear_cpu(cpu, &ifs_params.fail_mask); > + } > + > + cpumask_clear_cpu(cpu, &per_cpu(ifs_state, cpu).mask); > + > + if (!wait_for_siblings(&siblings_out, NSEC_PER_SEC)) > + return -1; > + > + if (cpu == first) > + complete(&test_thread_done); > + } > + > + return 0; > +} > + > +static void ifs_first_time(unsigned int cpu) > +{ > + init_waitqueue_head(&(per_cpu(ifs_state, cpu).scan_wq)); > + > + per_cpu(ifs_state, cpu).first_time = 1; > + per_cpu(ifs_state, cpu).status = SCAN_NOT_TESTED; > + cpumask_set_cpu(cpu, &ifs_params.not_tested_mask); > + cpumask_clear_cpu(cpu, &ifs_params.fail_mask); > + cpumask_clear_cpu(cpu, 
&ifs_params.pass_mask); > +} > + > +static int ifs_online_cpu(unsigned int cpu) > +{ > + /* If the CPU is coming online for the first time*/ > + if (per_cpu(ifs_state, cpu).first_time == 0) > + ifs_first_time(cpu); > + > + cpumask_clear_cpu(cpu, &(per_cpu(ifs_state, cpu).mask)); > + > + per_cpu(ifs_state, cpu).scan_task = kthread_create_on_node(scan_test_worker, (void *)&cpu, > + cpu_to_node(cpu), "ifsCpu/%u", > + cpu); > + if (IS_ERR(per_cpu(ifs_state, cpu).scan_task)) { > + pr_err("scan_test_worker task create failed\n"); > + return PTR_ERR(per_cpu(ifs_state, cpu).scan_task); > + } > + kthread_bind(per_cpu(ifs_state, cpu).scan_task, cpu); > + wake_up_process(per_cpu(ifs_state, cpu).scan_task); > + > + return 0; > +} > + > +static int ifs_offline_cpu(unsigned int cpu) > +{ > + struct task_struct *thread; > + > + thread = per_cpu(ifs_state, cpu).scan_task; > + per_cpu(ifs_state, cpu).scan_task = NULL; > + > + if (thread) > + kthread_stop(thread); Per-cpu workqueues already handle cpu-online / offline and, as far as I can see, a dedicated workqueue for ifs could allow you to jettison some of this infrastructure code.
> + > + return 0; > +} > + > static int __init ifs_init(void) > { > const struct x86_cpu_id *m; > @@ -42,11 +336,34 @@ static int __init ifs_init(void) > return ret; > } > > + init_completion(&test_thread_done); > + ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/ifs:online", > + ifs_online_cpu, ifs_offline_cpu); > + > + if (ret < 0) { > + pr_err("cpuhp_setup_failed\n"); > + return ret; > + } > + cpuhp_scan_state = ret; > + > return 0; > } > > static void __exit ifs_exit(void) > { > + struct task_struct *thread; > + int cpu; > + > + cpus_read_lock(); > + for_each_online_cpu(cpu) { > + thread = per_cpu(ifs_state, cpu).scan_task; > + per_cpu(ifs_state, cpu).scan_task = NULL; > + if (thread) > + kthread_stop(thread); > + } > + cpus_read_unlock(); > + cpuhp_remove_state(cpuhp_scan_state); > + > pr_info("unloaded 'In-Field Scan' module\n"); > } > > diff --git a/drivers/platform/x86/intel/ifs/ifs.h b/drivers/platform/x86/intel/ifs/ifs.h > index 8f9abdb304b0..fcbbb49faa19 100644 > --- a/drivers/platform/x86/intel/ifs/ifs.h > +++ b/drivers/platform/x86/intel/ifs/ifs.h > @@ -18,6 +18,13 @@ > #define MSR_SCAN_HASHES_STATUS 0x000002c3 > #define MSR_AUTHENTICATE_AND_COPY_CHUNK 0x000002c4 > #define MSR_CHUNKS_AUTHENTICATION_STATUS 0x000002c5 > +#define MSR_ACTIVATE_SCAN 0x000002c6 > +#define MSR_SCAN_STATUS 0x000002c7 > +#define SCAN_TEST_PASS 0 > +#define SCAN_TEST_FAIL 1 > +#define SCAN_NOT_TESTED 2 > +#define SPINUNIT 100 > +#define THREAD_WAIT 5 > > /* MSR_SCAN_HASHES_STATUS bit fields */ > union ifs_scan_hashes_status { > @@ -45,16 +52,100 @@ union ifs_chunks_auth_status { > }; > }; > > +/* MSR_ACTIVATE_SCAN bit fields */ > +union ifs_scan { > + u64 data; > + struct { > + u64 start :8; > + u64 stop :8; > + u64 rsvd :16; > + u64 delay :31; > + u64 sigmce :1; > + }; > +}; > + > +/* MSR_SCAN_STATUS bit fields */ > +union ifs_status { > + u64 data; > + struct { > + u64 chunk_num :8; > + u64 chunk_stop_index :8; > + u64 rsvd1 :16; > + u64 error_code :8; > + u64 rsvd2 :22; > + 
u64 control_error :1; > + u64 signature_error :1; > + }; > +}; > + > +/* > + * ifs_status.error_code values after rdmsr(SCAN_STATUS) > + * 0x0: no error. > + * 0x1: scan did not start because all sibling threads did not join. > + * 0x2: scan did not start because interrupt occurred prior to threads rendezvous > + * 0x3: scan did not start because power management conditions are inadequate. > + * 0x4: scan did not start because non-valid chunks in range stop_index:start_index. > + * 0x5: scan did not start because of mismatches in arguments between sibling threads. > + * 0x6: scan did not start because core is not capable of performing scan currently. > + * 0x7: not assigned. > + * 0x8: scan did not start because of exceed number of concurrent CPUs attempt to run scan. > + * 0x9: interrupt occurred. Scan operation aborted prematurely. Not all chunks executed. > + * 0xFE: not all scan chunks were executed. Maximum forward progress retries exceeded. > + * This is a driver populated error-code as hardware returns success in this scenario. > + */ > +#define IFS_NO_ERROR 0x0 > +#define IFS_OTHER_THREAD_DID_NOT_JOIN 0x1 > +#define IFS_INTERRUPTED_BEFORE_RENDEZVOUS 0x2 > +#define IFS_POWER_MGMT_INADEQUATE_FOR_SCAN 0x3 > +#define IFS_INVALID_CHUNK_RANGE 0x4 > +#define IFS_MISMATCH_ARGUMENTS_BETWEEN_THREADS 0x5 > +#define IFS_CORE_NOT_CAPABLE_CURRENTLY 0x6 > +/* Code 0x7 not assigned */ > +#define IFS_EXCEED_NUMBER_OF_THREADS_CONCURRENT 0x8 > +#define IFS_INTERRUPTED_DURING_EXECUTION 0x9 > + > +#define IFS_SW_TIMEOUT 0xFD > +#define IFS_SW_PARTIAL_COMPLETION 0xFE > + > /** > * struct ifs_params - global ifs parameter for all cpus. > * @loaded_version: stores the currently loaded ifs image version. > * @valid_chunks: number of chunks which could be validated. > + * @fail_mask: stores the cpus which failed the scan. > + * @not_tested_mask: stores the cpus which have not been tested. 
> */ > struct ifs_params { > int loaded_version; > int valid_chunks; > + struct cpumask fail_mask; > + struct cpumask pass_mask; > + struct cpumask not_tested_mask; > }; > > +/** > + * struct ifs_state - per-cpu ifs parameter. > + * @scan_task: scan_task for kthread to run scan test on each cpu. > + * @first_time: to track if cpu is coming online for the first time. > + * @status: it holds simple status pass/fail/untested. > + * @scan_details: opaque scan status code from h/w. > + * @scan_wq: kthread task wait queue. > + * @mask: triggering the test by setting the mask. > + */ > +struct ifs_state { > + struct task_struct *scan_task; > + int first_time; > + int status; > + u64 scan_details; > + wait_queue_head_t scan_wq; > + struct cpumask mask; > +}; > + > +DECLARE_PER_CPU(struct ifs_state, ifs_state); > + > int load_ifs_binary(void); > extern struct ifs_params ifs_params; > +extern atomic_t siblings_in; > +extern atomic_t siblings_out; > +extern struct completion test_thread_done; > +extern int cpu_sibl_ct; > #endif