Currently, if cyclictest or rtla (whichever rteval is configured to use) is missing, the error output only says "measurement threads did not use the full time slot" and then rteval hangs. Avoid catching these exceptions so that they will be printed out, pointing to the actual problem. Also set up a thread exception hook to do a prompt and (relatively) graceful exit if a thread has any uncaught exception (this requires python >= 3.8). Signed-off-by: Crystal Wood <crwood@xxxxxxxxxx> --- README | 2 +- rteval/__init__.py | 12 ++++++++++++ rteval/modules/__init__.py | 14 +++++++++++--- rteval/modules/measurement/cyclictest.py | 14 +++++--------- rteval/modules/measurement/timerlat.py | 13 +++++-------- 5 files changed, 34 insertions(+), 21 deletions(-) diff --git a/README b/README index b352d7f..19704b4 100644 --- a/README +++ b/README @@ -16,7 +16,7 @@ The rteval source may be pulled from it's git tree on kernel.org: Rteval requires the following packages to run: -Python >= 3.0 +Python >= 3.8 http://www.python.org/download/ python-lxml diff --git a/rteval/__init__.py b/rteval/__init__.py index 7c13e84..6097ddf 100644 --- a/rteval/__init__.py +++ b/rteval/__init__.py @@ -19,6 +19,7 @@ import threading import time from datetime import datetime import sysconfig +from traceback import format_exception from rteval.modules.loads import LoadModules from rteval.modules.measurement import MeasurementModules from rteval.rtevalReport import rtevalReport @@ -29,6 +30,7 @@ from rteval import version RTEVAL_VERSION = version.RTEVAL_VERSION EARLYSTOP = False +threaderr = False stopsig = threading.Event() def sig_handler(signum, frame): @@ -39,9 +41,17 @@ def sig_handler(signum, frame): else: raise RuntimeError(f"SIGNAL received! ({signum})") +def except_hook(args): + global threaderr + + threading.__excepthook__(args) + threaderr = True + stopsig.set() + class RtEval(rtevalReport): def __init__(self, config, loadmods, measuremods, logger): self.__version = RTEVAL_VERSION + threading.excepthook = except_hook if not isinstance(config, rtevalConfig.rtevalConfig): raise TypeError("config variable is not an rtevalConfig object") @@ -237,6 +247,8 @@ class RtEval(rtevalReport): global EARLYSTOP rtevalres = 0 measure_start = self.__RunMeasurement() + if threaderr: + return 1 self._report(measure_start, self.__rtevcfg.xslt_report) if self.__rtevcfg.sysreport: diff --git a/rteval/modules/__init__.py b/rteval/modules/__init__.py index eb29db8..9827651 100644 --- a/rteval/modules/__init__.py +++ b/rteval/modules/__init__.py @@ -124,8 +124,8 @@ class rtevalModulePrototype(threading.Thread): def WaitForCompletion(self, wtime=None): """ Blocks until the module has completed its workload """ - if not self.shouldStart(): - # If it hasn't been started yet, nothing to wait for + if self.hadRuntimeError() or not self.shouldStart(): + # If it failed or hasn't been started yet, nothing to wait for return None return self.__events["finished"].wait(wtime) @@ -175,7 +175,7 @@ class rtevalModulePrototype(threading.Thread): return self._donotrun is False - def run(self): + def __run(self): "Workload thread runner - takes care of keeping the workload running as long as needed" if self.shouldStop(): return @@ -215,6 +215,12 @@ class rtevalModulePrototype(threading.Thread): self._WorkloadCleanup() + def run(self): + try: + self.__run() + except Exception as e: + self._setRuntimeError() + raise e def MakeReport(self): """ required module method, needs to return an libxml2.xmlNode object @@ -532,6 +538,8 @@ class RtEvalModules: rep_n = libxml2.newNode(self._report_tag) for (modname, mod) in self.__modules: + if mod.hadRuntimeError(): + continue self._logger.log(Log.DEBUG, f"Getting report from {modname}") modrep_n = mod.MakeReport() if modrep_n is not None: diff --git a/rteval/modules/measurement/cyclictest.py b/rteval/modules/measurement/cyclictest.py index 2e8f6f1..3d25c20 100644 --- a/rteval/modules/measurement/cyclictest.py +++ b/rteval/modules/measurement/cyclictest.py @@ -285,15 +285,11 @@ class Cyclictest(rtevalModulePrototype): fp.flush() self.__cyclicoutput.seek(0) - try: - self.__cyclicprocess = subprocess.Popen(self.__cmd, - stdout=self.__cyclicoutput, - stderr=self.__nullfp, - stdin=self.__nullfp) - self.__started = True - except OSError: - self.__started = False - + self.__cyclicprocess = subprocess.Popen(self.__cmd, + stdout=self.__cyclicoutput, + stderr=self.__nullfp, + stdin=self.__nullfp) + self.__started = True def WorkloadAlive(self): if self.__started: diff --git a/rteval/modules/measurement/timerlat.py b/rteval/modules/measurement/timerlat.py index 92bc070..df42777 100644 --- a/rteval/modules/measurement/timerlat.py +++ b/rteval/modules/measurement/timerlat.py @@ -252,14 +252,11 @@ class Timerlat(rtevalModulePrototype): self.__timerlat_out.seek(0) self.__timerlat_err.seek(0) - try: - self.__timerlat_process = subprocess.Popen(self.__cmd, - stdout=self.__timerlat_out, - stderr=self.__timerlat_err, - stdin=None) - self.__started = True - except OSError: - self.__started = False + self.__timerlat_process = subprocess.Popen(self.__cmd, + stdout=self.__timerlat_out, + stderr=self.__timerlat_err, + stdin=None) + self.__started = True def WorkloadAlive(self): if self.__started: -- 2.47.0