On Fri, 1 Nov 2024, Crystal Wood wrote: > Currently, if cyclictest or rtla (whichever rteval is configured to use) > is missing, the error output only says "measurement threads did not use > the full time slot" and then rteval hangs. > > Avoid catching these exceptions so that they will be printed out, > pointing to the actual problem. Also set up a thread exception hook to > do a prompt and (relatively) graceful exit if a thread has any uncaught > exception (this requires python >= 3.8). > > Signed-off-by: Crystal Wood <crwood@xxxxxxxxxx> > --- > README | 2 +- > rteval/__init__.py | 12 ++++++++++++ > rteval/modules/__init__.py | 14 +++++++++++--- > rteval/modules/measurement/cyclictest.py | 14 +++++--------- > rteval/modules/measurement/timerlat.py | 13 +++++-------- > 5 files changed, 34 insertions(+), 21 deletions(-) > > diff --git a/README b/README > index b352d7f..19704b4 100644 > --- a/README > +++ b/README > @@ -16,7 +16,7 @@ The rteval source may be pulled from it's git tree on kernel.org: > > Rteval requires the following packages to run: > > -Python >= 3.0 > +Python >= 3.8 > http://www.python.org/download/ > > python-lxml > diff --git a/rteval/__init__.py b/rteval/__init__.py > index 7c13e84..6097ddf 100644 > --- a/rteval/__init__.py > +++ b/rteval/__init__.py > @@ -19,6 +19,7 @@ import threading > import time > from datetime import datetime > import sysconfig > +from traceback import format_exception > from rteval.modules.loads import LoadModules > from rteval.modules.measurement import MeasurementModules > from rteval.rtevalReport import rtevalReport > @@ -29,6 +30,7 @@ from rteval import version > RTEVAL_VERSION = version.RTEVAL_VERSION > > EARLYSTOP = False > +threaderr = False > > stopsig = threading.Event() > def sig_handler(signum, frame): > @@ -39,9 +41,17 @@ def sig_handler(signum, frame): > else: > raise RuntimeError(f"SIGNAL received! ({signum})") > > +def except_hook(args): > + global threaderr > + > + threading.__excepthook__(args) > + threaderr = True > + stopsig.set() > + > class RtEval(rtevalReport): > def __init__(self, config, loadmods, measuremods, logger): > self.__version = RTEVAL_VERSION > + threading.excepthook = except_hook > > if not isinstance(config, rtevalConfig.rtevalConfig): > raise TypeError("config variable is not an rtevalConfig object") > @@ -237,6 +247,8 @@ class RtEval(rtevalReport): > global EARLYSTOP > rtevalres = 0 > measure_start = self.__RunMeasurement() > + if threaderr: > + return 1 > > self._report(measure_start, self.__rtevcfg.xslt_report) > if self.__rtevcfg.sysreport: > diff --git a/rteval/modules/__init__.py b/rteval/modules/__init__.py > index eb29db8..9827651 100644 > --- a/rteval/modules/__init__.py > +++ b/rteval/modules/__init__.py > @@ -124,8 +124,8 @@ class rtevalModulePrototype(threading.Thread): > > def WaitForCompletion(self, wtime=None): > """ Blocks until the module has completed its workload """ > - if not self.shouldStart(): > - # If it hasn't been started yet, nothing to wait for > + if self.hadRuntimeError() or not self.shouldStart(): > + # If it failed or hasn't been started yet, nothing to wait for > return None > return self.__events["finished"].wait(wtime) > > @@ -175,7 +175,7 @@ class rtevalModulePrototype(threading.Thread): > return self._donotrun is False > > > - def run(self): > + def __run(self): > "Workload thread runner - takes care of keeping the workload running as long as needed" > if self.shouldStop(): > return > @@ -215,6 +215,12 @@ class rtevalModulePrototype(threading.Thread): > > self._WorkloadCleanup() > > + def run(self): > + try: > + self.__run() > + except Exception as e: > + self._setRuntimeError() > + raise e > > def MakeReport(self): > """ required module method, needs to return an libxml2.xmlNode object > @@ -532,6 +538,8 @@ class RtEvalModules: > rep_n = libxml2.newNode(self._report_tag) > > for (modname, mod) in self.__modules: > + if mod.hadRuntimeError(): > + continue > self._logger.log(Log.DEBUG, f"Getting report from {modname}") > modrep_n = mod.MakeReport() > if modrep_n is not None: > diff --git a/rteval/modules/measurement/cyclictest.py b/rteval/modules/measurement/cyclictest.py > index 2e8f6f1..3d25c20 100644 > --- a/rteval/modules/measurement/cyclictest.py > +++ b/rteval/modules/measurement/cyclictest.py > @@ -285,15 +285,11 @@ class Cyclictest(rtevalModulePrototype): > fp.flush() > > self.__cyclicoutput.seek(0) > - try: > - self.__cyclicprocess = subprocess.Popen(self.__cmd, > - stdout=self.__cyclicoutput, > - stderr=self.__nullfp, > - stdin=self.__nullfp) > - self.__started = True > - except OSError: > - self.__started = False > - > + self.__cyclicprocess = subprocess.Popen(self.__cmd, > + stdout=self.__cyclicoutput, > + stderr=self.__nullfp, > + stdin=self.__nullfp) > + self.__started = True > > def WorkloadAlive(self): > if self.__started: > diff --git a/rteval/modules/measurement/timerlat.py b/rteval/modules/measurement/timerlat.py > index 92bc070..df42777 100644 > --- a/rteval/modules/measurement/timerlat.py > +++ b/rteval/modules/measurement/timerlat.py > @@ -252,14 +252,11 @@ class Timerlat(rtevalModulePrototype): > > self.__timerlat_out.seek(0) > self.__timerlat_err.seek(0) > - try: > - self.__timerlat_process = subprocess.Popen(self.__cmd, > - stdout=self.__timerlat_out, > - stderr=self.__timerlat_err, > - stdin=None) > - self.__started = True > - except OSError: > - self.__started = False > + self.__timerlat_process = subprocess.Popen(self.__cmd, > + stdout=self.__timerlat_out, > + stderr=self.__timerlat_err, > + stdin=None) > + self.__started = True > > def WorkloadAlive(self): > if self.__started: > -- > 2.47.0 > > > Signed-off-by: John Kacur <jkacur@xxxxxxxxxx> Thank you