Re: [PATCH] rteval: Print useful exception info and exit on missing measurement tool

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 




On Fri, 1 Nov 2024, Crystal Wood wrote:

> Currently, if cyclictest or rtla (whichever rteval is configured to use)
> is missing, the error output only says "measurement threads did not use
> the full time slot" and then rteval hangs.
> 
> Avoid catching these exceptions so that they will be printed out,
> pointing to the actual problem.  Also set up a thread exception hook to
> do a prompt and (relatively) graceful exit if a thread has any uncaught
> exception (this requires python >= 3.8).
> 
> Signed-off-by: Crystal Wood <crwood@xxxxxxxxxx>
> ---
>  README                                   |  2 +-
>  rteval/__init__.py                       | 12 ++++++++++++
>  rteval/modules/__init__.py               | 14 +++++++++++---
>  rteval/modules/measurement/cyclictest.py | 14 +++++---------
>  rteval/modules/measurement/timerlat.py   | 13 +++++--------
>  5 files changed, 34 insertions(+), 21 deletions(-)
> 
> diff --git a/README b/README
> index b352d7f..19704b4 100644
> --- a/README
> +++ b/README
> @@ -16,7 +16,7 @@ The rteval source may be pulled from it's git tree on kernel.org:
>  
>  Rteval requires the following packages to run:
>  
> -Python >= 3.0
> +Python >= 3.8
>      http://www.python.org/download/
>  
>  python-lxml
> diff --git a/rteval/__init__.py b/rteval/__init__.py
> index 7c13e84..6097ddf 100644
> --- a/rteval/__init__.py
> +++ b/rteval/__init__.py
> @@ -19,6 +19,7 @@ import threading
>  import time
>  from datetime import datetime
>  import sysconfig
> +from traceback import format_exception
>  from rteval.modules.loads import LoadModules
>  from rteval.modules.measurement import MeasurementModules
>  from rteval.rtevalReport import rtevalReport
> @@ -29,6 +30,7 @@ from rteval import version
>  RTEVAL_VERSION = version.RTEVAL_VERSION
>  
>  EARLYSTOP = False
> +threaderr = False
>  
>  stopsig = threading.Event()
>  def sig_handler(signum, frame):
> @@ -39,9 +41,17 @@ def sig_handler(signum, frame):
>      else:
>          raise RuntimeError(f"SIGNAL received! ({signum})")
>  
> +def except_hook(args):
> +    global threaderr
> +
> +    threading.__excepthook__(args)
> +    threaderr = True
> +    stopsig.set()
> +
>  class RtEval(rtevalReport):
>      def __init__(self, config, loadmods, measuremods, logger):
>          self.__version = RTEVAL_VERSION
> +        threading.excepthook = except_hook
>  
>          if not isinstance(config, rtevalConfig.rtevalConfig):
>              raise TypeError("config variable is not an rtevalConfig object")
> @@ -237,6 +247,8 @@ class RtEval(rtevalReport):
>          global EARLYSTOP
>          rtevalres = 0
>          measure_start = self.__RunMeasurement()
> +        if threaderr:
> +            return 1
>  
>          self._report(measure_start, self.__rtevcfg.xslt_report)
>          if self.__rtevcfg.sysreport:
> diff --git a/rteval/modules/__init__.py b/rteval/modules/__init__.py
> index eb29db8..9827651 100644
> --- a/rteval/modules/__init__.py
> +++ b/rteval/modules/__init__.py
> @@ -124,8 +124,8 @@ class rtevalModulePrototype(threading.Thread):
>  
>      def WaitForCompletion(self, wtime=None):
>          """ Blocks until the module has completed its workload """
> -        if not self.shouldStart():
> -            # If it hasn't been started yet, nothing to wait for
> +        if self.hadRuntimeError() or not self.shouldStart():
> +            # If it failed or hasn't been started yet, nothing to wait for
>              return None
>          return self.__events["finished"].wait(wtime)
>  
> @@ -175,7 +175,7 @@ class rtevalModulePrototype(threading.Thread):
>          return self._donotrun is False
>  
>  
> -    def run(self):
> +    def __run(self):
>          "Workload thread runner - takes care of keeping the workload running as long as needed"
>          if self.shouldStop():
>              return
> @@ -215,6 +215,12 @@ class rtevalModulePrototype(threading.Thread):
>  
>          self._WorkloadCleanup()
>  
> +    def run(self):
> +        try:
> +            self.__run()
> +        except Exception as e:
> +            self._setRuntimeError()
> +            raise e
>  
>      def MakeReport(self):
>          """ required module method, needs to return an libxml2.xmlNode object
> @@ -532,6 +538,8 @@ class RtEvalModules:
>          rep_n = libxml2.newNode(self._report_tag)
>  
>          for (modname, mod) in self.__modules:
> +            if mod.hadRuntimeError():
> +                continue
>              self._logger.log(Log.DEBUG, f"Getting report from {modname}")
>              modrep_n = mod.MakeReport()
>              if modrep_n is not None:
> diff --git a/rteval/modules/measurement/cyclictest.py b/rteval/modules/measurement/cyclictest.py
> index 2e8f6f1..3d25c20 100644
> --- a/rteval/modules/measurement/cyclictest.py
> +++ b/rteval/modules/measurement/cyclictest.py
> @@ -285,15 +285,11 @@ class Cyclictest(rtevalModulePrototype):
>                  fp.flush()
>  
>          self.__cyclicoutput.seek(0)
> -        try:
> -            self.__cyclicprocess = subprocess.Popen(self.__cmd,
> -                                                    stdout=self.__cyclicoutput,
> -                                                    stderr=self.__nullfp,
> -                                                    stdin=self.__nullfp)
> -            self.__started = True
> -        except OSError:
> -            self.__started = False
> -
> +        self.__cyclicprocess = subprocess.Popen(self.__cmd,
> +                                                stdout=self.__cyclicoutput,
> +                                                stderr=self.__nullfp,
> +                                                stdin=self.__nullfp)
> +        self.__started = True
>  
>      def WorkloadAlive(self):
>          if self.__started:
> diff --git a/rteval/modules/measurement/timerlat.py b/rteval/modules/measurement/timerlat.py
> index 92bc070..df42777 100644
> --- a/rteval/modules/measurement/timerlat.py
> +++ b/rteval/modules/measurement/timerlat.py
> @@ -252,14 +252,11 @@ class Timerlat(rtevalModulePrototype):
>  
>          self.__timerlat_out.seek(0)
>          self.__timerlat_err.seek(0)
> -        try:
> -            self.__timerlat_process = subprocess.Popen(self.__cmd,
> -                                                       stdout=self.__timerlat_out,
> -                                                       stderr=self.__timerlat_err,
> -                                                       stdin=None)
> -            self.__started = True
> -        except OSError:
> -            self.__started = False
> +        self.__timerlat_process = subprocess.Popen(self.__cmd,
> +                                                   stdout=self.__timerlat_out,
> +                                                   stderr=self.__timerlat_err,
> +                                                   stdin=None)
> +        self.__started = True
>  
>      def WorkloadAlive(self):
>          if self.__started:
> -- 
> 2.47.0
> 
> 
> 
Signed-off-by: John Kacur <jkacur@xxxxxxxxxx>

Thank you





[Index of Archives]     [RT Stable]     [Kernel Newbies]     [IDE]     [Security]     [Git]     [Netfilter]     [Bugtraq]     [Yosemite]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux ATA RAID]     [Samba]     [Video 4 Linux]     [Device Mapper]

  Powered by Linux