On Mon, 2025-01-27 at 20:45 -0500, John B. Wyatt IV wrote: > Allows Tuna to control cpu idle-state functionality on the system, > including querying, enabling, disabling of cpu idle-states to control > power usage or to test functionality. > > This requires cpupower, a utility in the Linux kernel repository and > the cpupower Python bindings added in Linux 6.12 to control cpu > idle-states. If cpupower is missing Tuna as a whole will continue to > function and idle-set functionality will error out. > > Signed-off-by: John B. Wyatt IV <jwyatt@xxxxxxxxxx> > Signed-off-by: John B. Wyatt IV <sageofredondo@xxxxxxxxx> > --- > tuna-cmd.py | 33 +++++++- > tuna/cpupower.py | 202 +++++++++++++++++++++++++++++++++++++++++++++++ > 2 files changed, 233 insertions(+), 2 deletions(-) > create mode 100755 tuna/cpupower.py > > diff --git a/tuna-cmd.py b/tuna-cmd.py > index d0323f5..81d0f48 100755 > --- a/tuna-cmd.py > +++ b/tuna-cmd.py > @@ -25,6 +25,7 @@ from tuna import tuna, sysfs, utils > import logging > import time > import shutil > +import tuna.cpupower as cpw > def get_loglevel(level): > if level.isdigit() and int(level) in range(0,5): > @@ -115,8 +116,12 @@ def gen_parser(): > "disable_perf": dict(action='store_true', help="Explicitly disable usage of perf in GUI for process view"), > "refresh": dict(default=2500, metavar='MSEC', type=int, help="Refresh the GUI every MSEC milliseconds"), > "priority": dict(default=(None, None), metavar="POLICY:RTPRIO", type=tuna.get_policy_and_rtprio, help="Set thread scheduler tunables: POLICY and RTPRIO"), > - "background": dict(action='store_true', help="Run command as background task") > - } > + "background": dict(action='store_true', help="Run command as background task"), > + "idle_state_disabled_status": dict(dest='idle_state_disabled_status', metavar='IDLESTATEDISABLEDSTATUS', type=int, help='Print if cpu idle state of the cpus in CPU-LIST is enabled or disabled. 
If CPU-LIST is not specified, default to all cpus.'), > + "idle_info": dict(dest='idle_info', action='store_const', const=True, help='Print general idle information on cpus in CPU-LIST. If CPU-LIST is not specified, default to all cpus.'), > + "disable_idle_state": dict(dest='disable_idle_state', metavar='IDLESTATEINDEX', type=int, help='Disable cpus in CPU-LIST\'s cpu idle (cpu sleep state). If CPU-LIST is not specified, default to all cpus.'), > + "enable_idle_state": dict(dest='enable_idle_state', metavar='IDLESTATEINDEX', type=int, help='Enable cpus in CPU-LIST\'s cpu idle (cpu sleep state). If CPU-LIST is not specified, default to all cpus.') > + } > > parser = HelpMessageParser(description="tuna - Application Tuning Program") > > @@ -147,6 +152,10 @@ def gen_parser(): > show_irqs = subparser.add_parser('show_irqs', description='Show IRQ list', help='Show IRQ list') > show_configs = subparser.add_parser('show_configs', description='List preloaded profiles', help='List preloaded profiles') > > + idle_set = subparser.add_parser('idle-set', > + description='Query and set all idle states on a given CPU list. Requires libcpupower to be installed', > + help='Set all idle states on a given CPU-LIST.') idle_state would be a better name (or idle-state, but underscores are already used elsewhere...), since it can both set and get. It also mostly operates on individual states (-i being the exception), not all at once. How about just: Manage CPU idle state disabling (requires libcpupower) Also, don't forget to update the man page. 
> @@ -635,6 +651,19 @@ def main(): > my_logger.addHandler(add_handler("DEBUG", tofile=False)) > my_logger.info("Debug option set") > > + if args.command == 'idle-set': > + if not cpw.have_cpupower: > + print(f"Error: libcpupower bindings are not detected; need {cpw.cpupower_required_kernel} at a minimum.") > + sys.exit(1) > + > + if not args.cpu_list or args.cpu_list == []: > + args.cpu_list = cpw.Cpupower().get_all_cpu_list() > + > + my_cpupower = cpw.Cpupower(args.cpu_list) > + ret = my_cpupower.idle_state_handler(args) > + if ret > 0: > + sys.exit(ret) Why not just pass in cpu_list as is, and have cpw understand what an empty or absent list is? And it looks like it already partially does check for None... If the user specifically does something like --cpu '' should we really treat that as equivalent to not specifying --cpu? I don't see other commands doing this. Especially if you're going to delegate all other option parsing... Why is cpulist special? Just do something like: elif args.command == 'idle_state': cpw.idle_state(args) > if args.loglevel: > if not args.debug: > my_logger = setup_logging("my_logger") Why did you put the new command before log handling, rather than with all the other commands? > diff --git a/tuna/cpupower.py b/tuna/cpupower.py > new file mode 100755 > index 0000000..b09dc2f > --- /dev/null > +++ b/tuna/cpupower.py > @@ -0,0 +1,202 @@ > +# Copyright (C) 2024 John B. Wyatt IV > +# SPDX-License-Identifier: GPL-2.0-only > + > +from typing import List > +import tuna.utils as utils > + > +cpupower_required_kernel = "6.12" > +have_cpupower = None > + > + > +import raw_pylibcpupower as cpw This is a bit confusing since you import this module as cpw elsewhere... 
Also, I got this even without trying to use the new functionality: $ ./tuna-cmd.py Traceback (most recent call last): File "/home/crwood/git/tuna/./tuna-cmd.py", line 28, in <module> import tuna.cpupower as cpw File "/home/crwood/git/tuna/tuna/cpupower.py", line 11, in <module> import raw_pylibcpupower as cpw ModuleNotFoundError: No module named 'raw_pylibcpupower' Maybe something like: try: import raw_pylibcpupower as lcpw lcpw.cpufreq_get_available_frequencies(0) except: lcpw = None > + You must use have_cpupower variable to determine if the bindings were > + detected in your code.""" Instead of doing to the class/module user what SWIG did to the binding user, why not just throw an exception if the binding is missing? This will automatically happen if lcpw is None, though you may want a friendlier error message in the main entry point. > + def __init__(self, cpulist=None): > + if cpulist == None: > + self.__cpulist = self.get_all_cpu_list() > + else: > + self.__cpulist = cpulist > + > + @classmethod > + def get_all_cpu_list(cls): > + return list(range(cls.get_idle_info()["all_cpus"])) Is this really idle-state-specific? Maybe just something like this in tuna/utils.py: def get_all_cpu_list(): return list(range(get_nr_cpus())) We shouldn't need to get all the idle state information just to get a cpu list. And all these class methods make me wonder why this is a class to begin with, rather than just module functions. > + > + @classmethod > + def get_idle_info(cls, cpu=0): Why is cpu 0 special? 
> + idle_states, idle_states_amt = cls.get_idle_states(cpu) > + idle_states_list = [] > + for idle_state in range(0, len(idle_states)): > + idle_states_list.append( > + { > + "CPU ID": cpu, > + "Idle State Name": idle_states[idle_state], > + "Flags/Description": cpw.cpuidle_state_desc(cpu, idle_state), > + "Latency": cpw.cpuidle_state_latency(cpu, idle_state), > + "Usage": cpw.cpuidle_state_usage(cpu, idle_state), > + "Duration": cpw.cpuidle_state_time(cpu, idle_state) > + } > + ) > + idle_info = { > + "all_cpus": utils.get_nr_cpus(), > + "CPUidle-driver": cpw.cpuidle_get_driver(), > + "CPUidle-governor": cpw.cpuidle_get_governor(), > + "idle-states-count": idle_states_amt, > + "available-idle-states": idle_states, > + "cpu-states": idle_states_list > + } > + return idle_info This seems overly complicated. Why do you bundle all this stuff up just to extract it elsewhere? The call to get the data is simpler than the data structure lookup. > + > + @classmethod > + def print_idle_info(cls, cpu_list=[0]): The only thing that instantiating this class does is to store a cpu list, and here you're passing it into a class method instead... > + def idle_state_handler(self, args) -> int: > + if args.idle_state_disabled_status != None: > + cstate_index = args.idle_state_disabled_status > + cstate_list, cstate_amt = self.get_idle_states(args.cpu_list[0]) # Assumption, that all cpus have the same idle state The API doesn't make that assumption, so why should we? Systems exist with heterogeneous CPUs... could be a reason to support looking up states by name, if and when there are systems we care about that actually have different cpuidle states. And you don't need this lookup anyway -- libcpupower already does the bounds check, and you can get the name inside the loop. > + if cstate_index < 0 or cstate_index >= cstate_amt: > + print(f"Invalid idle state range. Total for this cpu is {cstate_amt}") > + return 1 "this cpu"? 
> + cstate_name = cstate_list[cstate_index] > + ret = self.is_disabled_idle_state(cstate_index) > + for i, e in enumerate(ret): > + match e: > + case 1: > + print(f"CPU: {args.cpu_list[i]} Idle state \"{cstate_name}\" is disabled.") > + case 0: > + print(f"CPU: {args.cpu_list[i]} Idle state \"{cstate_name}\" is enabled.") > + case -1: > + print(f"Idlestate not available") > + case -2: > + print(f"Disabling is not supported by the kernel") > + case _: > + print(f"Not documented: {e}") > + elif args.idle_info != None: > + self.print_idle_info(args.cpu_list) > + return 0 > + elif args.disable_idle_state != None: > + cstate_index = args.disable_idle_state > + cstate_list, cstate_amt = self.get_idle_states(args.cpu_list[0]) # Assumption, that all cpus have the same idle state > + if cstate_index < 0 or cstate_index >= cstate_amt: > + print(f"Invalid idle state range. Total for this cpu is {cstate_amt}") > + return 1 > + cstate_name = cstate_list[cstate_index] > + ret = self.disable_idle_state(cstate_index, 1) > + for i, e in enumerate(ret): > + match e: > + case 0: > + print(f"CPU: {args.cpu_list[i]} Idle state \"{cstate_name}\" is disabled.") > + case -1: > + print(f"Idlestate not available") > + case -2: > + print(f"Disabling is not supported by the kernel") > + case -3: > + print(f"No write access to disable/enable C-states: try using sudo") > + case _: > + print(f"Not documented: {e}") > + elif args.enable_idle_state != None: > + cstate_index = args.enable_idle_state > + cstate_list, cstate_amt = self.get_idle_states(args.cpu_list[0]) # Assumption, that all cpus have the same idle state > + if cstate_index < 0 or cstate_index >= cstate_amt: > + print(f"Invalid idle state range. 
Total for this cpu is {cstate_amt}") > + return 1 > + cstate_name = cstate_list[cstate_index] > + ret = self.disable_idle_state(cstate_index, 0) > + for i, e in enumerate(ret): > + match e: > + case 0: > + print(f"CPU: {args.cpu_list[i]} Idle state \"{cstate_name}\" is enabled.") > + case -1: > + print(f"Idlestate not available") > + case -2: > + print(f"Disabling is not supported by the kernel") > + case -3: > + print(f"No write access to disable/enable C-states: try using sudo") > + case _: > + print(f"Not documented: {e}") Factor out the error messages so they're not duplicated between suboperations. Actually, pretty much the whole thing is duplicated between enable/disable... Do we want to print anything at all on success? It looks like tuna generally follows the Unix philosophy of not doing so. And the error messages are a bit vague... imagine running a script and something deep inside it spits out "Disabling is not supported by the kernel". Disabling what? > + else: > + print(args) > + print("idle-set error: you should not get here!") "you should not get here" is not a useful error message... just throw an exception. -Crystal