On Thu, Feb 13, 2025 at 05:09:18PM -0600, Crystal Wood wrote: > On Mon, 2025-01-27 at 20:45 -0500, John B. Wyatt IV wrote: > > Allows Tuna to control cpu idle-state functionality on the system, > > including querying, enabling, disabling of cpu idle-states to control > > power usage or to test functionality. > > > > This requires cpupower, a utility in the Linux kernel repository and > > the cpupower Python bindings added in Linux 6.12 to control cpu > > idle-states. If cpupower is missing Tuna as a whole will continue to > > function and idle-set functionality will error out. > > > > Signed-off-by: John B. Wyatt IV <jwyatt@xxxxxxxxxx> > > Signed-off-by: John B. Wyatt IV <sageofredondo@xxxxxxxxx> > > --- > > tuna-cmd.py | 33 +++++++- > > tuna/cpupower.py | 202 +++++++++++++++++++++++++++++++++++++++++++++++ > > 2 files changed, 233 insertions(+), 2 deletions(-) > > create mode 100755 tuna/cpupower.py > > > > diff --git a/tuna-cmd.py b/tuna-cmd.py > > index d0323f5..81d0f48 100755 > > --- a/tuna-cmd.py > > +++ b/tuna-cmd.py > > @@ -25,6 +25,7 @@ from tuna import tuna, sysfs, utils > > import logging > > import time > > import shutil > > +import tuna.cpupower as cpw > > > def get_loglevel(level): > > if level.isdigit() and int(level) in range(0,5): > > @@ -115,8 +116,12 @@ def gen_parser(): > > "disable_perf": dict(action='store_true', help="Explicitly disable usage of perf in GUI for process view"), > > "refresh": dict(default=2500, metavar='MSEC', type=int, help="Refresh the GUI every MSEC milliseconds"), > > "priority": dict(default=(None, None), metavar="POLICY:RTPRIO", type=tuna.get_policy_and_rtprio, help="Set thread scheduler tunables: POLICY and RTPRIO"), > > - "background": dict(action='store_true', help="Run command as background task") > > - } > > + "background": dict(action='store_true', help="Run command as background task"), > > + "idle_state_disabled_status": dict(dest='idle_state_disabled_status', metavar='IDLESTATEDISABLEDSTATUS', type=int, help='Print if cpu idle state of the cpus in CPU-LIST is enabled or disabled. If CPU-LIST is not specified, default to all cpus.'), > > + "idle_info": dict(dest='idle_info', action='store_const', const=True, help='Print general idle information on cpus in CPU-LIST. If CPU-LIST is not specified, default to all cpus.'), > > + "disable_idle_state": dict(dest='disable_idle_state', metavar='IDLESTATEINDEX', type=int, help='Disable cpus in CPU-LIST\'s cpu idle (cpu sleep state). If CPU-LIST is not specified, default to all cpus.'), > > + "enable_idle_state": dict(dest='enable_idle_state', metavar='IDLESTATEINDEX', type=int, help='Enable cpus in CPU-LIST\'s cpu idle (cpu sleep state). If CPU-LIST is not specified, default to all cpus.') > > + } > > > > parser = HelpMessageParser(description="tuna - Application Tuning Program") > > > > @@ -147,6 +152,10 @@ def gen_parser(): > > show_irqs = subparser.add_parser('show_irqs', description='Show IRQ list', help='Show IRQ list') > > show_configs = subparser.add_parser('show_configs', description='List preloaded profiles', help='List preloaded profiles') > > > > + idle_set = subparser.add_parser('idle-set', > > + description='Query and set all idle states on a given CPU list. Requires libcpupower to be installed', > > + help='Set all idle states on a given CPU-LIST.') > > idle_state would be a better name (or idle-state, but underscores are > already used elsewhere...), since it can both set and get. Being used elsewhere is why I used it. Nothing else to add. > > It also mostly operates on individual states (-i being the exception), > not all at once. How about just: > > Manage CPU idle state disabling (requires libcpupower) Will do. > > Also, don't forget to update the man page. The man page will be in a future patch. > > > @@ -635,6 +651,19 @@ def main(): > > my_logger.addHandler(add_handler("DEBUG", tofile=False)) > > my_logger.info("Debug option set") > > > > + if args.command == 'idle-set': > > + if not cpw.have_cpupower: > > + print(f"Error: libcpupower bindings are not detected; need {cpw.cpupower_required_kernel} at a minimum.") > > + sys.exit(1) > > + > > + if not args.cpu_list or args.cpu_list == []: > > + args.cpu_list = cpw.Cpupower().get_all_cpu_list() > > + > > + my_cpupower = cpw.Cpupower(args.cpu_list) > > + ret = my_cpupower.idle_state_handler(args) > > + if ret > 0: > > + sys.exit(ret) > > Why not just pass in cpu_list as is, and have cpw understand what > an empty or absent list is? And it looks like it already partially > does check for None... If the user specifically does something > like --cpu '' should we really treat that as equivalent to not > specifing --cpu? I don't see other commands doing this. Will move the logic to the class. I do not understand what you wrote for the second part with checking for --cpu. Would you please rephase? Note I am still learning argparse; if you know of a better way, an example would be welcome. :-) > > Especially if you're going to delegate all other option parsing... > Why is cpulist special? Just do something like: > > elif args.command == 'idle_state' > cpw.idle_state(args) > > > if args.loglevel: > > if not args.debug: > > my_logger = setup_logging("my_logger") > > Why did you put the new command before log handling, rather than with > all the other commands? Will amend this. Looks like I misapplied a commit hunk. > > > diff --git a/tuna/cpupower.py b/tuna/cpupower.py > > new file mode 100755 > > index 0000000..b09dc2f > > --- /dev/null > > +++ b/tuna/cpupower.py > > @@ -0,0 +1,202 @@ > > +# Copyright (C) 2024 John B. Wyatt IV > > +# SPDX-License-Identifier: GPL-2.0-only > > + > > +from typing import List > > +import tuna.utils as utils > > + > > +cpupower_required_kernel = "6.12" > > +have_cpupower = None > > + > > + > > +import raw_pylibcpupower as cpw > > This is a bit confusing since you import this module as cpw > elsewhere... > > Also, I got this even without trying to use the new functionality: > > $ ./tuna-cmd.py > Traceback (most recent call last): > File "/home/crwood/git/tuna/./tuna-cmd.py", line 28, in <module> > import tuna.cpupower as cpw > File "/home/crwood/git/tuna/tuna/cpupower.py", line 11, in <module> > import raw_pylibcpupower as cpw > ModuleNotFoundError: No module named 'raw_pylibcpupower' > This was pointed out to me in a separate email. I have already fixed it for the v2 I will send. > Maybe something like: > > try: > import raw_pylibcpupower as lcpw > lcpw.cpufreq_get_available_frequencies(0) > except: > lcpw = None pylint does not like a module to be dynamically imported this way, but I found a workaround using importlib. pylint's messaging is very valuable so I rather not have to disable it. It gives a warning every time cpw is used otherwise. > > > + You must use have_cpupower variable to determine if the bindings were > > + detected in your code.""" > > Instead of doing to the class/module user what SWIG did to the binding > user, why not just throw an exception if the binding is missing? This > will automatically happen if lcpw is None, though you may want a > friendlier error message in the main entry point. Please see above. > > > + def __init__(self, cpulist=None): > > + if cpulist == None: > > + self.__cpulist = self.get_all_cpu_list() > > + else: > > + self.__cpulist = cpulist > > + > > + @classmethod > > + def get_all_cpu_list(cls): > > + return list(range(cls.get_idle_info()["all_cpus"])) > > Is this really idle-state-specific? Maybe just something like this > in tuna/utils.py: It is in the cpupower.py file, not an idle-state.py file. The only part specific to idle-state is idle-set is the only user. I was thinking of that, but that can be a future patch. I wanted this implementation to be kept simple. > > def get_all_cpu_list(): > return list(range(get_nr_cpus())) > > We shouldn't need to get all the idle state information just to get a > cpu list. I mimicked the way cpupower reports this information. The information is useful to report. I am not sure what your objection is. > > And all these class methods make me wonder why this is a class to begin > with, rather than just module functions. I plan to add future functionality down the road; a class would be appropriate for handling this. > > > + > > + @classmethod > > + def get_idle_info(cls, cpu=0): > > Why is cpu 0 special? cpupower itself for this functionality picks a random cpu thread to report. I choose cpu 0 as a stopgap until I find/upstream better functionality to handle this. I did not feel a random cpu was any better. > > > + idle_states, idle_states_amt = cls.get_idle_states(cpu) > > + idle_states_list = [] > > + for idle_state in range(0, len(idle_states)): > > + idle_states_list.append( > > + { > > + "CPU ID": cpu, > > + "Idle State Name": idle_states[idle_state], > > + "Flags/Description": cpw.cpuidle_state_desc(cpu, idle_state), > > + "Latency": cpw.cpuidle_state_latency(cpu, idle_state), > > + "Usage": cpw.cpuidle_state_usage(cpu, idle_state), > > + "Duration": cpw.cpuidle_state_time(cpu, idle_state) > > + } > > + ) > > + idle_info = { > > + "all_cpus": utils.get_nr_cpus(), > > + "CPUidle-driver": cpw.cpuidle_get_driver(), > > + "CPUidle-governor": cpw.cpuidle_get_governor(), > > + "idle-states-count": idle_states_amt, > > + "available-idle-states": idle_states, > > + "cpu-states": idle_states_list > > + } > > + return idle_info > > This seems overly complicated. Why do you bundle all this stuff up > just to extract it elsewhere? The call to get the data is simpler than > the data structure lookup. I do not understand. There are multiple calls to get the data. As I said above, I was mimicking how cpupower reports this information. I thought it would be useful to report. I wrap this data up into a hashmap that I hope may be useful to export as json for scripting in the future. > > > + > > + @classmethod > > + def print_idle_info(cls, cpu_list=[0]): > > The only thing that instantiating this class does is to store a cpu > list, and here you're passing it into a class method instead... Yes, please see above. > > > + def idle_state_handler(self, args) -> int: > > + if args.idle_state_disabled_status != None: > > + cstate_index = args.idle_state_disabled_status > > + cstate_list, cstate_amt = self.get_idle_states(args.cpu_list[0]) # Assumption, that all cpus have the same idle state > > The API doesn't make that assumption, so why should we? Systems exist > with heterogeneous CPUs... could be a reason to support looking up states > by name, if and when there are systems we care about that actually have > different cpuidle states. Heterogeneous CPU support will come in a future patch once I have upstreamed some more work. Note that the libcpupower API offers a way to get the number of cores of a system (get_cpu_topology()), but the code to do this is commented out (see the missing curly brace?) and returns a 0 for the number of cores currently. https://elixir.bootlin.com/linux/v6.13.3/source/tools/power/cpupower/lib/cpupower.c#L206-L213 > > And you don't need this lookup anyay -- libcpupower already does the > bounds check, and you can get the name inside the loop. I will check on that. > > > + if cstate_index < 0 or cstate_index >= cstate_amt: > > + print(f"Invalid idle state range. Total for this cpu is {cstate_amt}") > > + return 1 > > "this cpu"? Will amend. > > > + cstate_name = cstate_list[cstate_index] > > + ret = self.is_disabled_idle_state(cstate_index) > > + for i, e in enumerate(ret): > > + match e: > > + case 1: > > + print(f"CPU: {args.cpu_list[i]} Idle state \"{cstate_name}\" is disabled.") > > + case 0: > > + print(f"CPU: {args.cpu_list[i]} Idle state \"{cstate_name}\" is enabled.") > > + case -1: > > + print(f"Idlestate not available") > > + case -2: > > + print(f"Disabling is not supported by the kernel") > > + case _: > > + print(f"Not documented: {e}") > > + elif args.idle_info != None: > > + self.print_idle_info(args.cpu_list) > > + return 0 > > + elif args.disable_idle_state != None: > > + cstate_index = args.disable_idle_state > > + cstate_list, cstate_amt = self.get_idle_states(args.cpu_list[0]) # Assumption, that all cpus have the same idle state > > + if cstate_index < 0 or cstate_index >= cstate_amt: > > + print(f"Invalid idle state range. Total for this cpu is {cstate_amt}") > > + return 1 > > + cstate_name = cstate_list[cstate_index] > > + ret = self.disable_idle_state(cstate_index, 1) > > + for i, e in enumerate(ret): > > + match e: > > + case 0: > > + print(f"CPU: {args.cpu_list[i]} Idle state \"{cstate_name}\" is disabled.") > > + case -1: > > + print(f"Idlestate not available") > > + case -2: > > + print(f"Disabling is not supported by the kernel") > > + case -3: > > + print(f"No write access to disable/enable C-states: try using sudo") > > + case _: > > + print(f"Not documented: {e}") > > + elif args.enable_idle_state != None: > > + cstate_index = args.enable_idle_state > > + cstate_list, cstate_amt = self.get_idle_states(args.cpu_list[0]) # Assumption, that all cpus have the same idle state > > + if cstate_index < 0 or cstate_index >= cstate_amt: > > + print(f"Invalid idle state range. Total for this cpu is {cstate_amt}") > > + return 1 > > + cstate_name = cstate_list[cstate_index] > > + ret = self.disable_idle_state(cstate_index, 0) > > + for i, e in enumerate(ret): > > + match e: > > + case 0: > > + print(f"CPU: {args.cpu_list[i]} Idle state \"{cstate_name}\" is enabled.") > > + case -1: > > + print(f"Idlestate not available") > > + case -2: > > + print(f"Disabling is not supported by the kernel") > > + case -3: > > + print(f"No write access to disable/enable C-states: try using sudo") > > + case _: > > + print(f"Not documented: {e}") > > Factor out the error messages so they're not duplicated between > suboperations. Actually, pretty much the whole thing is duplicated between > enable/disable... Will do. > > Do we want to print anything at all on success? It looks like tuna > generally follows the Unix philosophy of not doing so. Good point. That will also make it easier to script. > > And the error messages are a bit vague... imagine running a script and > something deep inside it spits out "Disabling is not supported by the > kernel". Disabling what? > > > + else: > > + print(args) > > + print("idle-set error: you should not get here!") > > "you should not get here" is not a useful error message... jut throw > an exception. Will amend. -- Sincerely, John Wyatt Software Engineer, Core Kernel Red Hat