To reduce the cost of polling, we introduce three sysctls to control the poll time. Signed-off-by: Yang Zhang <yang.zhang.wz@xxxxxxxxx> Signed-off-by: Quan Xu <quan.xu0@xxxxxxxxx> Cc: Jonathan Corbet <corbet@xxxxxxx> Cc: Jeremy Fitzhardinge <jeremy@xxxxxxxx> Cc: Chris Wright <chrisw@xxxxxxxxxxxx> Cc: Alok Kataria <akataria@xxxxxxxxxx> Cc: Rusty Russell <rusty@xxxxxxxxxxxxxxx> Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx> Cc: Ingo Molnar <mingo@xxxxxxxxxx> Cc: "H. Peter Anvin" <hpa@xxxxxxxxx> Cc: x86@xxxxxxxxxx Cc: "Luis R. Rodriguez" <mcgrof@xxxxxxxxxx> Cc: Kees Cook <keescook@xxxxxxxxxxxx> Cc: Mauro Carvalho Chehab <mchehab@xxxxxxxxxx> Cc: Krzysztof Kozlowski <krzk@xxxxxxxxxx> Cc: Josh Poimboeuf <jpoimboe@xxxxxxxxxx> Cc: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> Cc: Petr Mladek <pmladek@xxxxxxxx> Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx> Cc: Jessica Yu <jeyu@xxxxxxxxxx> Cc: Larry Finger <Larry.Finger@xxxxxxxxxxxx> Cc: zijun_hu <zijun_hu@xxxxxxx> Cc: Baoquan He <bhe@xxxxxxxxxx> Cc: Johannes Berg <johannes.berg@xxxxxxxxx> Cc: Ian Abbott <abbotti@xxxxxxxxx> Cc: linux-doc@xxxxxxxxxxxxxxx Cc: linux-kernel@xxxxxxxxxxxxxxx Cc: virtualization@xxxxxxxxxxxxxxxxxxxxxxxxxx Cc: linux-fsdevel@xxxxxxxxxxxxxxx --- Documentation/sysctl/kernel.txt | 25 +++++++++++++++++++++++++ arch/x86/kernel/paravirt.c | 4 ++++ include/linux/kernel.h | 6 ++++++ kernel/sysctl.c | 23 +++++++++++++++++++++++ 4 files changed, 58 insertions(+) diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt index bac23c1..67447b8 100644 --- a/Documentation/sysctl/kernel.txt +++ b/Documentation/sysctl/kernel.txt @@ -63,6 +63,9 @@ show up in /proc/sys/kernel: - perf_event_max_stack - perf_event_max_contexts_per_stack - pid_max +- poll_grow [ X86 only ] +- poll_shrink [ X86 only ] +- poll_threshold_ns [ X86 only ] - powersave-nap [ PPC only ] - printk - printk_delay @@ -702,6 +705,28 @@ kernel tries to allocate a number starting from this one. 
============================================================== +poll_grow: (X86 only) + +This parameter is the multiplier used by grow_poll_ns() to increase the poll time. +By default, the value is 2. + +============================================================== +poll_shrink: (X86 only) + +This parameter is the divisor used by shrink_poll_ns() to reduce the poll time. +By default, the value is 2. + +============================================================== +poll_threshold_ns: (X86 only) + +This parameter controls the maximum poll time before entering the real idle path. +This parameter is expected to take effect only when running inside a VM. +It makes no sense to turn it on when running on bare metal. +By default, the value is 0, which means don't poll. It is recommended to change +the value to non-zero if running latency-bound workloads inside a VM. + +============================================================== + powersave-nap: (PPC only) If set, Linux-PPC will use the 'nap' mode of powersaving, diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index a11b2c2..0b92f8f 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -318,6 +318,10 @@ struct pv_idle_ops pv_idle_ops = { .update = paravirt_nop, }; +unsigned long poll_threshold_ns; +unsigned int poll_shrink = 2; +unsigned int poll_grow = 2; + __visible struct pv_irq_ops pv_irq_ops = { .save_fl = __PV_IS_CALLEE_SAVE(native_save_fl), .restore_fl = __PV_IS_CALLEE_SAVE(native_restore_fl), diff --git a/include/linux/kernel.h b/include/linux/kernel.h index bd6d96c..6cb2820 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -462,6 +462,12 @@ extern __scanf(2, 0) extern bool crash_kexec_post_notifiers; +#ifdef CONFIG_PARAVIRT +extern unsigned long poll_threshold_ns; +extern unsigned int poll_shrink; +extern unsigned int poll_grow; +#endif + /* * panic_cpu is used for synchronizing panic() and crash_kexec() execution. It * holds a CPU number which is executing panic() currently. 
A value of diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 6648fbb..9b86a8f 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1229,6 +1229,29 @@ static int sysrq_sysctl_handler(struct ctl_table *table, int write, .extra2 = &one, }, #endif +#ifdef CONFIG_PARAVIRT + { + .procname = "halt_poll_threshold", + .data = &poll_threshold_ns, + .maxlen = sizeof(unsigned long), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "halt_poll_grow", + .data = &poll_grow, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "halt_poll_shrink", + .data = &poll_shrink, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, +#endif { } }; -- 1.8.3.1