Hi, On Tue, Jul 21, 2020 at 5:10 AM Sumit Garg <sumit.garg@xxxxxxxxxx> wrote: > > In a future patch we will add support to the serial core to make it > possible to trigger a magic sysrq from an NMI context. Prepare for this > by marking some sysrq actions as NMI safe. Safe actions will be allowed > to run from NMI context whilst that cannot run from an NMI will be queued > as irq_work for later processing. > > A particular sysrq handler is only marked as NMI safe in case the handler > isn't contending for any synchronization primitives as in NMI context > they are expected to cause deadlocks. Note that the debug sysrq do not > contend for any synchronization primitives. It does call kgdb_breakpoint() > to provoke a trap but that trap handler should be NMI safe on > architectures that implement an NMI. > > Signed-off-by: Sumit Garg <sumit.garg@xxxxxxxxxx> > --- > drivers/tty/sysrq.c | 33 ++++++++++++++++++++++++++++++++- > include/linux/sysrq.h | 1 + > kernel/debug/debug_core.c | 1 + > 3 files changed, 34 insertions(+), 1 deletion(-) > > diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c > index 7c95afa9..8017e33 100644 > --- a/drivers/tty/sysrq.c > +++ b/drivers/tty/sysrq.c > @@ -50,6 +50,8 @@ > #include <linux/syscalls.h> > #include <linux/of.h> > #include <linux/rcupdate.h> > +#include <linux/irq_work.h> > +#include <linux/kfifo.h> > > #include <asm/ptrace.h> > #include <asm/irq_regs.h> > @@ -111,6 +113,7 @@ static const struct sysrq_key_op sysrq_loglevel_op = { > .help_msg = "loglevel(0-9)", > .action_msg = "Changing Loglevel", > .enable_mask = SYSRQ_ENABLE_LOG, > + .nmi_safe = true, > }; > > #ifdef CONFIG_VT > @@ -157,6 +160,7 @@ static const struct sysrq_key_op sysrq_crash_op = { > .help_msg = "crash(c)", > .action_msg = "Trigger a crash", > .enable_mask = SYSRQ_ENABLE_DUMP, > + .nmi_safe = true, > }; > > static void sysrq_handle_reboot(int key) > @@ -170,6 +174,7 @@ static const struct sysrq_key_op sysrq_reboot_op = { > .help_msg = "reboot(b)", > .action_msg = "Resetting", > .enable_mask = SYSRQ_ENABLE_BOOT, > + .nmi_safe = true, > }; > > const struct sysrq_key_op *__sysrq_reboot_op = &sysrq_reboot_op; > @@ -217,6 +222,7 @@ static const struct sysrq_key_op sysrq_showlocks_op = { > .handler = sysrq_handle_showlocks, > .help_msg = "show-all-locks(d)", > .action_msg = "Show Locks Held", > + .nmi_safe = true, > }; > #else > #define sysrq_showlocks_op (*(const struct sysrq_key_op *)NULL) > @@ -289,6 +295,7 @@ static const struct sysrq_key_op sysrq_showregs_op = { > .help_msg = "show-registers(p)", > .action_msg = "Show Regs", > .enable_mask = SYSRQ_ENABLE_DUMP, > + .nmi_safe = true, > }; > > static void sysrq_handle_showstate(int key) > @@ -326,6 +333,7 @@ static const struct sysrq_key_op sysrq_ftrace_dump_op = { > .help_msg = "dump-ftrace-buffer(z)", > .action_msg = "Dump ftrace buffer", > .enable_mask = SYSRQ_ENABLE_DUMP, > + .nmi_safe = true, > }; > #else > #define sysrq_ftrace_dump_op (*(const struct sysrq_key_op *)NULL) > @@ -538,6 +546,23 @@ static void __sysrq_put_key_op(int key, const struct sysrq_key_op *op_p) > sysrq_key_table[i] = op_p; > } > > +#define SYSRQ_NMI_FIFO_SIZE 64 > +static DEFINE_KFIFO(sysrq_nmi_fifo, int, SYSRQ_NMI_FIFO_SIZE); A 64-entry FIFO seems excessive. Quite honestly even a FIFO seems a bit excessive and it feels like if two sysrqs were received in super quick succession that it would be OK to just process the first one. I guess if it simplifies the processing to have a FIFO then it shouldn't hurt, but no need for 64 entries. > +static void sysrq_do_nmi_work(struct irq_work *work) > +{ > + const struct sysrq_key_op *op_p; > + int key; > + > + while (kfifo_out(&sysrq_nmi_fifo, &key, 1)) { > + op_p = __sysrq_get_key_op(key); > + if (op_p) > + op_p->handler(key); > + } Do you need to manage "suppress_printk" in this function? Do you need to call rcu_sysrq_start() and rcu_read_lock()? If so, how do you prevent racing between the mucking we're doing with these things and the mucking that the NMI does with them? > +} > + > +static DEFINE_IRQ_WORK(sysrq_nmi_work, sysrq_do_nmi_work); > + > void __handle_sysrq(int key, bool check_mask) > { > const struct sysrq_key_op *op_p; > @@ -568,7 +593,13 @@ void __handle_sysrq(int key, bool check_mask) > if (!check_mask || sysrq_on_mask(op_p->enable_mask)) { > pr_info("%s\n", op_p->action_msg); > console_loglevel = orig_log_level; > - op_p->handler(key); > + > + if (in_nmi() && !op_p->nmi_safe) { > + kfifo_in(&sysrq_nmi_fifo, &key, 1); Rather than kfifo_in() and kfifo_out(), I think you can use kfifo_put() and kfifo_get(). As I understand it those just get/put one element which is what you want. > + irq_work_queue(&sysrq_nmi_work); Wishful thinking, but (as far as I can tell) irq_work_queue() only queues work on the CPU running the NMI. I don't have lots of NMI experience, but any chance there is a variant that will queue work on any CPU? Then sysrq handlers that aren't NMI aware will be more likely to work. > + } else { > + op_p->handler(key); > + } > } else { > pr_info("This sysrq operation is disabled.\n"); > console_loglevel = orig_log_level; > diff --git a/include/linux/sysrq.h b/include/linux/sysrq.h > index 3a582ec..630b5b9 100644 > --- a/include/linux/sysrq.h > +++ b/include/linux/sysrq.h > @@ -34,6 +34,7 @@ struct sysrq_key_op { > const char * const help_msg; > const char * const action_msg; > const int enable_mask; > + const bool nmi_safe; > }; > > #ifdef CONFIG_MAGIC_SYSRQ > diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c > index 9e59347..2b51173 100644 > --- a/kernel/debug/debug_core.c > +++ b/kernel/debug/debug_core.c > @@ -943,6 +943,7 @@ static const struct sysrq_key_op sysrq_dbg_op = { > .handler = sysrq_handle_dbg, > .help_msg = "debug(g)", > .action_msg = "DEBUG", > + .nmi_safe = true, > }; > #endif > > -- > 2.7.4 >