Patch "ipmi: Stop timers before cleaning up the module" has been added to the 4.4-stable tree

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



This is a note to let you know that I've just added the patch titled

    ipmi: Stop timers before cleaning up the module

to the 4.4-stable tree which can be found at:
    http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=summary

The filename of the patch is:
     ipmi-stop-timers-before-cleaning-up-the-module.patch
and it can be found in the queue-4.4 subdirectory.

If you, or anyone else, feels it should not be added to the stable tree,
please let <stable@xxxxxxxxxxxxxxx> know about it.


>From 4f7f5551a760eb0124267be65763008169db7087 Mon Sep 17 00:00:00 2001
From: Masamitsu Yamazaki <m-yamazaki@xxxxxxxxxxxxx>
Date: Wed, 15 Nov 2017 07:33:14 +0000
Subject: ipmi: Stop timers before cleaning up the module

From: Masamitsu Yamazaki <m-yamazaki@xxxxxxxxxxxxx>

commit 4f7f5551a760eb0124267be65763008169db7087 upstream.

System may crash after unloading ipmi_si.ko module
because a timer may remain and fire after the module cleaned up resources.

cleanup_one_si() contains the following processing.

        /*
         * Make sure that interrupts, the timer and the thread are
         * stopped and will not run again.
         */
        if (to_clean->irq_cleanup)
                to_clean->irq_cleanup(to_clean);
        wait_for_timer_and_thread(to_clean);

        /*
         * Timeouts are stopped, now make sure the interrupts are off
         * in the BMC.  Note that timers and CPU interrupts are off,
         * so no need for locks.
         */
        while (to_clean->curr_msg || (to_clean->si_state != SI_NORMAL)) {
                poll(to_clean);
                schedule_timeout_uninterruptible(1);
        }

si_state changes as following in the while loop calling poll(to_clean).

  SI_GETTING_MESSAGES
    => SI_CHECKING_ENABLES
     => SI_SETTING_ENABLES
      => SI_GETTING_EVENTS
       => SI_NORMAL

As written in the code comments above,
timers are expected to stop before the polling loop and not to run again.
But the timer is set again in the following process
when si_state becomes SI_SETTING_ENABLES.

  => poll
     => smi_event_handler
       => handle_transaction_done
          // smi_info->si_state == SI_SETTING_ENABLES
         => start_getting_events
           => start_new_msg
            => smi_mod_timer
              => mod_timer

As a result, before the timer set in start_new_msg() expires,
the polling loop may see si_state becoming SI_NORMAL
and the module clean-up finishes.

For example, hard LOCKUP and panic occurred as following.
smi_timeout was called after smi_event_handler,
kcs_event and hangs at port_inb()
trying to access I/O port after release.

    [exception RIP: port_inb+19]
    RIP: ffffffffc0473053  RSP: ffff88069fdc3d80  RFLAGS: 00000006
    RAX: ffff8806800f8e00  RBX: ffff880682bd9400  RCX: 0000000000000000
    RDX: 0000000000000ca3  RSI: 0000000000000ca3  RDI: ffff8806800f8e40
    RBP: ffff88069fdc3d80   R8: ffffffff81d86dfc   R9: ffffffff81e36426
    R10: 00000000000509f0  R11: 0000000000100000  R12: 0000000000]:000000
    R13: 0000000000000000  R14: 0000000000000246  R15: ffff8806800f8e00
    ORIG_RAX: ffffffffffffffff  CS: 0010  SS: 0000
 --- <NMI exception stack> ---

To fix the problem I defined a flag, timer_can_start,
as member of struct smi_info.
The flag is enabled immediately after initializing the timer
and disabled immediately before waiting for timer deletion.

Fixes: 0cfec916e86d ("ipmi: Start the timer and thread on internal msgs")
Signed-off-by: Yamazaki Masamitsu <m-yamazaki@xxxxxxxxxxxxx>
[Adjusted for recent changes in the driver.]
[Some fairly major changes went into the IPMI driver in 4.15, so this
 required a backport as the code had changed and moved to a different
 file.  The 4.14 version of this patch moved some code under an
 if statement and there was an API change causing it to not apply to
 4.4-4.6.]
Signed-off-by: Corey Minyard <cminyard@xxxxxxxxxx>
Signed-off-by: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx>
---
This is for kernel version 4.4-4.6 only.  Code and API changes required
backporting.  There is another version for 4.7-4.13 and another for
4.14 coming, too.  Bug was introduced in 4.4.

 drivers/char/ipmi/ipmi_si_intf.c |   44 ++++++++++++++++++++-------------------
 1 file changed, 23 insertions(+), 21 deletions(-)

--- a/drivers/char/ipmi/ipmi_si_intf.c
+++ b/drivers/char/ipmi/ipmi_si_intf.c
@@ -239,6 +239,9 @@ struct smi_info {
 	/* The timer for this si. */
 	struct timer_list   si_timer;
 
+	/* This flag is set, if the timer can be set */
+	bool		    timer_can_start;
+
 	/* This flag is set, if the timer is running (timer_pending() isn't enough) */
 	bool		    timer_running;
 
@@ -414,6 +417,8 @@ static enum si_sm_result start_next_msg(
 
 static void smi_mod_timer(struct smi_info *smi_info, unsigned long new_val)
 {
+	if (!smi_info->timer_can_start)
+		return;
 	smi_info->last_timeout_jiffies = jiffies;
 	mod_timer(&smi_info->si_timer, new_val);
 	smi_info->timer_running = true;
@@ -433,21 +438,18 @@ static void start_new_msg(struct smi_inf
 	smi_info->handlers->start_transaction(smi_info->si_sm, msg, size);
 }
 
-static void start_check_enables(struct smi_info *smi_info, bool start_timer)
+static void start_check_enables(struct smi_info *smi_info)
 {
 	unsigned char msg[2];
 
 	msg[0] = (IPMI_NETFN_APP_REQUEST << 2);
 	msg[1] = IPMI_GET_BMC_GLOBAL_ENABLES_CMD;
 
-	if (start_timer)
-		start_new_msg(smi_info, msg, 2);
-	else
-		smi_info->handlers->start_transaction(smi_info->si_sm, msg, 2);
+	start_new_msg(smi_info, msg, 2);
 	smi_info->si_state = SI_CHECKING_ENABLES;
 }
 
-static void start_clear_flags(struct smi_info *smi_info, bool start_timer)
+static void start_clear_flags(struct smi_info *smi_info)
 {
 	unsigned char msg[3];
 
@@ -456,10 +458,7 @@ static void start_clear_flags(struct smi
 	msg[1] = IPMI_CLEAR_MSG_FLAGS_CMD;
 	msg[2] = WDT_PRE_TIMEOUT_INT;
 
-	if (start_timer)
-		start_new_msg(smi_info, msg, 3);
-	else
-		smi_info->handlers->start_transaction(smi_info->si_sm, msg, 3);
+	start_new_msg(smi_info, msg, 3);
 	smi_info->si_state = SI_CLEARING_FLAGS;
 }
 
@@ -494,11 +493,11 @@ static void start_getting_events(struct
  * Note that we cannot just use disable_irq(), since the interrupt may
  * be shared.
  */
-static inline bool disable_si_irq(struct smi_info *smi_info, bool start_timer)
+static inline bool disable_si_irq(struct smi_info *smi_info)
 {
 	if ((smi_info->irq) && (!smi_info->interrupt_disabled)) {
 		smi_info->interrupt_disabled = true;
-		start_check_enables(smi_info, start_timer);
+		start_check_enables(smi_info);
 		return true;
 	}
 	return false;
@@ -508,7 +507,7 @@ static inline bool enable_si_irq(struct
 {
 	if ((smi_info->irq) && (smi_info->interrupt_disabled)) {
 		smi_info->interrupt_disabled = false;
-		start_check_enables(smi_info, true);
+		start_check_enables(smi_info);
 		return true;
 	}
 	return false;
@@ -526,7 +525,7 @@ static struct ipmi_smi_msg *alloc_msg_ha
 
 	msg = ipmi_alloc_smi_msg();
 	if (!msg) {
-		if (!disable_si_irq(smi_info, true))
+		if (!disable_si_irq(smi_info))
 			smi_info->si_state = SI_NORMAL;
 	} else if (enable_si_irq(smi_info)) {
 		ipmi_free_smi_msg(msg);
@@ -542,7 +541,7 @@ static void handle_flags(struct smi_info
 		/* Watchdog pre-timeout */
 		smi_inc_stat(smi_info, watchdog_pretimeouts);
 
-		start_clear_flags(smi_info, true);
+		start_clear_flags(smi_info);
 		smi_info->msg_flags &= ~WDT_PRE_TIMEOUT_INT;
 		if (smi_info->intf)
 			ipmi_smi_watchdog_pretimeout(smi_info->intf);
@@ -925,7 +924,7 @@ static enum si_sm_result smi_event_handl
 		 * disable and messages disabled.
 		 */
 		if (smi_info->supports_event_msg_buff || smi_info->irq) {
-			start_check_enables(smi_info, true);
+			start_check_enables(smi_info);
 		} else {
 			smi_info->curr_msg = alloc_msg_handle_irq(smi_info);
 			if (!smi_info->curr_msg)
@@ -1232,6 +1231,7 @@ static int smi_start_processing(void
 
 	/* Set up the timer that drives the interface. */
 	setup_timer(&new_smi->si_timer, smi_timeout, (long)new_smi);
+	new_smi->timer_can_start = true;
 	smi_mod_timer(new_smi, jiffies + SI_TIMEOUT_JIFFIES);
 
 	/* Try to claim any interrupts. */
@@ -3434,10 +3434,12 @@ static void check_for_broken_irqs(struct
 	check_set_rcv_irq(smi_info);
 }
 
-static inline void wait_for_timer_and_thread(struct smi_info *smi_info)
+static inline void stop_timer_and_thread(struct smi_info *smi_info)
 {
 	if (smi_info->thread != NULL)
 		kthread_stop(smi_info->thread);
+
+	smi_info->timer_can_start = false;
 	if (smi_info->timer_running)
 		del_timer_sync(&smi_info->si_timer);
 }
@@ -3635,7 +3637,7 @@ static int try_smi_init(struct smi_info
 	 * Start clearing the flags before we enable interrupts or the
 	 * timer to avoid racing with the timer.
 	 */
-	start_clear_flags(new_smi, false);
+	start_clear_flags(new_smi);
 
 	/*
 	 * IRQ is defined to be set when non-zero.  req_events will
@@ -3713,7 +3715,7 @@ static int try_smi_init(struct smi_info
 	return 0;
 
  out_err_stop_timer:
-	wait_for_timer_and_thread(new_smi);
+	stop_timer_and_thread(new_smi);
 
  out_err:
 	new_smi->interrupt_disabled = true;
@@ -3919,7 +3921,7 @@ static void cleanup_one_si(struct smi_in
 	 */
 	if (to_clean->irq_cleanup)
 		to_clean->irq_cleanup(to_clean);
-	wait_for_timer_and_thread(to_clean);
+	stop_timer_and_thread(to_clean);
 
 	/*
 	 * Timeouts are stopped, now make sure the interrupts are off
@@ -3930,7 +3932,7 @@ static void cleanup_one_si(struct smi_in
 		poll(to_clean);
 		schedule_timeout_uninterruptible(1);
 	}
-	disable_si_irq(to_clean, false);
+	disable_si_irq(to_clean);
 	while (to_clean->curr_msg || (to_clean->si_state != SI_NORMAL)) {
 		poll(to_clean);
 		schedule_timeout_uninterruptible(1);


Patches currently in stable-queue which might be from m-yamazaki@xxxxxxxxxxxxx are

queue-4.4/ipmi-stop-timers-before-cleaning-up-the-module.patch



[Index of Archives]     [Linux Kernel]     [Kernel Development Newbies]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite Hiking]     [Linux Kernel]     [Linux SCSI]