Re: [PATCH] x86: auto poll/interrupt mode switch for CMC to stop CMC storm

On 2012/5/23 10:30, Chen Gong wrote:
> This idea is inspired by the IA64 implementation; it is like NAPI
> for the network stack. When there are too many CMCIs to handle, the
> CMC interrupt can be disabled and polling mode takes over event
> handling. Once no more events occur in the system, the CMC interrupt
> is enabled again automatically.
> 
> Signed-off-by: Chen Gong <gong.chen@xxxxxxxxxxxxxxx>
> ---
>  arch/x86/kernel/cpu/mcheck/mce.c |   83 +++++++++++++++++++++++++++++++++++++-
>  1 file changed, 81 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
> index d086a09..6334f0d 100644
> --- a/arch/x86/kernel/cpu/mcheck/mce.c
> +++ b/arch/x86/kernel/cpu/mcheck/mce.c
> @@ -92,6 +92,7 @@ static char *mce_helper_argv[2] = { mce_helper, NULL };
> 
>  static DECLARE_WAIT_QUEUE_HEAD(mce_chrdev_wait);
> 
> +static DEFINE_PER_CPU(struct timer_list, mce_timer);
>  static DEFINE_PER_CPU(struct mce, mces_seen);
>  static int			cpu_missing;
> 
> @@ -100,8 +101,28 @@ DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = {
>  	[0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL
>  };
> 
> +#define CMC_POLL_INTERVAL	(1 * 30)
> +#define CMC_STORM		5
> +static DEFINE_PER_CPU(int, cmci_storm_warning);
> +static DEFINE_PER_CPU(unsigned long, first_cmci_jiffie);
> +static DEFINE_SPINLOCK(cmc_poll_lock);
> +
> +/*
> + * This variable tells whether we are in cmci-storm-happened mode.
> + * Start with this in the wrong state so we won't play w/ timers
> + * before the system is ready.
> + */
> +static int cmci_storm_detected = 1;
> +
>  static DEFINE_PER_CPU(struct work_struct, mce_work);
> 
> +static void mce_disable_cmci(void *data);
> +static void mce_enable_ce(void *all);
> +static void cmc_disable_keventd(struct work_struct *dummy);
> +static void cmc_enable_keventd(struct work_struct *dummy);
> +
> +static DECLARE_WORK(cmc_disable_work, cmc_disable_keventd);
> +static DECLARE_WORK(cmc_enable_work, cmc_enable_keventd);
>  /*
>   * CPU/chipset specific EDAC code can register a notifier call here to print
>   * MCE errors in a human-readable form.
> @@ -582,6 +603,37 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
>  {
>  	struct mce m;
>  	int i;
> +	unsigned long flag;
> +
> +	spin_lock_irqsave(&cmc_poll_lock, flag);
> +	if (cmci_storm_detected == 0) {
> +		unsigned long now = jiffies;
> +		int *count = &__get_cpu_var(cmci_storm_warning);
> +		unsigned long *history = &__get_cpu_var(first_cmci_jiffie);
> +
> +		if (time_before_eq(now, *history + HZ))
> +			(*count)++;
> +		else {
> +			*count = 0;
> +			*history = now;
> +		}
> +
> +		if (*count >= CMC_STORM) {
> +			cmci_storm_detected = 1;
> +			/* If we're being hit with CMC interrupts, we won't
> +			 * ever execute the schedule_work() below.  Need to
> +			 * disable CMC interrupts on this processor now.
> +			 */
> +			mce_disable_cmci(NULL);
> +			if (!work_pending(&cmc_disable_work))
> +				schedule_work(&cmc_disable_work);
> +			spin_unlock_irqrestore(&cmc_poll_lock, flag);
> +			printk(KERN_WARNING "WARNING: Switching to polling "\
> +				"CMC handler; error records may be lost\n");
> +			goto out;
> +		}
> +	}
> +	spin_unlock_irqrestore(&cmc_poll_lock, flag);
> 
>  	percpu_inc(mce_poll_count);
> 
> @@ -628,6 +680,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
>  		mce_wrmsrl(MSR_IA32_MCx_STATUS(i), 0);
>  	}
> 
> +out:
>  	/*
>  	 * Don't clear MCG_STATUS here because it's only defined for
>  	 * exceptions.
> @@ -1199,6 +1252,20 @@ static void mce_process_work(struct work_struct *dummy)
>  		memory_failure(pfn, MCE_VECTOR, 0);
>  }
> 
> +static void cmc_disable_keventd(struct work_struct *dummy)
> +{
> +	struct timer_list *t = __this_cpu_ptr(&mce_timer);
> +
> +	on_each_cpu(mce_disable_cmci, NULL, 0);
> +	mod_timer(t, jiffies + CMC_POLL_INTERVAL * HZ);
> +}
> +
> +static void cmc_enable_keventd(struct work_struct *dummy)
> +{
> +	/* don't re-initiate timer */
> +	on_each_cpu(mce_enable_ce, NULL, 0);
> +}
> +
>  #ifdef CONFIG_X86_MCE_INTEL
>  /***
>   * mce_log_therm_throt_event - Logs the thermal throttling event to mcelog
> @@ -1232,12 +1299,12 @@ void mce_log_therm_throt_event(__u64 status)
>  static int check_interval = 5 * 60; /* 5 minutes */
> 
>  static DEFINE_PER_CPU(int, mce_next_interval); /* in jiffies */
> -static DEFINE_PER_CPU(struct timer_list, mce_timer);
> 
>  static void mce_start_timer(unsigned long data)
>  {
>  	struct timer_list *t = &per_cpu(mce_timer, data);
>  	int *n;
> +	unsigned long flags;
> 
>  	WARN_ON(smp_processor_id() != data);
> 
> @@ -1253,8 +1320,19 @@ static void mce_start_timer(unsigned long data)
>  	n = &__get_cpu_var(mce_next_interval);
>  	if (mce_notify_irq())
>  		*n = max(*n/2, HZ/100);
> -	else
> +	else {
>  		*n = min(*n*2, (int)round_jiffies_relative(check_interval*HZ));
> +		/* if no CMC event, switch out of polling mode */
> +		spin_lock_irqsave(&cmc_poll_lock, flags);
> +		if (cmci_storm_detected == 1) {
> +			printk(KERN_WARNING "Returning to interrupt driven "\
> +					"CMC handler\n");
> +			if (!work_pending(&cmc_enable_work))
> +				schedule_work(&cmc_enable_work);
> +			cmci_storm_detected = 0;
> +		}
> +		spin_unlock_irqrestore(&cmc_poll_lock, flags);
> +	}
> 
>  	t->expires = jiffies + *n;
>  	add_timer_on(t, smp_processor_id());
> @@ -1547,6 +1625,7 @@ void __cpuinit mcheck_cpu_init(struct cpuinfo_x86 *c)
>  	__mcheck_cpu_init_generic();
>  	__mcheck_cpu_init_vendor(c);
>  	__mcheck_cpu_init_timer();
> +	cmci_storm_detected = 0;
>  	INIT_WORK(&__get_cpu_var(mce_work), mce_process_work);
>  	init_irq_work(&__get_cpu_var(mce_irq_work), &mce_irq_work_cb);
>  }
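
For anyone skimming the thread, the core of the heuristic is small enough to
show standalone. Below is a minimal user-space sketch, not kernel code: the
HZ value, the event timestamps, and the printf reporting are made-up
stand-ins, and cmc_event()/poll_quiet() are hypothetical names for what the
patch does inside machine_check_poll() and mce_start_timer().

#include <stdbool.h>
#include <stdio.h>

#define HZ        1000   /* pretend jiffies tick at 1000 Hz (assumption) */
#define CMC_STORM 5      /* events per window that trip the switch */

static unsigned long window_start; /* jiffy of first event in the window */
static int count;                  /* events counted in the current window */
static bool storm;                 /* true => polling mode */

/* Hypothetical helper: feed one CMC event timestamp. Mirrors the
 * machine_check_poll() hunk: events inside a one-second window bump the
 * counter; a gap larger than HZ starts a fresh window. */
static bool cmc_event(unsigned long now)
{
	if (storm)
		return true; /* interrupt already off; poll timer owns events */

	if (now <= window_start + HZ) {
		count++;
	} else {
		count = 0;
		window_start = now;
	}

	if (count >= CMC_STORM) {
		storm = true; /* patch: mce_disable_cmci() + schedule_work() */
		printf("storm at jiffy %lu: switch to polling\n", now);
	}
	return storm;
}

/* Hypothetical helper standing in for the mce_start_timer() hunk:
 * a polling interval with no new events switches back to interrupts. */
static void poll_quiet(void)
{
	if (storm) {
		storm = false; /* patch: cmc_enable_work re-enables CMCI */
		count = 0;
		printf("quiet interval: back to interrupt mode\n");
	}
}

int main(void)
{
	/* A burst of events inside one second trips the threshold... */
	unsigned long burst[] = { 10, 20, 30, 40, 50, 60 };
	for (unsigned int i = 0; i < 6; i++)
		cmc_event(burst[i]);

	/* ...and a quiet polling interval switches back. */
	poll_quiet();
	return 0;
}

The spinlock and work-queue indirection in the real patch exist because
mce_disable_cmci()/mce_enable_ce() must run on every CPU via on_each_cpu(),
while the CPU being flooded disables its own CMCI immediately, as the
comment in the machine_check_poll() hunk explains.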

Oops, I sent it to the wrong LKML address. I will send this patch again.

