On 2012/5/23 10:30, Chen Gong wrote:
> This idea is inspired by the IA64 implementation. It works like NAPI
> in the network stack: when there are too many CMCIs to handle, the
> interrupt is disabled and poll mode takes over event handling. When
> no more events occur in the system, the CMC interrupt is re-enabled
> automatically.
>
> Signed-off-by: Chen Gong <gong.chen@xxxxxxxxxxxxxxx>
> ---
>  arch/x86/kernel/cpu/mcheck/mce.c | 83 +++++++++++++++++++++++++++++++++++++-
>  1 file changed, 81 insertions(+), 2 deletions(-)
>
> diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
> index d086a09..6334f0d 100644
> --- a/arch/x86/kernel/cpu/mcheck/mce.c
> +++ b/arch/x86/kernel/cpu/mcheck/mce.c
> @@ -92,6 +92,7 @@ static char *mce_helper_argv[2] = { mce_helper, NULL };
>
>  static DECLARE_WAIT_QUEUE_HEAD(mce_chrdev_wait);
>
> +static DEFINE_PER_CPU(struct timer_list, mce_timer);
>  static DEFINE_PER_CPU(struct mce, mces_seen);
>  static int cpu_missing;
>
> @@ -100,8 +101,28 @@ DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = {
>  	[0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL
>  };
>
> +#define CMC_POLL_INTERVAL	(1 * 30)
> +#define CMC_STORM		5
> +static DEFINE_PER_CPU(int, cmci_storm_warning);
> +static DEFINE_PER_CPU(unsigned long, first_cmci_jiffie);
> +static DEFINE_SPINLOCK(cmc_poll_lock);
> +
> +/*
> + * This variable tells whether we are in cmci-storm-happened mode.
> + * Start with this in the wrong state so we won't play w/ timers
> + * before the system is ready.
> + */
> +static int cmci_storm_detected = 1;
> +
>  static DEFINE_PER_CPU(struct work_struct, mce_work);
>
> +static void mce_disable_cmci(void *data);
> +static void mce_enable_ce(void *all);
> +static void cmc_disable_keventd(struct work_struct *dummy);
> +static void cmc_enable_keventd(struct work_struct *dummy);
> +
> +static DECLARE_WORK(cmc_disable_work, cmc_disable_keventd);
> +static DECLARE_WORK(cmc_enable_work, cmc_enable_keventd);
>  /*
>   * CPU/chipset specific EDAC code can register a notifier call here to print
>   * MCE errors in a human-readable form.
> @@ -582,6 +603,37 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
>  {
>  	struct mce m;
>  	int i;
> +	unsigned long flag;
> +
> +	spin_lock_irqsave(&cmc_poll_lock, flag);
> +	if (cmci_storm_detected == 0) {
> +		unsigned long now = jiffies;
> +		int *count = &__get_cpu_var(cmci_storm_warning);
> +		unsigned long *history = &__get_cpu_var(first_cmci_jiffie);
> +
> +		if (time_before_eq(now, *history + HZ))
> +			(*count)++;
> +		else {
> +			*count = 0;
> +			*history = now;
> +		}
> +
> +		if (*count >= CMC_STORM) {
> +			cmci_storm_detected = 1;
> +			/* If we're being hit with CMC interrupts, we won't
> +			 * ever execute the schedule_work() below.  Need to
> +			 * disable CMC interrupts on this processor now.
> +			 */
> +			mce_disable_cmci(NULL);
> +			if (!work_pending(&cmc_disable_work))
> +				schedule_work(&cmc_disable_work);
> +			spin_unlock_irqrestore(&cmc_poll_lock, flag);
> +			printk(KERN_WARNING "WARNING: Switching to polling "\
> +				"CMC handler; error records may be lost\n");
> +			goto out;
> +		}
> +	}
> +	spin_unlock_irqrestore(&cmc_poll_lock, flag);
>
>  	percpu_inc(mce_poll_count);
>
> @@ -628,6 +680,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
>  		mce_wrmsrl(MSR_IA32_MCx_STATUS(i), 0);
>  	}
>
> +out:
>  	/*
>  	 * Don't clear MCG_STATUS here because it's only defined for
>  	 * exceptions.
> @@ -1199,6 +1252,20 @@ static void mce_process_work(struct work_struct *dummy)
>  	memory_failure(pfn, MCE_VECTOR, 0);
>  }
>
> +static void cmc_disable_keventd(struct work_struct *dummy)
> +{
> +	struct timer_list *t = __this_cpu_ptr(&mce_timer);
> +
> +	on_each_cpu(mce_disable_cmci, NULL, 0);
> +	mod_timer(t, jiffies + CMC_POLL_INTERVAL * HZ);
> +}
> +
> +static void cmc_enable_keventd(struct work_struct *dummy)
> +{
> +	/* don't re-initiate timer */
> +	on_each_cpu(mce_enable_ce, NULL, 0);
> +}
> +
>  #ifdef CONFIG_X86_MCE_INTEL
>  /***
>   * mce_log_therm_throt_event - Logs the thermal throttling event to mcelog
> @@ -1232,12 +1299,12 @@ void mce_log_therm_throt_event(__u64 status)
>  static int check_interval = 5 * 60; /* 5 minutes */
>
>  static DEFINE_PER_CPU(int, mce_next_interval); /* in jiffies */
> -static DEFINE_PER_CPU(struct timer_list, mce_timer);
>
>  static void mce_start_timer(unsigned long data)
>  {
>  	struct timer_list *t = &per_cpu(mce_timer, data);
>  	int *n;
> +	unsigned long flags;
>
>  	WARN_ON(smp_processor_id() != data);
>
> @@ -1253,8 +1320,19 @@ static void mce_start_timer(unsigned long data)
>  	n = &__get_cpu_var(mce_next_interval);
>  	if (mce_notify_irq())
>  		*n = max(*n/2, HZ/100);
> -	else
> +	else {
>  		*n = min(*n*2, (int)round_jiffies_relative(check_interval*HZ));
> +		/* if no CMC event, switch out of polling mode */
> +		spin_lock_irqsave(&cmc_poll_lock, flags);
> +		if (cmci_storm_detected == 1) {
> +			printk(KERN_WARNING "Returning to interrupt driven "\
> +				"CMC handler\n");
> +			if (!work_pending(&cmc_enable_work))
> +				schedule_work(&cmc_enable_work);
> +			cmci_storm_detected = 0;
> +		}
> +		spin_unlock_irqrestore(&cmc_poll_lock, flags);
> +	}
>
>  	t->expires = jiffies + *n;
>  	add_timer_on(t, smp_processor_id());
> @@ -1547,6 +1625,7 @@ void __cpuinit mcheck_cpu_init(struct cpuinfo_x86 *c)
>  	__mcheck_cpu_init_generic();
>  	__mcheck_cpu_init_vendor(c);
>  	__mcheck_cpu_init_timer();
> +	cmci_storm_detected = 0;
>  	INIT_WORK(&__get_cpu_var(mce_work), mce_process_work);
>  	init_irq_work(&__get_cpu_var(mce_irq_work), &mce_irq_work_cb);
>  }

Oops, I sent this to the wrong LKML address. I will send the patch again.
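For readers skimming the diff, the heart of the patch is a small hysteresis
state machine: count CMC interrupts per CPU within a one-second window, mask
the interrupt and fall back to the poll timer once CMC_STORM events pile up,
and unmask it again when a poll pass finds no new events. The sketch below is
a plain userspace C model of that logic, not the kernel code itself;
cmc_interrupt(), poll_timer() and the simulated timeline in main() are
hypothetical stand-ins for the CMCI handler and mce_start_timer(), while the
threshold and window mirror CMC_STORM and the HZ window from the patch.

/*
 * Standalone sketch (assumption: simplified userspace model, not the
 * actual mce.c code). Models the storm-detection hysteresis only.
 */
#include <stdbool.h>
#include <stdio.h>

#define CMC_STORM 5   /* interrupts within one window => storm */
#define WINDOW    1   /* window length in "seconds" (HZ in the patch) */

static int  storm_count;      /* analogue of per-CPU cmci_storm_warning */
static long window_start;     /* analogue of per-CPU first_cmci_jiffie */
static bool polling;          /* analogue of cmci_storm_detected */

/* Called for every simulated CMC interrupt arriving at time `now`. */
static void cmc_interrupt(long now)
{
        if (polling)
                return;                 /* interrupts already masked */

        if (now - window_start <= WINDOW) {
                storm_count++;          /* still inside the current window */
        } else {
                storm_count = 0;        /* quiet gap: start a new window */
                window_start = now;
        }

        if (storm_count >= CMC_STORM) {
                polling = true;         /* mask CMCI, hand over to poll timer */
                printf("t=%ld: storm detected, switching to poll mode\n", now);
        }
}

/* Called from the periodic poll timer; `found_events` mimics mce_notify_irq(). */
static void poll_timer(long now, bool found_events)
{
        if (polling && !found_events) {
                polling = false;        /* quiet again: unmask CMCI */
                storm_count = 0;
                printf("t=%ld: no new events, back to interrupt mode\n", now);
        }
}

int main(void)
{
        long t;

        /* A burst of interrupts in the same second trips the switch... */
        for (t = 0; t < 6; t++)
                cmc_interrupt(0);

        /* ...and a fully quiet poll pass switches back. */
        poll_timer(30, false);
        return 0;
}

The asymmetry is deliberate: a burst trips the switch to polling immediately,
but only a completely quiet poll interval switches back, which keeps the
machine from flapping between modes under a sustained error rate.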