Normally, corrected hardware error records go through kernel processing and are finally logged to disk or network. But for uncorrected errors, the system may panic directly for better error containment, and disk or network is not usable in this half-working system. To avoid losing these valuable hardware error records, the error records are saved into some kind of simple persistent storage such as flash before the panic, so that they can be read out after the system reboots successfully. Different kinds of simple persistent storage implementation mechanisms are provided on different platforms, so an abstract interface for persistent storage is defined. Different implementations of the interface can be registered. Even after a successful reboot, the error records must be guaranteed to be saved to disk or network before they are erased from the simple persistent storage. Peek and clear operations on simple persistent storage are implemented to support these transaction semantics as follows: - Peek an error record from simple persistent storage - Save the error record to disk or network - Sync the disk file or get an ACK from the network - Clear the error record in simple persistent storage This patch was designed by Andi Kleen and Huang Ying. 
Signed-off-by: Huang Ying <ying.huang@xxxxxxxxx> Reviewed-by: Andi Kleen <ak@xxxxxxxxxxxxxxx> --- drivers/herror/Makefile | 2 drivers/herror/herr-core.c | 39 ++++++++- drivers/herror/herr-internal.h | 12 ++ drivers/herror/herr-persist.c | 174 +++++++++++++++++++++++++++++++++++++++++ include/linux/Kbuild | 1 include/linux/herror.h | 48 +++++++++++ 6 files changed, 271 insertions(+), 5 deletions(-) create mode 100644 drivers/herror/herr-internal.h create mode 100644 drivers/herror/herr-persist.c --- a/drivers/herror/Makefile +++ b/drivers/herror/Makefile @@ -1 +1 @@ -obj-y += herr-core.o +obj-y += herr-core.o herr-persist.o --- a/drivers/herror/herr-core.c +++ b/drivers/herror/herr-core.c @@ -38,9 +38,9 @@ #include <linux/genalloc.h> #include <linux/herror.h> -#define HERR_NOTIFY_BIT 0 +#include "herr-internal.h" -static unsigned long herr_flags; +unsigned long herr_flags; /* * Record list management and error reporting @@ -413,6 +413,7 @@ static ssize_t herr_mix_read(struct file { int rc; static DEFINE_MUTEX(read_mutex); + u64 record_id; if (*off != 0) return -EINVAL; @@ -420,7 +421,14 @@ static ssize_t herr_mix_read(struct file rc = mutex_lock_interruptible(&read_mutex); if (rc) return rc; + rc = herr_persist_peek_user(&record_id, ubuf, usize); + if (rc > 0) { + herr_persist_clear(record_id); + goto out; + } + rc = herr_rcd_lists_read(ubuf, usize, &read_mutex); +out: mutex_unlock(&read_mutex); return rc; @@ -429,15 +437,40 @@ static ssize_t herr_mix_read(struct file static unsigned int herr_mix_poll(struct file *file, poll_table *wait) { poll_wait(file, &herr_mix_wait, wait); - if (!herr_rcd_lists_is_empty()) + if (!herr_rcd_lists_is_empty() || !herr_persist_read_done()) return POLLIN | POLLRDNORM; return 0; } +static long herr_mix_ioctl(struct file *f, unsigned int cmd, unsigned long arg) +{ + void __user *p = (void __user *)arg; + int rc; + u64 record_id; + struct herr_persist_buffer buf; + + switch (cmd) { + case HERR_PERSIST_PEEK: + rc = copy_from_user(&buf, 
p, sizeof(buf)); + if (rc) + return -EFAULT; + return herr_persist_peek_user(&record_id, buf.buf, + buf.buf_size); + case HERR_PERSIST_CLEAR: + rc = copy_from_user(&record_id, p, sizeof(record_id)); + if (rc) + return -EFAULT; + return herr_persist_clear(record_id); + default: + return -ENOTTY; + } +} + static const struct file_operations herr_mix_dev_fops = { .owner = THIS_MODULE, .read = herr_mix_read, .poll = herr_mix_poll, + .unlocked_ioctl = herr_mix_ioctl, }; static int __init herr_mix_dev_init(void) --- /dev/null +++ b/drivers/herror/herr-internal.h @@ -0,0 +1,12 @@ +#ifndef HERR_INTERNAL_H +#define HERR_INTERNAL_H + +#define HERR_NOTIFY_BIT 0 + +extern unsigned long herr_flags; + +int herr_persist_read_done(void); +ssize_t herr_persist_peek_user(u64 *record_id, char __user *ercd, + size_t bufsiz); +int herr_persist_clear(u64 record_id); +#endif /* HERR_INTERNAL_H */ --- /dev/null +++ b/drivers/herror/herr-persist.c @@ -0,0 +1,174 @@ +/* + * Hardware error record persistent support + * + * Normally, corrected hardware error records go through + * kernel processing and are finally logged to disk or network. But + * for uncorrected errors, the system may panic directly for better + * error containment, and disk or network is not usable in this + * half-working system. To avoid losing these valuable hardware error + * records, the error records are saved into some kind of simple + * persistent storage such as flash before the panic, so that they can be + * read out after the system reboots successfully. + * + * Copyright 2010 Intel Corp. + * Author: Huang Ying <ying.huang@xxxxxxxxx> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation; + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/rculist.h> +#include <linux/mutex.h> + +#include <linux/herror.h> + +#include "herr-internal.h" + +/* + * Simple persistent storage provider list, herr_persists_mutex is + * used for writer side mutual exclusion, RCU is used to implement + * lock-less reader side. + */ +static LIST_HEAD(herr_persists); +static DEFINE_MUTEX(herr_persists_mutex); + +int herr_persist_register(struct herr_persist *persist) +{ + if (!persist->peek_user) + return -EINVAL; + persist->read_done = 0; + if (mutex_lock_interruptible(&herr_persists_mutex)) + return -EINTR; + list_add_rcu(&persist->list, &herr_persists); + mutex_unlock(&herr_persists_mutex); + /* + * There may be hardware error records from a previous boot in + * persistent storage, so notify the user space error daemon to + * check. 
+ */ + set_bit(HERR_NOTIFY_BIT, &herr_flags); + herr_notify(); + return 0; +} +EXPORT_SYMBOL_GPL(herr_persist_register); + +void herr_persist_unregister(struct herr_persist *persist) +{ + mutex_lock(&herr_persists_mutex); + list_del_rcu(&persist->list); + mutex_unlock(&herr_persists_mutex); + synchronize_rcu(); +} +EXPORT_SYMBOL_GPL(herr_persist_unregister); + +/* Tries each provider's write() until one returns 0; can be used in atomic context including NMI */ +int herr_persist_write(const struct herr_record *ercd) +{ + struct herr_persist *persist; + int rc = -ENODEV; + + rcu_read_lock(); + list_for_each_entry_rcu(persist, &herr_persists, list) { + if (!persist->write) + continue; + rc = persist->write(ercd); + if (!rc) + break; + } + rcu_read_unlock(); + return rc; +} +EXPORT_SYMBOL_GPL(herr_persist_write); + +int herr_persist_read_done(void) +{ + struct herr_persist *persist; + int rc = 1; + + rcu_read_lock(); + list_for_each_entry_rcu(persist, &herr_persists, list) { + if (!persist->read_done) { + rc = 0; + break; + } + } + rcu_read_unlock(); + return rc; +} + +/* Read next error record from persistent storage, don't remove it */ +ssize_t herr_persist_peek_user(u64 *record_id, char __user *ercd, + size_t bufsiz) +{ + struct herr_persist *persist; + ssize_t rc = 0; + + if (mutex_lock_interruptible(&herr_persists_mutex)) + return -EINTR; + list_for_each_entry(persist, &herr_persists, list) { + if (persist->read_done) + continue; + rc = persist->peek_user(record_id, ercd, bufsiz); + if (rc > 0) + break; + else if (rc != -EINTR && rc != -EAGAIN && rc != -EINVAL) + persist->read_done = 1; + } + mutex_unlock(&herr_persists_mutex); + return rc; +} + +/* Clear specified error record from persistent storage */ +int herr_persist_clear(u64 record_id) +{ + struct herr_persist *persist; + int rc = -ENOENT; + + if (mutex_lock_interruptible(&herr_persists_mutex)) + return -EINTR; + list_for_each_entry(persist, &herr_persists, list) { + if (!persist->clear) + continue; + rc = persist->clear(record_id); + if (!rc) + break; + /* + * 
Failed to clear, mark the provider as read_done, because we + * cannot skip this record + */ + else if (rc != -EINTR && rc != -EAGAIN && rc != -ENOENT) + persist->read_done = 1; + } + mutex_unlock(&herr_persists_mutex); + return rc; +} + +static int herr_persist_record(struct herr_record *ercd, void *data) +{ + int *severity = data; + + if (ercd->severity == *severity) + return herr_persist_write(ercd); + return 0; +} + +void herr_persist_all_records(void) +{ + int severity; + + for (severity = HERR_SEV_FATAL; severity >= HERR_SEV_NONE; severity--) + herr_for_each_record(herr_persist_record, &severity); +} +EXPORT_SYMBOL_GPL(herr_persist_all_records); --- a/include/linux/Kbuild +++ b/include/linux/Kbuild @@ -142,6 +142,7 @@ header-y += hdlc.h header-y += hdlcdrv.h header-y += hdreg.h header-y += herror_record.h +header-y += herror.h header-y += hid.h header-y += hiddev.h header-y += hidraw.h --- a/include/linux/herror.h +++ b/include/linux/herror.h @@ -1,10 +1,22 @@ #ifndef LINUX_HERROR_H #define LINUX_HERROR_H +#include <linux/ioctl.h> +#include <linux/herror_record.h> + +struct herr_persist_buffer { + void __user *buf; + unsigned int buf_size; +}; + +#define HERR_PERSIST_PEEK _IOW('H', 1, struct herr_persist_buffer) +#define HERR_PERSIST_CLEAR _IOW('H', 2, u64) /* NOTE(review): defined outside __KERNEL__ in a header exported via header-y; u64 is presumably meant to be __u64 for user space - confirm */ + +#ifdef __KERNEL__ + #include <linux/types.h> #include <linux/list.h> #include <linux/device.h> -#include <linux/herror_record.h> /* * Hardware error reporting @@ -32,4 +44,38 @@ void herr_notify(void); /* Traverse all error records not consumed by user space */ typedef int (*herr_traverse_func_t)(struct herr_record *ercd, void *data); int herr_for_each_record(herr_traverse_func_t func, void *data); + + +/* + * Simple Persistent Storage + */ + +struct herr_persist; +/* Put an error record into simple persistent storage */ +int herr_persist_write(const struct herr_record *ercd); +/* Save all error records not yet consumed in persistent storage */ +void herr_persist_all_records(void); + +/* + * Simple Persistent Storage 
Provider Management + */ +struct herr_persist { + struct list_head list; + char *name; + unsigned int read_done:1; + /* Write an error record into storage, must be NMI-safe */ + int (*write)(const struct herr_record *ercd); + /* + * Read out an error record from storage to user space, don't + * remove it, the HERR_RCD_PERSIST must be set in record flags + */ + ssize_t (*peek_user)(u64 *record_id, char __user *ubuf, size_t usize); + /* Clear an error record */ + int (*clear)(u64 record_id); +}; + +/* Register (unregister) a simple persistent storage provider; peek_user is mandatory for registration */ +int herr_persist_register(struct herr_persist *persist); +void herr_persist_unregister(struct herr_persist *persist); +#endif #endif -- To unsubscribe from this list: send the line "unsubscribe linux-acpi" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html