blkoops is a better wrapper for pstore/blk, which provides an efficient configuration method. It divides all configurations of pstore/blk into 2 parts: configurations for the user and configurations for the driver. Configurations for the user determine how pstore/blk works, such as dump_oops and dmesg_size. They can be set by Kconfig and module parameters. Configurations for the driver are all about the block/non-block device, such as the total_size of the device and its read/write operations. They should be provided by device drivers, calling blkoops_register_device() for a non-block device and blkoops_register_blkdev() for a block device. If the device driver supports panic records, @panic_write must be valid. If a panic occurs and pstore/blk has not recovered yet, the first zone of dmesg will be used. Besides, a block device driver has no need to verify which partition is used or to provide generic read/write operations, because blkoops has already done that. It also means that if users do not care about panic records but only about records for oops/console/pmsg/ftrace, the block device driver needs to do nothing. 
Signed-off-by: WeiXiong Liao <liaoweixiong@xxxxxxxxxxxxxxxxx> --- MAINTAINERS | 2 +- fs/pstore/Kconfig | 61 +++++++ fs/pstore/Makefile | 2 + fs/pstore/blkoops.c | 417 ++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/blkoops.h | 58 +++++++ 5 files changed, 539 insertions(+), 1 deletion(-) create mode 100644 fs/pstore/blkoops.c create mode 100644 include/linux/blkoops.h diff --git a/MAINTAINERS b/MAINTAINERS index cc0a4a8ae06a..e4ba97130560 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13381,7 +13381,7 @@ F: drivers/firmware/efi/efi-pstore.c F: drivers/acpi/apei/erst.c F: Documentation/admin-guide/ramoops.rst F: Documentation/devicetree/bindings/reserved-memory/ramoops.txt -K: \b(pstore|ramoops) +K: \b(pstore|ramoops|blkoops) PTP HARDWARE CLOCK SUPPORT M: Richard Cochran <richardcochran@xxxxxxxxx> diff --git a/fs/pstore/Kconfig b/fs/pstore/Kconfig index 536fde9e13e8..cd15f9322acd 100644 --- a/fs/pstore/Kconfig +++ b/fs/pstore/Kconfig @@ -163,3 +163,64 @@ config PSTORE_BLK where it can be read back at some later point. If unsure, say N. + +config PSTORE_BLKOOPS + tristate "pstore block with oops logger" + depends on PSTORE_BLK + help + This is a wrapper for pstore/blk. + + NOTE that, both kconfig and module parameters can configure blkoops, + but module parameters have priority over kconfig. + + If unsure, say N. + +config PSTORE_BLKOOPS_DMESG_SIZE + int "dmesg size in kbytes for blkoops" + depends on PSTORE_BLKOOPS + default 64 + help + This just sets size of dmesg (dmesg_size) for pstore/blk. The value + must be a multiple of 4096. + + NOTE that, both kconfig and module parameters can configure blkoops, + but module parameters have priority over kconfig. + +config PSTORE_BLKOOPS_BLKDEV + string "block device for blkoops" + depends on PSTORE_BLKOOPS + default "" + help + Which block device should be used for pstore/blk. 
+ + It accepts the following variants: + 1) <hex_major><hex_minor> device number in hexadecimal represents + itself no leading 0x, for example b302. + 2) /dev/<disk_name> represents the device number of disk + 3) /dev/<disk_name><decimal> represents the device number + of partition - device number of disk plus the partition number + 4) /dev/<disk_name>p<decimal> - same as the above, this form is + used when disk name of partitioned disk ends with a digit. + 5) PARTUUID=00112233-4455-6677-8899-AABBCCDDEEFF representing the + unique id of a partition if the partition table provides it. + The UUID may be either an EFI/GPT UUID, or refer to an MSDOS + partition using the format SSSSSSSS-PP, where SSSSSSSS is a zero- + filled hex representation of the 32-bit "NT disk signature", and PP + is a zero-filled hex representation of the 1-based partition number. + 6) PARTUUID=<UUID>/PARTNROFF=<int> to select a partition in relation + to a partition with a known unique id. + 7) <major>:<minor> major and minor number of the device separated by + a colon. + + NOTE that, both kconfig and module parameters can configure blkoops, + but module parameters have priority over kconfig. + +config PSTORE_BLKOOPS_DUMP_OOPS + bool "dump oops" + depends on PSTORE_BLKOOPS + default y + help + Whether blkoops dumps oops or not. + + NOTE that, both kconfig and module parameters can configure blkoops, + but module parameters have priority over kconfig. 
diff --git a/fs/pstore/Makefile b/fs/pstore/Makefile index 0ee2fc8d1bfb..24b3d488d2f0 100644 --- a/fs/pstore/Makefile +++ b/fs/pstore/Makefile @@ -15,3 +15,5 @@ obj-$(CONFIG_PSTORE_RAM) += ramoops.o obj-$(CONFIG_PSTORE_BLK) += pstore_blk.o pstore_blk-y += blkzone.o + +obj-$(CONFIG_PSTORE_BLKOOPS) += blkoops.o diff --git a/fs/pstore/blkoops.c b/fs/pstore/blkoops.c new file mode 100644 index 000000000000..69904fdeab6b --- /dev/null +++ b/fs/pstore/blkoops.c @@ -0,0 +1,417 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * + * blkoops.c: Block device Oops logger + * + * Copyright (C) 2019 WeiXiong Liao <liaoweixiong@xxxxxxxxxxxxxxxxxx> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ +#define pr_fmt(fmt) "blkoops : " fmt + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/string.h> +#include <linux/of.h> +#include <linux/of_address.h> +#include <linux/platform_device.h> +#include <linux/blkoops.h> +#include <linux/mount.h> +#include <linux/uio.h> + +static long dmesg_size = -1; +module_param(dmesg_size, long, 0400); +MODULE_PARM_DESC(dmesg_size, "demsg size in kbytes"); + +static int dump_oops = -1; +module_param(dump_oops, int, 0400); +MODULE_PARM_DESC(total_size, "whether dump oops"); + +/** + * The block device to use. Most of the time, it is a partition of block + * device. It's fine to ignore it if you are not block device and register + * to blkoops by blkoops_register_device(). In this case, @blkdev is + * useless and @read, @write and @total_size must be supplied. 
+ * + * @blkdev accepts the following variants: + * 1) <hex_major><hex_minor> device number in hexadecimal represents itself + * no leading 0x, for example b302. + * 2) /dev/<disk_name> represents the device number of disk + * 3) /dev/<disk_name><decimal> represents the device number + * of partition - device number of disk plus the partition number + * 4) /dev/<disk_name>p<decimal> - same as the above, that form is + * used when disk name of partitioned disk ends on a digit. + * 5) PARTUUID=00112233-4455-6677-8899-AABBCCDDEEFF representing the + * unique id of a partition if the partition table provides it. + * The UUID may be either an EFI/GPT UUID, or refer to an MSDOS + * partition using the format SSSSSSSS-PP, where SSSSSSSS is a zero- + * filled hex representation of the 32-bit "NT disk signature", and PP + * is a zero-filled hex representation of the 1-based partition number. + * 6) PARTUUID=<UUID>/PARTNROFF=<int> to select a partition in relation to + * a partition with a known unique id. + * 7) <major>:<minor> major and minor number of the device separated by + * a colon. + */ +static char blkdev[80]; +module_param_string(blkdev, blkdev, 80, 0400); +MODULE_PARM_DESC(blkdev, "the block device for general read/write"); + +static DEFINE_MUTEX(blkz_lock); +static struct block_device *blkoops_bdev; +static struct blkz_info *bzinfo; +static blkoops_blk_panic_write_op blkdev_panic_write; + +#ifdef CONFIG_PSTORE_BLKOOPS_DMESG_SIZE +#define DEFAULT_DMESG_SIZE CONFIG_PSTORE_BLKOOPS_DMESG_SIZE +#else +#define DEFAULT_DMESG_SIZE 0 +#endif + +#ifdef CONFIG_PSTORE_BLKOOPS_DUMP_OOPS +#define DEFAULT_DUMP_OOPS CONFIG_PSTORE_BLKOOPS_DUMP_OOPS +#else +#define DEFAULT_DUMP_OOPS 1 +#endif + +#ifdef CONFIG_PSTORE_BLKOOPS_BLKDEV +#define DEFAULT_BLKDEV CONFIG_PSTORE_BLKOOPS_BLKDEV +#else +#define DEFAULT_BLKDEV "" +#endif + +/** + * register device to blkoops + * + * Drivers, not only block drivers but also non-block drivers can call this + * function to register to blkoops. 
It will pack for blkzone and pstore. + */ +int blkoops_register_device(struct blkoops_device *bo_dev) +{ + int ret; + + if (!bo_dev || !bo_dev->total_size || !bo_dev->read || !bo_dev->write) + return -EINVAL; + + mutex_lock(&blkz_lock); + + /* someone already registered before */ + if (bzinfo) { + mutex_unlock(&blkz_lock); + return -EBUSY; + } + bzinfo = kzalloc(sizeof(struct blkz_info), GFP_KERNEL); + if (!bzinfo) { + mutex_unlock(&blkz_lock); + return -ENOMEM; + } + +#define verify_size(name, defsize, alignsize) { \ + long _##name_ = (name); \ + if (_##name_ < 0) \ + _##name_ = (defsize); \ + _##name_ = _##name_ <= 0 ? 0 : (_##name_ * 1024); \ + if (_##name_ & (alignsize - 1)) { \ + pr_info(#name " must align to %d\n", \ + (alignsize)); \ + _##name_ = ALIGN(name, alignsize); \ + } \ + name = _##name_ / 1024; \ + bzinfo->name = _##name_; \ + } + + verify_size(dmesg_size, DEFAULT_DMESG_SIZE, 4096); +#undef verify_size + dump_oops = !!(dump_oops < 0 ? DEFAULT_DUMP_OOPS : dump_oops); + + bzinfo->total_size = bo_dev->total_size; + bzinfo->dump_oops = dump_oops; + bzinfo->read = bo_dev->read; + bzinfo->write = bo_dev->write; + bzinfo->panic_write = bo_dev->panic_write; + bzinfo->name = "blkoops"; + bzinfo->owner = THIS_MODULE; + + ret = blkz_register(bzinfo); + if (ret) { + kfree(bzinfo); + bzinfo = NULL; + } + mutex_unlock(&blkz_lock); + return ret; +} +EXPORT_SYMBOL_GPL(blkoops_register_device); + +void blkoops_unregister_device(struct blkoops_device *bo_dev) +{ + mutex_lock(&blkz_lock); + if (bzinfo && bzinfo->read == bo_dev->read) { + blkz_unregister(bzinfo); + kfree(bzinfo); + bzinfo = NULL; + } + mutex_unlock(&blkz_lock); +} +EXPORT_SYMBOL_GPL(blkoops_unregister_device); + +/** + * get block_device of @blkdev + * @holder: exclusive holder identifier + * + * On success, @blkoops_bdev will save the block_device and the returned + * block_device has reference count of one. 
+ */ +static struct block_device *blkoops_get_bdev(void *holder) +{ + struct block_device *bdev = ERR_PTR(-ENODEV); + fmode_t mode = FMODE_READ | FMODE_WRITE; + + if (!blkdev[0] && strlen(DEFAULT_BLKDEV)) + strncpy(blkdev, DEFAULT_BLKDEV, 80); + if (!blkdev[0]) + return ERR_PTR(-ENODEV); + + mutex_lock(&blkz_lock); + if (bzinfo) + goto out; + if (holder) + mode |= FMODE_EXCL; + bdev = blkdev_get_by_path(blkdev, mode, holder); + if (IS_ERR(bdev)) { + dev_t devt; + + devt = name_to_dev_t(blkdev); + if (devt == 0) { + bdev = ERR_PTR(-ENODEV); + goto out; + } + bdev = blkdev_get_by_dev(devt, mode, holder); + } +out: + mutex_unlock(&blkz_lock); + return bdev; +} + +static void blkoops_put_bdev(struct block_device *bdev, void *holder) +{ + fmode_t mode = FMODE_READ | FMODE_WRITE; + + if (!bdev) + return; + + mutex_lock(&blkz_lock); + if (holder) + mode |= FMODE_EXCL; + blkdev_put(bdev, mode); + mutex_unlock(&blkz_lock); +} + +static ssize_t blkoops_generic_blk_read(char *buf, size_t bytes, loff_t pos) +{ + ssize_t ret; + struct block_device *bdev = blkoops_bdev; + struct file filp; + mm_segment_t ofs; + struct kiocb kiocb; + struct iov_iter iter; + struct iovec iov = { + .iov_base = (void __user *)buf, + .iov_len = bytes + }; + + if (!bdev) + return -ENODEV; + + memset(&filp, 0, sizeof(struct file)); + filp.f_mapping = bdev->bd_inode->i_mapping; + filp.f_flags = O_DSYNC | __O_SYNC | O_NOATIME; + filp.f_inode = bdev->bd_inode; + + init_sync_kiocb(&kiocb, &filp); + kiocb.ki_pos = pos; + iov_iter_init(&iter, READ, &iov, 1, bytes); + + ofs = get_fs(); + set_fs(KERNEL_DS); + ret = generic_file_read_iter(&kiocb, &iter); + set_fs(ofs); + return ret; +} + +static ssize_t blkoops_generic_blk_write(const char *buf, size_t bytes, + loff_t pos) +{ + struct block_device *bdev = blkoops_bdev; + struct iov_iter iter; + struct kiocb kiocb; + struct file filp; + mm_segment_t ofs; + ssize_t ret; + struct iovec iov = { + .iov_base = (void __user *)buf, + .iov_len = bytes + }; + + if 
(!bdev) + return -ENODEV; + + /* Console/Ftrace recorder may handle buffer until flush dirty zones */ + if (in_interrupt() || irqs_disabled()) + return -EBUSY; + + memset(&filp, 0, sizeof(struct file)); + filp.f_mapping = bdev->bd_inode->i_mapping; + filp.f_flags = O_DSYNC | __O_SYNC | O_NOATIME; + filp.f_inode = bdev->bd_inode; + + init_sync_kiocb(&kiocb, &filp); + kiocb.ki_pos = pos; + iov_iter_init(&iter, WRITE, &iov, 1, bytes); + + ofs = get_fs(); + set_fs(KERNEL_DS); + + inode_lock(bdev->bd_inode); + ret = generic_write_checks(&kiocb, &iter); + if (ret > 0) + ret = generic_perform_write(&filp, &iter, pos); + inode_unlock(bdev->bd_inode); + + if (likely(ret > 0)) { + const struct file_operations f_op = {.fsync = blkdev_fsync}; + + filp.f_op = &f_op; + kiocb.ki_pos += ret; + ret = generic_write_sync(&kiocb, ret); + } + set_fs(ofs); + return ret; +} + +static inline unsigned long blkoops_bdev_size(struct block_device *bdev) +{ + return (unsigned long)part_nr_sects_read(bdev->bd_part) << SECTOR_SHIFT; +} + +static ssize_t blkoops_blk_panic_write(const char *buf, size_t size, + loff_t off) +{ + int ret; + + if (!blkdev_panic_write) + return -EOPNOTSUPP; + + /* size and off must align to SECTOR_SIZE for block device */ + ret = blkdev_panic_write(buf, off >> SECTOR_SHIFT, + size >> SECTOR_SHIFT); + return ret ? -EIO : size; +} + +/** + * register block device to blkoops + * @major: the major device number of registering device + * @panic_write: the write interface for panic case. + * + * It is ONLY used for block device to register to blkoops. In this case, + * the module parameter @blkdev must be valid. Generic read/write interfaces + * will be used. + * + * Block driver has no need to verify which partition is used. Block driver + * should only tell me what major number is, so blkoops can get the matching + * driver for @blkdev. + * + * If block driver support for panic records, @panic_write must be valid. 
If + * panic occurs but pstore/blk does not recover yet, the first zone of dmesg + * will be used. + */ +int blkoops_register_blkdev(unsigned int major, + blkoops_blk_panic_write_op panic_write) +{ + struct block_device *bdev; + struct blkoops_device bo_dev = {0}; + int ret = -ENODEV; + void *holder = blkdev; + + bdev = blkoops_get_bdev(holder); + if (IS_ERR(bdev)) + return PTR_ERR(bdev); + + blkoops_bdev = bdev; + blkdev_panic_write = panic_write; + + /* only allow driver matching the @blkdev */ + if (!bdev->bd_dev || MAJOR(bdev->bd_dev) != major) + goto err_put_bdev; + + bo_dev.total_size = blkoops_bdev_size(bdev); + if (bo_dev.total_size == 0) + goto err_put_bdev; + bo_dev.panic_write = panic_write ? blkoops_blk_panic_write : NULL; + bo_dev.read = blkoops_generic_blk_read; + bo_dev.write = blkoops_generic_blk_write; + + ret = blkoops_register_device(&bo_dev); + if (ret) + goto err_put_bdev; + return 0; + +err_put_bdev: + blkdev_panic_write = NULL; + blkoops_bdev = NULL; + blkoops_put_bdev(bdev, holder); + return ret; +} +EXPORT_SYMBOL_GPL(blkoops_register_blkdev); + +void blkoops_unregister_blkdev(unsigned int major) +{ + struct blkoops_device bo_dev = {.read = blkoops_generic_blk_read}; + void *holder = blkdev; + + if (blkoops_bdev && MAJOR(blkoops_bdev->bd_dev) == major) { + blkoops_unregister_device(&bo_dev); + blkoops_put_bdev(blkoops_bdev, holder); + blkdev_panic_write = NULL; + blkoops_bdev = NULL; + } +} +EXPORT_SYMBOL_GPL(blkoops_unregister_blkdev); + +/** + * get information of @blkdev + * @devt: the block device num of @blkdev + * @nr_sectors: the sector count of @blkdev + * @start_sect: the start sector of @blkdev + * + * Block driver needs the follow information for @panic_write. 
+ */ +int blkoops_blkdev_info(dev_t *devt, sector_t *nr_sects, sector_t *start_sect) +{ + struct block_device *bdev; + + bdev = blkoops_get_bdev(NULL); + if (IS_ERR(bdev)) + return PTR_ERR(bdev); + + if (devt) + *devt = bdev->bd_dev; + if (nr_sects) + *nr_sects = part_nr_sects_read(bdev->bd_part); + if (start_sect) + *start_sect = get_start_sect(bdev); + + blkoops_put_bdev(bdev, NULL); + return 0; +} +EXPORT_SYMBOL_GPL(blkoops_blkdev_info); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("WeiXiong Liao <liaoweixiong@xxxxxxxxxxxxxxxxx>"); +MODULE_DESCRIPTION("Wrapper for Pstore BLK with Oops logger"); diff --git a/include/linux/blkoops.h b/include/linux/blkoops.h new file mode 100644 index 000000000000..fe63739309aa --- /dev/null +++ b/include/linux/blkoops.h @@ -0,0 +1,58 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef __BLKOOPS_H_ +#define __BLKOOPS_H_ + +#include <linux/types.h> +#include <linux/blkdev.h> +#include <linux/pstore_blk.h> + +/** + * struct blkoops_device - backend blkoops driver structure. + * + * This structure is ONLY used for non-block device by + * blkoops_register_device(). If block device, you are strongly recommended + * to use blkoops_register_blkdev(). + * + * @total_size: + * The total size in bytes pstore/blk can use. It must be greater than + * 4096 and be multiple of 4096. + * @read, @write: + * The general (not panic) read/write operation. + * + * Both of the @size and @offset parameters on this interface are + * the relative size of the space provided, not the whole disk/flash. + * + * On success, the number of bytes read should be returned. + * On error, negative number should be returned. + * @panic_write: + * The write operation only used for panic. + * + * Both of the @size and @offset parameters on this interface are + * the relative size of the space provided, not the whole disk/flash. + * + * On success, the number of bytes read should be returned. + * On error, negative number should be returned. 
+ */ +struct blkoops_device { + unsigned long total_size; + blkz_read_op read; + blkz_write_op write; + blkz_write_op panic_write; +}; + +/* + * Panic write for block device who should write alignmemt to SECTOR_SIZE. + * On success, zero should be returned. Others mean error. + */ +typedef int (*blkoops_blk_panic_write_op)(const char *buf, sector_t start_sect, + sector_t sects); + +int blkoops_register_device(struct blkoops_device *bo_dev); +void blkoops_unregister_device(struct blkoops_device *bo_dev); +int blkoops_register_blkdev(unsigned int major, + blkoops_blk_panic_write_op panic_write); +void blkoops_unregister_blkdev(unsigned int major); +int blkoops_blkdev_info(dev_t *devt, sector_t *nr_sects, sector_t *start_sect); + +#endif -- 1.9.1