[PATCH 1/7] [RFC PATCH 1/7] osdump: Add one new os minidump module

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: qiruipeng <qiruipeng@xxxxxxxxxxx>

Osdump is a new crash dumping solution aimed at specific embedded
devices in automotive or industrial environments, which:
 - have limited memory.
 - must reboot as soon as possible after a system fault.

In order to reduce dump file size and speed up dump process, it has the
following features:
 - no userspace memory, just designed for solving os issues.
 - no const data, such as text segment
 - dump core os data only.
   - bss and data segments (statically allocated data).
   - dynamic data
     - slub data for small-size data.
     - some large-size data.
 - compress dump data to reduce dump file size.

The dump consists of a large amount of discontiguous data, so a raw binary
data format is used directly.

A standard ELF file is then reassembled from the dump file and vmlinux,
and parsed with the crash tool.

Signed-off-by: qiruipeng <qiruipeng@xxxxxxxxxxx>
---
 arch/arm64/Kconfig       | 169 +++++++++++
 drivers/of/platform.c    |   1 +
 include/linux/osdump.h   |   7 +
 kernel/Makefile          |   1 +
 kernel/osdump/Makefile   |   1 +
 kernel/osdump/compr.c    | 252 ++++++++++++++++
 kernel/osdump/core.c     | 608 +++++++++++++++++++++++++++++++++++++++
 kernel/osdump/extern.h   |  13 +
 kernel/osdump/inode.c    |  77 +++++
 kernel/osdump/internal.h |  95 ++++++
 10 files changed, 1224 insertions(+)
 create mode 100644 include/linux/osdump.h
 create mode 100644 kernel/osdump/Makefile
 create mode 100644 kernel/osdump/compr.c
 create mode 100644 kernel/osdump/core.c
 create mode 100644 kernel/osdump/extern.h
 create mode 100644 kernel/osdump/inode.c
 create mode 100644 kernel/osdump/internal.h

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 7b071a00425d..6e5e96b5738e 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -258,6 +258,175 @@ config ARM64
 	help
 	  ARM 64-bit (AArch64) Linux support.
 
+config OS_MINIDUMP
+	bool "Enable os minidump module"
+	default n
+	depends on !CRASH_CORE && !SLUB_DEBUG && !SLUB_TINY
+	help
+	  Trigger an OS minidump on panic. Only parts of the core OS
+	  data/segments are dumped, in raw binary format, to reduce the
+	  dump size and speed up the dump process. Reassemble a standard
+	  ELF file from the dump file and vmlinux, then parse it with
+	  the crash tool.
+
+	  For more details see Documentation/admin-guide/osdump.rst
+
+	  If unsure, say N.
+
+config OS_MINIDUMP_DEFLATE_COMPRESS
+	tristate "DEFLATE (ZLIB) compression"
+	depends on OS_MINIDUMP
+	select CRYPTO_DEFLATE
+	help
+	  This option enables DEFLATE (also known as ZLIB) compression
+	  algorithm support for os minidump. Enabling this option will allow
+	  mini dumps to be compressed using DEFLATE, which may reduce the size of
+	  the dump files.
+
+	  If unsure, say N.
+
+config OS_MINIDUMP_LZO_COMPRESS
+	tristate "LZO compression"
+	default y
+	depends on OS_MINIDUMP
+	select CRYPTO_LZO
+	help
+	  This option enables LZO compression algorithm support for os minidump.
+	  Enabling this option will allow mini dumps to be compressed using LZO,
+	  which may reduce the size of the dump files.
+
+	  If unsure, say Y.
+
+config OS_MINIDUMP_LZ4_COMPRESS
+	tristate "LZ4 compression"
+	depends on OS_MINIDUMP
+	select CRYPTO_LZ4
+	help
+	  This option enables LZ4 compression algorithm support for os minidump.
+	  Enabling this option will allow mini dumps to be compressed using LZ4,
+	  which may reduce the size of the dump files.
+
+	  If unsure, say N.
+
+config OS_MINIDUMP_LZ4HC_COMPRESS
+	tristate "LZ4HC compression"
+	depends on OS_MINIDUMP
+	select CRYPTO_LZ4HC
+	help
+	  This option enables LZ4HC (high compression) mode algorithm.
+	  Enabling this option will allow mini dumps to be compressed using LZ4HC,
+	  which may reduce the size of the dump files.
+
+	  If unsure, say N.
+
+config OS_MINIDUMP_842_COMPRESS
+	bool "842 compression"
+	depends on OS_MINIDUMP
+	select CRYPTO_842
+	help
+	  This option enables 842 compression algorithm support for os minidump.
+	  Enabling this option will allow mini dumps to be compressed using 842,
+	  which may reduce the size of the dump files.
+
+	  If unsure, say N.
+
+config OS_MINIDUMP_ZSTD_COMPRESS
+	bool "zstd compression"
+	depends on OS_MINIDUMP
+	select CRYPTO_ZSTD
+	help
+	  This option enables zstd compression algorithm support for os minidump.
+	  Enabling this option will allow mini dumps to be compressed using ZSTD,
+	  which may reduce the size of the dump files.
+
+	  If unsure, say N.
+
+config OS_MINIDUMP_COMPRESS
+	def_bool y
+	depends on OS_MINIDUMP
+	depends on OS_MINIDUMP_DEFLATE_COMPRESS || OS_MINIDUMP_LZO_COMPRESS ||    \
+		   OS_MINIDUMP_LZ4_COMPRESS || OS_MINIDUMP_LZ4HC_COMPRESS ||      \
+		   OS_MINIDUMP_842_COMPRESS || OS_MINIDUMP_ZSTD_COMPRESS
+
+choice
+	prompt "Default osdump.compression algorithm"
+	depends on OS_MINIDUMP_COMPRESS
+	help
+	  This option chooses the default active compression algorithm.
+	  This can be changed at boot with "osdump.compress=..." on
+	  the kernel command line.
+
+	  Currently, osdump has support for 6 compression algorithms:
+	  deflate, lzo, lz4, lz4hc, 842 and zstd.
+
+	  The default compression algorithm is lzo.
+
+	config OS_MINIDUMP_DEFLATE_COMPRESS_DEFAULT
+		bool "deflate" if OS_MINIDUMP_DEFLATE_COMPRESS
+		help
+		  This option chooses deflate as default compression algorithm,
+		  which greatly reduces the size of the dump files.
+
+		  This can be changed at boot with "osdump.compress=..." on
+		  the kernel command line.
+
+	config OS_MINIDUMP_LZO_COMPRESS_DEFAULT
+		bool "lzo" if OS_MINIDUMP_LZO_COMPRESS
+		help
+		  This option chooses lzo as default compression algorithm,
+		  which greatly reduces the size of the dump files.
+
+		  This can be changed at boot with "osdump.compress=..." on
+		  the kernel command line.
+
+	config OS_MINIDUMP_LZ4_COMPRESS_DEFAULT
+		bool "lz4" if OS_MINIDUMP_LZ4_COMPRESS
+		help
+		  This option chooses lz4 as default compression algorithm,
+		  which greatly reduces the size of the dump files.
+
+		  This can be changed at boot with "osdump.compress=..." on
+		  the kernel command line.
+
+	config OS_MINIDUMP_LZ4HC_COMPRESS_DEFAULT
+		bool "lz4hc" if OS_MINIDUMP_LZ4HC_COMPRESS
+		help
+		  This option chooses lz4hc as default compression algorithm,
+		  which greatly reduces the size of the dump files.
+
+		  This can be changed at boot with "osdump.compress=..." on
+		  the kernel command line.
+
+	config OS_MINIDUMP_842_COMPRESS_DEFAULT
+		bool "842" if OS_MINIDUMP_842_COMPRESS
+		help
+		  This option chooses 842 as default compression algorithm,
+		  which greatly reduces the size of the dump files.
+
+		  This can be changed at boot with "osdump.compress=..." on
+		  the kernel command line.
+
+	config OS_MINIDUMP_ZSTD_COMPRESS_DEFAULT
+		bool "zstd" if OS_MINIDUMP_ZSTD_COMPRESS
+		help
+		  This option chooses zstd as default compression algorithm,
+		  which greatly reduces the size of the dump files.
+
+		  This can be changed at boot with "osdump.compress=..." on
+		  the kernel command line.
+
+endchoice
+
+config OS_MINIDUMP_COMPRESS_DEFAULT
+	string
+	depends on OS_MINIDUMP_COMPRESS
+	default "deflate" if OS_MINIDUMP_DEFLATE_COMPRESS_DEFAULT
+	default "lzo" if OS_MINIDUMP_LZO_COMPRESS_DEFAULT
+	default "lz4" if OS_MINIDUMP_LZ4_COMPRESS_DEFAULT
+	default "lz4hc" if OS_MINIDUMP_LZ4HC_COMPRESS_DEFAULT
+	default "842" if OS_MINIDUMP_842_COMPRESS_DEFAULT
+	default "zstd" if OS_MINIDUMP_ZSTD_COMPRESS_DEFAULT
+
+
 config CLANG_SUPPORTS_DYNAMIC_FTRACE_WITH_ARGS
 	def_bool CC_IS_CLANG
 	# https://github.com/ClangBuiltLinux/linux/issues/1507
diff --git a/drivers/of/platform.c b/drivers/of/platform.c
index 126d265aa7d8..6622292633f7 100644
--- a/drivers/of/platform.c
+++ b/drivers/of/platform.c
@@ -538,6 +538,7 @@ static const struct of_device_id reserved_mem_matches[] = {
 	{ .compatible = "qcom,cmd-db" },
 	{ .compatible = "qcom,smem" },
 	{ .compatible = "ramoops" },
+	{ .compatible = "osdump" },
 	{ .compatible = "nvmem-rmem" },
 	{ .compatible = "google,open-dice" },
 	{}
diff --git a/include/linux/osdump.h b/include/linux/osdump.h
new file mode 100644
index 000000000000..600d0d4901eb
--- /dev/null
+++ b/include/linux/osdump.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_OS_MINIDUMP_H
+#define _LINUX_OS_MINIDUMP_H
+
+extern int osdump(void);
+
+#endif
diff --git a/kernel/Makefile b/kernel/Makefile
index 3947122d618b..dc2b4ba5c953 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -116,6 +116,7 @@ obj-$(CONFIG_HAVE_STATIC_CALL_INLINE) += static_call_inline.o
 obj-$(CONFIG_CFI_CLANG) += cfi.o
 
 obj-$(CONFIG_PERF_EVENTS) += events/
+obj-$(CONFIG_OS_MINIDUMP) += osdump/
 
 obj-$(CONFIG_USER_RETURN_NOTIFIER) += user-return-notifier.o
 obj-$(CONFIG_PADATA) += padata.o
diff --git a/kernel/osdump/Makefile b/kernel/osdump/Makefile
new file mode 100644
index 000000000000..3f9cd6282653
--- /dev/null
+++ b/kernel/osdump/Makefile
@@ -0,0 +1 @@
+obj-y += core.o inode.o compr.o
diff --git a/kernel/osdump/compr.c b/kernel/osdump/compr.c
new file mode 100644
index 000000000000..08aa6b72177c
--- /dev/null
+++ b/kernel/osdump/compr.c
@@ -0,0 +1,252 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * OS Minidump - compression driver interface parts.
+ *
+ */
+
+#if IS_ENABLED(CONFIG_OS_MINIDUMP_LZO_COMPRESS)
+#include <linux/types.h>
+#include <linux/lzo.h>
+#endif
+#if IS_ENABLED(CONFIG_OS_MINIDUMP_LZ4_COMPRESS) || IS_ENABLED(CONFIG_OS_MINIDUMP_LZ4HC_COMPRESS)
+#include <linux/lz4.h>
+#endif
+#if IS_ENABLED(CONFIG_OS_MINIDUMP_ZSTD_COMPRESS)
+#include <linux/zstd.h>
+#endif
+#include <linux/crypto.h>
+#include <linux/string.h>
+
+#include <linux/module.h>
+#include "internal.h"
+
+static char *compress = CONFIG_OS_MINIDUMP_COMPRESS_DEFAULT;
+
+module_param(compress, charp, 0444);
+MODULE_PARM_DESC(compress, "compression to use");
+
+/* Compression parameters */
+static struct crypto_comp *tfm;
+
+struct osdump_zbackend {
+	int (*zbufsize)(size_t size);
+	const char *name;
+};
+
+static void *big_oops_buf;
+size_t big_oops_buf_sz;
+EXPORT_SYMBOL_GPL(big_oops_buf_sz);
+
+#if IS_ENABLED(CONFIG_OS_MINIDUMP_DEFLATE_COMPRESS)
+static int zbufsize_deflate(size_t size)
+{
+	size_t cmpr;
+
+	switch (size) {
+	/* buffer range for efivars */
+	case 1000 ... 2000:
+		cmpr = 56;
+		break;
+	case 2001 ... 3000:
+		cmpr = 54;
+		break;
+	case 3001 ... 3999:
+		cmpr = 52;
+		break;
+		/* buffer range for nvram, erst */
+	case 4000 ... 10000:
+		cmpr = 45;
+		break;
+	default:
+		cmpr = 60;
+		break;
+	}
+
+	return (size * 100) / cmpr;
+}
+#endif
+
+#if IS_ENABLED(CONFIG_OS_MINIDUMP_LZO_COMPRESS)
+static int zbufsize_lzo(size_t size)
+{
+	return lzo1x_worst_compress(size);
+}
+#endif
+
+#if IS_ENABLED(CONFIG_OS_MINIDUMP_LZ4_COMPRESS) || IS_ENABLED(CONFIG_OS_MINIDUMP_LZ4HC_COMPRESS)
+static int zbufsize_lz4(size_t size)
+{
+	return LZ4_compressBound(size);
+}
+#endif
+
+#if IS_ENABLED(CONFIG_OS_MINIDUMP_842_COMPRESS)
+static int zbufsize_842(size_t size)
+{
+	return size;
+}
+#endif
+
+#if IS_ENABLED(CONFIG_OS_MINIDUMP_ZSTD_COMPRESS)
+static int zbufsize_zstd(size_t size)
+{
+	return ZSTD_compressBound(size);
+}
+#endif
+
+static const struct osdump_zbackend *zbackend __ro_after_init;
+
+static const struct osdump_zbackend zbackends[] = {
+#if IS_ENABLED(CONFIG_OS_MINIDUMP_DEFLATE_COMPRESS)
+	{
+		.zbufsize       = zbufsize_deflate,
+		.name           = "deflate",
+	},
+#endif
+#if IS_ENABLED(CONFIG_OS_MINIDUMP_LZO_COMPRESS)
+	{
+		.zbufsize       = zbufsize_lzo,
+		.name           = "lzo",
+	},
+#endif
+#if IS_ENABLED(CONFIG_OS_MINIDUMP_LZ4_COMPRESS)
+	{
+		.zbufsize       = zbufsize_lz4,
+		.name           = "lz4",
+	},
+#endif
+#if IS_ENABLED(CONFIG_OS_MINIDUMP_LZ4HC_COMPRESS)
+	{
+		.zbufsize       = zbufsize_lz4,
+		.name           = "lz4hc",
+	},
+#endif
+#if IS_ENABLED(CONFIG_OS_MINIDUMP_842_COMPRESS)
+	{
+		.zbufsize       = zbufsize_842,
+		.name           = "842",
+	},
+#endif
+#if IS_ENABLED(CONFIG_OS_MINIDUMP_ZSTD_COMPRESS)
+	{
+		.zbufsize       = zbufsize_zstd,
+		.name           = "zstd",
+	},
+#endif
+	{ }
+};
+
+/*
+ * Compress one chunk and append it to the dump region.
+ *
+ * @in:   source data
+ * @out:  destination inside the dump region
+ * @size: number of bytes at @in
+ * @left: in/out, bytes still free at @out
+ *
+ * The chunk is compressed into the bounce buffer first and only copied to
+ * @out when the result fits into *@left; otherwise *@left is set to 0 and
+ * nothing is copied.
+ *
+ * Returns the compressed length (even when it did not fit), or a negative
+ * error code when compression is unavailable or failed.
+ */
+int osdump_compress(const void *in, void *out, unsigned int size, int *left)
+{
+	int ret;
+	/* Let the compressor use the whole bounce buffer, not just @size bytes. */
+	unsigned int outlen = big_oops_buf_sz;
+
+	if (!IS_ENABLED(CONFIG_OS_MINIDUMP_COMPRESS))
+		return -EINVAL;
+
+	/* Compression setup may have failed or may not have run yet. */
+	if (!tfm || !big_oops_buf)
+		return -EINVAL;
+
+	ret = crypto_comp_compress(tfm, in, size, big_oops_buf, &outlen);
+	if (ret) {
+		pr_err(DUMP_BANNER "crypto_comp_compress failed, ret = %d!\n", ret);
+		return ret;
+	}
+
+	/* A non-positive *left must never pass an unsigned comparison. */
+	if (*left > 0 && outlen <= (unsigned int)*left) {
+		memcpy(out, big_oops_buf, outlen);
+		*left -= outlen;
+	} else {
+		*left = 0;
+	}
+
+	return outlen;
+}
+
+/*
+ * Set up the compression transform and the bounce buffer used by
+ * osdump_compress().
+ *
+ * Does nothing when no backend has been selected or when setup has already
+ * been done. On any failure the function returns without setting tfm,
+ * big_oops_buf or big_oops_buf_sz.
+ */
+static void allocate_buf_for_compression(void)
+{
+	struct crypto_comp *ctx;
+	int size;
+	char *buf;
+
+	/* Skip if not built-in or compression backend not selected yet. */
+	if (!IS_ENABLED(CONFIG_OS_MINIDUMP_COMPRESS) || !zbackend)
+		return;
+
+	/* Skip if compression init already done. */
+	if (tfm)
+		return;
+
+	if (!crypto_has_comp(zbackend->name, 0, 0)) {
+		pr_err(DUMP_BANNER "Unknown compression: %s\n", zbackend->name);
+		return;
+	}
+
+	/* Ask the backend how large the buffer must be for a CRYPTO_SIZE input. */
+	size = zbackend->zbufsize(CRYPTO_SIZE);
+	if (size <= 0) {
+		pr_err(DUMP_BANNER "Invalid compression size for %s: %d\n",
+				zbackend->name, size);
+		return;
+	}
+	/*
+	 * NOTE(review): rounding down can make the buffer smaller than the
+	 * bound the backend just reported - confirm this is intended.
+	 */
+	size = round_down(size, PAGE_SIZE);
+
+	buf = kmalloc(size, GFP_KERNEL);
+	if (!buf)
+		return;
+
+	ctx = crypto_alloc_comp(zbackend->name, 0, 0);
+	if (IS_ERR_OR_NULL(ctx)) {
+		kfree(buf);
+		pr_err(DUMP_BANNER "crypto_alloc_comp('%s') failed: %ld\n", zbackend->name,
+				PTR_ERR(ctx));
+		return;
+	}
+
+	/* A non-NULL big_oops_buf indicates compression is available. */
+	tfm = ctx;
+	big_oops_buf_sz = size;
+	big_oops_buf = buf;
+
+	pr_info(DUMP_BANNER "Using os minidump compression: %s\n", zbackend->name);
+}
+
+static void free_buf_for_compression(void)
+{
+	if (IS_ENABLED(CONFIG_OS_MINIDUMP_COMPRESS) && tfm) {
+		crypto_free_comp(tfm);
+		tfm = NULL;
+	}
+	kfree(big_oops_buf);
+	big_oops_buf = NULL;
+	big_oops_buf_sz = 0;
+}
+
+static void __init osdump_choose_compression(void)
+{
+	const struct osdump_zbackend *step;
+
+	if (!compress)
+		return;
+
+	for (step = zbackends; step->name; step++) {
+		if (!strcmp(compress, step->name)) {
+			zbackend = step;
+			return;
+		}
+	}
+}
+
+static int __init compr_init(void)
+{
+	osdump_choose_compression();
+
+	allocate_buf_for_compression();
+
+	return 0;
+}
+late_initcall(compr_init);
+
+static void __exit compr_exit(void)
+{
+	free_buf_for_compression();
+}
+module_exit(compr_exit);
+
+MODULE_AUTHOR("qiruipeng");
+MODULE_LICENSE("GPL");
diff --git a/kernel/osdump/core.c b/kernel/osdump/core.c
new file mode 100644
index 000000000000..0da8c2ac3fc7
--- /dev/null
+++ b/kernel/osdump/core.c
@@ -0,0 +1,608 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * OS Minidump - core implement parts.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/seq_file.h>
+#include <linux/proc_fs.h>
+#include <linux/mm.h>
+#include <linux/platform_device.h>
+#include <linux/of_device.h>
+#include <linux/io.h>
+#include <linux/device.h>
+#include "internal.h"
+#include <linux/slab.h>
+#include "../../mm/slab.h"
+#include <linux/nmi.h>
+#include <linux/sched.h>
+#include <linux/byteorder/generic.h>
+#include <asm/kernel-pgtable.h>
+#include <linux/osdump.h>
+#include <linux/cpu.h>
+#include "extern.h"
+
+#define OS_DUMP		"osdump"
+
+struct osdump_variables osdump_var;
+EXPORT_SYMBOL_GPL(osdump_var);
+
+/*
+ * Convert a 64-bit value between CPU and big-endian byte order.
+ *
+ * Building this from two htonl() calls only swaps the 32-bit halves on a
+ * big-endian kernel, so use the generic byteorder helpers, which are
+ * correct for both endiannesses.
+ */
+static inline u64 htonll(u64 val)
+{
+	return (__force u64)cpu_to_be64(val);
+}
+
+static inline u64 ntohll(u64 val)
+{
+	return be64_to_cpu((__force __be64)val);
+}
+
+/*
+ * Compress a memory range into the dump region.
+ *
+ * @src:        kernel virtual address of the data
+ * @phys_addr:  physical address of @src, recorded in the table entry
+ * @src_size:   number of bytes to dump
+ * @osdump_var: dump state (write cursor, table cursor, remaining space)
+ * @sum:        incremented by the number of source bytes stored
+ *
+ * The range is split into chunks of at most big_oops_buf_sz bytes and one
+ * table entry is written per stored chunk. When the data region or the
+ * table is exhausted, osdump_var->rem is set to 0, which callers use as the
+ * signal to stop dumping.
+ *
+ * Returns the number of table entries written.
+ */
+static int common_store(void *src, phys_addr_t phys_addr, int src_size,
+			struct osdump_variables *osdump_var, int *sum)
+{
+	void *addr = osdump_var->mem_address;
+	int chunk, size, rem_before, cnt = 0;
+
+	/* Without a bounce buffer the chunk size is 0 and the loop never advances. */
+	if (!big_oops_buf_sz) {
+		osdump_var->rem = 0;
+		return 0;
+	}
+
+	while (src_size > 0) {
+		/* Never write a table entry past the last slot. */
+		if (osdump_var->rem <= 0 || osdump_var->meta > osdump_var->meta_end) {
+			osdump_var->rem = 0;
+			break;
+		}
+
+		chunk = src_size > big_oops_buf_sz ? big_oops_buf_sz : src_size;
+		rem_before = osdump_var->rem;
+		size = osdump_compress(src, osdump_var->dst, chunk, &osdump_var->rem);
+
+		/* The chunk did not fit and was not copied: the region is full. */
+		if (size > rem_before) {
+			osdump_var->rem = 0;
+			break;
+		}
+
+		/* Record only chunks that were stored; skip ones that failed to compress. */
+		if (size >= 0) {
+			*sum += chunk;
+			osdump_var->meta->start = htonl(osdump_var->dst - addr);
+			osdump_var->meta->size = htonl(size);
+			osdump_var->meta->src = htonll(phys_addr);
+			osdump_var->dst += size;
+			osdump_var->meta++;
+			cnt++;
+		}
+
+		src += chunk;
+		phys_addr += chunk;
+		src_size -= chunk;
+	}
+
+	/* Table exhausted: tell callers to stop. */
+	if (osdump_var->meta > osdump_var->meta_end)
+		osdump_var->rem = 0;
+
+	return cnt;
+}
+
+/*
+ * Dump the stack pages of the currently running task, one page at a time.
+ *
+ * Returns the number of table entries written.
+ */
+static int stack_dump(struct osdump_variables *osdump_var)
+{
+	struct vm_struct *area = current->stack_vm_area;
+	int cnt = 0;
+	int sum = 0;
+	int i;
+
+	/*
+	 * stack_vm_area may be NULL (a stack that was not set up through the
+	 * vmalloc area); there is no page array to walk in that case.
+	 */
+	if (!area || !area->pages)
+		return 0;
+
+	for (i = 0; i < area->nr_pages; i++) {
+		cnt += common_store(area->addr + i * PAGE_SIZE, page_to_phys(area->pages[i]),
+					PAGE_SIZE, osdump_var, &sum);
+		if (osdump_var->rem <= 0)
+			break;
+	}
+
+	return cnt;
+}
+
+#if CONFIG_PGTABLE_LEVELS >= 2
+static int process_pmd(pmd_t *pmd, struct osdump_variables *osdump_var, int *sum)
+{
+	int i, cnt = 0;
+	struct page *page;
+
+	for (i = 0; i < PTRS_PER_PMD; i++) {
+
+		if (!pmd_none(*pmd)) {
+			page = pmd_page(*pmd);
+			if ((pmd_val(*pmd) & PMD_ATTRINDX_MASK) >> 2 == MT_NORMAL) {
+				if ((pmd_val(*pmd) & PMD_TYPE_MASK)  == PMD_TYPE_TABLE) {
+					cnt += common_store(page_to_virt(page), page_to_phys(page),
+								PAGE_SIZE, osdump_var, sum);
+
+				// save struct page here
+				} else if ((pmd_val(*pmd) & PMD_TYPE_MASK)  == PMD_TYPE_SECT
+						&& !(pmd_val(*pmd) & (PMD_SECT_RDONLY))) {
+					cnt += common_store(page_to_virt(page), page_to_phys(page),
+								PMD_SIZE, osdump_var, sum);
+
+				}
+				if (osdump_var->rem <= 0)
+					break;
+			}
+		}
+
+		pmd++;
+	}
+	return cnt;
+}
+#endif
+
+#if CONFIG_PGTABLE_LEVELS >= 3
+static int process_pud(pud_t *pud, struct osdump_variables *osdump_var, int *sum)
+{
+	int i, cnt = 0;
+	struct page *page;
+
+	for (i = 0; i < PTRS_PER_PUD; i++) {
+		if ((pud_val(*pud))) {
+			page = pud_page(*pud);
+
+			cnt += common_store(page_to_virt(page), page_to_phys(page),
+						PAGE_SIZE, osdump_var, sum);
+			if (osdump_var->rem <= 0)
+				break;
+
+			cnt += process_pmd(pud_pgtable(*pud), osdump_var, sum);
+			if (osdump_var->rem <= 0)
+				break;
+
+		}
+
+		pud++;
+	}
+
+	return cnt;
+}
+#endif
+
+#if CONFIG_PGTABLE_LEVELS >= 4
+static int process_p4d(p4d_t *p4d, struct osdump_variables *osdump_var, int *sum)
+{
+	int i, cnt = 0;
+	struct page *page;
+
+	for (i = 0; i < PTRS_PER_P4D; i++) {
+		if ((p4d_val(*p4d))) {
+			page = p4d_page(*p4d);
+
+			cnt += common_store(page_to_virt(page), page_to_phys(page),
+						PAGE_SIZE, osdump_var, sum);
+			if (osdump_var->rem <= 0)
+				break;
+
+			cnt += process_pud(p4d_pgtable(*p4d), osdump_var, sum);
+			if (osdump_var->rem <= 0)
+				break;
+
+		}
+
+		p4d++;
+	}
+
+	return cnt;
+}
+#endif
+
+#if CONFIG_PGTABLE_LEVELS == 5
+static int process_pgd(pgd_t *pgd, struct osdump_variables *osdump_var, int *sum)
+{
+	int i, cnt = 0;
+	struct page *page;
+
+	for (i = 0; i < PTRS_PER_PGD; i++) {
+		if ((pgd_val(*pgd))) {
+			page = pgd_page(*pgd);
+
+			cnt += common_store(page_to_virt(page), page_to_phys(page),
+						PAGE_SIZE, osdump_var, sum);
+			if (osdump_var->rem <= 0)
+				break;
+
+			cnt += process_p4d(pgd_pgtable(*pgd), osdump_var, sum);
+			if (osdump_var->rem <= 0)
+				break;
+
+		}
+
+		pgd++;
+	}
+
+	return cnt;
+}
+#endif
+
+/*
+ * Dump the page tables of init_mm: first the top-level table page itself,
+ * then the lower levels through the process_*() walker that matches
+ * CONFIG_PGTABLE_LEVELS.
+ *
+ * Returns the number of table entries written.
+ *
+ * NOTE(review): for levels below 5 the pgd pointer is cast to the next
+ * level's type; with folded levels the PTRS_PER_* count of that type may
+ * not cover every top-level entry - confirm against the architecture's
+ * page-table layout.
+ */
+static int ptl_dump(struct osdump_variables *osdump_var)
+{
+	int cnt = 0, sum = 0;
+
+	struct mm_struct *mm = &init_mm;
+	pgd_t *pgd = mm->pgd;
+
+	/* Store the top-level table page before walking into it. */
+	cnt += common_store(pgd, virt_to_phys(pgd), PAGE_SIZE, osdump_var, &sum);
+	if (osdump_var->rem <= 0)
+		return cnt;
+
+#if CONFIG_PGTABLE_LEVELS == 5
+	cnt += process_pgd(pgd, osdump_var, &sum);
+#elif CONFIG_PGTABLE_LEVELS == 4
+	cnt += process_p4d((p4d_t *) pgd, osdump_var, &sum);
+#elif CONFIG_PGTABLE_LEVELS == 3
+	cnt += process_pud((pud_t *) pgd, osdump_var, &sum);
+#elif CONFIG_PGTABLE_LEVELS == 2
+	cnt += process_pmd((pmd_t *) pgd, osdump_var, &sum);
+#endif
+
+	return cnt;
+}
+
+/*
+ * Dump statically allocated kernel data and per-CPU runqueues:
+ * the [_sdata, _edata) range, the [__bss_start, __bss_stop) range,
+ * two pages starting at mem_section, and for every online CPU the object
+ * returned by get_rq().
+ *
+ * Stops early as soon as the dump region is exhausted (rem <= 0).
+ * Returns the number of table entries written.
+ */
+static int other_dump(struct osdump_variables *osdump_var)
+{
+	int cpu, len, sum = 0, cnt = 0;
+	void *rq;
+
+	cnt += common_store(_sdata, virt_to_phys(_sdata), _edata - _sdata, osdump_var, &sum);
+	if (osdump_var->rem <= 0)
+		return cnt;
+
+	cnt += common_store(__bss_start, virt_to_phys(__bss_start),
+				__bss_stop - __bss_start, osdump_var, &sum);
+	if (osdump_var->rem <= 0)
+		return cnt;
+
+	/* NOTE(review): the two-page length is hard-coded - confirm it matches the real size. */
+	cnt += common_store(mem_section, virt_to_phys(mem_section), PAGE_SIZE<<1, osdump_var, &sum);
+	if (osdump_var->rem <= 0)
+		return cnt;
+
+	for_each_online_cpu(cpu) {
+		/* get_rq() hands back the runqueue pointer and its length in bytes. */
+		len = get_rq(cpu, &rq);
+
+		cnt += common_store(rq, virt_to_phys(rq), len, osdump_var, &sum);
+		if (osdump_var->rem <= 0)
+			return cnt;
+
+	}
+
+	return cnt;
+}
+
+static int note_dump(struct osdump_variables *osdump_var)
+{
+	int cnt = 0;
+	int sum = 0;
+
+	cnt += common_store(vmcoreinfo_note, virt_to_phys(vmcoreinfo_note),
+				get_note_size(), osdump_var, &sum);
+
+	return cnt;
+}
+
+
+struct unrecord_struct {
+	const char *name;
+	struct kmem_cache *s;
+};
+
+/*
+ * In certain scenarios, some slab objects may experience abnormally high growth,
+ * which can lead to an excessive increase in memory dump size. or some objects
+ * are unlikely to cause issues, they can be excluded from memory dumps.
+ * This exclusion helps to reduce the overall size of the dump, making crash
+ * analysis more manageable and efficient.
+ */
+static struct unrecord_struct unrecord_slab[] = {
+	[0] = {
+		.name = "dentry",
+	},
+	[1] = {
+		.name = "inode_cache",
+	},
+	[2] = {
+		.name = "ext4_inode_cache",
+	},
+	[3] = {
+		.name = "proc_inode_cache",
+	},
+	[4] = {
+		.name = "kernfs_node_cache",
+	},
+
+	[5] = {
+		.name = "UDP",
+	},
+	[6] = {
+		.name = "TCP",
+	},
+	[7] = {
+		.name = "fs_cache",
+	},
+};
+
+static int slab_dump(struct osdump_variables *osdump_var)
+{
+	int sum = 0, i;
+
+	unsigned long flags;
+	struct kmem_cache *s;
+	struct page *page;
+	struct kmem_cache_node *node;
+	struct list_head *head;
+	struct kmem_cache_cpu *c;
+	struct slab *slab;
+
+	int cnt = 0, cpu;
+	pg_data_t *pgdat;
+
+	if (!mutex_trylock(&slab_mutex)) {
+		pr_err(DUMP_BANNER "can't acquire slab_mutex, just skip dump slab!\n");
+		return 0;
+	}
+
+	list_for_each_entry(s, &slab_caches, list) {
+		for (i = 0; i < ARRAY_SIZE(unrecord_slab); i++) {
+			if (!strcmp(unrecord_slab[i].name, s->name)) {
+				unrecord_slab[i].s = s;
+				break;
+			}
+		}
+
+		if (i != ARRAY_SIZE(unrecord_slab))
+			continue;
+
+		touch_nmi_watchdog();
+		touch_all_softlockup_watchdogs();
+
+		for_each_possible_cpu(cpu) {
+			local_irq_save(flags);
+			c = per_cpu_ptr(s->cpu_slab, cpu);
+
+			slab = READ_ONCE(c->slab);
+			if (!slab) {
+				local_irq_restore(flags);
+				continue;
+			}
+
+			page = slab_page(slab);
+			cnt += common_store(page_to_virt(page), page_to_phys(page),
+					PAGE_SIZE << compound_order(page), osdump_var, &sum);
+			if (osdump_var->rem <= 0) {
+				local_irq_restore(flags);
+				goto out_ret;
+			}
+
+
+#ifdef CONFIG_SLUB_CPU_PARTIAL
+			slab = slub_percpu_partial_read_once(c);
+			while (slab) {
+				page = slab_page(slab);
+				cnt += common_store(page_to_virt(page), page_to_phys(page),
+					PAGE_SIZE << compound_order(page), osdump_var, &sum);
+				if (osdump_var->rem <= 0) {
+					local_irq_restore(flags);
+					goto out_ret;
+				}
+
+				slab = slab->next;
+			}
+#endif
+			local_irq_restore(flags);
+		}
+
+		for_each_online_pgdat(pgdat) {
+			node = s->node[pgdat->node_id];
+
+#ifdef CONFIG_SLAB
+
+			head = &node->slabs_full;
+			list_for_each_entry(slab, head, slab_list) {
+				page = slab_page(slab);
+				cnt += common_store(page_to_virt(page), page_to_phys(page),
+					PAGE_SIZE << compound_order(page), osdump_var, &sum);
+				if (osdump_var->rem <= 0)
+					goto out_ret;
+
+			}
+
+			head = &node->slabs_partial;
+			list_for_each_entry(slab, head, slab_list) {
+				page = slab_page(slab);
+				cnt += common_store(page_to_virt(page), page_to_phys(page),
+					PAGE_SIZE << compound_order(page), osdump_var, &sum);
+				if (osdump_var->rem <= 0)
+					goto out_ret;
+
+			}
+
+#elif CONFIG_SLUB
+			spin_lock_irqsave(&node->list_lock, flags);
+			head = &node->full;
+
+
+			list_for_each_entry(slab, head, slab_list) {
+				page = slab_page(slab);
+				cnt += common_store(page_to_virt(page), page_to_phys(page),
+					PAGE_SIZE << compound_order(page), osdump_var, &sum);
+				if (osdump_var->rem <= 0) {
+					spin_unlock_irqrestore(&node->list_lock, flags);
+					goto out_ret;
+				}
+			}
+
+			spin_unlock_irqrestore(&node->list_lock, flags);
+
+			spin_lock_irqsave(&node->list_lock, flags);
+			head = &node->partial;
+
+			list_for_each_entry(slab, head, slab_list) {
+				page = slab_page(slab);
+				cnt += common_store(page_to_virt(page), page_to_phys(page),
+					PAGE_SIZE << compound_order(page), osdump_var, &sum);
+				if (osdump_var->rem <= 0) {
+					spin_unlock_irqrestore(&node->list_lock, flags);
+					goto out_ret;
+				}
+			}
+
+
+			spin_unlock_irqrestore(&node->list_lock, flags);
+#endif
+
+		}
+
+	}
+
+out_ret:
+	mutex_unlock(&slab_mutex);
+
+	return cnt;
+}
+
+/*
+ * Write a complete dump into the reserved region described by @osdump_var.
+ *
+ * The region starts with a table_hdr, followed by the table entries (both
+ * inside the first MDUMP_HDR_SIZE bytes) and then the compressed data. The
+ * header is finalized last, so it always describes what was actually stored,
+ * even when the region fills up part-way through.
+ *
+ * Returns the number of table entries written, or -ENODEV when no usable
+ * region is available.
+ */
+static int dump_mem(struct osdump_variables *osdump_var)
+{
+	int cnt = 0;
+	struct table_hdr *hdr = (struct table_hdr *)(osdump_var->mem_address);
+
+	/* Nothing mapped, or too small to hold the meta region plus any data. */
+	if (!hdr || osdump_var->mem_size <= MDUMP_HDR_SIZE)
+		return -ENODEV;
+
+	memset(hdr, 0, sizeof(struct table_hdr));
+
+	/* rem is an int; clamp so a very large region cannot wrap it negative. */
+	osdump_var->rem = min_t(unsigned long, osdump_var->mem_size - MDUMP_HDR_SIZE,
+				INT_MAX);
+	osdump_var->dst = (void *)osdump_var->mem_address + MDUMP_HDR_SIZE;
+	/* meta_end is the last valid table slot, not one past it. */
+	osdump_var->meta_end = (void *)osdump_var->mem_address + MDUMP_HDR_SIZE -
+				sizeof(struct table_entry);
+	osdump_var->meta = (void *)osdump_var->mem_address + sizeof(struct table_hdr);
+
+	cnt += note_dump(osdump_var);
+	if (osdump_var->rem <= 0)
+		goto out;
+
+	cnt += other_dump(osdump_var);
+	if (osdump_var->rem <= 0)
+		goto out;
+
+	cnt += stack_dump(osdump_var);
+	if (osdump_var->rem <= 0)
+		goto out;
+
+	cnt += slab_dump(osdump_var);
+	if (osdump_var->rem <= 0)
+		goto out;
+
+	cnt += ptl_dump(osdump_var);
+
+out:
+	hdr->count = htonl(cnt);
+	hdr->size = htonl(osdump_var->dst - (void *)osdump_var->mem_address);
+	/*
+	 * strscpy() takes the destination size and always NUL-terminates;
+	 * passing strlen(MDUMP_MAGIC) would drop the last magic character
+	 * and make hdr_valid() fail.
+	 */
+	strscpy(hdr->magic, MDUMP_MAGIC, sizeof(hdr->magic));
+	hdr->checksum = cal_checksum(hdr);
+
+	return cnt;
+}
+
+/*
+ * Entry point: write a minidump into the reserved dump region.
+ *
+ * Returns 0 on success or the negative error code reported by dump_mem().
+ */
+int osdump(void)
+{
+	int ret;
+
+	ret = dump_mem(&osdump_var);
+	if (ret < 0) {
+		pr_err(DUMP_BANNER "dump mem failed!\n");
+		/* Hand the real error code to the caller instead of a bare -1. */
+		return ret;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(osdump);
+
+/*
+ * Map the physical range [@start, @start + @size) into kernel virtual
+ * address space with vmap().
+ *
+ * @memtype selects the mapping attributes: non-zero gives a non-cached
+ * mapping, zero a write-combined one.
+ *
+ * Returns the virtual address that corresponds to @start, or NULL on
+ * failure.
+ */
+static void *persistent_ram_vmap(phys_addr_t start, size_t size,
+		unsigned int memtype)
+{
+	struct page **pages;
+	phys_addr_t page_start;
+	unsigned int page_count;
+	pgprot_t prot;
+	unsigned int i;
+	void *vaddr;
+
+	page_start = start - offset_in_page(start);
+	page_count = DIV_ROUND_UP(size + offset_in_page(start), PAGE_SIZE);
+
+	if (memtype)
+		prot = pgprot_noncached(PAGE_KERNEL);
+	else
+		prot = pgprot_writecombine(PAGE_KERNEL);
+
+	pages = kmalloc_array(page_count, sizeof(struct page *), GFP_KERNEL);
+	if (!pages)
+		return NULL;
+
+	for (i = 0; i < page_count; i++) {
+		phys_addr_t addr = page_start + i * PAGE_SIZE;
+
+		pages[i] = pfn_to_page(addr >> PAGE_SHIFT);
+	}
+	vaddr = vmap(pages, page_count, VM_MAP, prot);
+	kfree(pages);
+
+	/* Adding the in-page offset to a failed mapping would hide the failure. */
+	if (!vaddr)
+		return NULL;
+
+	/*
+	 * Since vmap() uses page granularity, we must add the offset
+	 * into the page here, to get the byte granularity address
+	 * into the mapping to represent the actual "start" location.
+	 */
+	return vaddr + offset_in_page(start);
+}
+
+/*
+ * Look up the device's first memory resource and map it as the dump region.
+ *
+ * On success @pdata->mem_address and @pdata->mem_size describe the mapped
+ * region; on failure @pdata is left untouched.
+ *
+ * Returns 0 on success, -EINVAL when the resource is missing, or -ENOMEM
+ * when the region cannot be mapped.
+ */
+static int osdump_parse_dt(struct platform_device *pdev, struct osdump_variables *pdata)
+{
+	struct resource *res;
+	void *vaddr;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res) {
+		pr_err(DUMP_BANNER "failed to locate DT /reserved-memory resource!\n");
+		return -EINVAL;
+	}
+
+	vaddr = persistent_ram_vmap(res->start, resource_size(res), 0);
+	if (!vaddr) {
+		pr_err(DUMP_BANNER "failed to map dump region!\n");
+		return -ENOMEM;
+	}
+
+	/* Publish address and size together, only once the mapping is known good. */
+	pdata->mem_address = vaddr;
+	pdata->mem_size = resource_size(res);
+
+	return 0;
+}
+
+/*
+ * Platform driver probe: attach the global dump state to the device and,
+ * for device-tree devices, set up the dump region from the DT resource.
+ *
+ * Returns 0 on success or the error reported by osdump_parse_dt().
+ */
+static int osdump_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+
+	dev->platform_data = &osdump_var;
+
+	/* Fail the probe rather than binding to a device without a usable region. */
+	if (dev_of_node(dev))
+		return osdump_parse_dt(pdev, &osdump_var);
+
+	return 0;
+}
+
+static int osdump_remove(struct platform_device *pdev)
+{
+	return 0;
+}
+
+
+static const struct of_device_id osdump_of_match[] = {
+	{ .compatible = "osdump", },
+	{}
+};
+
+static struct platform_driver osdump_driver = {
+	.probe = osdump_probe,
+	.remove = osdump_remove,
+	.driver = {
+		.name = OS_DUMP,
+		.of_match_table = osdump_of_match,
+	},
+};
+
+static int __init osdump_init(void)
+{
+	return platform_driver_register(&osdump_driver);
+}
+postcore_initcall(osdump_init);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("os minidump driver");
+MODULE_AUTHOR("qiruipeng");
diff --git a/kernel/osdump/extern.h b/kernel/osdump/extern.h
new file mode 100644
index 000000000000..da0d25ef4662
--- /dev/null
+++ b/kernel/osdump/extern.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _LINUX_OS_MINIDUMP_EXTERN_H
+#define _LINUX_OS_MINIDUMP_EXTERN_H
+
+extern struct kmem_cache *task_struct_cachep;
+extern size_t big_oops_buf_sz;
+/* Must match the definition in compr.c, which takes an unsigned size. */
+extern int osdump_compress(const void *in, void *out, unsigned int size, int *left);
+extern int get_rq(int cpu, void **rq);
+extern u32 *vmcoreinfo_note;
+extern int get_note_size(void);
+
+#endif
diff --git a/kernel/osdump/inode.c b/kernel/osdump/inode.c
new file mode 100644
index 000000000000..9356c4b8795f
--- /dev/null
+++ b/kernel/osdump/inode.c
@@ -0,0 +1,77 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Interface for accessing the crash dump from the system's previous life.
+ */
+#include <linux/module.h>
+#include <linux/proc_fs.h>
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include "internal.h"
+
+struct inode_priv {
+	void *addr;
+	size_t size;
+	struct proc_dir_entry *proc_osdump;
+};
+static struct inode_priv *inode_priv;
+
+static ssize_t osdump_read(struct file *file, char __user *buf,
+		size_t count, loff_t *ppos)
+{
+	if (!inode_priv)
+		return 0;
+
+	return simple_read_from_buffer(buf, count, ppos, inode_priv->addr, inode_priv->size);
+}
+
+static const struct proc_ops osdump_proc_ops = {
+	.proc_read = osdump_read,
+	.proc_lseek = default_llseek,
+};
+
+/*
+ * Expose a dump left in the reserved region as /proc/osdump.
+ *
+ * The proc entry is only created when the region is mapped and holds a
+ * header with a valid magic and checksum and a plausible size. Finding no
+ * dump is not an error.
+ *
+ * Returns 0 when there is no dump or the entry was created, -ENOMEM or
+ * -ENOENT on failure.
+ */
+static int __init osdump_init_fs(void)
+{
+	int ret = 0;
+	size_t size;
+	struct table_hdr *hdr;
+
+	/* No region mapped (e.g. no device was probed): nothing to expose. */
+	hdr = (struct table_hdr *)osdump_var.mem_address;
+	if (!hdr || osdump_var.mem_size < sizeof(*hdr))
+		return 0;
+
+	if (!hdr_valid(hdr))
+		return 0;
+
+	/* The header was not written by this boot; bound its size by the region. */
+	size = ntohl(hdr->size);
+	if (size > osdump_var.mem_size) {
+		pr_err(DUMP_BANNER "dump size exceeds dump region, ignoring dump!\n");
+		return 0;
+	}
+
+	inode_priv = kzalloc(sizeof(struct inode_priv), GFP_KERNEL);
+	if (!inode_priv)
+		return -ENOMEM;
+
+	inode_priv->addr = hdr;
+	inode_priv->size = size;
+
+	inode_priv->proc_osdump = proc_create("osdump", 0400, NULL, &osdump_proc_ops);
+	if (!inode_priv->proc_osdump) {
+		ret = -ENOENT;
+		pr_err(DUMP_BANNER "osdump create inode failed!\n");
+		goto err;
+	}
+
+	return 0;
+
+err:
+	kfree(inode_priv);
+	inode_priv = NULL;
+
+	return ret;
+}
+fs_initcall(osdump_init_fs);
+
+void osdump_cleanup_fs(void)
+{
+	if (inode_priv && inode_priv->proc_osdump) {
+		proc_remove(inode_priv->proc_osdump);
+		inode_priv->proc_osdump = NULL;
+	}
+
+	kfree(inode_priv);
+	inode_priv = NULL;
+}
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("qiruipeng");
diff --git a/kernel/osdump/internal.h b/kernel/osdump/internal.h
new file mode 100644
index 000000000000..5dc09fe37295
--- /dev/null
+++ b/kernel/osdump/internal.h
@@ -0,0 +1,95 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef	_LINUX_OS_MINIDUMP_INTERNAL_H
+#define	_LINUX_OS_MINIDUMP_INTERNAL_H
+
+/*
+ * We only dump the data we are interested in, each of which represents
+ * a continuous region - this could be a structure or pages of data. These
+ * data are compressed first and then stored into one or several entries,
+ * each with a maximum size of 32K.
+ *
+ * We use reserved memory to store these dump data, referred to as
+ * 'dump region'. This region consists of two parts: The first part records
+ * the meta info and is thus, known as the 'meta region' and the second part
+ * stores the raw data in sequence and is known as the 'data region'.
+ *
+ * At the beginning of the meta region lies a header that records the total
+ * size and count of entries. Following the header, there's a table which
+ * holds metadata for each entry. This metadata records the physical address
+ * of the dumped data in memory, in addition to the offset and size of the
+ * data stored in the dump region.
+ *
+ * The second part of the dump region stores compressed data for each entry
+ * in an orderly manner.
+ */
+
+// fixed size for meta region.
+#define MDUMP_HDR_SIZE		(192*1024)
+#define MDUMP_MAGIC		"osdump"
+#define DUMP_BANNER		"osdump: "
+
+#define CRYPTO_SIZE		(PAGE_SIZE << 3)
+
+struct table_hdr {
+	/* entries count */
+	unsigned int count;
+	/* total size */
+	unsigned int size;
+	/* for aligned */
+	union {
+		int reserve[2];
+		struct {
+			char magic[7];
+			unsigned char  checksum;
+		};
+	};
+};
+
+struct table_entry {
+	/*  offset in dump region */
+	unsigned int start;
+	unsigned int size;
+	/* physical addr in memory */
+	unsigned long long src;
+};
+
+/*
+ * Runtime information: this struct save the runtime information used for
+ * os minidump.
+ */
+struct osdump_variables {
+	/* virtual addr for dump region */
+	void           *mem_address;
+	/* total size for dump data */
+	unsigned long   mem_size;
+	/* curr entry's metadata */
+	struct table_entry *meta;
+	/* last metadata */
+	struct table_entry *meta_end;
+	/* remain size for dump data */
+	int             rem;
+	/* addr stored  in dump region */
+	void            *dst;
+};
+
+
+extern struct osdump_variables osdump_var;
+
+/*
+ * Compute the header checksum: the 8-bit sum of every byte of the header
+ * except the last one.
+ * NOTE(review): this assumes the checksum field is the final byte of
+ * struct table_hdr - confirm if the layout changes.
+ */
+static inline unsigned char cal_checksum(struct table_hdr *hdr)
+{
+	int i, len = sizeof(struct table_hdr) - 1;
+	unsigned char ch = 0, *ptr = (unsigned char *)hdr;
+
+	for (i = 0; i < len; i++)
+		ch += ptr[i];
+
+	return ch;
+}
+
+/*
+ * A header is valid when its magic starts with MDUMP_MAGIC (all
+ * strlen(MDUMP_MAGIC) characters must match) and the stored checksum
+ * equals the one recomputed by cal_checksum().
+ */
+static inline bool hdr_valid(struct table_hdr *hdr)
+{
+	return !strncmp(MDUMP_MAGIC, hdr->magic, strlen(MDUMP_MAGIC)) &&
+						hdr->checksum == cal_checksum(hdr);
+}
+#endif
-- 
2.17.1


_______________________________________________
kexec mailing list
kexec@xxxxxxxxxxxxxxxxxxx
http://lists.infradead.org/mailman/listinfo/kexec



[Index of Archives]     [LM Sensors]     [Linux Sound]     [ALSA Users]     [ALSA Devel]     [Linux Audio Users]     [Linux Media]     [Kernel]     [Gimp]     [Yosemite News]     [Linux Media]

  Powered by Linux