Re: kmem -s/-S not working properly on RHEL8.6/8.7

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi Georges, Lianbo,

On 2023/02/09 13:55, lijiang wrote:
> On Wed, Feb 8, 2023 at 9:08 PM Aureau, Georges (Kernel Tools ERT) <
> georges.aureau@xxxxxxx> wrote:
> 
>>
>>> It could be good to check the return value of gdb_pass_through(). For
>> example:
>>
>> This would not bring much value, if disasm fails, tmpfile would not have
>> much to process anyway.
>>
>>
> It's true. But if the gdb_pass_through() call fails, it will still
> execute the remaining code. Seems that is not expected behavior.

OK, I've added the return value check, along with a commit log based on
Georges' description. Is the attached patch fine?

Thanks,
Kazu
From 30f9dacd829df7cd877a47a1bc6307969cf2fd1c Mon Sep 17 00:00:00 2001
From: "Aureau, Georges (Kernel Tools ERT)" <georges.aureau@xxxxxxx>
Date: Wed, 8 Feb 2023 12:09:03 +0000
Subject: [PATCH] Fix "kmem -s|-S" not working properly on RHEL8.6 and later

For CONFIG_SLAB_FREELIST_HARDENED, the crash memory.c:freelist_ptr()
code decides whether an additional bswap is needed using a simple release
test, i.e. THIS_KERNEL_VERSION >= LINUX(5,7,0), which in practice only
matches RHEL9 and beyond.

However, for RHEL8.6 and later, we have CONFIG_SLAB_FREELIST_HARDENED=y,
and we also have the additional bswap, but the current crash is not
handling this case, hence "kmem -s|-S" will not work properly, and free
objects will not be counted nor reported properly.

For example, in a RHEL8.6 x86_64 kdump, for a kmem cache with a single
slab of 42 objects, only the freelist head is seen as free because crash
can't walk the freelist next pointers, so crash wrongly reports 41
allocated objects:

  crash> sys | grep RELEASE
       RELEASE: 4.18.0-372.9.1.el8.x86_64
  crash> kmem -s nfs_commit_data
  CACHE             OBJSIZE  ALLOCATED     TOTAL  SLABS  SSIZE  NAME
  ffff9ad40c7cb2c0      728         41        42      1    32k  nfs_commit_data

When properly accounting for the additional bswap, we can walk the
freelist and find 38 free objects, and crash is now reporting only 4
allocated objects:

  crash> kmem -s nfs_commit_data
  CACHE             OBJSIZE  ALLOCATED     TOTAL  SLABS  SSIZE  NAME
  ffff9ad40c7cb2c0      728          4        42      1    32k  nfs_commit_data

Signed-off-by: Georges Aureau <georges.aureau@xxxxxxx>
---
 defs.h   |  1 +
 memory.c | 49 ++++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 49 insertions(+), 1 deletion(-)

diff --git a/defs.h b/defs.h
index 33a823b7b67c..56d6cf4489c9 100644
--- a/defs.h
+++ b/defs.h
@@ -2638,6 +2638,7 @@ struct vm_table {                /* kernel VM-related data */
 #define SLAB_OVERLOAD_PAGE    (0x8000000)
 #define SLAB_CPU_CACHE       (0x10000000)
 #define SLAB_ROOT_CACHES     (0x20000000)
+#define FREELIST_PTR_BSWAP   (0x40000000)
 
 #define IS_FLATMEM()		(vt->flags & FLATMEM)
 #define IS_DISCONTIGMEM()	(vt->flags & DISCONTIGMEM)
diff --git a/memory.c b/memory.c
index 5141fbea4b40..e0742c1bd3a4 100644
--- a/memory.c
+++ b/memory.c
@@ -320,6 +320,7 @@ static void dump_per_cpu_offsets(void);
 static void dump_page_flags(ulonglong);
 static ulong kmem_cache_nodelists(ulong);
 static void dump_hstates(void);
+static void freelist_ptr_init(void);
 static ulong freelist_ptr(struct meminfo *, ulong, ulong);
 static ulong handle_each_vm_area(struct handle_each_vm_area_args *);
 
@@ -789,6 +790,8 @@ vm_init(void)
 		MEMBER_OFFSET_INIT(kmem_cache_name, "kmem_cache", "name");
 		MEMBER_OFFSET_INIT(kmem_cache_flags, "kmem_cache", "flags");
 		MEMBER_OFFSET_INIT(kmem_cache_random, "kmem_cache", "random");
+		if (VALID_MEMBER(kmem_cache_random))
+			freelist_ptr_init();
 		MEMBER_OFFSET_INIT(kmem_cache_cpu_freelist, "kmem_cache_cpu", "freelist");
 		MEMBER_OFFSET_INIT(kmem_cache_cpu_page, "kmem_cache_cpu", "page");
 		if (INVALID_MEMBER(kmem_cache_cpu_page))
@@ -13932,6 +13935,8 @@ dump_vm_table(int verbose)
 		fprintf(fp, "%sSLAB_CPU_CACHE", others++ ? "|" : "");\
 	if (vt->flags & SLAB_ROOT_CACHES)
 		fprintf(fp, "%sSLAB_ROOT_CACHES", others++ ? "|" : "");\
+	if (vt->flags & FREELIST_PTR_BSWAP)
+		fprintf(fp, "%sFREELIST_PTR_BSWAP", others++ ? "|" : "");\
 	if (vt->flags & USE_VMAP_AREA)
 		fprintf(fp, "%sUSE_VMAP_AREA", others++ ? "|" : "");\
 	if (vt->flags & CONFIG_NUMA)
@@ -19519,13 +19524,55 @@ count_free_objects(struct meminfo *si, ulong freelist)
 	return c;
 }
 
+/*
+ * With CONFIG_SLAB_FREELIST_HARDENED, freelist_ptr's are obfuscated with xor's,
+ * and for recent releases with an additional bswap. Some releases prior to 5.7.0
+ * may be using the additional bswap. The only easy and reliable way to tell is
+ * to inspect assembly code (e.g. "__slab_free") for a bswap instruction.
+ */
+static int
+freelist_ptr_bswap_x86(void)
+{
+	char buf1[BUFSIZE];
+	char buf2[BUFSIZE];
+	char *arglist[MAXARGS];
+	int found;
+
+	sprintf(buf1, "disassemble __slab_free");
+	open_tmpfile();
+	if (!gdb_pass_through(buf1, pc->tmpfile, GNU_RETURN_ON_ERROR)) {
+		close_tmpfile();
+		return FALSE;
+	}
+	rewind(pc->tmpfile);
+	found = FALSE;
+	while (fgets(buf2, BUFSIZE, pc->tmpfile)) {
+		if (parse_line(buf2, arglist) < 3)
+			continue;
+		if (STREQ(arglist[2], "bswap")) {
+			found = TRUE;
+			break;
+		}
+	}
+	close_tmpfile();
+	return found;
+}
+
+static void
+freelist_ptr_init(void)
+{
+	if (THIS_KERNEL_VERSION >= LINUX(5,7,0) ||
+	    ((machine_type("X86_64") || machine_type("X86")) && freelist_ptr_bswap_x86()))
+		vt->flags |= FREELIST_PTR_BSWAP;
+}
+
 static ulong
 freelist_ptr(struct meminfo *si, ulong ptr, ulong ptr_addr)
 {
 	if (VALID_MEMBER(kmem_cache_random)) {
 		/* CONFIG_SLAB_FREELIST_HARDENED */
 
-		if (THIS_KERNEL_VERSION >= LINUX(5,7,0))
+		if (vt->flags & FREELIST_PTR_BSWAP)
 			ptr_addr = (sizeof(long) == 8) ? bswap_64(ptr_addr)
 						       : bswap_32(ptr_addr);
 		return (ptr ^ si->random ^ ptr_addr);
-- 
2.31.1

--
Crash-utility mailing list
Crash-utility@xxxxxxxxxx
https://listman.redhat.com/mailman/listinfo/crash-utility
Contribution Guidelines: https://github.com/crash-utility/crash/wiki

[Index of Archives]     [Fedora Development]     [Fedora Desktop]     [Fedora SELinux]     [Yosemite News]     [KDE Users]     [Fedora Tools]

 

Powered by Linux