kmem -s/-S not working properly on RHEL8.6/8.7

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hello,

I've submitted https://github.com/crash-utility/crash/issues/130 describing this "kmem -s/-S" problem.

For CONFIG_SLAB_FREELIST_HARDENED, the crash memory.c:freelist_ptr() code is checking for an additional bswap using a simple release test eg. THIS_KERNEL_VERSION >= LINUX(5,7,0), basically checking for RHEL 9 and beyond.

However, for RHEL 8.6/8.7, we have CONFIG_SLAB_FREELIST_HARDENED=y, and we also have the additional bswap, but the current crash memory.c:freelist_ptr() code is not handling this case, hence kmem -s/-S will not not work properly for RHEL 8.6/8.7, and free objects will not be counted nor reported properly.

An example from a RHEL 8.6 x86_64 kdump, a kmem cache with a single slab having 42 objects, only the freelist head is seen as free as crash can't walk freelist next pointers, and crash is wrongly reporting 41 allocated objects:

crash> sys | grep RELEASE
     RELEASE: 4.18.0-372.9.1.el8.x86_64
crash> kmem -s nfs_commit_data
CACHE             OBJSIZE  ALLOCATED     TOTAL  SLABS  SSIZE  NAME
ffff9ad40c7cb2c0      728         41        42      1    32k  nfs_commit_data

When properly accounting for the additional bswap, we can walk the freelist and find 38 free objects, and crash is now reporting only 4 allocated objects:

crash> kmem -s nfs_commit_data
CACHE             OBJSIZE  ALLOCATED     TOTAL  SLABS  SSIZE  NAME
ffff9ad40c7cb2c0      728          4        42      1    32k  nfs_commit_data

See also pull request https://github.com/crash-utility/crash/pull/131 and diff/patch below.

-- georges@xxxxxxx (hpux/linux kernel tools)

diff --git a/defs.h b/defs.h
index 33a823b..5d01f69 100644
--- a/defs.h
+++ b/defs.h
@@ -731,6 +731,7 @@ struct kernel_table {                   /* kernel data */
 #define ONLINE_MAP     (ONLINE)
 #define ACTIVE_MAP       (0x10)
        int BUG_bytes;
+       int freelist_ptr_has_bswap;
        ulong xen_flags;
 #define WRITABLE_PAGE_TABLES    (0x1)
 #define SHADOW_PAGE_TABLES      (0x2)
diff --git a/kernel.c b/kernel.c
index a42e6ad..6658370 100644
--- a/kernel.c
+++ b/kernel.c
@@ -81,6 +81,7 @@ static void read_in_kernel_config_err(int, char *);
 static void BUG_bytes_init(void);
 static int BUG_x86(void);
 static int BUG_x86_64(void);
+static void freelist_ptr_init();
 static void cpu_maps_init(void);
 static void get_xtime(struct timespec *);
 static char *log_from_idx(uint32_t, char *);
@@ -766,6 +767,7 @@ kernel_init()
        STRUCT_SIZE_INIT(mem_section, "mem_section");

        BUG_bytes_init();
+       freelist_ptr_init();

        /*
         *  for hrtimer
@@ -2352,6 +2354,59 @@ BUG_x86_64(void)
 }


+/*
+ * With CONFIG_SLAB_FREELIST_HARDENED, freelist_ptr's are crypted with xor's,
+ * and for recent release with an additionnal bswap. Some releases prio to 5.7.0
+ * may be using the additionnal bswap. The only easy and reliable way to tell is
+ * to inspect assembly code (eg. "__slab_free") for a bswap instruction.
+ */
+static int
+freelist_ptr_has_bswap_x86(void)
+{
+       struct syment *sp, *spn;
+       char buf1[BUFSIZE];
+       char buf2[BUFSIZE];
+       char *arglist[MAXARGS];
+       ulong vaddr;
+       int found;
+
+       if (!(sp = symbol_search("__slab_free")) ||
+           !(spn = next_symbol(NULL, sp)))
+               return FALSE;
+
+       sprintf(buf1, "x/%ldi 0x%lx", spn->value - sp->value, sp->value);
+
+       found = FALSE;
+       vaddr = 0;
+       open_tmpfile();
+       gdb_pass_through(buf1, pc->tmpfile, GNU_RETURN_ON_ERROR);
+       rewind(pc->tmpfile);
+       while (fgets(buf2, BUFSIZE, pc->tmpfile)) {
+               if (parse_line(buf2, arglist) < 3)
+                       continue;
+
+               if ((vaddr = htol(strip_ending_char(arglist[0], ':'),
+                   RETURN_ON_ERROR|QUIET, NULL)) >= spn->value)
+                       continue;
+
+               if (STREQ(arglist[2], "bswap")) {
+                       found = TRUE;
+                       break;
+               }
+       }
+       close_tmpfile();
+       return found;
+}
+
+static void
+freelist_ptr_init(void)
+{
+       if (THIS_KERNEL_VERSION >= LINUX(5,7,0))
+               kt->freelist_ptr_has_bswap = TRUE;
+       else if (machine_type("X86") || machine_type("X86_64"))
+               kt->freelist_ptr_has_bswap = freelist_ptr_has_bswap_x86();
+}
+
 /*
  *  Callback from gdb disassembly code.
  */
diff --git a/memory.c b/memory.c
index 5141fbe..c835614 100644
--- a/memory.c
+++ b/memory.c
@@ -19525,7 +19525,7 @@ freelist_ptr(struct meminfo *si, ulong ptr, ulong ptr_addr)
        if (VALID_MEMBER(kmem_cache_random)) {
                /* CONFIG_SLAB_FREELIST_HARDENED */

-               if (THIS_KERNEL_VERSION >= LINUX(5,7,0))
+               if (kt->freelist_ptr_has_bswap)
                        ptr_addr = (sizeof(long) == 8) ? bswap_64(ptr_addr)
                                                       : bswap_32(ptr_addr);
                return (ptr ^ si->random ^ ptr_addr);

--
Crash-utility mailing list
Crash-utility@xxxxxxxxxx
https://listman.redhat.com/mailman/listinfo/crash-utility
Contribution Guidelines: https://github.com/crash-utility/crash/wiki




[Index of Archives]     [Fedora Development]     [Fedora Desktop]     [Fedora SELinux]     [Yosemite News]     [KDE Users]     [Fedora Tools]

 

Powered by Linux