On Mon, Oct 31, 2022 at 10:05:58AM +0000, John Thomson wrote:
> On Mon, 31 Oct 2022, at 02:36, Feng Tang wrote:
> >> >
> >> > possibly relevant config options:
> >> > grep -E '(SLUB|SLAB)' .config
> >> > # SLAB allocator options
> >> > # CONFIG_SLAB is not set
> >> > CONFIG_SLUB=y
> >> > CONFIG_SLAB_MERGE_DEFAULT=y
> >> > # CONFIG_SLAB_FREELIST_RANDOM is not set
> >> > # CONFIG_SLAB_FREELIST_HARDENED is not set
> >> > # CONFIG_SLUB_STATS is not set
> >> > CONFIG_SLUB_CPU_PARTIAL=y
> >> > # end of SLAB allocator options
> >> > # CONFIG_SLUB_DEBUG is not set
> >>
> >> Also not having CONFIG_SLUB_DEBUG enabled means most of the code the
> >> patch/commit touches is not even active.
> >> Could this be some miscompile or code layout change exposing some
> >> different bug, hmm.
>
> Yes, it could be.
>
> >> Is it any different if you do enable CONFIG_SLUB_DEBUG ?
>
> No change
>
> >> Or change to CONFIG_SLAB? (that would be really weird if not)
>
> This boots fine
>
> > I haven't found any clue from the code either, and I compiled a
> > kernel with the config above and tested booting on an Alder Lake
> > desktop and in QEMU; both boot fine.
> >
> > Could you provide the full kernel config and dmesg (in compressed
> > form if you think they're too big), so we can check further?
>
> Attached
>
> > Thanks,
> > Feng
>
> vmlinux is bigger, and entry point is larger (0x8074081c vs 0x807407dc revert vs 0x8073fcbc),
> so that may be it? Or not?
> revert + SLUB_DEBUG + SLUB_DEBUG_ON is bigger still, but does successfully boot.
> vmlinux entry point is 0x8074705c

Thanks for the prompt info! As I can't reproduce it locally yet, could you
help run 3 tests separately:

* change the -O2/-O3 compile option to -O1
* try the attached 0001 patch (which cuts out part of the commit)
* try the attached 0001+0002 patches

Thanks!
- Feng
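For readers following along, one possible way to run the three tests; this
is only a sketch — the patch file names and the Makefile edit are
illustrative, assuming a tree where CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE
selects -O2 in the top-level Makefile:

  # 1) -O1 build: edit the top-level Makefile, changing
  #      KBUILD_CFLAGS += -O2
  #    to
  #      KBUILD_CFLAGS += -O1
  # 2) apply the reduced patch only
  git am 0001-reduced-slub-patch.patch
  # 3) then add the reordering patch on top
  git am 0002-reorder-the-partial_context-initialization.patch
  # compare entry points across builds (readelf is from binutils)
  readelf -h vmlinux | grep 'Entry point'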
>From c52d8c8803562dd6d1d9d57f4f6e8b7e1ddf675d Mon Sep 17 00:00:00 2001
From: Feng Tang <feng.tang@xxxxxxxxx>
Date: Mon, 31 Oct 2022 16:17:22 +0800
Subject: [PATCH 1/2] reduced slub patch

Signed-off-by: Feng Tang <feng.tang@xxxxxxxxx>
---
 include/linux/slab.h |  2 ++
 mm/slab_common.c     |  3 +-
 mm/slub.c            | 73 +++++++++++++++++++++++++-------------------
 3 files changed, 46 insertions(+), 32 deletions(-)

diff --git a/include/linux/slab.h b/include/linux/slab.h
index f2da0df0d58d..90877fcde70b 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -29,6 +29,8 @@
 #define SLAB_RED_ZONE		((slab_flags_t __force)0x00000400U)
 /* DEBUG: Poison objects */
 #define SLAB_POISON		((slab_flags_t __force)0x00000800U)
+/* Indicate a kmalloc slab */
+#define SLAB_KMALLOC		((slab_flags_t __force)0x00001000U)
 /* Align objs on cache lines */
 #define SLAB_HWCACHE_ALIGN	((slab_flags_t __force)0x00002000U)
 /* Use GFP_DMA memory */
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 87a28e298be8..33b1886b06eb 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -661,7 +661,8 @@ struct kmem_cache *__init create_kmalloc_cache(const char *name,
 	if (!s)
 		panic("Out of memory when creating slab %s\n", name);
 
-	create_boot_cache(s, name, size, flags, useroffset, usersize);
+	create_boot_cache(s, name, size, flags | SLAB_KMALLOC, useroffset,
+			  usersize);
 	kasan_cache_create_kmalloc(s);
 	list_add(&s->list, &slab_caches);
 	s->refcount = 1;
diff --git a/mm/slub.c b/mm/slub.c
index ade3e851fc65..6fa3c24742b8 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -195,6 +195,13 @@ DEFINE_STATIC_KEY_FALSE(slub_debug_enabled);
 #endif
 #endif /* CONFIG_SLUB_DEBUG */
 
+/* Structure holding parameters for get_partial() call chain */
+struct partial_context {
+	struct slab **slab;
+	gfp_t flags;
+	unsigned int orig_size;
+};
+
 static inline bool kmem_cache_debug(struct kmem_cache *s)
 {
 	return kmem_cache_debug_flags(s, SLAB_DEBUG_FLAGS);
@@ -994,7 +1001,8 @@ static int check_bytes_and_report(struct kmem_cache *s, struct slab *slab,
  *
  *	A. Free pointer (if we cannot overwrite object on free)
  *	B. Tracking data for SLAB_STORE_USER
- *	C. Padding to reach required alignment boundary or at minimum
+ *	C. Original request size for kmalloc object (SLAB_STORE_USER enabled)
+ *	D. Padding to reach required alignment boundary or at minimum
  *		one word if debugging is on to be able to detect writes
  *		before the word boundary.
  *
@@ -1310,7 +1318,7 @@ static inline int alloc_consistency_checks(struct kmem_cache *s,
 }
 
 static noinline int alloc_debug_processing(struct kmem_cache *s,
-			struct slab *slab, void *object)
+			struct slab *slab, void *object, int orig_size)
 {
 	if (s->flags & SLAB_CONSISTENCY_CHECKS) {
 		if (!alloc_consistency_checks(s, slab, object))
@@ -1587,7 +1595,7 @@ static inline void setup_slab_debug(struct kmem_cache *s,
 			struct slab *slab, void *addr) {}
 
 static inline int alloc_debug_processing(struct kmem_cache *s,
-	struct slab *slab, void *object) { return 0; }
+	struct slab *slab, void *object, int orig_size) { return 0; }
 
 static inline void free_debug_processing(
 	struct kmem_cache *s, struct slab *slab,
@@ -2017,7 +2025,7 @@ static inline void remove_partial(struct kmem_cache_node *n,
  * it to full list if it was the last free object.
  */
 static void *alloc_single_from_partial(struct kmem_cache *s,
-			struct kmem_cache_node *n, struct slab *slab)
+			struct kmem_cache_node *n, struct slab *slab, int orig_size)
 {
 	void *object;
 
@@ -2027,7 +2035,7 @@ static void *alloc_single_from_partial(struct kmem_cache *s,
 	slab->freelist = get_freepointer(s, object);
 	slab->inuse++;
 
-	if (!alloc_debug_processing(s, slab, object)) {
+	if (!alloc_debug_processing(s, slab, object, orig_size)) {
 		remove_partial(n, slab);
 		return NULL;
 	}
@@ -2046,7 +2054,7 @@ static void *alloc_single_from_partial(struct kmem_cache *s,
  * and put the slab to the partial (or full) list.
  */
 static void *alloc_single_from_new_slab(struct kmem_cache *s,
-					struct slab *slab)
+					struct slab *slab, int orig_size)
 {
 	int nid = slab_nid(slab);
 	struct kmem_cache_node *n = get_node(s, nid);
@@ -2058,7 +2066,7 @@ static void *alloc_single_from_new_slab(struct kmem_cache *s,
 	slab->freelist = get_freepointer(s, object);
 	slab->inuse = 1;
 
-	if (!alloc_debug_processing(s, slab, object))
+	if (!alloc_debug_processing(s, slab, object, orig_size))
 		/*
 		 * It's not really expected that this would fail on a
 		 * freshly allocated slab, but a concurrent memory
@@ -2136,7 +2144,7 @@ static inline bool pfmemalloc_match(struct slab *slab, gfp_t gfpflags);
  * Try to allocate a partial slab from a specific node.
  */
 static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n,
-			      struct slab **ret_slab, gfp_t gfpflags)
+			      struct partial_context *pc)
 {
 	struct slab *slab, *slab2;
 	void *object = NULL;
@@ -2156,11 +2164,12 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n,
 	list_for_each_entry_safe(slab, slab2, &n->partial, slab_list) {
 		void *t;
 
-		if (!pfmemalloc_match(slab, gfpflags))
+		if (!pfmemalloc_match(slab, pc->flags))
 			continue;
 
 		if (kmem_cache_debug(s)) {
-			object = alloc_single_from_partial(s, n, slab);
+			object = alloc_single_from_partial(s, n, slab,
+							pc->orig_size);
 			if (object)
 				break;
 			continue;
@@ -2171,7 +2180,7 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n,
 			break;
 
 		if (!object) {
-			*ret_slab = slab;
+			*pc->slab = slab;
 			stat(s, ALLOC_FROM_PARTIAL);
 			object = t;
 		} else {
@@ -2195,14 +2204,13 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n,
 /*
  * Get a slab from somewhere. Search in increasing NUMA distances.
  */
-static void *get_any_partial(struct kmem_cache *s, gfp_t flags,
-			     struct slab **ret_slab)
+static void *get_any_partial(struct kmem_cache *s, struct partial_context *pc)
 {
 #ifdef CONFIG_NUMA
 	struct zonelist *zonelist;
 	struct zoneref *z;
 	struct zone *zone;
-	enum zone_type highest_zoneidx = gfp_zone(flags);
+	enum zone_type highest_zoneidx = gfp_zone(pc->flags);
 	void *object;
 	unsigned int cpuset_mems_cookie;
 
@@ -2230,15 +2238,15 @@ static void *get_any_partial(struct kmem_cache *s, gfp_t flags,
 
 	do {
 		cpuset_mems_cookie = read_mems_allowed_begin();
-		zonelist = node_zonelist(mempolicy_slab_node(), flags);
+		zonelist = node_zonelist(mempolicy_slab_node(), pc->flags);
 		for_each_zone_zonelist(zone, z, zonelist, highest_zoneidx) {
 			struct kmem_cache_node *n;
 
 			n = get_node(s, zone_to_nid(zone));
 
-			if (n && cpuset_zone_allowed(zone, flags) &&
+			if (n && cpuset_zone_allowed(zone, pc->flags) &&
 					n->nr_partial > s->min_partial) {
-				object = get_partial_node(s, n, ret_slab, flags);
+				object = get_partial_node(s, n, pc);
 				if (object) {
 					/*
 					 * Don't check read_mems_allowed_retry()
@@ -2259,8 +2267,7 @@ static void *get_any_partial(struct kmem_cache *s, gfp_t flags,
 /*
  * Get a partial slab, lock it and return it.
  */
-static void *get_partial(struct kmem_cache *s, gfp_t flags, int node,
-			 struct slab **ret_slab)
+static void *get_partial(struct kmem_cache *s, int node, struct partial_context *pc)
 {
 	void *object;
 	int searchnode = node;
@@ -2268,11 +2275,11 @@ static void *get_partial(struct kmem_cache *s, gfp_t flags, int node,
 	if (node == NUMA_NO_NODE)
 		searchnode = numa_mem_id();
 
-	object = get_partial_node(s, get_node(s, searchnode), ret_slab, flags);
+	object = get_partial_node(s, get_node(s, searchnode), pc);
 	if (object || node != NUMA_NO_NODE)
 		return object;
 
-	return get_any_partial(s, flags, ret_slab);
+	return get_any_partial(s, pc);
 }
 
 #ifdef CONFIG_PREEMPTION
@@ -2996,11 +3003,12 @@ static inline void *get_freelist(struct kmem_cache *s, struct slab *slab)
  * already disabled (which is the case for bulk allocation).
  */
 static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
-			  unsigned long addr, struct kmem_cache_cpu *c)
+			  unsigned long addr, struct kmem_cache_cpu *c, unsigned int orig_size)
 {
 	void *freelist;
 	struct slab *slab;
 	unsigned long flags;
+	struct partial_context pc;
 
 	stat(s, ALLOC_SLOWPATH);
 
@@ -3114,7 +3122,10 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
 
 new_objects:
 
-	freelist = get_partial(s, gfpflags, node, &slab);
+	pc.flags = gfpflags;
+	pc.slab = &slab;
+	pc.orig_size = orig_size;
+	freelist = get_partial(s, node, &pc);
 	if (freelist)
 		goto check_new_slab;
 
@@ -3130,7 +3141,7 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
 	stat(s, ALLOC_SLAB);
 
 	if (kmem_cache_debug(s)) {
-		freelist = alloc_single_from_new_slab(s, slab);
+		freelist = alloc_single_from_new_slab(s, slab, orig_size);
 
 		if (unlikely(!freelist))
 			goto new_objects;
@@ -3162,6 +3173,7 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
 		 */
 		if (s->flags & SLAB_STORE_USER)
 			set_track(s, freelist, TRACK_ALLOC, addr);
+
 		return freelist;
 	}
 
@@ -3204,7 +3216,7 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
  * pointer.
  */
 static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
-			  unsigned long addr, struct kmem_cache_cpu *c)
+			  unsigned long addr, struct kmem_cache_cpu *c, unsigned int orig_size)
 {
 	void *p;
 
@@ -3217,7 +3229,7 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
 	c = slub_get_cpu_ptr(s->cpu_slab);
 #endif
 
-	p = ___slab_alloc(s, gfpflags, node, addr, c);
+	p = ___slab_alloc(s, gfpflags, node, addr, c, orig_size);
 #ifdef CONFIG_PREEMPT_COUNT
 	slub_put_cpu_ptr(s->cpu_slab);
 #endif
@@ -3302,7 +3314,7 @@ static __always_inline void *slab_alloc_node(struct kmem_cache *s, struct list_l
 
 	if (!USE_LOCKLESS_FAST_PATH() ||
 	    unlikely(!object || !slab || !node_match(slab, node))) {
-		object = __slab_alloc(s, gfpflags, node, addr, c);
+		object = __slab_alloc(s, gfpflags, node, addr, c, orig_size);
 	} else {
 		void *next_object = get_freepointer_safe(s, object);
 
@@ -3769,7 +3781,7 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
 			 * of re-populating per CPU c->freelist
 			 */
 			p[i] = ___slab_alloc(s, flags, NUMA_NO_NODE,
-					    _RET_IP_, c);
+					    _RET_IP_, c, s->object_size);
 			if (unlikely(!p[i]))
 				goto error;
 
@@ -5031,10 +5043,9 @@ static int add_location(struct loc_track *t, struct kmem_cache *s,
 		if (pos == end)
 			break;
 
-		caddr = t->loc[pos].addr;
-		chandle = t->loc[pos].handle;
+		caddr = l->addr;
+		chandle = l->handle;
 		if ((track->addr == caddr) && (handle == chandle)) {
 
-			l = &t->loc[pos];
 			l->count++;
 			if (track->when) {
-- 
2.34.1
>From 00d06db374449a3151d94d11f7c9bd62d4de0d6b Mon Sep 17 00:00:00 2001
From: Feng Tang <feng.tang@xxxxxxxxx>
Date: Mon, 31 Oct 2022 16:19:12 +0800
Subject: [PATCH 2/2] reorder the partial_context initialization

Signed-off-by: Feng Tang <feng.tang@xxxxxxxxx>
---
 mm/slub.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/mm/slub.c b/mm/slub.c
index 6fa3c24742b8..4265d293f4dd 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -3010,6 +3010,11 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
 	unsigned long flags;
 	struct partial_context pc;
 
+	pc.flags = gfpflags;
+	pc.slab = &slab;
+	pc.orig_size = orig_size;
+	barrier();
+
 	stat(s, ALLOC_SLOWPATH);
 
 reread_slab:
@@ -3122,9 +3127,6 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
 
 new_objects:
 
-	pc.flags = gfpflags;
-	pc.slab = &slab;
-	pc.orig_size = orig_size;
 	freelist = get_partial(s, node, &pc);
 	if (freelist)
 		goto check_new_slab;
-- 
2.34.1
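Note that 0002 changes no logic: it only hoists the three pc.* stores to the
top of ___slab_alloc() and puts a compiler barrier after them, so the compiler
can no longer sink or merge those stores into the slow path. If 0001 fails to
boot but 0001+0002 boots, that points at code generation or layout rather than
the slub changes themselves. One way to compare the generated code directly
(a sketch; the object path and output file names are illustrative):

  # rebuild mm/slub.o for each variant and keep the disassembly
  objdump -d mm/slub.o > slub-0001.dis        # tree with 0001 only
  objdump -d mm/slub.o > slub-0001-0002.dis   # tree with 0001+0002
  diff -u slub-0001.dis slub-0001-0002.dis | less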