[PATCH v2 6/7] mm, slab: add static key for should_failslab()

Vlastimil Babka <vbabka@xxxxxxx> · Thu, 20 Jun 2024 00:49:00 +0200

Since commit 4f6923fbb352 ("mm: make should_failslab always available for
fault injection") should_failslab() is unconditionally a noinline
function. This adds visible overhead to the slab allocation hotpath,
even if the function is empty. With CONFIG_FAILSLAB=y there's additional
overhead, even when the functionality is not activated by a boot
parameter or via debugfs.

The overhead can be eliminated with a static key around the callsite.
Fault injection and error injection frameworks including bpf can now be
told that this function has a static key associated, and are able to
enable and disable it accordingly.

Additionally, compile out all relevant code if neither CONFIG_FAILSLAB
nor CONFIG_FUNCTION_ERROR_INJECTION is enabled. When only the latter is
not enabled, make should_failslab() static inline instead of noinline.

To demonstrate the reduced overhead of calling an empty
should_failslab() function, a kernel build with
CONFIG_FUNCTION_ERROR_INJECTION enabled but CONFIG_FAILSLAB disabled,
and CPU mitigations enabled, was used in a qemu-kvm (virtme-ng) on AMD
Ryzen 7 2700 machine, and execution of a program trying to open() a
non-existent file was measured 3 times:

    for (int i = 0; i < 10000000; i++) {
        open("non_existent", O_RDONLY);
    }

After this patch, the measured real time was 4.3% smaller. Using perf
profiling it was verified that should_failslab was gone from the
profile.

With CONFIG_FAILSLAB also enabled, the patched kernel performace was
unaffected, as expected, while unpatched kernel's performance was worse,
resulting in the relative speedup being 10.5%. This means it no longer
needs to be an option suitable only for debug kernel builds.

Acked-by: Alexei Starovoitov <ast@xxxxxxxxxx>
Reviewed-by: Roman Gushchin <roman.gushchin@xxxxxxxxx>
Signed-off-by: Vlastimil Babka <vbabka@xxxxxxx>
---
 include/linux/fault-inject.h |  4 +++-
 mm/failslab.c                |  2 +-
 mm/slab.h                    |  3 +++
 mm/slub.c                    | 30 +++++++++++++++++++++++++++---
 4 files changed, 34 insertions(+), 5 deletions(-)

diff --git a/include/linux/fault-inject.h b/include/linux/fault-inject.h
index cfe75cc1bac4..0d0fa94dc1c8 100644
--- a/include/linux/fault-inject.h
+++ b/include/linux/fault-inject.h
@@ -107,9 +107,11 @@ static inline bool __should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)
 }
 #endif /* CONFIG_FAIL_PAGE_ALLOC */
 
+#ifdef CONFIG_FUNCTION_ERROR_INJECTION
 int should_failslab(struct kmem_cache *s, gfp_t gfpflags);
+#endif
 #ifdef CONFIG_FAILSLAB
-extern bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags);
+bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags);
 #else
 static inline bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags)
 {
diff --git a/mm/failslab.c b/mm/failslab.c
index ffc420c0e767..878fd08e5dac 100644
--- a/mm/failslab.c
+++ b/mm/failslab.c
@@ -9,7 +9,7 @@ static struct {
 	bool ignore_gfp_reclaim;
 	bool cache_filter;
 } failslab = {
-	.attr = FAULT_ATTR_INITIALIZER,
+	.attr = FAULT_ATTR_INITIALIZER_KEY(&should_failslab_active.key),
 	.ignore_gfp_reclaim = true,
 	.cache_filter = false,
 };
diff --git a/mm/slab.h b/mm/slab.h
index 5f8f47c5bee0..792e19cb37b8 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -11,6 +11,7 @@
 #include <linux/memcontrol.h>
 #include <linux/kfence.h>
 #include <linux/kasan.h>
+#include <linux/jump_label.h>
 
 /*
  * Internal slab definitions
@@ -160,6 +161,8 @@ static_assert(IS_ALIGNED(offsetof(struct slab, freelist), sizeof(freelist_aba_t)
  */
 #define slab_page(s) folio_page(slab_folio(s), 0)
 
+DECLARE_STATIC_KEY_FALSE(should_failslab_active);
+
 /*
  * If network-based swap is enabled, sl*b must keep track of whether pages
  * were allocated from pfmemalloc reserves.
diff --git a/mm/slub.c b/mm/slub.c
index 0809760cf789..11980aa94631 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -3874,13 +3874,37 @@ static __always_inline void maybe_wipe_obj_freeptr(struct kmem_cache *s,
 			0, sizeof(void *));
 }
 
-noinline int should_failslab(struct kmem_cache *s, gfp_t gfpflags)
+#if defined(CONFIG_FUNCTION_ERROR_INJECTION) || defined(CONFIG_FAILSLAB)
+DEFINE_STATIC_KEY_FALSE(should_failslab_active);
+
+#ifdef CONFIG_FUNCTION_ERROR_INJECTION
+noinline
+#else
+static inline
+#endif
+int should_failslab(struct kmem_cache *s, gfp_t gfpflags)
 {
 	if (__should_failslab(s, gfpflags))
 		return -ENOMEM;
 	return 0;
 }
-ALLOW_ERROR_INJECTION(should_failslab, ERRNO);
+ALLOW_ERROR_INJECTION_KEY(should_failslab, ERRNO, &should_failslab_active);
+
+static __always_inline int should_failslab_wrapped(struct kmem_cache *s,
+						   gfp_t gfp)
+{
+	if (static_branch_unlikely(&should_failslab_active))
+		return should_failslab(s, gfp);
+	else
+		return 0;
+}
+#else
+static __always_inline int should_failslab_wrapped(struct kmem_cache *s,
+						   gfp_t gfp)
+{
+	return false;
+}
+#endif
 
 static __fastpath_inline
 struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags)
@@ -3889,7 +3913,7 @@ struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags)
 
 	might_alloc(flags);
 
-	if (unlikely(should_failslab(s, flags)))
+	if (should_failslab_wrapped(s, flags))
 		return NULL;
 
 	return s;

-- 
2.45.2