The synchronization here is just to ensure the module init's been freed before doing W+X checking. But the commit 1a7b7d922081 ("modules: Use vmalloc special flag") moves do_free_init() into a global workqueue instead of call_rcu(). So now rcu_barrier() can not ensure that do_free_init has completed. We should wait it via flush_work(). Without this fix, we still could encounter false positive reports in W+X checking, and the rcu synchronization is unnecessary which can introduce significant delay. Eric Chanudet reports that the rcu_barrier introduces ~0.1s delay on a PREEMPT_RT kernel. [ 0.291444] Freeing unused kernel memory: 5568K [ 0.402442] Run /sbin/init as init process With this fix, the above delay can be eliminated. Fixes: 1a7b7d922081 ("modules: Use vmalloc special flag") Signed-off-by: Changbin Du <changbin.du@xxxxxxxxxx> Cc: Xiaoyi Su <suxiaoyi@xxxxxxxxxx> Cc: Eric Chanudet <echanude@xxxxxxxxxx> --- v3: - amend comment in do_init_module() and update commit msg. v2: - fix compilation issue for no CONFIG_MODULES found by 0-DAY. --- include/linux/moduleloader.h | 8 ++++++++ init/main.c | 5 +++-- kernel/module/main.c | 9 +++++++-- 3 files changed, 18 insertions(+), 4 deletions(-) diff --git a/include/linux/moduleloader.h b/include/linux/moduleloader.h index 001b2ce83832..89b1e0ed9811 100644 --- a/include/linux/moduleloader.h +++ b/include/linux/moduleloader.h @@ -115,6 +115,14 @@ int module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *mod); +#ifdef CONFIG_MODULES +void flush_module_init_free_work(void); +#else +static inline void flush_module_init_free_work(void) +{ +} +#endif + /* Any cleanup needed when module leaves. */ void module_arch_cleanup(struct module *mod); diff --git a/init/main.c b/init/main.c index e24b0780fdff..f0b7e21ac67f 100644 --- a/init/main.c +++ b/init/main.c @@ -99,6 +99,7 @@ #include <linux/init_syscalls.h> #include <linux/stackdepot.h> #include <linux/randomize_kstack.h> +#include <linux/moduleloader.h> #include <net/net_namespace.h> #include <asm/io.h> @@ -1402,11 +1403,11 @@ static void mark_readonly(void) if (rodata_enabled) { /* * load_module() results in W+X mappings, which are cleaned - * up with call_rcu(). Let's make sure that queued work is + * up with init_free_wq. Let's make sure that queued work is * flushed so that we don't hit false positives looking for * insecure pages which are W+X. */ - rcu_barrier(); + flush_module_init_free_work(); mark_rodata_ro(); rodata_test(); } else diff --git a/kernel/module/main.c b/kernel/module/main.c index 36681911c05a..b0b99348e1a8 100644 --- a/kernel/module/main.c +++ b/kernel/module/main.c @@ -2489,6 +2489,11 @@ static void do_free_init(struct work_struct *w) } } +void flush_module_init_free_work(void) +{ + flush_work(&init_free_wq); +} + #undef MODULE_PARAM_PREFIX #define MODULE_PARAM_PREFIX "module." /* Default value for module->async_probe_requested */ @@ -2593,8 +2598,8 @@ static noinline int do_init_module(struct module *mod) * Note that module_alloc() on most architectures creates W+X page * mappings which won't be cleaned up until do_free_init() runs. Any * code such as mark_rodata_ro() which depends on those mappings to - * be cleaned up needs to sync with the queued work - ie - * rcu_barrier() + * be cleaned up needs to sync with the queued work by invoking + * flush_module_init_free_work(). */ if (llist_add(&freeinit->node, &init_free_list)) schedule_work(&init_free_wq); -- 2.25.1