Re: [RFC PATCH 2/2] kfence: Alloc kfence_pool after system startup

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Thu, 3 Mar 2022 at 04:15, Tianchen Ding <dtcccc@xxxxxxxxxxxxxxxxx> wrote:
>
> KFENCE aims at production environments, but it does not allow enabling
> after system startup because kfence_pool only alloc pages from memblock.
> Consider the following production scene:
> At first, for performance considerations, production machines do not
> enable KFENCE.
> However, after running for a while, the kernel is suspected to have
> memory errors. (e.g., a sibling machine crashed.)
> So other production machines need to enable KFENCE, but it's hard for
> them to reboot.

I think having this flexibility isn't bad, but your usecase just
doesn't make sense (to us at least, based on our experience).

So I would simply remove the above as it will give folks the wrong
impression. The below paragraph can be improved a little, but should
be enough.

> Allow enabling KFENCE by alloc pages after system startup, even if
> KFENCE is not enabled during booting.

The above doesn't parse very well -- my suggestion:
  "Allow enabling KFENCE after system startup by allocating its pool
via the page allocator. This provides the flexibility to enable KFENCE
even if it wasn't enabled at boot time."

> Signed-off-by: Tianchen Ding <dtcccc@xxxxxxxxxxxxxxxxx>
> ---
> This patch is similar to what the KFENCE(early version) do on ARM64.
> Instead of alloc_pages(), we'd prefer alloc_contig_pages() to get exact
> number of pages.
> I'm not sure about the impact of breaking __ro_after_init. I've tested
> with hackbench, and it seems no performance regression.
> Or any problem about security?

Performance would be the main consideration. However, I think
__read_mostly should be as good as __ro_after_init in terms of
performance.

> ---
>  mm/kfence/core.c | 96 ++++++++++++++++++++++++++++++++++++++----------
>  1 file changed, 76 insertions(+), 20 deletions(-)
>
> diff --git a/mm/kfence/core.c b/mm/kfence/core.c
> index 19eb123c0bba..ae69b2a113a4 100644
> --- a/mm/kfence/core.c
> +++ b/mm/kfence/core.c
> @@ -93,7 +93,7 @@ static unsigned long kfence_skip_covered_thresh __read_mostly = 75;
>  module_param_named(skip_covered_thresh, kfence_skip_covered_thresh, ulong, 0644);
>
>  /* The pool of pages used for guard pages and objects. */
> -char *__kfence_pool __ro_after_init;
> +char *__kfence_pool __read_mostly;
>  EXPORT_SYMBOL(__kfence_pool); /* Export for test modules. */
>
>  /*
> @@ -534,17 +534,18 @@ static void rcu_guarded_free(struct rcu_head *h)
>         kfence_guarded_free((void *)meta->addr, meta, false);
>  }
>
> -static bool __init kfence_init_pool(void)
> +/*
> + * The main part of init kfence pool.

"Initialization of the KFENCE pool after its allocation."

> + * Return 0 if succeed. Otherwise return the address where error occurs.

"Return 0 on success; otherwise returns the address up to which
partial initialization succeeded."

> + */
> +static unsigned long __kfence_init_pool(void)

Keep this function simply named 'kfence_init_pool()' - it's a static
function, and we can be more descriptive with the other function
names.

>  {
>         unsigned long addr = (unsigned long)__kfence_pool;
>         struct page *pages;
>         int i;
>
> -       if (!__kfence_pool)
> -               return false;
> -
>         if (!arch_kfence_init_pool())
> -               goto err;
> +               return addr;
>
>         pages = virt_to_page(addr);
>
> @@ -562,7 +563,7 @@ static bool __init kfence_init_pool(void)
>
>                 /* Verify we do not have a compound head page. */
>                 if (WARN_ON(compound_head(&pages[i]) != &pages[i]))
> -                       goto err;
> +                       return addr;
>
>                 __SetPageSlab(&pages[i]);
>         }
> @@ -575,7 +576,7 @@ static bool __init kfence_init_pool(void)
>          */
>         for (i = 0; i < 2; i++) {
>                 if (unlikely(!kfence_protect(addr)))
> -                       goto err;
> +                       return addr;
>
>                 addr += PAGE_SIZE;
>         }
> @@ -592,7 +593,7 @@ static bool __init kfence_init_pool(void)
>
>                 /* Protect the right redzone. */
>                 if (unlikely(!kfence_protect(addr + PAGE_SIZE)))
> -                       goto err;
> +                       return addr;
>
>                 addr += 2 * PAGE_SIZE;
>         }
> @@ -605,9 +606,21 @@ static bool __init kfence_init_pool(void)
>          */
>         kmemleak_free(__kfence_pool);
>
> -       return true;
> +       return 0;
> +}
> +
> +static bool __init kfence_init_pool(void)

Just call this kfence_init_pool_early().

> +{
> +       unsigned long addr;
> +
> +       if (!__kfence_pool)
> +               return false;
> +
> +       addr = __kfence_init_pool();
> +
> +       if (!addr)
> +               return true;
>
> -err:
>         /*
>          * Only release unprotected pages, and do not try to go back and change
>          * page attributes due to risk of failing to do so as well. If changing
> @@ -620,6 +633,22 @@ static bool __init kfence_init_pool(void)
>         return false;
>  }
>
> +static bool kfence_init_pool_late(void)
> +{
> +       unsigned long addr, free_pages;
> +
> +       addr = __kfence_init_pool();
> +
> +       if (!addr)
> +               return true;
> +
> +       /* Same as above. */
> +       free_pages = (KFENCE_POOL_SIZE - (addr - (unsigned long)__kfence_pool)) / PAGE_SIZE;
> +       free_contig_range(page_to_pfn(virt_to_page(addr)), free_pages);
> +       __kfence_pool = NULL;
> +       return false;
> +}
> +
>  /* === DebugFS Interface ==================================================== */
>
>  static int stats_show(struct seq_file *seq, void *v)
> @@ -768,31 +797,58 @@ void __init kfence_alloc_pool(void)
>                 pr_err("failed to allocate pool\n");
>  }
>
> +static inline void __kfence_init(void)

Don't make this 'inline', I see no reason for it. If the compiler
thinks it's really worth inlining, it'll do it anyway.

Also, just call it 'kfence_init_enable()' (sprinkling '__' everywhere
really doesn't improve readability if we can avoid it).

> +{
> +       if (!IS_ENABLED(CONFIG_KFENCE_STATIC_KEYS))
> +               static_branch_enable(&kfence_allocation_key);
> +       WRITE_ONCE(kfence_enabled, true);
> +       queue_delayed_work(system_unbound_wq, &kfence_timer, 0);
> +       pr_info("initialized - using %lu bytes for %d objects at 0x%p-0x%p\n", KFENCE_POOL_SIZE,
> +               CONFIG_KFENCE_NUM_OBJECTS, (void *)__kfence_pool,
> +               (void *)(__kfence_pool + KFENCE_POOL_SIZE));
> +}
> +
>  void __init kfence_init(void)
>  {
> +       stack_hash_seed = (u32)random_get_entropy();
> +
>         /* Setting kfence_sample_interval to 0 on boot disables KFENCE. */
>         if (!kfence_sample_interval)
>                 return;
>
> -       stack_hash_seed = (u32)random_get_entropy();
>         if (!kfence_init_pool()) {
>                 pr_err("%s failed\n", __func__);
>                 return;
>         }
>
> -       if (!IS_ENABLED(CONFIG_KFENCE_STATIC_KEYS))
> -               static_branch_enable(&kfence_allocation_key);
> -       WRITE_ONCE(kfence_enabled, true);
> -       queue_delayed_work(system_unbound_wq, &kfence_timer, 0);
> -       pr_info("initialized - using %lu bytes for %d objects at 0x%p-0x%p\n", KFENCE_POOL_SIZE,
> -               CONFIG_KFENCE_NUM_OBJECTS, (void *)__kfence_pool,
> -               (void *)(__kfence_pool + KFENCE_POOL_SIZE));
> +       __kfence_init();
> +}
> +
> +static int kfence_init_late(void)
> +{
> +       struct page *pages;
> +       const unsigned long nr_pages = KFENCE_POOL_SIZE / PAGE_SIZE;

Order 'nr_pages' above 'pages' (reverse xmas-tree).


> +       pages = alloc_contig_pages(nr_pages, GFP_KERNEL, first_online_node, NULL);
> +
> +       if (!pages)
> +               return -ENOMEM;
> +
> +       __kfence_pool = page_to_virt(pages);
> +
> +       if (!kfence_init_pool_late()) {
> +               pr_err("%s failed\n", __func__);
> +               return -EBUSY;
> +       }
> +
> +       __kfence_init();
> +       return 0;
>  }
>
>  static int kfence_enable_late(void)
>  {
>         if (!__kfence_pool)
> -               return -EINVAL;
> +               return kfence_init_late();
>
>         WRITE_ONCE(kfence_enabled, true);
>         queue_delayed_work(system_unbound_wq, &kfence_timer, 0);
> --
> 2.27.0
>
> --
> You received this message because you are subscribed to the Google Groups "kasan-dev" group.
> To unsubscribe from this group and stop receiving emails from it, send an email to kasan-dev+unsubscribe@xxxxxxxxxxxxxxxx.
> To view this discussion on the web visit https://groups.google.com/d/msgid/kasan-dev/20220303031505.28495-3-dtcccc%40linux.alibaba.com.




[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Annouce]     [Bugtraq]     [Linux OMAP]     [Linux MIPS]     [eCos]     [Asterisk Internet PBX]     [Linux API]

  Powered by Linux