Re: [PATCH RFC v3 10/36] kmsan: add KMSAN runtime

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Fri, 22 Nov 2019, glider@xxxxxxxxxx wrote:

[...]
> diff --git a/arch/x86/include/asm/kmsan.h b/arch/x86/include/asm/kmsan.h
> new file mode 100644
> index 000000000000..fc5f1224a059
> --- /dev/null
> +++ b/arch/x86/include/asm/kmsan.h
> @@ -0,0 +1,117 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/*
> + * Assembly bits to safely invoke KMSAN hooks from .S files.
> + *
> + * Adopted from KTSAN assembly hooks implementation by Dmitry Vyukov:
> + * https://github.com/google/ktsan/blob/ktsan/arch/x86/include/asm/ktsan.h
> + *
> + * Copyright (C) 2017-2019 Google LLC
> + * Author: Alexander Potapenko <glider@xxxxxxxxxx>
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + *
> + */
> +#ifndef _ASM_X86_KMSAN_H
> +#define _ASM_X86_KMSAN_H
> +
> +#ifdef CONFIG_KMSAN
> +
> +#define KMSAN_PUSH_REGS				\
> +	pushq	%rax;				\
> +	pushq	%rcx;				\
> +	pushq	%rdx;				\
> +	pushq	%rdi;				\
> +	pushq	%rsi;				\
> +	pushq	%r8;				\
> +	pushq	%r9;				\
> +	pushq	%r10;				\
> +	pushq	%r11;				\
> +/**/
> +
[...]
> +
> +#define KMSAN_IST_EXIT(shift_ist)		\
> +	KMSAN_PUSH_REGS				\
> +	movq	$shift_ist, %rdi;		\
> +	call	kmsan_ist_exit;			\
> +	KMSAN_POP_REGS				\
> +/**/
> +
> +#define KMSAN_UNPOISON_PT_REGS			\
> +	KMSAN_PUSH_REGS				\
> +	call	kmsan_unpoison_pt_regs;		\
> +	KMSAN_POP_REGS				\
> +/**/

Could all these just be using '.macro .endm'?

> +#else /* ifdef CONFIG_KMSAN */
> +
> +#define KMSAN_INTERRUPT_ENTER
> +#define KMSAN_INTERRUPT_EXIT
> +#define KMSAN_SOFTIRQ_ENTER
> +#define KMSAN_SOFTIRQ_EXIT
> +#define KMSAN_NMI_ENTER
> +#define KMSAN_NMI_EXIT
> +#define KMSAN_SYSCALL_ENTER
> +#define KMSAN_SYSCALL_EXIT
> +#define KMSAN_IST_ENTER(shift_ist)
> +#define KMSAN_IST_EXIT(shift_ist)
> +#define KMSAN_UNPOISON_PT_REGS
> +
> +#endif /* ifdef CONFIG_KMSAN */
> +#endif /* ifndef _ASM_X86_KMSAN_H */
> diff --git a/include/linux/kmsan-checks.h b/include/linux/kmsan-checks.h
> new file mode 100644
> index 000000000000..623854e88d4b
> --- /dev/null
> +++ b/include/linux/kmsan-checks.h
> @@ -0,0 +1,122 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/*
> + * KMSAN checks.
> + *
> + * Copyright (C) 2017-2019 Google LLC
> + * Author: Alexander Potapenko <glider@xxxxxxxxxx>
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + *
> + */
> +
> +#ifndef _LINUX_KMSAN_CHECKS_H
> +#define _LINUX_KMSAN_CHECKS_H
> +
> +#include <linux/bug.h>
> +#include <linux/dma-direction.h>
> +#include <linux/types.h>
> +
> +struct page;
> +struct sk_buff;
> +struct urb;
> +
> +#ifdef CONFIG_KMSAN
> +
> +/*
> + * Helper functions that mark the return value initialized.
> + * Note that Clang ignores the inline attribute in the cases when a no_sanitize
> + * function is called from an instrumented one.
> + */
> +
> +__no_sanitize_memory
> +static inline unsigned char KMSAN_INIT_1(unsigned char value)
> +{
> +	return value;
> +}
> +
> +__no_sanitize_memory
> +static inline unsigned short KMSAN_INIT_2(unsigned short value)
> +{
> +	return value;
> +}
> +
> +__no_sanitize_memory
> +static inline unsigned int KMSAN_INIT_4(unsigned int value)
> +{
> +	return value;
> +}
> +
> +__no_sanitize_memory
> +static inline unsigned long KMSAN_INIT_8(unsigned long value)
> +{
> +	return value;
> +}

Should the above be __always_inline?

Does it make sense to use u8, u16, u32, u64 here -- just in case it's
ported to other architectures in the future?

> +
> +#define KMSAN_INIT_VALUE(val)		\
> +	({				\
> +		typeof(val) __ret;	\
> +		switch (sizeof(val)) {	\
> +		case 1:						\
> +			*(unsigned char *)&__ret = KMSAN_INIT_1(	\
> +					(unsigned char)val);	\
> +			break;					\
> +		case 2:						\
> +			*(unsigned short *)&__ret = KMSAN_INIT_2(	\
> +					(unsigned short)val);	\
> +			break;					\
> +		case 4:						\
> +			*(unsigned int *)&__ret = KMSAN_INIT_4(	\
> +					(unsigned int)val);	\
> +			break;					\
> +		case 8:						\
> +			*(unsigned long *)&__ret = KMSAN_INIT_8(	\
> +					(unsigned long)val);	\
> +			break;					\
> +		default:					\
> +			BUILD_BUG_ON(1);			\
> +		}						\
> +		__ret;						\
> +	}) /**/

Is the /**/ needed?

> +
> +void kmsan_ignore_page(struct page *page, int order);
> +void kmsan_poison_shadow(const void *address, size_t size, gfp_t flags);
> +void kmsan_unpoison_shadow(const void *address, size_t size);
> +void kmsan_check_memory(const void *address, size_t size);
> +void kmsan_check_skb(const struct sk_buff *skb);
> +void kmsan_handle_dma(const void *address, size_t size,
> +		      enum dma_data_direction direction);
> +void kmsan_handle_urb(const struct urb *urb, bool is_out);
> +void kmsan_copy_to_user(const void *to, const void *from, size_t to_copy,
> +			size_t left);
> +void *__msan_memcpy(void *dst, const void *src, u64 n);
> +void kmsan_enter_runtime(unsigned long *flags);
> +void kmsan_leave_runtime(unsigned long *flags);

It would be good to add doc comments to all API functions.

> +
> +#else
> +
> +#define KMSAN_INIT_VALUE(value) (value)
> +
> +static inline void kmsan_ignore_page(struct page *page, int order) {}
> +static inline void kmsan_poison_shadow(const void *address, size_t size,
> +				       gfp_t flags) {}
> +static inline void kmsan_unpoison_shadow(const void *address, size_t size) {}
> +static inline void kmsan_check_memory(const void *address, size_t size) {}
> +static inline void kmsan_check_skb(const struct sk_buff *skb) {}
> +static inline void kmsan_handle_urb(const struct urb *urb, bool is_out) {}
> +static inline void kmsan_handle_dma(const void *address, size_t size,
> +				    enum dma_data_direction direction) {}
> +static inline void kmsan_copy_to_user(
> +	const void *to, const void *from, size_t to_copy, size_t left) {}
> +static inline void *__msan_memcpy(void *dst, const void *src, size_t n)
> +{
> +	return NULL;
> +}
> +
> +static inline void kmsan_enter_runtime(unsigned long *flags) {}
> +static inline void kmsan_leave_runtime(unsigned long *flags) {}
> +
> +#endif
> +
> +#endif /* _LINUX_KMSAN_CHECKS_H */
> diff --git a/include/linux/kmsan.h b/include/linux/kmsan.h
> new file mode 100644
> index 000000000000..f5638bac368e
> --- /dev/null
> +++ b/include/linux/kmsan.h
> @@ -0,0 +1,143 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/*
> + * KMSAN API for subsystems.
> + *
> + * Copyright (C) 2017-2019 Google LLC
> + * Author: Alexander Potapenko <glider@xxxxxxxxxx>
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + *
> + */
> +#ifndef LINUX_KMSAN_H
> +#define LINUX_KMSAN_H
> +
> +#include <linux/gfp.h>
> +#include <linux/stackdepot.h>
> +#include <linux/types.h>
> +#include <linux/vmalloc.h>
> +
> +struct page;
> +struct kmem_cache;
> +struct task_struct;
> +struct vm_struct;
> +
> +
> +extern bool kmsan_ready;

What does this variable mean? Would 'kmsan_enabled' be more accurate?

> +#ifdef CONFIG_KMSAN
> +void __init kmsan_initialize_shadow(void);
> +void __init kmsan_initialize(void);
> +
> +/* These constants are defined in the MSan LLVM instrumentation pass. */
> +#define RETVAL_SIZE 800

This is in include/linux -- does it need a KMSAN_ prefix to avoid
clashing with other definitions?

> +#define KMSAN_PARAM_SIZE 800
> +
> +#define PARAM_ARRAY_SIZE (KMSAN_PARAM_SIZE / sizeof(depot_stack_handle_t))

Similar here -- does it need a KMSAN_ prefix?

> +struct kmsan_context_state {
> +	char param_tls[KMSAN_PARAM_SIZE];
> +	char retval_tls[RETVAL_SIZE];
> +	char va_arg_tls[KMSAN_PARAM_SIZE];
> +	char va_arg_origin_tls[KMSAN_PARAM_SIZE];
> +	u64 va_arg_overflow_size_tls;
> +	depot_stack_handle_t param_origin_tls[PARAM_ARRAY_SIZE];
> +	depot_stack_handle_t retval_origin_tls;
> +	depot_stack_handle_t origin_tls;
> +};
> +
> +struct kmsan_task_state {
> +	bool allow_reporting;
> +	struct kmsan_context_state cstate;
> +};
> +
> +void kmsan_task_create(struct task_struct *task);
> +void kmsan_task_exit(struct task_struct *task);
> +void kmsan_alloc_shadow_for_region(void *start, size_t size);
> +int kmsan_alloc_page(struct page *page, unsigned int order, gfp_t flags);
> +void kmsan_gup_pgd_range(struct page **pages, int nr);
> +void kmsan_free_page(struct page *page, unsigned int order);
> +void kmsan_split_page(struct page *page, unsigned int order);
> +void kmsan_copy_page_meta(struct page *dst, struct page *src);
> +
> +void kmsan_poison_slab(struct page *page, gfp_t flags);
> +void kmsan_kmalloc_large(const void *ptr, size_t size, gfp_t flags);
> +void kmsan_kfree_large(const void *ptr);
> +void kmsan_kmalloc(struct kmem_cache *s, const void *object, size_t size,
> +		   gfp_t flags);
> +void kmsan_slab_alloc(struct kmem_cache *s, void *object, gfp_t flags);
> +void kmsan_slab_free(struct kmem_cache *s, void *object);
> +
> +void kmsan_slab_setup_object(struct kmem_cache *s, void *object);
> +void kmsan_post_alloc_hook(struct kmem_cache *s, gfp_t flags,
> +			size_t size, void *object);
> +
> +/* vmap */
> +void kmsan_vmap_page_range_noflush(unsigned long start, unsigned long end,
> +				   pgprot_t prot, struct page **pages);
> +void kmsan_vunmap_page_range(unsigned long addr, unsigned long end);
> +
> +/* ioremap */
> +void kmsan_ioremap_page_range(unsigned long addr, unsigned long end,
> +			      phys_addr_t phys_addr, pgprot_t prot);
> +void kmsan_iounmap_page_range(unsigned long start, unsigned long end);
> +
> +void kmsan_softirq_enter(void);
> +void kmsan_softirq_exit(void);
> +
> +void kmsan_clear_page(void *page_addr);

It would be good to have doc comments for each of them.

> +#else
> +
> +static inline void __init kmsan_initialize_shadow(void) { }
> +static inline void __init kmsan_initialize(void) { }
> +
> +static inline void kmsan_task_create(struct task_struct *task) {}
> +static inline void kmsan_task_exit(struct task_struct *task) {}
> +static inline void kmsan_alloc_shadow_for_region(void *start, size_t size) {}
> +static inline int kmsan_alloc_page(struct page *page, unsigned int order,
> +				   gfp_t flags)
> +{
> +	return 0;
> +}
> +static inline void kmsan_gup_pgd_range(struct page **pages, int nr) {}
> +static inline void kmsan_free_page(struct page *page, unsigned int order) {}
> +static inline void kmsan_split_page(struct page *page, unsigned int order) {}
> +static inline void kmsan_copy_page_meta(struct page *dst, struct page *src) {}
> +
> +static inline void kmsan_poison_slab(struct page *page, gfp_t flags) {}
> +static inline void kmsan_kmalloc_large(const void *ptr, size_t size,
> +				       gfp_t flags) {}
> +static inline void kmsan_kfree_large(const void *ptr) {}
> +static inline void kmsan_kmalloc(struct kmem_cache *s, const void *object,
> +				 size_t size, gfp_t flags) {}
> +static inline void kmsan_slab_alloc(struct kmem_cache *s, void *object,
> +				    gfp_t flags) {}
> +static inline void kmsan_slab_free(struct kmem_cache *s, void *object) {}
> +
> +static inline void kmsan_slab_setup_object(struct kmem_cache *s,
> +					   void *object) {}
> +static inline void kmsan_post_alloc_hook(struct kmem_cache *s, gfp_t flags,
> +					 size_t size, void *object) {}
> +
> +static inline void kmsan_vmap_page_range_noflush(unsigned long start,
> +						 unsigned long end,
> +						 pgprot_t prot,
> +						 struct page **pages) {}
> +static inline void kmsan_vunmap_page_range(unsigned long start,
> +					   unsigned long end) {}
> +
> +static inline void kmsan_ioremap_page_range(unsigned long start,
> +					    unsigned long end,
> +					    phys_addr_t phys_addr,
> +					    pgprot_t prot) {}
> +static inline void kmsan_iounmap_page_range(unsigned long start,
> +					    unsigned long end) {}
> +static inline void kmsan_softirq_enter(void) {}
> +static inline void kmsan_softirq_exit(void) {}
> +
> +static inline void kmsan_clear_page(void *page_addr) {}
> +#endif
> +
> +#endif /* LINUX_KMSAN_H */
> diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
> index 93d97f9b0157..75c36318943d 100644
> --- a/lib/Kconfig.debug
> +++ b/lib/Kconfig.debug
> @@ -756,6 +756,8 @@ config DEBUG_STACKOVERFLOW
>  
>  source "lib/Kconfig.kasan"
>  
> +source "lib/Kconfig.kmsan"
> +
>  endmenu # "Memory Debugging"
>  
>  config ARCH_HAS_KCOV
> diff --git a/lib/Kconfig.kmsan b/lib/Kconfig.kmsan
> new file mode 100644
> index 000000000000..187dddfcf220
> --- /dev/null
> +++ b/lib/Kconfig.kmsan
> @@ -0,0 +1,22 @@
> +config HAVE_ARCH_KMSAN
> +	bool
> +
> +if HAVE_ARCH_KMSAN
> +
> +config KMSAN
> +	bool "KMSAN: detector of uninitialized memory use"
> +	depends on SLUB && !KASAN
> +	select STACKDEPOT
> +	help
> +	  KMSAN is a dynamic detector of uses of uninitialized memory in the
> +	  kernel. It is based on compiler instrumentation provided by Clang
> +	  and thus requires Clang 10.0.0+ to build.
> +
> +config TEST_KMSAN
> +	tristate "Module for testing KMSAN for bug detection"
> +	depends on m && KMSAN
> +	help
> +	  Test module that can trigger various uses of uninitialized memory
> +	  detectable by KMSAN.
> +
> +endif
> diff --git a/mm/kmsan/Makefile b/mm/kmsan/Makefile
> new file mode 100644
> index 000000000000..ccf6d2d00a7a
> --- /dev/null
> +++ b/mm/kmsan/Makefile
> @@ -0,0 +1,4 @@
> +obj-y := kmsan.o kmsan_instr.o kmsan_init.o kmsan_entry.o kmsan_hooks.o kmsan_report.o kmsan_shadow.o
> +
> +KMSAN_SANITIZE := n
> +KCOV_INSTRUMENT := n

Does KMSAN work together with UBSAN? In that case, maybe this needs a
'UBSAN_SANITIZE := n'.

> diff --git a/mm/kmsan/kmsan.c b/mm/kmsan/kmsan.c
> new file mode 100644
> index 000000000000..21e97d4b1a99
> --- /dev/null
> +++ b/mm/kmsan/kmsan.c
> @@ -0,0 +1,563 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * KMSAN runtime library.
> + *
> + * Copyright (C) 2017-2019 Google LLC
> + * Author: Alexander Potapenko <glider@xxxxxxxxxx>
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + */
> +
> +#include <asm/page.h>
> +#include <linux/compiler.h>
> +#include <linux/export.h>
> +#include <linux/highmem.h>
> +#include <linux/interrupt.h>
> +#include <linux/kernel.h>
> +#include <linux/kmsan.h>
> +#include <linux/memory.h>
> +#include <linux/mm.h>
> +#include <linux/preempt.h>
> +#include <linux/percpu-defs.h>
> +#include <linux/mm_types.h>
> +#include <linux/slab.h>
> +#include <linux/stackdepot.h>
> +#include <linux/stacktrace.h>
> +#include <linux/types.h>
> +#include <linux/vmalloc.h>
> +
> +#include <linux/mmzone.h>

Why the space above the mmzone.h include?

> +
> +#include "../slab.h"
> +#include "kmsan.h"
> +
> +/*
> + * Some kernel asm() calls mention the non-existing |__force_order| variable
> + * in the asm constraints to preserve the order of accesses to control
> + * registers. KMSAN turns those mentions into actual memory accesses, therefore
> + * the variable is now required to link the kernel.
> + */
> +unsigned long __force_order;

Not sure if this is related, but when compiling with KMSAN I get

	ERROR: "__force_order" [drivers/misc/lkdtm/lkdtm.ko] undefined!

with a default config with KMSAN selected.

> +bool kmsan_ready;
> +#define KMSAN_STACK_DEPTH 64
> +#define MAX_CHAIN_DEPTH 7

Should these defines be above the variable definitions?

> +/*
> + * According to Documentation/x86/kernel-stacks, kernel code can run on the
> + * following stacks:
> + * - regular task stack - when executing the task code
> + *  - interrupt stack - when handling external hardware interrupts and softirqs
> + *  - NMI stack
> + * 0 is for regular interrupts, 1 for softirqs, 2 for NMI.
> + * Because interrupts may nest, trying to use a new context for every new
> + * interrupt.
> + */
> +/* [0] for dummy per-CPU context. */
> +DEFINE_PER_CPU(struct kmsan_context_state[KMSAN_NESTED_CONTEXT_MAX],
> +	       kmsan_percpu_cstate);
> +/* 0 for task context, |i>0| for kmsan_context_state[i]. */
> +DEFINE_PER_CPU(int, kmsan_context_level);
> +DEFINE_PER_CPU(int, kmsan_in_interrupt);
> +DEFINE_PER_CPU(bool, kmsan_in_softirq);
> +DEFINE_PER_CPU(bool, kmsan_in_nmi);
> +DEFINE_PER_CPU(int, kmsan_in_runtime);
> +
> +struct kmsan_context_state *task_kmsan_context_state(void)
> +{
> +	int cpu = smp_processor_id();
> +	int level = this_cpu_read(kmsan_context_level);
> +	struct kmsan_context_state *ret;
> +
> +	if (!kmsan_ready || IN_RUNTIME()) {
> +		ret = &per_cpu(kmsan_percpu_cstate[0], cpu);
> +		__memset(ret, 0, sizeof(struct kmsan_context_state));
> +		return ret;
> +	}
> +
> +	if (!level)
> +		ret = &current->kmsan.cstate;
> +	else
> +		ret = &per_cpu(kmsan_percpu_cstate[level], cpu);
> +	return ret;
> +}
> +
> +void kmsan_internal_task_create(struct task_struct *task)
> +{
> +	struct kmsan_task_state *state = &task->kmsan;
> +
> +	__memset(state, 0, sizeof(struct kmsan_task_state));
> +	state->allow_reporting = true;
> +}
> +
> +void kmsan_internal_memset_shadow(void *addr, int b, size_t size,
> +				  bool checked)
> +{
> +	void *shadow_start;
> +	u64 page_offset, address = (u64)addr;
> +	size_t to_fill;
> +
> +	BUG_ON(!metadata_is_contiguous(addr, size, META_SHADOW));
> +	while (size) {
> +		page_offset = address % PAGE_SIZE;
> +		to_fill = min(PAGE_SIZE - page_offset, (u64)size);
> +		shadow_start = kmsan_get_metadata((void *)address, to_fill,
> +						  META_SHADOW);
> +		if (!shadow_start) {
> +			if (checked) {
> +				kmsan_pr_locked("WARNING: not memsetting %d bytes starting at %px, because the shadow is NULL\n", to_fill, address);
> +				BUG();

Why not just 'panic("%s: ...", __func__, ...)' ?

If the BUG() should not be here, then maybe just WARN_ON?

> +			}
> +			/* Otherwise just move on. */
> +		} else {
> +			__memset(shadow_start, b, to_fill);
> +		}
> +		address += to_fill;
> +		size -= to_fill;
> +	}
> +}
> +
> +void kmsan_internal_poison_shadow(void *address, size_t size,
> +				gfp_t flags, unsigned int poison_flags)
> +{
> +	bool checked = poison_flags & KMSAN_POISON_CHECK;
> +	depot_stack_handle_t handle;
> +	u32 extra_bits = 0;
> +
> +	if (poison_flags & KMSAN_POISON_FREE)
> +		extra_bits = 1;
> +	kmsan_internal_memset_shadow(address, -1, size, checked);
> +	handle = kmsan_save_stack_with_flags(flags, extra_bits);
> +	kmsan_set_origin_checked(address, size, handle, checked);
> +}
> +
> +void kmsan_internal_unpoison_shadow(void *address, size_t size, bool checked)
> +{
> +	kmsan_internal_memset_shadow(address, 0, size, checked);
> +	kmsan_set_origin_checked(address, size, 0, checked);
> +}
> +
> +depot_stack_handle_t kmsan_save_stack_with_flags(gfp_t flags,
> +						 unsigned int reserved)
> +{
> +	depot_stack_handle_t handle;
> +	unsigned long entries[KMSAN_STACK_DEPTH];
> +	unsigned int nr_entries;
> +
> +	nr_entries = stack_trace_save(entries, KMSAN_STACK_DEPTH, 0);
> +	filter_irq_stacks(entries, nr_entries);
> +
> +	/* Don't sleep (see might_sleep_if() in __alloc_pages_nodemask()). */
> +	flags &= ~__GFP_DIRECT_RECLAIM;
> +
> +	handle = stack_depot_save(entries, nr_entries, flags);
> +	return set_dsh_extra_bits(handle, reserved);
> +}
> +
> +/*
> + * Depending on the value of is_memmove, this serves as both a memcpy and a
> + * memmove implementation.
> + *
> + * As with the regular memmove, do the following:
> + * - if src and dst don't overlap, use memcpy();
> + * - if src and dst overlap:
> + *   - if src > dst, use memcpy();
> + *   - if src < dst, use reverse-memcpy.
> + * Why this is correct:
> + * - problems may arise if for some part of the overlapping region we
> + *   overwrite its shadow with a new value before copying it somewhere.
> + *   But there's a 1:1 mapping between the kernel memory and its shadow,
> + *   therefore if this doesn't happen with the kernel memory it can't happen
> + *   with the shadow.
> + */
> +void kmsan_memcpy_memmove_metadata(void *dst, void *src, size_t n,
> +				   bool is_memmove)
> +{
> +	void *shadow_src, *shadow_dst;
> +	depot_stack_handle_t *origin_src, *origin_dst;
> +	int src_slots, dst_slots, i, iter, step, skip_bits;
> +	depot_stack_handle_t old_origin = 0, chain_origin, new_origin = 0;
> +	u32 *align_shadow_src, shadow;
> +	bool backwards;
> +
> +	BUG_ON(!metadata_is_contiguous(dst, n, META_SHADOW));
> +	BUG_ON(!metadata_is_contiguous(src, n, META_SHADOW));
> +
> +	shadow_dst = kmsan_get_metadata(dst, n, META_SHADOW);
> +	if (!shadow_dst)
> +		return;
> +
> +	shadow_src = kmsan_get_metadata(src, n, META_SHADOW);
> +	if (!shadow_src) {
> +		/*
> +		 * |src| is untracked: zero out destination shadow, ignore the
> +		 * origins, we're done.
> +		 */
> +		__memset(shadow_dst, 0, n);
> +		return;
> +	}
> +	if (is_memmove)
> +		__memmove(shadow_dst, shadow_src, n);
> +	else
> +		__memcpy(shadow_dst, shadow_src, n);
> +
> +	origin_dst = kmsan_get_metadata(dst, n, META_ORIGIN);
> +	origin_src = kmsan_get_metadata(src, n, META_ORIGIN);
> +	BUG_ON(!origin_dst || !origin_src);
> +	BUG_ON(!metadata_is_contiguous(dst, n, META_ORIGIN));
> +	BUG_ON(!metadata_is_contiguous(src, n, META_ORIGIN));
> +	src_slots = (ALIGN((u64)src + n, ORIGIN_SIZE) -
> +		     ALIGN_DOWN((u64)src, ORIGIN_SIZE)) / ORIGIN_SIZE;
> +	dst_slots = (ALIGN((u64)dst + n, ORIGIN_SIZE) -
> +		     ALIGN_DOWN((u64)dst, ORIGIN_SIZE)) / ORIGIN_SIZE;
> +	BUG_ON(!src_slots || !dst_slots);
> +	BUG_ON((src_slots < 1) || (dst_slots < 1));
> +	BUG_ON((src_slots - dst_slots > 1) || (dst_slots - src_slots < -1));
> +
> +	backwards = is_memmove && (dst > src);
> +	i = backwards ? min(src_slots, dst_slots) - 1 : 0;
> +	iter = backwards ? -1 : 1;
> +
> +	align_shadow_src = (u32 *)ALIGN_DOWN((u64)shadow_src, ORIGIN_SIZE);
> +	for (step = 0; step < min(src_slots, dst_slots); step++, i += iter) {
> +		BUG_ON(i < 0);
> +		shadow = align_shadow_src[i];
> +		if (i == 0) {
> +			/*
> +			 * If |src| isn't aligned on ORIGIN_SIZE, don't
> +			 * look at the first |src % ORIGIN_SIZE| bytes
> +			 * of the first shadow slot.
> +			 */
> +			skip_bits = ((u64)src % ORIGIN_SIZE) * 8;
> +			shadow = (shadow << skip_bits) >> skip_bits;
> +		}
> +		if (i == src_slots - 1) {
> +			/*
> +			 * If |src + n| isn't aligned on
> +			 * ORIGIN_SIZE, don't look at the last
> +			 * |(src + n) % ORIGIN_SIZE| bytes of the
> +			 * last shadow slot.
> +			 */
> +			skip_bits = (((u64)src + n) % ORIGIN_SIZE) * 8;
> +			shadow = (shadow >> skip_bits) << skip_bits;
> +		}
> +		/*
> +		 * Overwrite the origin only if the corresponding
> +		 * shadow is nonempty.
> +		 */
> +		if (origin_src[i] && (origin_src[i] != old_origin) && shadow) {
> +			old_origin = origin_src[i];
> +			chain_origin = kmsan_internal_chain_origin(old_origin);
> +			/*
> +			 * kmsan_internal_chain_origin() may return
> +			 * NULL, but we don't want to lose the previous
> +			 * origin value.
> +			 */
> +			if (chain_origin)
> +				new_origin = chain_origin;
> +			else
> +				new_origin = old_origin;
> +		}
> +		if (shadow)
> +			origin_dst[i] = new_origin;
> +		else
> +			origin_dst[i] = 0;
> +	}
> +}
> +
> +void kmsan_memcpy_metadata(void *dst, void *src, size_t n)
> +{
> +	kmsan_memcpy_memmove_metadata(dst, src, n, /*is_memmove*/false);
> +}
> +
> +void kmsan_memmove_metadata(void *dst, void *src, size_t n)
> +{
> +	kmsan_memcpy_memmove_metadata(dst, src, n, /*is_memmove*/true);
> +}
> +
> +depot_stack_handle_t kmsan_internal_chain_origin(depot_stack_handle_t id)
> +{
> +	depot_stack_handle_t handle;
> +	unsigned long entries[3];
> +	u64 magic = KMSAN_CHAIN_MAGIC_ORIGIN_FULL;
> +	int depth = 0;
> +	static int skipped;
> +	u32 extra_bits;
> +
> +	if (!kmsan_ready)
> +		return 0;
> +
> +	if (!id)
> +		return id;
> +	/*
> +	 * Make sure we have enough spare bits in |id| to hold the UAF bit and
> +	 * the chain depth.
> +	 */
> +	BUILD_BUG_ON((1 << STACK_DEPOT_EXTRA_BITS) <= (MAX_CHAIN_DEPTH << 1));
> +
> +	extra_bits = get_dsh_extra_bits(id);
> +
> +	depth = extra_bits >> 1;
> +	if (depth >= MAX_CHAIN_DEPTH) {
> +		skipped++;
> +		if (skipped % 10000 == 0) {
> +			kmsan_pr_locked("not chained %d origins\n", skipped);
> +			dump_stack();
> +			kmsan_print_origin(id);
> +		}
> +		return id;
> +	}
> +	depth++;
> +	/* Lowest bit is the UAF flag, higher bits hold the depth. */
> +	extra_bits = (depth << 1) | (extra_bits & 1);
> +	/* TODO(glider): how do we figure out we've dropped some frames? */
> +	entries[0] = magic + depth;
> +	entries[1] = kmsan_save_stack_with_flags(GFP_ATOMIC, extra_bits);
> +	entries[2] = id;
> +	handle = stack_depot_save(entries, ARRAY_SIZE(entries), GFP_ATOMIC);
> +	return set_dsh_extra_bits(handle, extra_bits);
> +}
> +
> +void kmsan_write_aligned_origin(void *var, size_t size, u32 origin)
> +{
> +	u32 *var_cast = (u32 *)var;
> +	int i;
> +
> +	BUG_ON((u64)var_cast % ORIGIN_SIZE);
> +	BUG_ON(size % ORIGIN_SIZE);
> +	for (i = 0; i < size / ORIGIN_SIZE; i++)
> +		var_cast[i] = origin;
> +}
> +
> +/*
> + * TODO(glider): writing an initialized byte shouldn't zero out the origin, if
> + * the remaining three bytes are uninitialized.
> + */

What needs to be done to address the TODO?  Just adding a comment is
fine (or if the TODO can be resolved that's also fine).

> +void kmsan_internal_set_origin(void *addr, int size, u32 origin)
> +{
> +	void *origin_start;
> +	u64 address = (u64)addr, page_offset;
> +	size_t to_fill, pad = 0;
> +
> +	if (!IS_ALIGNED(address, ORIGIN_SIZE)) {
> +		pad = address % ORIGIN_SIZE;
> +		address -= pad;
> +		size += pad;
> +	}
> +
> +	while (size > 0) {
> +		page_offset = address % PAGE_SIZE;
> +		to_fill = min(PAGE_SIZE - page_offset, (u64)size);
> +		/* write at least ORIGIN_SIZE bytes */
> +		to_fill = ALIGN(to_fill, ORIGIN_SIZE);
> +		BUG_ON(!to_fill);
> +		origin_start = kmsan_get_metadata((void *)address, to_fill,
> +						  META_ORIGIN);
> +		address += to_fill;
> +		size -= to_fill;
> +		if (!origin_start)
> +			/* Can happen e.g. if the memory is untracked. */
> +			continue;
> +		kmsan_write_aligned_origin(origin_start, to_fill, origin);
> +	}
> +}
> +
> +void kmsan_set_origin_checked(void *addr, int size, u32 origin, bool checked)
> +{
> +	if (checked && !metadata_is_contiguous(addr, size, META_ORIGIN)) {
> +		kmsan_pr_locked("WARNING: not setting origin for %d bytes starting at %px, because the metadata is incontiguous\n", size, addr);
> +		BUG();

Just panic?

> +	}
> +	kmsan_internal_set_origin(addr, size, origin);
> +}
> +
> +struct page *vmalloc_to_page_or_null(void *vaddr)
> +{
> +	struct page *page;
> +
> +	if (!kmsan_internal_is_vmalloc_addr(vaddr) &&
> +	    !kmsan_internal_is_module_addr(vaddr))
> +		return NULL;
> +	page = vmalloc_to_page(vaddr);
> +	if (pfn_valid(page_to_pfn(page)))
> +		return page;
> +	else
> +		return NULL;
> +}
> +
> +void kmsan_internal_check_memory(void *addr, size_t size, const void *user_addr,
> +				 int reason)
> +{
> +	unsigned long irq_flags;
> +	unsigned long addr64 = (unsigned long)addr;
> +	unsigned char *shadow = NULL;
> +	depot_stack_handle_t *origin = NULL;
> +	depot_stack_handle_t cur_origin = 0, new_origin = 0;
> +	int cur_off_start = -1;
> +	int i, chunk_size;
> +	size_t pos = 0;
> +
> +	BUG_ON(!metadata_is_contiguous(addr, size, META_SHADOW));
> +	if (size <= 0)
> +		return;
> +	while (pos < size) {
> +		chunk_size = min(size - pos,
> +				 PAGE_SIZE - ((addr64 + pos) % PAGE_SIZE));
> +		shadow = kmsan_get_metadata((void *)(addr64 + pos), chunk_size,
> +					    META_SHADOW);
> +		if (!shadow) {
> +			/*
> +			 * This page is untracked. If there were uninitialized
> +			 * bytes before, report them.
> +			 */
> +			if (cur_origin) {
> +				ENTER_RUNTIME(irq_flags);
> +				kmsan_report(cur_origin, addr, size,
> +					     cur_off_start, pos - 1, user_addr,
> +					     reason);
> +				LEAVE_RUNTIME(irq_flags);
> +			}
> +			cur_origin = 0;
> +			cur_off_start = -1;
> +			pos += chunk_size;
> +			continue;
> +		}
> +		for (i = 0; i < chunk_size; i++) {
> +			if (!shadow[i]) {
> +				/*
> +				 * This byte is unpoisoned. If there were
> +				 * poisoned bytes before, report them.
> +				 */
> +				if (cur_origin) {
> +					ENTER_RUNTIME(irq_flags);
> +					kmsan_report(cur_origin, addr, size,
> +						     cur_off_start, pos + i - 1,
> +						     user_addr, reason);
> +					LEAVE_RUNTIME(irq_flags);
> +				}
> +				cur_origin = 0;
> +				cur_off_start = -1;
> +				continue;
> +			}
> +			origin = kmsan_get_metadata((void *)(addr64 + pos + i),
> +						chunk_size - i, META_ORIGIN);
> +			BUG_ON(!origin);
> +			new_origin = *origin;
> +			/*
> +			 * Encountered new origin - report the previous
> +			 * uninitialized range.
> +			 */
> +			if (cur_origin != new_origin) {
> +				if (cur_origin) {
> +					ENTER_RUNTIME(irq_flags);
> +					kmsan_report(cur_origin, addr, size,
> +						     cur_off_start, pos + i - 1,
> +						     user_addr, reason);
> +					LEAVE_RUNTIME(irq_flags);
> +				}
> +				cur_origin = new_origin;
> +				cur_off_start = pos + i;
> +			}
> +		}
> +		pos += chunk_size;
> +	}
> +	BUG_ON(pos != size);
> +	if (cur_origin) {
> +		ENTER_RUNTIME(irq_flags);
> +		kmsan_report(cur_origin, addr, size, cur_off_start, pos - 1,
> +			     user_addr, reason);
> +		LEAVE_RUNTIME(irq_flags);
> +	}
> +}
> +
> +/*
> + * TODO(glider): this check shouldn't be performed for origin pages, because
> + * they're always accessed after the shadow pages.
> + */

What needs to be done to address the TODO?  Just adding a comment is
fine (or if the TODO can be resolved that's also fine).

> +bool metadata_is_contiguous(void *addr, size_t size, bool is_origin)
> +{
> +	u64 cur_addr = (u64)addr, next_addr;
> +	char *cur_meta = NULL, *next_meta = NULL;
> +	depot_stack_handle_t *origin_p;
> +	bool all_untracked = false;
> +	const char *fname = is_origin ? "origin" : "shadow";
> +
> +	if (!size)
> +		return true;
> +
> +	/* The whole range belongs to the same page. */
> +	if (ALIGN_DOWN(cur_addr + size - 1, PAGE_SIZE) ==
> +	    ALIGN_DOWN(cur_addr, PAGE_SIZE))
> +		return true;
> +	cur_meta = kmsan_get_metadata((void *)cur_addr, 1, is_origin);
> +	if (!cur_meta)
> +		all_untracked = true;
> +	for (next_addr = cur_addr + PAGE_SIZE; next_addr < (u64)addr + size;
> +		     cur_addr = next_addr,
> +		     cur_meta = next_meta,
> +		     next_addr += PAGE_SIZE) {
> +		next_meta = kmsan_get_metadata((void *)next_addr, 1, is_origin);
> +		if (!next_meta) {
> +			if (!all_untracked)
> +				goto report;
> +			continue;
> +		}
> +		if ((u64)cur_meta == ((u64)next_meta - PAGE_SIZE))
> +			continue;
> +		goto report;
> +	}
> +	return true;
> +
> +report:
> +	kmsan_pr_locked("BUG: attempting to access two shadow page ranges.\n");
> +	dump_stack();
> +	kmsan_pr_locked("\n");
> +	kmsan_pr_locked("Access of size %d at %px.\n", size, addr);
> +	kmsan_pr_locked("Addresses belonging to different ranges: %px and %px\n",
> +		     cur_addr, next_addr);
> +	kmsan_pr_locked("page[0].%s: %px, page[1].%s: %px\n",
> +		     fname, cur_meta, fname, next_meta);
> +	origin_p = kmsan_get_metadata(addr, 1, META_ORIGIN);
> +	if (origin_p) {
> +		kmsan_pr_locked("Origin: %08x\n", *origin_p);
> +		kmsan_print_origin(*origin_p);
> +	} else {
> +		kmsan_pr_locked("Origin: unavailable\n");
> +	}

These repeated calls to kmsan_pr_locked seem unnecessary. There is
nothing ensuring atomicity of all these print calls w.r.t. reporting.

> +	return false;
> +}
> +
> +/*
> + * Dummy replacement for __builtin_return_address() which may crash without
> + * frame pointers.
> + */
> +void *kmsan_internal_return_address(int arg)
> +{
> +#ifdef CONFIG_UNWINDER_FRAME_POINTER
> +	switch (arg) {
> +	case 1:
> +		return __builtin_return_address(1);
> +	case 2:
> +		return __builtin_return_address(2);
> +	default:
> +		BUG();
> +	}
> +#else
> +	unsigned long entries[1];
> +
> +	stack_trace_save(entries, 1, arg);
> +	return (void *)entries[0];
> +#endif
> +}
> +
> +bool kmsan_internal_is_module_addr(void *vaddr)
> +{
> +	return ((u64)vaddr >= MODULES_VADDR) && ((u64)vaddr < MODULES_END);
> +}
> +
> +bool kmsan_internal_is_vmalloc_addr(void *addr)
> +{
> +	return ((u64)addr >= VMALLOC_START) && ((u64)addr < VMALLOC_END);
> +}
> diff --git a/mm/kmsan/kmsan.h b/mm/kmsan/kmsan.h
> new file mode 100644
> index 000000000000..8760feef39bf
> --- /dev/null
> +++ b/mm/kmsan/kmsan.h
> @@ -0,0 +1,146 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/*
> + * KMSAN internal declarations.
> + *
> + * Copyright (C) 2017-2019 Google LLC
> + * Author: Alexander Potapenko <glider@xxxxxxxxxx>
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + *
> + */
> +
> +#ifndef __MM_KMSAN_KMSAN_H
> +#define __MM_KMSAN_KMSAN_H
> +
> +#include <asm/pgtable_64_types.h>
> +#include <linux/irqflags.h>
> +#include <linux/sched.h>
> +#include <linux/stackdepot.h>
> +#include <linux/stacktrace.h>
> +#include <linux/nmi.h>
> +#include <linux/mm.h>
> +#include <linux/printk.h>
> +
> +#include "kmsan_shadow.h"
> +
> +#define KMSAN_MAGIC_MASK 0xffffffffff00
> +#define KMSAN_ALLOCA_MAGIC_ORIGIN 0x4110c4071900
> +#define KMSAN_CHAIN_MAGIC_ORIGIN_FULL 0xd419170cba00
> +
> +#define KMSAN_POISON_NOCHECK	0x0
> +#define KMSAN_POISON_CHECK	0x1
> +#define KMSAN_POISON_FREE	0x2
> +
> +#define ORIGIN_SIZE 4
> +
> +#define META_SHADOW	(false)
> +#define META_ORIGIN	(true)
> +
> +#define KMSAN_NESTED_CONTEXT_MAX (8)
> +/* [0] for dummy per-CPU context */
> +DECLARE_PER_CPU(struct kmsan_context_state[KMSAN_NESTED_CONTEXT_MAX],
> +		kmsan_percpu_cstate);
> +/* 0 for task context, |i>0| for kmsan_context_state[i]. */
> +DECLARE_PER_CPU(int, kmsan_context_level);
> +DECLARE_PER_CPU(int, kmsan_in_interrupt);
> +DECLARE_PER_CPU(bool, kmsan_in_softirq);
> +DECLARE_PER_CPU(bool, kmsan_in_nmi);
> +
> +extern spinlock_t report_lock;
> +
> +/* Stolen from kernel/printk/internal.h */
> +#define PRINTK_SAFE_CONTEXT_MASK	 0x3fffffff

Is this used anywhere?

> +/* Called by kmsan_report.c under a lock. */
> +#define kmsan_pr_err(...) pr_err(__VA_ARGS__)

Why is this macro needed? It's never redefined, so in the places it is
used, you can just use pr_err. For readability I would avoid unnecessary
aliases, but if there is a genuine reason this may be needed in future,
I would just add a comment.

> +/* Used in other places - doesn't require a lock. */
> +#define kmsan_pr_locked(...) \
> +	do { \
> +		unsigned long flags;			\
> +		spin_lock_irqsave(&report_lock, flags); \
> +		pr_err(__VA_ARGS__); \
> +		spin_unlock_irqrestore(&report_lock, flags); \
> +	} while (0)

Is this macro needed?  The only reason it sort of makes sense is to
serialize a report with other printing, but otherwise pr_err already
makes sure things are serialized properly.

> +void kmsan_print_origin(depot_stack_handle_t origin);
> +void kmsan_report(depot_stack_handle_t origin,
> +		  void *address, int size, int off_first, int off_last,
> +		  const void *user_addr, int reason);
> +
> +
> +enum KMSAN_BUG_REASON {
> +	REASON_ANY = 0,
> +	REASON_COPY_TO_USER = 1,
> +	REASON_USE_AFTER_FREE = 2,
> +	REASON_SUBMIT_URB = 3,
> +};

Is it required to explicitly assign constants to these?

> +
> +/*
> + * When a compiler hook is invoked, it may make a call to instrumented code
> + * and eventually call itself recursively. To avoid that, we protect the
> + * runtime entry points with ENTER_RUNTIME()/LEAVE_RUNTIME() macros and exit
> + * the hook if IN_RUNTIME() is true. But when an interrupt occurs inside the
> + * runtime, the hooks won’t run either, which may lead to errors.
> + * Therefore we have to disable interrupts inside the runtime.
> + */
> +DECLARE_PER_CPU(int, kmsan_in_runtime);
> +#define IN_RUNTIME()	(this_cpu_read(kmsan_in_runtime))
> +#define ENTER_RUNTIME(irq_flags) \
> +	do { \
> +		preempt_disable(); \
> +		local_irq_save(irq_flags); \
> +		stop_nmi();		\
> +		this_cpu_inc(kmsan_in_runtime); \
> +		BUG_ON(this_cpu_read(kmsan_in_runtime) > 1); \
> +	} while (0)
> +#define LEAVE_RUNTIME(irq_flags)	\
> +	do {	\
> +		this_cpu_dec(kmsan_in_runtime);	\
> +		if (this_cpu_read(kmsan_in_runtime)) { \
> +			kmsan_pr_err("kmsan_in_runtime: %d\n", \
> +				this_cpu_read(kmsan_in_runtime)); \
> +			BUG(); \
> +		}	\
> +		restart_nmi();		\
> +		local_irq_restore(irq_flags);	\
> +		preempt_enable(); } while (0)

Could these not be macros, and instead be static __always_inline
functions?

> +
> +void kmsan_memcpy_metadata(void *dst, void *src, size_t n);
> +void kmsan_memmove_metadata(void *dst, void *src, size_t n);
> +
> +depot_stack_handle_t kmsan_save_stack(void);
> +depot_stack_handle_t kmsan_save_stack_with_flags(gfp_t flags,
> +						 unsigned int extra_bits);
> +void kmsan_internal_poison_shadow(void *address, size_t size, gfp_t flags,
> +				  unsigned int poison_flags);
> +void kmsan_internal_unpoison_shadow(void *address, size_t size, bool checked);
> +void kmsan_internal_memset_shadow(void *address, int b, size_t size,
> +				  bool checked);
> +depot_stack_handle_t kmsan_internal_chain_origin(depot_stack_handle_t id);
> +void kmsan_write_aligned_origin(void *var, size_t size, u32 origin);
> +
> +void kmsan_internal_task_create(struct task_struct *task);
> +void kmsan_internal_set_origin(void *addr, int size, u32 origin);
> +void kmsan_set_origin_checked(void *addr, int size, u32 origin, bool checked);
> +
> +struct kmsan_context_state *task_kmsan_context_state(void);
> +
> +bool metadata_is_contiguous(void *addr, size_t size, bool is_origin);
> +void kmsan_internal_check_memory(void *addr, size_t size, const void *user_addr,
> +				 int reason);
> +
> +struct page *vmalloc_to_page_or_null(void *vaddr);
> +
> +/* Declared in mm/vmalloc.c */
> +void __vunmap_page_range(unsigned long addr, unsigned long end);
> +int __vmap_page_range_noflush(unsigned long start, unsigned long end,
> +				   pgprot_t prot, struct page **pages);
> +
> +void *kmsan_internal_return_address(int arg);
> +bool kmsan_internal_is_module_addr(void *vaddr);
> +bool kmsan_internal_is_vmalloc_addr(void *addr);
> +
> +#endif  /* __MM_KMSAN_KMSAN_H */
> diff --git a/mm/kmsan/kmsan_entry.c b/mm/kmsan/kmsan_entry.c
> new file mode 100644
> index 000000000000..47bc7736f1a9
> --- /dev/null
> +++ b/mm/kmsan/kmsan_entry.c
> @@ -0,0 +1,118 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * KMSAN hooks for entry_64.S
> + *
> + * Copyright (C) 2018-2019 Google LLC
> + * Author: Alexander Potapenko <glider@xxxxxxxxxx>
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + *
> + */
> +
> +#include "kmsan.h"
> +
> +static void kmsan_context_enter(void)
> +{
> +	int level = this_cpu_read(kmsan_context_level) + 1;
> +
> +	BUG_ON(level >= KMSAN_NESTED_CONTEXT_MAX);
> +	this_cpu_write(kmsan_context_level, level);
> +}
> +
> +static void kmsan_context_exit(void)
> +{
> +	int level = this_cpu_read(kmsan_context_level) - 1;
> +
> +	BUG_ON(level < 0);
> +	this_cpu_write(kmsan_context_level, level);
> +}

These are not preemption-safe. Here, this_cpu_dec_return followed by the
BUG_ON should be sufficient. Similarly for kmsan_context_enter above and
the enter/exit hooks below (using this_cpu_add_return).

> +void kmsan_interrupt_enter(void)
> +{
> +	int in_interrupt = this_cpu_read(kmsan_in_interrupt);
> +
> +	/* Turns out it's possible for in_interrupt to be >0 here. */
> +	kmsan_context_enter();
> +	BUG_ON(in_interrupt > 1);
> +	/* Can't check preempt_count() here, it may be zero. */
> +	this_cpu_write(kmsan_in_interrupt, in_interrupt + 1);
> +}
> +EXPORT_SYMBOL(kmsan_interrupt_enter);
> +
> +void kmsan_interrupt_exit(void)
> +{
> +	int in_interrupt = this_cpu_read(kmsan_in_interrupt);
> +
> +	BUG_ON(!in_interrupt);
> +	kmsan_context_exit();
> +	/* Can't check preempt_count() here, it may be zero. */
> +	this_cpu_write(kmsan_in_interrupt, in_interrupt - 1);
> +}
> +EXPORT_SYMBOL(kmsan_interrupt_exit);

Why exactly does kmsan_in_interrupt need to be maintained here? I can't
see it being used anywhere else. Is it only for the BUG_ON?

> +void kmsan_softirq_enter(void)
> +{
> +	bool in_softirq = this_cpu_read(kmsan_in_softirq);
> +
> +	BUG_ON(in_softirq);
> +	kmsan_context_enter();
> +	/* Can't check preempt_count() here, it may be zero. */
> +	this_cpu_write(kmsan_in_softirq, true);
> +}
> +EXPORT_SYMBOL(kmsan_softirq_enter);
> +
> +void kmsan_softirq_exit(void)
> +{
> +	bool in_softirq = this_cpu_read(kmsan_in_softirq);
> +
> +	BUG_ON(!in_softirq);
> +	kmsan_context_exit();
> +	/* Can't check preempt_count() here, it may be zero. */
> +	this_cpu_write(kmsan_in_softirq, false);
> +}
> +EXPORT_SYMBOL(kmsan_softirq_exit);

Same question here for kmsan_in_softirq.

> +void kmsan_nmi_enter(void)
> +{
> +	bool in_nmi = this_cpu_read(kmsan_in_nmi);
> +
> +	BUG_ON(in_nmi);
> +	BUG_ON(preempt_count() & NMI_MASK);
> +	kmsan_context_enter();
> +	this_cpu_write(kmsan_in_nmi, true);
> +}
> +EXPORT_SYMBOL(kmsan_nmi_enter);
> +
> +void kmsan_nmi_exit(void)
> +{
> +	bool in_nmi = this_cpu_read(kmsan_in_nmi);
> +
> +	BUG_ON(!in_nmi);
> +	BUG_ON(preempt_count() & NMI_MASK);
> +	kmsan_context_exit();
> +	this_cpu_write(kmsan_in_nmi, false);
> +
> +}
> +EXPORT_SYMBOL(kmsan_nmi_exit);

And same question here for kmsan_in_nmi.

> +void kmsan_ist_enter(u64 shift_ist)
> +{
> +	kmsan_context_enter();
> +}
> +EXPORT_SYMBOL(kmsan_ist_enter);
> +
> +void kmsan_ist_exit(u64 shift_ist)
> +{
> +	kmsan_context_exit();
> +}
> +EXPORT_SYMBOL(kmsan_ist_exit);
> +
> +void kmsan_unpoison_pt_regs(struct pt_regs *regs)
> +{
> +	if (!kmsan_ready || IN_RUNTIME())
> +		return;
> +	kmsan_internal_unpoison_shadow(regs, sizeof(*regs), /*checked*/true);
> +}
> +EXPORT_SYMBOL(kmsan_unpoison_pt_regs);
> diff --git a/mm/kmsan/kmsan_hooks.c b/mm/kmsan/kmsan_hooks.c
> new file mode 100644
> index 000000000000..13a6ed809d81
> --- /dev/null
> +++ b/mm/kmsan/kmsan_hooks.c
> @@ -0,0 +1,422 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * KMSAN hooks for kernel subsystems.
> + *
> + * These functions handle creation of KMSAN metadata for memory allocations.
> + *
> + * Copyright (C) 2018-2019 Google LLC
> + * Author: Alexander Potapenko <glider@xxxxxxxxxx>
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + *
> + */
> +
> +#include <asm/cacheflush.h>
> +#include <linux/dma-direction.h>
> +#include <linux/gfp.h>
> +#include <linux/mm.h>
> +#include <linux/mm_types.h>
> +#include <linux/skbuff.h>
> +#include <linux/slab.h>
> +#include <linux/usb.h>
> +
> +#include "../slab.h"
> +#include "kmsan.h"
> +
> +/*
> + * The functions may call back to instrumented code, which, in turn, may call
> + * these hooks again. To avoid re-entrancy, we use __GFP_NO_KMSAN_SHADOW.
> + * Instrumented functions shouldn't be called under
> + * ENTER_RUNTIME()/LEAVE_RUNTIME(), because this will lead to skipping
> + * effects of functions like memset() inside instrumented code.
> + */
> +/* Called from kernel/kthread.c, kernel/fork.c */
> +void kmsan_task_create(struct task_struct *task)
> +{
> +	unsigned long irq_flags;
> +
> +	if (!task)
> +		return;
> +	ENTER_RUNTIME(irq_flags);
> +	kmsan_internal_task_create(task);
> +	LEAVE_RUNTIME(irq_flags);
> +}
> +EXPORT_SYMBOL(kmsan_task_create);
> +
> +
> +/* Called from kernel/exit.c */
> +void kmsan_task_exit(struct task_struct *task)
> +{
> +	unsigned long irq_flags;
> +	struct kmsan_task_state *state = &task->kmsan;
> +
> +	if (!kmsan_ready || IN_RUNTIME())
> +		return;
> +
> +	ENTER_RUNTIME(irq_flags);
> +	state->allow_reporting = false;
> +
> +	LEAVE_RUNTIME(irq_flags);
> +}
> +EXPORT_SYMBOL(kmsan_task_exit);
> +
> +/* Called from mm/slub.c */
> +void kmsan_slab_alloc(struct kmem_cache *s, void *object, gfp_t flags)
> +{
> +	unsigned long irq_flags;
> +
> +	if (unlikely(object == NULL))
> +		return;
> +	if (!kmsan_ready || IN_RUNTIME())
> +		return;
> +	/*
> +	 * There's a ctor or this is an RCU cache - do nothing. The memory
> +	 * status hasn't changed since last use.
> +	 */
> +	if (s->ctor || (s->flags & SLAB_TYPESAFE_BY_RCU))
> +		return;
> +
> +	ENTER_RUNTIME(irq_flags);
> +	if (flags & __GFP_ZERO) {
> +		kmsan_internal_unpoison_shadow(object, s->object_size,
> +					       KMSAN_POISON_CHECK);
> +	} else {
> +		kmsan_internal_poison_shadow(object, s->object_size, flags,
> +					     KMSAN_POISON_CHECK);
> +	}
> +	LEAVE_RUNTIME(irq_flags);
> +}
> +EXPORT_SYMBOL(kmsan_slab_alloc);
> +
> +/* Called from mm/slub.c */
> +void kmsan_slab_free(struct kmem_cache *s, void *object)
> +{
> +	unsigned long irq_flags;
> +
> +	if (!kmsan_ready || IN_RUNTIME())
> +		return;
> +	ENTER_RUNTIME(irq_flags);
> +
> +	/* RCU slabs could be legally used after free within the RCU period */
> +	if (unlikely(s->flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON)))
> +		goto leave;
> +	if (s->ctor)
> +		goto leave;
> +	kmsan_internal_poison_shadow(object, s->object_size,
> +				     GFP_KERNEL,
> +				     KMSAN_POISON_CHECK | KMSAN_POISON_FREE);
> +leave:
> +	LEAVE_RUNTIME(irq_flags);
> +}
> +EXPORT_SYMBOL(kmsan_slab_free);
> +
> +/* Called from mm/slub.c */
> +void kmsan_kmalloc_large(const void *ptr, size_t size, gfp_t flags)
> +{
> +	unsigned long irq_flags;
> +
> +	if (unlikely(ptr == NULL))
> +		return;
> +	if (!kmsan_ready || IN_RUNTIME())
> +		return;
> +	ENTER_RUNTIME(irq_flags);
> +	if (flags & __GFP_ZERO) {
> +		kmsan_internal_unpoison_shadow((void *)ptr, size,
> +					       /*checked*/true);
> +	} else {
> +		kmsan_internal_poison_shadow((void *)ptr, size, flags,
> +					     KMSAN_POISON_CHECK);
> +	}
> +	LEAVE_RUNTIME(irq_flags);
> +}
> +EXPORT_SYMBOL(kmsan_kmalloc_large);
> +
> +/* Called from mm/slub.c */
> +void kmsan_kfree_large(const void *ptr)
> +{
> +	struct page *page;
> +	unsigned long irq_flags;
> +
> +	if (!kmsan_ready || IN_RUNTIME())
> +		return;
> +	ENTER_RUNTIME(irq_flags);
> +	page = virt_to_head_page((void *)ptr);
> +	BUG_ON(ptr != page_address(page));
> +	kmsan_internal_poison_shadow(
> +		(void *)ptr, PAGE_SIZE << compound_order(page), GFP_KERNEL,
> +		KMSAN_POISON_CHECK | KMSAN_POISON_FREE);
> +	LEAVE_RUNTIME(irq_flags);
> +}
> +EXPORT_SYMBOL(kmsan_kfree_large);
> +
> +static unsigned long vmalloc_shadow(unsigned long addr)
> +{
> +	return (unsigned long)kmsan_get_metadata((void *)addr, 1, META_SHADOW);
> +}
> +
> +static unsigned long vmalloc_origin(unsigned long addr)
> +{
> +	return (unsigned long)kmsan_get_metadata((void *)addr, 1, META_ORIGIN);
> +}
> +
> +/* Called from mm/vmalloc.c */
> +void kmsan_vunmap_page_range(unsigned long start, unsigned long end)
> +{
> +	__vunmap_page_range(vmalloc_shadow(start), vmalloc_shadow(end));
> +	__vunmap_page_range(vmalloc_origin(start), vmalloc_origin(end));
> +}
> +EXPORT_SYMBOL(kmsan_vunmap_page_range);
> +
> +/* Called from lib/ioremap.c */
> +/*
> + * This function creates new shadow/origin pages for the physical pages mapped
> + * into the virtual memory. If those physical pages already had shadow/origin,
> + * those are ignored.
> + */
> +void kmsan_ioremap_page_range(unsigned long start, unsigned long end,
> +	phys_addr_t phys_addr, pgprot_t prot)
> +{
> +	unsigned long irq_flags;
> +	struct page *shadow, *origin;
> +	int i, nr;
> +	unsigned long off = 0;
> +	gfp_t gfp_mask = GFP_KERNEL | __GFP_ZERO | __GFP_NO_KMSAN_SHADOW;
> +
> +	if (!kmsan_ready || IN_RUNTIME())
> +		return;
> +
> +	nr = (end - start) / PAGE_SIZE;
> +	ENTER_RUNTIME(irq_flags);
> +	for (i = 0; i < nr; i++, off += PAGE_SIZE) {
> +		shadow = alloc_pages(gfp_mask, 1);
> +		origin = alloc_pages(gfp_mask, 1);
> +		__vmap_page_range_noflush(vmalloc_shadow(start + off),
> +				vmalloc_shadow(start + off + PAGE_SIZE),
> +				prot, &shadow);
> +		__vmap_page_range_noflush(vmalloc_origin(start + off),
> +				vmalloc_origin(start + off + PAGE_SIZE),
> +				prot, &origin);
> +	}
> +	flush_cache_vmap(vmalloc_shadow(start), vmalloc_shadow(end));
> +	flush_cache_vmap(vmalloc_origin(start), vmalloc_origin(end));
> +	LEAVE_RUNTIME(irq_flags);
> +}
> +EXPORT_SYMBOL(kmsan_ioremap_page_range);
> +
> +void kmsan_iounmap_page_range(unsigned long start, unsigned long end)
> +{
> +	int i, nr;
> +	struct page *shadow, *origin;
> +	unsigned long v_shadow, v_origin;
> +	unsigned long irq_flags;
> +
> +	if (!kmsan_ready || IN_RUNTIME())
> +		return;
> +
> +	nr = (end - start) / PAGE_SIZE;
> +	ENTER_RUNTIME(irq_flags);
> +	v_shadow = (unsigned long)vmalloc_shadow(start);
> +	v_origin = (unsigned long)vmalloc_origin(start);
> +	for (i = 0; i < nr; i++, v_shadow += PAGE_SIZE, v_origin += PAGE_SIZE) {
> +		shadow = vmalloc_to_page_or_null((void *)v_shadow);
> +		origin = vmalloc_to_page_or_null((void *)v_origin);
> +		__vunmap_page_range(v_shadow, v_shadow + PAGE_SIZE);
> +		__vunmap_page_range(v_origin, v_origin + PAGE_SIZE);
> +		if (shadow)
> +			__free_pages(shadow, 1);
> +		if (origin)
> +			__free_pages(origin, 1);
> +	}
> +	LEAVE_RUNTIME(irq_flags);
> +}
> +EXPORT_SYMBOL(kmsan_iounmap_page_range);
> +
> +/* Called from include/linux/uaccess.h, include/linux/uaccess.h */
> +void kmsan_copy_to_user(const void *to, const void *from,
> +			size_t to_copy, size_t left)
> +{
> +	void *shadow;
> +
> +	if (!kmsan_ready || IN_RUNTIME())
> +		return;
> +	/*
> +	 * At this point we've copied the memory already. It's hard to check it
> +	 * before copying, as the size of actually copied buffer is unknown.
> +	 */
> +
> +	/* copy_to_user() may copy zero bytes. No need to check. */
> +	if (!to_copy)
> +		return;
> +	/* Or maybe copy_to_user() failed to copy anything. */
> +	if (to_copy == left)
> +		return;
> +	if ((u64)to < TASK_SIZE) {
> +		/* This is a user memory access, check it. */
> +		kmsan_internal_check_memory((void *)from, to_copy - left, to,
> +						REASON_COPY_TO_USER);
> +		return;
> +	}
> +	/* Otherwise this is a kernel memory access. This happens when a compat
> +	 * syscall passes an argument allocated on the kernel stack to a real
> +	 * syscall.
> +	 * Don't check anything, just copy the shadow of the copied bytes.
> +	 */
> +	shadow = kmsan_get_metadata((void *)to, to_copy - left, META_SHADOW);
> +	if (shadow)
> +		kmsan_memcpy_metadata((void *)to, (void *)from, to_copy - left);
> +}
> +EXPORT_SYMBOL(kmsan_copy_to_user);
> +
> +void kmsan_poison_shadow(const void *address, size_t size, gfp_t flags)
> +{
> +	unsigned long irq_flags;
> +
> +	if (!kmsan_ready || IN_RUNTIME())
> +		return;
> +	ENTER_RUNTIME(irq_flags);
> +	/* The users may want to poison/unpoison random memory. */
> +	kmsan_internal_poison_shadow((void *)address, size, flags,
> +				     KMSAN_POISON_NOCHECK);
> +	LEAVE_RUNTIME(irq_flags);
> +}
> +EXPORT_SYMBOL(kmsan_poison_shadow);
> +
> +void kmsan_unpoison_shadow(const void *address, size_t size)
> +{
> +	unsigned long irq_flags;
> +
> +	if (!kmsan_ready || IN_RUNTIME())
> +		return;
> +
> +	ENTER_RUNTIME(irq_flags);
> +	/* The users may want to poison/unpoison random memory. */
> +	kmsan_internal_unpoison_shadow((void *)address, size,
> +				       KMSAN_POISON_NOCHECK);
> +	LEAVE_RUNTIME(irq_flags);
> +}
> +EXPORT_SYMBOL(kmsan_unpoison_shadow);
> +
> +void kmsan_check_memory(const void *addr, size_t size)
> +{
> +	return kmsan_internal_check_memory((void *)addr, size, /*user_addr*/ 0,
> +					   REASON_ANY);
> +}
> +EXPORT_SYMBOL(kmsan_check_memory);
> +
> +void kmsan_gup_pgd_range(struct page **pages, int nr)
> +{
> +	int i;
> +	void *page_addr;
> +
> +	/*
> +	 * gup_pgd_range() has just created a number of new pages that KMSAN
> +	 * treats as uninitialized. In the case they belong to the userspace
> +	 * memory, unpoison the corresponding kernel pages.
> +	 */
> +	for (i = 0; i < nr; i++) {
> +		page_addr = page_address(pages[i]);
> +		if (((u64)page_addr < TASK_SIZE) &&
> +		    ((u64)page_addr + PAGE_SIZE < TASK_SIZE))
> +			kmsan_unpoison_shadow(page_addr, PAGE_SIZE);
> +	}
> +
> +}
> +EXPORT_SYMBOL(kmsan_gup_pgd_range);
> +
> +/* Helper function to check an SKB. */
> +void kmsan_check_skb(const struct sk_buff *skb)
> +{
> +	int start = skb_headlen(skb);
> +	struct sk_buff *frag_iter;
> +	int i, copy = 0;
> +	skb_frag_t *f;
> +	u32 p_off, p_len, copied;
> +	struct page *p;
> +	u8 *vaddr;
> +
> +	if (!skb || !skb->len)
> +		return;
> +
> +	kmsan_internal_check_memory(skb->data, skb_headlen(skb), 0, REASON_ANY);
> +	if (skb_is_nonlinear(skb)) {
> +		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
> +			f = &skb_shinfo(skb)->frags[i];
> +
> +			skb_frag_foreach_page(f,
> +					      skb_frag_off(f)  - start,
> +					      copy, p, p_off, p_len, copied) {
> +
> +				vaddr = kmap_atomic(p);
> +				kmsan_internal_check_memory(vaddr + p_off,
> +						p_len, /*user_addr*/ 0,
> +						REASON_ANY);
> +				kunmap_atomic(vaddr);
> +			}
> +		}
> +	}
> +	skb_walk_frags(skb, frag_iter)
> +		kmsan_check_skb(frag_iter);
> +}
> +EXPORT_SYMBOL(kmsan_check_skb);
> +
> +/* Helper function to check an URB. */
> +void kmsan_handle_urb(const struct urb *urb, bool is_out)
> +{
> +	if (!urb)
> +		return;
> +	if (is_out)
> +		kmsan_internal_check_memory(urb->transfer_buffer,
> +					    urb->transfer_buffer_length,
> +					    /*user_addr*/ 0, REASON_SUBMIT_URB);
> +	else
> +		kmsan_internal_unpoison_shadow(urb->transfer_buffer,
> +					       urb->transfer_buffer_length,
> +					       /*checked*/false);
> +}
> +EXPORT_SYMBOL(kmsan_handle_urb);
> +
> +static void kmsan_handle_dma_page(const void *addr, size_t size,
> +				  enum dma_data_direction dir)
> +{
> +	switch (dir) {
> +	case DMA_BIDIRECTIONAL:
> +		kmsan_internal_check_memory((void *)addr, size, /*user_addr*/0,
> +					    REASON_ANY);
> +		kmsan_internal_unpoison_shadow((void *)addr, size,
> +					       /*checked*/false);
> +		break;
> +	case DMA_TO_DEVICE:
> +		kmsan_internal_check_memory((void *)addr, size, /*user_addr*/0,
> +					    REASON_ANY);
> +		break;
> +	case DMA_FROM_DEVICE:
> +		kmsan_internal_unpoison_shadow((void *)addr, size,
> +					       /*checked*/false);
> +		break;
> +	case DMA_NONE:
> +		break;
> +	}
> +}
> +
> +/* Helper function to handle DMA data transfers. */
> +void kmsan_handle_dma(const void *addr, size_t size,
> +		      enum dma_data_direction dir)
> +{
> +	u64 page_offset, to_go, uaddr = (u64)addr;
> +
> +	/*
> +	 * The kernel may occasionally give us adjacent DMA pages not belonging
> +	 * to the same allocation. Process them separately to avoid triggering
> +	 * internal KMSAN checks.
> +	 */
> +	while (size > 0) {
> +		page_offset = uaddr % PAGE_SIZE;
> +		to_go = min(PAGE_SIZE - page_offset, (u64)size);
> +		kmsan_handle_dma_page((void *)uaddr, to_go, dir);
> +		uaddr += to_go;
> +		size -= to_go;
> +	}
> +}
> +EXPORT_SYMBOL(kmsan_handle_dma);
> diff --git a/mm/kmsan/kmsan_init.c b/mm/kmsan/kmsan_init.c
> new file mode 100644
> index 000000000000..2816e7075a30
> --- /dev/null
> +++ b/mm/kmsan/kmsan_init.c
> @@ -0,0 +1,88 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * KMSAN initialization routines.
> + *
> + * Copyright (C) 2017-2019 Google LLC
> + * Author: Alexander Potapenko <glider@xxxxxxxxxx>
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + *
> + */
> +
> +#include "kmsan.h"
> +
> +#include <asm/cpu_entry_area.h>
> +#include <linux/mm.h>
> +#include <linux/memblock.h>
> +
> +#define NUM_FUTURE_RANGES 128
> +struct start_end_pair {
> +	void *start, *end;
> +};
> +
> +static struct start_end_pair start_end_pairs[NUM_FUTURE_RANGES] __initdata;
> +static int future_index __initdata;
> +
> +/*
> + * Record a range of memory for which the metadata pages will be created once
> + * the page allocator becomes available.
> + * TODO(glider): squash together ranges belonging to the same page.
> + */

What needs to be done to address the TODO?  Just adding a comment is
fine (or if the TODO can be resolved that's also fine).

> +static void __init kmsan_record_future_shadow_range(void *start, void *end)
> +{
> +	BUG_ON(future_index == NUM_FUTURE_RANGES);
> +	BUG_ON((start >= end) || !start || !end);
> +	start_end_pairs[future_index].start = start;
> +	start_end_pairs[future_index].end = end;
> +	future_index++;
> +}
> +
> +extern char _sdata[], _edata[];
> +
> +
> +
> +/*
> + * Initialize the shadow for existing mappings during kernel initialization.
> + * These include kernel text/data sections, NODE_DATA and future ranges
> + * registered while creating other data (e.g. percpu).
> + *
> + * Allocations via memblock can be only done before slab is initialized.
> + */
> +void __init kmsan_initialize_shadow(void)
> +{
> +	int nid;
> +	u64 i;
> +	const size_t nd_size = roundup(sizeof(pg_data_t), PAGE_SIZE);
> +	phys_addr_t p_start, p_end;
> +
> +	for_each_reserved_mem_region(i, &p_start, &p_end) {
> +		kmsan_record_future_shadow_range(phys_to_virt(p_start),
> +						 phys_to_virt(p_end+1));
> +	}
> +	/* Allocate shadow for .data */
> +	kmsan_record_future_shadow_range(_sdata, _edata);
> +
> +	/*
> +	 * TODO(glider): alloc_node_data() in arch/x86/mm/numa.c uses
> +	 * sizeof(pg_data_t).
> +	 */

What needs to be done to address the TODO?  Just adding a comment is
fine (or if the TODO can be resolved that's also fine).

> +	for_each_online_node(nid)
> +		kmsan_record_future_shadow_range(
> +			NODE_DATA(nid),	(char *)NODE_DATA(nid) + nd_size);
> +
> +	for (i = 0; i < future_index; i++)
> +		kmsan_init_alloc_meta_for_range(start_end_pairs[i].start,
> +						start_end_pairs[i].end);
> +}
> +EXPORT_SYMBOL(kmsan_initialize_shadow);
> +
> +void __init kmsan_initialize(void)
> +{
> +	/* Assuming current is init_task */
> +	kmsan_internal_task_create(current);
> +	kmsan_pr_locked("Starting KernelMemorySanitizer\n");
> +	kmsan_ready = true;
> +}
> +EXPORT_SYMBOL(kmsan_initialize);
> diff --git a/mm/kmsan/kmsan_instr.c b/mm/kmsan/kmsan_instr.c
> new file mode 100644
> index 000000000000..7695daf2d88a
> --- /dev/null
> +++ b/mm/kmsan/kmsan_instr.c
> @@ -0,0 +1,259 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * KMSAN compiler API.
> + *
> + * Copyright (C) 2017-2019 Google LLC
> + * Author: Alexander Potapenko <glider@xxxxxxxxxx>
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + *
> + */
> +
> +#include "kmsan.h"
> +#include <linux/gfp.h>
> +#include <linux/mm.h>
> +
> +static bool is_bad_asm_addr(void *addr, u64 size, bool is_store)
> +{
> +	if ((u64)addr < TASK_SIZE)
> +		return true;
> +	if (!kmsan_get_metadata(addr, size, META_SHADOW))
> +		return true;
> +	return false;
> +}
> +
> +struct shadow_origin_ptr __msan_metadata_ptr_for_load_n(void *addr, u64 size)
> +{
> +	return kmsan_get_shadow_origin_ptr(addr, size, /*store*/false);
> +}
> +EXPORT_SYMBOL(__msan_metadata_ptr_for_load_n);
> +
> +struct shadow_origin_ptr __msan_metadata_ptr_for_store_n(void *addr, u64 size)
> +{
> +	return kmsan_get_shadow_origin_ptr(addr, size, /*store*/true);
> +}
> +EXPORT_SYMBOL(__msan_metadata_ptr_for_store_n);
> +
> +#define DECLARE_METADATA_PTR_GETTER(size)	\
> +struct shadow_origin_ptr __msan_metadata_ptr_for_load_##size(void *addr) \
> +{		\
> +	return kmsan_get_shadow_origin_ptr(addr, size, /*store*/false);	\
> +}		\
> +EXPORT_SYMBOL(__msan_metadata_ptr_for_load_##size);			\
> +		\
> +struct shadow_origin_ptr __msan_metadata_ptr_for_store_##size(void *addr) \
> +{									\
> +	return kmsan_get_shadow_origin_ptr(addr, size, /*store*/true);	\
> +}									\
> +EXPORT_SYMBOL(__msan_metadata_ptr_for_store_##size)
> +
> +DECLARE_METADATA_PTR_GETTER(1);
> +DECLARE_METADATA_PTR_GETTER(2);
> +DECLARE_METADATA_PTR_GETTER(4);
> +DECLARE_METADATA_PTR_GETTER(8);
> +
> +void __msan_instrument_asm_store(void *addr, u64 size)
> +{
> +	unsigned long irq_flags;
> +
> +	if (!kmsan_ready || IN_RUNTIME())
> +		return;
> +	/*
> +	 * Most of the accesses are below 32 bytes. The two exceptions so far
> +	 * are clwb() (64 bytes) and FPU state (512 bytes).
> +	 * It's unlikely that the assembly will touch more than 512 bytes.
> +	 */
> +	if (size > 512)
> +		size = 8;
> +	if (is_bad_asm_addr(addr, size, /*is_store*/true))
> +		return;
> +	ENTER_RUNTIME(irq_flags);
> +	/* Unpoisoning the memory on best effort. */
> +	kmsan_internal_unpoison_shadow(addr, size, /*checked*/false);
> +	LEAVE_RUNTIME(irq_flags);
> +}
> +EXPORT_SYMBOL(__msan_instrument_asm_store);
> +
> +void *__msan_memmove(void *dst, void *src, u64 n)
> +{
> +	void *result;
> +	void *shadow_dst;
> +
> +	result = __memmove(dst, src, n);
> +	if (!n)
> +		/* Some people call memmove() with zero length. */
> +		return result;
> +	if (!kmsan_ready || IN_RUNTIME())
> +		return result;
> +
> +	/* Ok to skip address check here, we'll do it later. */
> +	shadow_dst = kmsan_get_metadata(dst, n, META_SHADOW);
> +
> +	if (!shadow_dst)
> +		/* Can happen e.g. if the memory is untracked. */
> +		return result;
> +
> +	kmsan_memmove_metadata(dst, src, n);
> +
> +	return result;
> +}
> +EXPORT_SYMBOL(__msan_memmove);
> +
> +void *__msan_memmove_nosanitize(void *dst, void *src, u64 n)
> +{
> +	return __memmove(dst, src, n);
> +}
> +EXPORT_SYMBOL(__msan_memmove_nosanitize);
> +
> +void *__msan_memcpy(void *dst, const void *src, u64 n)
> +{
> +	void *result;
> +	void *shadow_dst;
> +
> +	result = __memcpy(dst, src, n);
> +	if (!n)
> +		/* Some people call memcpy() with zero length. */
> +		return result;
> +
> +	if (!kmsan_ready || IN_RUNTIME())
> +		return result;
> +
> +	/* Ok to skip address check here, we'll do it later. */
> +	shadow_dst = kmsan_get_metadata(dst, n, META_SHADOW);
> +	if (!shadow_dst)
> +		/* Can happen e.g. if the memory is untracked. */
> +		return result;
> +
> +	kmsan_memcpy_metadata(dst, (void *)src, n);
> +
> +	return result;
> +}
> +EXPORT_SYMBOL(__msan_memcpy);
> +
> +void *__msan_memcpy_nosanitize(void *dst, void *src, u64 n)
> +{
> +	return __memcpy(dst, src, n);
> +}
> +EXPORT_SYMBOL(__msan_memcpy_nosanitize);
> +
> +void *__msan_memset(void *dst, int c, size_t n)
> +{
> +	void *result;
> +	unsigned long irq_flags;
> +	depot_stack_handle_t new_origin;
> +	unsigned int shadow;
> +
> +	result = __memset(dst, c, n);
> +	if (!kmsan_ready || IN_RUNTIME())
> +		return result;
> +
> +	ENTER_RUNTIME(irq_flags);
> +	shadow = 0;
> +	kmsan_internal_memset_shadow(dst, shadow, n, /*checked*/false);
> +	new_origin = 0;
> +	kmsan_internal_set_origin(dst, n, new_origin);
> +	LEAVE_RUNTIME(irq_flags);
> +
> +	return result;
> +}
> +EXPORT_SYMBOL(__msan_memset);
> +
> +void *__msan_memset_nosanitize(void *dst, int c, size_t n)
> +{
> +	return __memset(dst, c, n);
> +}
> +EXPORT_SYMBOL(__msan_memset_nosanitize);
> +
> +depot_stack_handle_t __msan_chain_origin(depot_stack_handle_t origin)
> +{
> +	depot_stack_handle_t ret = 0;
> +	unsigned long irq_flags;
> +
> +	if (!kmsan_ready || IN_RUNTIME())
> +		return ret;
> +
> +	/* Creating new origins may allocate memory. */
> +	ENTER_RUNTIME(irq_flags);
> +	ret = kmsan_internal_chain_origin(origin);
> +	LEAVE_RUNTIME(irq_flags);
> +	return ret;
> +}
> +EXPORT_SYMBOL(__msan_chain_origin);
> +
> +void __msan_poison_alloca(void *address, u64 size, char *descr)
> +{
> +	depot_stack_handle_t handle;
> +	unsigned long entries[4];
> +	unsigned long irq_flags;
> +	u64 size_copy = size, to_fill;
> +	u64 addr_copy = (u64)address;
> +	u64 page_offset;
> +	void *shadow_start;
> +
> +	if (!kmsan_ready || IN_RUNTIME())
> +		return;
> +
> +	while (size_copy) {
> +		page_offset = addr_copy % PAGE_SIZE;
> +		to_fill = min(PAGE_SIZE - page_offset, size_copy);
> +		shadow_start = kmsan_get_metadata((void *)addr_copy, to_fill,
> +						  META_SHADOW);
> +		addr_copy += to_fill;
> +		size_copy -= to_fill;
> +		if (!shadow_start)
> +			/* Can happen e.g. if the memory is untracked. */
> +			continue;
> +		__memset(shadow_start, -1, to_fill);
> +	}
> +
> +	entries[0] = KMSAN_ALLOCA_MAGIC_ORIGIN;
> +	entries[1] = (u64)descr;
> +	entries[2] = (u64)__builtin_return_address(0);
> +	entries[3] = (u64)kmsan_internal_return_address(1);
> +
> +	/* stack_depot_save() may allocate memory. */
> +	ENTER_RUNTIME(irq_flags);
> +	handle = stack_depot_save(entries, ARRAY_SIZE(entries), GFP_ATOMIC);
> +	LEAVE_RUNTIME(irq_flags);
> +	kmsan_internal_set_origin(address, size, handle);
> +}
> +EXPORT_SYMBOL(__msan_poison_alloca);
> +
> +void __msan_unpoison_alloca(void *address, u64 size)
> +{
> +	unsigned long irq_flags;
> +
> +	if (!kmsan_ready || IN_RUNTIME())
> +		return;
> +
> +	ENTER_RUNTIME(irq_flags);
> +	/* Assuming the shadow exists. */
> +	kmsan_internal_unpoison_shadow(address, size, /*checked*/true);
> +	LEAVE_RUNTIME(irq_flags);
> +}
> +EXPORT_SYMBOL(__msan_unpoison_alloca);
> +
> +void __msan_warning(u32 origin)
> +{
> +	unsigned long irq_flags;
> +
> +	if (!kmsan_ready || IN_RUNTIME())
> +		return;
> +	ENTER_RUNTIME(irq_flags);
> +	kmsan_report(origin, /*address*/0, /*size*/0,
> +		/*off_first*/0, /*off_last*/0, /*user_addr*/0, REASON_ANY);
> +	LEAVE_RUNTIME(irq_flags);
> +}
> +EXPORT_SYMBOL(__msan_warning);
> +
> +struct kmsan_context_state *__msan_get_context_state(void)
> +{
> +	struct kmsan_context_state *ret;
> +
> +	ret = task_kmsan_context_state();
> +	BUG_ON(!ret);
> +	return ret;
> +}
> +EXPORT_SYMBOL(__msan_get_context_state);
> diff --git a/mm/kmsan/kmsan_report.c b/mm/kmsan/kmsan_report.c
> new file mode 100644
> index 000000000000..443ab9c1e8bf
> --- /dev/null
> +++ b/mm/kmsan/kmsan_report.c
> @@ -0,0 +1,133 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * KMSAN error reporting routines.
> + *
> + * Copyright (C) 2019 Google LLC
> + * Author: Alexander Potapenko <glider@xxxxxxxxxx>
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + *
> + */
> +
> +#include <linux/console.h>
> +#include <linux/stackdepot.h>
> +#include <linux/stacktrace.h>
> +
> +#include "kmsan.h"
> +
> +DEFINE_SPINLOCK(report_lock);
> +
> +void kmsan_print_origin(depot_stack_handle_t origin)
> +{
> +	unsigned long *entries = NULL, *chained_entries = NULL;
> +	unsigned long nr_entries, chained_nr_entries, magic;
> +	char *descr = NULL;
> +	void *pc1 = NULL, *pc2 = NULL;
> +	depot_stack_handle_t head;
> +
> +	if (!origin) {
> +		kmsan_pr_err("Origin not found, presumably a false report.\n");
> +		return;
> +	}
> +
> +	while (true) {
> +		nr_entries = stack_depot_fetch(origin, &entries);
> +		magic = nr_entries ? (entries[0] & KMSAN_MAGIC_MASK) : 0;
> +		if ((nr_entries == 4) && (magic == KMSAN_ALLOCA_MAGIC_ORIGIN)) {
> +			descr = (char *)entries[1];
> +			pc1 = (void *)entries[2];
> +			pc2 = (void *)entries[3];
> +			kmsan_pr_err("Local variable description: %s\n", descr);
> +			kmsan_pr_err("Variable was created at:\n");
> +			kmsan_pr_err(" %pS\n", pc1);
> +			kmsan_pr_err(" %pS\n", pc2);
> +			break;
> +		}
> +		if ((nr_entries == 3) &&
> +		    (magic == KMSAN_CHAIN_MAGIC_ORIGIN_FULL)) {
> +			head = entries[1];
> +			origin = entries[2];
> +			kmsan_pr_err("Uninit was stored to memory at:\n");
> +			chained_nr_entries =
> +				stack_depot_fetch(head, &chained_entries);
> +			stack_trace_print(chained_entries, chained_nr_entries,
> +					  0);
> +			kmsan_pr_err("\n");
> +			continue;
> +		}
> +		kmsan_pr_err("Uninit was created at:\n");
> +		if (entries)
> +			stack_trace_print(entries, nr_entries, 0);
> +		else
> +			kmsan_pr_err("No stack\n");
> +		break;
> +	}
> +}
> +
> +void kmsan_report(depot_stack_handle_t origin,
> +		  void *address, int size, int off_first, int off_last,
> +		  const void *user_addr, int reason)
> +{
> +	unsigned long flags;
> +	unsigned long *entries;
> +	unsigned int nr_entries;
> +	bool is_uaf = false;
> +	char *bug_type = NULL;
> +
> +	if (!kmsan_ready)
> +		return;
> +	if (!current->kmsan.allow_reporting)
> +		return;
> +	if (!origin)
> +		return;
> +
> +	nr_entries = stack_depot_fetch(origin, &entries);
> +
> +	current->kmsan.allow_reporting = false;
> +	spin_lock_irqsave(&report_lock, flags);
> +	kmsan_pr_err("=====================================================\n");
> +	if (get_dsh_extra_bits(origin) & 1)
> +		is_uaf = true;
> +	switch (reason) {
> +	case REASON_ANY:
> +		bug_type = is_uaf ? "use-after-free" : "uninit-value";
> +		break;
> +	case REASON_COPY_TO_USER:
> +		bug_type = is_uaf ? "kernel-infoleak-after-free" :
> +				    "kernel-infoleak";
> +		break;
> +	case REASON_SUBMIT_URB:
> +		bug_type = is_uaf ? "kernel-usb-infoleak-after-free" :
> +				    "kernel-usb-infoleak";
> +		break;
> +	}
> +	kmsan_pr_err("BUG: KMSAN: %s in %pS\n",
> +		     bug_type, kmsan_internal_return_address(2));
> +	dump_stack();
> +	kmsan_pr_err("\n");
> +
> +	kmsan_print_origin(origin);
> +
> +	if (size) {
> +		kmsan_pr_err("\n");
> +		if (off_first == off_last)
> +			kmsan_pr_err("Byte %d of %d is uninitialized\n",
> +				     off_first, size);
> +		else
> +			kmsan_pr_err("Bytes %d-%d of %d are uninitialized\n",
> +				     off_first, off_last, size);
> +	}
> +	if (address)
> +		kmsan_pr_err("Memory access of size %d starts at %px\n",
> +			     size, address);
> +	if (user_addr && reason == REASON_COPY_TO_USER)
> +		kmsan_pr_err("Data copied to user address %px\n", user_addr);
> +	kmsan_pr_err("=====================================================\n");
> +	add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
> +	spin_unlock_irqrestore(&report_lock, flags);
> +	if (panic_on_warn)
> +		panic("panic_on_warn set ...\n");
> +	current->kmsan.allow_reporting = true;
> +}
> diff --git a/mm/kmsan/kmsan_shadow.c b/mm/kmsan/kmsan_shadow.c
> new file mode 100644
> index 000000000000..06801d76e6b8
> --- /dev/null
> +++ b/mm/kmsan/kmsan_shadow.c
> @@ -0,0 +1,543 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * KMSAN shadow implementation.
> + *
> + * Copyright (C) 2017-2019 Google LLC
> + * Author: Alexander Potapenko <glider@xxxxxxxxxx>
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + *
> + */
> +
> +#include <asm/cpu_entry_area.h>
> +#include <asm/page.h>
> +#include <asm/pgtable_64_types.h>
> +#include <asm/tlbflush.h>
> +#include <linux/memblock.h>
> +#include <linux/mm_types.h>
> +#include <linux/percpu-defs.h>
> +#include <linux/slab.h>
> +#include <linux/smp.h>
> +#include <linux/stddef.h>
> +
> +#include "kmsan.h"
> +#include "kmsan_shadow.h"
> +
> +#define shadow_page_for(page) \
> +	((page)->shadow)
> +
> +#define origin_page_for(page) \
> +	((page)->origin)
> +
> +#define shadow_ptr_for(page) \
> +	(page_address((page)->shadow))
> +
> +#define origin_ptr_for(page) \
> +	(page_address((page)->origin))
> +
> +#define has_shadow_page(page) \
> +	(!!((page)->shadow))
> +
> +#define has_origin_page(page) \
> +	(!!((page)->origin))
> +
> +#define set_no_shadow_origin_page(page)	\
> +	do {				\
> +		(page)->shadow = NULL;	\
> +		(page)->origin = NULL;	\
> +	} while (0) /**/
> +
> +#define is_ignored_page(page)	\
> +	(!!(((u64)((page)->shadow)) % 2))
> +
> +#define ignore_page(pg)			\
> +		((pg)->shadow = (struct page *)((u64)((pg)->shadow) | 1)) \
> +
> +DEFINE_PER_CPU(char[CPU_ENTRY_AREA_SIZE], cpu_entry_area_shadow);
> +DEFINE_PER_CPU(char[CPU_ENTRY_AREA_SIZE], cpu_entry_area_origin);
> +
> +/*
> + * Dummy load and store pages to be used when the real metadata is unavailable.
> + * There are separate pages for loads and stores, so that every load returns a
> + * zero, and every store doesn't affect other stores.
> + */
> +char dummy_load_page[PAGE_SIZE] __aligned(PAGE_SIZE);
> +char dummy_store_page[PAGE_SIZE] __aligned(PAGE_SIZE);
> +
> +/*
> + * Taken from arch/x86/mm/physaddr.h to avoid using an instrumented version.
> + */
> +static int kmsan_phys_addr_valid(unsigned long addr)
> +{
> +#ifdef CONFIG_PHYS_ADDR_T_64BIT
> +	return !(addr >> boot_cpu_data.x86_phys_bits);
> +#else
> +	return 1;
> +#endif
> +}
> +
> +/*
> + * Taken from arch/x86/mm/physaddr.c to avoid using an instrumented version.
> + */
> +static bool kmsan_virt_addr_valid(void *addr)
> +{
> +	unsigned long x = (unsigned long)addr;
> +	unsigned long y = x - __START_KERNEL_map;
> +
> +	/* use the carry flag to determine if x was < __START_KERNEL_map */
> +	if (unlikely(x > y)) {
> +		x = y + phys_base;
> +
> +		if (y >= KERNEL_IMAGE_SIZE)
> +			return false;
> +	} else {
> +		x = y + (__START_KERNEL_map - PAGE_OFFSET);
> +
> +		/* carry flag will be set if starting x was >= PAGE_OFFSET */
> +		if ((x > y) || !kmsan_phys_addr_valid(x))
> +			return false;
> +	}
> +
> +	return pfn_valid(x >> PAGE_SHIFT);
> +}
> +
> +static unsigned long vmalloc_meta(void *addr, bool is_origin)
> +{
> +	unsigned long addr64 = (unsigned long)addr, off;
> +
> +	BUG_ON(is_origin && !IS_ALIGNED(addr64, ORIGIN_SIZE));
> +	if (kmsan_internal_is_vmalloc_addr(addr)) {
> +		return addr64 + (is_origin ? VMALLOC_ORIGIN_OFFSET
> +					   : VMALLOC_SHADOW_OFFSET);
> +	}
> +	if (kmsan_internal_is_module_addr(addr)) {
> +		off = addr64 - MODULES_VADDR;
> +		return off + (is_origin ? MODULES_ORIGIN_START
> +					: MODULES_SHADOW_START);
> +	}
> +	return 0;
> +}
> +
> +static void *get_cea_meta_or_null(void *addr, bool is_origin)
> +{
> +	int cpu = smp_processor_id();
> +	int off;
> +	char *metadata_array;
> +
> +	if (((u64)addr < CPU_ENTRY_AREA_BASE) ||
> +	    ((u64)addr >= (CPU_ENTRY_AREA_BASE + CPU_ENTRY_AREA_MAP_SIZE)))
> +		return NULL;
> +	off = (char *)addr - (char *)get_cpu_entry_area(cpu);
> +	if ((off < 0) || (off >= CPU_ENTRY_AREA_SIZE))
> +		return NULL;
> +	metadata_array = is_origin ? cpu_entry_area_origin :
> +				     cpu_entry_area_shadow;
> +	return &per_cpu(metadata_array[off], cpu);
> +}
> +
> +static struct page *virt_to_page_or_null(void *vaddr)
> +{
> +	if (kmsan_virt_addr_valid(vaddr))
> +		return virt_to_page(vaddr);
> +	else
> +		return NULL;
> +}
> +
> +struct shadow_origin_ptr kmsan_get_shadow_origin_ptr(void *address, u64 size,
> +						     bool store)
> +{
> +	struct shadow_origin_ptr ret;
> +	struct page *page;
> +	u64 pad, offset, o_offset;
> +	const u64 addr64 = (u64)address;
> +	u64 o_addr64 = (u64)address;
> +	void *shadow;
> +
> +	if (size > PAGE_SIZE) {
> +		WARN(1, "size too big in %s(%px, %d, %d)\n",
> +		     __func__, address, size, store);
> +		BUG();
> +	}
> +	if (store) {
> +		ret.s = dummy_store_page;
> +		ret.o = dummy_store_page;
> +	} else {
> +		ret.s = dummy_load_page;
> +		ret.o = dummy_load_page;
> +	}
> +	if (!kmsan_ready || IN_RUNTIME())
> +		return ret;
> +	BUG_ON(!metadata_is_contiguous(address, size, META_SHADOW));
> +
> +	if (!IS_ALIGNED(addr64, ORIGIN_SIZE)) {
> +		pad = addr64 % ORIGIN_SIZE;
> +		o_addr64 -= pad;
> +	}
> +
> +	if (kmsan_internal_is_vmalloc_addr(address) ||
> +	    kmsan_internal_is_module_addr(address)) {
> +		ret.s = (void *)vmalloc_meta(address, META_SHADOW);
> +		ret.o = (void *)vmalloc_meta((void *)o_addr64, META_ORIGIN);
> +		return ret;
> +	}
> +
> +	if (!kmsan_virt_addr_valid(address)) {
> +		page = vmalloc_to_page_or_null(address);
> +		if (page)
> +			goto next;
> +		shadow = get_cea_meta_or_null(address, META_SHADOW);
> +		if (shadow) {
> +			ret.s = shadow;
> +			ret.o = get_cea_meta_or_null((void *)o_addr64,
> +						     META_ORIGIN);
> +			return ret;
> +		}
> +	}
> +	page = virt_to_page_or_null(address);
> +	if (!page)
> +		return ret;
> +next:
> +	if (is_ignored_page(page))
> +		return ret;
> +
> +	if (!has_shadow_page(page) || !has_origin_page(page))
> +		return ret;
> +	offset = addr64 % PAGE_SIZE;
> +	o_offset = o_addr64 % PAGE_SIZE;
> +
> +	if (offset + size - 1 > PAGE_SIZE) {
> +		/*
> +		 * The access overflows the current page and touches the
> +		 * subsequent ones. Make sure the shadow/origin pages are also
> +		 * contiguous.
> +		 */
> +		BUG_ON(!metadata_is_contiguous(address, size, META_SHADOW));
> +	}
> +
> +	ret.s = shadow_ptr_for(page) + offset;
> +	ret.o = origin_ptr_for(page) + o_offset;
> +	return ret;
> +}
> +
> +/*
> + * Obtain the shadow or origin pointer for the given address, or NULL if there's
> + * none. The caller must check the return value for being non-NULL if needed.
> + * The return value of this function should not depend on whether we're in the
> + * runtime or not.
> + */
> +void *kmsan_get_metadata(void *address, size_t size, bool is_origin)
> +{
> +	struct page *page;
> +	void *ret;
> +	u64 addr = (u64)address, pad, off;
> +
> +	if (is_origin && !IS_ALIGNED(addr, ORIGIN_SIZE)) {
> +		pad = addr % ORIGIN_SIZE;
> +		addr -= pad;
> +		size += pad;
> +	}
> +	address = (void *)addr;
> +	if (kmsan_internal_is_vmalloc_addr(address) ||
> +	    kmsan_internal_is_module_addr(address)) {
> +		return (void *)vmalloc_meta(address, is_origin);
> +	}
> +
> +	if (!kmsan_virt_addr_valid(address)) {
> +		page = vmalloc_to_page_or_null(address);
> +		if (page)
> +			goto next;
> +		ret = get_cea_meta_or_null(address, is_origin);
> +		if (ret)
> +			return ret;
> +	}
> +	page = virt_to_page_or_null(address);
> +	if (!page)
> +		return NULL;
> +next:
> +	if (is_ignored_page(page))
> +		return NULL;
> +	if (!has_shadow_page(page) || !has_origin_page(page))
> +		return NULL;
> +	off = addr % PAGE_SIZE;
> +
> +	ret = (is_origin ? origin_ptr_for(page) : shadow_ptr_for(page)) + off;
> +	return ret;
> +}
> +
> +void __init kmsan_init_alloc_meta_for_range(void *start, void *end)
> +{
> +	u64 addr, size;
> +	struct page *page;
> +	void *shadow, *origin;
> +	struct page *shadow_p, *origin_p;
> +
> +	start = (void *)ALIGN_DOWN((u64)start, PAGE_SIZE);
> +	size = ALIGN((u64)end - (u64)start, PAGE_SIZE);
> +	shadow = memblock_alloc(size, PAGE_SIZE);
> +	origin = memblock_alloc(size, PAGE_SIZE);
> +	for (addr = 0; addr < size; addr += PAGE_SIZE) {
> +		page = virt_to_page_or_null((char *)start + addr);
> +		shadow_p = virt_to_page_or_null((char *)shadow + addr);
> +		set_no_shadow_origin_page(shadow_p);
> +		shadow_page_for(page) = shadow_p;
> +		origin_p = virt_to_page_or_null((char *)origin + addr);
> +		set_no_shadow_origin_page(origin_p);
> +		origin_page_for(page) = origin_p;
> +	}
> +}
> +
> +/* Called from mm/memory.c */
> +void kmsan_copy_page_meta(struct page *dst, struct page *src)
> +{
> +	unsigned long irq_flags;
> +
> +	if (!kmsan_ready || IN_RUNTIME())
> +		return;
> +	if (!has_shadow_page(src)) {
> +		/* TODO(glider): are we leaking pages here? */
> +		set_no_shadow_origin_page(dst);
> +		return;
> +	}
> +	if (!has_shadow_page(dst))
> +		return;
> +	if (is_ignored_page(src)) {
> +		ignore_page(dst);
> +		return;
> +	}
> +
> +	ENTER_RUNTIME(irq_flags);
> +	__memcpy(shadow_ptr_for(dst), shadow_ptr_for(src),
> +		PAGE_SIZE);
> +	BUG_ON(!has_origin_page(src) || !has_origin_page(dst));
> +	__memcpy(origin_ptr_for(dst), origin_ptr_for(src),
> +		PAGE_SIZE);
> +	LEAVE_RUNTIME(irq_flags);
> +}
> +EXPORT_SYMBOL(kmsan_copy_page_meta);
> +
> +/* Helper function to allocate page metadata. */
> +static int kmsan_internal_alloc_meta_for_pages(struct page *page,
> +					       unsigned int order,
> +					       gfp_t flags, int node)
> +{
> +	struct page *shadow, *origin;
> +	int pages = 1 << order;
> +	int i;
> +	bool initialized = (flags & __GFP_ZERO) || !kmsan_ready;
> +	depot_stack_handle_t handle;
> +
> +	if (flags & __GFP_NO_KMSAN_SHADOW) {
> +		for (i = 0; i < pages; i++)
> +			set_no_shadow_origin_page(&page[i]);
> +		return 0;
> +	}
> +
> +	/* TODO(glider): must we override the flags? */
> +	flags = GFP_ATOMIC;
> +	if (initialized)
> +		flags |= __GFP_ZERO;
> +	shadow = alloc_pages_node(node, flags | __GFP_NO_KMSAN_SHADOW, order);
> +	if (!shadow) {
> +		for (i = 0; i < pages; i++) {
> +			set_no_shadow_origin_page(&page[i]);
> +			set_no_shadow_origin_page(&page[i]);
> +		}
> +		return -ENOMEM;
> +	}
> +	if (!initialized)
> +		__memset(page_address(shadow), -1, PAGE_SIZE * pages);
> +
> +	origin = alloc_pages_node(node, flags | __GFP_NO_KMSAN_SHADOW, order);
> +	/* Assume we've allocated the origin. */
> +	if (!origin) {
> +		__free_pages(shadow, order);
> +		for (i = 0; i < pages; i++)
> +			set_no_shadow_origin_page(&page[i]);
> +		return -ENOMEM;
> +	}
> +
> +	if (!initialized) {
> +		handle = kmsan_save_stack_with_flags(flags, /*extra_bits*/0);
> +		/*
> +		 * Addresses are page-aligned, pages are contiguous, so it's ok
> +		 * to just fill the origin pages with |handle|.
> +		 */
> +		for (i = 0; i < PAGE_SIZE * pages / sizeof(handle); i++) {
> +			((depot_stack_handle_t *)page_address(origin))[i] =
> +						handle;
> +		}
> +	}
> +
> +	for (i = 0; i < pages; i++) {
> +		shadow_page_for(&page[i]) = &shadow[i];
> +		set_no_shadow_origin_page(shadow_page_for(&page[i]));
> +		origin_page_for(&page[i]) = &origin[i];
> +		set_no_shadow_origin_page(origin_page_for(&page[i]));
> +	}
> +	return 0;
> +}
> +
> +/* Called from mm/page_alloc.c */
> +int kmsan_alloc_page(struct page *page, unsigned int order, gfp_t flags)
> +{
> +	unsigned long irq_flags;
> +	int ret;
> +
> +	if (IN_RUNTIME())
> +		return 0;
> +	ENTER_RUNTIME(irq_flags);
> +	ret = kmsan_internal_alloc_meta_for_pages(page, order, flags, -1);
> +	LEAVE_RUNTIME(irq_flags);
> +	return ret;
> +}
> +
> +/* Called from mm/page_alloc.c */
> +void kmsan_free_page(struct page *page, unsigned int order)
> +{
> +	struct page *shadow, *origin, *cur_page;
> +	int pages = 1 << order;
> +	int i;
> +	unsigned long irq_flags;
> +
> +	if (!shadow_page_for(page)) {
> +		for (i = 0; i < pages; i++) {
> +			cur_page = &page[i];
> +			BUG_ON(shadow_page_for(cur_page));
> +		}
> +		return;
> +	}
> +
> +	if (!kmsan_ready) {
> +		for (i = 0; i < pages; i++) {
> +			cur_page = &page[i];
> +			set_no_shadow_origin_page(cur_page);
> +		}
> +		return;
> +	}
> +
> +	if (IN_RUNTIME()) {
> +		/*
> +		 * TODO(glider): looks legit. depot_save_stack() may call
> +		 * free_pages().
> +		 */

What needs to be done to address this TODO?  If it cannot be resolved
right away, just adding an explanatory comment is fine (and if the TODO
can be resolved, that's also fine).

> +		return;
> +	}
> +
> +	ENTER_RUNTIME(irq_flags);
> +	shadow = shadow_page_for(&page[0]);
> +	origin = origin_page_for(&page[0]);
> +
> +	/* TODO(glider): this is racy. */

Can this be fixed, or does the race not matter?  In the latter case,
just remove the TODO and turn it into a NOTE or similar.

> +	for (i = 0; i < pages; i++) {
> +		BUG_ON(has_shadow_page(shadow_page_for(&page[i])));
> +		BUG_ON(has_shadow_page(origin_page_for(&page[i])));
> +		set_no_shadow_origin_page(&page[i]);
> +	}
> +	BUG_ON(has_shadow_page(shadow));
> +	__free_pages(shadow, order);
> +
> +	BUG_ON(has_shadow_page(origin));
> +	__free_pages(origin, order);
> +	LEAVE_RUNTIME(irq_flags);
> +}
> +EXPORT_SYMBOL(kmsan_free_page);
> +
> +/* Called from mm/page_alloc.c */
> +void kmsan_split_page(struct page *page, unsigned int order)
> +{
> +	struct page *shadow, *origin;
> +	unsigned long irq_flags;
> +
> +	if (!kmsan_ready || IN_RUNTIME())
> +		return;
> +
> +	ENTER_RUNTIME(irq_flags);
> +	if (!has_shadow_page(&page[0])) {
> +		BUG_ON(has_origin_page(&page[0]));
> +		LEAVE_RUNTIME(irq_flags);
> +		return;
> +	}
> +	shadow = shadow_page_for(&page[0]);
> +	split_page(shadow, order);
> +
> +	origin = origin_page_for(&page[0]);
> +	split_page(origin, order);
> +	LEAVE_RUNTIME(irq_flags);
> +}
> +EXPORT_SYMBOL(kmsan_split_page);
> +
> +/* Called from include/linux/highmem.h */
> +void kmsan_clear_page(void *page_addr)
> +{
> +	struct page *page;
> +
> +	if (!kmsan_ready || IN_RUNTIME())
> +		return;
> +	BUG_ON(!IS_ALIGNED((u64)page_addr, PAGE_SIZE));
> +	page = vmalloc_to_page_or_null(page_addr);
> +	if (!page)
> +		page = virt_to_page_or_null(page_addr);
> +	if (!page || !has_shadow_page(page))
> +		return;
> +	__memset(shadow_ptr_for(page), 0, PAGE_SIZE);
> +	BUG_ON(!has_origin_page(page));
> +	__memset(origin_ptr_for(page), 0, PAGE_SIZE);
> +}
> +EXPORT_SYMBOL(kmsan_clear_page);
> +
> +/* Called from mm/vmalloc.c */
> +void kmsan_vmap_page_range_noflush(unsigned long start, unsigned long end,
> +				   pgprot_t prot, struct page **pages)
> +{
> +	int nr, i, mapped;
> +	struct page **s_pages, **o_pages;
> +	unsigned long shadow_start, shadow_end, origin_start, origin_end;
> +
> +	if (!kmsan_ready || IN_RUNTIME())
> +		return;
> +	shadow_start = vmalloc_meta((void *)start, META_SHADOW);
> +	if (!shadow_start)
> +		return;
> +
> +	BUG_ON(start >= end);
> +	nr = (end - start) / PAGE_SIZE;
> +	s_pages = kcalloc(nr, sizeof(struct page *), GFP_KERNEL);
> +	o_pages = kcalloc(nr, sizeof(struct page *), GFP_KERNEL);
> +	if (!s_pages || !o_pages)
> +		goto ret;
> +	for (i = 0; i < nr; i++) {
> +		s_pages[i] = shadow_page_for(pages[i]);
> +		o_pages[i] = origin_page_for(pages[i]);
> +	}
> +	prot = __pgprot(pgprot_val(prot) | _PAGE_NX);
> +	prot = PAGE_KERNEL;
> +
> +	shadow_end = vmalloc_meta((void *)end, META_SHADOW);
> +	origin_start = vmalloc_meta((void *)start, META_ORIGIN);
> +	origin_end = vmalloc_meta((void *)end, META_ORIGIN);
> +	mapped = __vmap_page_range_noflush(shadow_start, shadow_end,
> +					   prot, s_pages);
> +	BUG_ON(mapped != nr);
> +	flush_tlb_kernel_range(shadow_start, shadow_end);
> +	mapped = __vmap_page_range_noflush(origin_start, origin_end,
> +					   prot, o_pages);
> +	BUG_ON(mapped != nr);
> +	flush_tlb_kernel_range(origin_start, origin_end);
> +ret:
> +	kfree(s_pages);
> +	kfree(o_pages);
> +}
> +
> +void kmsan_ignore_page(struct page *page, int order)
> +{
> +	int pages = 1 << order;
> +	int i;
> +	struct page *cp;
> +
> +	for (i = 0; i < pages; i++) {
> +		cp = &page[i];
> +		ignore_page(cp);
> +	}
> +}
> diff --git a/mm/kmsan/kmsan_shadow.h b/mm/kmsan/kmsan_shadow.h
> new file mode 100644
> index 000000000000..eaa7f771b6a5
> --- /dev/null
> +++ b/mm/kmsan/kmsan_shadow.h
> @@ -0,0 +1,30 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/*
> + * KMSAN shadow API.
> + *
> + * This should be agnostic to shadow implementation details.
> + *
> + * Copyright (C) 2017-2019 Google LLC
> + * Author: Alexander Potapenko <glider@xxxxxxxxxx>
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + *
> + */
> +
> +#ifndef __MM_KMSAN_KMSAN_SHADOW_H
> +#define __MM_KMSAN_KMSAN_SHADOW_H
> +
> +#include <asm/cpu_entry_area.h>  /* for CPU_ENTRY_AREA_MAP_SIZE */
> +
> +struct shadow_origin_ptr {
> +	void *s, *o;
> +};
> +
> +struct shadow_origin_ptr kmsan_get_shadow_origin_ptr(void *addr, u64 size,
> +						     bool store);
> +void *kmsan_get_metadata(void *addr, size_t size, bool is_origin);
> +void __init kmsan_init_alloc_meta_for_range(void *start, void *end);
> +
> +#endif  /* __MM_KMSAN_KMSAN_SHADOW_H */
> diff --git a/scripts/Makefile.kmsan b/scripts/Makefile.kmsan
> new file mode 100644
> index 000000000000..8b3844b66b22
> --- /dev/null
> +++ b/scripts/Makefile.kmsan
> @@ -0,0 +1,12 @@
> +ifdef CONFIG_KMSAN
> +
> +CFLAGS_KMSAN := -fsanitize=kernel-memory
> +
> +ifeq ($(call cc-option, $(CFLAGS_KMSAN) -Werror),)
> +   ifneq ($(CONFIG_COMPILE_TEST),y)
> +        $(warning Cannot use CONFIG_KMSAN: \
> +            -fsanitize=kernel-memory is not supported by compiler)
> +   endif
> +endif
> +
> +endif
> -- 
> 2.24.0.432.g9d3f5f5b63-goog
> 






[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Announce]     [Bugtraq]     [Linux OMAP]     [Linux MIPS]     [eCos]     [Asterisk Internet PBX]     [Linux API]

  Powered by Linux