This is a note to let you know that I've just added the patch titled kaiser: fix intel_bts perf crashes to the 4.4-stable tree which can be found at: http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=summary The filename of the patch is: kaiser-fix-intel_bts-perf-crashes.patch and it can be found in the queue-4.4 subdirectory. If you, or anyone else, feels it should not be added to the stable tree, please let <stable@xxxxxxxxxxxxxxx> know about it. >From hughd@xxxxxxxxxx Thu Feb 1 09:09:20 2018 From: Hugh Dickins <hughd@xxxxxxxxxx> Date: Mon, 29 Jan 2018 18:15:33 -0800 Subject: kaiser: fix intel_bts perf crashes To: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx> Cc: Hugh Dickins <hughd@xxxxxxxxxx>, Thomas Gleixner <tglx@xxxxxxxxxxxxx>, Ingo Molnar <mingo@xxxxxxxxxx>, Andy Lutomirski <luto@xxxxxxxxxxxxxx>, Alexander Shishkin <alexander.shishkin@xxxxxxxxxxxxxxx>, Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>, Vince Weaver <vince@xxxxxxxxxx>, stable@xxxxxxxxxxxxxxx, Jiri Kosina <jkosina@xxxxxxx> Message-ID: <20180130021533.228782-1-hughd@xxxxxxxxxx> From: Hugh Dickins <hughd@xxxxxxxxxx> Vince reported perf_fuzzer quickly locks up on 4.15-rc7 with PTI; Robert reported Bad RIP with KPTI and Intel BTS also on 4.15-rc7: honggfuzz -f /tmp/somedirectorywithatleastonefile \ --linux_perf_bts_edge -s -- /bin/true (honggfuzz from https://github.com/google/honggfuzz) crashed with BUG: unable to handle kernel paging request at ffff9d3215100000 (then narrowed it down to perf record --per-thread -e intel_bts//u -- /bin/ls). The intel_bts driver does not use the 'normal' BTS buffer which is exposed through kaiser_add_mapping(), but instead uses the memory allocated for the perf AUX buffer. This obviously comes apart when using PTI, because then the kernel mapping, which includes that AUX buffer memory, disappears while switched to user page tables. Easily fixed in old-Kaiser backports, by applying kaiser_add_mapping() to those pages; perhaps not so easy for upstream, where 4.15-rc8 commit 99a9dc98ba52 ("x86,perf: Disable intel_bts when PTI") disables for now. Slightly reorganized surrounding code in bts_buffer_setup_aux(), so it can better match bts_buffer_free_aux(): free_aux with an #ifdef to avoid the loop when PTI is off, but setup_aux needs to loop anyway (and kaiser_add_mapping() is cheap when PTI config is off or "pti=off"). Reported-by: Vince Weaver <vincent.weaver@xxxxxxxxx> Reported-by: Robert Święcki <robert@xxxxxxxxxxx> Analyzed-by: Peter Zijlstra <peterz@xxxxxxxxxxxxx> Analyzed-by: Stephane Eranian <eranian@xxxxxxxxxx> Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx> Cc: Ingo Molnar <mingo@xxxxxxxxxx> Cc: Andy Lutomirski <luto@xxxxxxxxxxxxxx> Cc: Alexander Shishkin <alexander.shishkin@xxxxxxxxxxxxxxx> Cc: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx> Cc: Vince Weaver <vince@xxxxxxxxxx> Cc: stable@xxxxxxxxxxxxxxx Cc: Jiri Kosina <jkosina@xxxxxxx> Signed-off-by: Hugh Dickins <hughd@xxxxxxxxxx> Signed-off-by: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx> --- arch/x86/kernel/cpu/perf_event_intel_bts.c | 44 +++++++++++++++++++++-------- 1 file changed, 33 insertions(+), 11 deletions(-) --- a/arch/x86/kernel/cpu/perf_event_intel_bts.c +++ b/arch/x86/kernel/cpu/perf_event_intel_bts.c @@ -22,6 +22,7 @@ #include <linux/debugfs.h> #include <linux/device.h> #include <linux/coredump.h> +#include <linux/kaiser.h> #include <asm-generic/sizes.h> #include <asm/perf_event.h> @@ -67,6 +68,23 @@ static size_t buf_size(struct page *page return 1 << (PAGE_SHIFT + page_private(page)); } +static void bts_buffer_free_aux(void *data) +{ +#ifdef CONFIG_PAGE_TABLE_ISOLATION + struct bts_buffer *buf = data; + int nbuf; + + for (nbuf = 0; nbuf < buf->nr_bufs; nbuf++) { + struct page *page = buf->buf[nbuf].page; + void *kaddr = page_address(page); + size_t page_size = buf_size(page); + + kaiser_remove_mapping((unsigned long)kaddr, page_size); + } +#endif + kfree(data); +} + static void * bts_buffer_setup_aux(int cpu, void **pages, int nr_pages, bool overwrite) { @@ -103,29 +121,33 @@ bts_buffer_setup_aux(int cpu, void **pag buf->real_size = size - size % BTS_RECORD_SIZE; for (pg = 0, nbuf = 0, offset = 0, pad = 0; nbuf < buf->nr_bufs; nbuf++) { - unsigned int __nr_pages; + void *kaddr = pages[pg]; + size_t page_size; + + page = virt_to_page(kaddr); + page_size = buf_size(page); + + if (kaiser_add_mapping((unsigned long)kaddr, + page_size, __PAGE_KERNEL) < 0) { + buf->nr_bufs = nbuf; + bts_buffer_free_aux(buf); + return NULL; + } - page = virt_to_page(pages[pg]); - __nr_pages = PagePrivate(page) ? 1 << page_private(page) : 1; buf->buf[nbuf].page = page; buf->buf[nbuf].offset = offset; buf->buf[nbuf].displacement = (pad ? BTS_RECORD_SIZE - pad : 0); - buf->buf[nbuf].size = buf_size(page) - buf->buf[nbuf].displacement; + buf->buf[nbuf].size = page_size - buf->buf[nbuf].displacement; pad = buf->buf[nbuf].size % BTS_RECORD_SIZE; buf->buf[nbuf].size -= pad; - pg += __nr_pages; - offset += __nr_pages << PAGE_SHIFT; + pg += page_size >> PAGE_SHIFT; + offset += page_size; } return buf; } -static void bts_buffer_free_aux(void *data) -{ - kfree(data); -} - static unsigned long bts_buffer_offset(struct bts_buffer *buf, unsigned int idx) { return buf->buf[idx].offset + buf->buf[idx].displacement; Patches currently in stable-queue which might be from hughd@xxxxxxxxxx are queue-4.4/x86-pti-make-unpoison-of-pgd-for-trusted-boot-work-for-real.patch queue-4.4/kaiser-fix-intel_bts-perf-crashes.patch