Patch "x86/alternatives: Optimize optimize_nops()" has been added to the 5.10-stable tree

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



This is a note to let you know that I've just added the patch titled

    x86/alternatives: Optimize optimize_nops()

to the 5.10-stable tree which can be found at:
    http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=summary

The filename of the patch is:
     x86-alternatives-optimize-optimize_nops.patch
and it can be found in the queue-5.10 subdirectory.

If you, or anyone else, feels it should not be added to the stable tree,
please let <stable@xxxxxxxxxxxxxxx> know about it.


>From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
From: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Date: Fri, 26 Mar 2021 16:12:01 +0100
Subject: x86/alternatives: Optimize optimize_nops()

From: Peter Zijlstra <peterz@xxxxxxxxxxxxx>

commit 23c1ad538f4f371bdb67d8a112314842d5db7e5a upstream.

Currently, optimize_nops() scans to see if the alternative starts with
NOPs. However, the emit pattern is:

  141:	\oldinstr
  142:	.skip (len-(142b-141b)), 0x90

That is, when 'oldinstr' is short, the tail is padded with NOPs. This case
never gets optimized.

Rewrite optimize_nops() to replace any trailing string of NOPs inside
the alternative to larger NOPs. Also run it irrespective of patching,
replacing NOPs in both the original and replaced code.

A direct consequence is that 'padlen' becomes superfluous, so remove it.

 [ bp:
   - Adjust commit message
   - remove a stale comment about needing to pad
   - add a comment in optimize_nops()
   - exit early if the NOP verif. loop catches a mismatch - function
     should not not add NOPs in that case
   - fix the "optimized NOPs" offsets output ]

Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
Signed-off-by: Borislav Petkov <bp@xxxxxxx>
Signed-off-by: Ingo Molnar <mingo@xxxxxxxxxx>
Link: https://lkml.kernel.org/r/20210326151259.442992235@xxxxxxxxxxxxx
Signed-off-by: Ben Hutchings <ben@xxxxxxxxxxxxxxx>
Signed-off-by: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx>
---
 arch/x86/include/asm/alternative.h            |   17 ++-------
 arch/x86/kernel/alternative.c                 |   49 ++++++++++++++++----------
 tools/objtool/arch/x86/include/arch_special.h |    2 -
 3 files changed, 37 insertions(+), 31 deletions(-)

--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -65,7 +65,6 @@ struct alt_instr {
 	u16 cpuid;		/* cpuid bit set for replacement */
 	u8  instrlen;		/* length of original instruction */
 	u8  replacementlen;	/* length of new instruction */
-	u8  padlen;		/* length of build-time padding */
 } __packed;
 
 /*
@@ -104,7 +103,6 @@ static inline int alternatives_text_rese
 
 #define alt_end_marker		"663"
 #define alt_slen		"662b-661b"
-#define alt_pad_len		alt_end_marker"b-662b"
 #define alt_total_slen		alt_end_marker"b-661b"
 #define alt_rlen(num)		e_replacement(num)"f-"b_replacement(num)"f"
 
@@ -151,8 +149,7 @@ static inline int alternatives_text_rese
 	" .long " b_replacement(num)"f - .\n"		/* new instruction */ \
 	" .word " __stringify(feature) "\n"		/* feature bit     */ \
 	" .byte " alt_total_slen "\n"			/* source len      */ \
-	" .byte " alt_rlen(num) "\n"			/* replacement len */ \
-	" .byte " alt_pad_len "\n"			/* pad len */
+	" .byte " alt_rlen(num) "\n"			/* replacement len */
 
 #define ALTINSTR_REPLACEMENT(newinstr, feature, num)	/* replacement */	\
 	"# ALT: replacement " #num "\n"						\
@@ -224,9 +221,6 @@ static inline int alternatives_text_rese
  * Peculiarities:
  * No memory clobber here.
  * Argument numbers start with 1.
- * Best is to use constraints that are fixed size (like (%1) ... "r")
- * If you use variable sized constraints like "m" or "g" in the
- * replacement make sure to pad to the worst case length.
  * Leaving an unused argument 0 to keep API compatibility.
  */
 #define alternative_input(oldinstr, newinstr, feature, input...)	\
@@ -315,13 +309,12 @@ static inline int alternatives_text_rese
  * enough information for the alternatives patching code to patch an
  * instruction. See apply_alternatives().
  */
-.macro altinstruction_entry orig alt feature orig_len alt_len pad_len
+.macro altinstruction_entry orig alt feature orig_len alt_len
 	.long \orig - .
 	.long \alt - .
 	.word \feature
 	.byte \orig_len
 	.byte \alt_len
-	.byte \pad_len
 .endm
 
 /*
@@ -338,7 +331,7 @@ static inline int alternatives_text_rese
 142:
 
 	.pushsection .altinstructions,"a"
-	altinstruction_entry 140b,143f,\feature,142b-140b,144f-143f,142b-141b
+	altinstruction_entry 140b,143f,\feature,142b-140b,144f-143f
 	.popsection
 
 	.pushsection .altinstr_replacement,"ax"
@@ -375,8 +368,8 @@ static inline int alternatives_text_rese
 142:
 
 	.pushsection .altinstructions,"a"
-	altinstruction_entry 140b,143f,\feature1,142b-140b,144f-143f,142b-141b
-	altinstruction_entry 140b,144f,\feature2,142b-140b,145f-144f,142b-141b
+	altinstruction_entry 140b,143f,\feature1,142b-140b,144f-143f
+	altinstruction_entry 140b,144f,\feature2,142b-140b,145f-144f
 	.popsection
 
 	.pushsection .altinstr_replacement,"ax"
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -344,19 +344,35 @@ done:
 static void __init_or_module noinline optimize_nops(struct alt_instr *a, u8 *instr)
 {
 	unsigned long flags;
-	int i;
+	struct insn insn;
+	int nop, i = 0;
 
-	for (i = 0; i < a->padlen; i++) {
-		if (instr[i] != 0x90)
+	/*
+	 * Jump over the non-NOP insns, the remaining bytes must be single-byte
+	 * NOPs, optimize them.
+	 */
+	for (;;) {
+		if (insn_decode_kernel(&insn, &instr[i]))
+			return;
+
+		if (insn.length == 1 && insn.opcode.bytes[0] == 0x90)
+			break;
+
+		if ((i += insn.length) >= a->instrlen)
+			return;
+	}
+
+	for (nop = i; i < a->instrlen; i++) {
+		if (WARN_ONCE(instr[i] != 0x90, "Not a NOP at 0x%px\n", &instr[i]))
 			return;
 	}
 
 	local_irq_save(flags);
-	add_nops(instr + (a->instrlen - a->padlen), a->padlen);
+	add_nops(instr + nop, i - nop);
 	local_irq_restore(flags);
 
 	DUMP_BYTES(instr, a->instrlen, "%px: [%d:%d) optimized NOPs: ",
-		   instr, a->instrlen - a->padlen, a->padlen);
+		   instr, nop, a->instrlen);
 }
 
 /*
@@ -402,19 +418,15 @@ void __init_or_module noinline apply_alt
 		 * - feature not present but ALTINSTR_FLAG_INV is set to mean,
 		 *   patch if feature is *NOT* present.
 		 */
-		if (!boot_cpu_has(feature) == !(a->cpuid & ALTINSTR_FLAG_INV)) {
-			if (a->padlen > 1)
-				optimize_nops(a, instr);
-
-			continue;
-		}
+		if (!boot_cpu_has(feature) == !(a->cpuid & ALTINSTR_FLAG_INV))
+			goto next;
 
-		DPRINTK("feat: %s%d*32+%d, old: (%pS (%px) len: %d), repl: (%px, len: %d), pad: %d",
+		DPRINTK("feat: %s%d*32+%d, old: (%pS (%px) len: %d), repl: (%px, len: %d)",
 			(a->cpuid & ALTINSTR_FLAG_INV) ? "!" : "",
 			feature >> 5,
 			feature & 0x1f,
 			instr, instr, a->instrlen,
-			replacement, a->replacementlen, a->padlen);
+			replacement, a->replacementlen);
 
 		DUMP_BYTES(instr, a->instrlen, "%px: old_insn: ", instr);
 		DUMP_BYTES(replacement, a->replacementlen, "%px: rpl_insn: ", replacement);
@@ -438,14 +450,15 @@ void __init_or_module noinline apply_alt
 		if (a->replacementlen && is_jmp(replacement[0]))
 			recompute_jump(a, instr, replacement, insn_buff);
 
-		if (a->instrlen > a->replacementlen) {
-			add_nops(insn_buff + a->replacementlen,
-				 a->instrlen - a->replacementlen);
-			insn_buff_sz += a->instrlen - a->replacementlen;
-		}
+		for (; insn_buff_sz < a->instrlen; insn_buff_sz++)
+			insn_buff[insn_buff_sz] = 0x90;
+
 		DUMP_BYTES(insn_buff, insn_buff_sz, "%px: final_insn: ", instr);
 
 		text_poke_early(instr, insn_buff, insn_buff_sz);
+
+next:
+		optimize_nops(a, instr);
 	}
 }
 
--- a/tools/objtool/arch/x86/include/arch_special.h
+++ b/tools/objtool/arch/x86/include/arch_special.h
@@ -10,7 +10,7 @@
 #define JUMP_ORIG_OFFSET	0
 #define JUMP_NEW_OFFSET		4
 
-#define ALT_ENTRY_SIZE		13
+#define ALT_ENTRY_SIZE		12
 #define ALT_ORIG_OFFSET		0
 #define ALT_NEW_OFFSET		4
 #define ALT_FEATURE_OFFSET	8


Patches currently in stable-queue which might be from peterz@xxxxxxxxxxxxx are

queue-5.10/objtool-cache-instruction-relocs.patch
queue-5.10/x86-sev-avoid-using-__x86_return_thunk.patch
queue-5.10/objtool-add-elf_create_undef_symbol.patch
queue-5.10/x86-ftrace-use-alternative-ret-encoding.patch
queue-5.10/objtool-re-add-unwind_hint_-save_restore.patch
queue-5.10/x86-bugs-add-retbleed-ibpb.patch
queue-5.10/x86-bugs-enable-stibp-for-jmp2ret.patch
queue-5.10/x86-retpoline-cleanup-some-ifdefery.patch
queue-5.10/objtool-handle-__sanitize_cov-tail-calls.patch
queue-5.10/x86-prepare-asm-files-for-straight-line-speculation.patch
queue-5.10/kvm-vmx-flatten-__vmx_vcpu_run.patch
queue-5.10/x86-kvm-vmx-make-noinstr-clean.patch
queue-5.10/objtool-x86-replace-alternatives-with-.retpoline_sites.patch
queue-5.10/objtool-skip-magical-retpoline-.altinstr_replacement.patch
queue-5.10/x86-retbleed-add-fine-grained-kconfig-knobs.patch
queue-5.10/x86-cpu-amd-add-spectral-chicken.patch
queue-5.10/objtool-add-straight-line-speculation-validation.patch
queue-5.10/kvm-vmx-fix-ibrs-handling-after-vmexit.patch
queue-5.10/kvm-vmx-prevent-guest-rsb-poisoning-attacks-with-eibrs.patch
queue-5.10/x86-vsyscall_emu-64-don-t-use-ret-in-vsyscall-emulation.patch
queue-5.10/tools-arch-update-arch-x86-lib-mem-cpy-set-_64.s-copies-used-in-perf-bench-mem-memcpy.patch
queue-5.10/x86-add-straight-line-speculation-mitigation.patch
queue-5.10/x86-add-magic-amd-return-thunk.patch
queue-5.10/x86-bugs-keep-a-per-cpu-ia32_spec_ctrl-value.patch
queue-5.10/x86-alternatives-optimize-optimize_nops.patch
queue-5.10/x86-objtool-create-.return_sites.patch
queue-5.10/crypto-x86-poly1305-fixup-sls.patch
queue-5.10/x86-alternative-handle-jcc-__x86_indirect_thunk_-reg.patch
queue-5.10/x86-kvm-fix-setcc-emulation-for-return-thunks.patch
queue-5.10/objtool-fix-objtool-regression-on-x32-systems.patch
queue-5.10/x86-alternative-relax-text_poke_bp-constraint.patch
queue-5.10/x86-retpoline-swizzle-retpoline-thunk.patch
queue-5.10/objtool-rework-the-elf_rebuild_reloc_section-logic.patch
queue-5.10/x86-speculation-fix-firmware-entry-spec_ctrl-handling.patch
queue-5.10/x86-retpoline-remove-unused-replacement-symbols.patch
queue-5.10/objtool-fix-symbol-creation.patch
queue-5.10/x86-speculation-add-spectre_v2-ibrs-option-to-support-kernel-ibrs.patch
queue-5.10/bpf-x86-respect-x86_feature_retpoline.patch
queue-5.10/objtool-fix-type-of-reloc-addend.patch
queue-5.10/objtool-x86-rewrite-retpoline-thunk-calls.patch
queue-5.10/x86-undo-return-thunk-damage.patch
queue-5.10/x86-prepare-inline-asm-for-straight-line-speculation.patch
queue-5.10/x86-alternative-support-alternative_ternary.patch
queue-5.10/kvm-emulate-fix-setcc-emulation-function-offsets-with-sls.patch
queue-5.10/objtool-handle-per-arch-retpoline-naming.patch
queue-5.10/x86-retpoline-create-a-retpoline-thunk-array.patch
queue-5.10/x86-retpoline-simplify-retpolines.patch
queue-5.10/x86-asm-fix-register-order.patch
queue-5.10/x86-speculation-fill-rsb-on-vmexit-for-ibrs.patch
queue-5.10/objtool-add-entry-unret-validation.patch
queue-5.10/objtool-keep-track-of-retpoline-call-sites.patch
queue-5.10/kvm-vmx-convert-launched-argument-to-flags.patch
queue-5.10/objtool-add-elf_create_reloc-helper.patch
queue-5.10/objtool-make-.altinstructions-section-entry-size-consistent.patch
queue-5.10/x86-bpf-use-alternative-ret-encoding.patch
queue-5.10/x86-common-stamp-out-the-stepping-madness.patch
queue-5.10/x86-bugs-split-spectre_v2_select_mitigation-and-spectre_v2_user_select_mitigation.patch
queue-5.10/x86-bugs-report-intel-retbleed-vulnerability.patch
queue-5.10/bpf-x86-simplify-computing-label-offsets.patch
queue-5.10/x86-cpufeatures-move-retpoline-flags-to-word-11.patch
queue-5.10/x86-speculation-fix-spec_ctrl-write-on-smt-state-change.patch
queue-5.10/x86-retpoline-use-mfunction-return.patch
queue-5.10/x86-xen-rename-sys-entry-points.patch
queue-5.10/objtool-only-rewrite-unconditional-retpoline-thunk-calls.patch
queue-5.10/x86-bugs-optimize-spec_ctrl-msr-writes.patch
queue-5.10/x86-alternative-optimize-single-byte-nops-at-an-arbitrary-position.patch
queue-5.10/objtool-fix-code-relocs-vs-weak-symbols.patch
queue-5.10/x86-bugs-report-amd-retbleed-vulnerability.patch
queue-5.10/x86-static_call-use-alternative-ret-encoding.patch
queue-5.10/x86-speculation-fix-rsb-filling-with-config_retpoline-n.patch
queue-5.10/x86-asm-fixup-odd-gen-for-each-reg.h-usage.patch
queue-5.10/x86-alternative-add-debug-prints-to-apply_retpolines.patch
queue-5.10/objtool-extract-elf_symbol_add.patch
queue-5.10/x86-use-return-thunk-in-asm-code.patch
queue-5.10/objtool-remove-reloc-symbol-type-checks-in-get_alt_entry.patch
queue-5.10/objtool-classify-symbols.patch
queue-5.10/intel_idle-disable-ibrs-during-long-idle.patch
queue-5.10/objtool-correctly-handle-retpoline-thunk-calls.patch
queue-5.10/objtool-fix-.symtab_shndx-handling-for-elf_create_undef_symbol.patch
queue-5.10/x86-retpoline-move-the-retpoline-thunk-declarations-to-nospec-branch.h.patch
queue-5.10/objtool-support-asm-jump-tables.patch
queue-5.10/x86-alternative-implement-.retpoline_sites-support.patch
queue-5.10/objtool-x86-ignore-__x86_indirect_alt_-symbols.patch
queue-5.10/objtool-fix-sls-validation-for-kcov-tail-call-replacement.patch
queue-5.10/x86-alternative-try-inline-spectre_v2-retpoline-amd.patch
queue-5.10/x86-entry-remove-skip_r11rcx.patch
queue-5.10/objtool-explicitly-avoid-self-modifying-code-in-.altinstr_replacement.patch
queue-5.10/x86-speculation-use-cached-host-spec_ctrl-value-for-guest-entry-exit.patch
queue-5.10/x86-bugs-add-amd-retbleed-boot-parameter.patch
queue-5.10/objtool-create-reloc-sections-implicitly.patch
queue-5.10/x86-entry-add-kernel-ibrs-implementation.patch
queue-5.10/objtool-treat-.text.__x86.-as-noinstr.patch
queue-5.10/x86-lib-atomic64_386_32-rename-things.patch
queue-5.10/objtool-introduce-cfi-hash.patch
queue-5.10/objtool-default-ignore-int3-for-unreachable.patch
queue-5.10/objtool-extract-elf_strtab_concat.patch
queue-5.10/objtool-teach-get_alt_entry-about-more-relocation-types.patch
queue-5.10/objtool-update-retpoline-validation.patch



[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux