In preparation for reducing the phys-to-virt minimum relative alignment
from 16 MiB to 2 MiB, switch to patchable sequences involving MOVW
instructions that can more easily be manipulated to carry a 12-bit
immediate. Note that the non-LPAE ARM sequence is not updated: MOVW may
not be supported on non-LPAE platforms, and the sequence itself can be
updated more easily to apply the 12 bits of displacement.

For Thumb2, which has many more versions of opcodes, switch to a
sequence that can be patched by the same patching code for both
versions, and use asm constraints and S-suffixed opcodes to force
narrow encodings to be selected.

Suggested-by: Russell King <linux@xxxxxxxxxxxxxxx>
Signed-off-by: Ard Biesheuvel <ardb@xxxxxxxxxx>
---
 arch/arm/include/asm/memory.h | 43 +++++++++++----
 arch/arm/kernel/head.S        | 57 +++++++++++++-------
 2 files changed, 69 insertions(+), 31 deletions(-)

diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h
index 4121662dea5a..7184a2540816 100644
--- a/arch/arm/include/asm/memory.h
+++ b/arch/arm/include/asm/memory.h
@@ -183,6 +183,7 @@ extern const void *__pv_table_begin, *__pv_table_end;
 #define PHYS_OFFSET	((phys_addr_t)__pv_phys_pfn_offset << PAGE_SHIFT)
 #define PHYS_PFN_OFFSET	(__pv_phys_pfn_offset)
 
+#ifndef CONFIG_THUMB2_KERNEL
 #define __pv_stub(from,to,instr)			\
 	__asm__("@ __pv_stub\n"				\
 	"1:	" instr "	%0, %1, %2\n"		\
@@ -192,25 +193,46 @@ extern const void *__pv_table_begin, *__pv_table_end;
 	: "=r" (to)					\
 	: "r" (from), "I" (__PV_BITS_31_24))
 
-#define __pv_stub_mov_hi(t)				\
-	__asm__ volatile("@ __pv_stub_mov\n"		\
-	"1:	mov	%R0, %1\n"			\
+#define __pv_add_carry_stub(x, y)			\
+	__asm__ volatile("@ __pv_add_carry_stub\n"	\
+	"0:	movw	%R0, %2\n"			\
+	"1:	adds	%Q0, %1, %R0, lsl #24\n"	\
+	"2:	mov	%R0, %3\n"			\
+	"	adc	%R0, %R0, #0\n"			\
 	"	.pushsection .pv_table,\"a\"\n"		\
-	"	.long	1b - .\n"			\
+	"	.long	0b - ., 1b - ., 2b - .\n"	\
 	"	.popsection\n"				\
-	: "=r" (t)					\
-	: "I" (__PV_BITS_7_0))
+	: "=&r" (y)					\
+	: "r" (x), "j" (0), "I" (__PV_BITS_7_0)		\
+	: "cc")
+
+#else
+#define __pv_stub(from,to,instr)			\
+	__asm__("@ __pv_stub\n"				\
+	"0:	movw	%0, %2\n"			\
+	"	lsls	%0, #24\n"			\
+	"	" instr "s	%0, %1, %0\n"		\
+	"	.pushsection .pv_table,\"a\"\n"		\
+	"	.long	0b - .\n"			\
+	"	.popsection\n"				\
+	: "=&l" (to)					\
+	: "l" (from), "j" (0)				\
+	: "cc")
 
 #define __pv_add_carry_stub(x, y)			\
 	__asm__ volatile("@ __pv_add_carry_stub\n"	\
-	"1:	adds	%Q0, %1, %2\n"			\
+	"0:	movw	%R0, %2\n"			\
+	"	lsls	%R0, #24\n"			\
+	"	adds	%Q0, %1, %R0\n"			\
+	"1:	mvn	%R0, #0\n"			\
 	"	adc	%R0, %R0, #0\n"			\
 	"	.pushsection .pv_table,\"a\"\n"		\
-	"	.long	1b - .\n"			\
+	"	.long	0b - ., 1b - .\n"		\
 	"	.popsection\n"				\
-	: "+r" (y)					\
-	: "r" (x), "I" (__PV_BITS_31_24)		\
+	: "=&l" (y)					\
+	: "l" (x), "j" (0)				\
 	: "cc")
+#endif
 
 static inline phys_addr_t __virt_to_phys_nodebug(unsigned long x)
 {
@@ -219,7 +241,6 @@ static inline phys_addr_t __virt_to_phys_nodebug(unsigned long x)
 	if (sizeof(phys_addr_t) == 4) {
 		__pv_stub(x, t, "add");
 	} else {
-		__pv_stub_mov_hi(t);
 		__pv_add_carry_stub(x, t);
 	}
 	return t;
diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S
index d2bd3b258386..86cea608a5ea 100644
--- a/arch/arm/kernel/head.S
+++ b/arch/arm/kernel/head.S
@@ -639,43 +639,45 @@ __fixup_a_pv_table:
 	mov	r6, r6, lsr #24
 	cmn	r0, #1
 #ifdef CONFIG_THUMB2_KERNEL
-	moveq	r0, #0x200000	@ set bit 21, mov to mvn instruction
-	lsls	r6, #24
-	beq	.Lnext
-	clz	r7, r6
-	lsr	r6, #24
-	lsl	r6, r7
-	bic	r6, #0x0080
-	lsrs	r7, #1
-	orrcs	r6, #0x0080
-	orr	r6, r6, r7, lsl #12
-	orr	r6, #0x4000
+	moveq	r0, #0x200	@ bit 9, ADD to SUB instruction (T1 encoding)
 	b	.Lnext
 .Lloop:	add	r7, r4
 	add	r4, #4
+#ifdef CONFIG_ARM_LPAE
+	ldrh	ip, [r7]
+ARM_BE8(rev16	ip, ip)
+	tst	ip, #0x200	@ MOVW has bit 9 set, MVN has it clear
+	bne	0f		@ skip if MOVW
+	tst	r0, #0x200	@ need to convert MVN to MOV ?
+	bne	.Lnext
+	eor	ip, ip, #0x20	@ flick bit #5
+ARM_BE8(rev16	ip, ip)
+	strh	ip, [r7]
+	b	.Lnext
+0:
+#endif
 	ldrh	ip, [r7, #2]
 ARM_BE8(rev16	ip, ip)
-	tst	ip, #0x4000
-	and	ip, #0x8f00
-	orrne	ip, r6	@ mask in offset bits 31-24
-	orreq	ip, r0	@ mask in offset bits 7-0
+	orr	ip, r6	@ mask in offset bits 31-24
 ARM_BE8(rev16	ip, ip)
 	strh	ip, [r7, #2]
-	bne	.Lnext
-	ldrh	ip, [r7]
+	ldrh	ip, [r7, #6]
 ARM_BE8(rev16	ip, ip)
-	bic	ip, #0x20
-	orr	ip, ip, r0, lsr #16
+	eor	ip, ip, r0
 ARM_BE8(rev16	ip, ip)
-	strh	ip, [r7]
+	strh	ip, [r7, #6]
 #else
 #ifdef CONFIG_CPU_ENDIAN_BE8
 @ in BE8, we load data in BE, but instructions still in LE
+#define PV_BIT20	0x00001000
 #define PV_BIT22	0x00004000
+#define PV_BIT23_22	0x0000c000
 #define PV_IMM8_MASK	0xff000000
 #define PV_ROT_MASK	0x000f0000
 #else
+#define PV_BIT20	0x00100000
 #define PV_BIT22	0x00400000
+#define PV_BIT23_22	0x00c00000
 #define PV_IMM8_MASK	0x000000ff
 #define PV_ROT_MASK	0xf00
 #endif
@@ -683,11 +685,26 @@ ARM_BE8(rev16	ip, ip)
 	moveq	r0, #PV_BIT22	@ set bit 22, mov to mvn instruction
 	b	.Lnext
 .Lloop:	ldr	ip, [r7, r4]
+#ifdef CONFIG_ARM_LPAE
+	tst	ip, #PV_BIT23_22	@ MOVW has bit 23:22 clear, MOV/ADD/SUB have it set
+ARM_BE8(rev	ip, ip)
+	orreq	ip, ip, r6
+ARM_BE8(rev	ip, ip)
+	beq	2f
+	tst	ip, #PV_BIT20		@ ADDS has bit 20 set
+	beq	1f
+	tst	r0, #PV_BIT22	@ check whether to invert bits 23:22 (ADD -> SUB)
+	beq	.Lnext
+	eor	ip, ip, #PV_BIT23_22
+	b	2f
+1:
+#endif
 	bic	ip, ip, #PV_IMM8_MASK
 	tst	ip, #PV_ROT_MASK		@ check the rotation field
 	orrne	ip, ip, r6  ARM_BE8(, lsl #24)	@ mask in offset bits 31-24
 	biceq	ip, ip, #PV_BIT22		@ clear bit 22
 	orreq	ip, ip, r0			@ mask in offset bits 7-0
+2:
 	str	ip, [r7, r4]
 	add	r4, r4, #4
 #endif
-- 
2.17.1
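
[Aside for readers following the new 64-bit sequence: the sketch below is a
rough host-side C model, not kernel code, of what the ARM/LPAE
__pv_add_carry_stub computes once the boot-time patching has filled in the
MOVW immediate and the high-word constant. The function name and sample
values are made up for illustration. The low word is formed by adding the
offset shifted left by 24 to the virtual address, and the carry out of that
addition is folded into the patched high word, mirroring the adds/adc pair
in the asm.]

/*
 * Rough host-side model of the patched ARM/LPAE __pv_add_carry_stub
 * (illustrative only; "movw_imm" and "hi_word" stand for the values the
 * boot-time patching code writes into the MOVW and MOV instructions).
 */
#include <stdint.h>
#include <stdio.h>

static uint64_t model_virt_to_phys(uint32_t virt, uint16_t movw_imm,
				   uint32_t hi_word)
{
	/* adds %Q0, %1, %R0, lsl #24 */
	uint32_t lo = virt + ((uint32_t)movw_imm << 24);
	/* carry out of the 32-bit addition */
	uint32_t carry = lo < virt;
	/* mov %R0, <hi>; adc %R0, %R0, #0 */
	return ((uint64_t)(hi_word + carry) << 32) | lo;
}

int main(void)
{
	/* Hypothetical example: VA 0xc0001000 with a +0x40000000 p2v offset. */
	printf("0x%llx\n", (unsigned long long)
	       model_virt_to_phys(0xc0001000u, 0x40, 0));
	return 0;
}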