On Fri, May 29, 2020 at 04:53:25PM +0200, Peter Zijlstra wrote:
> On Fri, May 29, 2020 at 04:35:56PM +0200, Peter Zijlstra wrote:
>
> *groan*, this is one of those CONFIG_PROFILE_ALL_BRANCHES builds. If I
> disable that it goes away.
>
> Still trying to untangle the mess it generated, but on first go it
> looks like objtool is right, but I'm not sure what went wrong.

$ tools/objtool/objtool check -fab arch/x86/lib/csum-wrappers_64.o
arch/x86/lib/csum-wrappers_64.o: warning: objtool: csum_and_copy_from_user()+0x29f: call to memset() with UACCESS enabled
arch/x86/lib/csum-wrappers_64.o: warning: objtool: csum_and_copy_from_user()+0x283: (branch)
arch/x86/lib/csum-wrappers_64.o: warning: objtool: csum_and_copy_from_user()+0x113: (branch)
arch/x86/lib/csum-wrappers_64.o: warning: objtool: .altinstr_replacement+0xffffffffffffffff: (branch)
arch/x86/lib/csum-wrappers_64.o: warning: objtool: csum_and_copy_from_user()+0xea: (alt)
arch/x86/lib/csum-wrappers_64.o: warning: objtool: .altinstr_replacement+0xffffffffffffffff: (branch)
arch/x86/lib/csum-wrappers_64.o: warning: objtool: csum_and_copy_from_user()+0xe7: (alt)
arch/x86/lib/csum-wrappers_64.o: warning: objtool: csum_and_copy_from_user()+0xd2: (branch)
arch/x86/lib/csum-wrappers_64.o: warning: objtool: csum_and_copy_from_user()+0x7e: (branch)
arch/x86/lib/csum-wrappers_64.o: warning: objtool: csum_and_copy_from_user()+0x43: (branch)
arch/x86/lib/csum-wrappers_64.o: warning: objtool: csum_and_copy_from_user()+0x0: <=== (sym)

The problem is with the +0x113 branch, which is at 0x1d1. That looks to
be:

	if (!likely(user_access_begin(src, len)))
		goto out_err;

Except that the branch profiling stuff confused GCC enough to leak STAC
into the error path or something. Reproduces for me with gcc-9 and
gcc-10. (A rough sketch of what the branch profiling expands likely()
into follows the disassembly below.)

$ objdump -drS arch/x86/lib/csum-wrappers_64.o

00000000000000be <csum_and_copy_from_user>:
{
  be:	e8 00 00 00 00       	callq  c3 <csum_and_copy_from_user+0x5>
			bf: R_X86_64_PLT32	__fentry__-0x4
  c3:	41 57                	push   %r15
  c5:	41 56                	push   %r14
  c7:	41 89 d6             	mov    %edx,%r14d
  ca:	41 55                	push   %r13
  cc:	49 89 f5             	mov    %rsi,%r13
  cf:	41 54                	push   %r12
  d1:	41 89 cc             	mov    %ecx,%r12d
  d4:	55                   	push   %rbp
  d5:	48 89 fd             	mov    %rdi,%rbp
  d8:	53                   	push   %rbx
  d9:	4c 89 c3             	mov    %r8,%rbx
  dc:	41 51                	push   %r9
	might_sleep();
  de:	e8 00 00 00 00       	callq  e3 <csum_and_copy_from_user+0x25>
			df: R_X86_64_PLT32	_cond_resched-0x4
	*errp = 0;
  e3:	48 89 da             	mov    %rbx,%rdx
  e6:	b8 ff ff 37 00       	mov    $0x37ffff,%eax
  eb:	48 c1 e0 2a          	shl    $0x2a,%rax
  ef:	48 c1 ea 03          	shr    $0x3,%rdx
  f3:	8a 14 02             	mov    (%rdx,%rax,1),%dl
  f6:	48 89 d8             	mov    %rbx,%rax
  f9:	83 e0 07             	and    $0x7,%eax
  fc:	83 c0 03             	add    $0x3,%eax
  ff:	38 d0                	cmp    %dl,%al
 101:	7c 0c                	jl     10f <csum_and_copy_from_user+0x51>
 103:	84 d2                	test   %dl,%dl
 105:	74 08                	je     10f <csum_and_copy_from_user+0x51>
 107:	48 89 df             	mov    %rbx,%rdi
 10a:	e8 00 00 00 00       	callq  10f <csum_and_copy_from_user+0x51>
			10b: R_X86_64_PLT32	__asan_report_store4_noabort-0x4
DECLARE_PER_CPU(struct task_struct *, current_task);

static __always_inline struct task_struct *get_current(void)
{
	return this_cpu_read_stable(current_task);
 10f:	65 4c 8b 3c 25 00 00 	mov    %gs:0x0,%r15
 116:	00 00
			114: R_X86_64_32S	current_task
 * checking before using them, but you have to surround them with the
 * user_access_begin/end() pair.
 */
static __must_check __always_inline bool user_access_begin(const void __user *ptr, size_t len)
{
	if (unlikely(!access_ok(ptr,len)))
 118:	49 8d bf 10 0a 00 00 	lea    0xa10(%r15),%rdi
 11f:	b8 ff ff 37 00       	mov    $0x37ffff,%eax
 124:	c7 03 00 00 00 00    	movl   $0x0,(%rbx)
	if (!likely(user_access_begin(src, len)))
 12a:	49 63 f6             	movslq %r14d,%rsi
 12d:	48 89 fa             	mov    %rdi,%rdx
 130:	48 c1 e0 2a          	shl    $0x2a,%rax
 134:	48 c1 ea 03          	shr    $0x3,%rdx
 138:	80 3c 02 00          	cmpb   $0x0,(%rdx,%rax,1)
 13c:	74 0d                	je     14b <csum_and_copy_from_user+0x8d>
 13e:	48 89 34 24          	mov    %rsi,(%rsp)
 142:	e8 00 00 00 00       	callq  147 <csum_and_copy_from_user+0x89>
			143: R_X86_64_PLT32	__asan_report_load8_noabort-0x4
 147:	48 8b 34 24          	mov    (%rsp),%rsi
 14b:	49 8b 97 10 0a 00 00 	mov    0xa10(%r15),%rdx
 152:	48 89 ef             	mov    %rbp,%rdi
 155:	e8 a6 fe ff ff       	callq  0 <__chk_range_not_ok>
 15a:	31 c9                	xor    %ecx,%ecx
 15c:	ba 01 00 00 00       	mov    $0x1,%edx
 161:	48 c7 c7 00 00 00 00 	mov    $0x0,%rdi
			164: R_X86_64_32S	_ftrace_annotated_branch+0x120
 168:	89 c6                	mov    %eax,%esi
 16a:	41 89 c7             	mov    %eax,%r15d
 16d:	83 f6 01             	xor    $0x1,%esi
 170:	40 0f b6 f6          	movzbl %sil,%esi
 174:	e8 00 00 00 00       	callq  179 <csum_and_copy_from_user+0xbb>
			175: R_X86_64_PLT32	ftrace_likely_update-0x4
 179:	31 c9                	xor    %ecx,%ecx
 17b:	31 d2                	xor    %edx,%edx
 17d:	41 0f b6 f7          	movzbl %r15b,%esi
 181:	48 c7 c7 00 00 00 00 	mov    $0x0,%rdi
			184: R_X86_64_32S	_ftrace_annotated_branch+0x150
 188:	e8 00 00 00 00       	callq  18d <csum_and_copy_from_user+0xcf>
			189: R_X86_64_PLT32	ftrace_likely_update-0x4
 18d:	45 84 ff             	test   %r15b,%r15b
 190:	74 0c                	je     19e <csum_and_copy_from_user+0xe0>
 192:	48 ff 05 00 00 00 00 	incq   0x0(%rip)        # 199 <csum_and_copy_from_user+0xdb>
			195: R_X86_64_PC32	_ftrace_branch+0x10c
		return 0;
 199:	45 31 ff             	xor    %r15d,%r15d
 19c:	eb 10                	jmp    1ae <csum_and_copy_from_user+0xf0>
	if (unlikely(!access_ok(ptr,len)))
 19e:	48 ff 05 00 00 00 00 	incq   0x0(%rip)        # 1a5 <csum_and_copy_from_user+0xe7>
			1a1: R_X86_64_PC32	_ftrace_branch+0x104
}

static __always_inline void stac(void)
{
	/* Note: a barrier is implicit in alternative() */
	alternative("", __ASM_STAC, X86_FEATURE_SMAP);
 1a5:	90                   	nop
 1a6:	90                   	nop
 1a7:	90                   	nop
	__uaccess_begin_nospec();
 1a8:	90                   	nop
 1a9:	90                   	nop
 1aa:	90                   	nop
	return 1;
 1ab:	41 b7 01             	mov    $0x1,%r15b
 1ae:	31 c9                	xor    %ecx,%ecx
 1b0:	41 0f b6 f7          	movzbl %r15b,%esi
 1b4:	ba 01 00 00 00       	mov    $0x1,%edx
 1b9:	48 c7 c7 00 00 00 00 	mov    $0x0,%rdi
			1bc: R_X86_64_32S	_ftrace_annotated_branch+0xf0
 1c0:	e8 00 00 00 00       	callq  1c5 <csum_and_copy_from_user+0x107>
			1c1: R_X86_64_PLT32	ftrace_likely_update-0x4
 1c5:	45 84 ff             	test   %r15b,%r15b
 1c8:	75 0c                	jne    1d6 <csum_and_copy_from_user+0x118>
 1ca:	48 ff 05 00 00 00 00 	incq   0x0(%rip)        # 1d1 <csum_and_copy_from_user+0x113>
			1cd: R_X86_64_PC32	_ftrace_branch+0xe4
		goto out_err;
 1d1:	e9 4d 01 00 00       	jmpq   323 <csum_and_copy_from_user+0x265>
	if (unlikely((unsigned long)src & 6)) {
 1d6:	49 89 ef             	mov    %rbp,%r15
 1d9:	31 f6                	xor    %esi,%esi
	if (!likely(user_access_begin(src, len)))
 1db:	48 ff 05 00 00 00 00 	incq   0x0(%rip)        # 1e2 <csum_and_copy_from_user+0x124>
			1de: R_X86_64_PC32	_ftrace_branch+0xdc
	if (unlikely((unsigned long)src & 6)) {
 1e2:	48 c7 c7 00 00 00 00 	mov    $0x0,%rdi
			1e5: R_X86_64_32S	_ftrace_annotated_branch+0xc0
 1e9:	41 83 e7 06          	and    $0x6,%r15d
 1ed:	40 0f 95 c6          	setne  %sil
 1f1:	31 c9                	xor    %ecx,%ecx
 1f3:	31 d2                	xor    %edx,%edx
 1f5:	e8 00 00 00 00       	callq  1fa <csum_and_copy_from_user+0x13c>
			1f6: R_X86_64_PLT32	ftrace_likely_update-0x4
 1fa:	4d 85 ff             	test   %r15,%r15
 1fd:	74 09                	je     208 <csum_and_copy_from_user+0x14a>
 1ff:	48 ff 05 00 00 00 00 	incq   0x0(%rip)        # 206 <csum_and_copy_from_user+0x148>
			202: R_X86_64_PC32	_ftrace_branch+0xbc
 206:	eb 4e                	jmp    256 <csum_and_copy_from_user+0x198>
 208:	48 ff 05 00 00 00 00 	incq   0x0(%rip)        # 20f <csum_and_copy_from_user+0x151>
			20b: R_X86_64_PC32	_ftrace_branch+0xb4
 20f:	e9 96 00 00 00       	jmpq   2aa <csum_and_copy_from_user+0x1ec>
		*(__u16 *)dst = val16;
 214:	4c 89 e8             	mov    %r13,%rax
 217:	b9 ff ff 37 00       	mov    $0x37ffff,%ecx
		unsafe_get_user(val16, (const __u16 __user *)src, out);
 21c:	48 ff 05 00 00 00 00 	incq   0x0(%rip)        # 223 <csum_and_copy_from_user+0x165>
			21f: R_X86_64_PC32	_ftrace_branch+0x8c
		*(__u16 *)dst = val16;
 223:	48 c1 e8 03          	shr    $0x3,%rax
 227:	48 c1 e1 2a          	shl    $0x2a,%rcx
 22b:	8a 14 08             	mov    (%rax,%rcx,1),%dl
 22e:	4c 89 e8             	mov    %r13,%rax
 231:	83 e0 07             	and    $0x7,%eax
 234:	ff c0                	inc    %eax
 236:	38 d0                	cmp    %dl,%al
 238:	7d 62                	jge    29c <csum_and_copy_from_user+0x1de>
 23a:	66 45 89 7d 00       	mov    %r15w,0x0(%r13)
		src += 2;
 23f:	48 83 c5 02          	add    $0x2,%rbp
		dst += 2;
 243:	49 83 c5 02          	add    $0x2,%r13
		len -= 2;
 247:	41 83 ee 02          	sub    $0x2,%r14d
		isum = (__force __wsum)add32_with_carry(
 24b:	45 0f b7 ff          	movzwl %r15w,%r15d
	asm("addl %2,%0\n\t"
 24f:	45 01 fc             	add    %r15d,%r12d
 252:	41 83 d4 00          	adc    $0x0,%r12d
	while (((unsigned long)src & 6) && len >= 2) {
 256:	40 f6 c5 06          	test   $0x6,%bpl
 25a:	74 4e                	je     2aa <csum_and_copy_from_user+0x1ec>
 25c:	41 83 fe 01          	cmp    $0x1,%r14d
 260:	7e 48                	jle    2aa <csum_and_copy_from_user+0x1ec>
		unsafe_get_user(val16, (const __u16 __user *)src, out);
 262:	31 c0                	xor    %eax,%eax
 264:	66 44 8b 7d 00       	mov    0x0(%rbp),%r15w
 269:	85 c0                	test   %eax,%eax
 26b:	48 c7 c7 00 00 00 00 	mov    $0x0,%rdi
			26e: R_X86_64_32S	_ftrace_annotated_branch+0x90
 272:	89 04 24             	mov    %eax,(%rsp)
 275:	40 0f 95 c6          	setne  %sil
 279:	31 c9                	xor    %ecx,%ecx
 27b:	31 d2                	xor    %edx,%edx
 27d:	40 0f b6 f6          	movzbl %sil,%esi
 281:	e8 00 00 00 00       	callq  286 <csum_and_copy_from_user+0x1c8>
			282: R_X86_64_PLT32	ftrace_likely_update-0x4
 286:	8b 04 24             	mov    (%rsp),%eax
 289:	85 c0                	test   %eax,%eax
 28b:	74 87                	je     214 <csum_and_copy_from_user+0x156>
 28d:	48 ff 05 00 00 00 00 	incq   0x0(%rip)        # 294 <csum_and_copy_from_user+0x1d6>
			290: R_X86_64_PC32	_ftrace_branch+0x94
	alternative("", __ASM_CLAC, X86_FEATURE_SMAP);
 294:	90                   	nop
 295:	90                   	nop
 296:	90                   	nop
}
 297:	e9 87 00 00 00       	jmpq   323 <csum_and_copy_from_user+0x265>
		*(__u16 *)dst = val16;
 29c:	84 d2                	test   %dl,%dl
 29e:	74 9a                	je     23a <csum_and_copy_from_user+0x17c>
 2a0:	4c 89 ef             	mov    %r13,%rdi
 2a3:	e8 00 00 00 00       	callq  2a8 <csum_and_copy_from_user+0x1ea>
			2a4: R_X86_64_PLT32	__asan_report_store2_noabort-0x4
 2a8:	eb 90                	jmp    23a <csum_and_copy_from_user+0x17c>
	isum = csum_partial_copy_generic((__force const void *)src,
 2aa:	44 89 e1             	mov    %r12d,%ecx
 2ad:	45 31 c9             	xor    %r9d,%r9d
 2b0:	49 89 d8             	mov    %rbx,%r8
 2b3:	44 89 f2             	mov    %r14d,%edx
 2b6:	4c 89 ee             	mov    %r13,%rsi
 2b9:	48 89 ef             	mov    %rbp,%rdi
 2bc:	e8 00 00 00 00       	callq  2c1 <csum_and_copy_from_user+0x203>
			2bd: R_X86_64_PLT32	csum_partial_copy_generic-0x4
 2c1:	41 89 c4             	mov    %eax,%r12d
	alternative("", __ASM_CLAC, X86_FEATURE_SMAP);
 2c4:	90                   	nop
 2c5:	90                   	nop
 2c6:	90                   	nop
	if (unlikely(*errp))
 2c7:	b8 ff ff 37 00       	mov    $0x37ffff,%eax
 2cc:	48 89 da             	mov    %rbx,%rdx
 2cf:	48 c1 e0 2a          	shl    $0x2a,%rax
 2d3:	48 c1 ea 03          	shr    $0x3,%rdx
 2d7:	8a 14 02             	mov    (%rdx,%rax,1),%dl
 2da:	48 89 d8             	mov    %rbx,%rax
 2dd:	83 e0 07             	and    $0x7,%eax
 2e0:	83 c0 03             	add    $0x3,%eax
 2e3:	38 d0                	cmp    %dl,%al
 2e5:	7c 0c                	jl     2f3 <csum_and_copy_from_user+0x235>
 2e7:	84 d2                	test   %dl,%dl
 2e9:	74 08                	je     2f3 <csum_and_copy_from_user+0x235>
 2eb:	48 89 df             	mov    %rbx,%rdi
 2ee:	e8 00 00 00 00       	callq  2f3 <csum_and_copy_from_user+0x235>
			2ef: R_X86_64_PLT32	__asan_report_load4_noabort-0x4
 2f3:	8b 2b                	mov    (%rbx),%ebp
 2f5:	31 f6                	xor    %esi,%esi
 2f7:	48 c7 c7 00 00 00 00 	mov    $0x0,%rdi
			2fa: R_X86_64_32S	_ftrace_annotated_branch+0x60
 2fe:	85 ed                	test   %ebp,%ebp
 300:	40 0f 95 c6          	setne  %sil
 304:	31 c9                	xor    %ecx,%ecx
 306:	31 d2                	xor    %edx,%edx
 308:	e8 00 00 00 00       	callq  30d <csum_and_copy_from_user+0x24f>
			309: R_X86_64_PLT32	ftrace_likely_update-0x4
 30d:	85 ed                	test   %ebp,%ebp
 30f:	74 09                	je     31a <csum_and_copy_from_user+0x25c>
 311:	48 ff 05 00 00 00 00 	incq   0x0(%rip)        # 318 <csum_and_copy_from_user+0x25a>
			314: R_X86_64_PC32	_ftrace_branch+0x6c
		goto out_err;
 318:	eb 09                	jmp    323 <csum_and_copy_from_user+0x265>
	if (unlikely(*errp))
 31a:	48 ff 05 00 00 00 00 	incq   0x0(%rip)        # 321 <csum_and_copy_from_user+0x263>
			31d: R_X86_64_PC32	_ftrace_branch+0x64
 321:	eb 3f                	jmp    362 <csum_and_copy_from_user+0x2a4>
	*errp = -EFAULT;
 323:	48 89 da             	mov    %rbx,%rdx
 326:	b8 ff ff 37 00       	mov    $0x37ffff,%eax
 32b:	48 c1 e0 2a          	shl    $0x2a,%rax
 32f:	48 c1 ea 03          	shr    $0x3,%rdx
 333:	8a 14 02             	mov    (%rdx,%rax,1),%dl
 336:	48 89 d8             	mov    %rbx,%rax
 339:	83 e0 07             	and    $0x7,%eax
 33c:	83 c0 03             	add    $0x3,%eax
 33f:	38 d0                	cmp    %dl,%al
 341:	7c 0c                	jl     34f <csum_and_copy_from_user+0x291>
 343:	84 d2                	test   %dl,%dl
 345:	74 08                	je     34f <csum_and_copy_from_user+0x291>
 347:	48 89 df             	mov    %rbx,%rdi
 34a:	e8 00 00 00 00       	callq  34f <csum_and_copy_from_user+0x291>
			34b: R_X86_64_PLT32	__asan_report_store4_noabort-0x4
 34f:	c7 03 f2 ff ff ff    	movl   $0xfffffff2,(%rbx)
	memset(dst, 0, len);
 355:	49 63 d6             	movslq %r14d,%rdx
 358:	31 f6                	xor    %esi,%esi
 35a:	4c 89 ef             	mov    %r13,%rdi
 35d:	e8 00 00 00 00       	callq  362 <csum_and_copy_from_user+0x2a4>
			35e: R_X86_64_PLT32	memset-0x4
}
 362:	5a                   	pop    %rdx
 363:	44 89 e0             	mov    %r12d,%eax
 366:	5b                   	pop    %rbx
 367:	5d                   	pop    %rbp
 368:	41 5c                	pop    %r12
 36a:	41 5d                	pop    %r13
 36c:	41 5e                	pop    %r14
 36e:	41 5f                	pop    %r15
 370:	c3                   	retq
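
For reference, this is roughly what the branch profiling turns those
annotations into. It's a paraphrased sketch along the lines of
include/linux/compiler.h, not the exact macro text, so treat the details
as approximate:

	/* CONFIG_TRACE_BRANCH_PROFILING: likely()/unlikely() evaluate the
	 * condition and then report it to an out-of-line helper -- those
	 * are the ftrace_likely_update() calls in the disassembly above. */
	#define likely(x)	(__branch_check__(x, 1, __builtin_constant_p(x)))
	#define unlikely(x)	(__branch_check__(x, 0, __builtin_constant_p(x)))

	#define __branch_check__(x, expect, is_constant) ({		\
		long ______r;						\
		static struct ftrace_likely_data ______f; /* per call-site */ \
		______r = __builtin_expect(!!(x), expect);		\
		ftrace_likely_update(&______f, ______r, expect,		\
				     is_constant);			\
		______r;						\
	})

	/* CONFIG_PROFILE_ALL_BRANCHES additionally wraps every if (),
	 * bumping a per-site taken/not-taken counter -- the
	 * incq _ftrace_branch+0x... instructions above. */

So the result of user_access_begin() has to survive a real call before
the branch on it, which is presumably how the inlined return-0/return-1
paths end up merged (at 0x1ae above) ahead of the final test, and why
objtool sees STAC reaching the out_err/memset() path.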