Hi Everyone, I'm trying to use _addcarryx_u64. GCC 7.1 is still generating ADD and ADC instead of ADCX or ADOX: $ objdump --disassemble test.o | c++filt | egrep 'adc|adcx|adox' a4: 48 11 c8 adc %rcx,%rax 10f: 48 11 c8 adc %rcx,%rax 17a: 48 11 c8 adc %rcx,%rax 1e5: 48 11 c8 adc %rcx,%rax 24a: 48 11 c8 adc %rcx,%rax 29d: 48 11 c8 adc %rcx,%rax 2f0: 48 11 c8 adc %rcx,%rax 337: 48 11 c8 adc %rcx,%rax Below is the test program, the objdump and the CPU features as reported in /proc/cpuinfo. Unnecessary stuff was removed from objdump, like Print and static initializers. The bug reporter is still hanging, so I cannot search for bug reports. It would be nice if someone fixed the web server problem, too. It's been a problem for 3 to 6 months now. Any ideas what I am doing wrong with respect to ADX? Thanks in advance. **************************************** $ g++ -Wall -march=native test.cxx -c **************************************** skylake:cryptopp-fork$ objdump --disassemble test.o | c++filt test.o: file format elf64-x86-64 Disassembly of section .text: 0000000000000000 <unsigned char Add<8u>(unsigned long*, unsigned long const*, unsigned long const*)>: 0: 55 push %rbp 1: 48 89 e5 mov %rsp,%rbp 4: 48 81 ec c0 00 00 00 sub $0xc0,%rsp b: 48 89 bd d8 fe ff ff mov %rdi,-0x128(%rbp) 12: 48 89 b5 d0 fe ff ff mov %rsi,-0x130(%rbp) 19: 48 89 95 c8 fe ff ff mov %rdx,-0x138(%rbp) 20: 48 8b 85 d0 fe ff ff mov -0x130(%rbp),%rax 27: 48 89 45 f8 mov %rax,-0x8(%rbp) 2b: 48 8b 85 c8 fe ff ff mov -0x138(%rbp),%rax 32: 48 89 45 f0 mov %rax,-0x10(%rbp) 36: 48 8b 85 d8 fe ff ff mov -0x128(%rbp),%rax 3d: 48 89 45 e8 mov %rax,-0x18(%rbp) 41: c6 45 e7 00 movb $0x0,-0x19(%rbp) 45: 48 8b 45 e8 mov -0x18(%rbp),%rax 49: 48 8d 70 38 lea 0x38(%rax),%rsi 4d: 48 8b 45 f0 mov -0x10(%rbp),%rax 51: 48 83 c0 38 add $0x38,%rax 55: 48 8b 00 mov (%rax),%rax 58: 48 8b 55 f8 mov -0x8(%rbp),%rdx 5c: 48 83 c2 38 add $0x38,%rdx 60: 48 8b 12 mov (%rdx),%rdx 63: 0f b6 4d e7 movzbl -0x19(%rbp),%ecx 67: 88 8d 07 ff ff ff 
mov %cl,-0xf9(%rbp) 6d: 48 89 95 f8 fe ff ff mov %rdx,-0x108(%rbp) 74: 48 89 85 f0 fe ff ff mov %rax,-0x110(%rbp) 7b: 48 89 b5 e8 fe ff ff mov %rsi,-0x118(%rbp) 82: 0f b6 85 07 ff ff ff movzbl -0xf9(%rbp),%eax 89: 89 c6 mov %eax,%esi 8b: 48 8b 85 f8 fe ff ff mov -0x108(%rbp),%rax 92: 48 8b 8d f0 fe ff ff mov -0x110(%rbp),%rcx 99: 48 8b 95 e8 fe ff ff mov -0x118(%rbp),%rdx a0: 40 80 c6 ff add $0xff,%sil a4: 48 11 c8 adc %rcx,%rax a7: 0f 92 c1 setb %cl aa: 48 89 02 mov %rax,(%rdx) ad: 88 4d e7 mov %cl,-0x19(%rbp) b0: 48 8b 45 e8 mov -0x18(%rbp),%rax b4: 48 8d 70 30 lea 0x30(%rax),%rsi b8: 48 8b 45 f0 mov -0x10(%rbp),%rax bc: 48 83 c0 30 add $0x30,%rax c0: 48 8b 00 mov (%rax),%rax c3: 48 8b 55 f8 mov -0x8(%rbp),%rdx c7: 48 83 c2 30 add $0x30,%rdx cb: 48 8b 12 mov (%rdx),%rdx ce: 0f b6 4d e7 movzbl -0x19(%rbp),%ecx d2: 88 8d 27 ff ff ff mov %cl,-0xd9(%rbp) d8: 48 89 95 18 ff ff ff mov %rdx,-0xe8(%rbp) df: 48 89 85 10 ff ff ff mov %rax,-0xf0(%rbp) e6: 48 89 b5 08 ff ff ff mov %rsi,-0xf8(%rbp) ed: 0f b6 85 27 ff ff ff movzbl -0xd9(%rbp),%eax f4: 89 c6 mov %eax,%esi f6: 48 8b 85 18 ff ff ff mov -0xe8(%rbp),%rax fd: 48 8b 8d 10 ff ff ff mov -0xf0(%rbp),%rcx 104: 48 8b 95 08 ff ff ff mov -0xf8(%rbp),%rdx 10b: 40 80 c6 ff add $0xff,%sil 10f: 48 11 c8 adc %rcx,%rax 112: 0f 92 c1 setb %cl 115: 48 89 02 mov %rax,(%rdx) 118: 88 4d e7 mov %cl,-0x19(%rbp) 11b: 48 8b 45 e8 mov -0x18(%rbp),%rax 11f: 48 8d 70 28 lea 0x28(%rax),%rsi 123: 48 8b 45 f0 mov -0x10(%rbp),%rax 127: 48 83 c0 28 add $0x28,%rax 12b: 48 8b 00 mov (%rax),%rax 12e: 48 8b 55 f8 mov -0x8(%rbp),%rdx 132: 48 83 c2 28 add $0x28,%rdx 136: 48 8b 12 mov (%rdx),%rdx 139: 0f b6 4d e7 movzbl -0x19(%rbp),%ecx 13d: 88 8d 47 ff ff ff mov %cl,-0xb9(%rbp) 143: 48 89 95 38 ff ff ff mov %rdx,-0xc8(%rbp) 14a: 48 89 85 30 ff ff ff mov %rax,-0xd0(%rbp) 151: 48 89 b5 28 ff ff ff mov %rsi,-0xd8(%rbp) 158: 0f b6 85 47 ff ff ff movzbl -0xb9(%rbp),%eax 15f: 89 c6 mov %eax,%esi 161: 48 8b 85 38 ff ff ff mov -0xc8(%rbp),%rax 168: 48 8b 8d 30 
ff ff ff mov -0xd0(%rbp),%rcx 16f: 48 8b 95 28 ff ff ff mov -0xd8(%rbp),%rdx 176: 40 80 c6 ff add $0xff,%sil 17a: 48 11 c8 adc %rcx,%rax 17d: 0f 92 c1 setb %cl 180: 48 89 02 mov %rax,(%rdx) 183: 88 4d e7 mov %cl,-0x19(%rbp) 186: 48 8b 45 e8 mov -0x18(%rbp),%rax 18a: 48 8d 70 20 lea 0x20(%rax),%rsi 18e: 48 8b 45 f0 mov -0x10(%rbp),%rax 192: 48 83 c0 20 add $0x20,%rax 196: 48 8b 00 mov (%rax),%rax 199: 48 8b 55 f8 mov -0x8(%rbp),%rdx 19d: 48 83 c2 20 add $0x20,%rdx 1a1: 48 8b 12 mov (%rdx),%rdx 1a4: 0f b6 4d e7 movzbl -0x19(%rbp),%ecx 1a8: 88 8d 67 ff ff ff mov %cl,-0x99(%rbp) 1ae: 48 89 95 58 ff ff ff mov %rdx,-0xa8(%rbp) 1b5: 48 89 85 50 ff ff ff mov %rax,-0xb0(%rbp) 1bc: 48 89 b5 48 ff ff ff mov %rsi,-0xb8(%rbp) 1c3: 0f b6 85 67 ff ff ff movzbl -0x99(%rbp),%eax 1ca: 89 c6 mov %eax,%esi 1cc: 48 8b 85 58 ff ff ff mov -0xa8(%rbp),%rax 1d3: 48 8b 8d 50 ff ff ff mov -0xb0(%rbp),%rcx 1da: 48 8b 95 48 ff ff ff mov -0xb8(%rbp),%rdx 1e1: 40 80 c6 ff add $0xff,%sil 1e5: 48 11 c8 adc %rcx,%rax 1e8: 0f 92 c1 setb %cl 1eb: 48 89 02 mov %rax,(%rdx) 1ee: 88 4d e7 mov %cl,-0x19(%rbp) 1f1: 48 8b 45 e8 mov -0x18(%rbp),%rax 1f5: 48 8d 70 18 lea 0x18(%rax),%rsi 1f9: 48 8b 45 f0 mov -0x10(%rbp),%rax 1fd: 48 83 c0 18 add $0x18,%rax 201: 48 8b 00 mov (%rax),%rax 204: 48 8b 55 f8 mov -0x8(%rbp),%rdx 208: 48 83 c2 18 add $0x18,%rdx 20c: 48 8b 12 mov (%rdx),%rdx 20f: 0f b6 4d e7 movzbl -0x19(%rbp),%ecx 213: 88 4d 87 mov %cl,-0x79(%rbp) 216: 48 89 95 78 ff ff ff mov %rdx,-0x88(%rbp) 21d: 48 89 85 70 ff ff ff mov %rax,-0x90(%rbp) 224: 48 89 b5 68 ff ff ff mov %rsi,-0x98(%rbp) 22b: 0f b6 45 87 movzbl -0x79(%rbp),%eax 22f: 89 c6 mov %eax,%esi 231: 48 8b 85 78 ff ff ff mov -0x88(%rbp),%rax 238: 48 8b 8d 70 ff ff ff mov -0x90(%rbp),%rcx 23f: 48 8b 95 68 ff ff ff mov -0x98(%rbp),%rdx 246: 40 80 c6 ff add $0xff,%sil 24a: 48 11 c8 adc %rcx,%rax 24d: 0f 92 c1 setb %cl 250: 48 89 02 mov %rax,(%rdx) 253: 88 4d e7 mov %cl,-0x19(%rbp) 256: 48 8b 45 e8 mov -0x18(%rbp),%rax 25a: 48 8d 70 10 lea 
0x10(%rax),%rsi 25e: 48 8b 45 f0 mov -0x10(%rbp),%rax 262: 48 83 c0 10 add $0x10,%rax 266: 48 8b 00 mov (%rax),%rax 269: 48 8b 55 f8 mov -0x8(%rbp),%rdx 26d: 48 83 c2 10 add $0x10,%rdx 271: 48 8b 12 mov (%rdx),%rdx 274: 0f b6 4d e7 movzbl -0x19(%rbp),%ecx 278: 88 4d a7 mov %cl,-0x59(%rbp) 27b: 48 89 55 98 mov %rdx,-0x68(%rbp) 27f: 48 89 45 90 mov %rax,-0x70(%rbp) 283: 48 89 75 88 mov %rsi,-0x78(%rbp) 287: 0f b6 45 a7 movzbl -0x59(%rbp),%eax 28b: 89 c6 mov %eax,%esi 28d: 48 8b 45 98 mov -0x68(%rbp),%rax 291: 48 8b 4d 90 mov -0x70(%rbp),%rcx 295: 48 8b 55 88 mov -0x78(%rbp),%rdx 299: 40 80 c6 ff add $0xff,%sil 29d: 48 11 c8 adc %rcx,%rax 2a0: 0f 92 c1 setb %cl 2a3: 48 89 02 mov %rax,(%rdx) 2a6: 88 4d e7 mov %cl,-0x19(%rbp) 2a9: 48 8b 45 e8 mov -0x18(%rbp),%rax 2ad: 48 8d 70 08 lea 0x8(%rax),%rsi 2b1: 48 8b 45 f0 mov -0x10(%rbp),%rax 2b5: 48 83 c0 08 add $0x8,%rax 2b9: 48 8b 00 mov (%rax),%rax 2bc: 48 8b 55 f8 mov -0x8(%rbp),%rdx 2c0: 48 83 c2 08 add $0x8,%rdx 2c4: 48 8b 12 mov (%rdx),%rdx 2c7: 0f b6 4d e7 movzbl -0x19(%rbp),%ecx 2cb: 88 4d c7 mov %cl,-0x39(%rbp) 2ce: 48 89 55 b8 mov %rdx,-0x48(%rbp) 2d2: 48 89 45 b0 mov %rax,-0x50(%rbp) 2d6: 48 89 75 a8 mov %rsi,-0x58(%rbp) 2da: 0f b6 45 c7 movzbl -0x39(%rbp),%eax 2de: 89 c6 mov %eax,%esi 2e0: 48 8b 45 b8 mov -0x48(%rbp),%rax 2e4: 48 8b 4d b0 mov -0x50(%rbp),%rcx 2e8: 48 8b 55 a8 mov -0x58(%rbp),%rdx 2ec: 40 80 c6 ff add $0xff,%sil 2f0: 48 11 c8 adc %rcx,%rax 2f3: 0f 92 c1 setb %cl 2f6: 48 89 02 mov %rax,(%rdx) 2f9: 88 4d e7 mov %cl,-0x19(%rbp) 2fc: 48 8b 45 f0 mov -0x10(%rbp),%rax 300: 48 8b 00 mov (%rax),%rax 303: 48 8b 55 f8 mov -0x8(%rbp),%rdx 307: 48 8b 12 mov (%rdx),%rdx 30a: 0f b6 4d e7 movzbl -0x19(%rbp),%ecx 30e: 88 4d e6 mov %cl,-0x1a(%rbp) 311: 48 89 55 d8 mov %rdx,-0x28(%rbp) 315: 48 89 45 d0 mov %rax,-0x30(%rbp) 319: 48 8b 45 e8 mov -0x18(%rbp),%rax 31d: 48 89 45 c8 mov %rax,-0x38(%rbp) 321: 0f b6 45 e6 movzbl -0x1a(%rbp),%eax 325: 89 c6 mov %eax,%esi 327: 48 8b 45 d8 mov -0x28(%rbp),%rax 32b: 48 8b 4d 
d0 mov -0x30(%rbp),%rcx 32f: 48 8b 55 c8 mov -0x38(%rbp),%rdx 333: 40 80 c6 ff add $0xff,%sil 337: 48 11 c8 adc %rcx,%rax 33a: 0f 92 c1 setb %cl 33d: 48 89 02 mov %rax,(%rdx) 340: 88 4d e7 mov %cl,-0x19(%rbp) 343: 0f b6 45 e7 movzbl -0x19(%rbp),%eax 347: c9 leaveq 348: c3 retq 0000000000000349 <main>: 349: 55 push %rbp 34a: 48 89 e5 mov %rsp,%rbp 34d: 53 push %rbx 34e: 48 81 ec f8 02 00 00 sub $0x2f8,%rsp 355: 89 bd 0c fd ff ff mov %edi,-0x2f4(%rbp) 35b: 48 89 b5 00 fd ff ff mov %rsi,-0x300(%rbp) 362: 48 8d 85 10 fd ff ff lea -0x2f0(%rbp),%rax 369: 48 89 c7 mov %rax,%rdi 36c: e8 00 00 00 00 callq 371 <main+0x28> 371: 48 8d 85 10 fd ff ff lea -0x2f0(%rbp),%rax 378: ba 08 00 00 00 mov $0x8,%edx 37d: be 00 00 00 00 mov $0x0,%esi 382: 48 89 c7 mov %rax,%rdi 385: e8 00 00 00 00 callq 38a <main+0x41> 38a: 48 8d 8d 60 ff ff ff lea -0xa0(%rbp),%rcx 391: 48 8d 85 10 fd ff ff lea -0x2f0(%rbp),%rax 398: ba 40 00 00 00 mov $0x40,%edx 39d: 48 89 ce mov %rcx,%rsi 3a0: 48 89 c7 mov %rax,%rdi 3a3: e8 00 00 00 00 callq 3a8 <main+0x5f> 3a8: 48 8d 8d 20 ff ff ff lea -0xe0(%rbp),%rcx 3af: 48 8d 85 10 fd ff ff lea -0x2f0(%rbp),%rax 3b6: ba 40 00 00 00 mov $0x40,%edx 3bb: 48 89 ce mov %rcx,%rsi 3be: 48 89 c7 mov %rax,%rdi 3c1: e8 00 00 00 00 callq 3c6 <main+0x7d> 3c6: 48 8d 95 20 ff ff ff lea -0xe0(%rbp),%rdx 3cd: 48 8d 8d 60 ff ff ff lea -0xa0(%rbp),%rcx 3d4: 48 8d 45 a0 lea -0x60(%rbp),%rax 3d8: 48 89 ce mov %rcx,%rsi 3db: 48 89 c7 mov %rax,%rdi 3de: e8 00 00 00 00 callq 3e3 <main+0x9a> 3e3: 88 45 ef mov %al,-0x11(%rbp) 3e6: be 00 00 00 00 mov $0x0,%esi 3eb: bf 00 00 00 00 mov $0x0,%edi 3f0: e8 00 00 00 00 callq 3f5 <main+0xac> 3f5: 48 8d 85 60 ff ff ff lea -0xa0(%rbp),%rax 3fc: 48 89 c7 mov %rax,%rdi 3ff: e8 00 00 00 00 callq 404 <main+0xbb> 404: be 00 00 00 00 mov $0x0,%esi 409: bf 00 00 00 00 mov $0x0,%edi 40e: e8 00 00 00 00 callq 413 <main+0xca> 413: 48 8d 85 20 ff ff ff lea -0xe0(%rbp),%rax 41a: 48 89 c7 mov %rax,%rdi 41d: e8 00 00 00 00 callq 422 <main+0xd9> 422: be 00 00 00 00 
mov $0x0,%esi 427: bf 00 00 00 00 mov $0x0,%edi 42c: e8 00 00 00 00 callq 431 <main+0xe8> 431: 80 7d ef 00 cmpb $0x0,-0x11(%rbp) 435: 74 0a je 441 <main+0xf8> 437: bf 31 00 00 00 mov $0x31,%edi 43c: e8 00 00 00 00 callq 441 <main+0xf8> 441: 48 8d 45 a0 lea -0x60(%rbp),%rax 445: 48 89 c7 mov %rax,%rdi 448: e8 00 00 00 00 callq 44d <main+0x104> 44d: bb 00 00 00 00 mov $0x0,%ebx 452: 48 8d 85 10 fd ff ff lea -0x2f0(%rbp),%rax 459: 48 89 c7 mov %rax,%rdi 45c: e8 00 00 00 00 callq 461 <main+0x118> 461: 89 d8 mov %ebx,%eax 463: eb 1d jmp 482 <main+0x139> 465: 48 89 c3 mov %rax,%rbx 468: 48 8d 85 10 fd ff ff lea -0x2f0(%rbp),%rax 46f: 48 89 c7 mov %rax,%rdi 472: e8 00 00 00 00 callq 477 <main+0x12e> 477: 48 89 d8 mov %rbx,%rax 47a: 48 89 c7 mov %rax,%rdi 47d: e8 00 00 00 00 callq 482 <main+0x139> 482: 48 81 c4 f8 02 00 00 add $0x2f8,%rsp 489: 5b pop %rbx 48a: 5d pop %rbp 48b: c3 retq **************************************** $ cat -n test.cxx 1 #include <iostream> 2 #include <string> 3 #include <fstream> 4 #include <cstdio> 5 #include <cstdint> 6 #include <cstring> 7 8 #include <immintrin.h> 9 #if defined(__GNUC__) 10 # include <adxintrin.h> 11 #endif 12 13 #if !defined(__ADX__) 14 # error ADX is not available 15 #endif 16 17 template<unsigned int N> 18 void Print(uint64_t vals[]) 19 { 20 for (size_t i=0; i<N; ++i) 21 { 22 printf("%08X", static_cast<uint32_t>(vals[i] >> 32)); 23 printf("%08X", static_cast<uint32_t>(vals[i])); 24 } 25 printf("\n"); 26 } 27 28 template<unsigned int N> 29 uint8_t Add(uint64_t C[], const uint64_t A[], const uint64_t B[]) 30 { 31 const ssize_t NN = static_cast<ssize_t>(N); 32 const long long unsigned int* AA = reinterpret_cast<const long long unsigned int*>(A); 33 const long long unsigned int* BB = reinterpret_cast<const long long unsigned int*>(B); 34 long long unsigned int* CC = reinterpret_cast<long long unsigned int*>(C); 35 36 uint8_t carry = 0; 37 for (ssize_t i=NN-1; i>=0; --i) 38 carry = _addcarryx_u64(carry, AA[i], BB[i], &CC[i]); 39 
40 return carry; 41 } 42 43 template<> 44 uint8_t Add<8>(uint64_t C[], const uint64_t A[], const uint64_t B[]) 45 { 46 const long long unsigned int* AA = reinterpret_cast<const long long unsigned int*>(A); 47 const long long unsigned int* BB = reinterpret_cast<const long long unsigned int*>(B); 48 long long unsigned int* CC = reinterpret_cast<long long unsigned int*>(C); 49 50 uint8_t carry = 0; 51 52 carry = _addcarryx_u64(carry, AA[7], BB[7], &CC[7]); 53 carry = _addcarryx_u64(carry, AA[6], BB[6], &CC[6]); 54 carry = _addcarryx_u64(carry, AA[5], BB[5], &CC[5]); 55 carry = _addcarryx_u64(carry, AA[4], BB[4], &CC[4]); 56 carry = _addcarryx_u64(carry, AA[3], BB[3], &CC[3]); 57 carry = _addcarryx_u64(carry, AA[2], BB[2], &CC[2]); 58 carry = _addcarryx_u64(carry, AA[1], BB[1], &CC[1]); 59 carry = _addcarryx_u64(carry, AA[0], BB[0], &CC[0]); 60 61 return carry; 62 } 63 64 int main(int argc, char* argv[]) 65 { 66 enum {COUNT=8}; 67 uint64_t C[COUNT], A[COUNT], B[COUNT]; 68 69 std::ifstream in; 70 in.open("/dev/urandom"); 71 in.read(reinterpret_cast<char*>(A), sizeof(A)); 72 in.read(reinterpret_cast<char*>(B), sizeof(B)); 73 74 uint8_t carry = Add<COUNT>(C, A, B); 75 76 std::cout << "A:\n "; 77 Print<COUNT>(A); 78 79 std::cout << "B:\n "; 80 Print<COUNT>(B); 81 82 std::cout << "C:\n"; 83 if (carry) printf("1"); 84 Print<COUNT>(C); 85 86 return 0; 87 } **************************************** $ cat /proc/cpuinfo processor : 0 vendor_id : GenuineIntel cpu family : 6 model : 94 model name : Intel(R) Core(TM) i5-6400 CPU @ 2.70GHz stepping : 3 microcode : 0xba cpu MHz : 799.914 cache size : 6144 KB physical id : 0 siblings : 4 core id : 0 cpu cores : 4 apicid : 0 initial apicid : 0 fpu : yes fpu_exception : yes cpuid level : 22 wp : yes flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc art arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc cpuid 
aperfmperf tsc_known_freq pni pclmulqdq dtes64 monitor ds_cpl vmx est tm2 ssse3 sdbg fma cx16 xtpr pdcm pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm 3dnowprefetch cpuid_fault intel_pt tpr_shadow vnmi flexpriority ept vpid fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid mpx rdseed adx smap clflushopt xsaveopt xsavec xgetbv1 xsaves dtherm ida arat pln pts hwp hwp_notify hwp_act_window hwp_epp bugs : bogomips : 5424.00 clflush size : 64 cache_alignment : 64 address sizes : 39 bits physical, 48 bits virtual