2015-01-30 16:23+0100, Paolo Bonzini: > On 30/01/2015 16:14, Radim Krčmář wrote: > > > > + case KVM_APIC_MODE_XAPIC_FLAT: > > > > + *cid = 0; > > > > + *lid = ldr & 0xff; > > > > + return true; > > > > + case KVM_APIC_MODE_XAPIC_CLUSTER: > > > > + *cid = (ldr >> 4) & 0xf; > > > > + *lid = ldr & 0xf; > > > > + return true; > > > > + case KVM_APIC_MODE_X2APIC: > > > > + *cid = ldr >> 16; > > > > + *lid = ldr & 0xffff; > > > > + return true; > > > > + } > > > >> > lid_bits = mode; > >> > cid_bits = mode & (16 | 4); > >> > lid_mask = (1 << lid_bits) - 1; > >> > cid_mask = (1 << cid_bits) - 1; > >> > > >> > *cid = (ldr >> lid_bits) & cid_mask; > >> > *lid = ldr & lid_mask; > > Would jump predictor fail on the switch? Or is size of the code that > > important? This code is shorter, but is going to execute far more > > operations, so I think it would be slower ... (And harder to read.) > > Considering the additional comparisons for the switch, I don't think > it's going to execute far more operations... (A quick assembly comparison [1].) As optimizations go, we could drop the "&" on cid as "cid < 16" is checked later, so mode=4 practically does nothing ... Not the best for future bugs, but still pretty safe -- only x2APIC can set a value that would require the "&" and it can't have valid XAPIC mode, so u32 still protects us enough. *cid = ldr >> mode; *lid = ldr & ((1 << mode) - 1); Which is probably faster a solution where we don't shuffle switch cases to jump to x2APIC first. A comparison is at the very bottom [2]. Would that be ok in v2? --- 1: To make it easier, it wasn't inlined, sorry. There are four parts, first one compares the whole functions and three subsequent compare each switch case, with common head/tail omitted. (We don't care about performance when the map is invalid.) The optimized version is always first and next to it is the old one. 0000000000001700 <apic_logical_id>: (ALI from now on) 1700: callq 1705 <ALI+0x5> 1700: callq 1705 <ALI+0x5> 1705: push %rbp 1705: movzbl 0x10(%rdi),%eax 1706: movzbl 0x10(%rdi),%r8d 1709: push %rbp 170b: mov %rcx,%r10 170a: mov %rsp,%rbp 170e: xor %eax,%eax 170d: cmp $0x8,%al 1710: mov %rsp,%rbp 170f: je 1758 <ALI+0x58> 1713: lea -0x1(%r8),%ecx 1711: cmp $0x10,%al 1717: test %r8d,%ecx 1713: je 1740 <ALI+0x40> 171a: jne 1752 <ALI+0x52> 1715: cmp $0x4,%al 171c: mov %r8d,%edi 1717: je 1720 <ALI+0x20> 171f: mov $0x1,%eax 1719: xor %eax,%eax 1724: mov %esi,%r8d 171b: pop %rbp 1727: mov %edi,%ecx 171c: retq 1729: mov %eax,%r9d 171d: nopl (%rax) 172c: shr %cl,%r8d 1720: mov %esi,%eax 172f: and $0x14,%ecx 1722: and $0xf,%esi 1732: shl %cl,%r9d 1725: shr $0x4,%eax 1735: mov %edi,%ecx 1728: and $0xf,%eax 1737: shl %cl,%eax 172b: mov %ax,(%rdx) 1739: sub $0x1,%r9d 172e: mov $0x1,%eax 173d: sub $0x1,%eax 1733: mov %si,(%rcx) 1740: and %r9d,%r8d 1736: pop %rbp 1743: and %esi,%eax 1737: retq 1745: mov %r8w,(%rdx) 1738: nopl 0x0(%rax,%rax,1) 1749: mov %ax,(%r10) 173f: 174d: mov $0x1,%eax 1740: mov %esi,%eax 1752: pop %rbp 1742: shr $0x10,%eax 1753: retq 1745: mov %ax,(%rdx) 1748: mov $0x1,%eax 174d: mov %si,(%rcx) 1750: pop %rbp 1751: retq 1752: nopw 0x0(%rax,%rax,1) 1758: xor %eax,%eax 175a: and $0xff,%si 175f: mov %ax,(%rdx) 1762: mov $0x1,%eax 1767: mov %si,(%rcx) 176a: pop %rbp 176b: retq 170b: mov %rcx,%r10 170a: mov %rsp,%rbp 170e: xor %eax,%eax 170d: cmp $0x8,%al 1710: mov %rsp,%rbp 170f: je 1758 <ALI+0x58> 1713: lea -0x1(%r8),%ecx [jump] 1717: test %r8d,%ecx 1758: xor %eax,%eax 171a: jne 1752 <ALI+0x52> 175a: and $0xff,%si 171c: mov %r8d,%edi 175f: mov %ax,(%rdx) 171f: mov $0x1,%eax 1762: mov $0x1,%eax 1724: mov %esi,%r8d 1767: mov %si,(%rcx) 1727: mov %edi,%ecx 1729: mov %eax,%r9d 172c: shr %cl,%r8d 172f: and $0x14,%ecx 1732: shl %cl,%r9d 1735: mov %edi,%ecx 1737: shl %cl,%eax 1739: sub $0x1,%r9d 173d: sub $0x1,%eax 1740: and %r9d,%r8d 1743: and %esi,%eax 1745: mov %r8w,(%rdx) 1749: mov %ax,(%r10) 174d: mov $0x1,%eax 170b: mov %rcx,%r10 170a: mov %rsp,%rbp 170e: xor %eax,%eax 170d: cmp $0x8,%al 1710: mov %rsp,%rbp 170f: je 1758 <ALI+0x58> 1713: lea -0x1(%r8),%ecx 1711: cmp $0x10,%al 1717: test %r8d,%ecx 1713: je 1740 <ALI+0x40> 171a: jne 1752 <ALI+0x52> [jump] 171c: mov %r8d,%edi 1740: mov %esi,%eax 171f: mov $0x1,%eax 1742: shr $0x10,%eax 1724: mov %esi,%r8d 1745: mov %ax,(%rdx) 1727: mov %edi,%ecx 1748: mov $0x1,%eax 1729: mov %eax,%r9d 174d: mov %si,(%rcx) 172c: shr %cl,%r8d 172f: and $0x14,%ecx 1732: shl %cl,%r9d 1735: mov %edi,%ecx 1737: shl %cl,%eax 1739: sub $0x1,%r9d 173d: sub $0x1,%eax 1740: and %r9d,%r8d 1743: and %esi,%eax 1745: mov %r8w,(%rdx) 1749: mov %ax,(%r10) 174d: mov $0x1,%eax 170b: mov %rcx,%r10 170a: mov %rsp,%rbp 170e: xor %eax,%eax 170d: cmp $0x8,%al 1710: mov %rsp,%rbp 170f: je 1758 <ALI+0x58> 1713: lea -0x1(%r8),%ecx 1711: cmp $0x10,%al 1717: test %r8d,%ecx 1713: je 1740 <ALI+0x40> 171a: jne 1752 <ALI+0x52> 1715: cmp $0x4,%al 171c: mov %r8d,%edi 1717: je 1720 <ALI+0x20> 171f: mov $0x1,%eax [jump] 1724: mov %esi,%r8d 1720: mov %esi,%eax 1727: mov %edi,%ecx 1722: and $0xf,%esi 1729: mov %eax,%r9d 1725: shr $0x4,%eax 172c: shr %cl,%r8d 1728: and $0xf,%eax 172f: and $0x14,%ecx 172b: mov %ax,(%rdx) 1732: shl %cl,%r9d 172e: mov $0x1,%eax 1735: mov %edi,%ecx 1733: mov %si,(%rcx) 1737: shl %cl,%eax 1739: sub $0x1,%r9d 173d: sub $0x1,%eax 1740: and %r9d,%r8d 1743: and %esi,%eax 1745: mov %r8w,(%rdx) 1749: mov %ax,(%r10) 174d: mov $0x1,%eax 2: A comparison of aggressively optimized ALI with the worst case scenario, where x2APIC is the third jump. 16e5: mov %rcx,%r9 170a: mov %rsp,%rbp 16ed: xor %eax,%eax 170d: cmp $0x8,%al 16ef: mov %rsp,%rbp 170f: je 1758 <ALI+0x58> 16f2: lea -0x1(%rcx),%r8d 1711: cmp $0x10,%al 16f6: test %ecx,%r8d 1713: je 1740 <ALI+0x40> 16f9: jne 1717 <ALI+0x37> 1715: cmp $0x4,%al 16fb: mov %esi,%eax 1717: je 1720 <ALI+0x20> 16fd: shr %cl,%eax [jump] 16ff: mov %ax,(%rdx) 1720: mov %esi,%eax 1702: mov $0x1,%eax 1722: and $0xf,%esi 1707: shl %cl,%eax 1725: shr $0x4,%eax 1709: sub $0x1,%eax 1728: and $0xf,%eax 170c: and %esi,%eax 172b: mov %ax,(%rdx) 170e: mov %ax,(%r9) 172e: mov $0x1,%eax 1712: mov $0x1,%eax 1733: mov %si,(%rcx) -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html