On Sat, 5 Dec 2020, 15:15 andre maute, <andre.maute@xxxxxx> wrote: > Hi list, > > I'm wondering if we have here a missed optimization > for a popcount implementation. > > https://en.cppreference.com/w/cpp/numeric/popcount > > std::popcount is available with C++20, which might not be able > on many systems for years. > > Let us consider the code below > > I'm running a Fedora 32 linux with > $ g++ --version > g++ (GCC) 10.2.1 20201016 (Red Hat 10.2.1-6) > Copyright (C) 2020 Free Software Foundation, Inc. > This is free software; see the source for copying conditions. There is NO > warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. > > $ g++ -O3 -save-temps popcount.cc -march=ivybridge > > popcount1() what I consider the standard implementation > does NOT get reduced into the relevant popcount instruction > > popcount2() what I consider the standard "trick" implementation > gets reduced into the relevant popcount instruction > > Is this expected behaviour? > You'll have to know the trick to get the optimization! > > Regards Andre > > /////////////// popcount.cc /////////////// > #include <iostream> > > unsigned int popcount1( unsigned int n ) > { > #if __has_builtin(__builtin_popcount) return __builtin_popcount(n); #endif unsigned int res = 0; > while( n != 0 ) { > res += n & 1; > n >>= 1; > } > return res; > } > > unsigned int popcount2( unsigned int n ) > { > unsigned int res = 0; > while( n != 0 ) { > res++; > n &= (n-1); > } > return res; > } > > int main() > { > for( unsigned int n = 0; n < (1 << 31); n++ ) { > if( n % 100000 == 0 ) { > std::cout << n << std::endl; > } > unsigned int c1 = popcount1(n); > unsigned int c2 = popcount2(n); > if( c1 != c2 ) { > std::cout << n << " " << c1 << " " << c2 << > std::endl; > return 1; > } > } > > return 0; > } > /////////////// popcount.cc /////////////// > > > > >