popcnt.c 1.5 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546
  1. /* vi: set sw=4 ts=4: */
  2. /*
  3. * Utility routines.
  4. *
  5. * Copyright (C) 2024 Denys Vlasenko
  6. *
  7. * Licensed under GPLv2, see file LICENSE in this source tree.
  8. */
  9. #include "libbb.h"
  10. unsigned FAST_FUNC bb_popcnt_32(uint32_t m)
  11. {
  12. /* replace each 2 bit group with the count of set bits in it */
  13. /* 00->00 01->01 10->01 11->10 */
  14. m = m - ((m >> 1) & 0x55555555);
  15. /* in each 4 bit group, add two 2-bit counts */
  16. m = (m & 0x33333333) + ((m >> 2) & 0x33333333);
  17. /* in each 8 bit group, add two 4-bit counts (in fact, 3-bit, 0nnn with n=0..4) */
  18. m = (m + (m >> 4)) & 0x0f0f0f0f;
  19. #if 1 /* assume 32*32->32 multiply is fast */
  20. m = m * 0x01010101; /* top byte = m + (m<<8) + (m<<16) + (m<<24) */
  21. return m >> 24;
  22. #else
  23. /* 0000aaaa0000bbbb0000cccc0000dddd */
  24. /* + 0000aaaa0000bbbb0000cccc */
  25. /* = 0000xxxx000_a+b_000xxxxx000_c+d_ (we don't care about x bits) */
  26. m += m >> 8; /* in each 16-bit group, lowest 5 bits is the count */
  27. /* 0000xxxx000_a+b_000xxxxx000_c+d_ */
  28. /* + 0000xxxx000_a+b_ */
  29. /* = 0000xxxx000xxxxx00xxxxxx00a+b+cd */
  30. m += m >> 16; /* in each 32-bit group, lowest 6 bits is the count */
  31. return m & 0x3f; /* clear x bits */
  32. #endif
  33. }
  34. #if ULONG_MAX > 0xffffffff
  35. unsigned FAST_FUNC bb_popcnt_long(unsigned long m)
  36. {
  37. BUILD_BUG_ON(sizeof(m) != 8);
  38. /* 64-bit version of bb_popcnt_32 exists, but it uses 64-bit constants,
  39. * which are awkward to generate on assembly level on most CPUs.
  40. * For now, just add two 32-bit counts:
  41. */
  42. return bb_popcnt_32((uint32_t)m) + bb_popcnt_32((uint32_t)(m >> 32));
  43. }
  44. #endif