/*** Copyright (C) 2022 J Reece Wilson (a/k/a "Reece"). All rights reserved. File: auBitsUtils.hpp Date: 2022-2-1 Author: Reece ***/ #pragma once // options: _AU_FORCE_NO_FLIP8_MAP, _AU_DO_NOT_USE_BITFLIP_U16, _AU_DO_NOT_USE_BITFLIP_U32 namespace __audetail { AU_INLINE_17 const constexpr AuUInt8 kFlipBitsU4Lookup[16] { 0x0, 0x8, 0x4, 0xc, 0x2, 0xa, 0x6, 0xe, 0x1, 0x9, 0x5, 0xd, 0x3, 0xb, 0x7, 0xf }; #if !defined(_AU_FORCE_NO_FLIP8_MAP) #define _AU_HAS_FLIP8 AU_INLINE_17 const constexpr AuUInt8 kFlipBitsU8Lookup[256] { 0, 128, 64, 192, 32, 160, 96, 224, 16, 144, 80, 208, 48, 176, 112, 240, 8, 136, 72, 200, 40, 168, 104, 232, 24, 152, 88, 216, 56, 184, 120, 248, 4, 132, 68, 196, 36, 164, 100, 228, 20, 148, 84, 212, 52, 180, 116, 244, 12, 140, 76, 204, 44, 172, 108, 236, 28, 156, 92, 220, 60, 188, 124, 252, 2, 130, 66, 194, 34, 162, 98, 226, 18, 146, 82, 210, 50, 178, 114, 242, 10, 138, 74, 202, 42, 170, 106, 234, 26, 154, 90, 218, 58, 186, 122, 250, 6, 134, 70, 198, 38, 166, 102, 230, 22, 150, 86, 214, 54, 182, 118, 246, 14, 142, 78, 206, 46, 174, 110, 238, 30, 158, 94, 222, 62, 190, 126, 254, 1, 129, 65, 193, 33, 161, 97, 225, 17, 145, 81, 209, 49, 177, 113, 241, 9, 137, 73, 201, 41, 169, 105, 233, 25, 153, 89, 217, 57, 185, 121, 249, 5, 133, 69, 197, 37, 165, 101, 229, 21, 149, 85, 213, 53, 181, 117, 245, 13, 141, 77, 205, 45, 173, 109, 237, 29, 157, 93, 221, 61, 189, 125, 253, 3, 131, 67, 195, 35, 163, 99, 227, 19, 147, 83, 211, 51, 179, 115, 243, 11, 139, 75, 203, 43, 171, 107, 235, 27, 155, 91, 219, 59, 187, 123, 251, 7, 135, 71, 199, 39, 167, 103, 231, 23, 151, 87, 215, 55, 183, 119, 247, 15, 143, 79, 207, 47, 175, 111, 239, 31, 159, 95, 223, 63, 191, 127, 255 }; #endif } #if defined(AURORA_ARCH_ARM) || defined(AURORA_ARCH_ARM32) || defined(AURORA_ARCH_ARM64) #define AU_RBITS_CONSTEXPR #if !defined(AU_RBITS_ARM_SMIS) && (defined(AURORA_COMPILER_CLANG) || defined(AURORA_COMPILER_GCC)) #define AU_RBITS_ARM_GCC #elif !defined(AU_RBITS_ARM_SMIS) // https://github.com/ARM-software/CMSIS_5/blob/develop/CMSIS/Core/Include/cmsis_gcc.h // assume we have this #define AU_RBITS_ARM_SMIS #endif #define AU_RBITS_ARM #endif #if !defined(AU_RBITS_CONSTEXPR) #define AU_RBITS_CONSTEXPR constexpr #endif template AU_OPTIMIZED bool AuTestBit(T value, AuUInt8 idx) { return value & (T(1) << T(idx)); } template AU_OPTIMIZED bool AuBitTest(T value, AuUInt8 idx) { return value & (T(1) << T(idx)); } template AU_OPTIMIZED void AuSetBit(T &value, AuUInt8 idx) { value |= T(1) << T(idx); } template AU_OPTIMIZED void AuBitSet(T &value, AuUInt8 idx) { value |= T(1) << T(idx); } template AU_OPTIMIZED void AuClearBit(T &value, AuUInt8 idx) { value &= ~(T(1) << T(idx)); } template AU_OPTIMIZED void AuBitClear(T &value, AuUInt8 idx) { value &= ~(T(1) << T(idx)); } /// Bit scan forward from LSB to MSB, returning LSB-indexed offset in index and true if found. /// xref MSDN for "_BitScanForward" intrin /// (+ u64 variants work on 32bit targets) template AU_OPTIMIZED bool AuBitScanForward(AuUInt8 &index, T value) { unsigned long ret; bool success; success = false; index = 0; #if defined(AURORA_COMPILER_MSVC) if constexpr (sizeof(T) == sizeof(AuUInt64)) { #if defined(AURORA_IS_32BIT) if (!(success = _BitScanForward(&ret, static_cast(value & 0xffffffff)))) { if (!_BitScanForward(&ret, static_cast((value >> 32) & 0xffffffff))) { return false; } ret += 32; } #else success = _BitScanForward64(&ret, static_cast(value)); #endif } else { success = _BitScanForward(&ret, static_cast(value)); } #elif defined(AURORA_COMPILER_GCC) || defined(AURORA_COMPILER_CLANG) if (value == 0) { return false; } if constexpr (sizeof(T) == sizeof(AuUInt64)) { #if defined(AURORA_IS_32BIT) auto lower = static_cast(value & 0xffffffff); if (lower == 0) { ret = __builtin_ctzl(static_cast((value >> 32) & 0xffffffff)); ret += 32; } else { ret = __builtin_ctzl(static_cast(lower)); } #else ret = __builtin_ctzll(static_cast(value)); #endif } else if constexpr (sizeof(T) == sizeof(unsigned long)) { ret = __builtin_ctzl(static_cast(value)); } else if constexpr (sizeof(T) == sizeof(unsigned int)) { ret = __builtin_ctz(static_cast(value)); } success = true; #endif index = ret; return success; } /// Bit scan reverse from MSB to LSB, returning LSB-indexed offset in index and true if found. /// xref MSDN for "_BitScanReverse" intrin /// (+ u64 variants work on 32bit targets) template AU_OPTIMIZED bool AuBitScanReverse(AuUInt8 &index, T value) { unsigned long ret; bool success; success = false; index = 0; #if defined(AURORA_COMPILER_MSVC) if constexpr (sizeof(T) == sizeof(AuUInt64)) { #if defined(AURORA_IS_32BIT) if (!(success = _BitScanReverse(&ret, static_cast((value >> 32) & 0xffffffff)))) { if (!_BitScanReverse(&ret, static_cast(value & 0xffffffff))) { return false; } } else { ret += 32; } #else success = _BitScanReverse64(&ret, static_cast(value)); #endif } else { success = _BitScanReverse(&ret, static_cast(value)); } #elif defined(AURORA_COMPILER_GCC) || defined(AURORA_COMPILER_CLANG) if (value == 0) { return false; } if constexpr (sizeof(T) == sizeof(AuUInt64)) { #if defined(AURORA_IS_32BIT) auto higher = static_cast((value >> 32) & 0xffffffff); if (higher == 0) { ret = __builtin_clzl(static_cast(value & 0xffffffff)); ret = 31 - ret; } else { ret = __builtin_clzl(static_cast(higher)); ret = 63 - ret; } #else ret = __builtin_clzll(static_cast(value)); ret = 63 - ret; #endif } else if constexpr (sizeof(T) == sizeof(unsigned long)) { ret = __builtin_clzl(static_cast(value)); if constexpr (sizeof(unsigned long) == 4) { ret = 31 - ret; } else if constexpr (sizeof(unsigned long) == 8) { ret = 63 - ret; } } else// if constexpr (sizeof(T) == sizeof(unsigned int)) { ret = __builtin_clz(static_cast(value)); if constexpr (sizeof(unsigned int) == 4) { ret = 31 - ret; } else if constexpr (sizeof(unsigned int) == 8) { ret = 63 - ret; } } success = true; #endif index = ret; return success; } /// AuBitScanForward utility - offsets value by uOffset of last uIndex template AU_OPTIMIZED bool AuBitScanForwardItr(AuUInt8 &uIndex, T value, AuUInt8 uOffset = 0) { if (AuBitScanForward(uIndex, value >> uOffset)) { uIndex += uOffset; return true; } else { return false; } } /// AuBitScanReverse utility - offsets value by uOffset of last uIndex, starting at end bit offset, to break/false on 0. template AU_OPTIMIZED bool AuBitScanReverseItr(AuUInt8 &uIndex, T value, AuUInt8 uOffset = sizeof(T) * 8) { if (uOffset == 0) { return false; } AuUInt32 uBitOffset2 = AuUInt32(8 * sizeof(T)) - uOffset; if (AuBitScanReverse(uIndex, (T(value) << uBitOffset2))) { uIndex -= uBitOffset2; return true; } else { return false; } } template struct AuHalfWord { using ReturnType_t = AuConditional_t, AuUInt32, AuConditional_t, AuUInt32, AuConditional_t, AuUInt8, AuFalseType>>>; static ReturnType_t ToLower(T in) { if constexpr (AuIsSame_v) { return in & AuUInt64(0xFFFFFFFF); } else if constexpr (AuIsSame_v) { return in & 0xFFFF; } else if constexpr (AuIsSame_v) { return in & 0xFF; } else { return {}; } } static ReturnType_t ToHigher(T in) { if constexpr (AuIsSame_v) { return (in >> AuUInt64(32)) & AuUInt64(0xFFFFFFFF); } else if constexpr (AuIsSame_v) { return (in >> 16) & 0xFFFF; } else if constexpr (AuIsSame_v) { return (in >> 8) & 0xFF; } else { return {}; } } }; template static auto AuBitsToLower(T in) { return AuHalfWord::ToLower(in); } template static auto AuBitsToHigher(T in) { return AuHalfWord::ToHigher(in); } template static AuUInt8 AuPopCnt(T in) { #if defined(AURORA_COMPILER_MSVC) #if defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86) #if defined(AURORA_ARCH_X64) if constexpr (sizeof(T) == sizeof(AuUInt64)) { return _mm_popcnt_u64(static_cast(in)); } else #endif if constexpr (sizeof(T) == sizeof(unsigned int)) { return __popcnt(static_cast(in)); } else if constexpr (sizeof(T) <= sizeof(AuUInt16)) { return __popcnt16(static_cast(in)); } #endif #else if constexpr (sizeof(T) == sizeof(unsigned long long)) { return __builtin_popcountll(static_cast(in)); } else if constexpr (sizeof(T) == sizeof(unsigned long)) { return __builtin_popcountl(static_cast(in)); } else if constexpr (sizeof(T) == sizeof(unsigned int)) { return __builtin_popcount(static_cast(in)); } #endif #if defined(AU_CPU_ENDIAN_LITTLE) if constexpr (sizeof(T) == sizeof(AuUInt64)) { const AuUInt64 m1 = 0x5555555555555555ll; const AuUInt64 m2 = 0x3333333333333333ll; const AuUInt64 m4 = 0x0F0F0F0F0F0F0F0Fll; const AuUInt64 h01 = 0x0101010101010101ll; in -= (in >> 1) & m1; in = (in & m2) + ((in >> 2) & m2); in = (in + (in >> 4)) & m4; return (in * h01) >> 56; } else if constexpr (sizeof(T) == sizeof(AuUInt32)) { const AuUInt32 m1 = 0x55555555l; const AuUInt32 m2 = 0x33333333l; const AuUInt32 m4 = 0x0F0F0F0Fl; const AuUInt32 h01 = 0x01010101l; in -= (in >> 1) & m1; in = (in & m2) + ((in >> 2) & m2); in = (in + (in >> 4)) & m4; return (in * h01) >> 24; } #endif if constexpr ((sizeof(T) == sizeof(AuUInt16)) || (sizeof(T) == sizeof(AuUInt8))) { return AuPopCnt(AuUInt32(in)); } return {}; } template AU_RBITS_CONSTEXPR T AuBitReverse(T uBits); template <> AU_RBITS_CONSTEXPR AuUInt8 AuBitReverse(AuUInt8 uBits) { #if !defined(_AU_DO_NOT_USE_BITFLIP_U8) #if defined(AU_RBITS_ARM) AuUInt32 uOutput { AuBitReverse(uBits) }; return AuUInt8((uOutput >> 24) & 0xffu); #endif uBits = ((uBits >> 1) & 0x55u) | ((uBits << 1) & 0xAAu); uBits = ((uBits >> 2) & 0x33u) | ((uBits << 2) & 0xCCu); uBits = ((uBits >> 4) & 0x0Fu) | ((uBits << 4) & 0xF0u); return uBits; #endif return (__audetail::kFlipBitsU4Lookup[uBits & 0b1111] << 4) | (__audetail::kFlipBitsU4Lookup[uBits >> 4] ); } template <> AU_RBITS_CONSTEXPR AuUInt16 AuBitReverse(AuUInt16 uBits) { #if !defined(_AU_DO_NOT_USE_BITFLIP_U16) #if defined(AU_RBITS_ARM) AuUInt32 uOutput { AuBitReverse(uBits) }; return AuUInt16((uOutput >> 16) & 0xffffu); #endif uBits = ((uBits & 0xAAAAu) >> 1) | ((uBits & 0x5555u) << 1); uBits = ((uBits & 0xCCCCu) >> 2) | ((uBits & 0x3333u) << 2); uBits = ((uBits & 0xF0F0u) >> 4) | ((uBits & 0x0F0Fu) << 4); uBits = ((uBits & 0xFF00u) >> 8) | ((uBits & 0x00FFu) << 8); return uBits; #endif #if !defined(_AU_HAS_FLIP8) return AuUInt16(AuBitReverse(AuUInt8((uBits >> 8u) & 0xFFu))) | AuUInt16(AuUInt16(AuBitReverse(AuUInt8(uBits & 0xFFu))) << 8u); #else return 0 | AuUInt32(__audetail::kFlipBitsU8Lookup[(uBits >> 8) & 0xFF]) << 0u | AuUInt32(__audetail::kFlipBitsU8Lookup[(uBits >> 0) & 0xFF]) << 8u; #endif } template <> AU_RBITS_CONSTEXPR AuUInt32 AuBitReverse(AuUInt32 uBits) { #if !defined(_AU_DO_NOT_USE_BITFLIP_U32) #if defined(AU_RBITS_ARM_SMIS) return __RBIT(uBits); #elif defined(AU_RBITS_ARM_GCC) AuUInt32 uOutput {}; asm("rbit %0,%1" : "=r"(uOutput) : "r"(uBits)); return uOutput; #endif uBits = ((uBits & 0xAAAAAAAAu) >> 1) | ((uBits & 0x55555555u) << 1); uBits = ((uBits & 0xCCCCCCCCu) >> 2) | ((uBits & 0x33333333u) << 2); uBits = ((uBits & 0xF0F0F0F0u) >> 4) | ((uBits & 0x0F0F0F0Fu) << 4); uBits = ((uBits & 0xFF00FF00u) >> 8) | ((uBits & 0x00FF00FFu) << 8); uBits = ((uBits >> 16) & 0xffffu) | ((uBits & 0xffffu) << 16); return uBits; #endif #if !defined(_AU_HAS_FLIP8) return 0 | AuUInt32(AuBitReverse((uBits >> 24) & 0xFF)) << 0ul | AuUInt32(AuBitReverse((uBits >> 16) & 0xFF)) << 8ul | AuUInt32(AuBitReverse((uBits >> 8) & 0xFF)) << 16ul | AuUInt32(AuBitReverse((uBits >> 0) & 0xFF)) << 24ul; #else return 0 | AuUInt32(__audetail::kFlipBitsU8Lookup[(uBits >> 24) & 0xFF]) << 0ul | AuUInt32(__audetail::kFlipBitsU8Lookup[(uBits >> 16) & 0xFF]) << 8ul | AuUInt32(__audetail::kFlipBitsU8Lookup[(uBits >> 8) & 0xFF]) << 16ul | AuUInt32(__audetail::kFlipBitsU8Lookup[(uBits >> 0) & 0xFF]) << 24ul; #endif } template <> AU_RBITS_CONSTEXPR AuUInt64 AuBitReverse(AuUInt64 uBits) { return AuUInt64(AuBitReverse(AuUInt32((uBits >> 32ull) & 0xFFFFFFFFul))) | AuUInt64(AuUInt64(AuBitReverse(AuUInt32(uBits & 0xFFFFFFFFul))) << 32UL); } template AU_RBITS_CONSTEXPR T AuBitsReverse(T uBits) { return AuBitReverse(uBits); } template AU_RBITS_CONSTEXPR T AuReverseBits(T uBits) { return AuBitReverse(uBits); }