diff --git a/Include/auROXTL/Strings/auCodepointsUTF8.hpp b/Include/auROXTL/Strings/auCodepointsUTF8.hpp index 96bec12..9927b45 100644 --- a/Include/auROXTL/Strings/auCodepointsUTF8.hpp +++ b/Include/auROXTL/Strings/auCodepointsUTF8.hpp @@ -56,6 +56,8 @@ AU_INLINE_CONSTEXPR_17 CodepointOffset_t AuCodepointsCount(const /// Counts the bytes required to iterate over a UTF8 encoded codepoint AU_INLINE_CONSTEXPR_17 CodepointByteOffset_t AuCodepointsNextLength(const AuROString &in); +AU_INLINE_CONSTEXPR_17 CodepointByteOffset_t AuCodepointsNextLengthFromCharacter(AuUInt8 uCharacter); + /// Iterates over a UTF8 sequence using OP. /// If op returns void, AuCodepointsForEach will continue until EOS or invalid squence. /// If op returns bool, AuCodepointsForEach will continue until EOS, invalid squence, or !op. @@ -85,6 +87,11 @@ static bool AuCodepointsStartsWithI static bool AuCodepointsEndsWithIgnoreCase(const AuROString &inA, const AuROString &inB); +/// Also see: AuCodepointsFindByteOffset, AuCodepointsContainsIgnoreCase +static CodepointByteOffset_t AuCodepointsFindByteOffsetIgnoreCase(const AuROString &inA, + const AuROString &inB); + +/// Also see: AuCodepointsFindByteOffsetIgnoreCase static bool AuCodepointsContainsIgnoreCase(const AuROString &inA, const AuROString &inB); diff --git a/Include/auROXTL/Strings/auCodepointsUTF8.ipp b/Include/auROXTL/Strings/auCodepointsUTF8.ipp index 4ae8435..06b84f8 100644 --- a/Include/auROXTL/Strings/auCodepointsUTF8.ipp +++ b/Include/auROXTL/Strings/auCodepointsUTF8.ipp @@ -37,9 +37,9 @@ AU_STATIC_CONSTEXPR_17 AuOptional AuCodepointsDecodeOne(const AuROStr return AuOptional(); } - auto uLength = in.length(); + auto uLength = in.Length(); - const char *pItr = in.data(); + const char *pItr = in.Begin(); const char *pEnd = pItr + uLength; while (pItr < pEnd) @@ -59,9 +59,7 @@ AU_STATIC_CONSTEXPR_17 AuOptional AuCodepointsDecodeOne(const AuROStr return AuOptional(); } - for (AuUInt8 b = *pItr; (b & 0x80U) != 0; b <<= 1, ++nby) - { - } + nby = AuCodepointsNextLengthFromCharacter(*pItr); if (nby > kAuCodepointUTF8MaxBytes) { @@ -107,11 +105,11 @@ static AuList AuCodepointsDecode(const AuROString &in) return ret; } - auto uLength = in.length(); + auto uLength = in.Length(); ret.reserve(uLength); - const char *pItr = in.data(); + const char *pItr = in.Begin(); const char *pEnd = pItr + uLength; while (pItr < pEnd) @@ -131,9 +129,7 @@ static AuList AuCodepointsDecode(const AuROString &in) return AuList(); } - for (AuUInt8 b = *pItr; (b & 0x80U) != 0; b <<= 1, ++nby) - { - } + nby = AuCodepointsNextLengthFromCharacter(*pItr); if (nby > kAuCodepointUTF8MaxBytes) { @@ -243,9 +239,9 @@ AU_OPTIMIZED AuString AuCodepointsToUpper(const AuROString &in) AU_INLINE_CONSTEXPR_17 CodepointOffset_t AuCodepointsCount(const AuROString &in) { CodepointOffset_t uCounter (0); - auto uLength = in.length(); + auto uLength = in.Length(); - const char *pItr = in.data(); + const char *pItr = in.Begin(); const char *pEnd = pItr + uLength; while (pItr != pEnd) @@ -254,58 +250,8 @@ AU_INLINE_CONSTEXPR_17 CodepointOffset_t AuCodepointsCount(const AuROString &in) auto ch = *pItr; unsigned int result = (ch & 0xF0); - if ((ch & 0x80) == 0) - { - nby = 1; - } - else if ((ch & 0xE0) == 0xC0) - { - nby = 2; - } - else if (result == 0xE0) - { - nby = 3; - } - else if (result == 0xF0) - { - if ((ch & 0x08) == 0x08) - { - // Historic UTF8 - nby = 5; - #if defined(AURORA_UTF8_I_SUCK_AND_WANT_MODERN_UTF8_WANT_THROW) - AU_THROW_CONST_STRING("Illegal UTF8"); - #endif - } - else if ((ch & 0x0c) == 0x0c) - { - // Special UTF8 - nby = 6; - #if defined(AURORA_UTF8_I_SUCK_AND_WANT_MODERN_UTF8_WANT_THROW) - AU_THROW_CONST_STRING("Illegal UTF8"); - #endif - } - else if ((ch & 0x0e) == 0x0e) - { - // Illegal UTF8 - nby = 7; - #if defined(AURORA_UTF8_THROW_WHEN_STUPIDILY_ILLEGAL) - AU_THROW_CONST_STRING("Illegal UTF8"); - #endif - } - else if ((ch & 0x0f) == 0x0f) - { - // Not even logical - #if defined(AURORA_UTF8_THROW_WHEN_STUPIDILY_ILLEGAL) - AU_THROW_CONST_STRING("Illegal UTF8"); - #endif - break; - } - else - { - nby = 4; - } - } - else + nby = AuCodepointsNextLengthFromCharacter(ch); + if (!nby) { break; } @@ -322,10 +268,63 @@ AU_INLINE_CONSTEXPR_17 CodepointOffset_t AuCodepointsCount(const AuROString &in) return uCounter; } +AU_INLINE_CONSTEXPR_17 CodepointByteOffset_t AuCodepointsNextLengthFromCharacter(AuUInt8 uCharacter) +{ +#if defined(AU_CPU_ENDIAN_BIG) + + const char uChar = uCharacter; + return AuCodepointsNextLength(AuROString(&uChar, 1)); + +#else + + +#if defined(AU_LANG_CPP_17_) + + if (__builtin_is_constant_evaluated()) + { + const char uChar = uCharacter; + return AuCodepointsNextLength(AuROString(&uChar, 1)); + } + +#endif + + if ((uCharacter & 0x80) == 0) + { + return 1; + } + + AuUInt8 uBits(0); + if (AuBitScanReverse(uBits, ~(AuUInt32(uCharacter) << 24u))) + { + return 31 - uBits; + } + else + { + return 0; + } + +#endif +} + /// Counts the bytes required to iterate over a UTF8 encoded codepoint AU_INLINE_CONSTEXPR_17 CodepointByteOffset_t AuCodepointsNextLength(const AuROString &in) { - if (in.length()) + +#if defined(AU_LANG_CPP_17_) + + if (!__builtin_is_constant_evaluated()) + { + if (in.Empty()) + { + return 0; + } + + return AuCodepointsNextLengthFromCharacter(in.Data()[0]); + } + +#endif + + if (in.Length()) { auto ch = in[0]; unsigned int result = (ch & 0xF0); @@ -398,9 +397,9 @@ static bool AuCodepointsForEach(T op, const AuROStrin return true; } - auto uLength = in.length(); + auto uLength = in.Length(); - const char *pItr = in.data(); + const char *pItr = in.Begin(); const char *pEnd = pItr + uLength; while (pItr < pEnd) @@ -420,9 +419,7 @@ static bool AuCodepointsForEach(T op, const AuROStrin return false; } - for (AuUInt8 b = *pItr; (b & 0x80U) != 0; b <<= 1, ++nby) - { - } + nby = AuCodepointsNextLengthFromCharacter(*pItr); if (nby > kAuCodepointUTF8MaxBytes) { @@ -479,11 +476,11 @@ static AuString AuCodepointsTransform(T op, const AuROStr return ret; } - auto uLength = in.length(); + auto uLength = in.Length(); ret.reserve(uLength); - const char *pItr = in.data(); + const char *pItr = in.Begin(); const char *pEnd = pItr + uLength; while (pItr < pEnd) @@ -503,9 +500,7 @@ static AuString AuCodepointsTransform(T op, const AuROStr return AuString(); } - for (AuUInt8 b = *pItr; (b & 0x80U) != 0; b <<= 1, ++nby) - { - } + nby = AuCodepointsNextLengthFromCharacter(*pItr); if (nby > kAuCodepointUTF8MaxBytes) { @@ -548,11 +543,11 @@ template AuString AuCodepointsTransformASCIIOp(T op, const AuROString &in) { AuString ret; - auto uLength = in.length(); + auto uLength = in.Length(); ret.resize(uLength); - const char *pItr = in.data(); + const char *pItr = in.Begin(); const char *pEnd = pItr + uLength; AuUInt32 uCounter (0); @@ -562,58 +557,9 @@ AuString AuCodepointsTransformASCIIOp(T op, const auto ch = *pItr; unsigned int result = (ch & 0xF0); - if ((ch & 0x80) == 0) - { - nby = 1; - } - else if ((ch & 0xE0) == 0xC0) - { - nby = 2; - } - else if (result == 0xE0) - { - nby = 3; - } - else if (result == 0xF0) - { - if ((ch & 0x08) == 0x08) - { - // Historic UTF8 - nby = 5; - #if defined(AURORA_UTF8_I_SUCK_AND_WANT_MODERN_UTF8_WANT_THROW) - AU_THROW_CONST_STRING("Illegal UTF8"); - #endif - } - else if ((ch & 0x0c) == 0x0c) - { - // Special UTF8 - nby = 6; - #if defined(AURORA_UTF8_I_SUCK_AND_WANT_MODERN_UTF8_WANT_THROW) - AU_THROW_CONST_STRING("Illegal UTF8"); - #endif - } - else if ((ch & 0x0e) == 0x0e) - { - // Illegal UTF8 - nby = 7; - #if defined(AURORA_UTF8_THROW_WHEN_STUPIDILY_ILLEGAL) - AU_THROW_CONST_STRING("Illegal UTF8"); - #endif - } - else if ((ch & 0x0f) == 0x0f) - { - // Not even logical - #if defined(AURORA_UTF8_THROW_WHEN_STUPIDILY_ILLEGAL) - AU_THROW_CONST_STRING("Illegal UTF8"); - #endif - break; - } - else - { - nby = 4; - } - } - else + nby = AuCodepointsNextLengthFromCharacter(ch); + + if (!nby) { break; } @@ -655,9 +601,9 @@ static bool AuCodepointsIsEqualIgnoreCase(const AuROS return true; } - const char *pItr = inA.data(); - const char *pItr2 = inB.data(); - const char *pEnd = pItr + inA.length(); + const char *pItr = inA.Begin(); + const char *pItr2 = inB.Begin(); + const char *pEnd = pItr + inA.Length(); while (pItr < pEnd) { @@ -681,9 +627,7 @@ static bool AuCodepointsIsEqualIgnoreCase(const AuROS return AuMemcmp(pItr, pItr2, pEnd - pItr) == 0; } - for (AuUInt8 b = *pItr; (b & 0x80U) != 0; b <<= 1, ++nby) - { - } + nby = AuCodepointsNextLengthFromCharacter(*pItr); if (nby > kAuCodepointUTF8MaxBytes) { @@ -723,9 +667,9 @@ static bool AuCodepointsStartsWithIgnoreCase(const Au return true; } - const char *pItr = inA.data(); - const char *pItr2 = inB.data(); - const char *pEnd = pItr + inB.length(); + const char *pItr = inA.Begin(); + const char *pItr2 = inB.Begin(); + const char *pEnd = pItr + inB.Length(); const char *pEnd2 = inB.End(); while (pItr < pEnd) @@ -754,9 +698,7 @@ static bool AuCodepointsStartsWithIgnoreCase(const Au return AuMemcmp(pItr, pItr2, pEnd - pItr) == 0; } - for (AuUInt8 b = *pItr; (b & 0x80U) != 0; b <<= 1, ++nby) - { - } + nby = AuCodepointsNextLengthFromCharacter(*pItr); if (nby > kAuCodepointUTF8MaxBytes) { @@ -776,7 +718,7 @@ static bool AuCodepointsStartsWithIgnoreCase(const Au return AuMemcmp(pItr, pItr2, pEnd - pItr) == 0; } - if (nby > pEnd2 - pItr2) + if (nby > AuUInt(pEnd2 - pItr2)) { return false; } @@ -828,8 +770,8 @@ static bool AuCodepointsEndsWithIgnoreCase(const AuRO return false; } - auto pItr = inA.data() + uOffset - nby; - auto pItr2 = inB.data() + uOffset2 - nby; + auto pItr = inA.Begin() + uOffset - nby; + auto pItr2 = inB.Begin() + uOffset2 - nby; if ((c = *pItr) <= 0x7FU) { @@ -856,54 +798,44 @@ static bool AuCodepointsEndsWithIgnoreCase(const AuRO return true; } -static bool AuCodepointsContainsIgnoreCase(const AuROString &inA, - const AuROString &inB) +static CodepointByteOffset_t AuCodepointsFindByteOffsetIgnoreCase(const AuROString &inA, + const AuROString &inB) { if (inA.size() < inB.size()) { - return false; + return AuROString::npos; } - const char *pItr = inA.data(); - const char *pItr2 = inB.data(); - const char *pEnd = pItr + inA.length(); - const char *pEnd2 = inB.end(); + const char *pItr = inA.Begin(); + const char *pItr2 = inB.Begin(); + const char *pEnd = pItr + inA.Length(); + const char *pEnd2 = inB.End(); while (pItr < pEnd) { - AuUInt32 c (0); AuUInt32 nby (0); - if ((c = *pItr) <= 0x7FU) + nby = AuCodepointsNextLengthFromCharacter(*pItr); + if (!nby) { - nby = 1; - } - else - { - if ((*pItr & 0xC0U) != 0xC0U) - { - if (pEnd - pItr != pEnd2 - pItr2) - { - return false; - } - - return AuMemcmp(pItr, pItr2, pEnd - pItr) == 0; - } - - for (AuUInt8 b = *pItr; (b & 0x80U) != 0; b <<= 1, ++nby) - { - } + break; } if (AuCodepointsStartsWithIgnoreCase(AuROString(pItr, AuUInt(pEnd - pItr)), inB)) { - return true; + return pItr - inA.Begin(); } pItr += nby; } - return false; + return AuROString::npos; +} + +static bool AuCodepointsContainsIgnoreCase(const AuROString &inA, + const AuROString &inB) +{ + return AuCodepointsFindByteOffsetIgnoreCase(inA, inB) != AuROString::npos; } /// Returns the byte offset of the codepoint index or AuROString::npos @@ -911,9 +843,9 @@ AU_INLINE_CONSTEXPR_17 CodepointByteOffset_t AuCodepointsGetByteOffset(const AuR CodepointOffset_t uCodepointIndex) { AuUInt uCounter (0); - auto uLength = in.length(); + auto uLength = in.Length(); - const char *pStart = in.data(); + const char *pStart = in.Begin(); const char *pItr = pStart; const char *pEnd = pStart + uLength; @@ -928,58 +860,8 @@ AU_INLINE_CONSTEXPR_17 CodepointByteOffset_t AuCodepointsGetByteOffset(const AuR return CodepointByteOffset_t(pItr - pStart); } - if ((ch & 0x80) == 0) - { - nby = 1; - } - else if ((ch & 0xE0) == 0xC0) - { - nby = 2; - } - else if (result == 0xE0) - { - nby = 3; - } - else if (result == 0xF0) - { - if ((ch & 0x08) == 0x08) - { - // Historic UTF8 - nby = 5; - #if defined(AURORA_UTF8_I_SUCK_AND_WANT_MODERN_UTF8_WANT_THROW) - AU_THROW_CONST_STRING("Illegal UTF8"); - #endif - } - else if ((ch & 0x0c) == 0x0c) - { - // Special UTF8 - nby = 6; - #if defined(AURORA_UTF8_I_SUCK_AND_WANT_MODERN_UTF8_WANT_THROW) - AU_THROW_CONST_STRING("Illegal UTF8"); - #endif - } - else if ((ch & 0x0e) == 0x0e) - { - // Illegal UTF8 - #if defined(AURORA_UTF8_THROW_WHEN_STUPIDILY_ILLEGAL) - AU_THROW_CONST_STRING("Illegal UTF8"); - #endif - nby = 7; - } - else if ((ch & 0x0f) == 0x0f) - { - // Not even logical - #if defined(AURORA_UTF8_THROW_WHEN_STUPIDILY_ILLEGAL) - AU_THROW_CONST_STRING("Illegal UTF8"); - #endif - break; - } - else - { - nby = 4; - } - } - else + nby = AuCodepointsNextLengthFromCharacter(ch); + if (!nby) { break; } @@ -1001,9 +883,9 @@ AU_INLINE_CONSTEXPR_17 CodepointByteOffset_t AuCodepointsGetByteLength(const AuR CodepointOffset_t uCodepointIndex) { AuUInt uCounter (0); - auto uLength = in.length(); + auto uLength = in.Length(); - const char *pStart = in.data(); + const char *pStart = in.Begin(); const char *pItr = pStart; const char *pEnd = pStart + uLength; @@ -1013,52 +895,8 @@ AU_INLINE_CONSTEXPR_17 CodepointByteOffset_t AuCodepointsGetByteLength(const AuR auto ch = *pItr; unsigned int result = (ch & 0xF0); - if ((ch & 0x80) == 0) - { - nby = 1; - } - else if ((ch & 0xE0) == 0xC0) - { - nby = 2; - } - else if (result == 0xE0) - { - nby = 3; - } - else if (result == 0xF0) - { - if ((ch & 0x08) == 0x08) - { - // Historic UTF8 - nby = 5; - #if defined(AURORA_UTF8_I_SUCK_AND_WANT_MODERN_UTF8_WANT_THROW) - AU_THROW_CONST_STRING("Illegal UTF8"); - #endif - } - else if ((ch & 0x0c) == 0x0c) - { - // Special UTF8 - nby = 6; - #if defined(AURORA_UTF8_I_SUCK_AND_WANT_MODERN_UTF8_WANT_THROW) - AU_THROW_CONST_STRING("Illegal UTF8"); - #endif - } - else if ((ch & 0x0e) == 0x0e) - { - // Illegal UTF8 - nby = 7; - } - else if ((ch & 0x0f) == 0x0f) - { - // Not even logical - break; - } - else - { - nby = 4; - } - } - else + nby = AuCodepointsNextLengthFromCharacter(ch); + if (!nby) { break; } @@ -1098,10 +936,10 @@ static CodepointByteOffset_t AuCodepointsFindByteOffset(const AuROStri CodepointByteOffset_t uStartPosition) { AuUInt uCounter = 0; - auto uLength = in.length(); - auto uFindLength = find.length(); + auto uLength = in.Length(); + auto uFindLength = find.Length(); - const char *pStart = in.data(); + const char *pStart = in.Begin(); const char *pItr = pStart; const char *pEnd = pStart + uLength; @@ -1115,9 +953,9 @@ static CodepointByteOffset_t AuCodepointsFindByteOffset(const AuROStri if (uByteOffset >= uStartPosition) { AuROString suffixView(pItr, pEnd); - if (suffixView.length() > uFindLength) + if (suffixView.Length() > uFindLength) { - suffixView = AuROString ( suffixView.data(), uFindLength ); + suffixView = AuROString ( suffixView.Begin(), uFindLength ); } if (suffixView == find) @@ -1126,58 +964,8 @@ static CodepointByteOffset_t AuCodepointsFindByteOffset(const AuROStri } } - if ((ch & 0x80) == 0) - { - nby = 1; - } - else if ((ch & 0xE0) == 0xC0) - { - nby = 2; - } - else if (result == 0xE0) - { - nby = 3; - } - else if (result == 0xF0) - { - if ((ch & 0x08) == 0x08) - { - // Historic UTF8 - nby = 5; - #if defined(AURORA_UTF8_I_SUCK_AND_WANT_MODERN_UTF8_WANT_THROW) - AU_THROW_CONST_STRING("Illegal UTF8"); - #endif - } - else if ((ch & 0x0c) == 0x0c) - { - // Special UTF8 - nby = 6; - #if defined(AURORA_UTF8_I_SUCK_AND_WANT_MODERN_UTF8_WANT_THROW) - AU_THROW_CONST_STRING("Illegal UTF8"); - #endif - } - else if ((ch & 0x0e) == 0x0e) - { - // Illegal UTF8 - nby = 7; - #if defined(AURORA_UTF8_THROW_WHEN_STUPIDILY_ILLEGAL) - AU_THROW_CONST_STRING("Illegal UTF8"); - #endif - } - else if ((ch & 0x0f) == 0x0f) - { - // Not even logical - #if defined(AURORA_UTF8_THROW_WHEN_STUPIDILY_ILLEGAL) - AU_THROW_CONST_STRING("Illegal UTF8"); - #endif - break; - } - else - { - nby = 4; - } - } - else + nby = AuCodepointsNextLengthFromCharacter(ch); + if (!nby) { break; } @@ -1202,10 +990,10 @@ AU_STATIC_CONSTEXPR_17 CodepointByteOffset_t AuCodepointsFindByteOffsetUnsa CodepointByteOffset_t uStartPosition) { AuUInt uCounter = 0; - auto uLength = in.length(); - auto uFindLength = find.length(); + auto uLength = in.Length(); + auto uFindLength = find.Length(); - const char *pStart = in.data(); + const char *pStart = in.Begin(); const char *pItr = pStart + uStartPosition; const char *pEnd = pStart + uLength; @@ -1218,9 +1006,9 @@ AU_STATIC_CONSTEXPR_17 CodepointByteOffset_t AuCodepointsFindByteOffsetUnsa { AuROString suffixView(pItr, pEnd); - if (suffixView.length() > uFindLength) + if (suffixView.Length() > uFindLength) { - suffixView = AuROString ( suffixView.data(), uFindLength ); + suffixView = AuROString ( suffixView.Begin(), uFindLength ); } if (suffixView == find) @@ -1229,58 +1017,8 @@ AU_STATIC_CONSTEXPR_17 CodepointByteOffset_t AuCodepointsFindByteOffsetUnsa } } - if ((ch & 0x80) == 0) - { - nby = 1; - } - else if ((ch & 0xE0) == 0xC0) - { - nby = 2; - } - else if (result == 0xE0) - { - nby = 3; - } - else if (result == 0xF0) - { - if ((ch & 0x08) == 0x08) - { - // Historic UTF8 - nby = 5; - #if defined(AURORA_UTF8_I_SUCK_AND_WANT_MODERN_UTF8_WANT_THROW) - AU_THROW_CONST_STRING("Illegal UTF8"); - #endif - } - else if ((ch & 0x0c) == 0x0c) - { - // Special UTF8 - nby = 6; - #if defined(AURORA_UTF8_I_SUCK_AND_WANT_MODERN_UTF8_WANT_THROW) - AU_THROW_CONST_STRING("Illegal UTF8"); - #endif - } - else if ((ch & 0x0e) == 0x0e) - { - // Illegal UTF8 - nby = 7; - #if defined(AURORA_UTF8_THROW_WHEN_STUPIDILY_ILLEGAL) - AU_THROW_CONST_STRING("Illegal UTF8"); - #endif - } - else if ((ch & 0x0f) == 0x0f) - { - // Not even logical - #if defined(AURORA_UTF8_THROW_WHEN_STUPIDILY_ILLEGAL) - AU_THROW_CONST_STRING("Illegal UTF8"); - #endif - break; - } - else - { - nby = 4; - } - } - else + nby = AuCodepointsNextLengthFromCharacter(ch); + if (!nby) { break; } @@ -1303,9 +1041,9 @@ AU_STATIC_CONSTEXPR_17 CodepointByteOffset_t AuCodepointsFindPreviousValidB CodepointOffset_t uStartPosition) { AuUInt uCounter = 0; - auto uLength = in.length(); + auto uLength = in.Length(); - const char *pStart = in.data(); + const char *pStart = in.Begin(); const char *pItr = pStart; const char *pEnd = pStart + uLength; @@ -1320,58 +1058,8 @@ AU_STATIC_CONSTEXPR_17 CodepointByteOffset_t AuCodepointsFindPreviousValidB auto ch = *pItr; unsigned int result = (ch & 0xF0); - if ((ch & 0x80) == 0) - { - nby = 1; - } - else if ((ch & 0xE0) == 0xC0) - { - nby = 2; - } - else if (result == 0xE0) - { - nby = 3; - } - else if (result == 0xF0) - { - if ((ch & 0x08) == 0x08) - { - // Historic UTF8 - nby = 5; - #if defined(AURORA_UTF8_I_SUCK_AND_WANT_MODERN_UTF8_WANT_THROW) - AU_THROW_CONST_STRING("Illegal UTF8"); - #endif - } - else if ((ch & 0x0c) == 0x0c) - { - // Special UTF8 - nby = 6; - #if defined(AURORA_UTF8_I_SUCK_AND_WANT_MODERN_UTF8_WANT_THROW) - AU_THROW_CONST_STRING("Illegal UTF8"); - #endif - } - else if ((ch & 0x0e) == 0x0e) - { - // Illegal UTF8 - nby = 7; - #if defined(AURORA_UTF8_THROW_WHEN_STUPIDILY_ILLEGAL) - AU_THROW_CONST_STRING("Illegal UTF8"); - #endif - } - else if ((ch & 0x0f) == 0x0f) - { - // Not even logical - #if defined(AURORA_UTF8_THROW_WHEN_STUPIDILY_ILLEGAL) - AU_THROW_CONST_STRING("Illegal UTF8"); - #endif - break; - } - else - { - nby = 4; - } - } - else + nby = AuCodepointsNextLengthFromCharacter(ch); + if (!nby) { break; } @@ -1400,7 +1088,7 @@ AU_STATIC_CONSTEXPR_17 CodepointByteOffset_t AuCodepointsFindPreviousValidB AU_STATIC_CONSTEXPR_17 CodepointByteOffset_t AuCodepointsFindPreviousValidByteOffsetFromByteOffset(const AuROString &in, CodepointByteOffset_t uStartPosition) { - const char * pStart = in.data(); + const char * pStart = in.Begin(); const char * pItr = pStart + uStartPosition - 1; if (uStartPosition == 0) @@ -1446,10 +1134,10 @@ static CodepointOffset_t AuCodepointsFindCodepointOffset(const AuR CodepointOffset_t uStartPosition) { AuUInt uCounter (0); - auto uLength = in.length(); - auto uFindLength = find.length(); + auto uLength = in.Length(); + auto uFindLength = find.Length(); - const char *pStart = in.data(); + const char *pStart = in.Begin(); const char *pItr = pStart; const char *pEnd = pStart + uLength; @@ -1462,9 +1150,9 @@ static CodepointOffset_t AuCodepointsFindCodepointOffset(const AuR if (uCounter >= uStartPosition) { AuROString suffixView(pItr, pEnd); - if (suffixView.length() > uFindLength) + if (suffixView.Length() > uFindLength) { - suffixView = AuROString ( suffixView.data(), uFindLength ); + suffixView = AuROString ( suffixView.Begin(), uFindLength ); } if (suffixView == find) @@ -1473,58 +1161,8 @@ static CodepointOffset_t AuCodepointsFindCodepointOffset(const AuR } } - if ((ch & 0x80) == 0) - { - nby = 1; - } - else if ((ch & 0xE0) == 0xC0) - { - nby = 2; - } - else if (result == 0xE0) - { - nby = 3; - } - else if (result == 0xF0) - { - if ((ch & 0x08) == 0x08) - { - // Historic UTF8 - nby = 5; - #if defined(AURORA_UTF8_I_SUCK_AND_WANT_MODERN_UTF8_WANT_THROW) - AU_THROW_CONST_STRING("Illegal UTF8"); - #endif - } - else if ((ch & 0x0c) == 0x0c) - { - // Special UTF8 - nby = 6; - #if defined(AURORA_UTF8_I_SUCK_AND_WANT_MODERN_UTF8_WANT_THROW) - AU_THROW_CONST_STRING("Illegal UTF8"); - #endif - } - else if ((ch & 0x0e) == 0x0e) - { - // Illegal UTF8 - nby = 7; - #if defined(AURORA_UTF8_THROW_WHEN_STUPIDILY_ILLEGAL) - AU_THROW_CONST_STRING("Illegal UTF8"); - #endif - } - else if ((ch & 0x0f) == 0x0f) - { - // Not even logical - #if defined(AURORA_UTF8_THROW_WHEN_STUPIDILY_ILLEGAL) - AU_THROW_CONST_STRING("Illegal UTF8"); - #endif - break; - } - else - { - nby = 4; - } - } - else + nby = AuCodepointsNextLengthFromCharacter(ch); + if (!nby) { break; } @@ -1582,14 +1220,18 @@ static AuString & AuCodepointsReplaceAll(AuString &str, const AuROString &to) { AuUInt uStartPosition (0); +#if !defined(AU_LANG_CPP_17_) + std::string hack(to); +#endif + while ((uStartPosition = AuCodepointsFindByteOffsetUnsafe(str, from, uStartPosition)) != AuROString::npos) { #if defined(AU_LANG_CPP_17_) - str.replace(uStartPosition, from.length(), to); + str.replace(uStartPosition, from.Length(), to); #else - str.replace(uStartPosition, from.length(), std::string(to).c_str(), 0, to.length()); + str.replace(uStartPosition, from.Length(), hack.c_str(), 0, to.Length()); #endif - uStartPosition += to.length(); + uStartPosition += to.Length(); } return str; } @@ -1606,16 +1248,16 @@ static AuList AuCodepointsSplitString(const AuROString uPos = AuCodepointsFindByteOffsetUnsafe(str, delim, uPrev); if (uPos == AuROString::npos) { - uPos = str.length(); + uPos = str.Length(); } auto token = str.substr(uPrev, uPos - uPrev); if ((!token.empty()) && bIgnoreEmpty) { tokens.push_back(token); } - uPrev = uPos + delim.length(); + uPrev = uPos + delim.Length(); } - while (uPos < str.length() && uPrev < str.length()); + while (uPos < str.Length() && uPrev < str.Length()); return tokens; } diff --git a/Include/auROXTL/auStringUtils.hpp b/Include/auROXTL/auStringUtils.hpp index ca0de6d..a009d62 100644 --- a/Include/auROXTL/auStringUtils.hpp +++ b/Include/auROXTL/auStringUtils.hpp @@ -63,7 +63,6 @@ #include #include -#include #if !defined(AURORA_RUNTIME_TO_STRING) #define AURORA_RUNTIME_TO_STRING std::to_string diff --git a/Include/auROXTLUtils.hpp b/Include/auROXTLUtils.hpp index 873cbb8..e191002 100644 --- a/Include/auROXTLUtils.hpp +++ b/Include/auROXTLUtils.hpp @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include @@ -84,6 +85,9 @@ namespace __audetail #include +#include +#include + struct IAuNullDelegate { virtual void OnCall() = 0;