From c522e9bf6ab2013912bb12dcac08615b4554132b Mon Sep 17 00:00:00 2001 From: Jamie Reece Wilson Date: Sat, 31 Aug 2024 21:21:07 +0100 Subject: [PATCH] [+] AuCodepointsDecodeOne --- Include/auROXTL/auStringUtils.hpp | 105 ++++++++++++++++++++++++------ 1 file changed, 86 insertions(+), 19 deletions(-) diff --git a/Include/auROXTL/auStringUtils.hpp b/Include/auROXTL/auStringUtils.hpp index 40ff96a..391ebef 100644 --- a/Include/auROXTL/auStringUtils.hpp +++ b/Include/auROXTL/auStringUtils.hpp @@ -13,7 +13,7 @@ Implements: AuStringContains, AuEndsWith, AuStartsWith, AuReplaceAll, AuSplitString (views), AuSplitStringLegacy (returns an array of strings instead of views) AuToLower(char), AuToUpper(char), AuToLower(view), AuToUpper(view). Implements: AuCodepointsTransform, AuCodepointsTransformASCIIOp, AuCodepointsForEach, AuCodepointsToLower, AuCodepointsToUpper, - AuCodepointsCount, AuCodepointsNextLength, AuCodepointsDecode, AuCodepointsEncodeInto, + AuCodepointsCount, AuCodepointsNextLength, AuCodepointsDecodeOne, AuCodepointsDecode, AuCodepointsEncodeInto, AuCodepointsGetByteOffset(CodepointOffset_t),AuCodepointsGetByteLength(CodepointOffset_t), AuCodepointsFindByteOffset[Unsafe], AuCodepointsFindCodepointOffset(view, CodepointOffset_t), AuCodepointsFindCodepointOffset(CodepointByteOffset_t), AuCodepointsContains, @@ -171,7 +171,7 @@ static auline AuString AuCodepointsTransformASCIIOp(T op, const AuROString &in) return ret; } -static auline CodepointOffset_t AuCodepointsCount(const AuROString &in) +static auline constexpr CodepointOffset_t AuCodepointsCount(const AuROString &in) { CodepointOffset_t uCounter {}; auto uLength = in.length(); @@ -253,7 +253,7 @@ static auline CodepointOffset_t AuCodepointsCount(const AuROString &in) return uCounter; } -static auline CodepointByteOffset_t AuCodepointsNextLength(const AuROString &in) +static auline constexpr CodepointByteOffset_t AuCodepointsNextLength(const AuROString &in) { if (in.length()) { @@ -316,17 +316,17 @@ static auline CodepointByteOffset_t AuCodepointsNextLength(const AuROString &in) return 0; } -static auline bool AuIsAlpha(char c) +static auline constexpr bool AuIsAlpha(char c) { return (c) && (((unsigned char)c | 0x20) - 'a' < 26); } -static auline char AuToLower(char c) +static auline constexpr char AuToLower(char c) { return AuIsAlpha(c) ? c | 0x20 : c; } -static auline char AuToUpper(char c) +static auline constexpr char AuToUpper(char c) { return AuIsAlpha(c) ? c & ~0x20 : c; } @@ -351,6 +351,73 @@ static auline AuString AuToUpper(const AuROString &in) return AuCodepointsToUpper(in); } +static constexpr AuOptional AuCodepointsDecodeOne(const AuROString &in) +{ + if (in.empty()) + { + return {}; + } + + auto uLength = in.length(); + + const char *pItr = in.data(); + const char *pEnd = pItr + uLength; + + while (pItr < pEnd) + { + AuUInt32 c {}; + + if ((c = *pItr) <= 0x7FU) + { + ++pItr; + } + else + { + AuUInt32 nby {}; + + if ((*pItr & 0xC0U) != 0xC0U) + { + return {}; + } + + for (AuUInt8 b = *pItr; (b & 0x80U) != 0; b <<= 1, ++nby) + { + } + + if (nby > kAuCodepointUTF8MaxBytes) + { + #if defined(AURORA_UTF8_I_SUCK_AND_WANT_MODERN_UTF8_WANT_THROW) + AU_THROW_CONST_STRING("Illegal UTF8"); + #endif + return {}; + } + + if (AuUInt(pEnd - pItr) < AuUInt(nby)) + { + return {}; + } + + c = *pItr & (AuUInt8(0xFFU) >> (nby + 1)); + + for (AuUInt32 i = 1; i < nby; ++i) + { + if ((pItr[i] & 0xC0U) != 0x80U) + { + return {}; + } + + c = (c << 6) | (pItr[i] & 0x3FU); + } + + pItr += nby; + } + + return c; + } + + return {}; +} + static AuList AuCodepointsDecode(const AuROString &in) { AuList ret; @@ -696,8 +763,8 @@ static bool AuCodepointsIsEqualIgnoreCase(const AuROString &inA, return true; } -static auline CodepointByteOffset_t AuCodepointsGetByteOffset(const AuROString &in, - CodepointOffset_t uCodepointIndex) +static auline constexpr CodepointByteOffset_t AuCodepointsGetByteOffset(const AuROString &in, + CodepointOffset_t uCodepointIndex) { AuUInt uCounter {}; auto uLength = in.length(); @@ -785,8 +852,8 @@ static auline CodepointByteOffset_t AuCodepointsGetByteOffset(const AuROString & return AuROString::npos; } -static auline CodepointByteOffset_t AuCodepointsGetByteLength(const AuROString &in, - CodepointOffset_t uCodepointIndex) +static auline constexpr CodepointByteOffset_t AuCodepointsGetByteLength(const AuROString &in, + CodepointOffset_t uCodepointIndex) { AuUInt uCounter {}; auto uLength = in.length(); @@ -1070,8 +1137,8 @@ static constexpr CodepointByteOffset_t AuCodepointsFindByteOffsetUnsafe(const Au return AuROString::npos; } -static CodepointByteOffset_t AuCodepointsFindPreviousValidByteOffsetFromOffset(const AuROString &in, - CodepointOffset_t uStartPosition = {}) +static constexpr CodepointByteOffset_t AuCodepointsFindPreviousValidByteOffsetFromOffset(const AuROString &in, + CodepointOffset_t uStartPosition = {}) { AuUInt uCounter = 0; auto uLength = in.length(); @@ -1165,8 +1232,8 @@ static CodepointByteOffset_t AuCodepointsFindPreviousValidByteOffsetFromOffset(c return AuROString::npos; } -static CodepointByteOffset_t AuCodepointsFindPreviousValidByteOffsetFromByteOffset(const AuROString &in, - CodepointByteOffset_t uStartPosition = {}) +static constexpr CodepointByteOffset_t AuCodepointsFindPreviousValidByteOffsetFromByteOffset(const AuROString &in, + CodepointByteOffset_t uStartPosition = {}) { const char * pStart = in.data(); const char * pItr = pStart + uStartPosition - 1; @@ -1202,8 +1269,8 @@ static CodepointByteOffset_t AuCodepointsFindPreviousValidByteOffsetFromByteOffs } } -static CodepointOffset_t AuCodepointsFindCodepointOffset(const AuROString &in, - CodepointByteOffset_t uBytePosition) +static constexpr CodepointOffset_t AuCodepointsFindCodepointOffset(const AuROString &in, + CodepointByteOffset_t uBytePosition) { return AuCodepointsCount(in.substr(0, uBytePosition)); } @@ -1437,7 +1504,7 @@ static bool AuCodepointsEndsWithEqualIgnoreCase(const AuROString &inA, return true; } -static AuUInt AuCodepointsReverseIterate(const AuROString &string) +static constexpr AuUInt AuCodepointsReverseIterate(const AuROString &string) { auto uLastValid = AuCodepointsFindPreviousValidByteOffsetFromByteOffset(string, string.Size()); if (uLastValid == AuROString::npos) @@ -1448,7 +1515,7 @@ static AuUInt AuCodepointsReverseIterate(const AuROString &string) return string.Size() - uLastValid; } -static AuROString AuCodepointsReverseIterateSubStrPrefixView(const AuROString &string) +static constexpr AuROString AuCodepointsReverseIterateSubStrPrefixView(const AuROString &string) { auto uOffset = AuCodepointsReverseIterate(string); if (uOffset == AuROString::npos) @@ -1459,7 +1526,7 @@ static AuROString AuCodepointsReverseIterateSubStrPrefixView(const AuROString &s return string.RemoveSuffix(uOffset); } -static AuROString AuCodepointsReverseIterateSubStrSuffixView(const AuROString &string) +static constexpr AuROString AuCodepointsReverseIterateSubStrSuffixView(const AuROString &string) { auto uLastValid = AuCodepointsFindPreviousValidByteOffsetFromByteOffset(string, string.Size()); if (uLastValid == AuROString::npos)