[+] AuCodepointsDecodeOne

This commit is contained in:
Reece Wilson 2024-08-31 21:21:07 +01:00
parent 3ce4184836
commit c522e9bf6a

View File

@ -13,7 +13,7 @@
Implements: AuStringContains, AuEndsWith, AuStartsWith, AuReplaceAll, AuSplitString (views), AuSplitStringLegacy (returns an array of strings instead of views) Implements: AuStringContains, AuEndsWith, AuStartsWith, AuReplaceAll, AuSplitString (views), AuSplitStringLegacy (returns an array of strings instead of views)
AuToLower(char), AuToUpper(char), AuToLower(view), AuToUpper(view). AuToLower(char), AuToUpper(char), AuToLower(view), AuToUpper(view).
Implements: AuCodepointsTransform, AuCodepointsTransformASCIIOp, AuCodepointsForEach, AuCodepointsToLower, AuCodepointsToUpper, Implements: AuCodepointsTransform, AuCodepointsTransformASCIIOp, AuCodepointsForEach, AuCodepointsToLower, AuCodepointsToUpper,
AuCodepointsCount, AuCodepointsNextLength, AuCodepointsDecode, AuCodepointsEncodeInto, AuCodepointsCount, AuCodepointsNextLength, AuCodepointsDecodeOne, AuCodepointsDecode, AuCodepointsEncodeInto,
AuCodepointsGetByteOffset(CodepointOffset_t),AuCodepointsGetByteLength(CodepointOffset_t), AuCodepointsGetByteOffset(CodepointOffset_t),AuCodepointsGetByteLength(CodepointOffset_t),
AuCodepointsFindByteOffset[Unsafe], AuCodepointsFindCodepointOffset(view, CodepointOffset_t), AuCodepointsFindCodepointOffset(CodepointByteOffset_t), AuCodepointsFindByteOffset[Unsafe], AuCodepointsFindCodepointOffset(view, CodepointOffset_t), AuCodepointsFindCodepointOffset(CodepointByteOffset_t),
AuCodepointsContains, AuCodepointsContains,
@ -171,7 +171,7 @@ static auline AuString AuCodepointsTransformASCIIOp(T op, const AuROString &in)
return ret; return ret;
} }
static auline CodepointOffset_t AuCodepointsCount(const AuROString &in) static auline constexpr CodepointOffset_t AuCodepointsCount(const AuROString &in)
{ {
CodepointOffset_t uCounter {}; CodepointOffset_t uCounter {};
auto uLength = in.length(); auto uLength = in.length();
@ -253,7 +253,7 @@ static auline CodepointOffset_t AuCodepointsCount(const AuROString &in)
return uCounter; return uCounter;
} }
static auline CodepointByteOffset_t AuCodepointsNextLength(const AuROString &in) static auline constexpr CodepointByteOffset_t AuCodepointsNextLength(const AuROString &in)
{ {
if (in.length()) if (in.length())
{ {
@ -316,17 +316,17 @@ static auline CodepointByteOffset_t AuCodepointsNextLength(const AuROString &in)
return 0; return 0;
} }
/**
 * Tests whether `c` is an ASCII letter ('A'-'Z' or 'a'-'z').
 *
 * @param c the character to classify
 * @return true only for the 52 ASCII letters; false for NUL, digits, whitespace,
 *         punctuation, control characters, and every byte with the high bit set.
 */
static auline constexpr bool AuIsAlpha(char c)
{
    // OR-ing in 0x20 folds 'A'-'Z' onto 'a'-'z', so one range check covers both cases.
    const unsigned uFolded = static_cast<unsigned char>(c) | 0x20u;

    // The subtraction must be unsigned: values below 'a' wrap to a huge number and fail
    // the range test. Done in (promoted) signed int they would go negative and wrongly
    // compare as less than 26, classifying digits, spaces and punctuation as letters.
    return (uFolded - static_cast<unsigned>('a')) < 26u;
}
/**
 * Lowercases a single character.
 *
 * @param c the character to convert
 * @return `c` with bit 0x20 set when AuIsAlpha(c) accepts it; otherwise `c` unchanged.
 */
static auline constexpr char AuToLower(char c)
{
    if (!AuIsAlpha(c))
    {
        return c;
    }

    return c | 0x20;
}
/**
 * Uppercases a single character.
 *
 * @param c the character to convert
 * @return `c` with bit 0x20 cleared when AuIsAlpha(c) accepts it; otherwise `c` unchanged.
 */
static auline constexpr char AuToUpper(char c)
{
    if (!AuIsAlpha(c))
    {
        return c;
    }

    return c & ~0x20;
}
@ -351,6 +351,73 @@ static auline AuString AuToUpper(const AuROString &in)
return AuCodepointsToUpper(in); return AuCodepointsToUpper(in);
} }
/**
 * Decodes the single UTF-8 sequence found at the very start of `in`.
 *
 * Only the first sequence is examined; any bytes following it are ignored.
 *
 * @param in the UTF-8 encoded view to read from
 * @return the decoded codepoint, or an empty optional when:
 *          - `in` is empty,
 *          - the first byte is a continuation byte (10xxxxxx) rather than a lead byte,
 *          - the lead byte announces more than kAuCodepointUTF8MaxBytes bytes,
 *          - the view ends before the announced sequence length, or
 *          - a trailing byte is not a continuation byte.
 *
 * No check for overlong encodings or surrogate values is performed here.
 */
static constexpr AuOptional<AuUInt32> AuCodepointsDecodeOne(const AuROString &in)
{
    if (in.empty())
    {
        return {};
    }

    const char *pItr = in.data();
    const char *pEnd = pItr + in.length();

    // Single-byte (ASCII) fast path. A byte with the high bit set never satisfies this test,
    // whether plain char is signed or unsigned on the target.
    AuUInt32 c = *pItr;
    if (c <= 0x7FU)
    {
        return c;
    }

    // A multi-byte sequence must open with a lead byte: 11xxxxxx.
    if ((*pItr & 0xC0U) != 0xC0U)
    {
        return {};
    }

    // The number of leading 1 bits in the lead byte is the total sequence length.
    AuUInt32 nby {};
    for (AuUInt8 b = *pItr; (b & 0x80U) != 0; b <<= 1, ++nby)
    {
    }

    if (nby > kAuCodepointUTF8MaxBytes)
    {
#if defined(AURORA_UTF8_I_SUCK_AND_WANT_MODERN_UTF8_WANT_THROW)
        AU_THROW_CONST_STRING("Illegal UTF8");
#endif
        return {};
    }

    // Truncated input: fewer bytes remain than the lead byte promised.
    if (AuUInt(pEnd - pItr) < AuUInt(nby))
    {
        return {};
    }

    // Keep only the payload bits of the lead byte (everything below the length prefix).
    c = *pItr & (AuUInt8(0xFFU) >> (nby + 1));

    // Each continuation byte (10xxxxxx) contributes six more payload bits.
    for (AuUInt32 i = 1; i < nby; ++i)
    {
        if ((pItr[i] & 0xC0U) != 0x80U)
        {
            return {};
        }

        c = (c << 6) | (pItr[i] & 0x3FU);
    }

    return c;
}
static AuList<AuUInt32> AuCodepointsDecode(const AuROString &in) static AuList<AuUInt32> AuCodepointsDecode(const AuROString &in)
{ {
AuList<AuUInt32> ret; AuList<AuUInt32> ret;
@ -696,7 +763,7 @@ static bool AuCodepointsIsEqualIgnoreCase(const AuROString &inA,
return true; return true;
} }
static auline CodepointByteOffset_t AuCodepointsGetByteOffset(const AuROString &in, static auline constexpr CodepointByteOffset_t AuCodepointsGetByteOffset(const AuROString &in,
CodepointOffset_t uCodepointIndex) CodepointOffset_t uCodepointIndex)
{ {
AuUInt uCounter {}; AuUInt uCounter {};
@ -785,7 +852,7 @@ static auline CodepointByteOffset_t AuCodepointsGetByteOffset(const AuROString &
return AuROString::npos; return AuROString::npos;
} }
static auline CodepointByteOffset_t AuCodepointsGetByteLength(const AuROString &in, static auline constexpr CodepointByteOffset_t AuCodepointsGetByteLength(const AuROString &in,
CodepointOffset_t uCodepointIndex) CodepointOffset_t uCodepointIndex)
{ {
AuUInt uCounter {}; AuUInt uCounter {};
@ -1070,7 +1137,7 @@ static constexpr CodepointByteOffset_t AuCodepointsFindByteOffsetUnsafe(const Au
return AuROString::npos; return AuROString::npos;
} }
static CodepointByteOffset_t AuCodepointsFindPreviousValidByteOffsetFromOffset(const AuROString &in, static constexpr CodepointByteOffset_t AuCodepointsFindPreviousValidByteOffsetFromOffset(const AuROString &in,
CodepointOffset_t uStartPosition = {}) CodepointOffset_t uStartPosition = {})
{ {
AuUInt uCounter = 0; AuUInt uCounter = 0;
@ -1165,7 +1232,7 @@ static CodepointByteOffset_t AuCodepointsFindPreviousValidByteOffsetFromOffset(c
return AuROString::npos; return AuROString::npos;
} }
static CodepointByteOffset_t AuCodepointsFindPreviousValidByteOffsetFromByteOffset(const AuROString &in, static constexpr CodepointByteOffset_t AuCodepointsFindPreviousValidByteOffsetFromByteOffset(const AuROString &in,
CodepointByteOffset_t uStartPosition = {}) CodepointByteOffset_t uStartPosition = {})
{ {
const char * pStart = in.data(); const char * pStart = in.data();
@ -1202,7 +1269,7 @@ static CodepointByteOffset_t AuCodepointsFindPreviousValidByteOffsetFromByteOffs
} }
} }
static CodepointOffset_t AuCodepointsFindCodepointOffset(const AuROString &in, static constexpr CodepointOffset_t AuCodepointsFindCodepointOffset(const AuROString &in,
CodepointByteOffset_t uBytePosition) CodepointByteOffset_t uBytePosition)
{ {
return AuCodepointsCount(in.substr(0, uBytePosition)); return AuCodepointsCount(in.substr(0, uBytePosition));
@ -1437,7 +1504,7 @@ static bool AuCodepointsEndsWithEqualIgnoreCase(const AuROString &inA,
return true; return true;
} }
static AuUInt AuCodepointsReverseIterate(const AuROString &string) static constexpr AuUInt AuCodepointsReverseIterate(const AuROString &string)
{ {
auto uLastValid = AuCodepointsFindPreviousValidByteOffsetFromByteOffset(string, string.Size()); auto uLastValid = AuCodepointsFindPreviousValidByteOffsetFromByteOffset(string, string.Size());
if (uLastValid == AuROString::npos) if (uLastValid == AuROString::npos)
@ -1448,7 +1515,7 @@ static AuUInt AuCodepointsReverseIterate(const AuROString &string)
return string.Size() - uLastValid; return string.Size() - uLastValid;
} }
static AuROString AuCodepointsReverseIterateSubStrPrefixView(const AuROString &string) static constexpr AuROString AuCodepointsReverseIterateSubStrPrefixView(const AuROString &string)
{ {
auto uOffset = AuCodepointsReverseIterate(string); auto uOffset = AuCodepointsReverseIterate(string);
if (uOffset == AuROString::npos) if (uOffset == AuROString::npos)
@ -1459,7 +1526,7 @@ static AuROString AuCodepointsReverseIterateSubStrPrefixView(const AuROString &s
return string.RemoveSuffix(uOffset); return string.RemoveSuffix(uOffset);
} }
static AuROString AuCodepointsReverseIterateSubStrSuffixView(const AuROString &string) static constexpr AuROString AuCodepointsReverseIterateSubStrSuffixView(const AuROString &string)
{ {
auto uLastValid = AuCodepointsFindPreviousValidByteOffsetFromByteOffset(string, string.Size()); auto uLastValid = AuCodepointsFindPreviousValidByteOffsetFromByteOffset(string, string.Size());
if (uLastValid == AuROString::npos) if (uLastValid == AuROString::npos)