From 2a0dc1f064a364d8777085f396ce102679445796 Mon Sep 17 00:00:00 2001
From: Jamie Reece Wilson
Date: Fri, 19 Apr 2024 23:12:39 +0100
Subject: [PATCH] [+] AuCodepointsFindPreviousValidByteOffsetFromOffset [+]
 AuCodepointsFindPreviousValidByteOffsetFromByteOffset [+]
 AuCodepointsFindCodepointOffset

---
 Include/auROXTL/auStringUtils.hpp | 137 +++++++++++++++++++++++++++++-
 1 file changed, 133 insertions(+), 4 deletions(-)

diff --git a/Include/auROXTL/auStringUtils.hpp b/Include/auROXTL/auStringUtils.hpp
index 388f6f5..f3dd997 100644
--- a/Include/auROXTL/auStringUtils.hpp
+++ b/Include/auROXTL/auStringUtils.hpp
@@ -15,9 +15,10 @@
     Implements: AuCodepointsTransform, AuCodepointsTransformASCIIOp, AuCodepointsToLower, AuCodepointsToUpper,
         AuCodepointsCount, AuCodepointsNextLength, AuCodepointsDecode, AuCodepointsEncodeInto,
         AuCodepointsGetByteOffset(CodepointOffset_t),AuCodepointsGetByteLength(CodepointOffset_t),
-        AuCodepointsFindByteOffset[Unsafe], AuCodepointsFindCodepointOffset,
+        AuCodepointsFindByteOffset[Unsafe], AuCodepointsFindCodepointOffset(view, CodepointOffset_t), AuCodepointsFindCodepointOffset(CodepointByteOffset_t),
         AuCodepointsContains,
-        AuCodepointsReplaceAll, AuCodepointsSplitString (views)
+        AuCodepointsReplaceAll, AuCodepointsSplitString (views),
+        AuCodepointsFindPreviousValidByteOffsetFromOffset, AuCodepointsFindPreviousValidByteOffsetFromByteOffset
 
     For translating between locales (including utf8-32), defer to AuLocale (Aurora::Locale) in the Aurora Runtime.
 ***/
@@ -591,7 +592,7 @@ static CodepointByteOffset_t AuCodepointsFindByteOffset(const AuROString &in,
                                                         const AuROString &find,
                                                         CodepointByteOffset_t uStartPosition = {})
 {
-    AuUInt uCounter {};
+    AuUInt uCounter = 0;
     auto uLength = in.length();
     auto uFindLength = find.length();
 
@@ -670,7 +671,7 @@ static CodepointByteOffset_t AuCodepointsFindByteOffsetUnsafe(const AuROString &
                                                               const AuROString &find,
                                                               CodepointByteOffset_t uStartPosition = {})
 {
-    AuUInt uCounter {};
+    AuUInt uCounter = 0;
     auto uLength = in.length();
     auto uFindLength = find.length();
 
@@ -744,6 +745,134 @@ static CodepointByteOffset_t AuCodepointsFindByteOffsetUnsafe(const AuROString &
     return AuROString::npos;
 }
 
+// Returns the byte offset at which the codepoint immediately preceding codepoint index
+// uStartPosition begins, found by walking the UTF-8 sequences of the view from its start.
+// Only lead bytes are inspected; continuation bytes are skipped over, not validated.
+// Returns AuROString::npos when uStartPosition is zero, when the view holds fewer than
+// uStartPosition complete sequences, or when a continuation byte is met in lead position.
+static CodepointByteOffset_t AuCodepointsFindPreviousValidByteOffsetFromOffset(const AuROString &in,
+                                                                               CodepointOffset_t uStartPosition = {})
+{
+    AuUInt uCounter = 0;
+    auto uLength = in.length();
+
+    const char *pStart = in.data();
+    const char *pItr = pStart;
+    const char *pEnd = pStart + uLength;
+
+    if (uStartPosition == 0)
+    {
+        // Nothing precedes the first codepoint
+        return AuROString::npos;
+    }
+
+    while (pItr != pEnd)
+    {
+        AuUInt32 nby {};
+        auto ch = *pItr;
+        unsigned int result = (ch & 0xF0);
+
+        if ((ch & 0x80) == 0)
+        {
+            nby = 1;
+        }
+        else if ((ch & 0xE0) == 0xC0)
+        {
+            nby = 2;
+        }
+        else if (result == 0xE0)
+        {
+            nby = 3;
+        }
+        else if (result == 0xF0)
+        {
+            // The narrower 0x0c mask must be tested first: every lead byte that
+            // matches it also matches 0x08, which would shadow the 6-byte case.
+            if ((ch & 0x0c) == 0x0c)
+            {
+                // Special/Historic UTF8
+                nby = 6;
+            }
+            else if ((ch & 0x08) == 0x08)
+            {
+                // Special/Historic UTF8
+                nby = 5;
+            }
+            else
+            {
+                nby = 4;
+            }
+        }
+        else
+        {
+            // Continuation byte where a lead byte was expected
+            break;
+        }
+
+        // Truncated sequence; compare lengths rather than forming a pointer past pEnd
+        if (nby > AuUInt(pEnd - pItr))
+        {
+            break;
+        }
+
+        uCounter++;
+
+        if (uCounter == uStartPosition)
+        {
+            return CodepointByteOffset_t(pItr - pStart);
+        }
+
+        pItr += nby;
+    }
+
+    return AuROString::npos;
+}
+
+// Steps back from byte offset uStartPosition to the byte offset at which the preceding
+// codepoint begins: an ASCII byte directly before the offset is returned as-is, otherwise
+// continuation bytes (10xxxxxx) are skipped backwards until a lead byte is reached.
+// Returns AuROString::npos when uStartPosition is zero or lies beyond the end of the view,
+// or when the byte the walk stops on is not a multi-byte lead (11xxxxxx).
+static CodepointByteOffset_t AuCodepointsFindPreviousValidByteOffsetFromByteOffset(const AuROString &in,
+                                                                                   CodepointByteOffset_t uStartPosition = {})
+{
+    // Reject out-of-range offsets before any pointer is formed or dereferenced
+    if (uStartPosition == 0 ||
+        uStartPosition > in.length())
+    {
+        return AuROString::npos;
+    }
+
+    const char *pStart = in.data();
+    const char *pItr = pStart + uStartPosition - 1;
+
+    if ((*pItr & 0x80) == 0)
+    {
+        return uStartPosition - 1;
+    }
+
+    while (pItr != pStart &&
+           (*pItr & 0xC0U) == 0x80U)
+    {
+        pItr--;
+    }
+
+    if ((*pItr & 0xC0U) != 0xC0U)
+    {
+        return AuROString::npos;
+    }
+
+    return CodepointByteOffset_t(pItr - pStart);
+}
+
+// Maps a byte offset to a codepoint offset: returns AuCodepointsCount of the leading
+// uBytePosition bytes of the view.
+static CodepointOffset_t AuCodepointsFindCodepointOffset(const AuROString &in,
+                                                         CodepointByteOffset_t uBytePosition)
+{
+    return AuCodepointsCount(in.substr(0, uBytePosition));
+}
+
 static CodepointOffset_t AuCodepointsFindCodepointOffset(const AuROString &in,
                                                          const AuROString &find,
                                                          CodepointOffset_t uStartPosition = {})