[+] AuCodepointsFindPreviousValidByteOffsetFromOffset

[+] AuCodepointsFindPreviousValidByteOffsetFromByteOffset
[+] AuCodepointsFindCodepointOffset
This commit is contained in:
Reece Wilson 2024-04-19 23:12:39 +01:00
parent 72853a54ab
commit 2a0dc1f064

View File

@ -15,9 +15,10 @@
Implements: AuCodepointsTransform, AuCodepointsTransformASCIIOp, AuCodepointsToLower, AuCodepointsToUpper, Implements: AuCodepointsTransform, AuCodepointsTransformASCIIOp, AuCodepointsToLower, AuCodepointsToUpper,
AuCodepointsCount, AuCodepointsNextLength, AuCodepointsDecode, AuCodepointsEncodeInto, AuCodepointsCount, AuCodepointsNextLength, AuCodepointsDecode, AuCodepointsEncodeInto,
AuCodepointsGetByteOffset(CodepointOffset_t),AuCodepointsGetByteLength(CodepointOffset_t), AuCodepointsGetByteOffset(CodepointOffset_t),AuCodepointsGetByteLength(CodepointOffset_t),
AuCodepointsFindByteOffset[Unsafe], AuCodepointsFindCodepointOffset, AuCodepointsFindByteOffset[Unsafe], AuCodepointsFindCodepointOffset(view, CodepointOffset_t), AuCodepointsFindCodepointOffset(CodepointByteOffset_t),
AuCodepointsContains, AuCodepointsContains,
AuCodepointsReplaceAll, AuCodepointsSplitString (views) AuCodepointsReplaceAll, AuCodepointsSplitString (views),
AuCodepointsFindPreviousValidByteOffsetFromOffset, AuCodepointsFindPreviousValidByteOffsetFromByteOffset
For translating between locales (including utf8-32), defer to AuLocale (Aurora::Locale) in the Aurora Runtime. For translating between locales (including utf8-32), defer to AuLocale (Aurora::Locale) in the Aurora Runtime.
***/ ***/
@ -591,7 +592,7 @@ static CodepointByteOffset_t AuCodepointsFindByteOffset(const AuROString &in,
const AuROString &find, const AuROString &find,
CodepointByteOffset_t uStartPosition = {}) CodepointByteOffset_t uStartPosition = {})
{ {
AuUInt uCounter {}; AuUInt uCounter = 0;
auto uLength = in.length(); auto uLength = in.length();
auto uFindLength = find.length(); auto uFindLength = find.length();
@ -670,7 +671,7 @@ static CodepointByteOffset_t AuCodepointsFindByteOffsetUnsafe(const AuROString &
const AuROString &find, const AuROString &find,
CodepointByteOffset_t uStartPosition = {}) CodepointByteOffset_t uStartPosition = {})
{ {
AuUInt uCounter {}; AuUInt uCounter = 0;
auto uLength = in.length(); auto uLength = in.length();
auto uFindLength = find.length(); auto uFindLength = find.length();
@ -744,6 +745,122 @@ static CodepointByteOffset_t AuCodepointsFindByteOffsetUnsafe(const AuROString &
return AuROString::npos; return AuROString::npos;
} }
/**
 * Finds the byte offset of the codepoint that immediately precedes a codepoint offset.
 *
 * The view is walked from its first byte; each lead byte determines how many bytes the
 * sequence occupies. The function returns the byte offset at which the uStartPosition'th
 * sequence (counting from one) begins, which is the codepoint at zero-based index
 * uStartPosition - 1.
 *
 * Only lead bytes are inspected; trailing bytes of a sequence are skipped without being
 * checked.
 *
 * @param in             UTF-8 view to scan
 * @param uStartPosition codepoint offset whose predecessor is wanted
 * @return byte offset of the preceding codepoint, or AuROString::npos when:
 *         - uStartPosition is zero (nothing precedes the first codepoint),
 *         - the scan meets a byte that cannot start a sequence,
 *         - a sequence would run past the end of the view, or
 *         - the view ends before uStartPosition sequences were seen.
 */
static CodepointByteOffset_t AuCodepointsFindPreviousValidByteOffsetFromOffset(const AuROString &in,
                                                                               CodepointOffset_t uStartPosition = {})
{
    if (uStartPosition == 0)
    {
        return AuROString::npos;
    }

    AuUInt uCounter = 0;
    const char *pStart = in.data();
    const char *pEnd = pStart + in.length();
    const char *pItr = pStart;

    while (pItr != pEnd)
    {
        AuUInt32 nby {};
        auto ch = *pItr;
        unsigned int result = (ch & 0xF0);

        if ((ch & 0x80) == 0)
        {
            nby = 1;
        }
        else if ((ch & 0xE0) == 0xC0)
        {
            nby = 2;
        }
        else if (result == 0xE0)
        {
            nby = 3;
        }
        else if (result == 0xF0)
        {
            // The narrower mask has to be tested first: every byte that satisfies
            // (ch & 0x0c) == 0x0c also satisfies (ch & 0x08) == 0x08, so the reverse
            // order would make the 6-byte case unreachable.
            if ((ch & 0x0c) == 0x0c)
            {
                // Special/Historic UTF8
                nby = 6;
            }
            else if ((ch & 0x08) == 0x08)
            {
                // Special/Historic UTF8
                nby = 5;
            }
            else
            {
                nby = 4;
            }
        }
        else
        {
            // 10xxxxxx: a continuation byte where a lead byte was expected
            break;
        }

        // Compare against the remaining length instead of forming pItr + nby, which
        // could point beyond one-past-the-end of the buffer.
        if (AuUInt(pEnd - pItr) < nby)
        {
            break;
        }

        uCounter++;

        if (uCounter == uStartPosition)
        {
            return CodepointByteOffset_t(pItr - pStart);
        }

        pItr += nby;
    }

    return AuROString::npos;
}
/**
 * Steps backwards from a byte offset to the start of the sequence that ends just before it.
 *
 * If the byte at uStartPosition - 1 has its high bit clear, that single byte is the answer.
 * Otherwise the function walks backwards over continuation bytes (10xxxxxx) until it meets
 * a different kind of byte or the start of the view, and accepts that position only if it
 * holds a lead byte (11xxxxxx).
 *
 * The number of continuation bytes walked over is not compared against the length the lead
 * byte announces.
 *
 * @param in             UTF-8 view to scan
 * @param uStartPosition byte offset to step back from; valid range is 1 .. in.length()
 * @return byte offset of the preceding sequence start, or AuROString::npos when
 *         uStartPosition is zero, lies beyond the end of the view, or no lead byte is
 *         found while walking backwards.
 */
static CodepointByteOffset_t AuCodepointsFindPreviousValidByteOffsetFromByteOffset(const AuROString &in,
                                                                                   CodepointByteOffset_t uStartPosition = {})
{
    // Validate before doing any pointer arithmetic: zero would form pStart - 1, and an
    // offset past the view (including npos from a failed search) would read out of bounds.
    if (uStartPosition == 0 || uStartPosition > in.length())
    {
        return AuROString::npos;
    }

    const char *pStart = in.data();
    const char *pItr = pStart + uStartPosition - 1;

    // Fast path: the previous byte is a single-byte (7-bit) codepoint
    if ((*pItr & 0x80) == 0)
    {
        return uStartPosition - 1;
    }

    // Rewind over continuation bytes; never step before the first byte of the view
    while (pItr != pStart && (*pItr & 0xC0U) == 0x80U)
    {
        pItr--;
    }

    // Whatever stopped the walk must be a multi-byte lead, otherwise the data is malformed
    if ((*pItr & 0xC0U) != 0xC0U)
    {
        return AuROString::npos;
    }

    return CodepointByteOffset_t(pItr - pStart);
}
/**
 * Translates a byte offset within a view into a codepoint offset.
 *
 * The result is whatever AuCodepointsCount reports for the leading uBytePosition bytes
 * of the view, as produced by in.substr(0, uBytePosition).
 *
 * @param in            view the byte offset refers to
 * @param uBytePosition byte offset to translate
 * @return the count returned by AuCodepointsCount for that prefix
 */
static CodepointOffset_t AuCodepointsFindCodepointOffset(const AuROString &in,
                                                         CodepointByteOffset_t uBytePosition)
{
    const auto prefix = in.substr(0, uBytePosition);
    return AuCodepointsCount(prefix);
}
static CodepointOffset_t AuCodepointsFindCodepointOffset(const AuROString &in, static CodepointOffset_t AuCodepointsFindCodepointOffset(const AuROString &in,
const AuROString &find, const AuROString &find,
CodepointOffset_t uStartPosition = {}) CodepointOffset_t uStartPosition = {})