[+] Util: AuCodepointsForEach

This commit is contained in:
Reece Wilson 2024-06-14 14:14:51 +01:00
parent bea0f5c8f2
commit cd5dec55b2

View File

@ -30,6 +30,12 @@
// offset in codepoints
/* using CodepointOffset_t = AuUInt; */
// Upper bound on the byte length of a single UTF-8 sequence accepted by the
// codepoint helpers in this file: a lead byte announcing more bytes than this
// causes the input to be rejected.
//  * AURORA_I_SUCK_AND_WANT_MODERN_UTF8 defined: 4, the longest sequence
//    permitted by RFC 3629.
//  * otherwise: 6, which additionally admits the 5- and 6-byte forms of the
//    original (pre-RFC 3629) UTF-8 definition.
#if defined(AURORA_I_SUCK_AND_WANT_MODERN_UTF8)
static const AuUInt8 kAuCodepointUTF8MaxBytes = 4;
#else
static const AuUInt8 kAuCodepointUTF8MaxBytes = 6;
#endif
static auline constexpr bool AuStringContains(const AuROString &value, const AuROString &subpattern)
{
return value.find(subpattern) != AuROString::npos;
@ -296,7 +302,7 @@ static AuList<AuUInt32> AuCodepointsDecode(const AuROString &in)
{
}
if (nby > 6)
if (nby > kAuCodepointUTF8MaxBytes)
{
return {};
}
@ -420,7 +426,7 @@ static AuString AuCodepointsTransform(T op, const AuROString &in)
{
}
if (nby > 6)
if (nby > kAuCodepointUTF8MaxBytes)
{
return {};
}
@ -453,6 +459,81 @@ static AuString AuCodepointsTransform(T op, const AuROString &in)
return ret;
}
template <class T>
static bool AuCodepointsForEach(T op, const AuROString &in)
{
if (in.empty())
{
return true;
}
auto uLength = in.length();
const char *pItr = in.data();
const char *pEnd = pItr + uLength;
while (pItr < pEnd)
{
AuUInt32 c {};
if ((c = *pItr) <= 0x7FU)
{
++pItr;
}
else
{
AuUInt32 nby {};
if ((*pItr & 0xC0U) != 0xC0U)
{
return false;
}
for (AuUInt8 b = *pItr; (b & 0x80U) != 0; b <<= 1, ++nby)
{
}
if (nby > kAuCodepointUTF8MaxBytes)
{
return false;
}
if (AuUInt(pEnd - pItr) < AuUInt(nby))
{
return false;
}
c = *pItr & (AuUInt8(0xFFU) >> (nby + 1));
for (AuUInt32 i = 1; i < nby; ++i)
{
if ((pItr[i] & 0xC0U) != 0x80U)
{
return {};
}
c = (c << 6) | (pItr[i] & 0x3FU);
}
pItr += nby;
}
if constexpr (AuIsSame_v<AuResultOf_t<T, AuUInt32>, bool>)
{
if (!op(c))
{
return false;
}
}
else
{
op(c);
}
}
return true;
}
static auline CodepointByteOffset_t AuCodepointsGetByteOffset(const AuROString &in,
CodepointOffset_t uCodepointIndex)
{