[+] Missing external class of iterative/codepoint-length locale APIs

[+] (internal) UTF8::IterateUTF8
This commit is contained in:
Reece Wilson 2022-08-13 06:01:41 +01:00
parent 592a93c19d
commit fe19a47a2a
3 changed files with 109 additions and 1 deletions

View File

@ -40,6 +40,14 @@ namespace Aurora::Locale::Encoding
AUKN_SYM AuUInt32 CountSJISLength (const Memory::MemoryViewRead &sjis, bool bytes = false); // codepoint = one character
AUKN_SYM AuUInt32 CountGBK16Length (const Memory::MemoryViewRead &gbk, bool bytes = false); // codepoint = at most; one GBK byte pair
AUKN_SYM AuUInt32 CountEncodedStringLength(ECodePage page, const Memory::MemoryViewRead &view, bool bytes = false);
// Iterate* family: each function inspects the start of the given view and returns an AuUInt32.
// NOTE(review): presumably the number of bytes occupied by the first code point, with 0 meaning
// "invalid or truncated" - confirm against the per-encoding helpers these forward to.

// Returns 4 when the view holds at least four bytes, otherwise 0.
AUKN_SYM AuUInt32 IterateUTF32 (const Memory::MemoryViewRead &utf32);
// Little-endian UTF-16 variant.
AUKN_SYM AuUInt32 IterateUTF16 (const Memory::MemoryViewRead &utf16);
// Big-endian UTF-16 variant.
AUKN_SYM AuUInt32 IterateUTF16BE(const Memory::MemoryViewRead &utf16);
// UTF-8 variant; the length is derived from the lead byte of the view.
AUKN_SYM AuUInt32 IterateUTF8 (const Memory::MemoryViewRead &utf8);
// Shift-JIS variant.
AUKN_SYM AuUInt32 IterateSJIS (const Memory::MemoryViewRead &sjis);
// GBK variant.
AUKN_SYM AuUInt32 IterateGBK16 (const Memory::MemoryViewRead &gbk);
// Dispatches on `page` to one of the Iterate* functions above; code pages without a case return 0.
AUKN_SYM AuUInt32 IterateEncodedString(ECodePage page, const Memory::MemoryViewRead &view);
}

View File

@ -132,4 +132,56 @@ namespace Aurora::Locale::Encoding
return {};
}
}
// Size in bytes of the UTF-32 unit at the front of `utf32`.
// Yields 4 when the view holds at least four bytes, and 0 when it is shorter than that.
AUKN_SYM AuUInt32 IterateUTF32(const Memory::MemoryViewRead &utf32)
{
    if (utf32.length >= 4)
    {
        return 4;
    }

    return 0;
}
// Little-endian UTF-16 entry point: hands the view's base pointer and byte length to
// UTF16::GetLenUC2CodePointLE and returns whatever that helper reports.
// NOTE(review): presumably the byte length of the leading code point - confirm in the UTF16 helper.
AUKN_SYM AuUInt32 IterateUTF16(const Memory::MemoryViewRead &utf16)
{
    const auto pBase = utf16.ToPointer();
    const auto uAvailable = utf16.length;

    return UTF16::GetLenUC2CodePointLE(pBase, uAvailable);
}
// Big-endian UTF-16 entry point: hands the view's base pointer and byte length to
// UTF16::GetLenUC2CodePointBE and returns whatever that helper reports.
// NOTE(review): presumably the byte length of the leading code point - confirm in the UTF16 helper.
AUKN_SYM AuUInt32 IterateUTF16BE(const Memory::MemoryViewRead &utf16)
{
    const auto pBase = utf16.ToPointer();
    const auto uAvailable = utf16.length;

    return UTF16::GetLenUC2CodePointBE(pBase, uAvailable);
}
// Exported UTF-8 entry point; simply defers to the internal UTF8::IterateUTF8 helper
// and returns its result unchanged.
AUKN_SYM AuUInt32 IterateUTF8(const Memory::MemoryViewRead &utf8)
{
    const AuUInt32 uAdvance = UTF8::IterateUTF8(utf8);
    return uAdvance;
}
// Shift-JIS entry point: hands the view's base pointer and byte length to
// SJIS::GetLenSJISCodePoint and returns whatever that helper reports.
// NOTE(review): presumably the byte length of the leading code point - confirm in the SJIS helper.
AUKN_SYM AuUInt32 IterateSJIS(const Memory::MemoryViewRead &sjis)
{
    const auto pBase = sjis.ToPointer();
    const auto uAvailable = sjis.length;

    return SJIS::GetLenSJISCodePoint(pBase, uAvailable);
}
// GBK entry point: hands the view's base pointer and byte length to
// GBK::GetLenGBKCodePoint and returns whatever that helper reports.
// NOTE(review): presumably the byte length of the leading code point - confirm in the GBK helper.
AUKN_SYM AuUInt32 IterateGBK16(const Memory::MemoryViewRead &gbk)
{
    const auto pBase = gbk.ToPointer();
    const auto uAvailable = gbk.length;

    return GBK::GetLenGBKCodePoint(pBase, uAvailable);
}
// Code-page dispatcher for the Iterate* family.
//
// page: encoding of the bytes in `view`.
// view: bytes positioned at the start of the sequence to inspect.
//
// Returns the result of the Iterate* function that corresponds to `page`.
// Code pages without a case below yield 0.
AUKN_SYM AuUInt32 IterateEncodedString(ECodePage page, const Memory::MemoryViewRead &view)
{
    switch (page)
    {
    case ECodePage::eGBK:
        return IterateGBK16(view);
    case ECodePage::eUTF8:
        return IterateUTF8(view);
    case ECodePage::eSJIS:
        return IterateSJIS(view);
    case ECodePage::eUTF32:
    case ECodePage::eUTF32BE:
        // UTF-32 units are a fixed four bytes regardless of byte order, so both variants share
        // IterateUTF32. (This previously fell through to IterateUTF16, which applies UTF-16
        // rules to UTF-32 data.)
        return IterateUTF32(view);
    case ECodePage::eUTF16:
        return IterateUTF16(view);
    case ECodePage::eUTF16BE:
        return IterateUTF16BE(view);
    default:
        return {};
    }
}
}

View File

@ -170,4 +170,52 @@ namespace Aurora::Locale::Encoding::UTF8
else
return 0; // I've seen 7 char support in some libs, i thought we should only go up to 6? i haven't seen a coeffient of 0x80000000 x 200h [-1] used in any of them
}
// Determines how many bytes the UTF-8 sequence at the start of `utf8` occupies, judged solely
// from its lead byte (continuation bytes are not validated here).
//
// Lead byte patterns recognised (legacy 5- and 6-byte forms are still accepted):
//   0xxxxxxx -> 1    110xxxxx -> 2    1110xxxx -> 3
//   11110xxx -> 4    111110xx -> 5    1111110x -> 6
//
// Returns 0 when:
//   - the view is empty,
//   - the first byte is not a valid lead byte (a continuation byte 10xxxxxx, or 0xFE / 0xFF),
//   - the view is shorter than the sequence length the lead byte announces.
static AuUInt32 IterateUTF8(const Memory::MemoryViewRead &utf8)
{
    // Never touch the first byte of an empty view.
    if (utf8.length == 0)
    {
        return 0;
    }

    // Work on an unsigned copy so the bit tests do not depend on the signedness of `char`.
    const auto lead = static_cast<unsigned char>(*utf8.Begin<char>());

    AuUInt32 nby = 0;

    if ((lead & 0x80) == 0x00)
    {
        nby = 1;
    }
    else if ((lead & 0xE0) == 0xC0)
    {
        nby = 2;
    }
    else if ((lead & 0xF0) == 0xE0)
    {
        nby = 3;
    }
    else if ((lead & 0xF8) == 0xF0)
    {
        nby = 4;
    }
    else if ((lead & 0xFC) == 0xF8)
    {
        nby = 5;
    }
    else if ((lead & 0xFE) == 0xFC)
    {
        // Each mask includes the terminating zero bit of the prefix, so this branch is reachable.
        // The previous `& 0x08` test ran first and also matched every 0xFC / 0xFD lead byte.
        nby = 6;
    }
    else
    {
        // Stray continuation byte, or 0xFE / 0xFF which never start a sequence.
        return 0;
    }

    // The announced sequence must fit inside the remaining bytes of the view.
    if (nby > utf8.length)
    {
        return 0;
    }

    return nby;
}
}