[+] Missing external class of iterative/codepoint length locale APIs
[+] (internal) UTF8::IterateUTF8
This commit is contained in:
parent
592a93c19d
commit
fe19a47a2a
@ -40,6 +40,14 @@ namespace Aurora::Locale::Encoding
|
||||
AUKN_SYM AuUInt32 CountSJISLength (const Memory::MemoryViewRead &sjis, bool bytes = false); // codepoint = one character
|
||||
AUKN_SYM AuUInt32 CountGBK16Length (const Memory::MemoryViewRead &gbk, bool bytes = false); // codepoint = at most; one GBK byte pair
|
||||
|
||||
|
||||
AUKN_SYM AuUInt32 CountEncodedStringLength(ECodePage page, const Memory::MemoryViewRead &view, bool bytes = false);
|
||||
|
||||
// Byte length of the UTF-32 unit at the start of the view: 4 when the view holds at least four bytes, otherwise 0.
AUKN_SYM AuUInt32 IterateUTF32 (const Memory::MemoryViewRead &utf32);
|
||||
// Forwards the view to UTF16::GetLenUC2CodePointLE and returns its result.
// NOTE(review): presumably the byte length of the leading little-endian UTF-16 code point, 0 on failure - confirm against that helper.
AUKN_SYM AuUInt32 IterateUTF16 (const Memory::MemoryViewRead &utf16);
|
||||
// Forwards the view to UTF16::GetLenUC2CodePointBE and returns its result.
// NOTE(review): presumably the byte length of the leading big-endian UTF-16 code point, 0 on failure - confirm against that helper.
AUKN_SYM AuUInt32 IterateUTF16BE(const Memory::MemoryViewRead &utf16);
|
||||
// Byte length of the UTF-8 sequence announced by the first byte of the view (via the internal UTF8::IterateUTF8).
// Returns 0 when the lead byte is not recognised or the view is shorter than the announced length.
AUKN_SYM AuUInt32 IterateUTF8 (const Memory::MemoryViewRead &utf8);
|
||||
// Forwards the view to SJIS::GetLenSJISCodePoint and returns its result.
// NOTE(review): presumably the byte length of the leading Shift-JIS character, 0 on failure - confirm against that helper.
AUKN_SYM AuUInt32 IterateSJIS (const Memory::MemoryViewRead &sjis);
|
||||
// Forwards the view to GBK::GetLenGBKCodePoint and returns its result.
// NOTE(review): presumably the byte length of the leading GBK character, 0 on failure - confirm against that helper.
AUKN_SYM AuUInt32 IterateGBK16 (const Memory::MemoryViewRead &gbk);
|
||||
|
||||
// Selects one of the Iterate* functions above according to `page` and returns its result for `view`.
// Code pages without a dedicated case yield a value-initialised result (0).
AUKN_SYM AuUInt32 IterateEncodedString(ECodePage page, const Memory::MemoryViewRead &view);
|
||||
}
|
@ -132,4 +132,56 @@ namespace Aurora::Locale::Encoding
|
||||
return {};
|
||||
}
|
||||
}
|
||||
|
||||
// Reports how many bytes the UTF-32 unit at the front of the view occupies:
// always 4, provided the view actually holds four bytes; 0 when it is too short.
AUKN_SYM AuUInt32 IterateUTF32(const Memory::MemoryViewRead &utf32)
{
    if (utf32.length >= 4)
    {
        return 4;
    }

    return 0;
}
|
||||
|
||||
// Hands the raw bytes of the view to the little-endian UTF-16 helper and
// returns whatever length it reports.
AUKN_SYM AuUInt32 IterateUTF16(const Memory::MemoryViewRead &utf16)
{
    const auto pBase = utf16.ToPointer();
    const auto uLength = utf16.length;
    return UTF16::GetLenUC2CodePointLE(pBase, uLength);
}
|
||||
|
||||
// Hands the raw bytes of the view to the big-endian UTF-16 helper and
// returns whatever length it reports.
AUKN_SYM AuUInt32 IterateUTF16BE(const Memory::MemoryViewRead &utf16)
{
    const auto pBase = utf16.ToPointer();
    const auto uLength = utf16.length;
    return UTF16::GetLenUC2CodePointBE(pBase, uLength);
}
|
||||
|
||||
// Exported entry point for the internal UTF-8 iterator: returns the result of
// UTF8::IterateUTF8 for the given view unchanged.
AUKN_SYM AuUInt32 IterateUTF8(const Memory::MemoryViewRead &utf8)
{
    const AuUInt32 uConsumed = UTF8::IterateUTF8(utf8);
    return uConsumed;
}
|
||||
|
||||
// Hands the raw bytes of the view to the Shift-JIS helper and returns
// whatever length it reports.
AUKN_SYM AuUInt32 IterateSJIS(const Memory::MemoryViewRead &sjis)
{
    const auto pBase = sjis.ToPointer();
    const auto uLength = sjis.length;
    return SJIS::GetLenSJISCodePoint(pBase, uLength);
}
|
||||
|
||||
// Hands the raw bytes of the view to the GBK helper and returns whatever
// length it reports.
AUKN_SYM AuUInt32 IterateGBK16(const Memory::MemoryViewRead &gbk)
{
    const auto pBase = gbk.ToPointer();
    const auto uLength = gbk.length;
    return GBK::GetLenGBKCodePoint(pBase, uLength);
}
|
||||
|
||||
// Returns the result of the Iterate* function that matches `page`, applied to
// the start of `view`.
//
// page: code page the bytes in `view` are encoded in.
// view: bytes to inspect; only the leading character is examined.
//
// Code pages without a dedicated iterator yield a value-initialised result (0).
AUKN_SYM AuUInt32 IterateEncodedString(ECodePage page, const Memory::MemoryViewRead &view)
{
    switch (page)
    {
    case ECodePage::eGBK:
        return IterateGBK16(view);
    case ECodePage::eUTF8:
        return IterateUTF8(view);
    case ECodePage::eSJIS:
        return IterateSJIS(view);
    case ECodePage::eUTF32:
    case ECodePage::eUTF32BE:
        // UTF-32 units are fixed width regardless of byte order, so both
        // variants share IterateUTF32. This previously dispatched to
        // IterateUTF16, which measured UTF-32 input with the UTF-16 helper.
        return IterateUTF32(view);
    case ECodePage::eUTF16:
        return IterateUTF16(view);
    case ECodePage::eUTF16BE:
        return IterateUTF16BE(view);
    default:
        return {};
    }
}
|
||||
}
|
@ -170,4 +170,52 @@ namespace Aurora::Locale::Encoding::UTF8
|
||||
else
|
||||
return 0; // I've seen 7 char support in some libs, i thought we should only go up to 6? i haven't seen a coeffient of 0x80000000 x 200h [-1] used in any of them
|
||||
}
|
||||
|
||||
// Determines the byte length of the UTF-8 sequence that starts at the front of
// `utf8` by classifying its lead byte. Only the first byte is inspected;
// continuation bytes are not validated here.
//
// Lead byte patterns (legacy 5 and 6 byte forms are still recognised):
//   0xxxxxxx -> 1    110xxxxx -> 2    1110xxxx -> 3
//   11110xxx -> 4    111110xx -> 5    1111110x -> 6
//
// Returns the sequence length in bytes, or 0 when the view is empty, the first
// byte is not a valid lead byte (a continuation byte, 0xFE or 0xFF), or the
// view is shorter than the announced sequence.
static AuUInt32 IterateUTF8(const Memory::MemoryViewRead &utf8)
{
    // Never touch the first byte of an empty view.
    if (utf8.length == 0)
    {
        return 0;
    }

    // Work on an unsigned copy so the mask tests do not depend on the
    // signedness of plain char.
    const auto ch = static_cast<unsigned char>(*utf8.Begin<char>());

    AuUInt32 nby = 0;

    if ((ch & 0x80) == 0x00)
    {
        nby = 1;
    }
    else if ((ch & 0xE0) == 0xC0)
    {
        nby = 2;
    }
    else if ((ch & 0xF0) == 0xE0)
    {
        nby = 3;
    }
    else if ((ch & 0xF8) == 0xF0)
    {
        nby = 4;
    }
    else if ((ch & 0xFC) == 0xF8)
    {
        nby = 5;
    }
    else if ((ch & 0xFE) == 0xFC)
    {
        // The previous bit test for the 5 byte form also matched these lead
        // bytes, so the 6 byte form could never be reported.
        nby = 6;
    }
    else
    {
        // Continuation byte (10xxxxxx) or 0xFE / 0xFF: not a lead byte.
        return 0;
    }

    // The announced sequence must fit inside the view.
    if (nby > utf8.length)
    {
        return 0;
    }

    return nby;
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user