187 lines
5.8 KiB
C++
187 lines
5.8 KiB
C++
/***
|
|
Copyright (C) 2021 J Reece Wilson (a/k/a "Reece"). All rights reserved.
|
|
|
|
File: Encoding.cpp
|
|
Date: 2021-8-19
|
|
Author: Reece
|
|
***/
|
|
#include <Source/RuntimeInternal.hpp>
|
|
#include "Encoding.hpp"
|
|
|
|
namespace Aurora::Locale::Encoding
|
|
{
|
|
/// OLD ENCODING API
|
|
|
|
/// Legacy one-shot conversion entry point: builds a TextStreamEncoder for
/// `page` and hands it the `utf8` input view and the `binary` output view.
///
/// @param utf8   source view; its pointer and length are forwarded unchanged
/// @param binary destination view; its pointer and length are forwarded unchanged
/// @param page   code page the encoder is constructed with
/// @return the pair produced by TextStreamEncoder::DecodeUTF8, unmodified
///
/// NOTE(review): presumably this converts UTF-8 text into the encoding of
/// `page` and the pair is (bytes read, bytes written) - confirm against
/// TextStreamEncoder, whose method is named DecodeUTF8.
AUKN_SYM AuStreamReadWrittenPair_t EncodeUTF8(const AuMemoryViewRead &utf8, const AuMemoryViewWrite & binary, ECodePage page)
{
    TextStreamEncoder encoder(page);

    AuStreamReadWrittenPair_t outcome = encoder.DecodeUTF8(utf8.ptr, utf8.length, binary.ptr, binary.length);
    return outcome;
}
|
|
|
|
/// Legacy one-shot conversion entry point: builds a TextStreamProcessor for
/// `page` and hands it the `binary` input view and the `utf8` output view.
///
/// @param binary source view; its pointer and length are forwarded unchanged
/// @param utf8   destination view; its pointer and length are forwarded unchanged
/// @param page   code page the processor is constructed with
/// @return the pair produced by TextStreamProcessor::EncodeUTF8, unmodified
///
/// NOTE(review): presumably this converts text in the encoding of `page`
/// into UTF-8 and the pair is (bytes read, bytes written) - confirm against
/// TextStreamProcessor, whose method is named EncodeUTF8.
AUKN_SYM AuStreamReadWrittenPair_t DecodeUTF8(const AuMemoryViewRead &binary, const AuMemoryViewWrite & utf8, ECodePage page)
{
    TextStreamProcessor processor(page);

    AuStreamReadWrittenPair_t outcome = processor.EncodeUTF8(binary.ptr, binary.length, utf8.ptr, utf8.length);
    return outcome;
}
|
|
|
|
/// Converts `binary` into the string `out` using two passes over the
/// view-to-view DecodeUTF8 overload.
///
/// @param binary source view
/// @param out    string that receives the converted text; it is resized here
/// @param page   code page forwarded to both passes
/// @return the pair returned by the second (writing) pass, or a
///         value-initialized pair when either resize of `out` fails
///
/// NOTE(review): the first pass is given an empty write view; this relies on
/// the converter reporting the size it would have written in `.second` when
/// there is no output buffer - confirm against TextStreamProcessor.
AUKN_SYM AuStreamReadWrittenPair_t DecodeUTF8(const AuMemoryViewRead &binary, AuString &out, ECodePage page)
{
    // Pass 1: no output buffer, only used to learn how large `out` must be.
    auto estimate = DecodeUTF8(binary, {}, page);
    if (!AuTryResize(out, estimate.second))
    {
        return {};
    }

    // Pass 2: convert directly into the string's storage.
    auto ret = DecodeUTF8(binary, Memory::MemoryViewWrite(out.data(), out.size()), page);

    // Trim to what the second pass actually wrote. The previous code resized
    // to the first-pass estimate again, which was a no-op and could leave
    // unwritten trailing bytes in `out` if the second pass wrote less.
    if (!AuTryResize(out, ret.second))
    {
        return {};
    }

    return ret;
}
|
|
|
|
/// Pointer/length flavour of the string-producing DecodeUTF8: wraps the raw
/// buffer in a read view and converts it into `out` using two passes over the
/// view-to-view DecodeUTF8 overload.
///
/// @param binary       start of the source buffer
/// @param binaryLength length of the source buffer, as passed to MemoryViewRead
/// @param out          string that receives the converted text; it is resized here
/// @param page         code page forwarded to both passes
/// @return the pair returned by the second (writing) pass, or a
///         value-initialized pair when either resize of `out` fails
///
/// NOTE(review): the first pass is given an empty write view; this relies on
/// the converter reporting the size it would have written in `.second` when
/// there is no output buffer - confirm against TextStreamProcessor.
AuStreamReadWrittenPair_t DecodeUTF8(void *binary, AuUInt32 binaryLength, AuString &out, ECodePage page)
{
    // Build the input view once; both passes read the same bytes.
    const Memory::MemoryViewRead input(binary, binaryLength);

    // Pass 1: no output buffer, only used to learn how large `out` must be.
    auto estimate = DecodeUTF8(input, {}, page);
    if (!AuTryResize(out, estimate.second))
    {
        return {};
    }

    // Pass 2: convert directly into the string's storage.
    auto ret = DecodeUTF8(input, Memory::MemoryViewWrite(out.data(), out.size()), page);

    // Trim to what the second pass actually wrote. The previous code resized
    // to the first-pass estimate again, which was a no-op and could leave
    // unwritten trailing bytes in `out` if the second pass wrote less.
    if (!AuTryResize(out, ret.second))
    {
        return {};
    }

    return ret;
}
|
|
|
|
/// Supporting full 6 byte UTF-8, copies or returns the available streams from @param utf8 to @param utf32
|
|
|
|
/// Public wrapper around UTF32::UTF8ToCp: forwards the `utf8` read view and
/// the `utf32` write view unchanged.
///
/// @param utf8  source view
/// @param utf32 destination view
/// @return the pair returned by UTF32::UTF8ToCp, unmodified
AUKN_SYM AuStreamReadWrittenPair_t ReadUTF8IntoUTF32ByteString(const AuMemoryViewRead &utf8, const AuMemoryViewWrite &utf32)
{
    AuStreamReadWrittenPair_t outcome = UTF32::UTF8ToCp(utf8, utf32);
    return outcome;
}
|
|
|
|
/// Public wrapper around UTF32::CPToUTF8: forwards the `utf32` read view and
/// the `utf8` write view unchanged.
///
/// @param utf32 source view
/// @param utf8  destination view
/// @return the pair returned by UTF32::CPToUTF8, unmodified
AUKN_SYM AuStreamReadWrittenPair_t ReadUTF32IntoUTF8ByteString(const AuMemoryViewRead &utf32, const AuMemoryViewWrite &utf8)
{
    AuStreamReadWrittenPair_t outcome = UTF32::CPToUTF8(utf32, utf8);
    return outcome;
}
|
|
|
|
/// Endian swap
|
|
|
|
/// Passes the view, reinterpreted as AuUInt32 elements, to UTF32::SwapU32.
///
/// @param utf32 writable view; its start (as AuUInt32) and its element count
///              (in AuUInt32 units) are what UTF32::SwapU32 receives
AUKN_SYM void SwapUTF32(const AuMemoryViewWrite &utf32)
{
    auto firstWord = utf32.Begin<AuUInt32>();
    auto wordCount = utf32.ToCount<AuUInt32>();

    UTF32::SwapU32(firstWord, wordCount);
}
|
|
|
|
/// Passes the view, reinterpreted as AuUInt16 elements, to UTF16::SwapU16.
///
/// @param utf32 writable view over UTF-16 data (the parameter name is a
///              leftover from SwapUTF32 and is kept for interface stability)
///
/// The element type used for both the start pointer and the count is
/// AuUInt16. The previous code used AuUInt32 here (copied from SwapUTF32),
/// which halves the count handed to the 16-bit swap routine.
///
/// NOTE(review): assumes UTF16::SwapU16 takes a count of 16-bit units -
/// confirm against its declaration.
AUKN_SYM void SwapUTF16(const AuMemoryViewWrite &utf32)
{
    UTF16::SwapU16(utf32.Begin<AuUInt16>(), utf32.ToCount<AuUInt16>());
}
|
|
|
|
/// String length awareness utilities
|
|
|
|
/// Public wrapper around UTF32::Count32.
///
/// @param utf32 view whose pointer and length are forwarded
/// @param bytes forwarded unchanged; presumably selects a byte count instead
///              of a character count - confirm against UTF32::Count32
/// @return whatever UTF32::Count32 reports
AUKN_SYM AuUInt32 CountUTF32Length(const AuMemoryViewRead &utf32, bool bytes)
{
    AuUInt32 measured = UTF32::Count32(utf32.ptr, utf32.length, bytes);
    return measured;
}
|
|
|
|
/// Public wrapper around UTF16::Count16 with the trailing flag set to true.
///
/// @param utf16 view whose pointer and length are forwarded
/// @param bytes forwarded unchanged; presumably selects a byte count instead
///              of a character count - confirm against UTF16::Count16
/// @return whatever UTF16::Count16 reports
///
/// NOTE(review): the trailing `true` is `false` in CountUTF16BELength, so it
/// presumably selects little-endian input - confirm.
AUKN_SYM AuUInt32 CountUTF16Length(const AuMemoryViewRead &utf16, bool bytes)
{
    AuUInt32 measured = UTF16::Count16(utf16.ptr, utf16.length, bytes, true);
    return measured;
}
|
|
|
|
/// Public wrapper around UTF16::Count16 with the trailing flag set to false.
///
/// @param utf16 view whose pointer and length are forwarded
/// @param bytes forwarded unchanged; presumably selects a byte count instead
///              of a character count - confirm against UTF16::Count16
/// @return whatever UTF16::Count16 reports
///
/// NOTE(review): the trailing `false` is `true` in CountUTF16Length, so it
/// presumably selects big-endian input - confirm.
AUKN_SYM AuUInt32 CountUTF16BELength(const AuMemoryViewRead &utf16, bool bytes)
{
    AuUInt32 measured = UTF16::Count16(utf16.ptr, utf16.length, bytes, false);
    return measured;
}
|
|
|
|
/// Measures a UTF-8 view by running ReadUTF8IntoUTF32ByteString with an empty
/// output view and inspecting the returned pair.
///
/// @param utf8  view to measure
/// @param bytes when true the pair's first element is returned; otherwise the
///              pair's second element divided by sizeof(AuUInt32), i.e. the
///              number of 32-bit units
/// @return the selected measurement
AUKN_SYM AuUInt32 CountUTF8Length(const AuMemoryViewRead &utf8, bool bytes)
{
    auto measured = ReadUTF8IntoUTF32ByteString(utf8, {});

    if (bytes)
    {
        return measured.first;
    }

    return measured.second / sizeof(AuUInt32);
}
|
|
|
|
/// Public wrapper around SJIS::CountSJIS.
///
/// @param sjis  view whose pointer and length are forwarded
/// @param bytes forwarded unchanged; presumably selects a byte count instead
///              of a character count - confirm against SJIS::CountSJIS
/// @return whatever SJIS::CountSJIS reports
AUKN_SYM AuUInt32 CountSJISLength(const AuMemoryViewRead &sjis, bool bytes)
{
    AuUInt32 measured = SJIS::CountSJIS(sjis.ptr, sjis.length, bytes);
    return measured;
}
|
|
|
|
/// Public wrapper around GBK::CountGbk.
///
/// @param gbk   view whose pointer and length are forwarded
/// @param bytes forwarded unchanged; presumably selects a byte count instead
///              of a character count - confirm against GBK::CountGbk
/// @return whatever GBK::CountGbk reports
AUKN_SYM AuUInt32 CountGBK16Length(const AuMemoryViewRead &gbk, bool bytes)
{
    AuUInt32 measured = GBK::CountGbk(gbk.ptr, gbk.length, bytes);
    return measured;
}
|
|
|
|
/// Dispatches to the per-encoding Count*Length helper that matches `page`.
///
/// @param page  code page that selects the helper
/// @param view  view to measure, forwarded unchanged
/// @param bytes forwarded unchanged to the selected helper
/// @return the selected helper's result, or zero for a code page that has no
///         helper in this switch
AUKN_SYM AuUInt32 CountEncodedStringLength(ECodePage page, const Memory::MemoryViewRead &view, bool bytes)
{
    AuUInt32 measured {};

    switch (page)
    {
    case ECodePage::eGBK:
        measured = CountGBK16Length(view, bytes);
        break;
    case ECodePage::eUTF8:
        measured = CountUTF8Length(view, bytes);
        break;
    case ECodePage::eSJIS:
        measured = CountSJISLength(view, bytes);
        break;
    // Both UTF-32 byte orders share one counting routine.
    case ECodePage::eUTF32:
    case ECodePage::eUTF32BE:
        measured = CountUTF32Length(view, bytes);
        break;
    case ECodePage::eUTF16:
        measured = CountUTF16Length(view, bytes);
        break;
    case ECodePage::eUTF16BE:
        measured = CountUTF16BELength(view, bytes);
        break;
    default:
        // No helper for this page: keep the zero-initialized result.
        break;
    }

    return measured;
}
|
|
|
|
/// Reports the step for one UTF-32 unit: 4 when the view holds at least four
/// bytes, otherwise 0.
///
/// @param utf32 view positioned at the unit to step over; only its length is read
/// @return 4, or 0 when fewer than four bytes remain
AUKN_SYM AuUInt32 IterateUTF32(const Memory::MemoryViewRead &utf32)
{
    if (utf32.length >= 4)
    {
        return 4;
    }

    return 0;
}
|
|
|
|
/// Public wrapper around UTF16::GetLenUC2CodePointLE.
///
/// @param utf16 view whose pointer and length are forwarded
/// @return whatever UTF16::GetLenUC2CodePointLE reports (presumably the byte
///         length of the code point at the start of the view - confirm)
AUKN_SYM AuUInt32 IterateUTF16(const Memory::MemoryViewRead &utf16)
{
    AuUInt32 step = UTF16::GetLenUC2CodePointLE(utf16.ToPointer(), utf16.length);
    return step;
}
|
|
|
|
/// Public wrapper around UTF16::GetLenUC2CodePointBE.
///
/// @param utf16 view whose pointer and length are forwarded
/// @return whatever UTF16::GetLenUC2CodePointBE reports (presumably the byte
///         length of the code point at the start of the view - confirm)
AUKN_SYM AuUInt32 IterateUTF16BE(const Memory::MemoryViewRead &utf16)
{
    AuUInt32 step = UTF16::GetLenUC2CodePointBE(utf16.ToPointer(), utf16.length);
    return step;
}
|
|
|
|
/// Public wrapper around UTF8::IterateUTF8; the view is forwarded unchanged.
///
/// @param utf8 view handed to UTF8::IterateUTF8
/// @return whatever UTF8::IterateUTF8 reports (presumably the byte length of
///         the code point at the start of the view - confirm)
AUKN_SYM AuUInt32 IterateUTF8(const Memory::MemoryViewRead &utf8)
{
    AuUInt32 step = UTF8::IterateUTF8(utf8);
    return step;
}
|
|
|
|
/// Public wrapper around SJIS::GetLenSJISCodePoint.
///
/// @param sjis view whose pointer and length are forwarded
/// @return whatever SJIS::GetLenSJISCodePoint reports (presumably the byte
///         length of the code point at the start of the view - confirm)
AUKN_SYM AuUInt32 IterateSJIS(const Memory::MemoryViewRead &sjis)
{
    AuUInt32 step = SJIS::GetLenSJISCodePoint(sjis.ToPointer(), sjis.length);
    return step;
}
|
|
|
|
/// Public wrapper around GBK::GetLenGBKCodePoint.
///
/// @param gbk view whose pointer and length are forwarded
/// @return whatever GBK::GetLenGBKCodePoint reports (presumably the byte
///         length of the code point at the start of the view - confirm)
AUKN_SYM AuUInt32 IterateGBK16(const Memory::MemoryViewRead &gbk)
{
    AuUInt32 step = GBK::GetLenGBKCodePoint(gbk.ToPointer(), gbk.length);
    return step;
}
|
|
|
|
/// Dispatches to the per-encoding Iterate* helper that matches `page`.
///
/// @param page code page that selects the helper
/// @param view view forwarded unchanged to the selected helper
/// @return the selected helper's result, or zero for a code page that has no
///         helper in this switch
AUKN_SYM AuUInt32 IterateEncodedString(ECodePage page, const Memory::MemoryViewRead &view)
{
    AuUInt32 step {};

    switch (page)
    {
    case ECodePage::eGBK:
        step = IterateGBK16(view);
        break;
    case ECodePage::eUTF8:
        step = IterateUTF8(view);
        break;
    case ECodePage::eSJIS:
        step = IterateSJIS(view);
        break;
    // Both UTF-32 byte orders share one stepping routine.
    case ECodePage::eUTF32:
    case ECodePage::eUTF32BE:
        step = IterateUTF32(view);
        break;
    case ECodePage::eUTF16:
        step = IterateUTF16(view);
        break;
    case ECodePage::eUTF16BE:
        step = IterateUTF16BE(view);
        break;
    default:
        // No helper for this page: keep the zero-initialized result.
        break;
    }

    return step;
}
|
|
} |