AuroraRuntime/Source/Locale/Encoding/Encoding.cpp

146 lines
5.3 KiB
C++
Raw Normal View History

2021-09-06 10:58:08 +00:00
/***
Copyright (C) 2021 J Reece Wilson (a/k/a "Reece"). All rights reserved.
File: Encoding.cpp
Date: 2021-8-19
Author: Reece
***/
2021-09-30 14:57:41 +00:00
#include <Source/RuntimeInternal.hpp>
2021-09-06 10:58:08 +00:00
#include "../Locale.hpp"
#include "Encoding.hpp"
namespace Aurora::Locale::Encoding
{
/// Identifies a byte-order mark at the very start of @param binary.
///
/// The leading bytes are compared against a fixed table of known signatures; the first entry whose
/// bytes all match wins. Longer signatures are listed before shorter ones that share a prefix
/// (UTF-32 LE "FF FE 00 00" before UTF-16 LE "FF FE"), so the table order is significant.
///
/// @return the BOM descriptor (code page + signature length in bytes) of the first match, or a
///         value-initialized BOM when the buffer is too short or no signature matches.
AUKN_SYM BOM DecodeBOM(const Memory::MemoryViewRead & binary)
{
    struct Signature
    {
        const char *bytes;
        BOM bom;
    };

    // The length comes from the literal's array size minus its NUL terminator, so signatures that
    // contain 0x00 bytes are still measured correctly (strlen would stop early).
#define ADD_PATTERN(str, code) {str, {ECodePage::code, AuArraySize(str) - 1}}
    // Function-local static: the table is built once instead of allocating a list on every call.
    static const Signature kSignatures[] =
    {
        ADD_PATTERN("\xFF\xFE\x00\x00", eUTF32),
        ADD_PATTERN("\x00\x00\xFE\xFF", eUTF32BE),
        ADD_PATTERN("\x84\x31\x95\x33", e18030),
        ADD_PATTERN("\xDD\x73\x66\x73", eUnsupported), // UTF-EBCDIC
        ADD_PATTERN("\xEF\xBB\xBF", eUTF8),
        ADD_PATTERN("\xF7\x64\x4C", eUnsupported), // UTF-1
        ADD_PATTERN("\x2B\x2F\x76", eUTF7), // UTF-7 ("+/v"; only the 3 fixed bytes are checked)
        ADD_PATTERN("\xFB\xEE\x28", eUnsupported), // BOCU-1
        ADD_PATTERN("\x0E\xFE\xFF", eUnsupported), // SCSU
        ADD_PATTERN("\xFF\xFE", eUTF16), // UTF-16 LE
        ADD_PATTERN("\xFE\xFF", eUTF16BE) // UTF-16 BE
    };
#undef ADD_PATTERN

    for (const auto &[bytes, bom] : kSignatures)
    {
        // Not enough input to hold this signature; this also keeps memcmp away from an empty view.
        if (binary.length < bom.length)
        {
            continue;
        }

        if (std::memcmp(binary.ptr, bytes, bom.length) == 0)
        {
            return bom;
        }
    }

    return {};
}
// Legacy API
/// Legacy one-shot helper: builds a TextStreamEncoder for @param page and hands it the UTF-8 input
/// view together with the destination view @param binary.
/// @return the read/written pair reported by the encoder, unchanged.
/// NOTE(review): the encoder member invoked here is called DecodeUTF8 while this function is
/// EncodeUTF8 - presumably the member is named from the stream's point of view; confirm.
AUKN_SYM AuStreamReadWrittenPair_t EncodeUTF8(const Memory::MemoryViewRead &utf8, const Memory::MemoryViewWrite & binary, ECodePage page)
{
    TextStreamEncoder encoder(page);
    auto result = encoder.DecodeUTF8(utf8.ptr, utf8.length, binary.ptr, binary.length);
    return result;
}
/// Legacy one-shot helper: builds a TextStreamProcessor for @param page and hands it the encoded
/// input view @param binary together with the destination view @param utf8.
/// @return the read/written pair reported by the processor, unchanged.
/// NOTE(review): the processor member invoked here is called EncodeUTF8 while this function is
/// DecodeUTF8 - presumably the member is named from the stream's point of view; confirm.
AUKN_SYM AuStreamReadWrittenPair_t DecodeUTF8(const Memory::MemoryViewRead &binary, const Memory::MemoryViewWrite & utf8, ECodePage page)
{
    TextStreamProcessor processor(page);
    auto result = processor.EncodeUTF8(binary.ptr, binary.length, utf8.ptr, utf8.length);
    return result;
}
/// Legacy helper that decodes @param binary (in code page @param page) into the string @param out.
///
/// Works in two passes over the view-to-view overload:
///  1. a pass with an empty destination view, whose `.second` is taken as the size to reserve
///     (presumably the processor reports the required size when given no buffer - confirm);
///  2. the real pass into the resized string, after which @param out is trimmed to the number of
///     bytes that pass reported as written.
/// @return the read/written pair of the second pass.
AUKN_SYM AuStreamReadWrittenPair_t DecodeUTF8(const Memory::MemoryViewRead &binary, AuString &out, ECodePage page)
{
    // Pass 1: measure.
    const auto measured = DecodeUTF8(binary, {}, page);
    out.resize(measured.second);

    // Pass 2: decode straight into the string's storage.
    const Memory::MemoryViewWrite destination(out.data(), out.size());
    auto decoded = DecodeUTF8(binary, destination, page);

    // Drop whatever part of the reservation went unused.
    out.resize(decoded.second);
    return decoded;
}
/// Raw pointer + length convenience form of the string-producing DecodeUTF8.
/// Wraps (@param binary, @param binaryLength) in a read-only memory view and forwards to the
/// view-based overload, which performs the same measure / resize / decode / trim sequence on
/// @param out.
/// @return the read/written pair of the final decode pass.
AuStreamReadWrittenPair_t DecodeUTF8(void *binary, AuUInt32 binaryLength, AuString &out, ECodePage page)
{
    const Memory::MemoryViewRead input(binary, binaryLength);
    return DecodeUTF8(input, out, page);
}
// NEW API
/// Converts as much of the UTF-8 input @param utf8 as possible into UTF-32 code units stored in
/// @param utf32. Per the original note, the full 6 byte UTF-8 form is supported.
/// @return {bytes consumed from @param utf8, bytes written to @param utf32}
AUKN_SYM AuStreamReadWrittenPair_t ReadUTF8IntoUTF32ByteString(const Memory::MemoryViewRead &utf8, const Memory::MemoryViewWrite &utf32)
{
    const char *src = utf8.Begin<const char>();
    const char *srcEnd = utf8.End<const char>();
    AuUInt32 *dst = utf32.Begin<AuUInt32>();
    AuUInt32 *dstEnd = utf32.End<AuUInt32>();

    // The result below is derived from how far `src` and `dst` have moved, so the helper is
    // expected to advance both cursors (presumably it takes them by reference - confirm).
    UTF32::ReadUtf8ByteString(dst, dstEnd, src, srcEnd);

    const auto bytesRead = src - utf8.Begin<const char>();
    // Pointer difference is in code units; scale to bytes.
    const auto bytesWritten = (dst - utf32.Begin<AuUInt32>()) * sizeof(AuUInt32);
    return AuStreamReadWrittenPair_t {bytesRead, bytesWritten};
}
/// Serializes the UTF-32 code units in @param utf32 as UTF-8 into @param utf8. Per the original
/// note, the full 6 byte UTF-8 form is supported.
/// @return {bytes of @param utf32 walked, bytes by which the write cursor advanced in @param utf8}
/// NOTE(review): whatever UTF32::WriteCp returns is ignored and every code unit is visited, so the
/// first member always covers the whole input - confirm this is intended when the output is full.
/// NOTE(review): `counter` is handed to WriteCp but never read back here.
AUKN_SYM AuStreamReadWrittenPair_t ReadUTF32IntoUTF8ByteString(const Memory::MemoryViewRead &utf32, const Memory::MemoryViewWrite &utf8)
{
    const AuUInt32 *const first = utf32.Begin<const AuUInt32>();
    const AuUInt32 *const last = utf32.End<const AuUInt32>();

    char *out = utf8.Begin<char>();
    char *const outEnd = utf8.End<char>();

    AuUInt32 counter {};

    const AuUInt32 *cursor = first;
    while (cursor < last)
    {
        // The remaining capacity is recomputed each time from the current write cursor.
        UTF32::WriteCp(*cursor, out, counter, outEnd - out);
        ++cursor;
    }

    return AuStreamReadWrittenPair_t {(cursor - first) * sizeof(AuUInt32), out - utf8.Begin<char>()};
}
/// Passes the buffer behind @param utf32, viewed as 32-bit units, to UTF32::SwapU32 together with
/// the number of such units the view holds. The buffer is modified in place by that helper.
AUKN_SYM void SwapUTF32(const Memory::MemoryViewWrite &utf32)
{
    auto units = utf32.Begin<AuUInt32>();
    const auto unitCount = utf32.ToCount<AuUInt32>();
    UTF32::SwapU32(units, unitCount);
}
/// Passes the buffer behind the view, viewed as 16-bit units, to UTF16::SwapU16 together with the
/// number of such units the view holds. The buffer is modified in place by that helper.
///
/// @param utf32 despite its (historical) name, this is the UTF-16 buffer to swap.
///
/// The view is interpreted as AuUInt16 rather than AuUInt32: counting in 32-bit units yields half
/// the number of UTF-16 code units, which would leave the second half of the buffer unswapped
/// (assuming SwapU16 takes an element count, mirroring SwapU32 - confirm against UTF16::SwapU16).
AUKN_SYM void SwapUTF16(const Memory::MemoryViewWrite &utf32)
{
    UTF16::SwapU16(utf32.Begin<AuUInt16>(), utf32.ToCount<AuUInt16>());
}
/// Forwards the UTF-32 view's pointer and length, plus the @param bytes flag, to UTF32::Count32
/// and returns its result.
/// NOTE(review): by analogy with CountUTF8Length, @param bytes presumably selects a byte count
/// over a character count - confirm against UTF32::Count32.
AUKN_SYM AuUInt32 CountUTF32Length(const Memory::MemoryViewRead &utf32, bool bytes)
{
    const auto data = utf32.ptr;
    const auto size = utf32.length;
    return UTF32::Count32(data, size, bytes);
}
/// Forwards the UTF-16 view's pointer and length, plus the @param bytes flag, to UTF16::Count16
/// and returns its result.
/// NOTE(review): by analogy with CountUTF8Length, @param bytes presumably selects a byte count
/// over a character count - confirm against UTF16::Count16.
AUKN_SYM AuUInt32 CountUTF16Length(const Memory::MemoryViewRead &utf16, bool bytes)
{
    const auto data = utf16.ptr;
    const auto size = utf16.length;
    return UTF16::Count16(data, size, bytes);
}
/// Measures @param utf8 by running the UTF-8 to UTF-32 conversion against an empty destination view.
/// @param bytes when true, the result is the number of input bytes the conversion consumed;
///              when false, it is the reported output size divided by sizeof(AuUInt32), i.e. the
///              number of UTF-32 code units.
AUKN_SYM AuUInt32 CountUTF8Length(const Memory::MemoryViewRead &utf8, bool bytes)
{
    const auto measured = ReadUTF8IntoUTF32ByteString(utf8, {});

    if (bytes)
    {
        return measured.first;
    }

    return measured.second / sizeof(AuUInt32);
}
/// Forwards the Shift-JIS view's pointer and length, plus the @param bytes flag, to SJIS::CountSJIS
/// and returns its result.
/// NOTE(review): by analogy with CountUTF8Length, @param bytes presumably selects a byte count
/// over a character count - confirm against SJIS::CountSJIS.
AUKN_SYM AuUInt32 CountSJISLength(const Memory::MemoryViewRead &sjis, bool bytes)
{
    const auto data = sjis.ptr;
    const auto size = sjis.length;
    return SJIS::CountSJIS(data, size, bytes);
}
/// Forwards the GBK view's pointer and length, plus the @param bytes flag, to GBK::CountGbk and
/// returns its result.
/// NOTE(review): by analogy with CountUTF8Length, @param bytes presumably selects a byte count
/// over a character count - confirm against GBK::CountGbk.
AUKN_SYM AuUInt32 CountGBK16Length(const Memory::MemoryViewRead &gbk, bool bytes)
{
    const auto data = gbk.ptr;
    const auto size = gbk.length;
    return GBK::CountGbk(data, size, bytes);
}
2021-09-06 10:58:08 +00:00
}