2021-09-06 10:58:08 +00:00
|
|
|
/***
|
|
|
|
Copyright (C) 2021 J Reece Wilson (a/k/a "Reece"). All rights reserved.
|
|
|
|
|
|
|
|
File: Encoding.cpp
|
|
|
|
Date: 2021-8-19
|
|
|
|
Author: Reece
|
|
|
|
***/
|
|
|
|
#include <RuntimeInternal.hpp>
|
|
|
|
#include "../Locale.hpp"
|
|
|
|
#include "Encoding.hpp"
|
|
|
|
|
|
|
|
namespace Aurora::Locale::Encoding
|
|
|
|
{
|
|
|
|
/**
 * Checks whether the buffer begins with a known byte-order mark / encoding signature.
 *
 * @param binary        start of the buffer to inspect; a null pointer yields an empty result
 * @param binaryLength  number of readable bytes at `binary`
 * @return a pair of (code page, signature length in bytes) for the first matching
 *         signature, or an empty optional when nothing matches. Signatures of
 *         encodings without an ECodePage value are reported as eUnsupported.
 */
AUKN_SYM AuOptional<AuPair<ECodePage, AuUInt8>> DecodeBOM(const void *binary, AuUInt32 binaryLength)
{
    #define ADD_PATTERN(str, code) {str, AuArraySize(str) - 1, ECodePage::code}
    // Order matters: longer signatures come first because the UTF-32 LE mark
    // (FF FE 00 00) starts with the UTF-16 LE mark (FF FE).
    // The table is immutable, so build it once instead of on every call.
    static const AuList<std::tuple<const char *, int, ECodePage>> bows =
    {
        ADD_PATTERN("\xFF\xFE\x00\x00", eUTF32),
        ADD_PATTERN("\x00\x00\xFE\xFF", eUTF32BE),
        ADD_PATTERN("\x84\x31\x95\x33", e18030),
        ADD_PATTERN("\xDD\x73\x66\x73", eUnsupported), // UTF-EBCDIC
        ADD_PATTERN("\xEF\xBB\xBF", eUTF8),
        ADD_PATTERN("\xF7\x64\x4C", eUnsupported), // UTF-1
        ADD_PATTERN("\x2B\x2F\x76", eUTF7), // UTF-7 ("+/v")
        ADD_PATTERN("\xFB\xEE\x28", eUnsupported), // BOCU-1
        ADD_PATTERN("\x0E\xFE\xFF", eUnsupported), // SCSU
        ADD_PATTERN("\xFF\xFE", eUTF16), // UTF-16
        ADD_PATTERN("\xFE\xFF", eUTF16BE) // UTF-16
    };
    #undef ADD_PATTERN

    // memcmp on a null pointer is undefined behaviour; treat it as "no BOM".
    if (!binary)
    {
        return {};
    }

    for (const auto &[string, length, category] : bows)
    {
        // Not enough input to hold this signature.
        if (binaryLength < static_cast<AuUInt32>(length)) continue;
        if (std::memcmp(binary, string, length) != 0) continue;

        return AuMakePair(category, length);
    }

    return {};
}
|
|
|
|
|
|
|
|
/**
 * Runs a one-shot TextStreamEncoder for `page` over the given UTF-8 input.
 *
 * @param utf8          source buffer, forwarded as the encoder's input
 * @param utf8Length    length of `utf8` in bytes
 * @param binary        destination buffer, forwarded as the encoder's output
 * @param binaryLength  capacity of `binary` in bytes
 * @param page          code page the encoder is constructed with
 * @return whatever the encoder reports (presumably a read/written byte pair,
 *         going by the type name — confirm against TextStreamEncoder)
 */
AUKN_SYM AuStreamReadWrittenPair_t EncodeUTF8(const void *utf8, AuUInt32 utf8Length, void *binary, AuUInt32 binaryLength, ECodePage page)
{
    // NOTE(review): the encoder member invoked here is named DecodeUTF8, which
    // looks inverted relative to this function's name — confirm it is intended.
    TextStreamEncoder encoder(page);
    AuStreamReadWrittenPair_t readWritten = encoder.DecodeUTF8(utf8, utf8Length, binary, binaryLength);
    return readWritten;
}
|
|
|
|
|
|
|
|
/**
 * Runs a one-shot TextStreamProcessor for `page` over the given encoded input,
 * writing into a caller-supplied buffer.
 *
 * @param binary        source buffer, forwarded as the processor's input
 * @param binaryLength  length of `binary` in bytes
 * @param utf8          destination buffer, forwarded as the processor's output
 * @param utf8Max       capacity of `utf8` in bytes
 * @param page          code page the processor is constructed with
 * @return whatever the processor reports (presumably a read/written byte pair,
 *         going by the type name — confirm against TextStreamProcessor)
 */
AUKN_SYM AuStreamReadWrittenPair_t DecodeUTF8(const void *binary, AuUInt32 binaryLength, void *utf8, AuUInt32 utf8Max, ECodePage page)
{
    // NOTE(review): the processor member invoked here is named EncodeUTF8, which
    // looks inverted relative to this function's name — confirm it is intended.
    TextStreamProcessor processor(page);
    AuStreamReadWrittenPair_t readWritten = processor.EncodeUTF8(binary, binaryLength, utf8, utf8Max);
    return readWritten;
}
|
|
|
|
|
|
|
|
/**
 * Decodes `binary` into `out`, sizing the string automatically.
 *
 * Works in two passes over the raw-buffer DecodeUTF8 overload: the first call
 * is made with a null destination and zero capacity and its `.second` is used
 * to size `out`; the second call writes into `out` and its `.second` becomes
 * the final string length.
 *
 * @param binary        source buffer
 * @param binaryLength  length of `binary` in bytes
 * @param out           receives the decoded text; its previous contents are replaced
 * @param page          code page forwarded to the raw-buffer overload
 * @return the pair returned by the second (writing) pass
 */
AUKN_SYM AuStreamReadWrittenPair_t DecodeUTF8(const void *binary, AuUInt32 binaryLength, AuString &out, ECodePage page)
{
    // Pass 1: no destination buffer; only the reported `.second` is used.
    // (presumably the required output size — confirm against TextStreamProcessor)
    const auto measured = DecodeUTF8(binary, binaryLength, nullptr, 0, page);
    out.resize(measured.second);

    // Pass 2: decode into the freshly sized string, then trim to what was written.
    auto result = DecodeUTF8(binary, binaryLength, out.data(), out.size(), page);
    out.resize(result.second);

    return result;
}
|
|
|
|
|
|
|
|
/**
 * Mutable-pointer convenience overload of the string-producing DecodeUTF8.
 *
 * The input is never modified, so this forwards to the `const void *` overload
 * rather than carrying a second copy of the measure-then-fill logic.
 *
 * @param binary        source buffer
 * @param binaryLength  length of `binary` in bytes
 * @param out           receives the decoded text; its previous contents are replaced
 * @param page          code page forwarded to the decoder
 * @return the pair returned by the `const void *` overload
 */
AuStreamReadWrittenPair_t DecodeUTF8(void *binary, AuUInt32 binaryLength, AuString &out, ECodePage page)
{
    // The explicit cast selects the const overload; without it this call would
    // resolve back to this function and recurse forever.
    return DecodeUTF8(static_cast<const void *>(binary), binaryLength, out, page);
}
|
|
|
|
}
|