/*** Copyright (C) 2021 J Reece Wilson (a/k/a "Reece"). All rights reserved. File: Encoding.cpp Date: 2021-8-19 Author: Reece ***/ #include #include "../Locale.hpp" #include "Encoding.hpp" namespace Aurora::Locale::Encoding { AUKN_SYM AuOptional> DecodeBOM(const Aurora::Memory::MemoryViewRead & binary) { #define ADD_PATTERN(str, code) {str, AuArraySize(str) - 1, ECodePage::code} AuList> bows = { ADD_PATTERN("\xFF\xFE\x00\x00", eUTF32), ADD_PATTERN("\x00\x00\xFE\xFF", eUTF32BE), ADD_PATTERN("\x84\x31\x95\x33", e18030), ADD_PATTERN("\xDD\x73\x66\x73", eUnsupported), // UTF-EBCDIC ADD_PATTERN("\xEF\xBB\xBF", eUTF8), ADD_PATTERN("\xF7\x64\x4C", eUnsupported), // UTF-1 ADD_PATTERN("\xFB\xEE\x28", eUTF7), // UTF-7 ADD_PATTERN("\x2B\x2F\x76", eUnsupported), // BOCU-1 ADD_PATTERN("\x0E\xFE\xFF", eUnsupported), // SCSU ADD_PATTERN("\xFF\xFE", eUTF16), // UTF-16 ADD_PATTERN("\xFE\xFF", eUTF16BE) // UTF-16 }; #undef ADD_PATTERN for (const auto &[string, length, category] : bows) { if (binary.length < length) continue; if (std::memcmp(binary.ptr, string, length) != 0) continue; return AuMakePair(category, length); } return {}; } AUKN_SYM AuStreamReadWrittenPair_t EncodeUTF8(const Aurora::Memory::MemoryViewRead & utf8, const Aurora::Memory::MemoryViewWrite & binary, ECodePage page) { TextStreamEncoder re(page); return re.DecodeUTF8(utf8.ptr, utf8.length, binary.ptr, binary.length); } AUKN_SYM AuStreamReadWrittenPair_t DecodeUTF8(const Aurora::Memory::MemoryViewRead & binary, const Aurora::Memory::MemoryViewWrite & utf8, ECodePage page) { TextStreamProcessor re(page); return re.EncodeUTF8(binary.ptr, binary.length, utf8.ptr, utf8.length); } AUKN_SYM AuStreamReadWrittenPair_t DecodeUTF8(const Aurora::Memory::MemoryViewRead & binary, AuString &out, ECodePage page) { auto aaa = DecodeUTF8(binary, {}, page); out.resize(aaa.second); auto ret = DecodeUTF8(binary, Memory::MemoryViewWrite(out.data(), out.size()), page); out.resize(ret.second); return ret; } AuStreamReadWrittenPair_t DecodeUTF8(void *binary, AuUInt32 binaryLength, AuString &out, ECodePage page) { auto aaa = DecodeUTF8(Aurora::Memory::MemoryViewRead(binary, binaryLength), {}, page); out.resize(aaa.second); auto ret = DecodeUTF8(Aurora::Memory::MemoryViewRead(binary, binaryLength), Aurora::Memory::MemoryViewWrite(out.data(), out.size()), page); out.resize(ret.second); return ret; } }