/*** Copyright (C) 2021 J Reece Wilson (a/k/a "Reece"). All rights reserved. File: ConvertInternal.cpp Date: 2021-8-19 Author: Reece ***/ #include #include "../Locale.hpp" #include "Encoding.hpp" #include "ConvertInternal.hpp" #if !defined(AU_NO_CPPLOCALE) #include #include #endif namespace Aurora::Locale::Encoding { #if !defined(AU_NO_CPPLOCALE) static std::wstring_convert> gUtf8Conv; struct Utf16Converter_t : public std::codecvt { Utf16Converter_t(std::size_t refs = 0) : codecvt(refs) {} }; struct Utf32Converter_t : public std::codecvt { Utf32Converter_t(std::size_t refs = 0) : codecvt(refs) {} }; #else using Utf16Converter_t = void; using Utf32Converter_t = void; #endif template void FlipEndianness(AuUInt8 *out, AuUInt32 count, bool isEncodingLE) { if (isEncodingLE == (Aurora::Build::kCurrentEndian == Aurora::Build::ECPUEndian::eCPULittle)) { return; } for (AuUInt i = 0; i < count; i += sizeof(charout_t)) { // TODO: Read/Write[LE/BE] macros // Someone will moan about sizeof(T) != ABI guaranteed reads despite being legal under aarch+x86+x86_64 // So long as buffer.data !% alignof(T) (usually = sizeof(T)), we should be good if constexpr (sizeof(charout_t) == 2) { auto word = reinterpret_cast(&out[i]); AuUInt16 altEndian = *word; AuUInt16 swapped = (altEndian & 0xFF) << 8 | (altEndian >> 8) & 0xFF; *word = swapped; } else if constexpr (sizeof(charout_t) == 4) { auto word = reinterpret_cast(&out[i]); AuUInt32 altEndian = *word; AuUInt32 swapped = (altEndian & 0xFF) << 24 | ((altEndian >> 8) & 0xFF) << 16 | ((altEndian >> 16) & 0xFF) << 8 | ((altEndian >> 24) & 0xFF); *word = swapped; } } } template<> void FlipEndianness(AuUInt8 *out, AuUInt32 count, bool isEncodingLE); template<> void FlipEndianness(AuUInt8 *out, AuUInt32 count, bool isEncodingLE); template static AuStreamReadWrittenPair_t TranslateOutUtfBuffer(AuUInt8 *in, AuUInt length, void *out, AuUInt outLen, bool endianLe = true) { #if defined(AU_NO_CPPLOCALE) return {}; #else std::mbstate_t mb {}; converter_t converter; const charin_t *fromNext; char *toNext; FlipEndianness(in, length, endianLe); converter.out(mb, reinterpret_cast(in), reinterpret_cast(in + length), fromNext, reinterpret_cast(out), reinterpret_cast(out) + outLen, toNext); return {fromNext - reinterpret_cast(in), toNext - out}; #endif } template static AuStreamReadWrittenPair_t TranslateInUtfBuffer(const AuUInt8 *in, AuUInt length, AuUInt8 *out, AuUInt outLen, bool endianLe = true) { #if defined(AU_NO_CPPLOCALE) return {}; #else std::mbstate_t mb {}; converter_t converter; const char *fromNext; charout_t *toNext; converter.in(mb, reinterpret_cast(in), reinterpret_cast(in) + length, fromNext, reinterpret_cast(out), reinterpret_cast(out + outLen), toNext); auto done = toNext - reinterpret_cast(out); FlipEndianness(out, done, endianLe); return {(const AuUInt8 *)fromNext - in, done * sizeof(charout_t)}; #endif } template static bool TranslateInUtfBuffer(const AuUInt8 *in, AuUInt length, AuList &out, bool endianLe = true) { out.resize(length * sizeof(charout_t)); auto len = TranslateInUtfBuffer(in, length, out.data(), out.size(), endianLe); if (len) { out.resize(len); out.shrink_to_fit(); } return len; } AuStreamReadWrittenPair_t EncodeUTF8Internal(const void *utf8, AuUInt32 utf8Length, void *binary, AuUInt32 binaryLength, ECodePage page) { AuStreamReadWrittenPair_t ret {}; auto readable = std::min(AuUInt(utf8Length), AuUInt(binaryLength)); AuList temp; if (!binary) { temp.resize(utf8Length); binary = temp.data(); binaryLength = temp.size(); } switch (page) { default: case ECodePage::eUnsupported: return {}; case ECodePage::eUTF16: case ECodePage::eUTF16BE: ret = TranslateInUtfBuffer(reinterpret_cast(utf8), utf8Length, reinterpret_cast(binary), binaryLength, page == ECodePage::eUTF16); break; case ECodePage::eUTF32: case ECodePage::eUTF32BE: ret = TranslateInUtfBuffer(reinterpret_cast(utf8), utf8Length, reinterpret_cast(binary), binaryLength, page == ECodePage::eUTF32); break; case ECodePage::eUTF8: if (utf8 && binary) { std::memcpy(binary, utf8, readable); } ret = AuMakePair(utf8Length, binary ? binaryLength : utf8Length); break; } return ret; } AuStreamReadWrittenPair_t DecodeUTF8Internal(const void *binary, AuUInt32 binaryLength, void *utf8, AuUInt32 utf8Max, ECodePage page) { AuStreamReadWrittenPair_t ret {}; AuList temp; if (!utf8) { temp.resize(binaryLength * 4); utf8 = temp.data(); utf8Max = temp.size(); } AuList rw(reinterpret_cast(binary), reinterpret_cast(binary) + binaryLength); auto readable = std::min(AuUInt(binaryLength), AuUInt(utf8Max)); switch (page) { default: case ECodePage::eUnsupported: return {}; case ECodePage::eUTF16: case ECodePage::eUTF16BE: ret = TranslateOutUtfBuffer(rw.data(), rw.size(), utf8, utf8Max, page == ECodePage::eUTF16); break; case ECodePage::eUTF32: case ECodePage::eUTF32BE: ret = TranslateOutUtfBuffer(rw.data(), rw.size(), utf8, utf8Max, page == ECodePage::eUTF32); break; case ECodePage::eUTF8: if (utf8 && binary) { std::memcpy(utf8, binary, readable); } ret = AuMakePair(binaryLength, utf8 ? utf8Max : binaryLength); break; } return ret; } AuStreamReadWrittenPair_t DecodeUTF8Internal(void *binary, AuUInt32 binaryLength, void *utf8, AuUInt32 utf8Max, ECodePage page) { AuStreamReadWrittenPair_t ret {}; AuList temp; if (!utf8) { temp.resize(binaryLength * 4); utf8 = temp.data(); utf8Max = temp.size(); } AuList rw(reinterpret_cast(binary), reinterpret_cast(binary) + binaryLength); auto readable = std::min(AuUInt(binaryLength), AuUInt(utf8Max)); switch (page) { default: case ECodePage::eUnsupported: return {}; case ECodePage::eUTF16: case ECodePage::eUTF16BE: ret = TranslateOutUtfBuffer(rw.data(), rw.size(), utf8, utf8Max, page == ECodePage::eUTF16); break; case ECodePage::eUTF32: case ECodePage::eUTF32BE: ret = TranslateOutUtfBuffer(rw.data(), rw.size(), utf8, utf8Max, page == ECodePage::eUTF32); break; case ECodePage::eUTF8: if (utf8 && binary) { std::memcpy(utf8, binary, readable); } ret = AuMakePair(binaryLength, utf8 ? utf8Max : binaryLength); break; } return ret; } AuStreamReadWrittenPair_t STLCPToUTF8(ECodePage page, void *in, AuUInt32 length, void *utf8, AuUInt32 utf8Max) { return DecodeUTF8Internal(in, length, utf8, utf8Max, page); } AuStreamReadWrittenPair_t STLCPToUTF8(ECodePage page, const void *in, AuUInt32 length, void *utf8, AuUInt32 utf8Max) { return DecodeUTF8Internal(in, length, utf8, utf8Max, page); } AuStreamReadWrittenPair_t STLUTF8ToCp(ECodePage page, const void *utf8, AuUInt32 utf8Length, void *cp, AuUInt32 cpLen) { return EncodeUTF8Internal(utf8, utf8Length, cp, cpLen, page); } }