/*** Copyright (C) 2021 J Reece Wilson (a/k/a "Reece"). All rights reserved. File: Encoding.hpp Date: 2021-8-18 Author: Reece ***/ #pragma once #include "../Locale.hpp" #include "GBK/GBK.hpp" #include "SJIS/SJIS.hpp" #include "UTFn/AuUTF8.hpp" #include "UTFn/AuUTF16.hpp" #include "UTFn/AuUTF32.hpp" namespace Aurora::Locale::Encoding { AuStreamReadWrittenPair_t DecodeUTF8(void *binary, AuUInt32 binaryLength, AuString &out, ECodePage page = ECodePage::eEnumInvalid); struct TextStreamProcessor { bool readHeader {}; ECodePage page = ECodePage::eEnumInvalid; ECodePage defaultPage = ECodePage::eEnumInvalid; EncoderAdapter state; TextStreamProcessor(ECodePage page = ECodePage::eSysUnk) : defaultPage(page) {} using TypeIn_t = const void *; using TypeCast_t = const AuUInt8 *; AuStreamReadWrittenPair_t EncodeUTF8(const void *binary, AuUInt32 binaryLength, void *utf8, AuUInt32 utfLen) { int offset = 0; if (!binary) return {}; if (!binaryLength) return {}; if (!AuExchange(readHeader, true)) { if (page == ECodePage::eEnumInvalid) { auto header = DecodeBOM(Memory::MemoryViewRead(binary, binaryLength)); if (header.length) { page = header.page; offset = header.length; } else { if ((defaultPage != ECodePage::eEnumInvalid)) { page = defaultPage; } else { page = GetInternalCodePage(); } } state.Init(page, true); } } if (page == ECodePage::eEnumInvalid) { return {}; } binaryLength = binaryLength - offset; auto real = state.CPToUTF8(reinterpret_cast(binary) + offset, binaryLength, utf8, utfLen); return AuMakePair(real.first + offset, real.second); } AuStreamReadWrittenPair_t EncodeUTF8(const void *binary, AuUInt32 binaryLength, AuString &out) { auto preemptive = EncodeUTF8(binary, binaryLength, nullptr, 0); if (!AuTryResize(out, preemptive.second)) return {}; auto main = EncodeUTF8(binary, preemptive.second, out.data(), AuUInt32(out.size())); if (main.second == 0) return {}; if (!AuTryResize(out, main.second)) return {}; out.shrink_to_fit(); return main; } }; struct TextStreamEncoder { ECodePage page; EncoderAdapter state; TextStreamEncoder(ECodePage page = ECodePage::eUTF32) : page(page) { state.Init(page, false); } AuStreamReadWrittenPair_t DecodeUTF8(const void *utf8In, AuUInt32 length, AuString &out) { if (page == ECodePage::eEnumInvalid) { return {}; } if (!utf8In) { return {}; } if (!length) { return {}; } auto preemptive = state.UTF8ToCp(utf8In, length, nullptr, 0); auto written = state.UTF8ToCp(utf8In, preemptive.second, out.data(), AuUInt32(out.size())); out.resize(written.second); out.shrink_to_fit(); return written; } AuStreamReadWrittenPair_t DecodeUTF8(const void *utf8In, AuUInt32 utf8Length, void *binaryOut, AuUInt32 binaryLength) { if (page == ECodePage::eEnumInvalid) { return {}; } return state.UTF8ToCp(utf8In, utf8Length, binaryOut, binaryLength); } }; /// 'TextStreamProcessor', a stateful wrapper around DecodeUTF8 /// Using this you can handle a stateful, optionally bom prefixed, stream /// Initialization (ie: setting a default codepage) is optional }