/*** Copyright (C) 2021 J Reece Wilson (a/k/a "Reece"). All rights reserved. File: EncoderIConv.cpp Date: 2021-8-19 Author: Reece ***/ #include #include "Encoding.hpp" #include "EncoderIConv.hpp" #if defined(AURORA_IS_POSIX_DERIVED) #include #endif namespace Aurora::Locale::Encoding { #if defined(AURORA_IS_POSIX_DERIVED) static AuHashMap gIconvCodePage2UTF; static AuHashMap gIconvUTF2CodePage; #endif static void SanitizeIConvCharset(AuString &str) { if (AuStartsWith(str, "MS-")) { str = "WINDOWS-" + str.substr(3); return; } str = AuToUpper(str); if (str == "SJIS") { str = "SHIFT_JIS"; return; } if (AuStartsWith(str, "LATIN-") && str.size() == 7) { str[5] = str[6]; str.pop_back(); return; } // str is now **probably** something your unix platform iconv can understand } static const char *PageToString(ECodePage page, AuString &temp) { const char *base {}; if (page == ECodePage::eSysUnk) { temp = GetInternalCodePageString(); SanitizeIConvCharset(temp); base = temp.c_str(); } else { switch (page) { default: case ECodePage::e18030: base = "GB18030"; break; case ECodePage::eLatin1: base = "LATIN1"; break; case ECodePage::eUTF7: base = "UTF7"; break; case ECodePage::e2312: base = "GB2312"; break; case ECodePage::eGBK: base = "GBK"; break; case ECodePage::eSJIS: base = "SHIFT_JIS"; break; case ECodePage::eUTF16: base = "UTF16"; break; } } return base; } AuStreamReadWrittenPair_t IConvWork(iconv_t handle, const void *in, AuUInt length, void *out, AuUInt32 outLen) { #if defined(AURORA_IS_POSIX_DERIVED) auto originalLength = length; size_t canReadLength = length; size_t canWriteLength = outLen; char * inPtr = (char *)in; char * outPtr = (char *)out; AuByteBuffer buffer; if (!out) { buffer.resize(length); outPtr = (char *)buffer.base; canWriteLength = outLen = buffer.length; } AuUInt32 a {}, b{}; while (iconv(handle, &inPtr, &canReadLength, &outPtr, &canWriteLength) == (size_t) -1) { a += length - canReadLength; b += outLen - canWriteLength; length = canReadLength; if (!out) { if (errno == E2BIG) { buffer.resize(AuMax(originalLength, buffer.length) * 2); outPtr = (char *)buffer.base; canWriteLength = outLen = buffer.length; continue; } } SysPushErrorGen(); return {}; } a += length - canReadLength; b += outLen - canWriteLength; return {a, b}; #else return {}; #endif } AuStreamReadWrittenPair_t IConvCPToUTF8(ECodePage page, const void *in, AuUInt length, void *utf8, AuUInt32 utf8Max) { #if defined(AURORA_IS_POSIX_DERIVED) auto handleItr = gIconvCodePage2UTF.find(page); if (handleItr == gIconvCodePage2UTF.end()) { return {}; } auto handle = handleItr->second; return IConvWork(handle, in, length, utf8, utf8Max); #else return {}; #endif } AuStreamReadWrittenPair_t IConvUTF8ToCp(ECodePage page, const void *utf8, AuUInt32 utf8Length, void *cp, AuUInt32 cpLen) { #if defined(AURORA_IS_POSIX_DERIVED) auto handleItr = gIconvUTF2CodePage.find(page); if (handleItr == gIconvUTF2CodePage.end()) { return {}; } auto handle = handleItr->second; return IConvWork(handle, utf8, utf8Length, cp, cpLen); #else return {}; #endif } static void AddPair(ECodePage page) { #if defined(AURORA_IS_POSIX_DERIVED) AuString temp; auto fromLocale = PageToString(page, temp); if (!fromLocale) { SysPushErrorUnimplemented(); return; } auto toUTF = iconv_open("UTF8", fromLocale); if (toUTF == (iconv_t) -1) { SysPushErrorGen(); return; } auto fromUTF = iconv_open(fromLocale, "UTF8"); if (fromUTF == (iconv_t) -1) { SysPushErrorGen(); return; } AuTryInsert(gIconvCodePage2UTF, page, toUTF); AuTryInsert(gIconvUTF2CodePage, page, fromUTF); #endif } void InitIConv() { AddPair(ECodePage::eSysUnk); AddPair(ECodePage::eLatin1); AddPair(ECodePage::eUTF7); AddPair(ECodePage::e2312); AddPair(ECodePage::eGBK); AddPair(ECodePage::eSJIS); AddPair(ECodePage::eUTF16); AddPair(ECodePage::e18030); } }