AuroraRuntime/Source/Locale/Encoding/EncoderIConv.cpp

231 lines
5.5 KiB
C++

/***
Copyright (C) 2021 J Reece Wilson (a/k/a "Reece"). All rights reserved.
File: EncoderIConv.cpp
Date: 2021-8-19
Author: Reece
***/
#include <Source/RuntimeInternal.hpp>
#include "Encoding.hpp"
#include "EncoderIConv.hpp"
#if defined(AURORA_IS_POSIX_DERIVED)
#include <iconv.h>
#endif
namespace Aurora::Locale::Encoding
{
#if defined(AURORA_IS_POSIX_DERIVED)
// iconv conversion descriptors, keyed by code page. Both tables are filled
// by AddPair() and read by the IConv* lookup functions below.
// Descriptors converting FROM the code page TO UTF-8.
static AuHashMap<ECodePage, iconv_t> gIconvCodePage2UTF;
// Descriptors converting FROM UTF-8 TO the code page.
static AuHashMap<ECodePage, iconv_t> gIconvUTF2CodePage;
#endif
// Rewrites a charset name, in place, into a spelling that iconv is more
// likely to accept:
//   "MS-<x>"    -> "WINDOWS-<x>"  (prefix match is case sensitive; the
//                                  remainder is kept verbatim)
//   otherwise the name is upper-cased, and then
//   "SJIS"      -> "SHIFT_JIS"
//   "LATIN-<c>" -> "LATIN<c>"     (exactly one character after the hyphen)
// Any other name is left as its upper-cased form.
static void SanitizeIConvCharset(AuString &str)
{
    if (AuStartsWith(str, "MS-"))
    {
        str = "WINDOWS-" + str.substr(3);
        return;
    }

    str = AuToUpper(str);

    if (str == "SJIS")
    {
        str = "SHIFT_JIS";
    }
    else if (AuStartsWith(str, "LATIN-") && str.size() == 7)
    {
        // Drop the hyphen at index 5: "LATIN-1" becomes "LATIN1"
        str.erase(5, 1);
    }

    // str is now **probably** something your unix platform iconv can understand
}
// Resolves a code page to the charset name that is handed to iconv_open().
//
// page: code page to resolve.
// temp: scratch string. Only written when page is ECodePage::eSysUnk, in
//       which case it receives the sanitized system code page name.
//
// Returns the charset name, or nullptr when the page has no mapping here.
// For ECodePage::eSysUnk the returned pointer aliases temp's storage, so temp
// must stay alive (and unmodified) for as long as the result is used.
static const char *PageToString(ECodePage page, AuString &temp)
{
    if (page == ECodePage::eSysUnk)
    {
        temp = GetInternalCodePageString();
        SanitizeIConvCharset(temp);
        return temp.c_str();
    }

    switch (page)
    {
    case ECodePage::e18030:
        return "GB18030";
    case ECodePage::eLatin1:
        return "LATIN1";
    case ECodePage::eUTF7:
        // Hyphenated, registered spelling; the bare "UTF7" alias is not
        // guaranteed to exist in every iconv implementation.
        return "UTF-7";
    case ECodePage::e2312:
        return "GB2312";
    case ECodePage::eGBK:
        return "GBK";
    case ECodePage::eSJIS:
        return "SHIFT_JIS";
    case ECodePage::eUTF16:
        // Same reasoning as UTF-7 above.
        return "UTF-16";
    default:
        // Unmapped pages used to fall through to "GB18030", which would
        // silently register the wrong converter. Report "no mapping" so the
        // caller's null check can flag the page as unimplemented instead.
        return nullptr;
    }
}
#if defined(AURORA_IS_POSIX_DERIVED)
// Runs iconv() over an input buffer and reports how far it got.
//
// handle: open iconv conversion descriptor.
// in:     input bytes.
// length: number of input bytes.
// out:    destination buffer, or null to only measure. In the null case the
//         output goes to an internal scratch buffer that is grown and reused
//         from its start whenever iconv reports E2BIG, so only the byte
//         counts are meaningful.
// outLen: capacity of out in bytes (ignored when out is null).
//
// Returns {input bytes consumed, output bytes produced}. Any iconv failure
// other than the E2BIG-in-measure-mode case is reported via SysPushErrorGen()
// and the counts accumulated so far are returned. An empty pair is returned
// if the initial scratch allocation fails.
AuStreamReadWrittenPair_t IConvWork(iconv_t handle, const void *in, AuUInt length, void *out, AuUInt32 outLen)
{
    auto originalLength = length;

    size_t srcLeft = length;
    size_t dstLeft = outLen;
    char *src = (char *)in;
    char *dst = (char *)out;

    AuByteBuffer buffer;

    // Points the output cursor at the start of the scratch buffer and
    // refreshes the remaining-capacity bookkeeping.
    auto rebindToScratch = [&]()
    {
        dst = (char *)buffer.base;
        outLen = buffer.length;
        dstLeft = outLen;
    };

    if (!out)
    {
        if (!AuTryResize(buffer, length))
        {
            return {};
        }
        rebindToScratch();
    }

    AuUInt32 consumed {}, produced {};

    for (;;)
    {
        const bool failed = iconv(handle, &src, &srcLeft, &dst, &dstLeft) == (size_t) -1;

        // Account for whatever this pass managed to process, success or not.
        consumed += length - srcLeft;
        produced += outLen - dstLeft;

        if (!failed)
        {
            break;
        }

        length = srcLeft;

        // Only a full scratch buffer in measure mode is recoverable.
        if (out || errno != E2BIG)
        {
            SysPushErrorGen();
            break;
        }

        if (!AuTryResize(buffer, AuMax(originalLength, buffer.length) * 2))
        {
            break;
        }
        rebindToScratch();
    }

    return {consumed, produced};
}
#endif
// Converts bytes in the given code page to UTF-8 using the descriptor
// registered for that page.
//
// Returns the {consumed, produced} pair from IConvWork(), or an empty pair
// when no descriptor is registered for page or the build is not POSIX derived.
AuStreamReadWrittenPair_t IConvCPToUTF8(ECodePage page, const void *in, AuUInt length, void *utf8, AuUInt32 utf8Max)
{
#if defined(AURORA_IS_POSIX_DERIVED)
    if (auto entry = gIconvCodePage2UTF.find(page); entry != gIconvCodePage2UTF.end())
    {
        return IConvWork(entry->second, in, length, utf8, utf8Max);
    }
#endif
    return {};
}
// Converts UTF-8 bytes to the given code page using the descriptor
// registered for that page.
//
// Returns the {consumed, produced} pair from IConvWork(), or an empty pair
// when no descriptor is registered for page or the build is not POSIX derived.
AuStreamReadWrittenPair_t IConvUTF8ToCp(ECodePage page, const void *utf8, AuUInt32 utf8Length, void *cp, AuUInt32 cpLen)
{
#if !defined(AURORA_IS_POSIX_DERIVED)
    return {};
#else
    const auto entry = gIconvUTF2CodePage.find(page);
    const bool registered = entry != gIconvUTF2CodePage.end();
    if (!registered)
    {
        return {};
    }

    auto descriptor = entry->second;
    return IConvWork(descriptor, utf8, utf8Length, cp, cpLen);
#endif
}
// Opens the two iconv descriptors for a code page (page -> UTF-8 and
// UTF-8 -> page) and records them in the global lookup tables.
//
// If the charset name cannot be resolved, or either descriptor cannot be
// opened, an error is pushed and nothing is registered. Does nothing on
// builds that are not POSIX derived.
static void AddPair(ECodePage page)
{
#if defined(AURORA_IS_POSIX_DERIVED)
    AuString temp; // backing storage for the name when PageToString() needs it
    auto fromLocale = PageToString(page, temp);
    if (!fromLocale)
    {
        SysPushErrorUnimplemented();
        return;
    }

    // "UTF-8" is the registered charset name; the unhyphenated "UTF8" alias
    // is not guaranteed to exist in every iconv implementation.
    auto toUTF = iconv_open("UTF-8", fromLocale);
    if (toUTF == (iconv_t) -1)
    {
        SysPushErrorGen();
        return;
    }

    auto fromUTF = iconv_open(fromLocale, "UTF-8");
    if (fromUTF == (iconv_t) -1)
    {
        // Report before cleaning up, in case the error helper inspects errno
        // (iconv_close may overwrite it).
        SysPushErrorGen();
        // The first descriptor is already open; release it so a half-built
        // pair does not leak.
        iconv_close(toUTF);
        return;
    }

    // NOTE(review): the results of AuTryInsert are not checked here; if an
    // insert can fail, the corresponding descriptor is never closed - confirm.
    AuTryInsert(gIconvCodePage2UTF, page, toUTF);
    AuTryInsert(gIconvUTF2CodePage, page, fromUTF);
#endif
}
void InitIConv()
{
AddPair(ECodePage::eSysUnk);
AddPair(ECodePage::eLatin1);
AddPair(ECodePage::eUTF7);
AddPair(ECodePage::e2312);
AddPair(ECodePage::eGBK);
AddPair(ECodePage::eSJIS);
AddPair(ECodePage::eUTF16);
AddPair(ECodePage::e18030);
}
}