231 lines
5.5 KiB
C++
231 lines
5.5 KiB
C++
/***
|
|
Copyright (C) 2021 J Reece Wilson (a/k/a "Reece"). All rights reserved.
|
|
|
|
File: EncoderIConv.cpp
|
|
Date: 2021-8-19
|
|
Author: Reece
|
|
***/
|
|
#include <Source/RuntimeInternal.hpp>
|
|
#include "Encoding.hpp"
|
|
#include "EncoderIConv.hpp"
|
|
|
|
#if defined(AURORA_IS_POSIX_DERIVED)
|
|
#include <iconv.h>
|
|
#endif
|
|
|
|
namespace Aurora::Locale::Encoding
|
|
{
|
|
|
|
#if defined(AURORA_IS_POSIX_DERIVED)
|
|
// iconv descriptors keyed by code page. AddPair fills this map with the
// descriptor it opens via iconv_open("UTF8", <page charset>); IConvCPToUTF8
// looks descriptors up here.
static AuHashMap<ECodePage, iconv_t> gIconvCodePage2UTF;
|
|
// iconv descriptors keyed by code page. AddPair fills this map with the
// descriptor it opens via iconv_open(<page charset>, "UTF8"); IConvUTF8ToCp
// looks descriptors up here.
static AuHashMap<ECodePage, iconv_t> gIconvUTF2CodePage;
|
|
#endif
|
|
|
|
/**
 * Rewrites a charset name in place into a spelling that iconv is more likely
 * to accept.
 *
 *  - "MS-<rest>"  becomes "WINDOWS-<rest>" (the remainder is kept verbatim,
 *                 it is not upper-cased)
 *  - anything else is upper-cased first, then:
 *      - "SJIS"    becomes "SHIFT_JIS"
 *      - "LATIN-x" (exactly seven characters) becomes "LATINx"
 *
 * Any other name is left in its upper-cased form; this is best effort only.
 *
 * @param str charset name to normalise; modified in place
 */
static void SanitizeIConvCharset(AuString &str)
{
    if (AuStartsWith(str, "MS-"))
    {
        // Swap the three character "MS-" prefix for "WINDOWS-"
        str = "WINDOWS-" + str.substr(3);
    }
    else
    {
        str = AuToUpper(str);

        if (str == "SJIS")
        {
            str = "SHIFT_JIS";
        }
        else if (AuStartsWith(str, "LATIN-") && str.size() == 7)
        {
            // Drop the hyphen at index 5: "LATIN-1" -> "LATIN1"
            str.erase(5, 1);
        }
        // otherwise: the upper-cased name is *probably* something the
        // platform's iconv understands
    }
}
|
|
|
|
/**
 * Resolves a code page to the charset name that is handed to iconv_open.
 *
 * @param page code page to resolve
 * @param temp scratch string; only written for ECodePage::eSysUnk, in which
 *             case the returned pointer refers to its storage and is valid
 *             only for as long as `temp` is alive and unmodified
 * @return a charset name, or nullptr when `page` has no mapping here.
 *         Unmapped pages used to fall through to "GB18030", which silently
 *         selected the wrong charset and made the caller's null check
 *         unreachable.
 */
static const char *PageToString(ECodePage page, AuString &temp)
{
    if (page == ECodePage::eSysUnk)
    {
        // The system code page is only known at runtime: fetch its name and
        // normalise it into an iconv-friendly spelling.
        temp = GetInternalCodePageString();
        SanitizeIConvCharset(temp);
        return temp.c_str();
    }

    switch (page)
    {
    case ECodePage::e18030:
        return "GB18030";
    case ECodePage::eLatin1:
        return "LATIN1";
    case ECodePage::eUTF7:
        return "UTF7";
    case ECodePage::e2312:
        return "GB2312";
    case ECodePage::eGBK:
        return "GBK";
    case ECodePage::eSJIS:
        return "SHIFT_JIS";
    case ECodePage::eUTF16:
        return "UTF16";
    default:
        // No iconv charset is registered for this page
        return nullptr;
    }
}
|
|
#if defined(AURORA_IS_POSIX_DERIVED)
|
|
|
|
/**
 * Runs iconv() over an input buffer using an already opened descriptor.
 *
 * When `out` is null, the conversion is performed into a temporary buffer
 * that is discarded on return, so only the byte counts are reported. In that
 * mode the temporary buffer is grown and the call retried whenever iconv
 * reports E2BIG.
 *
 * @param handle descriptor obtained from iconv_open
 * @param in     input bytes
 * @param length number of input bytes
 * @param out    destination buffer, or null to only count
 * @param outLen capacity of `out` in bytes (ignored when `out` is null)
 * @return {input bytes consumed, output bytes produced}. On an iconv failure
 *         the counts accumulated up to that point are returned; an empty pair
 *         is returned if the initial temporary buffer cannot be allocated.
 */
AuStreamReadWrittenPair_t IConvWork(iconv_t handle, const void *in, AuUInt length, void *out, AuUInt32 outLen)
{
    auto initialLength = length;

    size_t inRemaining = length;
    size_t outRemaining = outLen;

    // iconv wants mutable char pointers, even for the input side
    char *source = const_cast<char *>(static_cast<const char *>(in));
    char *sink = static_cast<char *>(out);

    const bool countOnly = !out;

    AuByteBuffer scratch;
    if (countOnly)
    {
        if (!AuTryResize(scratch, length))
        {
            return {};
        }

        sink = (char *)scratch.base;
        outLen = scratch.length;
        outRemaining = outLen;
    }

    AuUInt32 consumed {};
    AuUInt32 produced {};

    for (;;)
    {
        const bool failed = iconv(handle, &source, &inRemaining, &sink, &outRemaining) == static_cast<size_t>(-1);

        // Account for whatever this pass managed to process, success or not.
        // (Pure arithmetic: errno from iconv is still intact afterwards.)
        consumed += length - inRemaining;
        produced += outLen - outRemaining;

        if (!failed)
        {
            break;
        }

        // The next pass measures its progress from what is still unread
        length = inRemaining;

        if (countOnly && errno == E2BIG)
        {
            if (!AuTryResize(scratch, AuMax(initialLength, scratch.length) * 2))
            {
                break;
            }

            // Output is thrown away in this mode, so restart at the front of
            // the enlarged scratch buffer.
            sink = (char *)scratch.base;
            outLen = scratch.length;
            outRemaining = outLen;
            continue;
        }

        SysPushErrorGen();
        break;
    }

    return {consumed, produced};
}
|
|
|
|
#endif
|
|
|
|
/**
 * Converts bytes encoded in `page` to UTF-8 using the descriptor registered
 * for that page in gIconvCodePage2UTF.
 *
 * @param page    source code page
 * @param in      input bytes
 * @param length  number of input bytes
 * @param utf8    destination buffer, forwarded to IConvWork
 * @param utf8Max capacity of `utf8` in bytes
 * @return the pair produced by IConvWork, or an empty pair when no descriptor
 *         is registered for `page` or AURORA_IS_POSIX_DERIVED is not defined
 */
AuStreamReadWrittenPair_t IConvCPToUTF8(ECodePage page, const void *in, AuUInt length, void *utf8, AuUInt32 utf8Max)
{
#if defined(AURORA_IS_POSIX_DERIVED)
    if (auto entry = gIconvCodePage2UTF.find(page); entry != gIconvCodePage2UTF.end())
    {
        return IConvWork(entry->second, in, length, utf8, utf8Max);
    }
#endif

    return {};
}
|
|
|
|
/**
 * Converts UTF-8 bytes to the encoding of `page` using the descriptor
 * registered for that page in gIconvUTF2CodePage.
 *
 * @param page       destination code page
 * @param utf8       input bytes
 * @param utf8Length number of input bytes
 * @param cp         destination buffer, forwarded to IConvWork
 * @param cpLen      capacity of `cp` in bytes
 * @return the pair produced by IConvWork, or an empty pair when no descriptor
 *         is registered for `page` or AURORA_IS_POSIX_DERIVED is not defined
 */
AuStreamReadWrittenPair_t IConvUTF8ToCp(ECodePage page, const void *utf8, AuUInt32 utf8Length, void *cp, AuUInt32 cpLen)
{
#if defined(AURORA_IS_POSIX_DERIVED)
    if (auto entry = gIconvUTF2CodePage.find(page); entry != gIconvUTF2CodePage.end())
    {
        return IConvWork(entry->second, utf8, utf8Length, cp, cpLen);
    }
#endif

    return {};
}
|
|
|
|
/**
 * Opens the two iconv descriptors for `page` (page -> UTF8 and UTF8 -> page)
 * and stores them in gIconvCodePage2UTF and gIconvUTF2CodePage.
 *
 * Compiles to a no-op when AURORA_IS_POSIX_DERIVED is not defined. If the
 * charset name cannot be resolved, or either iconv_open call fails, an error
 * is pushed and nothing is stored for this page. The results of the
 * AuTryInsert calls are not checked.
 *
 * @param page code page to register
 */
static void AddPair(ECodePage page)
{
#if defined(AURORA_IS_POSIX_DERIVED)
    // Backing storage for the name PageToString returns for eSysUnk; it has
    // to stay alive until both iconv_open calls are done.
    AuString temp;

    auto fromLocale = PageToString(page, temp);
    if (!fromLocale)
    {
        SysPushErrorUnimplemented();
        return;
    }

    auto toUTF = iconv_open("UTF8", fromLocale);
    if (toUTF == (iconv_t) -1)
    {
        SysPushErrorGen();
        return;
    }

    auto fromUTF = iconv_open(fromLocale, "UTF8");
    if (fromUTF == (iconv_t) -1)
    {
        // Report first, so that iconv_close cannot disturb errno before the
        // error is recorded.
        SysPushErrorGen();
        // The first descriptor is not going to be stored anywhere; release it
        // instead of leaking it.
        iconv_close(toUTF);
        return;
    }

    AuTryInsert(gIconvCodePage2UTF, page, toUTF);
    AuTryInsert(gIconvUTF2CodePage, page, fromUTF);
#endif
}
|
|
|
|
void InitIConv()
|
|
{
|
|
AddPair(ECodePage::eSysUnk);
|
|
AddPair(ECodePage::eLatin1);
|
|
AddPair(ECodePage::eUTF7);
|
|
AddPair(ECodePage::e2312);
|
|
AddPair(ECodePage::eGBK);
|
|
AddPair(ECodePage::eSJIS);
|
|
AddPair(ECodePage::eUTF16);
|
|
AddPair(ECodePage::e18030);
|
|
}
|
|
} |