[+] Initial unoptimized iconv backend in the gross locale encode subsystem

This commit is contained in:
Reece Wilson 2022-04-11 19:56:14 +01:00
parent c7fb247f99
commit 2de033a575
5 changed files with 222 additions and 4 deletions

View File

@ -50,6 +50,11 @@ namespace Aurora::Locale::Encoding
{
return {};
}
if (!utf8Max)
{
utf8 = nullptr;
}
// decode using internal and/or optimized apis first
if (TestPage(ECodePage::eUTF8))
@ -81,6 +86,7 @@ namespace Aurora::Locale::Encoding
}
else if (TestPage(ECodePage::eSJIS))
{
auto temp = length;
length = SJIS::CountSJIS(in, length, true);
}
else if (TestPage(ECodePage::eUTF16) || TestPage(ECodePage::eUTF16BE))
@ -92,11 +98,19 @@ namespace Aurora::Locale::Encoding
length &= ~3;
}
if (!length)
{
return {};
}
ret = Win32CPToUTF8(page, in, length, utf8, utf8Max);
if (!ret.first)
{
// TODO: iconv support here
ret = STLCPToUTF8(page, in, length, utf8, utf8Max);
ret = IConvCPToUTF8(page, in, length, utf8, utf8Max);
if (!ret.first)
{
ret = STLCPToUTF8(page, in, length, utf8, utf8Max);
}
}
return ret;
@ -115,6 +129,11 @@ namespace Aurora::Locale::Encoding
{
return {};
}
if (!cpLen)
{
cp = nullptr;
}
if (TestPage(ECodePage::eUTF8))
{
@ -130,8 +149,11 @@ namespace Aurora::Locale::Encoding
ret = Win32UTF8ToCp(page, utf8, utf8Length, cp, cpLen);
if (!ret.first)
{
// TODO: iconv support here
ret = STLUTF8ToCp(page, utf8, utf8Length, cp, cpLen);
ret = IConvUTF8ToCp(page, utf8, utf8Length, cp, cpLen);
if (!ret.first)
{
ret = STLUTF8ToCp(page, utf8, utf8Length, cp, cpLen);
}
}
return ret;

View File

@ -9,8 +9,18 @@
#include "Encoding.hpp"
#include "EncoderIConv.hpp"
#if defined(AURORA_IS_POSIX_DERIVED)
#include <iconv.h>
#endif
namespace Aurora::Locale::Encoding
{
#if defined(AURORA_IS_POSIX_DERIVED)
// iconv conversion descriptors, keyed by code page and filled in by AddPair().
// gIconvCodePage2UTF holds the descriptors looked up by IConvCPToUTF8 (code page -> UTF-8).
static AuHashMap<ECodePage, iconv_t> gIconvCodePage2UTF;
// gIconvUTF2CodePage holds the descriptors looked up by IConvUTF8ToCp (UTF-8 -> code page).
static AuHashMap<ECodePage, iconv_t> gIconvUTF2CodePage;
#endif
static void SanitizeIConvCharset(AuString &str)
{
if (AuStartsWith(str, "MS-"))
@ -36,4 +46,184 @@ namespace Aurora::Locale::Encoding
// str is now **probably** something your unix platform iconv can understand
}
// Maps a code page to the charset name that is handed to iconv_open().
//
// page: the code page to look up.
// temp: scratch string that owns the returned text when page is eSysUnk; it must
//       outlive every use of the returned pointer in that case.
//
// Returns a NUL-terminated charset name, or nullptr when the page has no iconv
// name in this table. Unmapped pages used to fall through to "GB18030", which
// would have registered a wrong converter instead of letting the caller report
// the page as unimplemented.
static const char *PageToString(ECodePage page, AuString &temp)
{
    switch (page)
    {
    case ECodePage::eSysUnk:
        // The system code page is only known at runtime; normalise its name
        // into something iconv is likely to recognise.
        temp = GetInternalCodePageString();
        SanitizeIConvCharset(temp);
        return temp.c_str();
    case ECodePage::e18030:
        return "GB18030";
    case ECodePage::eLatin1:
        return "LATIN1";
    case ECodePage::eUTF7:
        // Hyphenated IANA spelling; the unhyphenated alias is not offered by
        // every iconv implementation.
        return "UTF-7";
    case ECodePage::e2312:
        return "GB2312";
    case ECodePage::eGBK:
        return "GBK";
    case ECodePage::eSJIS:
        return "SHIFT_JIS";
    case ECodePage::eUTF16:
        return "UTF-16";
    default:
        return nullptr;
    }
}
#if defined(AURORA_IS_POSIX_DERIVED)
// Runs one conversion through an already opened iconv descriptor.
//
// Only compiled on POSIX-derived targets: iconv_t comes from <iconv.h>, which this
// file includes solely under AURORA_IS_POSIX_DERIVED, so the signature cannot be
// declared elsewhere.
//
// handle: descriptor returned by iconv_open().
// in:     input bytes; length is their count.
// out:    destination buffer of outLen bytes, or nullptr to only measure how many
//         bytes the conversion would produce (a growing scratch buffer is used and
//         its contents are discarded).
//
// Returns {input bytes consumed, output bytes produced}. Returns an empty pair, after
// SysPushErrorGen(), on any iconv failure other than the scratch buffer filling up.
//
// NOTE(review): the descriptors passed in are shared globals and nothing here
// serialises access to them - confirm callers cannot race on the same handle.
AuStreamReadWrittenPair_t IConvWork(iconv_t handle, const void *in, AuUInt length, void *out, AuUInt32 outLen)
{
    size_t canReadLength = length;
    size_t canWriteLength = outLen;
    char *inPtr = const_cast<char *>(static_cast<const char *>(in));
    char *outPtr = static_cast<char *>(out);
    AuByteBuffer buffer;

    // The descriptor is reused across unrelated calls. Put it back into its initial
    // conversion state so that shift/BOM state left behind by an earlier (possibly
    // failed) conversion cannot leak into this one.
    (void)iconv(handle, nullptr, nullptr, nullptr, nullptr);

    if (!out)
    {
        buffer.resize(length);
        outPtr = (char *)buffer.base;
        canWriteLength = outLen = buffer.length;
    }

    AuUInt32 written {};

    while (iconv(handle, &inPtr, &canReadLength, &outPtr, &canWriteLength) == (size_t)-1)
    {
        // Only a full scratch buffer in measuring mode is recoverable.
        if (out || errno != E2BIG)
        {
            SysPushErrorGen();
            return {};
        }

        // Bank what this pass produced, then start over at the front of a larger
        // scratch buffer; the converted bytes themselves are not needed.
        written += outLen - canWriteLength;

        buffer.resize(AuMax(length, buffer.length) * 2);
        outPtr = (char *)buffer.base;
        canWriteLength = outLen = buffer.length;
    }

    written += outLen - canWriteLength;

    // iconv decrements canReadLength by every byte it consumed across all passes.
    return {static_cast<AuUInt32>(length - canReadLength), written};
}
#endif
// Converts `length` bytes at `in`, encoded in `page`, to UTF-8 through the iconv
// descriptor registered for that page. `utf8`/`utf8Max` are forwarded to IConvWork
// as the output buffer and its size.
//
// Returns an empty pair when no descriptor is registered for `page`, and always on
// targets that are not POSIX derived.
AuStreamReadWrittenPair_t IConvCPToUTF8(ECodePage page, const void *in, AuUInt length, void *utf8, AuUInt32 utf8Max)
{
#if !defined(AURORA_IS_POSIX_DERIVED)
    return {};
#else
    if (auto entry = gIconvCodePage2UTF.find(page); entry != gIconvCodePage2UTF.end())
    {
        return IConvWork(entry->second, in, length, utf8, utf8Max);
    }

    // No converter was opened for this page.
    return {};
#endif
}
// Converts `utf8Length` bytes of UTF-8 at `utf8` to the encoding of `page` through
// the iconv descriptor registered for that page. `cp`/`cpLen` are forwarded to
// IConvWork as the output buffer and its size.
//
// Returns an empty pair when no descriptor is registered for `page`, and always on
// targets that are not POSIX derived.
AuStreamReadWrittenPair_t IConvUTF8ToCp(ECodePage page, const void *utf8, AuUInt32 utf8Length, void *cp, AuUInt32 cpLen)
{
#if !defined(AURORA_IS_POSIX_DERIVED)
    return {};
#else
    if (auto entry = gIconvUTF2CodePage.find(page); entry != gIconvUTF2CodePage.end())
    {
        return IConvWork(entry->second, utf8, utf8Length, cp, cpLen);
    }

    // No converter was opened for this page.
    return {};
#endif
}
// Opens the two iconv descriptors for `page` (page -> UTF-8 and UTF-8 -> page) and
// stores them in gIconvCodePage2UTF / gIconvUTF2CodePage.
//
// Nothing is registered when the page has no charset name or when either descriptor
// cannot be opened; the failure is reported through the SysPushError* helpers.
// Does nothing on targets that are not POSIX derived.
static void AddPair(ECodePage page)
{
#if defined(AURORA_IS_POSIX_DERIVED)
    AuString temp;

    const char *charset = PageToString(page, temp);
    if (!charset)
    {
        SysPushErrorUnimplemented();
        return;
    }

    // "UTF-8" is the canonical charset name; the unhyphenated alias is not offered
    // by every iconv implementation.
    auto toUTF = iconv_open("UTF-8", charset);
    if (toUTF == (iconv_t)-1)
    {
        SysPushErrorGen();
        return;
    }

    auto fromUTF = iconv_open(charset, "UTF-8");
    if (fromUTF == (iconv_t)-1)
    {
        // Report first so the error state of the failed open is what gets recorded,
        // then release the descriptor that did open so it is not leaked.
        SysPushErrorGen();
        iconv_close(toUTF);
        return;
    }

    // NOTE(review): the results of these inserts are not checked; if an insert can
    // fail (e.g. the page is already registered) the descriptor would be leaked.
    AuTryInsert(gIconvCodePage2UTF, page, toUTF);
    AuTryInsert(gIconvUTF2CodePage, page, fromUTF);
#endif
}
void InitIConv()
{
AddPair(ECodePage::eSysUnk);
AddPair(ECodePage::eLatin1);
AddPair(ECodePage::eUTF7);
AddPair(ECodePage::e2312);
AddPair(ECodePage::eGBK);
AddPair(ECodePage::eSJIS);
AddPair(ECodePage::eUTF16);
AddPair(ECodePage::e18030);
}
}

View File

@ -9,5 +9,8 @@
namespace Aurora::Locale::Encoding
{
// Converts `length` bytes at `in` from `page` to UTF-8 with iconv, using `utf8`/`utf8Max`
// as the output buffer and its size.
// Returns an empty pair when no iconv descriptor is registered for `page`.
AuStreamReadWrittenPair_t IConvCPToUTF8(ECodePage page, const void *in, AuUInt length, void *utf8, AuUInt32 utf8Max);
// Converts `utf8Length` bytes of UTF-8 at `utf8` to `page` with iconv, using `cp`/`cpLen`
// as the output buffer and its size.
// Returns an empty pair when no iconv descriptor is registered for `page`.
AuStreamReadWrittenPair_t IConvUTF8ToCp(ECodePage page, const void *utf8, AuUInt32 utf8Length, void *cp, AuUInt32 cpLen);
// Opens the iconv descriptors that the two functions above look up.
void InitIConv();
}

View File

@ -372,6 +372,8 @@ namespace Aurora::Locale
gLanguageCode = AuToLower(gLanguageCode);
gCountryCode = AuToUpper(gCountryCode);
gCodeset = gCodeset;
Encoding::InitIConv();
}
static bool gLockLocale = false;

View File

@ -25,6 +25,7 @@
#endif
#include "Encoding/EncoderAdapter.hpp"
#include "Encoding/EncoderIConv.hpp"
namespace Aurora::Locale
{