AuroraRuntime/Source/Locale/Encoding/EncoderNSL.cpp

286 lines
8.8 KiB
C++

/***
Copyright (C) 2021 J Reece Wilson (a/k/a "Reece"). All rights reserved.
File: EncoderNSL.cpp
Date: 2021-8-19
Author: Reece
***/
#include <RuntimeInternal.hpp>
#include "../Locale.hpp"
#include "Encoding.hpp"
#include "EncoderNSL.hpp"
namespace Aurora::Locale::Encoding
{
// Converts a buffer from one Win32 code page to another by round-tripping
// through a temporary UTF-16 buffer.
//
//  cpA      - Win32 code page identifier of the input
//  cpB      - Win32 code page identifier of the output
//  in       - input bytes encoded in cpA; a null pointer yields an empty result
//  inLength - length of `in`, in bytes
//  cpBlob   - destination buffer, or null to only query the required size
//  cpLen    - capacity of cpBlob in bytes (unused when cpBlob is null)
//
// Returns {inLength, value returned by WideCharToMultiByte}. The second member
// is the number of bytes written to cpBlob, or the number of bytes required
// when cpBlob is null. An empty `{}` result is returned when the input is
// null, when either cpA -> UTF-16 step fails, when the temporary allocation
// fails, or when AU_HAS_MSFT_NATIONALLANGSUPPORT is not defined.
//
// NOTE(review): if the final UTF-16 -> cpB step fails (WideCharToMultiByte
// returns 0, e.g. because cpBlob is too small) the result is {inLength, 0};
// confirm callers treat a zero "written" count as a failure.
static AuStreamReadWrittenPair_t Win32ConvertCpAToCPB(AuUInt32 cpA, AuUInt32 cpB, const void *in, AuUInt32 inLength, void *cpBlob, AuUInt32 cpLen)
{
#if defined(AU_HAS_MSFT_NATIONALLANGSUPPORT)
    if (!in)
    {
        return {};
    }

    // Query how many UTF-16 code units the cpA input expands to
    auto chars = MultiByteToWideChar(cpA, 0, (LPCCH)in, inLength, NULL, 0);
    if (chars <= 0)
    {
        return {};
    }

    // Allocate a temporary UTF-16/widechar buffer of exactly that size
    auto ret = _new wchar_t[chars];
    if (!ret)
    {
        return {};
    }

    // Convert the cpA buffer to UTF-16. The result must be checked: on failure
    // the temporary buffer is uninitialised and must not be converted onwards.
    auto converted = MultiByteToWideChar(cpA, 0, (LPCCH)in, inLength, ret, chars);
    if (converted <= 0)
    {
        delete[] ret;
        return {};
    }

    // Convert the UTF-16 intermediate into cpB, or just measure it when the
    // caller supplied no destination buffer
    AuUInt32 cpLength;
    if (!cpBlob)
    {
        cpLength = WideCharToMultiByte(cpB, 0, ret, converted, NULL, 0, NULL, NULL);
    }
    else
    {
        cpLength = WideCharToMultiByte(cpB, 0, ret, converted, (LPSTR)cpBlob, cpLen, NULL, NULL);
    }

    delete[] ret;
    return {inLength, cpLength};
#else
    return {};
#endif
}
// Converts `length` bytes at `in`, encoded in the Win32 code page `cp`, to
// UTF-8. `utf8`/`utf8Len` describe the destination buffer and are forwarded
// unchanged to Win32ConvertCpAToCPB, whose result is returned as-is.
AuStreamReadWrittenPair_t Win32ConvertFromCPToUTF8(AuUInt32 cp, const void *in, AuUInt length, void *utf8, AuUInt32 utf8Len)
{
    // The source is the caller's code page; the destination is always UTF-8
    const AuUInt32 targetCodePage = CP_UTF8;
    auto result = Win32ConvertCpAToCPB(cp, targetCodePage, in, length, utf8, utf8Len);
    return result;
}
// Converts `utf8Length` bytes of UTF-8 at `utf8` into the Win32 code page
// `cp`. `cpBlob`/`cpLen` describe the destination buffer and are forwarded
// unchanged to Win32ConvertCpAToCPB, whose result is returned as-is.
AuStreamReadWrittenPair_t Win32ConvertFromUTF8ToCp(AuUInt32 cp, const void *utf8, AuUInt utf8Length, void *cpBlob, AuUInt32 cpLen)
{
    // The source is always UTF-8; the destination is the caller's code page
    const AuUInt32 sourceCodePage = CP_UTF8;
    auto result = Win32ConvertCpAToCPB(sourceCodePage, cp, utf8, utf8Length, cpBlob, cpLen);
    return result;
}
// Converts UTF-16 (wchar_t code units) to UTF-8.
//
//  in       - UTF-16 input; a null pointer yields an empty result
//  inLength - length of `in` in bytes (divided by sizeof(wchar_t) below)
//  utf8     - destination buffer, or null to only query the required size
//  utf8Len  - capacity of utf8 in bytes (unused when utf8 is null)
//
// Only the leading run of well-formed input is converted: scanning stops at a
// high surrogate that is cut off by the end of the buffer or that is not
// followed by a low surrogate.
//
// Returns {UTF-16 code units consumed, value returned by WideCharToMultiByte},
// i.e. the UTF-8 bytes written, or the bytes required when utf8 is null.
// Returns an empty `{}` result when AU_HAS_MSFT_NATIONALLANGSUPPORT is not
// defined.
//
// NOTE(review): the first member is counted in UTF-16 code units whereas
// inLength is in bytes - confirm this is what callers expect.
AuStreamReadWrittenPair_t Win32ConvertFromUTF16ToUTF8(const void *in, AuUInt32 inLength, void *utf8, AuUInt32 utf8Len)
{
#if defined(AU_HAS_MSFT_NATIONALLANGSUPPORT)
    if (!in)
    {
        return {};
    }

    auto pWideChars = (const wchar_t *)in;
    AuUInt32 iCChars = inLength / sizeof(wchar_t);

    // Find the end of the last complete code point. In UTF-16 a pair is a high
    // (lead) surrogate followed by a low (trail) surrogate, so the lead unit is
    // the one that requires a look-ahead.
    AuUInt32 lastEnd = 0;
    AuUInt32 curIdx = 0;
    while (curIdx < iCChars)
    {
        if (IS_HIGH_SURROGATE(pWideChars[curIdx]))
        {
            if (curIdx + 2 > iCChars)
            {
                // the pair is cut off by the end of the buffer
                break;
            }

            if (!IS_LOW_SURROGATE(pWideChars[curIdx + 1]))
            {
                // lead surrogate without its trail unit
                break;
            }

            curIdx += 2;
        }
        else
        {
            curIdx += 1;
        }

        lastEnd = curIdx;
    }

    if (!utf8)
    {
        // Measuring only: ask for the required UTF-8 byte count
        AuUInt32 cpLength = WideCharToMultiByte(CP_UTF8, 0, pWideChars, lastEnd, NULL, 0, NULL, NULL);
        return {lastEnd, cpLength};
    }

    AuUInt32 cpLength = WideCharToMultiByte(CP_UTF8, 0, pWideChars, lastEnd, (LPSTR)utf8, utf8Len, NULL, NULL);

    // Re-measure how many UTF-16 code units the emitted bytes decode to. Only
    // the cpLength bytes that were actually written may be inspected; the rest
    // of the destination buffer holds whatever the caller left there.
    auto actLen = cpLength ? MultiByteToWideChar(CP_UTF8, 0, (LPCCH)utf8, cpLength, NULL, 0) : 0;
    return {actLen, cpLength};
#else
    return {};
#endif
}
// Converts UTF-8 to UTF-16.
//
//  in       - UTF-8 input; a null pointer yields an empty result
//  inLength - length of `in` in bytes
//  utf16    - destination buffer for the UTF-16 output
//  utf16Len - destination capacity; it is compared against a UTF-16 code unit
//             count below
//
// Returns {UTF-8 byte length of the converted text, UTF-16 code units}. When
// the output would not fit in utf16Len the result is `{}` for a non-null
// destination and {0, utf16Len} for a null one. Returns an empty `{}` result
// when AU_HAS_MSFT_NATIONALLANGSUPPORT is not defined.
//
// NOTE(review): when utf16 is null and the text does fit in utf16Len, both
// conversion calls below are still issued with a null wide buffer - confirm
// this is the intended size-query behaviour.
AuStreamReadWrittenPair_t Win32ConvertFromUTF8ToUTF16(const void *in, AuUInt32 inLength, void *utf16, AuUInt32 utf16Len)
{
#if defined(AU_HAS_MSFT_NATIONALLANGSUPPORT)
    if (in == nullptr)
    {
        return {};
    }

    const auto pUtf8 = (LPCCH)in;
    const auto pWide = (LPWSTR)utf16;

    // Query the number of UTF-16 code units the UTF-8 input expands to
    auto wideCount = MultiByteToWideChar(CP_UTF8, 0, pUtf8, inLength, NULL, 0);
    if (wideCount == 0)
    {
        return {};
    }

    // Not enough room in the destination
    if (wideCount > utf16Len)
    {
        if (utf16)
        {
            return {};
        }

        return {0, utf16Len};
    }

    // Perform the actual UTF-8 -> UTF-16 conversion
    MultiByteToWideChar(CP_UTF8, 0, pUtf8, inLength, pWide, wideCount);

    // Measure the UTF-8 length of what was produced; this value, rather than
    // inLength, is reported as the amount of input read
    AuUInt32 consumed = WideCharToMultiByte(CP_UTF8, 0, pWide, wideCount, NULL, 0, NULL, NULL);
    return {consumed, wideCount};
#else
    return {};
#endif
}
// TODO(reece): Consider implementing big-endian support when I can be bothered
// Converts `length` bytes at `in`, encoded as `page`, to UTF-8 in
// `utf8`/`utf8Max` (mutable-input overload).
//
// ECodePage::eUTF16 is routed to Win32ConvertFromUTF16ToUTF8; every other
// supported page is translated to its native code page identifier and handed
// to Win32ConvertFromCPToUTF8. eUnsupported and unknown values yield `{}`.
AuStreamReadWrittenPair_t Win32CPToUTF8(ECodePage page, void *in, AuUInt length, void *utf8, AuUInt32 utf8Max)
{
    AuUInt32 nativeCodePage {};

    switch (page)
    {
    case ECodePage::eUTF16:
        // UTF-16 has a dedicated path
        return Win32ConvertFromUTF16ToUTF8(in, length, utf8, utf8Max);

    case ECodePage::e18030:
        nativeCodePage = CP_CHINESE;
        break;
    case ECodePage::eSysUnk:
        nativeCodePage = CP_ACP;
        break;
    case ECodePage::eLatin1:
        nativeCodePage = CP_LATIN_1;
        break;
    case ECodePage::eUTF7:
        nativeCodePage = CP_UTF7;
        break;
    case ECodePage::e2312:
    case ECodePage::eGBK:
        nativeCodePage = CP_2312_LIMITED_GBK;
        break;
    case ECodePage::eSJIS:
        nativeCodePage = CP_SHIFTJIS;
        break;

    case ECodePage::eUnsupported:
    default:
        return {};
    }

    return Win32ConvertFromCPToUTF8(nativeCodePage, in, length, utf8, utf8Max);
}
// Converts `length` bytes at `in`, encoded as `page`, to UTF-8 in
// `utf8`/`utf8Max` (const-input overload).
//
// ECodePage::eUTF16 is routed to Win32ConvertFromUTF16ToUTF8; every other
// supported page is converted with Win32ConvertFromCPToUTF8 using the
// matching native code page identifier. eUnsupported and unknown values
// yield `{}`.
AuStreamReadWrittenPair_t Win32CPToUTF8(ECodePage page, const void *in, AuUInt length, void *utf8, AuUInt32 utf8Max)
{
    AuStreamReadWrittenPair_t ret {};

    switch (page)
    {
    default:
    case ECodePage::eUnsupported:
        return {};
    case ECodePage::e18030:
        // Code page -> UTF-8, like every other case in this function (this
        // case previously called the UTF-8 -> code page routine)
        ret = Win32ConvertFromCPToUTF8(CP_CHINESE, in, length, utf8, utf8Max);
        break;
    case ECodePage::eSysUnk:
        ret = Win32ConvertFromCPToUTF8(CP_ACP, in, length, utf8, utf8Max);
        break;
    case ECodePage::eLatin1:
        ret = Win32ConvertFromCPToUTF8(CP_LATIN_1, in, length, utf8, utf8Max);
        break;
    case ECodePage::eUTF7:
        ret = Win32ConvertFromCPToUTF8(CP_UTF7, in, length, utf8, utf8Max);
        break;
    case ECodePage::e2312:
    case ECodePage::eGBK:
        // both pages share the same native code page identifier
        ret = Win32ConvertFromCPToUTF8(CP_2312_LIMITED_GBK, in, length, utf8, utf8Max);
        break;
    case ECodePage::eSJIS:
        ret = Win32ConvertFromCPToUTF8(CP_SHIFTJIS, in, length, utf8, utf8Max);
        break;
    case ECodePage::eUTF16:
        ret = Win32ConvertFromUTF16ToUTF8(in, length, utf8, utf8Max);
        break;
    }

    return ret;
}
// Converts `utf8Length` bytes of UTF-8 at `utf8` into the encoding named by
// `page`, writing to `cp`/`cpLen`.
//
// ECodePage::eUTF16 is routed to Win32ConvertFromUTF8ToUTF16; every other
// supported page is translated to its native code page identifier and handed
// to Win32ConvertFromUTF8ToCp. eUnsupported and unknown values yield `{}`.
AuStreamReadWrittenPair_t Win32UTF8ToCp(ECodePage page, const void *utf8, AuUInt32 utf8Length, void *cp, AuUInt32 cpLen)
{
    AuUInt32 nativeCodePage {};

    switch (page)
    {
    case ECodePage::eUTF16:
        // UTF-16 has a dedicated path
        return Win32ConvertFromUTF8ToUTF16(utf8, utf8Length, cp, cpLen);

    case ECodePage::eSysUnk:
        nativeCodePage = CP_ACP;
        break;
    case ECodePage::eUTF7:
        nativeCodePage = CP_UTF7;
        break;
    case ECodePage::eLatin1:
        nativeCodePage = CP_LATIN_1;
        break;
    case ECodePage::e18030:
        nativeCodePage = CP_CHINESE;
        break;
    case ECodePage::eSJIS:
        nativeCodePage = CP_SHIFTJIS;
        break;
    case ECodePage::e2312:
    case ECodePage::eGBK:
        nativeCodePage = CP_2312_LIMITED_GBK;
        break;

    case ECodePage::eUnsupported:
    default:
        return {};
    }

    return Win32ConvertFromUTF8ToCp(nativeCodePage, utf8, utf8Length, cp, cpLen);
}
}