254 lines
8.0 KiB
C++
254 lines
8.0 KiB
C++
/***
|
|
Copyright (C) 2021 J Reece Wilson (a/k/a "Reece"). All rights reserved.
|
|
|
|
File: EncoderNSL.cpp
|
|
Date: 2021-8-19
|
|
Author: Reece
|
|
***/
|
|
#include <RuntimeInternal.hpp>
|
|
#include "../Locale.hpp"
|
|
#include "Encoding.hpp"
|
|
#include "EncoderNSL.hpp"
|
|
|
|
namespace Aurora::Locale::Encoding
|
|
{
|
|
// Re-encodes a buffer from one Windows code page into another, using UTF-16 as the
// intermediate representation.
//
// cpA      - Windows code page identifier describing the bytes at `in`
// cpB      - Windows code page identifier to encode into
// in       - input bytes; a null pointer produces an empty result
// inLength - size of `in`, in bytes
// cpBlob   - destination buffer, or null to only measure how many bytes are required
// cpLen    - capacity of `cpBlob`, in bytes; not used when `cpBlob` is null
//
// Returns {bytes consumed from `in`, bytes written to / required for `cpBlob`}.
// Returns an empty pair when the input is null, when the temporary buffer cannot be
// allocated, when either conversion step fails (for example because `cpBlob` is too
// small), or when built without AU_HAS_MSFT_NATIONALLANGSUPPORT.
static AuStreamReadWrittenPair_t Win32ConvertCpAToCPB(AuUInt32 cpA, AuUInt32 cpB, const void *in, AuUInt32 inLength, void *cpBlob, AuUInt32 cpLen)
{
#if defined(AU_HAS_MSFT_NATIONALLANGSUPPORT)
    if (!in)
    {
        return {};
    }

    // Measure how many UTF-16 code units the cpA input expands to
    auto chars = MultiByteToWideChar(cpA, 0, static_cast<LPCCH>(in), inLength, NULL, 0);
    if (!chars)
    {
        return {};
    }

    // Allocate a temporary UTF-16 buffer of exactly that size
    auto wide = _new wchar_t[chars];
    if (!wide)
    {
        return {};
    }

    // Convert the cpA buffer to UTF-16; a zero return means the conversion failed
    auto converted = MultiByteToWideChar(cpA, 0, static_cast<LPCCH>(in), inLength, wide, chars);
    if (!converted)
    {
        delete[] wide;
        return {};
    }

    // Encode the UTF-16 text as cpB. Passing a capacity of 0 asks the API for the
    // required byte count without touching the destination, which is what the
    // null-destination (measure only) mode relies on.
    auto cpLength = WideCharToMultiByte(cpB, 0, wide, converted, static_cast<LPSTR>(cpBlob), cpBlob ? cpLen : 0, NULL, NULL);

    delete[] wide;

    // Zero means failure; do not claim the input was consumed when nothing was produced
    if (!cpLength)
    {
        return {};
    }

    return {inLength, static_cast<AuUInt32>(cpLength)};
#else
    return {};
#endif
}
|
|
|
|
// Converts `length` bytes at `in`, encoded in Windows code page `cp`, to UTF-8.
// `utf8` / `utf8Len` describe the destination buffer; the result is whatever
// Win32ConvertCpAToCPB reports for a cp -> CP_UTF8 conversion.
AuStreamReadWrittenPair_t Win32ConvertFromCPToUTF8(AuUInt32 cp, const void *in, AuUInt length, void *utf8, AuUInt32 utf8Len)
{
    const AuUInt32 sourcePage = cp;
    const AuUInt32 targetPage = CP_UTF8;

    return Win32ConvertCpAToCPB(sourcePage, targetPage, in, length, utf8, utf8Len);
}
|
|
|
|
// Converts `utf8Length` bytes of UTF-8 at `utf8` into Windows code page `cp`.
// `cpBlob` / `cpLen` describe the destination buffer; the result is whatever
// Win32ConvertCpAToCPB reports for a CP_UTF8 -> cp conversion.
AuStreamReadWrittenPair_t Win32ConvertFromUTF8ToCp(AuUInt32 cp, const void *utf8, AuUInt utf8Length, void *cpBlob, AuUInt32 cpLen)
{
    const AuUInt32 sourcePage = CP_UTF8;
    const AuUInt32 targetPage = cp;

    return Win32ConvertCpAToCPB(sourcePage, targetPage, utf8, utf8Length, cpBlob, cpLen);
}
|
|
|
|
// Converts a UTF-16 (wchar_t) buffer to UTF-8.
//
// in       - UTF-16 input; a null pointer produces an empty result
// inLength - size of `in` in bytes (it is divided by sizeof(wchar_t) to get the unit count)
// utf8     - destination buffer, or null to only measure how many bytes are required
// utf8Len  - capacity of `utf8`, in bytes; not used when `utf8` is null
//
// Returns {inLength, value returned by WideCharToMultiByte}; the second member is the
// number of UTF-8 bytes written (or required, in measure-only mode) and is 0 when the
// API reports failure. Returns an empty pair when built without
// AU_HAS_MSFT_NATIONALLANGSUPPORT.
AuStreamReadWrittenPair_t Win32ConvertFromUTF16ToUTF8(const void *in, AuUInt32 inLength, void *utf8, AuUInt32 utf8Len)
{
#if defined(AU_HAS_MSFT_NATIONALLANGSUPPORT)
    if (!in)
    {
        return {};
    }

    auto pWideChars = static_cast<const wchar_t *>(in);
    const int wideCount = static_cast<int>(inLength / sizeof(wchar_t));

    // A capacity of 0 makes the API return the required size without writing,
    // which implements the null-destination (measure only) mode.
    AuUInt32 cpLength = WideCharToMultiByte(CP_UTF8, 0, pWideChars, wideCount, static_cast<LPSTR>(utf8), utf8 ? utf8Len : 0, NULL, NULL);

    return {inLength, cpLength};
#else
    return {};
#endif
}
|
|
|
|
// Converts a UTF-8 buffer to UTF-16 (wchar_t).
//
// in       - UTF-8 input; a null pointer produces an empty result
// inLength - size of `in`, in bytes
// utf16    - destination buffer, or null to only measure how many bytes are required
// utf16Len - capacity of `utf16`, in bytes (the written count returned below is also in
//            bytes); not used when `utf16` is null
//
// Returns {UTF-8 bytes consumed, UTF-16 bytes written}. With a null destination the
// result is {inLength, bytes required}, mirroring the measure-only mode of the other
// converters in this file. Returns an empty pair when the conversion fails, when the
// destination is too small, or when built without AU_HAS_MSFT_NATIONALLANGSUPPORT.
AuStreamReadWrittenPair_t Win32ConvertFromUTF8ToUTF16(const void *in, AuUInt32 inLength, void *utf16, AuUInt32 utf16Len)
{
#if defined(AU_HAS_MSFT_NATIONALLANGSUPPORT)
    if (!in)
    {
        return {};
    }

    // Get the UTF-16 code unit count of the UTF-8 string in/inLength
    auto chars = MultiByteToWideChar(CP_UTF8, 0, static_cast<LPCCH>(in), inLength, NULL, 0);
    if (!chars)
    {
        return {};
    }

    const auto requiredBytes = static_cast<AuUInt32>(chars * sizeof(wchar_t));

    // Measure-only mode: never hand a null destination to the conversion call
    if (!utf16)
    {
        return {inLength, requiredBytes};
    }

    // utf16Len is a byte capacity, so compare bytes with bytes; comparing the code unit
    // count against it would allow writing past the end of the destination
    if (requiredBytes > utf16Len)
    {
        return {};
    }

    // Convert the CP_UTF8 buffer to UTF-16
    auto converted = MultiByteToWideChar(CP_UTF8, 0, static_cast<LPCCH>(in), inLength, static_cast<LPWSTR>(utf16), chars);
    if (!converted)
    {
        return {};
    }

    // Re-measure the converted text as UTF-8 to report how much input it represents
    auto measured = static_cast<AuUInt32>(WideCharToMultiByte(CP_UTF8, 0, static_cast<LPCWSTR>(utf16), converted, NULL, 0, NULL, NULL));

    // Invalid input sequences are replaced during conversion and the replacement can
    // re-encode to more bytes than the original, so never report more than was supplied
    AuUInt32 read = measured > inLength ? inLength : measured;

    return {read, converted * sizeof(wchar_t)};
#else
    return {};
#endif
}
|
|
|
|
// TODO(reece): Consider implementing big-endian support when I can be bothered
|
|
|
|
// Decodes `length` bytes at `in`, encoded as `page`, into the UTF-8 buffer `utf8` of
// capacity `utf8Max`. This overload accepts a mutable input pointer.
// Each supported ECodePage is mapped to its Windows code page and forwarded to
// Win32ConvertFromCPToUTF8; eUTF16 goes through Win32ConvertFromUTF16ToUTF8 instead.
// eUnsupported and any unlisted page yield an empty pair.
AuStreamReadWrittenPair_t Win32CPToUTF8(ECodePage page, void *in, AuUInt length, void *utf8, AuUInt32 utf8Max)
{
    switch (page)
    {
    case ECodePage::e18030:
        return Win32ConvertFromCPToUTF8(CP_CHINESE, in, length, utf8, utf8Max);

    case ECodePage::eSysUnk:
        return Win32ConvertFromCPToUTF8(CP_ACP, in, length, utf8, utf8Max);

    case ECodePage::eLatin1:
        return Win32ConvertFromCPToUTF8(CP_LATIN_1, in, length, utf8, utf8Max);

    case ECodePage::eUTF7:
        return Win32ConvertFromCPToUTF8(CP_UTF7, in, length, utf8, utf8Max);

    // Both GB2312 and GBK are served by the same Windows code page
    case ECodePage::e2312:
    case ECodePage::eGBK:
        return Win32ConvertFromCPToUTF8(CP_2312_LIMITED_GBK, in, length, utf8, utf8Max);

    case ECodePage::eSJIS:
        return Win32ConvertFromCPToUTF8(CP_SHIFTJIS, in, length, utf8, utf8Max);

    case ECodePage::eUTF16:
        return Win32ConvertFromUTF16ToUTF8(in, length, utf8, utf8Max);

    case ECodePage::eUnsupported:
    default:
        return {};
    }
}
|
|
|
|
// Decodes `length` bytes at `in`, encoded as `page`, into the UTF-8 buffer `utf8` of
// capacity `utf8Max`. This overload accepts a const input pointer.
// Each supported ECodePage is mapped to its Windows code page and forwarded to
// Win32ConvertFromCPToUTF8; eUTF16 goes through Win32ConvertFromUTF16ToUTF8 instead.
// eUnsupported and any unlisted page yield an empty pair.
AuStreamReadWrittenPair_t Win32CPToUTF8(ECodePage page, const void *in, AuUInt length, void *utf8, AuUInt32 utf8Max)
{
    AuStreamReadWrittenPair_t ret {};

    switch (page)
    {
    default:
    case ECodePage::eUnsupported:
        return {};
    case ECodePage::e18030:
        // This function decodes *to* UTF-8, so the input is CP_CHINESE and must go
        // through the CP -> UTF-8 converter like every other case here
        ret = Win32ConvertFromCPToUTF8(CP_CHINESE, in, length, utf8, utf8Max);
        break;
    case ECodePage::eSysUnk:
        ret = Win32ConvertFromCPToUTF8(CP_ACP, in, length, utf8, utf8Max);
        break;
    case ECodePage::eLatin1:
        ret = Win32ConvertFromCPToUTF8(CP_LATIN_1, in, length, utf8, utf8Max);
        break;
    case ECodePage::eUTF7:
        ret = Win32ConvertFromCPToUTF8(CP_UTF7, in, length, utf8, utf8Max);
        break;
    // Both GB2312 and GBK are served by the same Windows code page
    case ECodePage::e2312:
    case ECodePage::eGBK:
        ret = Win32ConvertFromCPToUTF8(CP_2312_LIMITED_GBK, in, length, utf8, utf8Max);
        break;
    case ECodePage::eSJIS:
        ret = Win32ConvertFromCPToUTF8(CP_SHIFTJIS, in, length, utf8, utf8Max);
        break;
    case ECodePage::eUTF16:
        ret = Win32ConvertFromUTF16ToUTF8(in, length, utf8, utf8Max);
        break;
    }

    return ret;
}
|
|
|
|
// Encodes `utf8Length` bytes of UTF-8 at `utf8` as `page`, writing into the buffer `cp`
// of capacity `cpLen`.
// Each supported ECodePage is mapped to its Windows code page and forwarded to
// Win32ConvertFromUTF8ToCp; eUTF16 goes through Win32ConvertFromUTF8ToUTF16 instead.
// eUnsupported and any unlisted page yield an empty pair.
AuStreamReadWrittenPair_t Win32UTF8ToCp(ECodePage page, const void *utf8, AuUInt32 utf8Length, void *cp, AuUInt32 cpLen)
{
    switch (page)
    {
    case ECodePage::eSysUnk:
        return Win32ConvertFromUTF8ToCp(CP_ACP, utf8, utf8Length, cp, cpLen);

    case ECodePage::eUTF7:
        return Win32ConvertFromUTF8ToCp(CP_UTF7, utf8, utf8Length, cp, cpLen);

    case ECodePage::eLatin1:
        return Win32ConvertFromUTF8ToCp(CP_LATIN_1, utf8, utf8Length, cp, cpLen);

    case ECodePage::e18030:
        return Win32ConvertFromUTF8ToCp(CP_CHINESE, utf8, utf8Length, cp, cpLen);

    case ECodePage::eSJIS:
        return Win32ConvertFromUTF8ToCp(CP_SHIFTJIS, utf8, utf8Length, cp, cpLen);

    // Both GB2312 and GBK are served by the same Windows code page
    case ECodePage::e2312:
    case ECodePage::eGBK:
        return Win32ConvertFromUTF8ToCp(CP_2312_LIMITED_GBK, utf8, utf8Length, cp, cpLen);

    case ECodePage::eUTF16:
        return Win32ConvertFromUTF8ToUTF16(utf8, utf8Length, cp, cpLen);

    case ECodePage::eUnsupported:
    default:
        return {};
    }
}
|
|
} |