/*** Copyright (C) 2021 J Reece Wilson (a/k/a "Reece"). All rights reserved. File: EncoderNSL.cpp Date: 2021-8-19 Author: Reece ***/ #include #include "../Locale.hpp" #include "Encoding.hpp" #include "EncoderNSL.hpp" namespace Aurora::Locale::Encoding { static AuStreamReadWrittenPair_t Win32ConvertCpAToCPB(AuUInt32 cpA, AuUInt32 cpB, const void *in, AuUInt32 inLength, void *cpBlob, AuUInt32 cpLen) { #if defined(AU_HAS_MSFT_NATIONALLANGSUPPORT) if (!in) { return {}; } // Get the UTF-16 character count of the cpA string in/inLength auto chars = MultiByteToWideChar(cpA, 0, (LPCCH)in, inLength, NULL, 0); if (!chars) { return {}; } // Allocate a temp utf-16/widechar string buffer auto ret = _new wchar_t[chars]; if (!ret) { return {}; } // Convert the cpA buffer to UTF-16 MultiByteToWideChar(cpA, 0, (LPCCH)in, inLength, ret, chars); // convert the shortened string with invalid surrogates back into a cpA length AuUInt32 cpLength; if (!cpBlob) { cpLength = WideCharToMultiByte(cpB, 0, ret, chars, NULL, NULL, NULL, NULL); } else { cpLength = WideCharToMultiByte(cpB, 0, ret, chars, (LPSTR)cpBlob, cpLen, NULL, NULL); } delete[] ret; return {inLength, cpLength}; #else return {}; #endif } AuStreamReadWrittenPair_t Win32ConvertFromCPToUTF8(AuUInt32 cp, const void *in, AuUInt length, void *utf8, AuUInt32 utf8Len) { #if !defined(AU_HAS_MSFT_NATIONALLANGSUPPORT) return {}; #else return Win32ConvertCpAToCPB(cp, CP_UTF8, in, length, utf8, utf8Len); #endif } AuStreamReadWrittenPair_t Win32ConvertFromUTF8ToCp(AuUInt32 cp, const void *utf8, AuUInt utf8Length, void *cpBlob, AuUInt32 cpLen) { #if !defined(AU_HAS_MSFT_NATIONALLANGSUPPORT) return {}; #else return Win32ConvertCpAToCPB(CP_UTF8, cp, utf8, utf8Length, cpBlob, cpLen); #endif } AuStreamReadWrittenPair_t Win32ConvertFromUTF16ToUTF8(const void *in, AuUInt32 inLength, void *utf8, AuUInt32 utf8Len) { #if defined(AU_HAS_MSFT_NATIONALLANGSUPPORT) if (!in) { return {}; } auto pWideChars = (const wchar_t *)in; AuUInt32 cpLength; if (!utf8) { auto cpLength = WideCharToMultiByte(CP_UTF8, 0, (const wchar_t *)in, inLength / sizeof(wchar_t), NULL, NULL, NULL, NULL); return {inLength, cpLength}; } else { cpLength = WideCharToMultiByte(CP_UTF8, 0, (const wchar_t *)in, inLength / sizeof(wchar_t), (LPSTR)utf8, utf8Len, NULL, NULL); return {inLength, cpLength}; } #else return {}; #endif } AuStreamReadWrittenPair_t Win32ConvertFromUTF8ToUTF16(const void *in, AuUInt32 inLength, void *utf16, AuUInt32 utf16Len) { #if defined(AU_HAS_MSFT_NATIONALLANGSUPPORT) if (!in) { return {}; } // Get the UTF-16 character count of the cpA string in/inLength auto chars = MultiByteToWideChar(CP_UTF8, 0, (LPCCH)in, inLength, NULL, 0); if (!chars) { return {}; } if (chars > utf16Len) { if (!utf16) { return {0, utf16Len}; } else { return {}; } } // Convert the CP_UTF8 buffer to UTF-16 MultiByteToWideChar(CP_UTF8, 0, (LPCCH)in, inLength, (LPWSTR)utf16, chars); // convert the shortened string with invalid surrogates back into a CP_UTF8 length AuUInt32 read = inLength; #if 0 if (slowPath) #endif { read = WideCharToMultiByte(CP_UTF8, 0, (LPWSTR)utf16, chars, NULL, 0, NULL, NULL); } return {read, chars * sizeof(wchar_t)}; #else return {}; #endif } AuStreamReadWrittenPair_t Win32CPToUTF8(ECodePage page, const void *in, AuUInt length, void *utf8, AuUInt32 utf8Max) { AuStreamReadWrittenPair_t ret {}; #if defined(AU_HAS_MSFT_NATIONALLANGSUPPORT) switch (page) { default: case ECodePage::eUnsupported: return {}; case ECodePage::e18030: ret = Win32ConvertFromUTF8ToCp(CP_CHINESE, in, length, utf8, utf8Max); break; case ECodePage::eSysUnk: ret = Win32ConvertFromCPToUTF8(CP_ACP, in, length, utf8, utf8Max); break; case ECodePage::eLatin1: ret = Win32ConvertFromCPToUTF8(CP_LATIN_1, in, length, utf8, utf8Max); break; case ECodePage::eUTF7: ret = Win32ConvertFromCPToUTF8(CP_UTF7, in, length, utf8, utf8Max); break; case ECodePage::e2312: ret = Win32ConvertFromCPToUTF8(CP_2312_LIMITED_GBK, in, length, utf8, utf8Max); break; case ECodePage::eGBK: ret = Win32ConvertFromCPToUTF8(CP_2312_LIMITED_GBK, in, length, utf8, utf8Max); break; case ECodePage::eSJIS: ret = Win32ConvertFromCPToUTF8(CP_SHIFTJIS, in, length, utf8, utf8Max); break; case ECodePage::eUTF16: ret = Win32ConvertFromUTF16ToUTF8(in, length, utf8, utf8Max); break; } #endif return ret; } AuStreamReadWrittenPair_t Win32UTF8ToCp(ECodePage page, const void *utf8, AuUInt32 utf8Length, void *cp, AuUInt32 cpLen) { AuStreamReadWrittenPair_t ret {}; #if defined(AU_HAS_MSFT_NATIONALLANGSUPPORT) switch (page) { default: case ECodePage::eUnsupported: return {}; case ECodePage::eSysUnk: ret = Win32ConvertFromUTF8ToCp(CP_ACP, utf8, utf8Length, cp, cpLen); break; case ECodePage::eUTF7: ret = Win32ConvertFromUTF8ToCp(CP_UTF7, utf8, utf8Length, cp, cpLen); break; case ECodePage::eLatin1: ret = Win32ConvertFromUTF8ToCp(CP_LATIN_1, utf8, utf8Length, cp, cpLen); break; case ECodePage::e18030: ret = Win32ConvertFromUTF8ToCp(CP_CHINESE, utf8, utf8Length, cp, cpLen); break; case ECodePage::eSJIS: ret = Win32ConvertFromUTF8ToCp(CP_SHIFTJIS, utf8, utf8Length, cp, cpLen); break; case ECodePage::e2312: case ECodePage::eGBK: ret = Win32ConvertFromUTF8ToCp(CP_2312_LIMITED_GBK, utf8, utf8Length, cp, cpLen); break; case ECodePage::eUTF16: ret = Win32ConvertFromUTF8ToUTF16(utf8, utf8Length, cp, cpLen); break; } #endif return ret; } }