394 lines
11 KiB
C++
394 lines
11 KiB
C++
/***
|
|
Copyright (C) 2021 J Reece Wilson (a/k/a "Reece"). All rights reserved.
|
|
|
|
File: Locale.cpp
|
|
Date: 2021-6-11
|
|
Author: Reece
|
|
***/
|
|
#define I_REALLY_NEED_WIDECHAR_PUBAPI
|
|
#include <Source/RuntimeInternal.hpp>
|
|
#include "Locale.hpp"
|
|
|
|
#if !defined(AU_NO_CPPLOCALE)
|
|
#include <locale>
|
|
#include <codecvt>
|
|
#endif
|
|
|
|
#include <wchar.h>
|
|
#include <tuple>
|
|
|
|
namespace Aurora::Locale
|
|
{
|
|
static AuString gCountryCode;
|
|
static AuString gLanguageCode;
|
|
static AuString gCodeset;
|
|
static ECodePage gInternalCodePage = ECodePage::eEnumInvalid;
|
|
|
|
// Notes:
//  [0] std::wstring_convert was deprecated without a standard replacement being
//      offered; we do not have our own replacement yet.
//  [1] The native Win32 implementation appears to be more optimized than the
//      MSVC STL one.
#if !(defined(AU_NO_CPPLOCALE) || (defined(AURORA_COMPILER_MSVC) && defined(AU_LANG_CPP_20)))
// Shared UTF-8 <-> wchar_t converter for the non-Win32 conversion paths.
static std::wstring_convert<std::codecvt_utf8<wchar_t>> gUtf8Conv;
#endif
|
|
|
|
// Converts a NUL-terminated wide string to UTF-8.
//
// Returns an empty string when `in` is null or when the conversion throws; in
// the latter case an error is pushed via SysPushErrorMem.
AUKN_SYM AuString ConvertFromWChar(const wchar_t *in)
{
    // wcslen() on a null pointer is undefined behaviour and is not something the
    // catch block below could intercept, so reject it up front.
    if (!in)
    {
        return {};
    }

    try
    {
        return ConvertFromWChar(in, wcslen(in));
    }
    catch (...)
    {
        SysPushErrorMem("ConvertFromWChar failed");
        return {};
    }
}
|
|
|
|
// Converts up to `length` wide characters starting at `in` to UTF-8.
//
// Win32 (AU_HAS_MSFT_NATIONALLANGSUPPORT): uses WideCharToMultiByte.
// Otherwise, when the C++ locale facilities are available: uses gUtf8Conv and
// stops at the first NUL within `length`.
// Otherwise: pushes an "unimplemented" error.
//
// Returns an empty string when `in` is null, when the conversion fails, or when
// an exception is thrown (an error is pushed in that case).
AUKN_SYM AuString ConvertFromWChar(const wchar_t *in, AuMach length)
{
    // The non-Win32 path would dereference a null pointer in wcsnlen()
    if (!in)
    {
        return {};
    }

    try
    {
#if defined(AU_HAS_MSFT_NATIONALLANGSUPPORT)
        // WideCharToMultiByte takes an int count; refuse lengths that would be
        // silently truncated instead of converting only part of the input.
        const int wideCount = static_cast<int>(length);
        if (static_cast<AuMach>(wideCount) != length)
        {
            return {};
        }

        // First pass: ask how many UTF-8 bytes are required
        const int required = WideCharToMultiByte(CP_UTF8, 0, in, wideCount, NULL, 0, NULL, NULL);
        if (required <= 0)
        {
            return {};
        }

        AuString ret;
        ret.resize(required);

        // Second pass: perform the conversion; do not hand back a zero-filled
        // buffer if it fails.
        const int written = WideCharToMultiByte(CP_UTF8, 0, in, wideCount, ret.data(), required, NULL, NULL);
        if (written <= 0)
        {
            return {};
        }

        ret.resize(written);
        return ret;
#elif !defined(AU_NO_CPPLOCALE)
        return gUtf8Conv.to_bytes(std::wstring(in, wcsnlen(in, length)));
#else
        SysPushErrorUnimplemented("ConvertFromWChar");
        return {};
#endif
    }
    catch (...)
    {
        SysPushErrorMem("ConvertFromWChar failed");
        Debug::CheckErrors();
    }

    return {};
}
|
|
|
|
// Converts a UTF-8 string to a wide string.
//
// Win32 (AU_HAS_MSFT_NATIONALLANGSUPPORT): uses MultiByteToWideChar.
// Otherwise, when the C++ locale facilities are available: uses gUtf8Conv.
// Otherwise: pushes an "unimplemented" error.
//
// Returns an empty string when the conversion fails or an exception is thrown
// (an error is pushed in the latter case).
AUKN_SYM std::wstring ConvertFromUTF8(const AuString &in)
{
    try
    {
#if defined(AU_HAS_MSFT_NATIONALLANGSUPPORT)
        // MultiByteToWideChar takes an int count; refuse lengths that would be
        // silently truncated instead of converting only part of the input.
        const int byteCount = static_cast<int>(in.length());
        if (static_cast<AuString::size_type>(byteCount) != in.length())
        {
            return {};
        }

        // First pass: ask how many wide characters are required
        const int required = MultiByteToWideChar(CP_UTF8, 0, in.c_str(), byteCount, NULL, 0);
        if (required <= 0)
        {
            return {};
        }

        std::wstring ret;
        ret.resize(required);

        // Second pass: perform the conversion; do not hand back a zero-filled
        // buffer if it fails.
        const int written = MultiByteToWideChar(CP_UTF8, 0, in.c_str(), byteCount, ret.data(), required);
        if (written <= 0)
        {
            return {};
        }

        ret.resize(written);
        return ret;
#elif !defined(AU_NO_CPPLOCALE)
        return gUtf8Conv.from_bytes(in);
#else
        SysPushErrorUnimplemented("ConvertFromUTF8");
        return {};
#endif
    }
    catch (...)
    {
        SysPushErrorMem("ConvertFromUTF8 failed");
        Debug::CheckErrors();
    }

    return {};
}
|
|
|
|
// Returns the code page currently recorded for the system locale. The value is
// ECodePage::eEnumInvalid until one of the detection routines has assigned it.
ECodePage GetInternalCodePage()
{
    const ECodePage current = gInternalCodePage;
    return current;
}
|
|
|
|
// Returns a reference to the codeset name recorded for the system locale
// (for example "UTF-8"). The reference aliases this file's global state.
AuString const &GetInternalCodePageString()
{
    AuString const &codesetName = gCodeset;
    return codesetName;
}
|
|
|
|
#if defined(AU_HAS_MSFT_NATIONALLANGSUPPORT)
|
|
|
|
static void SetCodesetCommonGuessWin32()
|
|
{
|
|
int acp = GetACP();
|
|
|
|
if (acp == CP_CHINESE)
|
|
{
|
|
gCodeset = "GB18030";
|
|
gInternalCodePage = ECodePage::e18030;
|
|
}
|
|
else if (acp == CP_UTF8)
|
|
{
|
|
gCodeset = "UTF-8";
|
|
gInternalCodePage = ECodePage::eUTF8;
|
|
}
|
|
else if (acp == CP_UTF_16)
|
|
{
|
|
gCodeset = "UTF-16";
|
|
gInternalCodePage = ECodePage::eUTF16;
|
|
}
|
|
else if (acp == CP_UTF_16 + 1)
|
|
{
|
|
gCodeset = "UTF-16";
|
|
gInternalCodePage = ECodePage::eUTF16BE;
|
|
}
|
|
else if (acp == CP_LATIN_1)
|
|
{
|
|
gCodeset = "Latin-1";
|
|
gInternalCodePage = ECodePage::eLatin1;
|
|
}
|
|
else if (acp == CP_2312_LIMITED_GBK)
|
|
{
|
|
gCodeset = "GBK";
|
|
gInternalCodePage = ECodePage::eGBK;
|
|
}
|
|
else if (acp == 437)
|
|
{
|
|
gCodeset = "IBM437";
|
|
gInternalCodePage = ECodePage::eSysUnk;
|
|
}
|
|
else if (acp == CP_SHIFTJIS)
|
|
{
|
|
gCodeset = "SJIS";
|
|
gInternalCodePage = ECodePage::eSJIS;
|
|
}
|
|
else
|
|
{
|
|
gCodeset = "MS-" + AuToString(acp);
|
|
gInternalCodePage = ECodePage::eSysUnk;
|
|
}
|
|
}
|
|
|
|
static void SetLanguageWin32()
|
|
{
|
|
int ret;
|
|
wchar_t name[LOCALE_NAME_MAX_LENGTH] = { 0 };
|
|
|
|
ret = LCIDToLocaleName(LOCALE_USER_DEFAULT, name, LOCALE_NAME_MAX_LENGTH, LOCALE_ALLOW_NEUTRAL_NAMES);
|
|
SysAssert(ret, "Couldn't acquire win32 locale information");
|
|
|
|
wchar_t language[LOCALE_NAME_MAX_LENGTH] = { 0 };
|
|
ret = GetLocaleInfoEx(name, LOCALE_SISO639LANGNAME, language, LOCALE_NAME_MAX_LENGTH);
|
|
SysAssert(ret, "Couldn't acquire win32 provided ISO 639 map of {}", ConvertFromWChar(name));
|
|
|
|
wchar_t country[LOCALE_NAME_MAX_LENGTH] = { 0 };
|
|
ret = GetLocaleInfoEx(name, LOCALE_SISO3166CTRYNAME, country, LOCALE_NAME_MAX_LENGTH);
|
|
SysAssert(ret, "Couldn't acquire win32 provided ISO 3166 map of {}", ConvertFromWChar(name));
|
|
|
|
gCountryCode = ConvertFromWChar(country);
|
|
gLanguageCode = ConvertFromWChar(language);
|
|
|
|
SetCodesetCommonGuessWin32();
|
|
}
|
|
|
|
#elif defined(AURORA_IS_POSIX_DERIVED)
|
|
|
|
// Splits a locale name of the form `language[_territory][.codeset][@modifier]`
// into its fields.
//
// The returned table is keyed by the separator that introduced each field:
//   '!' -> language (no separator precedes it)
//   '_' -> territory
//   '.' -> codeset
//   '@' -> modifier
//
// Separators are only honoured in that order. Once a later field has started,
// an earlier separator is treated as ordinary text, so a codeset such as
// "ISO_8859-1" in "en_US.ISO_8859-1" is kept whole instead of being cut at '_'.
static AuHashMap<unsigned char, AuString> ParseLocaleString(const AuString &locale)
{
    // Position of each separator within the locale grammar; 0 means "not a separator"
    const auto separatorRank = [](unsigned char ch) -> int
    {
        switch (ch)
        {
        case '_': return 1;
        case '.': return 2;
        case '@': return 3;
        default:  return 0;
        }
    };

    AuHashMap<unsigned char, AuString> parseTable;

    AuMach fieldStart = 0;
    unsigned char fieldKey = '!';
    int fieldRank = 0;

    for (AuMach i = 0; i < locale.size(); i++)
    {
        unsigned char curCh = locale[i];
        const int rank = separatorRank(curCh);

        // Ordinary characters have rank 0; out-of-order separators have a rank
        // that does not exceed the current field's. Neither ends the field.
        if (rank <= fieldRank)
        {
            continue;
        }

        parseTable.insert(AuMakePair(fieldKey, locale.substr(fieldStart, i - fieldStart)));
        fieldStart = i + 1;
        fieldKey = curCh;
        fieldRank = rank;
    }

    // Whatever follows the last separator is the final field
    parseTable.insert(AuMakePair(fieldKey, locale.substr(fieldStart, locale.size() - fieldStart)));

    return parseTable;
}
|
|
|
|
static void SetLanguageUnix()
|
|
{
|
|
#if 0
|
|
// this doesn't seem to work with libc++ lol?
|
|
auto locale = -std::--locale("").name();
|
|
#else
|
|
setlocale(LC_ALL, "");
|
|
AuString locale = setlocale(LC_ALL, NULL);
|
|
#endif
|
|
|
|
if (locale == "C")
|
|
{
|
|
AuLogWarn("Improperly configured UNIX environment.");
|
|
AuLogWarn("This localization detection code was written in 2020, please follow the `language[_territory][.codeset][@modifier]` convention for user/sys locales.");
|
|
AuLogWarn("'C' is not a language, country, or anything with which we can discern anything meaningful from. Fix your scuffed unix operating system and try again later...");
|
|
SysPanic("You fools");
|
|
}
|
|
|
|
auto parseTable = ParseLocaleString(locale);
|
|
|
|
AuString *lc;
|
|
if ((AuTryFind(parseTable, '!', lc)) && (lc->size()))
|
|
{
|
|
gLanguageCode = *lc;
|
|
}
|
|
else
|
|
{
|
|
AuLogWarn("Improperly configured UNIX environment.");
|
|
AuLogWarn("Couldn't discern language from localization string: {}", locale);
|
|
SysPanic("You fools");
|
|
}
|
|
|
|
AuString *cc;
|
|
if ((AuTryFind(parseTable, '_', cc)) && (cc->size()))
|
|
{
|
|
gCountryCode = *cc;
|
|
}
|
|
else
|
|
{
|
|
gCountryCode = "GB";
|
|
}
|
|
|
|
AuString *cs;
|
|
if ((AuTryFind(parseTable, '.', cs)) && (cs->size()))
|
|
{
|
|
gCodeset = *cs;
|
|
}
|
|
else
|
|
{
|
|
gCodeset = "UTF-8"; //also technically not true, but most UNIX/Linux applications expect UTF8 byte stirngs or UTF-32 wchar_t strings. this assumption shouldn't break anything
|
|
}
|
|
}
|
|
#define AURORA_HAS_UNIXLOCALE
|
|
#endif
|
|
|
|
#if defined(AURORA_PLATFORM_WIN32) || defined(AURORA_PLATFORM_LINUX) || defined(AURORA_PLATFORM_BSD)
|
|
static void SetLanguageEnvBlock()
|
|
{
|
|
const char *language;
|
|
if (language = getenv("AURORA_ENV_LANGUAGE"))
|
|
{
|
|
gLanguageCode = language;
|
|
}
|
|
|
|
const char *countryCode;
|
|
if (countryCode = getenv("AURORA_ENV_COUNTRY"))
|
|
{
|
|
gCountryCode = countryCode;
|
|
}
|
|
|
|
// You may not overload codeset on win32 targets
|
|
const char *codeSet;
|
|
if (codeSet = getenv("AURORA_ENV_CODESET"))
|
|
{
|
|
gCodeset = codeSet;
|
|
}
|
|
}
|
|
|
|
#define AURORA_HAS_ENVBLOCK
|
|
#endif
|
|
|
|
static void GuessSystemECodePage()
|
|
{
|
|
if (gInternalCodePage != ECodePage::eEnumInvalid)
|
|
{
|
|
return;
|
|
}
|
|
|
|
if (gCodeset == "UTF-8")
|
|
{
|
|
gInternalCodePage = ECodePage::eUTF8;
|
|
}
|
|
else if (gCodeset == "UTF-16")
|
|
{
|
|
// TODO: is big endian
|
|
gInternalCodePage = ECodePage::eUTF16;
|
|
}
|
|
else if (gCodeset == "UTF-32")
|
|
{
|
|
// TODO: is big endian
|
|
gInternalCodePage = ECodePage::eUTF32;
|
|
}
|
|
else if (gCodeset == "SJIS")
|
|
{
|
|
gInternalCodePage = ECodePage::eSJIS;
|
|
}
|
|
// a history of chinese locales
|
|
else if (gCodeset == "GB18030") // is the new legally defined standard
|
|
{
|
|
gInternalCodePage = ECodePage::e18030;
|
|
}
|
|
else if (gCodeset == "GBK") // GB18030 is derived from GBK, GBK is drived from GB2312
|
|
{
|
|
gInternalCodePage = ECodePage::eGBK;
|
|
}
|
|
else if (gCodeset == "GB2312") // GBK is drived from GB2312, GB2312 is derived from telegraph shid
|
|
{
|
|
gInternalCodePage = ECodePage::e2312;
|
|
}
|
|
else
|
|
{
|
|
gInternalCodePage = ECodePage::eSysUnk;
|
|
}
|
|
}
|
|
|
|
void Init()
|
|
{
|
|
#if defined(AU_HAS_MSFT_NATIONALLANGSUPPORT)
|
|
SetLanguageWin32();
|
|
#elif defined(AURORA_HAS_UNIXLOCALE)
|
|
SetLanguageUnix();
|
|
#endif
|
|
|
|
#if defined(AURORA_HAS_ENVBLOCK)
|
|
SetLanguageEnvBlock();
|
|
#endif
|
|
|
|
GuessSystemECodePage();
|
|
|
|
gLanguageCode = AuToLower(gLanguageCode);
|
|
gCountryCode = AuToUpper(gCountryCode);
|
|
gCodeset = gCodeset;
|
|
|
|
Encoding::InitIConv();
|
|
}
|
|
|
|
static bool gLockLocale = false;
|
|
|
|
AUKN_SYM void RuntimeOverloadLocality(const AuPair<AuString, AuString> &locality)
|
|
{
|
|
SysAssert(!AuExchange(gLockLocale, true), "Locality has been locked");
|
|
gLanguageCode = AuToLower(locality.first);
|
|
gCountryCode = AuToUpper(locality.second);
|
|
}
|
|
|
|
// Returns the current language, country, codeset name and internal code page.
// Handing the locale out also locks it: a subsequent RuntimeOverloadLocality()
// call will fail its assertion.
AUKN_SYM LocalizationInfo GetLocale()
{
    gLockLocale = true;

    return LocalizationInfo(
        gLanguageCode,
        gCountryCode,
        gCodeset,
        gInternalCodePage);
}
|
|
}
|