AuroraRuntime/Source/Locale/LocaleGetLocale.cpp

336 lines
9.7 KiB
C++

/***
Copyright (C) 2022 J Reece Wilson (a/k/a "Reece"). All rights reserved.
File: LocaleGetLocale.cpp
Date: 2022-9-15
File: Locale.cpp
Date: 2021-6-11
Author: Reece
***/
#include <Source/RuntimeInternal.hpp>
#include "Locale.hpp"
#if !defined(AU_NO_CPPLOCALE)
#include <locale>
#include <codecvt>
#endif
#include <wchar.h>
#include <tuple>
namespace Aurora::Locale
{
// Becomes true once GetLocale() has been called or RuntimeOverloadLocality() has run;
// RuntimeOverloadLocality() asserts that it was still false.
static bool gLockLocale = false;
// Country code; upper-cased by InitPlatformLocale() and RuntimeOverloadLocality().
static AuString gCountryCode;
// Language code; lower-cased by InitPlatformLocale() and RuntimeOverloadLocality().
static AuString gLanguageCode;
// Codeset name (for example "UTF-8"); exposed through GetInternalCodePageString() and GetLocale().
static AuString gCodeset;
// Code page paired with gCodeset; remains eEnumInvalid until a detection routine assigns it.
static ECodePage gInternalCodePage = ECodePage::eEnumInvalid;
// Returns the code page currently stored in gInternalCodePage.
// The value is ECodePage::eEnumInvalid until one of the detection routines in this file has assigned it.
ECodePage GetInternalCodePage()
{
return gInternalCodePage;
}
// Returns a reference to the file-scope codeset name (gCodeset).
// The reference aliases the global, so later writes to gCodeset are visible through it.
AuString const &GetInternalCodePageString()
{
return gCodeset;
}
#if defined(AU_HAS_MSFT_NATIONALLANGSUPPORT)
// Translates the active ANSI code page reported by GetACP() into a codeset name
// (gCodeset) and the matching ECodePage value (gInternalCodePage).
// Code pages without a dedicated ECodePage entry are recorded as eSysUnk.
static void SetCodesetCommonGuessWin32()
{
    // Writes both globals in one step so every branch below stays a single call.
    const auto select = [](const char *codesetName, ECodePage codePage)
    {
        gCodeset = codesetName;
        gInternalCodePage = codePage;
    };

    int activeCodePage = GetACP();

    if (activeCodePage == CP_CHINESE)
    {
        select("GB18030", ECodePage::e18030);
        return;
    }

    if (activeCodePage == CP_UTF8)
    {
        select("UTF-8", ECodePage::eUTF8);
        return;
    }

    if (activeCodePage == CP_UTF_16)
    {
        select("UTF-16", ECodePage::eUTF16);
        return;
    }

    // The identifier directly after CP_UTF_16 is reported as big endian UTF-16.
    if (activeCodePage == CP_UTF_16 + 1)
    {
        select("UTF-16", ECodePage::eUTF16BE);
        return;
    }

    if (activeCodePage == CP_LATIN_1)
    {
        select("Latin-1", ECodePage::eLatin1);
        return;
    }

    if (activeCodePage == CP_2312_LIMITED_GBK)
    {
        select("GBK", ECodePage::eGBK);
        return;
    }

    // Code page 437 gets a readable name, but is still reported as eSysUnk.
    if (activeCodePage == 437)
    {
        select("IBM437", ECodePage::eSysUnk);
        return;
    }

    if (activeCodePage == CP_SHIFTJIS)
    {
        select("SJIS", ECodePage::eSJIS);
        return;
    }

    // Anything else: synthesise a name from the numeric identifier.
    gCodeset = "MS-" + AuToString(activeCodePage);
    gInternalCodePage = ECodePage::eSysUnk;
}
// Queries Windows for the user's ISO 639 language and ISO 3166 country names and stores
// them in gLanguageCode / gCountryCode, then derives the codeset from the active code page.
//
// pGetLocaleInfoEx is preferred; pGetLocaleInfoW is the fallback. When neither pointer is
// available the corresponding global is assigned an empty string.
static void SetLanguageWin32()
{
    int ret = 0;
    wchar_t name[LOCALE_NAME_MAX_LENGTH] = { 0 };

    if (pLCIDToLocaleName)
    {
        ret = pLCIDToLocaleName(LOCALE_USER_DEFAULT, name, LOCALE_NAME_MAX_LENGTH, LOCALE_ALLOW_NEUTRAL_NAMES);
        SysAssert(ret, "Couldn't acquire win32 locale information");
    }

    // GetLocaleInfoEx treats an empty string as LOCALE_NAME_INVARIANT and a null pointer as
    // LOCALE_NAME_USER_DEFAULT. If the name could not be resolved above, ask for the user
    // default rather than silently reporting the invariant locale.
    const wchar_t *localeName = name[0] ? name : nullptr;

    {
        wchar_t language[LOCALE_NAME_MAX_LENGTH] = { 0 };

        if (pGetLocaleInfoEx)
        {
            ret = pGetLocaleInfoEx(localeName, LOCALE_SISO639LANGNAME, language, LOCALE_NAME_MAX_LENGTH);
            SysAssert(ret, "Couldn't acquire win32 provided ISO 639 map of {}", ConvertFromWChar(name));
        }
        else if (pGetLocaleInfoW)
        {
            ret = pGetLocaleInfoW(LOCALE_USER_DEFAULT, LOCALE_SISO639LANGNAME, language, LOCALE_NAME_MAX_LENGTH);
            SysAssert(ret, "Couldn't acquire win32 provided ISO 639 map of {}", ConvertFromWChar(name));
        }

        gLanguageCode = ConvertFromWChar(language);
    }

    {
        wchar_t country[LOCALE_NAME_MAX_LENGTH] = { 0 };

        if (pGetLocaleInfoEx)
        {
            ret = pGetLocaleInfoEx(localeName, LOCALE_SISO3166CTRYNAME, country, LOCALE_NAME_MAX_LENGTH);
            SysAssert(ret, "Couldn't acquire win32 provided ISO 3166 map of {}", ConvertFromWChar(name));
        }
        else if (pGetLocaleInfoW)
        {
            ret = pGetLocaleInfoW(LOCALE_USER_DEFAULT, LOCALE_SISO3166CTRYNAME, country, LOCALE_NAME_MAX_LENGTH);
            SysAssert(ret, "Couldn't acquire win32 provided ISO 3166 map of {}", ConvertFromWChar(name));
        }

        gCountryCode = ConvertFromWChar(country);
    }

    SetCodesetCommonGuessWin32();
}
#elif defined(AURORA_IS_POSIX_DERIVED)
// Position of a separator within `language[_territory][.codeset][@modifier]`.
// Returns 0 for every character that is not one of the three separators.
static int LocaleSeparatorRank(unsigned char ch)
{
    switch (ch)
    {
    case '_': return 1;
    case '.': return 2;
    case '@': return 3;
    default:  return 0;
    }
}

// Splits a locale name of the form `language[_territory][.codeset][@modifier]` into its fields.
//
// The returned table is keyed by the separator that introduced each field:
//   '!' -> language (always present, possibly empty), '_' -> territory,
//   '.' -> codeset, '@' -> modifier.
//
// A separator only starts a new field when it comes later in the grammar than the field
// currently being read. This keeps separator characters that are part of a value intact,
// e.g. "ja_JP.SHIFT_JIS" yields the codeset "SHIFT_JIS" rather than "SHIFT".
static AuHashMap<unsigned char, AuString> ParseLocaleString(const AuString &locale)
{
    AuHashMap<unsigned char, AuString> parseTable;
    AuMach startingIndex = 0;
    unsigned char startingCharacter = '!';
    int startingRank = 0;

    for (AuMach i = 0; i < locale.size(); i++)
    {
        const unsigned char curCh = locale[i];
        const int curRank = LocaleSeparatorRank(curCh);

        // Ordinary characters (rank 0) and out-of-order separators belong to the current field.
        if (curRank <= startingRank)
        {
            continue;
        }

        parseTable.insert(AuMakePair(startingCharacter, locale.substr(startingIndex, i - startingIndex)));
        startingIndex = i + 1;
        startingCharacter = curCh;
        startingRank = curRank;
    }

    // Flush the trailing field.
    parseTable.insert(AuMakePair(startingCharacter, locale.substr(startingIndex, locale.size() - startingIndex)));
    return parseTable;
}
static void SetLanguageUnix()
{
#if 0
// this doesn't seem to work with libc++ lol?
auto locale = -std::--locale("").name();
#else
setlocale(LC_ALL, "");
AuString locale = setlocale(LC_ALL, NULL);
#endif
if (locale == "C")
{
AuLogWarn("Improperly configured UNIX environment.");
AuLogWarn("This localization detection code was written in 2020, please follow the `language[_territory][.codeset][@modifier]` convention for user/sys locales.");
AuLogWarn("'C' is not a language, country, or anything with which we can discern anything meaningful from. Fix your scuffed unix operating system and try again later...");
SysPanic("You fools");
}
auto parseTable = ParseLocaleString(locale);
AuString *lc;
if ((AuTryFind(parseTable, '!', lc)) && (lc->size()))
{
gLanguageCode = *lc;
}
else
{
AuLogWarn("Improperly configured UNIX environment.");
AuLogWarn("Couldn't discern language from localization string: {}", locale);
SysPanic("You fools");
}
AuString *cc;
if ((AuTryFind(parseTable, '_', cc)) && (cc->size()))
{
gCountryCode = *cc;
}
else
{
gCountryCode = "GB";
}
AuString *cs;
if ((AuTryFind(parseTable, '.', cs)) && (cs->size()))
{
gCodeset = *cs;
}
else
{
gCodeset = "UTF-8"; //also technically not true, but most UNIX/Linux applications expect UTF8 byte stirngs or UTF-32 wchar_t strings. this assumption shouldn't break anything
}
}
#define AURORA_HAS_UNIXLOCALE
#endif
#if defined(AURORA_PLATFORM_WIN32) || defined(AURORA_PLATFORM_LINUX) || defined(AURORA_PLATFORM_BSD)
static void SetLanguageEnvBlock()
{
const char *language;
if ((language = getenv("AURORA_ENV_LANGUAGE")))
{
gLanguageCode = language;
}
const char *countryCode;
if ((countryCode = getenv("AURORA_ENV_COUNTRY")))
{
gCountryCode = countryCode;
}
// You may not overload codeset on win32 targets
const char *codeSet;
if ((codeSet = getenv("AURORA_ENV_CODESET")))
{
gCodeset = codeSet;
}
}
#define AURORA_HAS_ENVBLOCK
#endif
// Derives gInternalCodePage from the codeset name in gCodeset.
// Does nothing when a code page has already been chosen (gInternalCodePage != eEnumInvalid).
//
// Names are compared ignoring ASCII case and the '-' / '_' filler characters, so the
// spellings commonly found in POSIX locale names ("utf8", "UTF8", "utf-8", ...) resolve to
// the same code page as the canonical "UTF-8". Unrecognised names map to eSysUnk.
static void GuessSystemECodePage()
{
    if (gInternalCodePage != ECodePage::eEnumInvalid)
    {
        return;
    }

    const auto foldCase = [](char ch) -> char
    {
        return ((ch >= 'a') && (ch <= 'z')) ? static_cast<char>(ch - ('a' - 'A')) : ch;
    };

    const auto isFiller = [](char ch) -> bool
    {
        return (ch == '-') || (ch == '_');
    };

    // True when gCodeset equals `canonical` once case and filler characters are ignored.
    const auto codesetIs = [&](const char *canonical) -> bool
    {
        const char *lhs = gCodeset.c_str();
        const char *rhs = canonical;

        for (;;)
        {
            while (isFiller(*lhs))
            {
                lhs++;
            }

            while (isFiller(*rhs))
            {
                rhs++;
            }

            if (foldCase(*lhs) != foldCase(*rhs))
            {
                return false;
            }

            // Both strings ended at the same time: every significant character matched.
            if (*lhs == '\0')
            {
                return true;
            }

            lhs++;
            rhs++;
        }
    };

    if (codesetIs("UTF-8"))
    {
        gInternalCodePage = ECodePage::eUTF8;
    }
    else if (codesetIs("UTF-16"))
    {
        // TODO: is big endian
        gInternalCodePage = ECodePage::eUTF16;
    }
    else if (codesetIs("UTF-32"))
    {
        // TODO: is big endian
        gInternalCodePage = ECodePage::eUTF32;
    }
    else if (codesetIs("SJIS") || codesetIs("Shift_JIS"))
    {
        gInternalCodePage = ECodePage::eSJIS;
    }
    // a history of chinese locales
    else if (codesetIs("GB18030")) // is the new legally defined standard
    {
        gInternalCodePage = ECodePage::e18030;
    }
    else if (codesetIs("GBK")) // GB18030 is derived from GBK, GBK is derived from GB2312
    {
        gInternalCodePage = ECodePage::eGBK;
    }
    else if (codesetIs("GB2312")) // GBK is derived from GB2312, GB2312 predates both
    {
        gInternalCodePage = ECodePage::e2312;
    }
    else
    {
        gInternalCodePage = ECodePage::eSysUnk;
    }
}
// Populates the locale globals for the current platform and then initialises iconv.
//
// Order of operations:
//   1. platform detection (Win32 NLS or the UNIX locale string, whichever is compiled in),
//   2. optional overrides from the environment block,
//   3. code page guess from the resulting codeset,
//   4. case normalisation: language lower-cased, country upper-cased.
void InitPlatformLocale()
{
#if defined(AU_HAS_MSFT_NATIONALLANGSUPPORT)
    SetLanguageWin32();
#elif defined(AURORA_HAS_UNIXLOCALE)
    SetLanguageUnix();
#endif

#if defined(AURORA_HAS_ENVBLOCK)
    SetLanguageEnvBlock();
#endif

    GuessSystemECodePage();

    gLanguageCode = AuToLower(gLanguageCode);
    gCountryCode = AuToUpper(gCountryCode);
    // gCodeset is not case-normalised; it is passed on exactly as detected.

    Encoding::InitIConv();
}
// Replaces the detected language/country pair with caller supplied values.
//
// locality.first  - language code, stored lower-cased in gLanguageCode
// locality.second - country code, stored upper-cased in gCountryCode
//
// The call also sets gLockLocale, and asserts that it was not already set. It therefore
// trips the assertion if GetLocale() or a previous RuntimeOverloadLocality() call ran first.
AUKN_SYM void RuntimeOverloadLocality(const AuPair<AuString, AuString> &locality)
{
    SysAssert(!AuExchange(gLockLocale, true), "Locality has been locked");

    const AuString &language = locality.first;
    const AuString &country = locality.second;

    gLanguageCode = AuToLower(language);
    gCountryCode = AuToUpper(country);
}
// Returns the current language, country, codeset name and code page bundled in a LocalizationInfo.
// Calling this sets gLockLocale, so a later RuntimeOverloadLocality() call will fail its assertion.
AUKN_SYM LocalizationInfo GetLocale()
{
// Lock first: once a caller has observed the locale it may no longer be overloaded.
gLockLocale = true;
return LocalizationInfo(gLanguageCode, gCountryCode, gCodeset, gInternalCodePage);
}
}