/*** Copyright (C) 2022 J Reece Wilson (a/k/a "Reece"). All rights reserved. File: LocaleConvertWide.cpp Date: 2022-9-15 File: Locale.cpp Date: 2021-6-11 Author: Reece ***/ #include #include "Locale.hpp" #if !defined(AU_NO_CPPLOCALE) #include #include #endif #include #include namespace Aurora::Locale { static bool gLockLocale = false; static AuString gCountryCode; static AuString gLanguageCode; static AuString gCodeset; static ECodePage gInternalCodePage = ECodePage::eEnumInvalid; ECodePage GetInternalCodePage() { return gInternalCodePage; } AuString const &GetInternalCodePageString() { return gCodeset; } #if defined(AU_HAS_MSFT_NATIONALLANGSUPPORT) static void SetCodesetCommonGuessWin32() { int acp = GetACP(); if (acp == CP_CHINESE) { gCodeset = "GB18030"; gInternalCodePage = ECodePage::e18030; } else if (acp == CP_UTF8) { gCodeset = "UTF-8"; gInternalCodePage = ECodePage::eUTF8; } else if (acp == CP_UTF_16) { gCodeset = "UTF-16"; gInternalCodePage = ECodePage::eUTF16; } else if (acp == CP_UTF_16 + 1) { gCodeset = "UTF-16"; gInternalCodePage = ECodePage::eUTF16BE; } else if (acp == CP_LATIN_1) { gCodeset = "Latin-1"; gInternalCodePage = ECodePage::eLatin1; } else if (acp == CP_2312_LIMITED_GBK) { gCodeset = "GBK"; gInternalCodePage = ECodePage::eGBK; } else if (acp == 437) { gCodeset = "IBM437"; gInternalCodePage = ECodePage::eSysUnk; } else if (acp == CP_SHIFTJIS) { gCodeset = "SJIS"; gInternalCodePage = ECodePage::eSJIS; } else { gCodeset = "MS-" + AuToString(acp); gInternalCodePage = ECodePage::eSysUnk; } } static void SetLanguageWin32() { int ret; wchar_t name[LOCALE_NAME_MAX_LENGTH] = { 0 }; if (pLCIDToLocaleName) { ret = pLCIDToLocaleName(LOCALE_USER_DEFAULT, name, LOCALE_NAME_MAX_LENGTH, LOCALE_ALLOW_NEUTRAL_NAMES); SysAssert(ret, "Couldn't acquire win32 locale information"); } { wchar_t language[LOCALE_NAME_MAX_LENGTH] = { 0 }; if (pGetLocaleInfoEx) { ret = pGetLocaleInfoEx(name, LOCALE_SISO639LANGNAME, language, LOCALE_NAME_MAX_LENGTH); SysAssert(ret, "Couldn't acquire win32 provided ISO 639 map of {}", ConvertFromWChar(name)); } else if (pGetLocaleInfoW) { ret = pGetLocaleInfoW(LOCALE_USER_DEFAULT, LOCALE_SISO639LANGNAME, language, LOCALE_NAME_MAX_LENGTH); SysAssert(ret, "Couldn't acquire win32 provided ISO 639 map of {}", ConvertFromWChar(name)); } gLanguageCode = ConvertFromWChar(language); } { wchar_t country[LOCALE_NAME_MAX_LENGTH] = { 0 }; if (pGetLocaleInfoEx) { ret = pGetLocaleInfoEx(name, LOCALE_SISO3166CTRYNAME, country, LOCALE_NAME_MAX_LENGTH); SysAssert(ret, "Couldn't acquire win32 provided ISO 3166 map of {}", ConvertFromWChar(name)); } else if (pGetLocaleInfoW) { ret = pGetLocaleInfoW(LOCALE_USER_DEFAULT, LOCALE_SISO3166CTRYNAME, country, LOCALE_NAME_MAX_LENGTH); SysAssert(ret, "Couldn't acquire win32 provided ISO 3166 map of {}", ConvertFromWChar(name)); } gCountryCode = ConvertFromWChar(country); } SetCodesetCommonGuessWin32(); } #elif defined(AURORA_IS_POSIX_DERIVED) static AuHashMap ParseLocaleString(const AuString &locale) { static auto isCharacterSplitter = [&](unsigned char ch) -> bool { static AuList characterSplitters = { '.', '_', '@' }; for (auto const splitter:characterSplitters) { if (splitter == ch) { return true; } } return false; }; AuHashMap parseTable; AuMach startingIndex = 0; unsigned char startingCharacter = '!'; for (AuMach i = 0; i < locale.size(); i++) { unsigned char curCh = locale[i]; if (!(isCharacterSplitter(curCh))) { continue; } parseTable.insert(AuMakePair(startingCharacter, locale.substr(startingIndex, i - startingIndex))); startingIndex = i + 1; startingCharacter = curCh; } parseTable.insert(AuMakePair(startingCharacter, locale.substr(startingIndex, locale.size() - startingIndex))); return parseTable; } static void SetLanguageUnix() { #if 0 // this doesn't seem to work with libc++ lol? auto locale = -std::--locale("").name(); #else setlocale(LC_ALL, ""); AuString locale = setlocale(LC_ALL, NULL); #endif if (locale == "C") { AuLogWarn("Improperly configured UNIX environment."); AuLogWarn("This localization detection code was written in 2020, please follow the `language[_territory][.codeset][@modifier]` convention for user/sys locales."); AuLogWarn("'C' is not a language, country, or anything with which we can discern anything meaningful from. Fix your scuffed unix operating system and try again later..."); SysPanic("You fools"); } auto parseTable = ParseLocaleString(locale); AuString *lc; if ((AuTryFind(parseTable, '!', lc)) && (lc->size())) { gLanguageCode = *lc; } else { AuLogWarn("Improperly configured UNIX environment."); AuLogWarn("Couldn't discern language from localization string: {}", locale); SysPanic("You fools"); } AuString *cc; if ((AuTryFind(parseTable, '_', cc)) && (cc->size())) { gCountryCode = *cc; } else { gCountryCode = "GB"; } AuString *cs; if ((AuTryFind(parseTable, '.', cs)) && (cs->size())) { gCodeset = *cs; } else { gCodeset = "UTF-8"; //also technically not true, but most UNIX/Linux applications expect UTF8 byte stirngs or UTF-32 wchar_t strings. this assumption shouldn't break anything } } #define AURORA_HAS_UNIXLOCALE #endif #if defined(AURORA_PLATFORM_WIN32) || defined(AURORA_PLATFORM_LINUX) || defined(AURORA_PLATFORM_BSD) static void SetLanguageEnvBlock() { const char *language; if ((language = getenv("AURORA_ENV_LANGUAGE"))) { gLanguageCode = language; } const char *countryCode; if ((countryCode = getenv("AURORA_ENV_COUNTRY"))) { gCountryCode = countryCode; } // You may not overload codeset on win32 targets const char *codeSet; if ((codeSet = getenv("AURORA_ENV_CODESET"))) { gCodeset = codeSet; } } #define AURORA_HAS_ENVBLOCK #endif static void GuessSystemECodePage() { if (gInternalCodePage != ECodePage::eEnumInvalid) { return; } if (gCodeset == "UTF-8") { gInternalCodePage = ECodePage::eUTF8; } else if (gCodeset == "UTF-16") { // TODO: is big endian gInternalCodePage = ECodePage::eUTF16; } else if (gCodeset == "UTF-32") { // TODO: is big endian gInternalCodePage = ECodePage::eUTF32; } else if (gCodeset == "SJIS") { gInternalCodePage = ECodePage::eSJIS; } // a history of chinese locales else if (gCodeset == "GB18030") // is the new legally defined standard { gInternalCodePage = ECodePage::e18030; } else if (gCodeset == "GBK") // GB18030 is derived from GBK, GBK is drived from GB2312 { gInternalCodePage = ECodePage::eGBK; } else if (gCodeset == "GB2312") // GBK is drived from GB2312, GB2312 is derived from telegraph shid { gInternalCodePage = ECodePage::e2312; } else { gInternalCodePage = ECodePage::eSysUnk; } } void InitPlatformLocale() { #if defined(AU_HAS_MSFT_NATIONALLANGSUPPORT) SetLanguageWin32(); #elif defined(AURORA_HAS_UNIXLOCALE) SetLanguageUnix(); #endif #if defined(AURORA_HAS_ENVBLOCK) SetLanguageEnvBlock(); #endif GuessSystemECodePage(); gLanguageCode = AuToLower(gLanguageCode); gCountryCode = AuToUpper(gCountryCode); gCodeset = gCodeset; Encoding::InitIConv(); } AUKN_SYM void RuntimeOverloadLocality(const AuPair &locality) { SysAssert(!AuExchange(gLockLocale, true), "Locality has been locked"); gLanguageCode = AuToLower(locality.first); gCountryCode = AuToUpper(locality.second); } AUKN_SYM LocalizationInfo GetLocale() { gLockLocale = true; return LocalizationInfo(gLanguageCode, gCountryCode, gCodeset, gInternalCodePage); } }