2021-06-27 21:25:29 +00:00
/***
Copyright ( C ) 2021 J Reece Wilson ( a / k / a " Reece " ) . All rights reserved .
File : Locale . cpp
Date : 2021 - 6 - 11
Author : Reece
* * */
2021-10-02 16:07:33 +00:00
# define I_REALLY_NEED_WIDECHAR_PUBAPI
2021-09-30 14:57:41 +00:00
# include <Source/RuntimeInternal.hpp>
2021-06-27 21:25:29 +00:00
# include "Locale.hpp"
2021-09-06 10:58:08 +00:00
# if !defined(AU_NO_CPPLOCALE)
# include <locale>
# include <codecvt>
# endif
2021-06-27 21:25:29 +00:00
# include <wchar.h>
2021-09-06 10:58:08 +00:00
# include <tuple>
2021-06-27 21:25:29 +00:00
namespace Aurora : : Locale
{
2021-09-06 10:58:08 +00:00
// Process-wide locale state. These are written by Init() and the platform
// probes it calls, and read back through GetLocale() and the code page getters.
static AuString gCountryCode;
static AuString gLanguageCode;
static AuString gCodeset;

// eUnsupported is the initial value; GuessSystemECodePage() only derives a
// code page from gCodeset while this is still eUnsupported.
static ECodePage gInternalCodePage { ECodePage::eUnsupported };

// Note: [0] std::wstring_convert has been deprecated without a standard
//           replacement being available yet, so it is still used here.
//       [1] the native win32 implementation appears to be more optimized than
//           the MSVC STL one.
#if !defined(AU_NO_CPPLOCALE)
static std::wstring_convert<std::codecvt_utf8<wchar_t>> gUtf8Conv;
#endif
2021-06-27 21:25:29 +00:00
/**
 * Converts a NUL-terminated wide string into an AuString by measuring it with
 * wcslen and forwarding to the (pointer, length) overload.
 *
 * @param in NUL-terminated wide string; a null pointer yields an empty string.
 * @return   the converted string, or an empty string when `in` is null or an
 *           exception is raised during the conversion (an error is pushed via
 *           SysPushErrorMem in the latter case).
 */
AUKN_SYM AuString ConvertFromWChar(const wchar_t *in)
{
    // wcslen on a null pointer is undefined behaviour, not an exception, so
    // the catch-all below could never intercept it. Reject it up front.
    if (!in)
    {
        return {};
    }

    try
    {
        return ConvertFromWChar(in, wcslen(in));
    }
    catch (...)
    {
        SysPushErrorMem(" ConvertFromWChar failed ");
        return {};
    }
}
/**
 * Converts `length` wide characters starting at `in` into a UTF-8 AuString.
 *
 * Backend selection happens at compile time:
 *  - AU_HAS_MSFT_NATIONALLANGSUPPORT: WideCharToMultiByte(CP_UTF8)
 *  - otherwise, unless AU_NO_CPPLOCALE: the file-level std::wstring_convert
 *  - otherwise: unimplemented, an error is pushed and an empty string returned
 *
 * @param in     first wide character to convert; null yields an empty string.
 * @param length number of wide characters to convert. The buffer does not need
 *               to be NUL-terminated.
 * @return       the converted bytes, or an empty string on failure.
 */
AUKN_SYM AuString ConvertFromWChar(const wchar_t *in, AuMach length)
{
    if (!in)
    {
        return {};
    }

    try
    {
#if defined(AU_HAS_MSFT_NATIONALLANGSUPPORT)
        const int wideCount = static_cast<int>(length);

        // First pass: ask how many bytes the UTF-8 form needs.
        const int byteCount = WideCharToMultiByte(CP_UTF8, 0, in, wideCount, NULL, 0, NULL, NULL);
        if (byteCount <= 0)
        {
            return {};
        }

        AuString ret;
        ret.resize(byteCount);

        // Second pass: the actual conversion. A failure here used to go
        // unnoticed and hand back a zero-filled buffer.
        const int written = WideCharToMultiByte(CP_UTF8, 0, in, wideCount, ret.data(), byteCount, NULL, NULL);
        if (written <= 0)
        {
            return {};
        }

        ret.resize(written);
        return ret;
#elif !defined(AU_NO_CPPLOCALE)
        // Honour the caller supplied length instead of re-measuring with
        // wcslen, which ignored `length` and required NUL termination.
        return gUtf8Conv.to_bytes(in, in + length);
#else
        SysPushErrorUnimplemented(" ConvertFromWChar ");
        return {};
#endif
    }
    catch (...)
    {
        SysPushErrorMem(" ConvertFromWChar failed ");
        Debug::CheckErrors();
    }

    return {};
}
/**
 * Converts a UTF-8 encoded AuString into a std::wstring.
 *
 * Backend selection happens at compile time:
 *  - AU_HAS_MSFT_NATIONALLANGSUPPORT: MultiByteToWideChar(CP_UTF8)
 *  - otherwise, unless AU_NO_CPPLOCALE: the file-level std::wstring_convert
 *  - otherwise: unimplemented, an error is pushed and an empty string returned
 *
 * @param in UTF-8 bytes to convert.
 * @return   the wide string, or an empty string on failure.
 */
AUKN_SYM std::wstring ConvertFromUTF8(const AuString &in)
{
    try
    {
#if defined(AU_HAS_MSFT_NATIONALLANGSUPPORT)
        const int byteCount = static_cast<int>(in.length());

        // First pass: ask how many wide characters are required.
        const int wideCount = MultiByteToWideChar(CP_UTF8, 0, in.c_str(), byteCount, NULL, 0);
        if (wideCount <= 0)
        {
            return {};
        }

        std::wstring ret;
        ret.resize(wideCount);

        // Second pass: the actual conversion. Its result was previously
        // ignored, so a failure returned a zero-filled buffer as if valid.
        const int written = MultiByteToWideChar(CP_UTF8, 0, in.c_str(), byteCount, ret.data(), wideCount);
        if (written <= 0)
        {
            return {};
        }

        ret.resize(written);
        return ret;
#elif !defined(AU_NO_CPPLOCALE)
        return gUtf8Conv.from_bytes(in);
#else
        SysPushErrorUnimplemented(" ConvertFromUTF8 ");
        return {};
#endif
    }
    catch (...)
    {
        SysPushErrorMem(" ConvertFromUTF8 failed ");
        Debug::CheckErrors();
    }

    return {};
}
2021-09-06 10:58:08 +00:00
/// Returns the current value of the file-level gInternalCodePage.
ECodePage GetInternalCodePage()
{
    return gInternalCodePage;
}
2021-06-27 21:25:29 +00:00
2021-09-06 10:58:08 +00:00
/// Returns a reference to the file-level codeset name (gCodeset). The
/// reference aliases the global, so later writes to it are visible through it.
const AuString &GetInternalCodePageString()
{
    return gCodeset;
}
# if defined(AU_HAS_MSFT_NATIONALLANGSUPPORT)
static void SetCodesetCommonGuessWin32 ( )
{
int acp = GetACP ( ) ;
2021-06-27 21:25:29 +00:00
2021-09-06 10:58:08 +00:00
if ( acp = = CP_CHINESE )
{
gCodeset = " GB18030 " ;
gInternalCodePage = ECodePage : : e18030 ;
}
else if ( acp = = CP_UTF8 )
{
gCodeset = " UTF-8 " ;
gInternalCodePage = ECodePage : : eUTF8 ;
}
else if ( acp = = CP_UTF_16 )
{
gCodeset = " UTF-16 " ;
gInternalCodePage = ECodePage : : eUTF16 ;
}
else if ( acp = = CP_UTF_16 + 1 )
{
gCodeset = " UTF-16 " ;
gInternalCodePage = ECodePage : : eUTF16BE ;
}
else if ( acp = = CP_LATIN_1 )
{
gCodeset = " Latin-1 " ;
gInternalCodePage = ECodePage : : eLatin1 ;
}
else if ( acp = = CP_2312_LIMITED_GBK )
{
gCodeset = " GBK " ;
gInternalCodePage = ECodePage : : eGBK ;
}
else if ( acp = = 437 )
{
gCodeset = " IBM437 " ;
gInternalCodePage = ECodePage : : eSysUnk ;
}
else if ( acp = = CP_SHIFTJIS )
{
gCodeset = " SJIS " ;
gInternalCodePage = ECodePage : : eSJIS ;
}
else
{
2022-01-19 17:08:13 +00:00
gCodeset = " MS- " + AuToString ( acp ) ;
2021-09-06 10:58:08 +00:00
gInternalCodePage = ECodePage : : eSysUnk ;
}
}
2021-06-27 21:25:29 +00:00
static void SetLanguageWin32 ( )
{
int ret ;
wchar_t name [ LOCALE_NAME_MAX_LENGTH ] = { 0 } ;
ret = LCIDToLocaleName ( LOCALE_USER_DEFAULT , name , LOCALE_NAME_MAX_LENGTH , LOCALE_ALLOW_NEUTRAL_NAMES ) ;
SysAssert ( ret , " Couldn't acquire win32 locale information " ) ;
wchar_t language [ LOCALE_NAME_MAX_LENGTH ] = { 0 } ;
ret = GetLocaleInfoEx ( name , LOCALE_SISO639LANGNAME , language , LOCALE_NAME_MAX_LENGTH ) ;
SysAssert ( ret , " Couldn't acquire win32 provided ISO 639 map of {} " , ConvertFromWChar ( name ) ) ;
wchar_t country [ LOCALE_NAME_MAX_LENGTH ] = { 0 } ;
ret = GetLocaleInfoEx ( name , LOCALE_SISO3166CTRYNAME , country , LOCALE_NAME_MAX_LENGTH ) ;
SysAssert ( ret , " Couldn't acquire win32 provided ISO 3166 map of {} " , ConvertFromWChar ( name ) ) ;
gCountryCode = ConvertFromWChar ( country ) ;
gLanguageCode = ConvertFromWChar ( language ) ;
2021-09-06 10:58:08 +00:00
SetCodesetCommonGuessWin32 ( ) ;
2021-06-27 21:25:29 +00:00
}
2021-09-06 10:58:08 +00:00
# elif defined(AURORA_IS_POSIX_DERIVED)
2021-06-27 21:25:29 +00:00
/**
 * Splits a `language[_territory][.codeset][@modifier]` style locale string
 * into its segments.
 *
 * The returned table is keyed by the separator that introduced each segment:
 * '_', '.' or '@'. The leading segment, which has no separator in front of
 * it, is stored under '!'. Entries are added with insert(), so when a
 * separator occurs more than once the first segment recorded for it is kept.
 *
 * @param locale locale string to split.
 * @return       separator -> segment table; segments may be empty.
 */
static AuHashMap<unsigned char, AuString> ParseLocaleString(const AuString &locale)
{
    // Single-character literals: the separators must compare equal to the
    // bytes that actually occur in a locale string.
    const auto isSplitter = [](unsigned char ch) -> bool
    {
        return ch == '.' || ch == '_' || ch == '@';
    };

    AuHashMap<unsigned char, AuString> parseTable;
    AuMach segmentStart = 0;
    unsigned char segmentTag = '!';

    for (AuMach i = 0; i < locale.size(); i++)
    {
        const unsigned char ch = locale[i];
        if (!isSplitter(ch))
        {
            continue;
        }

        // Close the segment that ends right before this separator...
        parseTable.insert(AuMakePair(segmentTag, locale.substr(segmentStart, i - segmentStart)));

        // ...and start the next one, tagged by the separator itself.
        segmentStart = i + 1;
        segmentTag = ch;
    }

    // Whatever follows the last separator (or the whole string if there was none).
    parseTable.insert(AuMakePair(segmentTag, locale.substr(segmentStart, locale.size() - segmentStart)));
    return parseTable;
}
static void SetLanguageUnix ( )
{
#if 0
// this doesn't seem to work with libc++ lol?
2022-01-19 17:08:13 +00:00
auto locale = - std : : - - locale ( " " ) . name ( ) ;
2021-06-27 21:25:29 +00:00
# else
setlocale ( LC_ALL , " " ) ;
AuString locale = setlocale ( LC_ALL , NULL ) ;
# endif
if ( locale = = " C " )
{
2022-01-24 18:37:06 +00:00
AuLogWarn ( " Improperly configured UNIX environment. " ) ;
AuLogWarn ( " This localization detection code was written in 2020, please follow the `language[_territory][.codeset][@modifier]` convention for user/sys locales. " ) ;
AuLogWarn ( " 'C' is not a language, country, or anything with which we can discern anything meaningful from. Fix your scuffed unix operating system and try again later... " ) ;
2021-06-27 21:25:29 +00:00
SysPanic ( " You fools " ) ;
}
auto parseTable = ParseLocaleString ( locale ) ;
AuString * lc ;
2021-10-02 16:07:33 +00:00
if ( ( AuTryFind ( parseTable , ' ! ' , lc ) ) & & ( lc - > size ( ) ) )
2021-06-27 21:25:29 +00:00
{
gLanguageCode = * lc ;
}
else
{
2022-01-24 18:37:06 +00:00
AuLogWarn ( " Improperly configured UNIX environment. " ) ;
AuLogWarn ( " Couldn't discern language from localization string: {} " , locale ) ;
2021-06-27 21:25:29 +00:00
SysPanic ( " You fools " ) ;
}
AuString * cc ;
2021-10-02 16:07:33 +00:00
if ( ( AuTryFind ( parseTable , ' _ ' , cc ) ) & & ( cc - > size ( ) ) )
2021-06-27 21:25:29 +00:00
{
gCountryCode = * cc ;
}
2021-09-06 10:58:08 +00:00
else
{
gCountryCode = " GB " ;
}
2021-06-27 21:25:29 +00:00
AuString * cs ;
2021-10-02 16:07:33 +00:00
if ( ( AuTryFind ( parseTable , ' . ' , cs ) ) & & ( cs - > size ( ) ) )
2021-06-27 21:25:29 +00:00
{
gCodeset = * cs ;
}
else
{
gCodeset = " UTF-8 " ; //also technically not true, but most UNIX/Linux applications expect UTF8 byte stirngs or UTF-32 wchar_t strings. this assumption shouldn't break anything
}
}
# define AURORA_HAS_UNIXLOCALE
# endif
2021-09-06 10:58:08 +00:00
# if defined(AURORA_PLATFORM_WIN32) || defined(AURORA_PLATFORM_LINUX) || defined(AURORA_PLATFORM_BSD)
2021-06-27 21:25:29 +00:00
static void SetLanguageEnvBlock ( )
{
const char * language ;
if ( language = getenv ( " AURORA_ENV_LANGUAGE " ) )
{
gLanguageCode = language ;
}
const char * countryCode ;
if ( countryCode = getenv ( " AURORA_ENV_COUNTRY " ) )
{
gCountryCode = countryCode ;
}
2021-09-06 10:58:08 +00:00
// You may not overload codeset on win32 targets
2021-06-27 21:25:29 +00:00
const char * codeSet ;
if ( codeSet = getenv ( " AURORA_ENV_CODESET " ) )
{
gCodeset = codeSet ;
}
}
# define AURORA_HAS_ENVBLOCK
# endif
2021-09-06 10:58:08 +00:00
static void GuessSystemECodePage ( )
{
if ( gInternalCodePage ! = ECodePage : : eUnsupported )
{
return ;
}
if ( gCodeset = = " UTF-8 " )
{
gInternalCodePage = ECodePage : : eUTF8 ;
}
else if ( gCodeset = = " UTF-16 " )
{
// TODO: is big endian
gInternalCodePage = ECodePage : : eUTF16 ;
}
else if ( gCodeset = = " UTF-32 " )
{
// TODO: is big endian
gInternalCodePage = ECodePage : : eUTF32 ;
}
else if ( gCodeset = = " SJIS " )
{
gInternalCodePage = ECodePage : : eSJIS ;
}
// a history of chinese locales
else if ( gCodeset = = " GB18030 " ) // is the new legally defined standard
{
gInternalCodePage = ECodePage : : e18030 ;
}
else if ( gCodeset = = " GBK " ) // GB18030 is derived from GBK, GBK is drived from GB2312
{
gInternalCodePage = ECodePage : : eGBK ;
}
else if ( gCodeset = = " GB2312 " ) // GBK is drived from GB2312, GB2312 is derived from telegraph shid
{
gInternalCodePage = ECodePage : : e2312 ;
}
else
{
gInternalCodePage = ECodePage : : eSysUnk ;
}
}
2021-06-27 21:25:29 +00:00
void Init ( )
{
2021-09-06 10:58:08 +00:00
# if defined(AU_HAS_MSFT_NATIONALLANGSUPPORT)
2021-06-27 21:25:29 +00:00
SetLanguageWin32 ( ) ;
# elif defined(AURORA_HAS_UNIXLOCALE)
SetLanguageUnix ( ) ;
# endif
# if defined(AURORA_HAS_ENVBLOCK)
SetLanguageEnvBlock ( ) ;
# endif
2021-09-06 10:58:08 +00:00
GuessSystemECodePage ( ) ;
gLanguageCode = AuToLower ( gLanguageCode ) ;
gCountryCode = AuToUpper ( gCountryCode ) ;
2022-01-24 18:37:06 +00:00
gCodeset = gCodeset ;
2021-09-06 10:58:08 +00:00
2022-01-24 18:37:06 +00:00
AuLogDbg ( " Initialized default localization information (language: {}, country: {}, codeset: {}) " , gLanguageCode , gCountryCode , gCodeset ) ;
2021-06-27 21:25:29 +00:00
}
2021-09-06 10:58:08 +00:00
// Set once the locality has been overloaded or handed out through GetLocale().
static bool gLockLocale = false;

/**
 * Replaces the language and country codes with caller supplied values.
 *
 * The lock flag is set as part of the assertion below; if it was already set
 * (by an earlier call or by GetLocale()), the assertion trips.
 *
 * @param locality (language, country) pair; the language is stored
 *                 lower-cased and the country upper-cased.
 */
AUKN_SYM void RuntimeOverloadLocality(const AuPair<AuString, AuString> &locality)
{
    SysAssert(!AuExchange(gLockLocale, true), " Locality has been locked ");

    const AuString &language = locality.first;
    const AuString &country = locality.second;

    gLanguageCode = AuToLower(language);
    gCountryCode = AuToUpper(country);
}
2021-06-27 21:25:29 +00:00
/**
 * Returns a LocalizationInfo built from the current language, country,
 * codeset and internal code page.
 *
 * Calling this also sets gLockLocale, after which RuntimeOverloadLocality()
 * will assert.
 */
AUKN_SYM LocalizationInfo GetLocale()
{
    gLockLocale = true;

    return LocalizationInfo(gLanguageCode,
                            gCountryCode,
                            gCodeset,
                            gInternalCodePage);
}
2021-10-02 16:07:33 +00:00
}