/* ****************************************************************************** * * Copyright (C) 1997-2006, International Business Machines * Corporation and others. All Rights Reserved. * ****************************************************************************** * * FILE NAME : putil.c (previously putil.cpp and ptypes.cpp) * * Date Name Description * 04/14/97 aliu Creation. * 04/24/97 aliu Added getDefaultDataDirectory() and * getDefaultLocaleID(). * 04/28/97 aliu Rewritten to assume Unix and apply general methods * for assumed case. Non-UNIX platforms must be * special-cased. Rewrote numeric methods dealing * with NaN and Infinity to be platform independent * over all IEEE 754 platforms. * 05/13/97 aliu Restored sign of timezone * (semantics are hours West of GMT) * 06/16/98 erm Added IEEE_754 stuff, cleaned up isInfinite, isNan, * nextDouble.. * 07/22/98 stephen Added remainder, max, min, trunc * 08/13/98 stephen Added isNegativeInfinity, isPositiveInfinity * 08/24/98 stephen Added longBitsFromDouble * 09/08/98 stephen Minor changes for Mac Port * 03/02/99 stephen Removed openFile(). Added AS400 support. * Fixed EBCDIC tables * 04/15/99 stephen Converted to C. * 06/28/99 stephen Removed mutex locking in u_isBigEndian(). * 08/04/99 jeffrey R. Added OS/2 changes * 11/15/99 helena Integrated S/390 IEEE support. * 04/26/01 Barry N. OS/400 support for uprv_getDefaultLocaleID * 08/15/01 Steven H. OS/400 support for uprv_getDefaultCodepage ****************************************************************************** */ /* Define _XOPEN_SOURCE for Solaris and friends. */ /* NetBSD needs it to be >= 4 */ #ifndef _XOPEN_SOURCE #if __STDC_VERSION__ >= 199901L /* It is invalid to compile an XPG3, XPG4, XPG4v2 or XPG5 application using c99 */ #define _XOPEN_SOURCE 600 #else #define _XOPEN_SOURCE 4 #endif #endif /* Make sure things like readlink and such functions work. 
*/ #ifndef _XOPEN_SOURCE_EXTENDED #define _XOPEN_SOURCE_EXTENDED 1 #endif /* include ICU headers */ #include "unicode/utypes.h" #include "unicode/putil.h" #include "unicode/ustring.h" #include "putilimp.h" #include "uassert.h" #include "umutex.h" #include "cmemory.h" #include "cstring.h" #include "locmap.h" #include "ucln_cmn.h" /* Include standard headers. */ #include <stdio.h> #include <stdlib.h> #include <string.h> #include <math.h> #include <locale.h> #include <float.h> #include <time.h> /* include system headers */ #ifdef U_WINDOWS # define WIN32_LEAN_AND_MEAN # define VC_EXTRALEAN # define NOUSER # define NOSERVICE # define NOIME # define NOMCX # include <windows.h> # include "wintz.h" #elif defined(U_CYGWIN) && defined(__STRICT_ANSI__) /* tzset isn't defined in strict ANSI on Cygwin. */ # undef __STRICT_ANSI__ #elif defined(OS400) # include <float.h> # include <qusec.h> /* error code structure */ # include <qusrjobi.h> # include <qliept.h> /* EPT_CALL macro - this include must be after all other "QSYSINCs" */ #elif defined(XP_MAC) # include <Files.h> # include <IntlResources.h> # include <Script.h> # include <Folders.h> # include <MacTypes.h> # include <TextUtils.h> # define ICU_NO_USER_DATA_OVERRIDE 1 #elif defined(OS390) #include "unicode/ucnv.h" /* Needed for UCNV_SWAP_LFNL_OPTION_STRING */ #elif defined(U_DARWIN) || defined(U_LINUX) || defined(U_BSD) #include <limits.h> #include <unistd.h> #elif defined(U_QNX) #include <sys/neutrino.h> #endif #ifndef U_WINDOWS #include <sys/time.h> #endif /* * Only include langinfo.h if we have a way to get the codeset. If we later * depend on more feature, we can test on U_HAVE_NL_LANGINFO. * */ #if U_HAVE_NL_LANGINFO_CODESET #include <langinfo.h> #endif /* Define the extension for data files, again... */ #define DATA_TYPE "dat" /* Leave this copyright notice here! 
*/
static const char copyright[] = U_COPYRIGHT_STRING;

/* floating point implementations ------------------------------------------- */

/* We return QNAN rather than SNAN*/
/* Mask for the most significant bit of a 32-bit word; the code below uses it
   as the sign-bit test on the high word of a double. */
#define SIGN 0x80000000U

/* Make it easy to define certain types of constants */
/* Overlays a double with a 64-bit integer so that a value can be built from,
   or inspected as, its raw bit pattern. */
typedef union {
    int64_t i64; /* This must be defined first in order to allow the initialization to work. This is a C89 feature. */
    double d64;
} BitPatternConversion;

/* Bit patterns returned by uprv_getNaN() and uprv_getInfinity(). */
static const BitPatternConversion gNan = { (int64_t) INT64_C(0x7FF8000000000000) };
static const BitPatternConversion gInf = { (int64_t) INT64_C(0x7FF0000000000000) };

/*---------------------------------------------------------------------------
  Platform utilities
  Our general strategy is to assume we're on a POSIX platform.  Platforms which
  are non-POSIX must declare themselves so.  The default POSIX implementation
  will sometimes work for non-POSIX platforms as well (e.g., the NaN-related
  functions).
  ---------------------------------------------------------------------------*/

/* U_POSIX_LOCALE is defined to 1 everywhere except Windows, classic Mac OS
   and OS/400; it selects the POSIX locale and codepage code in this file. */
#if defined(U_WINDOWS) || defined(XP_MAC) || defined(OS400)
#   undef U_POSIX_LOCALE
#else
#   define U_POSIX_LOCALE    1
#endif

/*
    WARNING! u_topNBytesOfDouble and u_bottomNBytesOfDouble
    can't be properly optimized by the gcc compiler sometimes (i.e. gcc 3.2).
*/
#if !IEEE_754
/*
 * Returns a pointer to the n most significant bytes of *d: the start of the
 * object on big-endian platforms, its last n bytes otherwise.
 * Only compiled when IEEE_754 is false.
 */
static char*
u_topNBytesOfDouble(double* d, int n)
{
#if U_IS_BIG_ENDIAN
    return (char*)d;
#else
    return (char*)(d + 1) - n;
#endif
}
#endif

/*
 * Returns a pointer to the n least significant bytes of *d: the last n bytes
 * of the object on big-endian platforms, its start otherwise.
 * Note that the sign and exponent of a double are NOT in these bytes.
 */
static char*
u_bottomNBytesOfDouble(double* d, int n)
{
#if U_IS_BIG_ENDIAN
    return (char*)(d + 1) - n;
#else
    return (char*)d;
#endif
}

#if defined(U_WINDOWS)
/* Lets a FILETIME be read as a single 64-bit integer. */
typedef union {
    int64_t int64;
    FILETIME fileTime;
} FileTimeConversion;   /* This is like a ULARGE_INTEGER */

/* Number of 100 nanoseconds from 1/1/1601 to 1/1/1970 */
#define EPOCH_BIAS  INT64_C(116444736000000000)
#define HECTONANOSECOND_PER_MILLISECOND   10000

#endif

/*---------------------------------------------------------------------------
  Universal Implementations
  These are designed to work on all platforms.
Try these, and if they don't work on your platform, then special case your platform with new implementations. ---------------------------------------------------------------------------*/ /* Return UTC (GMT) time measured in milliseconds since 0:00 on 1/1/70.*/ U_CAPI UDate U_EXPORT2 uprv_getUTCtime() { #ifdef XP_MAC time_t t, t1, t2; struct tm tmrec; uprv_memset( &tmrec, 0, sizeof(tmrec) ); tmrec.tm_year = 70; tmrec.tm_mon = 0; tmrec.tm_mday = 1; t1 = mktime(&tmrec); /* seconds of 1/1/1970*/ time(&t); uprv_memcpy( &tmrec, gmtime(&t), sizeof(tmrec) ); t2 = mktime(&tmrec); /* seconds of current GMT*/ return (UDate)(t2 - t1) * U_MILLIS_PER_SECOND; /* GMT (or UTC) in seconds since 1970*/ #elif defined(U_WINDOWS) FileTimeConversion winTime; GetSystemTimeAsFileTime(&winTime.fileTime); return (UDate)((winTime.int64 - EPOCH_BIAS) / HECTONANOSECOND_PER_MILLISECOND); #else /* struct timeval posixTime; gettimeofday(&posixTime, NULL); return (UDate)(((int64_t)posixTime.tv_sec * U_MILLIS_PER_SECOND) + (posixTime.tv_usec/1000)); */ time_t epochtime; time(&epochtime); return (UDate)epochtime * U_MILLIS_PER_SECOND; #endif } /*----------------------------------------------------------------------------- IEEE 754 These methods detect and return NaN and infinity values for doubles conforming to IEEE 754. Platforms which support this standard include X86, Mac 680x0, Mac PowerPC, AIX RS/6000, and most others. If this doesn't work on your platform, you have non-IEEE floating-point, and will need to code your own versions. A naive implementation is to return 0.0 for getNaN and getInfinity, and false for isNaN and isInfinite. ---------------------------------------------------------------------------*/ U_CAPI UBool U_EXPORT2 uprv_isNaN(double number) { #if IEEE_754 BitPatternConversion convertedNumber; convertedNumber.d64 = number; /* Infinity is 0x7FF0000000000000U. 
Anything greater than that is a NaN */ return (UBool)((convertedNumber.i64 & U_INT64_MAX) > gInf.i64); #elif defined(OS390) uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number, sizeof(uint32_t)); uint32_t lowBits = *(uint32_t*)u_bottomNBytesOfDouble(&number, sizeof(uint32_t)); return ((highBits & 0x7F080000L) == 0x7F080000L) && (lowBits == 0x00000000L); #else /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/ /* you'll need to replace this default implementation with what's correct*/ /* for your platform.*/ return number != number; #endif } U_CAPI UBool U_EXPORT2 uprv_isInfinite(double number) { #if IEEE_754 BitPatternConversion convertedNumber; convertedNumber.d64 = number; /* Infinity is exactly 0x7FF0000000000000U. */ return (UBool)((convertedNumber.i64 & U_INT64_MAX) == gInf.i64); #elif defined(OS390) uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number, sizeof(uint32_t)); uint32_t lowBits = *(uint32_t*)u_bottomNBytesOfDouble(&number, sizeof(uint32_t)); return ((highBits & ~SIGN) == 0x70FF0000L) && (lowBits == 0x00000000L); #else /* If your platform doesn't support IEEE 754 but *does* have an infinity*/ /* value, you'll need to replace this default implementation with what's*/ /* correct for your platform.*/ return number == (2.0 * number); #endif } U_CAPI UBool U_EXPORT2 uprv_isPositiveInfinity(double number) { #if IEEE_754 || defined(OS390) return (UBool)(number > 0 && uprv_isInfinite(number)); #else return uprv_isInfinite(number); #endif } U_CAPI UBool U_EXPORT2 uprv_isNegativeInfinity(double number) { #if IEEE_754 || defined(OS390) return (UBool)(number < 0 && uprv_isInfinite(number)); #else uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number, sizeof(uint32_t)); return((highBits & SIGN) && uprv_isInfinite(number)); #endif } U_CAPI double U_EXPORT2 uprv_getNaN() { #if IEEE_754 || defined(OS390) return gNan.d64; #else /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/ /* you'll need to 
replace this default implementation with what's correct*/ /* for your platform.*/ return 0.0; #endif } U_CAPI double U_EXPORT2 uprv_getInfinity() { #if IEEE_754 || defined(OS390) return gInf.d64; #else /* If your platform doesn't support IEEE 754 but *does* have an infinity*/ /* value, you'll need to replace this default implementation with what's*/ /* correct for your platform.*/ return 0.0; #endif } U_CAPI double U_EXPORT2 uprv_floor(double x) { return floor(x); } U_CAPI double U_EXPORT2 uprv_ceil(double x) { return ceil(x); } U_CAPI double U_EXPORT2 uprv_round(double x) { return uprv_floor(x + 0.5); } U_CAPI double U_EXPORT2 uprv_fabs(double x) { return fabs(x); } U_CAPI double U_EXPORT2 uprv_modf(double x, double* y) { return modf(x, y); } U_CAPI double U_EXPORT2 uprv_fmod(double x, double y) { return fmod(x, y); } U_CAPI double U_EXPORT2 uprv_pow(double x, double y) { /* This is declared as "double pow(double x, double y)" */ return pow(x, y); } U_CAPI double U_EXPORT2 uprv_pow10(int32_t x) { return pow(10.0, (double)x); } U_CAPI double U_EXPORT2 uprv_fmax(double x, double y) { #if IEEE_754 int32_t lowBits; /* first handle NaN*/ if(uprv_isNaN(x) || uprv_isNaN(y)) return uprv_getNaN(); /* check for -0 and 0*/ lowBits = *(uint32_t*) u_bottomNBytesOfDouble(&x, sizeof(uint32_t)); if(x == 0.0 && y == 0.0 && (lowBits & SIGN)) return y; #endif /* this should work for all flt point w/o NaN and Infpecial cases */ return (x > y ? x : y); } U_CAPI double U_EXPORT2 uprv_fmin(double x, double y) { #if IEEE_754 int32_t lowBits; /* first handle NaN*/ if(uprv_isNaN(x) || uprv_isNaN(y)) return uprv_getNaN(); /* check for -0 and 0*/ lowBits = *(uint32_t*) u_bottomNBytesOfDouble(&y, sizeof(uint32_t)); if(x == 0.0 && y == 0.0 && (lowBits & SIGN)) return y; #endif /* this should work for all flt point w/o NaN and Inf special cases */ return (x > y ? y : x); } /** * Truncates the given double. 
* trunc(3.3) = 3.0, trunc (-3.3) = -3.0 * This is different than calling floor() or ceil(): * floor(3.3) = 3, floor(-3.3) = -4 * ceil(3.3) = 4, ceil(-3.3) = -3 */ U_CAPI double U_EXPORT2 uprv_trunc(double d) { #if IEEE_754 int32_t lowBits; /* handle error cases*/ if(uprv_isNaN(d)) return uprv_getNaN(); if(uprv_isInfinite(d)) return uprv_getInfinity(); lowBits = *(uint32_t*) u_bottomNBytesOfDouble(&d, sizeof(uint32_t)); if( (d == 0.0 && (lowBits & SIGN)) || d < 0) return ceil(d); else return floor(d); #else return d >= 0 ? floor(d) : ceil(d); #endif } /** * Return the largest positive number that can be represented by an integer * type of arbitrary bit length. */ U_CAPI double U_EXPORT2 uprv_maxMantissa(void) { return pow(2.0, DBL_MANT_DIG + 1.0) - 1.0; } U_CAPI double U_EXPORT2 uprv_log(double d) { return log(d); } #if 0 /* This isn't used. If it's readded, readd putiltst.c tests */ U_CAPI int32_t U_EXPORT2 uprv_digitsAfterDecimal(double x) { char buffer[20]; int32_t numDigits, bytesWritten; char *p = buffer; int32_t ptPos, exponent; /* cheat and use the string-format routine to get a string representation*/ /* (it handles mathematical inaccuracy better than we can), then find out */ /* many characters are to the right of the decimal point */ bytesWritten = sprintf(buffer, "%+.9g", x); while (isdigit(*(++p))) { } ptPos = (int32_t)(p - buffer); numDigits = (int32_t)(bytesWritten - ptPos - 1); /* if the number's string representation is in scientific notation, find */ /* the exponent and take it into account*/ exponent = 0; p = uprv_strchr(buffer, 'e'); if (p != 0) { int16_t expPos = (int16_t)(p - buffer); numDigits -= bytesWritten - expPos; exponent = (int32_t)(atol(p + 1)); } /* the string representation may still have spurious decimal digits in it, */ /* so we cut off at the ninth digit to the right of the decimal, and have */ /* to search backward from there to the first non-zero digit*/ if (numDigits > 9) { numDigits = 9; while (numDigits > 0 && buffer[ptPos + 
numDigits] == '0') --numDigits; } numDigits -= exponent; if (numDigits < 0) { return 0; } return numDigits; } #endif /*--------------------------------------------------------------------------- Platform-specific Implementations Try these, and if they don't work on your platform, then special case your platform with new implementations. ---------------------------------------------------------------------------*/ /* Generic time zone layer -------------------------------------------------- */ /* Time zone utilities */ U_CAPI void U_EXPORT2 uprv_tzset() { #ifdef U_TZSET U_TZSET(); #else /* no initialization*/ #endif } U_CAPI int32_t U_EXPORT2 uprv_timezone() { #ifdef U_TIMEZONE return U_TIMEZONE; #else time_t t, t1, t2; struct tm tmrec; UBool dst_checked; int32_t tdiff = 0; time(&t); uprv_memcpy( &tmrec, localtime(&t), sizeof(tmrec) ); dst_checked = (tmrec.tm_isdst != 0); /* daylight savings time is checked*/ t1 = mktime(&tmrec); /* local time in seconds*/ uprv_memcpy( &tmrec, gmtime(&t), sizeof(tmrec) ); t2 = mktime(&tmrec); /* GMT (or UTC) in seconds*/ tdiff = t2 - t1; /* imitate NT behaviour, which returns same timezone offset to GMT for winter and summer*/ if (dst_checked) tdiff += 3600; return tdiff; #endif } /* Note that U_TZNAME does *not* have to be tzname, but if it is, some platforms need to have it declared here. */ #if defined(U_TZNAME) && (defined(U_IRIX) || defined(U_DARWIN) || defined(U_CYGWIN)) /* RS6000 and others reject char **tzname. */ extern U_IMPORT char *U_TZNAME[]; #endif #if !UCONFIG_NO_FILE_IO && (defined(U_DARWIN) || defined(U_LINUX) || defined(U_BSD)) /* These platforms are likely to use Olson timezone IDs. 
*/ #define CHECK_LOCALTIME_LINK 1 #define TZZONELINK "/etc/localtime" #define TZZONEINFO "/usr/share/zoneinfo/" static char gTimeZoneBuffer[PATH_MAX]; static char *gTimeZoneBufferPtr = NULL; #endif #ifndef U_WINDOWS #define isNonDigit(ch) (ch < '0' || '9' < ch) static UBool isValidOlsonID(const char *id) { int32_t idx = 0; /* Determine if this is something like Iceland (Olson ID) or AST4ADT (non-Olson ID) */ while (id[idx] && isNonDigit(id[idx]) && id[idx] != ',') { idx++; } /* If we went through the whole string, then it might be okay. The timezone is sometimes set to "CST-7CDT", "CST6CDT5,J129,J131/19:30", "GRNLNDST3GRNLNDDT" or similar, so we cannot use it. The rest of the time it could be an Olson ID. George */ return (UBool)(id[idx] == 0 || uprv_strcmp(id, "PST8PDT") == 0 || uprv_strcmp(id, "MST7MDT") == 0 || uprv_strcmp(id, "CST6CDT") == 0 || uprv_strcmp(id, "EST5EDT") == 0); } #endif U_CAPI const char* U_EXPORT2 uprv_tzname(int n) { #ifdef U_WINDOWS const char *id = uprv_detectWindowsTimeZone(); if (id != NULL) { return id; } #else const char *tzenv = NULL; /*#if defined(U_DARWIN) int ret; tzenv = getenv("TZFILE"); if (tzenv != NULL) { return tzenv; } #endif*/ tzenv = getenv("TZ"); if (tzenv != NULL && isValidOlsonID(tzenv)) { /* This might be a good Olson ID. */ if (uprv_strncmp(tzenv, "posix/", 6) == 0 || uprv_strncmp(tzenv, "right/", 6) == 0) { /* Remove the posix/ or right/ prefix. */ tzenv += 6; } return tzenv; } /* else U_TZNAME will give a better result. */ #if defined(CHECK_LOCALTIME_LINK) /* Caller must handle threading issues */ if (gTimeZoneBufferPtr == NULL) { /* This is a trick to look at the name of the link to get the Olson ID because the tzfile contents is underspecified. This isn't guaranteed to work because it may not be a symlink. 
*/ int32_t ret = (int32_t)readlink(TZZONELINK, gTimeZoneBuffer, sizeof(gTimeZoneBuffer)); if (0 < ret) { int32_t tzZoneInfoLen = uprv_strlen(TZZONEINFO); gTimeZoneBuffer[ret] = 0; if (uprv_strncmp(gTimeZoneBuffer, TZZONEINFO, tzZoneInfoLen) == 0 && isValidOlsonID(gTimeZoneBuffer + tzZoneInfoLen)) { return (gTimeZoneBufferPtr = gTimeZoneBuffer + tzZoneInfoLen); } } } else { return gTimeZoneBufferPtr; } #endif #endif #ifdef U_TZNAME /* U_TZNAME is usually a non-unique abbreviation, which isn't normally usable. */ return U_TZNAME[n]; #else return ""; #endif } /* Get and set the ICU data directory --------------------------------------- */ static char *gDataDirectory = NULL; #if U_POSIX_LOCALE static char *gCorrectedPOSIXLocale = NULL; /* Heap allocated */ #endif static UBool U_CALLCONV putil_cleanup(void) { if (gDataDirectory && *gDataDirectory) { uprv_free(gDataDirectory); } gDataDirectory = NULL; #if U_POSIX_LOCALE if (gCorrectedPOSIXLocale) { uprv_free(gCorrectedPOSIXLocale); gCorrectedPOSIXLocale = NULL; } #endif return TRUE; } /* * Set the data directory. * Make a copy of the passed string, and set the global data dir to point to it. * TODO: see bug #2849, regarding thread safety. */ U_CAPI void U_EXPORT2 u_setDataDirectory(const char *directory) { char *newDataDir; int32_t length; if(directory==NULL || *directory==0) { /* A small optimization to prevent the malloc and copy when the shared library is used, and this is a way to make sure that NULL is never returned. 
*/ newDataDir = (char *)""; } else { length=(int32_t)uprv_strlen(directory); newDataDir = (char *)uprv_malloc(length + 2); uprv_strcpy(newDataDir, directory); #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR) { char *p; while(p = uprv_strchr(newDataDir, U_FILE_ALT_SEP_CHAR)) { *p = U_FILE_SEP_CHAR; } } #endif } umtx_lock(NULL); if (gDataDirectory && *gDataDirectory) { uprv_free(gDataDirectory); } gDataDirectory = newDataDir; ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup); umtx_unlock(NULL); } U_CAPI UBool U_EXPORT2 uprv_pathIsAbsolute(const char *path) { if(!path || !*path) { return FALSE; } if(*path == U_FILE_SEP_CHAR) { return TRUE; } #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR) if(*path == U_FILE_ALT_SEP_CHAR) { return TRUE; } #endif #if defined(U_WINDOWS) if( (((path[0] >= 'A') && (path[0] <= 'Z')) || ((path[0] >= 'a') && (path[0] <= 'z'))) && path[1] == ':' ) { return TRUE; } #endif return FALSE; } U_CAPI const char * U_EXPORT2 u_getDataDirectory(void) { const char *path = NULL; /* if we have the directory, then return it immediately */ umtx_lock(NULL); path = gDataDirectory; umtx_unlock(NULL); if(path) { return path; } /* When ICU_NO_USER_DATA_OVERRIDE is defined, users aren't allowed to override ICU's data with the ICU_DATA environment variable. This prevents problems where multiple custom copies of ICU's specific version of data are installed on a system. Either the application must define the data directory with u_setDataDirectory, define ICU_DATA_DIR when compiling ICU, set the data with udata_setCommonData or trust that all of the required data is contained in ICU's data library that contains the entry point defined by U_ICUDATA_ENTRY_POINT. There may also be some platforms where environment variables are not allowed. 
*/ # if !defined(ICU_NO_USER_DATA_OVERRIDE) && !UCONFIG_NO_FILE_IO /* First try to get the environment variable */ path=getenv("ICU_DATA"); # endif /* ICU_DATA_DIR may be set as a compile option */ # ifdef ICU_DATA_DIR if(path==NULL || *path==0) { path=ICU_DATA_DIR; } # endif if(path==NULL) { /* It looks really bad, set it to something. */ path = ""; } u_setDataDirectory(path); return gDataDirectory; } /* Macintosh-specific locale information ------------------------------------ */ #ifdef XP_MAC typedef struct { int32_t script; int32_t region; int32_t lang; int32_t date_region; const char* posixID; } mac_lc_rec; /* Todo: This will be updated with a newer version from www.unicode.org web page when it's available.*/ #define MAC_LC_MAGIC_NUMBER -5 #define MAC_LC_INIT_NUMBER -9 static const mac_lc_rec mac_lc_recs[] = { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 0, "en_US", /* United States*/ MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 1, "fr_FR", /* France*/ MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 2, "en_GB", /* Great Britain*/ MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 3, "de_DE", /* Germany*/ MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 4, "it_IT", /* Italy*/ MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 5, "nl_NL", /* Metherlands*/ MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 6, "fr_BE", /* French for Belgium or Lxembourg*/ MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 7, "sv_SE", /* Sweden*/ MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 9, "da_DK", /* Denmark*/ MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 10, "pt_PT", /* Portugal*/ MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 11, "fr_CA", /* French Canada*/ MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 13, "is_IS", /* Israel*/ MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 14, 
"ja_JP", /* Japan*/ MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 15, "en_AU", /* Australia*/ MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 16, "ar_AE", /* the Arabic world (?)*/ MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 17, "fi_FI", /* Finland*/ MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 18, "fr_CH", /* French for Switzerland*/ MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 19, "de_CH", /* German for Switzerland*/ MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 20, "el_GR", /* Greece*/ MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 21, "is_IS", /* Iceland ===*/ /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 22, "",*/ /* Malta ===*/ /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 23, "",*/ /* Cyprus ===*/ MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 24, "tr_TR", /* Turkey ===*/ MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 25, "sh_YU", /* Croatian system for Yugoslavia*/ /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 33, "",*/ /* Hindi system for India*/ /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 34, "",*/ /* Pakistan*/ MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 41, "lt_LT", /* Lithuania*/ MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 42, "pl_PL", /* Poland*/ MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 43, "hu_HU", /* Hungary*/ MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 44, "et_EE", /* Estonia*/ MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 45, "lv_LV", /* Latvia*/ /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 46, "",*/ /* Lapland [Ask Rich for the data. 
HS]*/ /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 47, "",*/ /* Faeroe Islands*/ MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 48, "fa_IR", /* Iran*/ MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 49, "ru_RU", /* Russia*/ MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 50, "en_IE", /* Ireland*/ MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 51, "ko_KR", /* Korea*/ MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 52, "zh_CN", /* People's Republic of China*/ MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 53, "zh_TW", /* Taiwan*/ MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 54, "th_TH", /* Thailand*/ /* fallback is en_US*/ MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, "en_US" }; #endif #if U_POSIX_LOCALE /* Return just the POSIX id, whatever happens to be in it */ static const char *uprv_getPOSIXID(void) { static const char* posixID = NULL; if (posixID == 0) { /* * On Solaris two different calls to setlocale can result in * different values. Only get this value once. * * We must check this first because an application can set this. * * LC_ALL can't be used because it's platform dependent. The LANG * environment variable seems to affect LC_CTYPE variable by default. * Here is what setlocale(LC_ALL, NULL) can return. * HPUX can return 'C C C C C C C' * Solaris can return /en_US/C/C/C/C/C on the second try. * Linux can return LC_CTYPE=C;LC_NUMERIC=C;... * * The default codepage detection also needs to use LC_CTYPE. * * Do not call setlocale(LC_*, "")! Using an empty string instead * of NULL, will modify the libc behavior. */ posixID = setlocale(LC_CTYPE, NULL); if ((posixID == 0) || (uprv_strcmp("C", posixID) == 0) || (uprv_strcmp("POSIX", posixID) == 0)) { /* Maybe we got some garbage. 
Try something more reasonable */ posixID = getenv("LC_ALL"); if (posixID == 0) { posixID = getenv("LC_CTYPE"); if (posixID == 0) { posixID = getenv("LANG"); } } } if ((posixID==0) || (uprv_strcmp("C", posixID) == 0) || (uprv_strcmp("POSIX", posixID) == 0)) { /* Nothing worked. Give it a nice POSIX default value. */ posixID = "en_US_POSIX"; } } return posixID; } #endif /* NOTE: The caller should handle thread safety */ U_CAPI const char* U_EXPORT2 uprv_getDefaultLocaleID() { #if U_POSIX_LOCALE /* Note that: (a '!' means the ID is improper somehow) LC_ALL ----> default_loc codepage -------------------------------------------------------- ab.CD ab CD ab@CD ab__CD - ab@CD.EF ab__CD EF ab_CD.EF@GH ab_CD_GH EF Some 'improper' ways to do the same as above: ! ab_CD@GH.EF ab_CD_GH EF ! ab_CD.EF@GH.IJ ab_CD_GH EF ! ab_CD@ZZ.EF@GH.IJ ab_CD_GH EF _CD@GH _CD_GH - _CD.EF@GH _CD_GH EF The variant cannot have dots in it. The 'rightmost' variant (@xxx) wins. The leftmost codepage (.xxx) wins. */ char *correctedPOSIXLocale = 0; const char* posixID = uprv_getPOSIXID(); const char *p; const char *q; int32_t len; /* Format: (no spaces) ll [ _CC ] [ . MM ] [ @ VV] l = lang, C = ctry, M = charmap, V = variant */ if (gCorrectedPOSIXLocale != NULL) { return gCorrectedPOSIXLocale; } if ((p = uprv_strchr(posixID, '.')) != NULL) { /* assume new locale can't be larger than old one? */ correctedPOSIXLocale = uprv_malloc(uprv_strlen(posixID)+1); uprv_strncpy(correctedPOSIXLocale, posixID, p-posixID); correctedPOSIXLocale[p-posixID] = 0; /* do not copy after the @ */ if ((p = uprv_strchr(correctedPOSIXLocale, '@')) != NULL) { correctedPOSIXLocale[p-correctedPOSIXLocale] = 0; } } /* Note that we scan the *uncorrected* ID. 
*/ if ((p = uprv_strrchr(posixID, '@')) != NULL) { if (correctedPOSIXLocale == NULL) { correctedPOSIXLocale = uprv_malloc(uprv_strlen(posixID)+1); uprv_strncpy(correctedPOSIXLocale, posixID, p-posixID); correctedPOSIXLocale[p-posixID] = 0; } p++; /* Take care of any special cases here.. */ if (!uprv_strcmp(p, "nynorsk")) { p = "NY"; /* Don't worry about no__NY. In practice, it won't appear. */ } if (uprv_strchr(correctedPOSIXLocale,'_') == NULL) { uprv_strcat(correctedPOSIXLocale, "__"); /* aa@b -> aa__b */ } else { uprv_strcat(correctedPOSIXLocale, "_"); /* aa_CC@b -> aa_CC_b */ } if ((q = uprv_strchr(p, '.')) != NULL) { /* How big will the resulting string be? */ len = (int32_t)(uprv_strlen(correctedPOSIXLocale) + (q-p)); uprv_strncat(correctedPOSIXLocale, p, q-p); correctedPOSIXLocale[len] = 0; } else { /* Anything following the @ sign */ uprv_strcat(correctedPOSIXLocale, p); } /* Should there be a map from 'no@nynorsk' -> no_NO_NY here? * How about 'russian' -> 'ru'? * Many of the other locales using ISO codes will be handled by the * canonicalization functions in uloc_getDefault. */ } /* Was a correction made? */ if (correctedPOSIXLocale != NULL) { posixID = correctedPOSIXLocale; } else { /* copy it, just in case the original pointer goes away. See j2395 */ correctedPOSIXLocale = (char *)uprv_malloc(uprv_strlen(posixID) + 1); posixID = uprv_strcpy(correctedPOSIXLocale, posixID); } if (gCorrectedPOSIXLocale == NULL) { gCorrectedPOSIXLocale = correctedPOSIXLocale; ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup); correctedPOSIXLocale = NULL; } if (correctedPOSIXLocale != NULL) { /* Was already set - clean up. 
*/ uprv_free(correctedPOSIXLocale); } return posixID; #elif defined(U_WINDOWS) UErrorCode status = U_ZERO_ERROR; LCID id = GetThreadLocale(); const char* locID = uprv_convertToPosix(id, &status); if (U_FAILURE(status)) { locID = "en_US"; } return locID; #elif defined(XP_MAC) int32_t script = MAC_LC_INIT_NUMBER; /* = IntlScript(); or GetScriptManagerVariable(smSysScript);*/ int32_t region = MAC_LC_INIT_NUMBER; /* = GetScriptManagerVariable(smRegionCode);*/ int32_t lang = MAC_LC_INIT_NUMBER; /* = GetScriptManagerVariable(smScriptLang);*/ int32_t date_region = MAC_LC_INIT_NUMBER; const char* posixID = 0; int32_t count = sizeof(mac_lc_recs) / sizeof(mac_lc_rec); int32_t i; Intl1Hndl ih; ih = (Intl1Hndl) GetIntlResource(1); if (ih) date_region = ((uint16_t)(*ih)->intl1Vers) >> 8; for (i = 0; i < count; i++) { if ( ((mac_lc_recs[i].script == MAC_LC_MAGIC_NUMBER) || (mac_lc_recs[i].script == script)) && ((mac_lc_recs[i].region == MAC_LC_MAGIC_NUMBER) || (mac_lc_recs[i].region == region)) && ((mac_lc_recs[i].lang == MAC_LC_MAGIC_NUMBER) || (mac_lc_recs[i].lang == lang)) && ((mac_lc_recs[i].date_region == MAC_LC_MAGIC_NUMBER) || (mac_lc_recs[i].date_region == date_region)) ) { posixID = mac_lc_recs[i].posixID; break; } } return posixID; #elif defined(OS400) /* locales are process scoped and are by definition thread safe */ static char correctedLocale[64]; const char *localeID = getenv("LC_ALL"); char *p; if (localeID == NULL) localeID = getenv("LANG"); if (localeID == NULL) localeID = setlocale(LC_ALL, NULL); /* Make sure we have something... */ if (localeID == NULL) return "en_US_POSIX"; /* Extract the locale name from the path. */ if((p = uprv_strrchr(localeID, '/')) != NULL) { /* Increment p to start of locale name. */ p++; localeID = p; } /* Copy to work location. */ uprv_strcpy(correctedLocale, localeID); /* Strip off the '.locale' extension. */ if((p = uprv_strchr(correctedLocale, '.')) != NULL) { *p = 0; } /* Upper case the locale name. 
*/ T_CString_toUpperCase(correctedLocale); /* See if we are using the POSIX locale. Any of the * following are equivalent and use the same QLGPGCMA * (POSIX) locale. * QLGPGCMA2 means UCS2 * QLGPGCMA_4 means UTF-32 * QLGPGCMA_8 means UTF-8 */ if ((uprv_strcmp("C", correctedLocale) == 0) || (uprv_strcmp("POSIX", correctedLocale) == 0) || (uprv_strncmp("QLGPGCMA", correctedLocale, 8) == 0)) { uprv_strcpy(correctedLocale, "en_US_POSIX"); } else { int16_t LocaleLen; /* Lower case the lang portion. */ for(p = correctedLocale; *p != 0 && *p != '_'; p++) { *p = uprv_tolower(*p); } /* Adjust for Euro. After '_E' add 'URO'. */ LocaleLen = uprv_strlen(correctedLocale); if (correctedLocale[LocaleLen - 2] == '_' && correctedLocale[LocaleLen - 1] == 'E') { uprv_strcat(correctedLocale, "URO"); } /* If using Lotus-based locale then convert to * equivalent non Lotus. */ else if (correctedLocale[LocaleLen - 2] == '_' && correctedLocale[LocaleLen - 1] == 'L') { correctedLocale[LocaleLen - 2] = 0; } /* There are separate simplified and traditional * locales called zh_HK_S and zh_HK_T. */ else if (uprv_strncmp(correctedLocale, "zh_HK", 5) == 0) { uprv_strcpy(correctedLocale, "zh_HK"); } /* A special zh_CN_GBK locale... */ else if (uprv_strcmp(correctedLocale, "zh_CN_GBK") == 0) { uprv_strcpy(correctedLocale, "zh_CN"); } } return correctedLocale; #endif } #if U_POSIX_LOCALE /* Due to various platform differences, one platform may specify a charset, when they really mean a different charset. Remap the names so that they are compatible with ICU. */ static const char* remapPlatformDependentCodepage(const char *locale, const char *name) { if (locale != NULL && *locale == 0) { /* Make sure that an empty locale is handled the same way. 
*/ locale = NULL; } if (name == NULL) { return NULL; } #if defined(U_AIX) if (uprv_strcmp(name, "IBM-943") == 0) { /* Use the ASCII compatible ibm-943 */ name = "Shift-JIS"; } else if (uprv_strcmp(name, "IBM-1252") == 0) { /* Use the windows-1252 that contains the Euro */ name = "IBM-5348"; } #elif defined(U_SOLARIS) if (locale != NULL && uprv_strcmp(name, "EUC") == 0) { /* Solaris underspecifies the "EUC" name. */ if (uprv_strcmp(locale, "zh_CN") == 0) { name = "EUC-CN"; } else if (uprv_strcmp(locale, "zh_TW") == 0) { name = "EUC-TW"; } else if (uprv_strcmp(locale, "ko_KR") == 0) { name = "EUC-KR"; } } #elif defined(U_DARWIN) if (locale == NULL && *name == 0) { /* No locale was specified, and an empty name was passed in. This usually indicates that nl_langinfo didn't return valid information. Mac OS X uses UTF-8 by default (especially the locale data and console). */ name = "UTF-8"; } #endif /* return NULL when "" is passed in */ if (*name == 0) { name = NULL; } return name; } static const char* getCodepageFromPOSIXID(const char *localeName, char * buffer, int32_t buffCapacity) { char localeBuf[100]; const char *name = NULL; char *variant = NULL; if (localeName != NULL && (name = (uprv_strchr(localeName, '.'))) != NULL) { size_t localeCapacity = uprv_min(sizeof(localeBuf), (name-localeName)+1); uprv_strncpy(localeBuf, localeName, localeCapacity); localeBuf[localeCapacity-1] = 0; /* ensure NULL termination */ name = uprv_strncpy(buffer, name+1, buffCapacity); buffer[buffCapacity-1] = 0; /* ensure NULL termination */ if ((variant = (uprv_strchr(name, '@'))) != NULL) { *variant = 0; } name = remapPlatformDependentCodepage(localeBuf, name); } return name; } #endif static const char* int_getDefaultCodepage() { #if defined(OS400) uint32_t ccsid = 37; /* Default to ibm-37 */ static char codepage[64]; Qwc_JOBI0400_t jobinfo; Qus_EC_t error = { sizeof(Qus_EC_t) }; /* SPI error code */ EPT_CALL(QUSRJOBI)(&jobinfo, sizeof(jobinfo), "JOBI0400", "* ", " ", &error); if 
(error.Bytes_Available == 0) { if (jobinfo.Coded_Char_Set_ID != 0xFFFF) { ccsid = (uint32_t)jobinfo.Coded_Char_Set_ID; } else if (jobinfo.Default_Coded_Char_Set_Id != 0xFFFF) { ccsid = (uint32_t)jobinfo.Default_Coded_Char_Set_Id; } /* else use the default */ } sprintf(codepage,"ibm-%d", ccsid); return codepage; #elif defined(OS390) static char codepage[64]; sprintf(codepage,"%s" UCNV_SWAP_LFNL_OPTION_STRING, nl_langinfo(CODESET)); return codepage; #elif defined(XP_MAC) return "macintosh"; /* TODO: Macintosh Roman. There must be a better way. fixme! */ #elif defined(U_WINDOWS) static char codepage[64]; sprintf(codepage, "windows-%d", GetACP()); return codepage; #elif U_POSIX_LOCALE static char codesetName[100]; const char *localeName = NULL; const char *name = NULL; uprv_memset(codesetName, 0, sizeof(codesetName)); /* Use setlocale in a nice way, and then check some environment variables. Maybe the application used setlocale already. */ localeName = uprv_getPOSIXID(); name = getCodepageFromPOSIXID(localeName, codesetName, sizeof(codesetName)); if (name) { /* if we can find the codeset name from setlocale, return that. */ return name; } /* else "C" was probably returned. That's underspecified. */ #if U_HAVE_NL_LANGINFO_CODESET if (*codesetName) { uprv_memset(codesetName, 0, sizeof(codesetName)); } /* When available, check nl_langinfo because it usually gives more useful names. It depends on LC_CTYPE and not LANG or LC_ALL. nl_langinfo may use the same buffer as setlocale. */ { const char *codeset = nl_langinfo(U_NL_LANGINFO_CODESET); codeset = remapPlatformDependentCodepage(NULL, codeset); if (codeset != NULL) { uprv_strncpy(codesetName, codeset, sizeof(codesetName)); codesetName[sizeof(codesetName)-1] = 0; return codesetName; } } #endif if (*codesetName == 0) { /* Everything failed. Return US ASCII (ISO 646). 
*/ uprv_strcpy(codesetName, "US-ASCII"); } return codesetName; #else return "US-ASCII"; #endif } U_CAPI const char* U_EXPORT2 uprv_getDefaultCodepage() { static char const *name = NULL; umtx_lock(NULL); if (name == NULL) { name = int_getDefaultCodepage(); } umtx_unlock(NULL); return name; } /* end of platform-specific implementation -------------- */ /* version handling --------------------------------------------------------- */ U_CAPI void U_EXPORT2 u_versionFromString(UVersionInfo versionArray, const char *versionString) { char *end; uint16_t part=0; if(versionArray==NULL) { return; } if(versionString!=NULL) { for(;;) { versionArray[part]=(uint8_t)uprv_strtoul(versionString, &end, 10); if(end==versionString || ++part==U_MAX_VERSION_LENGTH || *end!=U_VERSION_DELIMITER) { break; } versionString=end+1; } } while(part<U_MAX_VERSION_LENGTH) { versionArray[part++]=0; } } U_CAPI void U_EXPORT2 u_versionToString(UVersionInfo versionArray, char *versionString) { uint16_t count, part; uint8_t field; if(versionString==NULL) { return; } if(versionArray==NULL) { versionString[0]=0; return; } /* count how many fields need to be written */ for(count=4; count>0 && versionArray[count-1]==0; --count) { } if(count <= 1) { count = 2; } /* write the first part */ /* write the decimal field value */ field=versionArray[0]; if(field>=100) { *versionString++=(char)('0'+field/100); field%=100; } if(field>=10) { *versionString++=(char)('0'+field/10); field%=10; } *versionString++=(char)('0'+field); /* write the following parts */ for(part=1; part<count; ++part) { /* write a dot first */ *versionString++=U_VERSION_DELIMITER; /* write the decimal field value */ field=versionArray[part]; if(field>=100) { *versionString++=(char)('0'+field/100); field%=100; } if(field>=10) { *versionString++=(char)('0'+field/10); field%=10; } *versionString++=(char)('0'+field); } /* NUL-terminate */ *versionString=0; } U_CAPI void U_EXPORT2 u_getVersion(UVersionInfo versionArray) { 
u_versionFromString(versionArray, U_ICU_VERSION); } /* * Hey, Emacs, please set the following: * * Local Variables: * indent-tabs-mode: nil * End: * */