scuffed-code/icu4c/source/common/putil.c

2717 lines
84 KiB
C

/*
******************************************************************************
*
* Copyright (C) 1997-2003, International Business Machines
* Corporation and others. All Rights Reserved.
*
******************************************************************************
*
* FILE NAME : putil.c (previously putil.cpp and ptypes.cpp)
*
* Date Name Description
* 04/14/97 aliu Creation.
* 04/24/97 aliu Added getDefaultDataDirectory() and
* getDefaultLocaleID().
* 04/28/97 aliu Rewritten to assume Unix and apply general methods
* for assumed case. Non-UNIX platforms must be
* special-cased. Rewrote numeric methods dealing
* with NaN and Infinity to be platform independent
* over all IEEE 754 platforms.
* 05/13/97 aliu Restored sign of timezone
* (semantics are hours West of GMT)
* 06/16/98 erm Added IEEE_754 stuff, cleaned up isInfinite, isNan,
* nextDouble..
* 07/22/98 stephen Added remainder, max, min, trunc
* 08/13/98 stephen Added isNegativeInfinity, isPositiveInfinity
* 08/24/98 stephen Added longBitsFromDouble
* 09/08/98 stephen Minor changes for Mac Port
* 03/02/99 stephen Removed openFile(). Added AS400 support.
* Fixed EBCDIC tables
* 04/15/99 stephen Converted to C.
* 06/28/99 stephen Removed mutex locking in u_isBigEndian().
* 08/04/99 jeffrey R. Added OS/2 changes
* 11/15/99 helena Integrated S/390 IEEE support.
* 04/26/01 Barry N. OS/400 support for uprv_getDefaultLocaleID
* 08/15/01 Steven H. OS/400 support for uprv_getDefaultCodepage
******************************************************************************
*/
#ifdef _AIX
# include<sys/types.h>
#endif
#ifdef __QNXNTO__
#include <sys/neutrino.h>
#endif
#ifndef PTX
/* Define _XOPEN_SOURCE for Solaris and friends. */
/* NetBSD needs it to be >= 4 */
#ifndef _XOPEN_SOURCE
#define _XOPEN_SOURCE 4
#endif
/* Define __USE_POSIX and __USE_XOPEN for Linux and glibc. */
#ifndef __USE_POSIX
#define __USE_POSIX
#endif
#ifndef __USE_XOPEN
#define __USE_XOPEN
#endif
#endif /* PTX */
/* include ICU headers */
#include "unicode/utypes.h"
#include "unicode/putil.h"
#include "unicode/ustring.h"
#include "uassert.h"
#include "umutex.h"
#include "cmemory.h"
#include "cstring.h"
#include "locmap.h"
#include "ucln_cmn.h"
#include "udataswp.h"
/* Include standard headers. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <locale.h>
#include <time.h>
#include <float.h>
/* include system headers */
#ifdef WIN32
# define WIN32_LEAN_AND_MEAN
# define NOGDI
# define NOUSER
# define NOSERVICE
# define NOIME
# define NOMCX
# include <windows.h>
#elif defined(OS2)
# define INCL_DOSMISC
# define INCL_DOSERRORS
# define INCL_DOSMODULEMGR
# include <os2.h>
#elif defined(OS400)
# include <float.h>
# include <qusec.h> /* error code structure */
# include <qusrjobi.h>
# include <qliept.h> /* EPT_CALL macro - this include must be after all other "QSYSINCs" */
#elif defined(XP_MAC)
# include <Files.h>
# include <IntlResources.h>
# include <Script.h>
# include <Folders.h>
# include <MacTypes.h>
# include <TextUtils.h>
#elif defined(OS390)
#include "unicode/ucnv.h" /* Needed for UCNV_SWAP_LFNL_OPTION_STRING */
#elif defined(AIX)
/*
# include <sys/ldr.h>
*/
#elif defined(U_SOLARIS) || defined(U_LINUX)
/*
# include <dlfcn.h>
# include <link.h>
*/
#elif defined(HPUX)
/*
# include <dl.h>
*/
#elif defined(U_DARWIN)
#include <sys/file.h>
#include <sys/param.h>
#endif
/* Define the extension for data files, again... */
#define DATA_TYPE "dat"
/* Leave this copyright notice here! */
static const char copyright[] = U_COPYRIGHT_STRING;
/* floating point implementations ------------------------------------------- */
/* We return QNAN rather than SNAN*/
#define SIGN 0x80000000U
#if defined(__GNUC__)
/*
This is an optimization for when u_topNBytesOfDouble
and u_bottomNBytesOfDouble can't be properly optimized by the compiler.
*/
#define USE_64BIT_DOUBLE_OPTIMIZATION 1
#else
#define USE_64BIT_DOUBLE_OPTIMIZATION 0
#endif
#if USE_64BIT_DOUBLE_OPTIMIZATION
/* gcc 3.2 has an optimization bug */
static const int64_t gNan64 = 0x7FF8000000000000LL;
static const int64_t gInf64 = 0x7FF0000000000000LL;
static const double * const fgNan = (const double *)(&gNan64);
static const double * const fgInf = (const double *)(&gInf64);
#else
#if IEEE_754
#define NAN_TOP ((int16_t)0x7FF8)
#define INF_TOP ((int16_t)0x7FF0)
#elif defined(OS390)
#define NAN_TOP ((int16_t)0x7F08)
#define INF_TOP ((int16_t)0x3F00)
#endif
/* statics */
static UBool fgNaNInitialized = FALSE;
static UBool fgInfInitialized = FALSE;
static double gNan;
static double gInf;
static double * const fgNan = &gNan;
static double * const fgInf = &gInf;
#endif
/*---------------------------------------------------------------------------
Platform utilities
Our general strategy is to assume we're on a POSIX platform. Platforms which
are non-POSIX must declare themselves so. The default POSIX implementation
will sometimes work for non-POSIX platforms as well (e.g., the NaN-related
functions).
---------------------------------------------------------------------------*/
#if defined(_WIN32) || defined(XP_MAC) || defined(OS400) || defined(OS2)
# undef U_POSIX_LOCALE
#else
# define U_POSIX_LOCALE 1
#endif
/*
* Only include langinfo.h if we have a way to get the codeset. If we later
* depend on more feature, we can test on U_HAVE_NL_LANGINFO.
*
*/
#if U_HAVE_NL_LANGINFO_CODESET
#include <langinfo.h>
#endif
/* Utilities to get the bits from a double */
static char*
u_topNBytesOfDouble(double* d, int n)
{
#if U_IS_BIG_ENDIAN
return (char*)d;
#else
return (char*)(d + 1) - n;
#endif
}
static char*
u_bottomNBytesOfDouble(double* d, int n)
{
#if U_IS_BIG_ENDIAN
return (char*)(d + 1) - n;
#else
return (char*)d;
#endif
}
/*---------------------------------------------------------------------------
Universal Implementations
These are designed to work on all platforms. Try these, and if they don't
work on your platform, then special case your platform with new
implementations.
---------------------------------------------------------------------------*/
/* Get UTC (GMT) time measured in seconds since 0:00 on 1/1/70.*/
U_CAPI int32_t U_EXPORT2
uprv_getUTCtime()
{
#ifdef XP_MAC
time_t t, t1, t2;
struct tm tmrec;
memset( &tmrec, 0, sizeof(tmrec) );
tmrec.tm_year = 70;
tmrec.tm_mon = 0;
tmrec.tm_mday = 1;
t1 = mktime(&tmrec); /* seconds of 1/1/1970*/
time(&t);
memcpy( &tmrec, gmtime(&t), sizeof(tmrec) );
t2 = mktime(&tmrec); /* seconds of current GMT*/
return t2 - t1; /* GMT (or UTC) in seconds since 1970*/
#else
time_t epochtime;
time(&epochtime);
return epochtime;
#endif
}
/*-----------------------------------------------------------------------------
IEEE 754
These methods detect and return NaN and infinity values for doubles
conforming to IEEE 754. Platforms which support this standard include X86,
Mac 680x0, Mac PowerPC, AIX RS/6000, and most others.
If this doesn't work on your platform, you have non-IEEE floating-point, and
will need to code your own versions. A naive implementation is to return 0.0
for getNaN and getInfinity, and false for isNaN and isInfinite.
---------------------------------------------------------------------------*/
U_CAPI UBool U_EXPORT2
uprv_isNaN(double number)
{
#if IEEE_754
#if USE_64BIT_DOUBLE_OPTIMIZATION
/* gcc 3.2 has an optimization bug */
/* Infinity is 0x7FF0000000000000U. Anything greater than that is a NaN */
return (UBool)(((*((int64_t *)&number)) & INT64_MAX) > gInf64);
#else
/* This should work in theory, but it doesn't, so we resort to the more*/
/* complicated method below.*/
/* return number != number;*/
/* You can't return number == getNaN() because, by definition, NaN != x for*/
/* all x, including NaN (that is, NaN != NaN). So instead, we compare*/
/* against the known bit pattern. We must be careful of endianism here.*/
/* The pattern we are looking for id:*/
/* 7FFy yyyy yyyy yyyy (some y non-zero)*/
/* There are two different kinds of NaN, but we ignore the distinction*/
/* here. Note that the y value must be non-zero; if it is zero, then we*/
/* have infinity.*/
uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
sizeof(uint32_t));
uint32_t lowBits = *(uint32_t*)u_bottomNBytesOfDouble(&number,
sizeof(uint32_t));
return (UBool)(((highBits & 0x7FF00000L) == 0x7FF00000L) &&
(((highBits & 0x000FFFFFL) != 0) || (lowBits != 0)));
#endif
#elif defined(OS390)
uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
sizeof(uint32_t));
uint32_t lowBits = *(uint32_t*)u_bottomNBytesOfDouble(&number,
sizeof(uint32_t));
return ((highBits & 0x7F080000L) == 0x7F080000L) &&
(lowBits == 0x00000000L);
#else
/* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/
/* you'll need to replace this default implementation with what's correct*/
/* for your platform.*/
return number != number;
#endif
}
U_CAPI UBool U_EXPORT2
uprv_isInfinite(double number)
{
#if IEEE_754
#if USE_64BIT_DOUBLE_OPTIMIZATION
/* gcc 3.2 has an optimization bug */
return (UBool)(((*((int64_t *)&number)) & INT64_MAX) == gInf64);
#else
/* We know the top bit is the sign bit, so we mask that off in a copy of */
/* the number and compare against infinity. [LIU]*/
/* The following approach doesn't work for some reason, so we go ahead and */
/* scrutinize the pattern itself. */
/* double a = number; */
/* *(int8_t*)u_topNBytesOfDouble(&a, 1) &= 0x7F;*/
/* return a == uprv_getInfinity();*/
/* Instead, We want to see either:*/
/* 7FF0 0000 0000 0000*/
/* FFF0 0000 0000 0000*/
uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
sizeof(uint32_t));
uint32_t lowBits = *(uint32_t*)u_bottomNBytesOfDouble(&number,
sizeof(uint32_t));
return (UBool)(((highBits & ~SIGN) == 0x7FF00000U) &&
(lowBits == 0x00000000U));
#endif
#elif defined(OS390)
uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
sizeof(uint32_t));
uint32_t lowBits = *(uint32_t*)u_bottomNBytesOfDouble(&number,
sizeof(uint32_t));
return ((highBits & ~SIGN) == 0x70FF0000L) && (lowBits == 0x00000000L);
#else
/* If your platform doesn't support IEEE 754 but *does* have an infinity*/
/* value, you'll need to replace this default implementation with what's*/
/* correct for your platform.*/
return number == (2.0 * number);
#endif
}
U_CAPI UBool U_EXPORT2
uprv_isPositiveInfinity(double number)
{
#if IEEE_754 || defined(OS390)
return (UBool)(number > 0 && uprv_isInfinite(number));
#else
return uprv_isInfinite(number);
#endif
}
U_CAPI UBool U_EXPORT2
uprv_isNegativeInfinity(double number)
{
#if IEEE_754 || defined(OS390)
return (UBool)(number < 0 && uprv_isInfinite(number));
#else
uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
sizeof(uint32_t));
return((highBits & SIGN) && uprv_isInfinite(number));
#endif
}
U_CAPI double U_EXPORT2
uprv_getNaN()
{
#if IEEE_754 || defined(OS390)
#if !USE_64BIT_DOUBLE_OPTIMIZATION
if (!fgNaNInitialized) {
/* This variable is always initialized with the same value,
so a mutex isn't needed. */
int i;
int8_t* p = (int8_t*)fgNan;
for(i = 0; i < sizeof(double); ++i)
*p++ = 0;
*(int16_t*)u_topNBytesOfDouble(fgNan, sizeof(NAN_TOP)) = NAN_TOP;
fgNaNInitialized = TRUE;
}
#endif
return *fgNan;
#else
/* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/
/* you'll need to replace this default implementation with what's correct*/
/* for your platform.*/
return 0.0;
#endif
}
U_CAPI double U_EXPORT2
uprv_getInfinity()
{
#if IEEE_754 || defined(OS390)
#if !USE_64BIT_DOUBLE_OPTIMIZATION
if (!fgInfInitialized)
{
/* This variable is always initialized with the same value,
so a mutex isn't needed. */
int i;
int8_t* p = (int8_t*)fgInf;
for(i = 0; i < sizeof(double); ++i)
*p++ = 0;
*(int16_t*)u_topNBytesOfDouble(fgInf, sizeof(INF_TOP)) = INF_TOP;
fgInfInitialized = TRUE;
}
#endif
return *fgInf;
#else
/* If your platform doesn't support IEEE 754 but *does* have an infinity*/
/* value, you'll need to replace this default implementation with what's*/
/* correct for your platform.*/
return 0.0;
#endif
}
U_CAPI double U_EXPORT2
uprv_floor(double x)
{
return floor(x);
}
U_CAPI double U_EXPORT2
uprv_ceil(double x)
{
return ceil(x);
}
U_CAPI double U_EXPORT2
uprv_round(double x)
{
return uprv_floor(x + 0.5);
}
U_CAPI double U_EXPORT2
uprv_fabs(double x)
{
return fabs(x);
}
U_CAPI double U_EXPORT2
uprv_modf(double x, double* y)
{
return modf(x, y);
}
U_CAPI double U_EXPORT2
uprv_fmod(double x, double y)
{
return fmod(x, y);
}
U_CAPI double U_EXPORT2
uprv_pow(double x, double y)
{
/* This is declared as "double pow(double x, double y)" */
return pow(x, y);
}
U_CAPI double U_EXPORT2
uprv_pow10(int32_t x)
{
return pow(10.0, (double)x);
}
U_CAPI double U_EXPORT2
uprv_fmax(double x, double y)
{
#if IEEE_754
int32_t lowBits;
/* first handle NaN*/
if(uprv_isNaN(x) || uprv_isNaN(y))
return uprv_getNaN();
/* check for -0 and 0*/
lowBits = *(uint32_t*) u_bottomNBytesOfDouble(&x, sizeof(uint32_t));
if(x == 0.0 && y == 0.0 && (lowBits & SIGN))
return y;
#endif
/* this should work for all flt point w/o NaN and Infpecial cases */
return (x > y ? x : y);
}
U_CAPI int32_t U_EXPORT2
uprv_max(int32_t x, int32_t y)
{
return (x > y ? x : y);
}
U_CAPI double U_EXPORT2
uprv_fmin(double x, double y)
{
#if IEEE_754
int32_t lowBits;
/* first handle NaN*/
if(uprv_isNaN(x) || uprv_isNaN(y))
return uprv_getNaN();
/* check for -0 and 0*/
lowBits = *(uint32_t*) u_bottomNBytesOfDouble(&y, sizeof(uint32_t));
if(x == 0.0 && y == 0.0 && (lowBits & SIGN))
return y;
#endif
/* this should work for all flt point w/o NaN and Inf special cases */
return (x > y ? y : x);
}
U_CAPI int32_t U_EXPORT2
uprv_min(int32_t x, int32_t y)
{
return (x > y ? y : x);
}
/**
* Truncates the given double.
* trunc(3.3) = 3.0, trunc (-3.3) = -3.0
* This is different than calling floor() or ceil():
* floor(3.3) = 3, floor(-3.3) = -4
* ceil(3.3) = 4, ceil(-3.3) = -3
*/
U_CAPI double U_EXPORT2
uprv_trunc(double d)
{
#if IEEE_754
int32_t lowBits;
/* handle error cases*/
if(uprv_isNaN(d))
return uprv_getNaN();
if(uprv_isInfinite(d))
return uprv_getInfinity();
lowBits = *(uint32_t*) u_bottomNBytesOfDouble(&d, sizeof(uint32_t));
if( (d == 0.0 && (lowBits & SIGN)) || d < 0)
return ceil(d);
else
return floor(d);
#else
return d >= 0 ? floor(d) : ceil(d);
#endif
}
/**
* Return the largest positive number that can be represented by an integer
* type of arbitrary bit length.
*/
U_CAPI double U_EXPORT2
uprv_maxMantissa(void)
{
return pow(2.0, DBL_MANT_DIG + 1.0) - 1.0;
}
/**
* Return the floor of the log base 10 of a given double.
* This method compensates for inaccuracies which arise naturally when
* computing logs, and always give the correct value. The parameter
* must be positive and finite.
* (Thanks to Alan Liu for supplying this function.)
*/
U_CAPI int16_t U_EXPORT2
uprv_log10(double d)
{
#ifdef OS400
/* We don't use the normal implementation because you can't underflow */
/* a double otherwise an underflow exception occurs */
return log10(d);
#else
/* The reason this routine is needed is that simply taking the*/
/* log and dividing by log10 yields a result which may be off*/
/* by 1 due to rounding errors. For example, the naive log10*/
/* of 1.0e300 taken this way is 299, rather than 300.*/
double alog10 = log(d) / log(10.0);
int16_t ailog10 = (int16_t) floor(alog10);
/* Positive logs could be too small, e.g. 0.99 instead of 1.0*/
if (alog10 > 0 && d >= pow(10.0, (double)(ailog10 + 1)))
++ailog10;
/* Negative logs could be too big, e.g. -0.99 instead of -1.0*/
else if (alog10 < 0 && d < pow(10.0, (double)(ailog10)))
--ailog10;
return ailog10;
#endif
}
U_CAPI double U_EXPORT2
uprv_log(double d)
{
return log(d);
}
U_CAPI int32_t U_EXPORT2
uprv_digitsAfterDecimal(double x)
{
char buffer[20];
int32_t numDigits, bytesWritten;
char *p = buffer;
int32_t ptPos, exponent;
/* cheat and use the string-format routine to get a string representation*/
/* (it handles mathematical inaccuracy better than we can), then find out */
/* many characters are to the right of the decimal point */
bytesWritten = sprintf(buffer, "%+.9g", x);
while (isdigit(*(++p))) {
}
ptPos = (int32_t)(p - buffer);
numDigits = (int32_t)(bytesWritten - ptPos - 1);
/* if the number's string representation is in scientific notation, find */
/* the exponent and take it into account*/
exponent = 0;
p = uprv_strchr(buffer, 'e');
if (p != 0) {
int16_t expPos = (int16_t)(p - buffer);
numDigits -= bytesWritten - expPos;
exponent = (int32_t)(atol(p + 1));
}
/* the string representation may still have spurious decimal digits in it, */
/* so we cut off at the ninth digit to the right of the decimal, and have */
/* to search backward from there to the first non-zero digit*/
if (numDigits > 9) {
numDigits = 9;
while (numDigits > 0 && buffer[ptPos + numDigits] == '0')
--numDigits;
}
numDigits -= exponent;
if (numDigits < 0) {
return 0;
}
return numDigits;
}
/*---------------------------------------------------------------------------
Platform-specific Implementations
Try these, and if they don't work on your platform, then special case your
platform with new implementations.
---------------------------------------------------------------------------*/
/* Win32 time zone detection ------------------------------------------------ */
#ifdef WIN32
/*
This code attempts to detect the Windows time zone, as set in the
Windows Date and Time control panel. It attempts to work on
multiple flavors of Windows (9x, Me, NT, 2000, XP) and on localized
installs. It works by directly interrogating the registry and
comparing the data there with the data returned by the
GetTimeZoneInformation API, along with some other strategies. The
registry contains time zone data under one of two keys (depending on
the flavor of Windows):
HKLM\SOFTWARE\Microsoft\Windows\CurrentVersion\Time Zones\
HKLM\SOFTWARE\Microsoft\Windows NT\CurrentVersion\Time Zones\
Under this key are several subkeys, one for each time zone. These
subkeys are named "Pacific" on Win9x/Me and "Pacific Standard Time"
on WinNT/2k/XP. There are some other wrinkles; see the code for
details. The subkey name is NOT LOCALIZED, allowing us to support
localized installs.
Under the subkey are data values. We care about:
Std Standard time display name, localized
TZI Binary block of data
The TZI data is of particular interest. It contains the offset, two
more offsets for standard and daylight time, and the start and end
rules. This is the same data returned by the GetTimeZoneInformation
API. The API may modify the data on the way out, so we have to be
careful, but essentially we do a binary comparison against the TZI
blocks of various registry keys. When we find a match, we know what
time zone Windows is set to. Since the registry key is not
localized, we can then translate the key through a simple table
lookup into the corresponding ICU time zone.
This strategy doesn't always work because there are zones which
share an offset and rules, so more than one TZI block will match.
For example, both Tokyo and Seoul are at GMT+9 with no DST rules;
their TZI blocks are identical. For these cases, we fall back to a
name lookup. We attempt to match the display name as stored in the
registry for the current zone to the display name stored in the
registry for various Windows zones. By comparing the registry data
directly we avoid conversion complications.
Author: Alan Liu
Since: ICU 2.6
Based on original code by Carl Brown <cbrown@xnetinc.com>
*/
static LONG openTZRegKey(HKEY* hkey, const char* winid, int winType);
/**
* Layout of the binary registry data under the "TZI" key.
*/
typedef struct {
LONG Bias;
LONG StandardBias;
LONG DaylightBias; /* Tweaked by GetTimeZoneInformation */
SYSTEMTIME StandardDate;
SYSTEMTIME DaylightDate;
} TZI;
typedef struct {
const char* icuid;
const char* winid;
} WindowsICUMap;
/**
* Mapping between Windows zone IDs and ICU zone IDs. This list has
* been mechanically checked; all zone offsets match (most important)
* and city names match the display city names (where possible). The
* presence or absence of DST differs in some cases, but this is
* acceptable as long as the zone is semantically the same (which has
* been manually checked).
*
* Windows 9x/Me zone IDs are listed as "Pacific" rather than "Pacific
* Standard Time", which is seen in NT/2k/XP. This is fixed-up at
* runtime as needed. The one exception is "Mexico Standard Time 2",
* which is not present on Windows 9x/Me.
*
* Zones that are not unique under Offset+Rules should be grouped
* together for efficiency (see code below). In addition, rules MUST
* be grouped so that all zones of a single offset are together.
*
* Comments list S(tandard) or D(aylight), as declared by Windows,
* followed by the display name (data from Windows XP).
*
* NOTE: Etc/GMT+12 is CORRECT for offset GMT-12:00. Consult
* documentation elsewhere for an explanation.
*/
static const WindowsICUMap ZONE_MAP[] = {
"Etc/GMT+12", "Dateline", /* S (GMT-12:00) International Date Line West */
"Pacific/Apia", "Samoa", /* S (GMT-11:00) Midway Island, Samoa */
"Pacific/Honolulu", "Hawaiian", /* S (GMT-10:00) Hawaii */
"America/Anchorage", "Alaskan", /* D (GMT-09:00) Alaska */
"America/Los_Angeles", "Pacific", /* D (GMT-08:00) Pacific Time (US & Canada); Tijuana */
"America/Phoenix", "US Mountain", /* S (GMT-07:00) Arizona */
"America/Denver", "Mountain", /* D (GMT-07:00) Mountain Time (US & Canada) */
"America/Chihuahua", "Mexico Standard Time 2", /* D (GMT-07:00) Chihuahua, La Paz, Mazatlan */
"America/Managua", "Central America", /* S (GMT-06:00) Central America */
"America/Regina", "Canada Central", /* S (GMT-06:00) Saskatchewan */
"America/Mexico_City", "Mexico", /* D (GMT-06:00) Guadalajara, Mexico City, Monterrey */
"America/Chicago", "Central", /* D (GMT-06:00) Central Time (US & Canada) */
"America/Indianapolis", "US Eastern", /* S (GMT-05:00) Indiana (East) */
"America/Bogota", "SA Pacific", /* S (GMT-05:00) Bogota, Lima, Quito */
"America/New_York", "Eastern", /* D (GMT-05:00) Eastern Time (US & Canada) */
"America/Caracas", "SA Western", /* S (GMT-04:00) Caracas, La Paz */
"America/Santiago", "Pacific SA", /* D (GMT-04:00) Santiago */
"America/Halifax", "Atlantic", /* D (GMT-04:00) Atlantic Time (Canada) */
"America/St_Johns", "Newfoundland", /* D (GMT-03:30) Newfoundland */
"America/Buenos_Aires", "SA Eastern", /* S (GMT-03:00) Buenos Aires, Georgetown */
"America/Godthab", "Greenland", /* D (GMT-03:00) Greenland */
"America/Sao_Paulo", "E. South America", /* D (GMT-03:00) Brasilia */
"America/Noronha", "Mid-Atlantic", /* D (GMT-02:00) Mid-Atlantic */
"Atlantic/Cape_Verde", "Cape Verde", /* S (GMT-01:00) Cape Verde Is. */
"Atlantic/Azores", "Azores", /* D (GMT-01:00) Azores */
"Africa/Casablanca", "Greenwich", /* S (GMT) Casablanca, Monrovia */
"Europe/London", "GMT", /* D (GMT) Greenwich Mean Time : Dublin, Edinburgh, Lisbon, London */
"Africa/Lagos", "W. Central Africa", /* S (GMT+01:00) West Central Africa */
"Europe/Berlin", "W. Europe", /* D (GMT+01:00) Amsterdam, Berlin, Bern, Rome, Stockholm, Vienna */
"Europe/Paris", "Romance", /* D (GMT+01:00) Brussels, Copenhagen, Madrid, Paris */
"Europe/Sarajevo", "Central European", /* D (GMT+01:00) Sarajevo, Skopje, Warsaw, Zagreb */
"Europe/Belgrade", "Central Europe", /* D (GMT+01:00) Belgrade, Bratislava, Budapest, Ljubljana, Prague */
"Africa/Johannesburg", "South Africa", /* S (GMT+02:00) Harare, Pretoria */
"Asia/Jerusalem", "Israel", /* S (GMT+02:00) Jerusalem */
"Europe/Istanbul", "GTB", /* D (GMT+02:00) Athens, Istanbul, Minsk */
"Europe/Helsinki", "FLE", /* D (GMT+02:00) Helsinki, Kyiv, Riga, Sofia, Tallinn, Vilnius */
"Africa/Cairo", "Egypt", /* D (GMT+02:00) Cairo */
"Europe/Bucharest", "E. Europe", /* D (GMT+02:00) Bucharest */
"Africa/Nairobi", "E. Africa", /* S (GMT+03:00) Nairobi */
"Asia/Riyadh", "Arab", /* S (GMT+03:00) Kuwait, Riyadh */
"Europe/Moscow", "Russian", /* D (GMT+03:00) Moscow, St. Petersburg, Volgograd */
"Asia/Baghdad", "Arabic", /* D (GMT+03:00) Baghdad */
"Asia/Tehran", "Iran", /* D (GMT+03:30) Tehran */
"Asia/Muscat", "Arabian", /* S (GMT+04:00) Abu Dhabi, Muscat */
"Asia/Tbilisi", "Caucasus", /* D (GMT+04:00) Baku, Tbilisi, Yerevan */
"Asia/Kabul", "Afghanistan", /* S (GMT+04:30) Kabul */
"Asia/Karachi", "West Asia", /* S (GMT+05:00) Islamabad, Karachi, Tashkent */
"Asia/Yekaterinburg", "Ekaterinburg", /* D (GMT+05:00) Ekaterinburg */
"Asia/Calcutta", "India", /* S (GMT+05:30) Chennai, Kolkata, Mumbai, New Delhi */
"Asia/Katmandu", "Nepal", /* S (GMT+05:45) Kathmandu */
"Asia/Colombo", "Sri Lanka", /* S (GMT+06:00) Sri Jayawardenepura */
"Asia/Dhaka", "Central Asia", /* S (GMT+06:00) Astana, Dhaka */
"Asia/Novosibirsk", "N. Central Asia", /* D (GMT+06:00) Almaty, Novosibirsk */
"Asia/Rangoon", "Myanmar", /* S (GMT+06:30) Rangoon */
"Asia/Bangkok", "SE Asia", /* S (GMT+07:00) Bangkok, Hanoi, Jakarta */
"Asia/Krasnoyarsk", "North Asia", /* D (GMT+07:00) Krasnoyarsk */
"Australia/Perth", "W. Australia", /* S (GMT+08:00) Perth */
"Asia/Taipei", "Taipei", /* S (GMT+08:00) Taipei */
"Asia/Singapore", "Singapore", /* S (GMT+08:00) Kuala Lumpur, Singapore */
"Asia/Hong_Kong", "China", /* S (GMT+08:00) Beijing, Chongqing, Hong Kong, Urumqi */
"Asia/Irkutsk", "North Asia East", /* D (GMT+08:00) Irkutsk, Ulaan Bataar */
"Asia/Tokyo", "Tokyo", /* S (GMT+09:00) Osaka, Sapporo, Tokyo */
"Asia/Seoul", "Korea", /* S (GMT+09:00) Seoul */
"Asia/Yakutsk", "Yakutsk", /* D (GMT+09:00) Yakutsk */
"Australia/Darwin", "AUS Central", /* S (GMT+09:30) Darwin */
"Australia/Adelaide", "Cen. Australia", /* D (GMT+09:30) Adelaide */
"Pacific/Guam", "West Pacific", /* S (GMT+10:00) Guam, Port Moresby */
"Australia/Brisbane", "E. Australia", /* S (GMT+10:00) Brisbane */
"Asia/Vladivostok", "Vladivostok", /* D (GMT+10:00) Vladivostok */
"Australia/Hobart", "Tasmania", /* D (GMT+10:00) Hobart */
"Australia/Sydney", "AUS Eastern", /* D (GMT+10:00) Canberra, Melbourne, Sydney */
"Asia/Magadan", "Central Pacific", /* S (GMT+11:00) Magadan, Solomon Is., New Caledonia */
"Pacific/Fiji", "Fiji", /* S (GMT+12:00) Fiji, Kamchatka, Marshall Is. */
"Pacific/Auckland", "New Zealand", /* D (GMT+12:00) Auckland, Wellington */
"Pacific/Tongatapu", "Tonga", /* S (GMT+13:00) Nuku'alofa */
NULL, NULL
};
typedef struct {
const char* winid;
const char* altwinid;
} WindowsZoneRemap;
/**
* If a lookup fails, we attempt to remap certain Windows ids to
* alternate Windows ids. If the alternate listed here begins with
* '-', we use it as is (without the '-'). If it begins with '+', we
* append a " Standard Time" if appropriate.
*/
static const WindowsZoneRemap ZONE_REMAP[] = {
"Central European", "-Warsaw",
"Central Europe", "-Prague Bratislava",
"China", "-Beijing",
"Greenwich", "+GMT",
"GTB", "+GFT",
"Arab", "+Saudi Arabia",
"SE Asia", "+Bangkok",
"AUS Eastern", "+Sydney",
NULL, NULL,
};
/**
* Various registry keys and key fragments.
*/
static const char CURRENT_ZONE_REGKEY[] = "SYSTEM\\CurrentControlSet\\Control\\TimeZoneInformation\\";
static const char STANDARD_NAME_REGKEY[] = "StandardName";
static const char STANDARD_TIME_REGKEY[] = " Standard Time";
static const char TZI_REGKEY[] = "TZI";
static const char STD_REGKEY[] = "Std";
/**
* HKLM subkeys used to probe for the flavor of Windows. Note that we
* specifically check for the "GMT" zone subkey; this is present on
* NT, but on XP has become "GMT Standard Time". We need to
* discriminate between these cases.
*/
static const char* const WIN_TYPE_PROBE_REGKEY[] = {
/* WIN_9X_ME_TYPE */
"SOFTWARE\\Microsoft\\Windows\\CurrentVersion\\Time Zones",
/* WIN_NT_TYPE */
"SOFTWARE\\Microsoft\\Windows NT\\CurrentVersion\\Time Zones\\GMT"
/* otherwise: WIN_2K_XP_TYPE */
};
/**
* The time zone root subkeys (under HKLM) for different flavors of
* Windows.
*/
static const char* const TZ_REGKEY[] = {
/* WIN_9X_ME_TYPE */
"SOFTWARE\\Microsoft\\Windows\\CurrentVersion\\Time Zones\\",
/* WIN_NT_TYPE | WIN_2K_XP_TYPE */
"SOFTWARE\\Microsoft\\Windows NT\\CurrentVersion\\Time Zones\\"
};
/**
* Flavor of Windows, from our perspective. Not a real OS version,
* but rather the flavor of the layout of the time zone information in
* the registry.
*/
enum {
WIN_9X_ME_TYPE = 0,
WIN_NT_TYPE = 1,
WIN_2K_XP_TYPE = 2
};
/**
* Main Windows time zone detection function. Returns the Windows
* time zone, translated to an ICU time zone, or NULL upon failure.
*/
static const char* detectWindowsTimeZone() {
int winType;
LONG result;
HKEY hkey;
TZI tziKey;
TZI tziReg;
DWORD cbData = sizeof(TZI);
TIME_ZONE_INFORMATION apiTZI;
char stdName[32];
DWORD stdNameSize;
char stdRegName[64];
DWORD stdRegNameSize;
int firstMatch, lastMatch;
int j;
/* Detect the version of windows by trying to open a sequence of
probe keys. We don't use the OS version API because what we
really want to know is how the registry is laid out.
Specifically, is it 9x/Me or not, and is it "GMT" or "GMT
Standard Time". */
for (winType=0; winType<2; ++winType) {
result = RegOpenKeyEx(HKEY_LOCAL_MACHINE,
WIN_TYPE_PROBE_REGKEY[winType],
0,
KEY_QUERY_VALUE,
&hkey);
RegCloseKey(hkey);
if (result == ERROR_SUCCESS) {
break;
}
}
/* Obtain TIME_ZONE_INFORMATION from the API, and then convert it
to TZI. We could also interrogate the registry directly; we do
this below if needed. */
memset(&apiTZI, 0, sizeof(apiTZI));
GetTimeZoneInformation(&apiTZI);
tziKey.Bias = apiTZI.Bias;
memcpy((char *)&tziKey.StandardDate, (char*)&apiTZI.StandardDate,
sizeof(apiTZI.StandardDate));
memcpy((char *)&tziKey.DaylightDate, (char*)&apiTZI.DaylightDate,
sizeof(apiTZI.DaylightDate));
/* For each zone that can be identified by Offset+Rules, see if we
have a match. Continue scanning after finding a match,
recording the index of the first and the last match. We have
to do this because some zones are not unique under
Offset+Rules. */
firstMatch = lastMatch = -1;
for (j=0; ZONE_MAP[j].icuid; j++) {
result = openTZRegKey(&hkey, ZONE_MAP[j].winid, winType);
if (result == ERROR_SUCCESS) {
result = RegQueryValueEx(hkey,
TZI_REGKEY,
NULL,
NULL,
(LPBYTE)&tziReg,
&cbData);
}
RegCloseKey(hkey);
if (result == ERROR_SUCCESS) {
/* Assume that offsets are grouped together, and bail out
when we've scanned everything with a matching
offset. */
if (firstMatch >= 0 && tziKey.Bias != tziReg.Bias) {
break;
}
/* Windows alters the DaylightBias in some situations.
Using the bias and the rules suffices, so overwrite
these unreliable fields. */
tziKey.StandardBias = tziReg.StandardBias;
tziKey.DaylightBias = tziReg.DaylightBias;
if (memcmp((char *)&tziKey, (char*)&tziReg,
sizeof(tziKey)) == 0) {
if (firstMatch < 0) {
firstMatch = j;
}
lastMatch = j;
}
}
}
/* This should never happen; if it does it means our table doesn't
match Windows AT ALL, perhaps because this is post-XP? */
if (firstMatch < 0) {
return NULL;
}
if (firstMatch != lastMatch) {
/* Offset+Rules lookup yielded >= 2 matches. Try to match the
localized display name. Get the name from the registry
(not the API). This avoids conversion issues. Use the
standard name, since Windows modifies the daylight name to
match the standard name if there is no DST. */
result = RegOpenKeyEx(HKEY_LOCAL_MACHINE,
CURRENT_ZONE_REGKEY,
0,
KEY_QUERY_VALUE,
&hkey);
if (result == ERROR_SUCCESS) {
stdNameSize = sizeof(stdName);
result = RegQueryValueEx(hkey,
(LPTSTR)STANDARD_NAME_REGKEY,
NULL,
NULL,
(LPBYTE)stdName,
&stdNameSize);
RegCloseKey(hkey);
/* Scan through the Windows time zone data in the registry
again (just the range of zones with matching TZIs) and
look for a standard display name match. */
for (j=firstMatch; j<=lastMatch; j++) {
result = openTZRegKey(&hkey, ZONE_MAP[j].winid, winType);
if (result == ERROR_SUCCESS) {
stdRegNameSize = sizeof(stdRegName);
result = RegQueryValueEx(hkey,
(LPTSTR)STD_REGKEY,
NULL,
NULL,
(LPBYTE)stdRegName,
&stdRegNameSize);
}
RegCloseKey(hkey);
if (result == ERROR_SUCCESS &&
stdRegNameSize == stdNameSize &&
memcmp(stdName, stdRegName, stdNameSize) == 0) {
firstMatch = j; /* record the match */
break;
}
}
} else {
RegCloseKey(hkey); /* should never get here */
}
}
return ZONE_MAP[firstMatch].icuid;
}
/**
* Auxiliary Windows time zone function. Attempts to open the given
* Windows time zone ID as a registry key. Returns ERROR_SUCCESS if
* successful. Caller must close the registry key. Handles
* variations in the resource layout in different flavors of Windows.
*
* @param hkey output parameter to receive opened registry key
* @param winid Windows zone ID, e.g., "Pacific", without the
* " Standard Time" suffix (if any). Special case "Mexico Standard Time 2"
* allowed.
* @param winType Windows flavor (WIN_9X_ME_TYPE, etc.)
* @return ERROR_SUCCESS upon success
*/
static LONG openTZRegKey(HKEY *hkey, const char* winid, int winType) {
LONG result;
char subKeyName[96];
char* name;
int i;
strcpy(subKeyName, TZ_REGKEY[(winType == WIN_9X_ME_TYPE) ? 0 : 1]);
name = &subKeyName[strlen(subKeyName)];
strcat(subKeyName, winid);
if (winType != WIN_9X_ME_TYPE) {
/* Don't modify "Mexico Standard Time 2", which does not occur
on WIN_9X_ME_TYPE. Also, if the type is WIN_NT_TYPE, then
in practice this means the GMT key is not followed by
" Standard Time", so don't append in that case. */
int isMexico2 = (winid[strlen(winid)- 1] == '2');
if (!isMexico2 &&
!(winType == WIN_NT_TYPE && strcmp(winid, "GMT") == 0)) {
strcat(subKeyName, STANDARD_TIME_REGKEY);
}
}
result = RegOpenKeyEx(HKEY_LOCAL_MACHINE,
subKeyName,
0,
KEY_QUERY_VALUE,
hkey);
if (result != ERROR_SUCCESS) {
/* If the primary lookup fails, try to remap the Windows zone
ID, according to the remapping table. */
for (i=0; ZONE_REMAP[i].winid; ++i) {
if (strcmp(winid, ZONE_REMAP[i].winid) == 0) {
strcpy(name, ZONE_REMAP[i].altwinid + 1);
if (*(ZONE_REMAP[i].altwinid) == '+' &&
winType != WIN_9X_ME_TYPE) {
strcat(subKeyName, STANDARD_TIME_REGKEY);
}
result = RegOpenKeyEx(HKEY_LOCAL_MACHINE,
subKeyName,
0,
KEY_QUERY_VALUE,
hkey);
break;
}
}
}
return result;
}
#endif /*WIN32*/
/* Generic time zone layer -------------------------------------------------- */
/* Time zone utilities */
U_CAPI void U_EXPORT2
uprv_tzset()
{
#ifdef U_TZSET
U_TZSET();
#else
/* no initialization*/
#endif
}
U_CAPI int32_t U_EXPORT2
uprv_timezone()
{
#if U_HAVE_TIMEZONE
return U_TIMEZONE;
#else
time_t t, t1, t2;
struct tm tmrec;
UBool dst_checked;
int32_t tdiff = 0;
time(&t);
memcpy( &tmrec, localtime(&t), sizeof(tmrec) );
dst_checked = (tmrec.tm_isdst != 0); /* daylight savings time is checked*/
t1 = mktime(&tmrec); /* local time in seconds*/
memcpy( &tmrec, gmtime(&t), sizeof(tmrec) );
t2 = mktime(&tmrec); /* GMT (or UTC) in seconds*/
tdiff = t2 - t1;
/* imitate NT behaviour, which returns same timezone offset to GMT for
winter and summer*/
if (dst_checked)
tdiff += 3600;
return tdiff;
#endif
}
/* Note that U_TZNAME does *not* have to be tzname, but if it is,
some platforms need to have it declared here. */
#if defined(IRIX) || defined(U_DARWIN) /* For SGI or Mac OS X. */
extern char *tzname[]; /* RS6000 and others reject char **tzname. */
#elif defined(U_CYGWIN)
extern U_IMPORT char *_tzname[2];
#endif
#if defined(U_DARWIN) /* For Mac OS X */
#define TZZONELINK "/etc/localtime"
#define TZZONEINFO "/usr/share/zoneinfo/"
static char *gTimeZoneBuffer = NULL; /* Heap allocated */
#endif
#include <stdio.h>
U_CAPI char* U_EXPORT2
uprv_tzname(int n)
{
#ifdef WIN32
char* id = (char*) detectWindowsTimeZone();
if (id != NULL) {
return id;
}
#endif
#if defined(U_DARWIN)
int ret;
char *tzenv;
tzenv = getenv("TZFILE");
if (tzenv != NULL) {
return tzenv;
}
#if 0
/* TZ is often set to "PST8PDT" or similar, so we cannot use it. Alan */
tzenv = getenv("TZ");
if (tzenv != NULL) {
return tzenv;
}
#endif
/* Caller must handle threading issues */
if (gTimeZoneBuffer == NULL) {
gTimeZoneBuffer = (char *) uprv_malloc(MAXPATHLEN + 2);
ret = readlink(TZZONELINK, gTimeZoneBuffer, MAXPATHLEN + 2);
if (0 < ret) {
gTimeZoneBuffer[ret] = '\0';
if (strncmp(gTimeZoneBuffer, TZZONEINFO, sizeof(TZZONEINFO) - 1) == 0) {
return (gTimeZoneBuffer += sizeof(TZZONEINFO) - 1);
}
}
uprv_free(gTimeZoneBuffer);
gTimeZoneBuffer = NULL;
}
#endif
#ifdef U_TZNAME
return U_TZNAME[n];
#else
return "";
#endif
}
/* Get and set the ICU data directory --------------------------------------- */
static char *gDataDirectory = NULL;
#if U_POSIX_LOCALE
static char *gCorrectedPOSIXLocale = NULL; /* Heap allocated */
#endif
UBool putil_cleanup(void)
{
if (gDataDirectory) {
uprv_free(gDataDirectory);
gDataDirectory = NULL;
}
#if U_POSIX_LOCALE
if (gCorrectedPOSIXLocale) {
uprv_free(gCorrectedPOSIXLocale);
gCorrectedPOSIXLocale = NULL;
}
#endif
return TRUE;
}
/*
* Set the data directory.
* Make a copy of the passed string, and set the global data dir to point to it.
* TODO: see bug #2849, regarding thread safety.
*/
U_CAPI void U_EXPORT2
u_setDataDirectory(const char *directory) {
char *newDataDir;
int length;
if(directory==NULL) {
directory = "";
}
length=uprv_strlen(directory);
newDataDir = (char *)uprv_malloc(length + 2);
uprv_strcpy(newDataDir, directory);
umtx_lock(NULL);
if (gDataDirectory) {
uprv_free(gDataDirectory);
}
gDataDirectory = newDataDir;
umtx_unlock(NULL);
}
U_CAPI const char * U_EXPORT2
u_getDataDirectory(void) {
const char *path = NULL;
char pathBuffer[1024];
const char *dataDir;
/* if we have the directory, then return it immediately */
umtx_lock(NULL);
dataDir = gDataDirectory;
umtx_unlock(NULL);
if(dataDir) {
return dataDir;
}
/* we need to look for it */
pathBuffer[0] = 0; /* Shuts up compiler warnings about unreferenced */
/* variables when the code using it is ifdefed out */
# if !defined(XP_MAC)
/* first try to get the environment variable */
path=getenv("ICU_DATA");
# else /* XP_MAC */
{
OSErr myErr;
short vRef;
long dir,newDir;
int16_t volNum;
Str255 xpath;
FSSpec spec;
short len;
Handle full;
xpath[0]=0;
myErr = HGetVol(xpath, &volNum, &dir);
if(myErr == noErr) {
myErr = FindFolder(volNum, kApplicationSupportFolderType, TRUE, &vRef, &dir);
newDir=-1;
if (myErr == noErr) {
myErr = DirCreate(volNum,
dir,
"\pICU",
&newDir);
if( (myErr == noErr) || (myErr == dupFNErr) ) {
spec.vRefNum = volNum;
spec.parID = dir;
uprv_memcpy(spec.name, "\pICU", 4);
myErr = FSpGetFullPath(&spec, &len, &full);
if(full != NULL)
{
HLock(full);
uprv_memcpy(pathBuffer, ((char*)(*full)), len);
pathBuffer[len] = 0;
path = pathBuffer;
DisposeHandle(full);
}
}
}
}
}
# endif
# if defined WIN32 && defined ICU_ENABLE_DEPRECATED_WIN_REGISTRY
/* next, try to read the path from the registry */
if(path==NULL || *path==0) {
HKEY key;
if(ERROR_SUCCESS==RegOpenKeyEx(HKEY_LOCAL_MACHINE, "SOFTWARE\\ICU\\Unicode\\Data", 0, KEY_QUERY_VALUE, &key)) {
DWORD type=REG_EXPAND_SZ, size=sizeof(pathBuffer);
if(ERROR_SUCCESS==RegQueryValueEx(key, "Path", NULL, &type, (unsigned char *)pathBuffer, &size) && size>1) {
if(type==REG_EXPAND_SZ) {
/* replace environment variable references by their values */
char temporaryPath[1024];
/* copy the path with variables to the temporary one */
uprv_memcpy(temporaryPath, pathBuffer, size);
/* do the replacement and store it in the pathBuffer */
size=ExpandEnvironmentStrings(temporaryPath, pathBuffer, sizeof(pathBuffer));
if(size>0 && size<sizeof(pathBuffer)) {
path=pathBuffer;
}
} else if(type==REG_SZ) {
path=pathBuffer;
}
}
RegCloseKey(key);
}
}
# endif
/* ICU_DATA_DIR may be set as a compile option */
# ifdef ICU_DATA_DIR
if(path==NULL || *path==0) {
path=ICU_DATA_DIR;
}
# endif
if(path==NULL) {
/* It looks really bad, set it to something. */
path = "";
}
u_setDataDirectory(path);
return gDataDirectory;
}
/* Macintosh-specific locale information ------------------------------------ */
#ifdef XP_MAC
typedef struct {
int32_t script;
int32_t region;
int32_t lang;
int32_t date_region;
const char* posixID;
} mac_lc_rec;
/* Todo: This will be updated with a newer version from www.unicode.org web
page when it's available.*/
#define MAC_LC_MAGIC_NUMBER -5
#define MAC_LC_INIT_NUMBER -9
static const mac_lc_rec mac_lc_recs[] = {
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 0, "en_US",
/* United States*/
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 1, "fr_FR",
/* France*/
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 2, "en_GB",
/* Great Britain*/
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 3, "de_DE",
/* Germany*/
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 4, "it_IT",
/* Italy*/
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 5, "nl_NL",
/* Metherlands*/
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 6, "fr_BE",
/* French for Belgium or Lxembourg*/
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 7, "sv_SE",
/* Sweden*/
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 9, "da_DK",
/* Denmark*/
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 10, "pt_PT",
/* Portugal*/
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 11, "fr_CA",
/* French Canada*/
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 13, "is_IS",
/* Israel*/
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 14, "ja_JP",
/* Japan*/
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 15, "en_AU",
/* Australia*/
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 16, "ar_AE",
/* the Arabic world (?)*/
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 17, "fi_FI",
/* Finland*/
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 18, "fr_CH",
/* French for Switzerland*/
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 19, "de_CH",
/* German for Switzerland*/
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 20, "el_GR",
/* Greece*/
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 21, "is_IS",
/* Iceland ===*/
/*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 22, "",*/
/* Malta ===*/
/*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 23, "",*/
/* Cyprus ===*/
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 24, "tr_TR",
/* Turkey ===*/
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 25, "sh_YU",
/* Croatian system for Yugoslavia*/
/*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 33, "",*/
/* Hindi system for India*/
/*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 34, "",*/
/* Pakistan*/
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 41, "lt_LT",
/* Lithuania*/
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 42, "pl_PL",
/* Poland*/
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 43, "hu_HU",
/* Hungary*/
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 44, "et_EE",
/* Estonia*/
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 45, "lv_LV",
/* Latvia*/
/*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 46, "",*/
/* Lapland [Ask Rich for the data. HS]*/
/*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 47, "",*/
/* Faeroe Islands*/
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 48, "fa_IR",
/* Iran*/
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 49, "ru_RU",
/* Russia*/
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 50, "en_IE",
/* Ireland*/
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 51, "ko_KR",
/* Korea*/
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 52, "zh_CN",
/* People's Republic of China*/
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 53, "zh_TW",
/* Taiwan*/
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 54, "th_TH",
/* Thailand*/
/* fallback is en_US*/
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER,
MAC_LC_MAGIC_NUMBER, "en_US"
};
#endif
#if U_POSIX_LOCALE
/* Return just the POSIX id, whatever happens to be in it */
static const char *uprv_getPOSIXID(void)
{
static const char* posixID = NULL;
if (posixID == 0) {
posixID = getenv("LC_ALL");
if (posixID == 0) {
posixID = getenv("LANG");
if (posixID == 0) {
/*
* On Solaris two different calls to setlocale can result in
* different values. Only get this value once.
*/
posixID = setlocale(LC_ALL, NULL);
}
}
}
if (posixID==0)
{
/* Nothing worked. Give it a nice value. */
posixID = "en_US";
}
else if ((uprv_strcmp("C", posixID) == 0)
|| (uprv_strchr(posixID, ' ') != NULL)
|| (uprv_strchr(posixID, '/') != NULL))
{ /* HPUX returns 'C C C C C C C' */
/* Solaris can return /en_US/C/C/C/C/C on the second try. */
/* Maybe we got some garbage. Give it a nice value. */
posixID = "en_US_POSIX";
}
return posixID;
}
#endif
/* NOTE: The caller should handle thread safety */
U_CAPI const char* U_EXPORT2
uprv_getDefaultLocaleID()
{
#if U_POSIX_LOCALE
/*
Note that: (a '!' means the ID is improper somehow)
LC_ALL ----> default_loc codepage
--------------------------------------------------------
ab.CD ab CD
ab@CD ab__CD -
ab@CD.EF ab__CD EF
ab_CD.EF@GH ab_CD_GH EF
Some 'improper' ways to do the same as above:
! ab_CD@GH.EF ab_CD_GH EF
! ab_CD.EF@GH.IJ ab_CD_GH EF
! ab_CD@ZZ.EF@GH.IJ ab_CD_GH EF
_CD@GH _CD_GH -
_CD.EF@GH _CD_GH EF
The variant cannot have dots in it.
The 'rightmost' variant (@xxx) wins.
The leftmost codepage (.xxx) wins.
*/
char *correctedPOSIXLocale = 0;
const char* posixID = uprv_getPOSIXID();
const char *p;
const char *q;
int32_t len;
/* Format: (no spaces)
ll [ _CC ] [ . MM ] [ @ VV]
l = lang, C = ctry, M = charmap, V = variant
*/
if (gCorrectedPOSIXLocale != NULL) {
return gCorrectedPOSIXLocale;
}
if ((p = uprv_strchr(posixID, '.')) != NULL) {
/* assume new locale can't be larger than old one? */
correctedPOSIXLocale = uprv_malloc(uprv_strlen(posixID));
uprv_strncpy(correctedPOSIXLocale, posixID, p-posixID);
correctedPOSIXLocale[p-posixID] = 0;
/* do not copy after the @ */
if ((p = uprv_strchr(correctedPOSIXLocale, '@')) != NULL) {
correctedPOSIXLocale[p-correctedPOSIXLocale] = 0;
}
}
/* Note that we scan the *uncorrected* ID. */
if ((p = uprv_strrchr(posixID, '@')) != NULL) {
if (correctedPOSIXLocale == NULL) {
correctedPOSIXLocale = uprv_malloc(uprv_strlen(posixID));
uprv_strncpy(correctedPOSIXLocale, posixID, p-posixID);
correctedPOSIXLocale[p-posixID] = 0;
}
p++;
/* Take care of any special cases here.. */
if (!uprv_strcmp(p, "nynorsk")) {
p = "NY";
/* Should we assume no_NO_NY instead of possible no__NY?
* if (!uprv_strcmp(correctedPOSIXLocale, "no")) {
* uprv_strcpy(correctedPOSIXLocale, "no_NO");
* }
*/
}
if (uprv_strchr(correctedPOSIXLocale,'_') == NULL) {
uprv_strcat(correctedPOSIXLocale, "__"); /* aa@b -> aa__b */
}
else {
uprv_strcat(correctedPOSIXLocale, "_"); /* aa_CC@b -> aa_CC_b */
}
if ((q = uprv_strchr(p, '.')) != NULL) {
/* How big will the resulting string be? */
len = uprv_strlen(correctedPOSIXLocale) + (q-p);
uprv_strncat(correctedPOSIXLocale, p, q-p);
correctedPOSIXLocale[len] = 0;
}
else {
/* Anything following the @ sign */
uprv_strcat(correctedPOSIXLocale, p);
}
/* Should there be a map from 'no@nynorsk' -> no_NO_NY here?
* How about 'russian' -> 'ru'?
*/
}
/* Was a correction made? */
if (correctedPOSIXLocale != NULL) {
posixID = correctedPOSIXLocale;
}
else {
/* copy it, just in case the original pointer goes away. See j2395 */
correctedPOSIXLocale = (char *)uprv_malloc(uprv_strlen(posixID) + 1);
posixID = uprv_strcpy(correctedPOSIXLocale, posixID);
}
if (gCorrectedPOSIXLocale == NULL) {
gCorrectedPOSIXLocale = correctedPOSIXLocale;
correctedPOSIXLocale = NULL;
}
if (correctedPOSIXLocale != NULL) { /* Was already set - clean up. */
uprv_free(correctedPOSIXLocale);
}
return posixID;
#elif defined(WIN32)
UErrorCode status = U_ZERO_ERROR;
LCID id = GetThreadLocale();
const char* locID = uprv_convertToPosix(id, &status);
if (U_FAILURE(status)) {
locID = "en_US";
}
return locID;
#elif defined(XP_MAC)
int32_t script = MAC_LC_INIT_NUMBER;
/* = IntlScript(); or GetScriptManagerVariable(smSysScript);*/
int32_t region = MAC_LC_INIT_NUMBER;
/* = GetScriptManagerVariable(smRegionCode);*/
int32_t lang = MAC_LC_INIT_NUMBER;
/* = GetScriptManagerVariable(smScriptLang);*/
int32_t date_region = MAC_LC_INIT_NUMBER;
const char* posixID = 0;
int32_t count = sizeof(mac_lc_recs) / sizeof(mac_lc_rec);
int32_t i;
Intl1Hndl ih;
ih = (Intl1Hndl) GetIntlResource(1);
if (ih)
date_region = ((uint16_t)(*ih)->intl1Vers) >> 8;
for (i = 0; i < count; i++) {
if ( ((mac_lc_recs[i].script == MAC_LC_MAGIC_NUMBER)
|| (mac_lc_recs[i].script == script))
&& ((mac_lc_recs[i].region == MAC_LC_MAGIC_NUMBER)
|| (mac_lc_recs[i].region == region))
&& ((mac_lc_recs[i].lang == MAC_LC_MAGIC_NUMBER)
|| (mac_lc_recs[i].lang == lang))
&& ((mac_lc_recs[i].date_region == MAC_LC_MAGIC_NUMBER)
|| (mac_lc_recs[i].date_region == date_region))
)
{
posixID = mac_lc_recs[i].posixID;
break;
}
}
return posixID;
#elif defined(OS2)
char * locID;
locID = getenv("LC_ALL");
if (!locID || !*locID)
locID = getenv("LANG");
if (!locID || !*locID) {
locID = "en_US";
}
if (!stricmp(locID, "c") || !stricmp(locID, "posix") ||
!stricmp(locID, "univ"))
locID = "en_US_POSIX";
return locID;
#elif defined(OS400)
/* locales are process scoped and are by definition thread safe */
static char correctedLocale[64];
const char *localeID = getenv("LC_ALL");
char *p;
if (localeID == NULL)
localeID = getenv("LANG");
if (localeID == NULL)
localeID = setlocale(LC_ALL, NULL);
/* Make sure we have something... */
if (localeID == NULL)
return "en_US_POSIX";
/* Extract the locale name from the path. */
if((p = uprv_strrchr(localeID, '/')) != NULL)
{
/* Increment p to start of locale name. */
p++;
localeID = p;
}
/* Copy to work location. */
uprv_strcpy(correctedLocale, localeID);
/* Strip off the '.locale' extension. */
if((p = uprv_strchr(correctedLocale, '.')) != NULL) {
*p = 0;
}
/* Upper case the locale name. */
T_CString_toUpperCase(correctedLocale);
/* See if we are using the POSIX locale. Any of the
* following are equivalent and use the same QLGPGCMA
* (POSIX) locale.
*/
if ((uprv_strcmp("C", correctedLocale) == 0) ||
(uprv_strcmp("POSIX", correctedLocale) == 0) ||
(uprv_strcmp("QLGPGCMA", correctedLocale) == 0))
{
uprv_strcpy(correctedLocale, "en_US_POSIX");
}
else
{
int16_t LocaleLen;
/* Lower case the lang portion. */
for(p = correctedLocale; *p != 0 && *p != '_'; p++)
{
*p = uprv_tolower(*p);
}
/* Adjust for Euro. After '_E' add 'URO'. */
LocaleLen = uprv_strlen(correctedLocale);
if (correctedLocale[LocaleLen - 2] == '_' &&
correctedLocale[LocaleLen - 1] == 'E')
{
uprv_strcat(correctedLocale, "URO");
}
/* If using Lotus-based locale then convert to
* equivalent non Lotus.
*/
else if (correctedLocale[LocaleLen - 2] == '_' &&
correctedLocale[LocaleLen - 1] == 'L')
{
correctedLocale[LocaleLen - 2] = 0;
}
/* There are separate simplified and traditional
* locales called zh_HK_S and zh_HK_T.
*/
else if (uprv_strncmp(correctedLocale, "zh_HK", 5) == 0)
{
uprv_strcpy(correctedLocale, "zh_HK");
}
/* A special zh_CN_GBK locale...
*/
else if (uprv_strcmp(correctedLocale, "zh_CN_GBK") == 0)
{
uprv_strcpy(correctedLocale, "zh_CN");
}
}
return correctedLocale;
#endif
}
U_CAPI const char* U_EXPORT2
uprv_getDefaultCodepage()
{
#if defined(OS400)
uint32_t ccsid = 37; /* Default to ibm-37 */
static char codepage[64];
Qwc_JOBI0400_t jobinfo;
Qus_EC_t error = { sizeof(Qus_EC_t) }; /* SPI error code */
EPT_CALL(QUSRJOBI)(&jobinfo, sizeof(jobinfo), "JOBI0400",
"* ", " ", &error);
if (error.Bytes_Available == 0) {
if (jobinfo.Coded_Char_Set_ID != 0xFFFF) {
ccsid = (uint32_t)jobinfo.Coded_Char_Set_ID;
}
else if (jobinfo.Default_Coded_Char_Set_Id != 0xFFFF) {
ccsid = (uint32_t)jobinfo.Default_Coded_Char_Set_Id;
}
/* else use the default */
}
sprintf(codepage,"ibm-%d", ccsid);
return codepage;
#elif defined(OS390)
static char codepage[64];
sprintf(codepage,"%s" UCNV_SWAP_LFNL_OPTION_STRING, nl_langinfo(CODESET));
return codepage;
#elif defined(XP_MAC)
return "ibm-1275"; /* TODO: Macintosh Roman. There must be a better way. fixme! */
#elif defined(WIN32)
static char codepage[64];
sprintf(codepage, "windows-%d", GetACP());
return codepage;
#elif U_POSIX_LOCALE
static char codesetName[100];
char *name = NULL;
char *euro = NULL;
const char *localeName = NULL;
const char *defaultTable = NULL;
uprv_memset(codesetName, 0, sizeof(codesetName));
localeName = uprv_getPOSIXID();
if (localeName != NULL && (name = (uprv_strchr(localeName, (int)'.'))) != NULL)
{
/* strip the locale name and look at the suffix only */
name = uprv_strncpy(codesetName, name+1, sizeof(codesetName));
codesetName[sizeof(codesetName)-1] = 0;
if ((euro = (uprv_strchr(name, (int)'@'))) != NULL)
{
*euro = 0;
}
/* if we can find the codset name, return that. */
if (*name)
{
return name;
}
}
/* otherwise, try CTYPE */
if (*codesetName)
{
uprv_memset(codesetName, 0, sizeof(codesetName));
}
localeName = setlocale(LC_CTYPE, "");
if (localeName != NULL && (name = (uprv_strchr(localeName, (int)'.'))) != NULL)
{
/* strip the locale name and look at the suffix only */
name = uprv_strncpy(codesetName, name+1, sizeof(codesetName));
codesetName[sizeof(codesetName)-1] = 0;
if ((euro = (uprv_strchr(name, (int)'@'))) != NULL)
{
*euro = 0;
}
/* if we can find the codset name from setlocale, return that. */
if (*name)
{
return name;
}
}
if (*codesetName)
{
uprv_memset(codesetName, 0, sizeof(codesetName));
}
#if U_HAVE_NL_LANGINFO_CODESET
{
const char *codeset = nl_langinfo(U_NL_LANGINFO_CODESET);
if (codeset != NULL) {
uprv_strncpy(codesetName, codeset, sizeof(codesetName));
codesetName[sizeof(codesetName)-1] = 0;
}
}
#endif
if (*codesetName == 0)
{
/* look up in srl's table */
defaultTable = uprv_defaultCodePageForLocale(localeName);
if (defaultTable != NULL)
{
uprv_strcpy(codesetName, defaultTable);
}
else
{
/* if the table lookup failed, return US ASCII (ISO 646). */
uprv_strcpy(codesetName, "US-ASCII");
}
}
return codesetName;
#else
return "US-ASCII";
#endif
}
/* invariant-character handling --------------------------------------------- */
/*
* These maps for ASCII to/from EBCDIC map invariant characters (see utypes.h)
* appropriately for most EBCDIC codepages.
*
* They currently also map most other ASCII graphic characters,
* appropriately for codepages 37 and 1047.
* Exceptions: The characters for []^ have different codes in 37 & 1047.
* Both versions are mapped to ASCII.
*
* ASCII 37 1047
* [ 5B BA AD
* ] 5D BB BD
* ^ 5E B0 5F
*
* There are no mappings for variant characters from Unicode to EBCDIC.
*
* Currently, C0 control codes are also included in these maps.
* Exceptions: S/390 Open Edition swaps LF and NEL codes compared with other
* EBCDIC platforms; both codes (15 and 25) are mapped to ASCII LF (0A),
* but there is no mapping for ASCII LF back to EBCDIC.
*
* ASCII EBCDIC S/390-OE
* LF 0A 25 15
* NEL 85 15 25
*
* The maps below explicitly exclude the variant
* control and graphical characters that are in ASCII-based
* codepages at 0x80 and above.
* "No mapping" is expressed by mapping to a 00 byte.
*
* These tables do not establish a converter or a codepage.
*/
static const uint8_t asciiFromEbcdic[256]={
0x00, 0x01, 0x02, 0x03, 0x00, 0x09, 0x00, 0x7f, 0x00, 0x00, 0x00, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
0x10, 0x11, 0x12, 0x13, 0x00, 0x0a, 0x08, 0x00, 0x18, 0x19, 0x00, 0x00, 0x1c, 0x1d, 0x1e, 0x1f,
0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x17, 0x1b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x06, 0x07,
0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x14, 0x15, 0x00, 0x1a,
0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2e, 0x3c, 0x28, 0x2b, 0x7c,
0x26, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x21, 0x24, 0x2a, 0x29, 0x3b, 0x5e,
0x2d, 0x2f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2c, 0x25, 0x5f, 0x3e, 0x3f,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x3a, 0x23, 0x40, 0x27, 0x3d, 0x22,
0x00, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x7e, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x00, 0x00, 0x00, 0x5b, 0x00, 0x00,
0x5e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x5b, 0x5d, 0x00, 0x5d, 0x00, 0x00,
0x7b, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x7d, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x5c, 0x00, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
static const uint8_t ebcdicFromAscii[256]={
0x00, 0x01, 0x02, 0x03, 0x37, 0x2d, 0x2e, 0x2f, 0x16, 0x05, 0x00, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
0x10, 0x11, 0x12, 0x13, 0x3c, 0x3d, 0x32, 0x26, 0x18, 0x19, 0x3f, 0x27, 0x1c, 0x1d, 0x1e, 0x1f,
0x40, 0x00, 0x7f, 0x00, 0x00, 0x6c, 0x50, 0x7d, 0x4d, 0x5d, 0x5c, 0x4e, 0x6b, 0x60, 0x4b, 0x61,
0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0x7a, 0x5e, 0x4c, 0x7e, 0x6e, 0x6f,
0x00, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6,
0xd7, 0xd8, 0xd9, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0x00, 0x00, 0x00, 0x00, 0x6d,
0x00, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96,
0x97, 0x98, 0x99, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0x00, 0x00, 0x00, 0x00, 0x07,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
/*
* Bit sets indicating which characters of the ASCII repertoire
* (by ASCII/Unicode code) are "invariant".
* See utypes.h for more details.
*
* As invariant are considered the characters of the ASCII repertoire except
* for the following:
* 21 '!' <exclamation mark>
* 23 '#' <number sign>
* 24 '$' <dollar sign>
*
* 40 '@' <commercial at>
*
* 5b '[' <left bracket>
* 5c '\' <backslash>
* 5d ']' <right bracket>
* 5e '^' <circumflex>
*
* 60 '`' <grave accent>
*
* 7b '{' <left brace>
* 7c '|' <vertical line>
* 7d '}' <right brace>
* 7e '~' <tilde>
*/
static const uint32_t invariantChars[4]={
0xfffffbff, /* 00..1f but not 0a */
0xffffffe5, /* 20..3f but not 21 23 24 */
0x87fffffe, /* 40..5f but not 40 5b..5e */
0x87fffffe /* 60..7f but not 60 7b..7e */
};
/*
* test unsigned types (or values known to be non-negative) for invariant characters,
* tests ASCII-family character values
*/
#define UCHAR_IS_INVARIANT(c) (((c)<=0x7f) && (invariantChars[(c)>>5]&((uint32_t)1<<((c)&0x1f)))!=0)
/* test signed types for invariant characters, adds test for positive values */
#define SCHAR_IS_INVARIANT(c) ((0<=(c)) && UCHAR_IS_INVARIANT(c))
U_CAPI void U_EXPORT2
u_charsToUChars(const char *cs, UChar *us, int32_t length) {
UChar u;
uint8_t c;
UBool onlyInvariantChars;
/*
* Allow the entire ASCII repertoire to be mapped _to_ Unicode.
* For EBCDIC systems, this works for characters with codes from
* codepages 37 and 1047 or compatible.
*/
onlyInvariantChars=TRUE;
while(length>0) {
c=(uint8_t)(*cs++);
#if U_CHARSET_FAMILY==U_ASCII_FAMILY
u=(UChar)c;
#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
u=(UChar)asciiFromEbcdic[c];
#else
# error U_CHARSET_FAMILY is not valid
#endif
if(u==0 && c!=0) {
onlyInvariantChars=FALSE;
}
*us++=u;
--length;
}
U_ASSERT(onlyInvariantChars); /* only invariant chars? */
}
U_CAPI void U_EXPORT2
u_UCharsToChars(const UChar *us, char *cs, int32_t length) {
UChar u;
UBool onlyInvariantChars;
onlyInvariantChars=TRUE;
while(length>0) {
u=*us++;
if(!UCHAR_IS_INVARIANT(u)) {
onlyInvariantChars=FALSE;
u=0;
}
#if U_CHARSET_FAMILY==U_ASCII_FAMILY
*cs++=(char)u;
#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
*cs++=(char)ebcdicFromAscii[u];
#else
# error U_CHARSET_FAMILY is not valid
#endif
--length;
}
U_ASSERT(onlyInvariantChars); /* only invariant chars? */
}
U_CAPI UBool U_EXPORT2
uprv_isInvariantString(const char *s, int32_t length) {
uint8_t c;
for(;;) {
if(length<0) {
/* NUL-terminated */
c=(uint8_t)*s++;
if(c==0) {
break;
}
} else {
/* count length */
if(length==0) {
break;
}
--length;
c=(uint8_t)*s++;
if(c==0) {
continue; /* NUL is invariant */
}
}
/* c!=0 now, one branch below checks c==0 for variant characters */
/*
* no assertions here because these functions are legitimately called
* for strings with variant characters
*/
#if U_CHARSET_FAMILY==U_ASCII_FAMILY
if(!UCHAR_IS_INVARIANT(c)) {
return FALSE; /* found a variant char */
}
#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
c=asciiFromEbcdic[c];
if(c==0 || !UCHAR_IS_INVARIANT(c)) {
return FALSE; /* found a variant char */
}
#else
# error U_CHARSET_FAMILY is not valid
#endif
}
return TRUE;
}
U_CAPI UBool U_EXPORT2
uprv_isInvariantUString(const UChar *s, int32_t length) {
UChar c;
for(;;) {
if(length<0) {
/* NUL-terminated */
c=*s++;
if(c==0) {
break;
}
} else {
/* count length */
if(length==0) {
break;
}
--length;
c=*s++;
}
/*
* no assertions here because these functions are legitimately called
* for strings with variant characters
*/
if(!UCHAR_IS_INVARIANT(c)) {
return FALSE; /* found a variant char */
}
}
return TRUE;
}
/* UDataSwapFn implementations used in udataswp.c ------- */
/* convert ASCII to EBCDIC and verify that all characters are invariant */
U_CFUNC int32_t
uprv_ebcdicFromAscii(const UDataSwapper *ds,
const void *inData, int32_t length, void *outData,
UErrorCode *pErrorCode) {
const uint8_t *s;
uint8_t *t;
uint8_t c;
int32_t count;
if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
return 0;
}
if(ds==NULL || inData==NULL || length<0 || (length>0 && outData==NULL)) {
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
return 0;
}
/* setup and swapping */
s=(const uint8_t *)inData;
t=(uint8_t *)outData;
count=length;
while(count>0) {
c=*s++;
if(!UCHAR_IS_INVARIANT(c)) {
udata_printError(ds, "uprv_ebcdicFromAscii() string[%d] contains a variant character in position %d\n",
length, length-count);
*pErrorCode=U_INVALID_CHAR_FOUND;
return 0;
}
*t++=ebcdicFromAscii[c];
--count;
}
return length;
}
/* this function only checks and copies ASCII strings without conversion */
U_CFUNC int32_t
uprv_copyAscii(const UDataSwapper *ds,
const void *inData, int32_t length, void *outData,
UErrorCode *pErrorCode) {
const uint8_t *s;
uint8_t c;
int32_t count;
if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
return 0;
}
if(ds==NULL || inData==NULL || length<0 || (length>0 && outData==NULL)) {
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
return 0;
}
/* setup and checking */
s=(const uint8_t *)inData;
count=length;
while(count>0) {
c=*s++;
if(!UCHAR_IS_INVARIANT(c)) {
udata_printError(ds, "uprv_copyFromAscii() string[%d] contains a variant character in position %d\n",
length, length-count);
*pErrorCode=U_INVALID_CHAR_FOUND;
return 0;
}
--count;
}
if(length>0 && inData!=outData) {
uprv_memcpy(outData, inData, length);
}
return length;
}
/* convert EBCDIC to ASCII and verify that all characters are invariant */
U_CFUNC int32_t
uprv_asciiFromEbcdic(const UDataSwapper *ds,
const void *inData, int32_t length, void *outData,
UErrorCode *pErrorCode) {
const uint8_t *s;
uint8_t *t;
uint8_t c;
int32_t count;
if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
return 0;
}
if(ds==NULL || inData==NULL || length<0 || (length>0 && outData==NULL)) {
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
return 0;
}
/* setup and swapping */
s=(const uint8_t *)inData;
t=(uint8_t *)outData;
count=length;
while(count>0) {
c=*s++;
if(c!=0 && ((c=asciiFromEbcdic[c])==0 || !UCHAR_IS_INVARIANT(c))) {
udata_printError(ds, "uprv_asciiFromEbcdic() string[%d] contains a variant character in position %d\n",
length, length-count);
*pErrorCode=U_INVALID_CHAR_FOUND;
return 0;
}
*t++=c;
--count;
}
return length;
}
/* this function only checks and copies EBCDIC strings without conversion */
U_CFUNC int32_t
uprv_copyEbcdic(const UDataSwapper *ds,
const void *inData, int32_t length, void *outData,
UErrorCode *pErrorCode) {
const uint8_t *s;
uint8_t c;
int32_t count;
if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
return 0;
}
if(ds==NULL || inData==NULL || length<0 || (length>0 && outData==NULL)) {
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
return 0;
}
/* setup and checking */
s=(const uint8_t *)inData;
count=length;
while(count>0) {
c=*s++;
if(c!=0 && ((c=asciiFromEbcdic[c])==0 || !UCHAR_IS_INVARIANT(c))) {
udata_printError(ds, "uprv_copyEbcdic() string[%] contains a variant character in position %d\n",
length, length-count);
*pErrorCode=U_INVALID_CHAR_FOUND;
return 0;
}
--count;
}
if(length>0 && inData!=outData) {
uprv_memcpy(outData, inData, length);
}
return length;
}
/* compare invariant strings; variant characters compare less than others and unlike each other */
U_CFUNC int32_t
uprv_compareInvAscii(const UDataSwapper *ds,
const char *outString, int32_t outLength,
const UChar *localString, int32_t localLength) {
int32_t minLength;
UChar32 c1, c2;
uint8_t c;
if(outString==NULL || outLength<-1 || localString==NULL || localLength<-1) {
return 0;
}
if(outLength<0) {
outLength=uprv_strlen(outString);
}
if(localLength<0) {
localLength=u_strlen(localString);
}
minLength= outLength<localLength ? outLength : localLength;
while(minLength>0) {
c=(uint8_t)*outString++;
if(UCHAR_IS_INVARIANT(c)) {
c1=c;
} else {
c1=-1;
}
c2=*localString++;
if(!UCHAR_IS_INVARIANT(c2)) {
c1=-2;
}
if((c1-=c2)!=0) {
return c1;
}
--minLength;
}
/* strings start with same prefix, compare lengths */
return outLength-localLength;
}
U_CFUNC int32_t
uprv_compareInvEbcdic(const UDataSwapper *ds,
const char *outString, int32_t outLength,
const UChar *localString, int32_t localLength) {
int32_t minLength;
UChar32 c1, c2;
uint8_t c;
if(outString==NULL || outLength<-1 || localString==NULL || localLength<-1) {
return 0;
}
if(outLength<0) {
outLength=uprv_strlen(outString);
}
if(localLength<0) {
localLength=u_strlen(localString);
}
minLength= outLength<localLength ? outLength : localLength;
while(minLength>0) {
c=(uint8_t)*outString++;
if(c==0) {
c1=0;
} else if((c1=asciiFromEbcdic[c])!=0 && UCHAR_IS_INVARIANT(c1)) {
/* c1 is set */
} else {
c1=-1;
}
c2=*localString++;
if(!UCHAR_IS_INVARIANT(c2)) {
c1=-2;
}
if((c1-=c2)!=0) {
return c1;
}
--minLength;
}
/* strings start with same prefix, compare lengths */
return outLength-localLength;
}
/* end of platform-specific implementation -------------- */
/* version handling --------------------------------------------------------- */
U_CAPI void U_EXPORT2
u_versionFromString(UVersionInfo versionArray, const char *versionString) {
char *end;
uint16_t part=0;
if(versionArray==NULL) {
return;
}
if(versionString!=NULL) {
for(;;) {
versionArray[part]=(uint8_t)uprv_strtoul(versionString, &end, 10);
if(end==versionString || ++part==U_MAX_VERSION_LENGTH || *end!=U_VERSION_DELIMITER) {
break;
}
versionString=end+1;
}
}
while(part<U_MAX_VERSION_LENGTH) {
versionArray[part++]=0;
}
}
U_CAPI void U_EXPORT2
u_versionToString(UVersionInfo versionArray, char *versionString) {
uint16_t count, part;
uint8_t field;
if(versionString==NULL) {
return;
}
if(versionArray==NULL) {
versionString[0]=0;
return;
}
/* count how many fields need to be written */
for(count=4; count>0 && versionArray[count-1]==0; --count) {
}
if(count <= 1) {
count = 2;
}
/* write the first part */
/* write the decimal field value */
field=versionArray[0];
if(field>=100) {
*versionString++=(char)('0'+field/100);
field%=100;
}
if(field>=10) {
*versionString++=(char)('0'+field/10);
field%=10;
}
*versionString++=(char)('0'+field);
/* write the following parts */
for(part=1; part<count; ++part) {
/* write a dot first */
*versionString++=U_VERSION_DELIMITER;
/* write the decimal field value */
field=versionArray[part];
if(field>=100) {
*versionString++=(char)('0'+field/100);
field%=100;
}
if(field>=10) {
*versionString++=(char)('0'+field/10);
field%=10;
}
*versionString++=(char)('0'+field);
}
/* NUL-terminate */
*versionString=0;
}
U_CAPI void U_EXPORT2
u_getVersion(UVersionInfo versionArray) {
u_versionFromString(versionArray, U_ICU_VERSION);
}
/* u_errorName() ------------------------------------------------------------ */
static const char * const
_uErrorInfoName[U_ERROR_WARNING_LIMIT-U_ERROR_WARNING_START]={
"U_USING_FALLBACK_WARNING",
"U_USING_DEFAULT_WARNING",
"U_SAFECLONE_ALLOCATED_WARNING",
"U_STATE_OLD_WARNING",
"U_STRING_NOT_TERMINATED_WARNING",
"U_SORT_KEY_TOO_SHORT_WARNING",
"U_AMBIGUOUS_ALIAS_WARNING",
"U_DIFFERENT_UCA_VERSION"
};
static const char * const
_uTransErrorName[U_PARSE_ERROR_LIMIT - U_PARSE_ERROR_START]={
"U_BAD_VARIABLE_DEFINITION",
"U_MALFORMED_RULE",
"U_MALFORMED_SET",
"U_MALFORMED_SYMBOL_REFERENCE",
"U_MALFORMED_UNICODE_ESCAPE",
"U_MALFORMED_VARIABLE_DEFINITION",
"U_MALFORMED_VARIABLE_REFERENCE",
"U_MISMATCHED_SEGMENT_DELIMITERS",
"U_MISPLACED_ANCHOR_START",
"U_MISPLACED_CURSOR_OFFSET",
"U_MISPLACED_QUANTIFIER",
"U_MISSING_OPERATOR",
"U_MISSING_SEGMENT_CLOSE",
"U_MULTIPLE_ANTE_CONTEXTS",
"U_MULTIPLE_CURSORS",
"U_MULTIPLE_POST_CONTEXTS",
"U_TRAILING_BACKSLASH",
"U_UNDEFINED_SEGMENT_REFERENCE",
"U_UNDEFINED_VARIABLE",
"U_UNQUOTED_SPECIAL",
"U_UNTERMINATED_QUOTE",
"U_RULE_MASK_ERROR",
"U_MISPLACED_COMPOUND_FILTER",
"U_MULTIPLE_COMPOUND_FILTERS",
"U_INVALID_RBT_SYNTAX",
"U_INVALID_PROPERTY_PATTERN",
"U_MALFORMED_PRAGMA",
"U_UNCLOSED_SEGMENT",
"U_ILLEGAL_CHAR_IN_SEGMENT",
"U_VARIABLE_RANGE_EXHAUSTED",
"U_VARIABLE_RANGE_OVERLAP",
"U_ILLEGAL_CHARACTER",
"U_INTERNAL_TRANSLITERATOR_ERROR",
"U_INVALID_ID",
"U_INVALID_FUNCTION"
};
static const char * const
_uErrorName[U_STANDARD_ERROR_LIMIT]={
"U_ZERO_ERROR",
"U_ILLEGAL_ARGUMENT_ERROR",
"U_MISSING_RESOURCE_ERROR",
"U_INVALID_FORMAT_ERROR",
"U_FILE_ACCESS_ERROR",
"U_INTERNAL_PROGRAM_ERROR",
"U_MESSAGE_PARSE_ERROR",
"U_MEMORY_ALLOCATION_ERROR",
"U_INDEX_OUTOFBOUNDS_ERROR",
"U_PARSE_ERROR",
"U_INVALID_CHAR_FOUND",
"U_TRUNCATED_CHAR_FOUND",
"U_ILLEGAL_CHAR_FOUND",
"U_INVALID_TABLE_FORMAT",
"U_INVALID_TABLE_FILE",
"U_BUFFER_OVERFLOW_ERROR",
"U_UNSUPPORTED_ERROR",
"U_RESOURCE_TYPE_MISMATCH",
"U_ILLEGAL_ESCAPE_SEQUENCE",
"U_UNSUPPORTED_ESCAPE_SEQUENCE",
"U_NO_SPACE_AVAILABLE",
"U_CE_NOT_FOUND_ERROR",
"U_PRIMARY_TOO_LONG_ERROR",
"U_STATE_TOO_OLD_ERROR",
"U_TOO_MANY_ALIASES_ERROR",
"U_ENUM_OUT_OF_SYNC_ERROR",
"U_INVARIANT_CONVERSION_ERROR",
"U_INVALID_STATE_ERROR"
};
static const char * const
_uFmtErrorName[U_FMT_PARSE_ERROR_LIMIT - U_FMT_PARSE_ERROR_START] = {
"U_UNEXPECTED_TOKEN",
"U_MULTIPLE_DECIMAL_SEPARATORS",
"U_MULTIPLE_EXPONENTIAL_SYMBOLS",
"U_MALFORMED_EXPONENTIAL_PATTERN",
"U_MULTIPLE_PERCENT_SYMBOLS",
"U_MULTIPLE_PERMILL_SYMBOLS",
"U_MULTIPLE_PAD_SPECIFIERS",
"U_PATTERN_SYNTAX_ERROR",
"U_ILLEGAL_PAD_POSITION",
"U_UNMATCHED_BRACES",
"U_UNSUPPORTED_PROPERTY",
"U_UNSUPPORTED_ATTRIBUTE"
};
static const char * const
_uBrkErrorName[U_BRK_ERROR_LIMIT - U_BRK_ERROR_START] = {
"U_BRK_ERROR_START",
"U_BRK_INTERNAL_ERROR",
"U_BRK_HEX_DIGITS_EXPECTED",
"U_BRK_SEMICOLON_EXPECTED",
"U_BRK_RULE_SYNTAX",
"U_BRK_UNCLOSED_SET",
"U_BRK_ASSIGN_ERROR",
"U_BRK_VARIABLE_REDFINITION",
"U_BRK_MISMATCHED_PAREN",
"U_BRK_NEW_LINE_IN_QUOTED_STRING",
"U_BRK_UNDEFINED_VARIABLE",
"U_BRK_INIT_ERROR",
"U_BRK_RULE_EMPTY_SET",
"U_BRK_UNRECOGNIZED_OPTION"
};
static const char * const
_uRegexErrorName[U_REGEX_ERROR_LIMIT - U_REGEX_ERROR_START] = {
"U_REGEX_ERROR_START",
"U_REGEX_INTERNAL_ERROR",
"U_REGEX_RULE_SYNTAX",
"U_REGEX_INVALID_STATE",
"U_REGEX_BAD_ESCAPE_SEQUENCE",
"U_REGEX_PROPERTY_SYNTAX",
"U_REGEX_UNIMPLEMENTED",
"U_REGEX_MISMATCHED_PAREN",
"U_REGEX_NUMBER_TOO_BIG",
"U_REGEX_BAD_INTERVAL",
"U_REGEX_MAX_LT_MIN",
"U_REGEX_INVALID_BACK_REF",
"U_REGEX_INVALID_FLAG",
"U_REGEX_LOOK_BEHIND_LIMIT",
"U_REGEX_SET_CONTAINS_STRING"
};
static const char * const
_uIDNAErrorName[U_IDNA_ERROR_LIMIT - U_IDNA_ERROR_START] = {
"U_IDNA_ERROR_START",
"U_IDNA_PROHIBITED_ERROR",
"U_IDNA_UNASSIGNED_ERROR",
"U_IDNA_CHECK_BIDI_ERROR",
"U_IDNA_STD3_ASCII_RULES_ERROR",
"U_IDNA_ACE_PREFIX_ERROR",
"U_IDNA_VERIFICATION_ERROR",
"U_IDNA_LABEL_TOO_LONG_ERROR"
};
U_CAPI const char * U_EXPORT2
u_errorName(UErrorCode code) {
if(U_ZERO_ERROR <= code && code < U_STANDARD_ERROR_LIMIT) {
return _uErrorName[code];
} else if(U_ERROR_WARNING_START <= code && code < U_ERROR_WARNING_LIMIT) {
return _uErrorInfoName[code - U_ERROR_WARNING_START];
} else if(U_PARSE_ERROR_START <= code && code < U_PARSE_ERROR_LIMIT){
return _uTransErrorName[code - U_PARSE_ERROR_START];
} else if(U_FMT_PARSE_ERROR_START <= code && code < U_FMT_PARSE_ERROR_LIMIT){
return _uFmtErrorName[code - U_FMT_PARSE_ERROR_START];
} else if (U_BRK_ERROR_START <= code && code < U_BRK_ERROR_LIMIT){
return _uBrkErrorName[code - U_BRK_ERROR_START];
} else if (U_REGEX_ERROR_START <= code && code < U_REGEX_ERROR_LIMIT) {
return _uRegexErrorName[code - U_REGEX_ERROR_START];
} else if( U_IDNA_ERROR_START <= code && code <= U_IDNA_ERROR_LIMIT) {
return _uIDNAErrorName[code - U_IDNA_ERROR_START];
} else {
return "[BOGUS UErrorCode]";
}
}
/*
* Hey, Emacs, please set the following:
*
* Local Variables:
* indent-tabs-mode: nil
* End:
*
*/