b376366f95
X-SVN-Rev: 16187
2747 lines
85 KiB
C
2747 lines
85 KiB
C
/*
|
|
******************************************************************************
|
|
*
|
|
* Copyright (C) 1997-2004, International Business Machines
|
|
* Corporation and others. All Rights Reserved.
|
|
*
|
|
******************************************************************************
|
|
*
|
|
* FILE NAME : putil.c (previously putil.cpp and ptypes.cpp)
|
|
*
|
|
* Date Name Description
|
|
* 04/14/97 aliu Creation.
|
|
* 04/24/97 aliu Added getDefaultDataDirectory() and
|
|
* getDefaultLocaleID().
|
|
* 04/28/97 aliu Rewritten to assume Unix and apply general methods
|
|
* for assumed case. Non-UNIX platforms must be
|
|
* special-cased. Rewrote numeric methods dealing
|
|
* with NaN and Infinity to be platform independent
|
|
* over all IEEE 754 platforms.
|
|
* 05/13/97 aliu Restored sign of timezone
|
|
* (semantics are hours West of GMT)
|
|
* 06/16/98 erm Added IEEE_754 stuff, cleaned up isInfinite, isNan,
|
|
* nextDouble..
|
|
* 07/22/98 stephen Added remainder, max, min, trunc
|
|
* 08/13/98 stephen Added isNegativeInfinity, isPositiveInfinity
|
|
* 08/24/98 stephen Added longBitsFromDouble
|
|
* 09/08/98 stephen Minor changes for Mac Port
|
|
* 03/02/99 stephen Removed openFile(). Added AS400 support.
|
|
* Fixed EBCDIC tables
|
|
* 04/15/99 stephen Converted to C.
|
|
* 06/28/99 stephen Removed mutex locking in u_isBigEndian().
|
|
* 08/04/99 jeffrey R. Added OS/2 changes
|
|
* 11/15/99 helena Integrated S/390 IEEE support.
|
|
* 04/26/01 Barry N. OS/400 support for uprv_getDefaultLocaleID
|
|
* 08/15/01 Steven H. OS/400 support for uprv_getDefaultCodepage
|
|
******************************************************************************
|
|
*/
|
|
|
|
#ifndef PTX
|
|
|
|
/* Define _XOPEN_SOURCE for Solaris and friends. */
|
|
/* NetBSD needs it to be >= 4 */
|
|
#ifndef _XOPEN_SOURCE
|
|
#define _XOPEN_SOURCE 4
|
|
#endif
|
|
|
|
/* Define __USE_POSIX and __USE_XOPEN for Linux and glibc. */
|
|
#ifndef __USE_POSIX
|
|
#define __USE_POSIX
|
|
#endif
|
|
#ifndef __USE_XOPEN
|
|
#define __USE_XOPEN
|
|
#endif
|
|
|
|
#endif /* PTX */
|
|
|
|
/* include ICU headers */
|
|
#include "unicode/utypes.h"
|
|
#include "unicode/putil.h"
|
|
#include "unicode/ustring.h"
|
|
#include "uassert.h"
|
|
#include "umutex.h"
|
|
#include "cmemory.h"
|
|
#include "cstring.h"
|
|
#include "locmap.h"
|
|
#include "ucln_cmn.h"
|
|
#include "udataswp.h"
|
|
|
|
/* include system headers */
|
|
#ifdef WIN32
|
|
# define WIN32_LEAN_AND_MEAN
|
|
# define NOGDI
|
|
# define NOUSER
|
|
# define NOSERVICE
|
|
# define NOIME
|
|
# define NOMCX
|
|
# include <windows.h>
|
|
#elif defined(U_CYGWIN) && defined(__STRICT_ANSI__)
|
|
/* tzset isn't defined in strict ANSI on Cygwin. */
|
|
# undef __STRICT_ANSI__
|
|
#elif defined(OS2)
|
|
# define INCL_DOSMISC
|
|
# define INCL_DOSERRORS
|
|
# define INCL_DOSMODULEMGR
|
|
# include <os2.h>
|
|
#elif defined(OS400)
|
|
# include <float.h>
|
|
# include <qusec.h> /* error code structure */
|
|
# include <qusrjobi.h>
|
|
# include <qliept.h> /* EPT_CALL macro - this include must be after all other "QSYSINCs" */
|
|
#elif defined(XP_MAC)
|
|
# include <Files.h>
|
|
# include <IntlResources.h>
|
|
# include <Script.h>
|
|
# include <Folders.h>
|
|
# include <MacTypes.h>
|
|
# include <TextUtils.h>
|
|
#elif defined(OS390)
|
|
#include "unicode/ucnv.h" /* Needed for UCNV_SWAP_LFNL_OPTION_STRING */
|
|
#elif defined(U_AIX)
|
|
#elif defined(U_SOLARIS) || defined(U_LINUX)
|
|
#elif defined(U_HPUX)
|
|
#elif defined(U_DARWIN)
|
|
#include <sys/file.h>
|
|
#include <sys/param.h>
|
|
#elif defined(U_QNX)
|
|
#include <sys/neutrino.h>
|
|
#endif
|
|
|
|
/* Include standard headers. */
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <math.h>
|
|
#include <locale.h>
|
|
#include <float.h>
|
|
#include <time.h>
|
|
|
|
/*
|
|
* Only include langinfo.h if we have a way to get the codeset. If we later
|
|
* depend on more features, we can test on U_HAVE_NL_LANGINFO.
|
|
*
|
|
*/
|
|
|
|
#if U_HAVE_NL_LANGINFO_CODESET
|
|
#include <langinfo.h>
|
|
#endif
|
|
|
|
/* Define the extension for data files, again... */
|
|
#define DATA_TYPE "dat"
|
|
|
|
/* Leave this copyright notice here! */
|
|
static const char copyright[] = U_COPYRIGHT_STRING;
|
|
|
|
/* floating point implementations ------------------------------------------- */
|
|
|
|
/* The NaN handed out by this file is a quiet NaN (QNAN), not a signalling one. */

/* Mask selecting the most significant bit of a 32-bit word. */
#define SIGN 0x80000000U

/*
 * With GNU compilers the NaN/Infinity bit patterns are kept as 64-bit
 * integer constants, because u_topNBytesOfDouble and u_bottomNBytesOfDouble
 * can't be properly optimized by the compiler.
 */
#if defined(__GNUC__)
#   define USE_64BIT_DOUBLE_OPTIMIZATION 1
#else
#   define USE_64BIT_DOUBLE_OPTIMIZATION 0
#endif

#if USE_64BIT_DOUBLE_OPTIMIZATION

/* gcc 3.2 has an optimization bug */
static const int64_t gNan64 = 0x7FF8000000000000LL;
static const int64_t gInf64 = 0x7FF0000000000000LL;

/* Views of the integer constants above as doubles. */
static const double * const fgNan = (const double *)(&gNan64);
static const double * const fgInf = (const double *)(&gInf64);

#else

/* Top 16 bits of the NaN and Infinity patterns for the supported formats. */
#   if IEEE_754
#       define NAN_TOP ((int16_t)0x7FF8)
#       define INF_TOP ((int16_t)0x7FF0)
#   elif defined(OS390)
#       define NAN_TOP ((int16_t)0x7F08)
#       define INF_TOP ((int16_t)0x3F00)
#   endif

/* Storage filled in on first use by uprv_getNaN() / uprv_getInfinity(). */
static UBool fgNaNInitialized = FALSE;
static UBool fgInfInitialized = FALSE;
static double gNan;
static double gInf;
static double * const fgNan = &gNan;
static double * const fgInf = &gInf;

#endif
|
|
|
|
/*---------------------------------------------------------------------------
|
|
Platform utilities
|
|
Our general strategy is to assume we're on a POSIX platform. Platforms which
|
|
are non-POSIX must declare themselves so. The default POSIX implementation
|
|
will sometimes work for non-POSIX platforms as well (e.g., the NaN-related
|
|
functions).
|
|
---------------------------------------------------------------------------*/
|
|
|
|
/*
 * POSIX locale handling is assumed everywhere except on the platforms
 * tested for below, which have to be special-cased.
 */
#if defined(_WIN32) || defined(XP_MAC) || defined(OS400) || defined(OS2)
#   undef U_POSIX_LOCALE
#else
#   define U_POSIX_LOCALE 1
#endif
|
|
|
|
/* Helpers for addressing the raw bytes of a double. */

/**
 * Return a pointer to the n most significant bytes of *d.
 * On a big-endian build that is the start of the object; otherwise it is
 * the last n bytes of the object.
 */
static char*
u_topNBytesOfDouble(double* d, int n)
{
    char *firstByte = (char*)d;

#if U_IS_BIG_ENDIAN
    return firstByte;
#else
    return firstByte + sizeof(double) - n;
#endif
}
|
|
|
|
/**
 * Return a pointer to the n least significant bytes of *d.
 * On a big-endian build those are the last n bytes of the object;
 * otherwise they start at the beginning of the object.
 */
static char*
u_bottomNBytesOfDouble(double* d, int n)
{
    char *firstByte = (char*)d;

#if U_IS_BIG_ENDIAN
    return firstByte + sizeof(double) - n;
#else
    return firstByte;
#endif
}
|
|
|
|
/*---------------------------------------------------------------------------
|
|
Universal Implementations
|
|
These are designed to work on all platforms. Try these, and if they don't
|
|
work on your platform, then special case your platform with new
|
|
implementations.
|
|
---------------------------------------------------------------------------*/
|
|
|
|
/* Get UTC (GMT) time measured in seconds since 0:00 on 1/1/70.*/
|
|
U_CAPI UDate U_EXPORT2
|
|
uprv_getUTCtime()
|
|
{
|
|
#ifdef XP_MAC
|
|
time_t t, t1, t2;
|
|
struct tm tmrec;
|
|
|
|
uprv_memset( &tmrec, 0, sizeof(tmrec) );
|
|
tmrec.tm_year = 70;
|
|
tmrec.tm_mon = 0;
|
|
tmrec.tm_mday = 1;
|
|
t1 = mktime(&tmrec); /* seconds of 1/1/1970*/
|
|
|
|
time(&t);
|
|
uprv_memcpy( &tmrec, gmtime(&t), sizeof(tmrec) );
|
|
t2 = mktime(&tmrec); /* seconds of current GMT*/
|
|
return (UDate)(t2 - t1) * U_MILLIS_PER_SECOND; /* GMT (or UTC) in seconds since 1970*/
|
|
#else
|
|
time_t epochtime;
|
|
time(&epochtime);
|
|
return (UDate)epochtime * U_MILLIS_PER_SECOND;
|
|
#endif
|
|
}
|
|
|
|
/*-----------------------------------------------------------------------------
|
|
IEEE 754
|
|
These methods detect and return NaN and infinity values for doubles
|
|
conforming to IEEE 754. Platforms which support this standard include X86,
|
|
Mac 680x0, Mac PowerPC, AIX RS/6000, and most others.
|
|
If this doesn't work on your platform, you have non-IEEE floating-point, and
|
|
will need to code your own versions. A naive implementation is to return 0.0
|
|
for getNaN and getInfinity, and false for isNaN and isInfinite.
|
|
---------------------------------------------------------------------------*/
|
|
|
|
U_CAPI UBool U_EXPORT2
|
|
uprv_isNaN(double number)
|
|
{
|
|
#if IEEE_754
|
|
#if USE_64BIT_DOUBLE_OPTIMIZATION
|
|
/* gcc 3.2 has an optimization bug */
|
|
/* Infinity is 0x7FF0000000000000U. Anything greater than that is a NaN */
|
|
return (UBool)(((*((int64_t *)&number)) & U_INT64_MAX) > gInf64);
|
|
|
|
#else
|
|
/* This should work in theory, but it doesn't, so we resort to the more*/
|
|
/* complicated method below.*/
|
|
/* return number != number;*/
|
|
|
|
/* You can't return number == getNaN() because, by definition, NaN != x for*/
|
|
/* all x, including NaN (that is, NaN != NaN). So instead, we compare*/
|
|
/* against the known bit pattern. We must be careful of endianism here.*/
|
|
/* The pattern we are looking for id:*/
|
|
|
|
/* 7FFy yyyy yyyy yyyy (some y non-zero)*/
|
|
|
|
/* There are two different kinds of NaN, but we ignore the distinction*/
|
|
/* here. Note that the y value must be non-zero; if it is zero, then we*/
|
|
/* have infinity.*/
|
|
|
|
uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
|
|
sizeof(uint32_t));
|
|
uint32_t lowBits = *(uint32_t*)u_bottomNBytesOfDouble(&number,
|
|
sizeof(uint32_t));
|
|
|
|
return (UBool)(((highBits & 0x7FF00000L) == 0x7FF00000L) &&
|
|
(((highBits & 0x000FFFFFL) != 0) || (lowBits != 0)));
|
|
#endif
|
|
|
|
#elif defined(OS390)
|
|
uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
|
|
sizeof(uint32_t));
|
|
uint32_t lowBits = *(uint32_t*)u_bottomNBytesOfDouble(&number,
|
|
sizeof(uint32_t));
|
|
|
|
return ((highBits & 0x7F080000L) == 0x7F080000L) &&
|
|
(lowBits == 0x00000000L);
|
|
|
|
#else
|
|
/* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/
|
|
/* you'll need to replace this default implementation with what's correct*/
|
|
/* for your platform.*/
|
|
return number != number;
|
|
#endif
|
|
}
|
|
|
|
U_CAPI UBool U_EXPORT2
|
|
uprv_isInfinite(double number)
|
|
{
|
|
#if IEEE_754
|
|
#if USE_64BIT_DOUBLE_OPTIMIZATION
|
|
/* gcc 3.2 has an optimization bug */
|
|
return (UBool)(((*((int64_t *)&number)) & U_INT64_MAX) == gInf64);
|
|
#else
|
|
|
|
/* We know the top bit is the sign bit, so we mask that off in a copy of */
|
|
/* the number and compare against infinity. [LIU]*/
|
|
/* The following approach doesn't work for some reason, so we go ahead and */
|
|
/* scrutinize the pattern itself. */
|
|
/* double a = number; */
|
|
/* *(int8_t*)u_topNBytesOfDouble(&a, 1) &= 0x7F;*/
|
|
/* return a == uprv_getInfinity();*/
|
|
/* Instead, We want to see either:*/
|
|
|
|
/* 7FF0 0000 0000 0000*/
|
|
/* FFF0 0000 0000 0000*/
|
|
|
|
uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
|
|
sizeof(uint32_t));
|
|
uint32_t lowBits = *(uint32_t*)u_bottomNBytesOfDouble(&number,
|
|
sizeof(uint32_t));
|
|
|
|
return (UBool)(((highBits & ~SIGN) == 0x7FF00000U) &&
|
|
(lowBits == 0x00000000U));
|
|
#endif
|
|
|
|
#elif defined(OS390)
|
|
uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
|
|
sizeof(uint32_t));
|
|
uint32_t lowBits = *(uint32_t*)u_bottomNBytesOfDouble(&number,
|
|
sizeof(uint32_t));
|
|
|
|
return ((highBits & ~SIGN) == 0x70FF0000L) && (lowBits == 0x00000000L);
|
|
|
|
#else
|
|
/* If your platform doesn't support IEEE 754 but *does* have an infinity*/
|
|
/* value, you'll need to replace this default implementation with what's*/
|
|
/* correct for your platform.*/
|
|
return number == (2.0 * number);
|
|
#endif
|
|
}
|
|
|
|
U_CAPI UBool U_EXPORT2
|
|
uprv_isPositiveInfinity(double number)
|
|
{
|
|
#if IEEE_754 || defined(OS390)
|
|
return (UBool)(number > 0 && uprv_isInfinite(number));
|
|
#else
|
|
return uprv_isInfinite(number);
|
|
#endif
|
|
}
|
|
|
|
U_CAPI UBool U_EXPORT2
|
|
uprv_isNegativeInfinity(double number)
|
|
{
|
|
#if IEEE_754 || defined(OS390)
|
|
return (UBool)(number < 0 && uprv_isInfinite(number));
|
|
|
|
#else
|
|
uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
|
|
sizeof(uint32_t));
|
|
return((highBits & SIGN) && uprv_isInfinite(number));
|
|
|
|
#endif
|
|
}
|
|
|
|
U_CAPI double U_EXPORT2
|
|
uprv_getNaN()
|
|
{
|
|
#if IEEE_754 || defined(OS390)
|
|
#if !USE_64BIT_DOUBLE_OPTIMIZATION
|
|
if (!fgNaNInitialized) {
|
|
/* This variable is always initialized with the same value,
|
|
so a mutex isn't needed. */
|
|
int i;
|
|
int8_t* p = (int8_t*)fgNan;
|
|
for(i = 0; i < sizeof(double); ++i)
|
|
*p++ = 0;
|
|
*(int16_t*)u_topNBytesOfDouble(fgNan, sizeof(NAN_TOP)) = NAN_TOP;
|
|
fgNaNInitialized = TRUE;
|
|
}
|
|
#endif
|
|
return *fgNan;
|
|
#else
|
|
/* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/
|
|
/* you'll need to replace this default implementation with what's correct*/
|
|
/* for your platform.*/
|
|
return 0.0;
|
|
#endif
|
|
}
|
|
|
|
U_CAPI double U_EXPORT2
|
|
uprv_getInfinity()
|
|
{
|
|
#if IEEE_754 || defined(OS390)
|
|
#if !USE_64BIT_DOUBLE_OPTIMIZATION
|
|
if (!fgInfInitialized)
|
|
{
|
|
/* This variable is always initialized with the same value,
|
|
so a mutex isn't needed. */
|
|
int i;
|
|
int8_t* p = (int8_t*)fgInf;
|
|
for(i = 0; i < sizeof(double); ++i)
|
|
*p++ = 0;
|
|
*(int16_t*)u_topNBytesOfDouble(fgInf, sizeof(INF_TOP)) = INF_TOP;
|
|
fgInfInitialized = TRUE;
|
|
}
|
|
#endif
|
|
return *fgInf;
|
|
#else
|
|
/* If your platform doesn't support IEEE 754 but *does* have an infinity*/
|
|
/* value, you'll need to replace this default implementation with what's*/
|
|
/* correct for your platform.*/
|
|
return 0.0;
|
|
#endif
|
|
}
|
|
|
|
U_CAPI double U_EXPORT2
|
|
uprv_floor(double x)
|
|
{
|
|
return floor(x);
|
|
}
|
|
|
|
U_CAPI double U_EXPORT2
|
|
uprv_ceil(double x)
|
|
{
|
|
return ceil(x);
|
|
}
|
|
|
|
U_CAPI double U_EXPORT2
|
|
uprv_round(double x)
|
|
{
|
|
return uprv_floor(x + 0.5);
|
|
}
|
|
|
|
U_CAPI double U_EXPORT2
|
|
uprv_fabs(double x)
|
|
{
|
|
return fabs(x);
|
|
}
|
|
|
|
U_CAPI double U_EXPORT2
|
|
uprv_modf(double x, double* y)
|
|
{
|
|
return modf(x, y);
|
|
}
|
|
|
|
U_CAPI double U_EXPORT2
|
|
uprv_fmod(double x, double y)
|
|
{
|
|
return fmod(x, y);
|
|
}
|
|
|
|
U_CAPI double U_EXPORT2
|
|
uprv_pow(double x, double y)
|
|
{
|
|
/* This is declared as "double pow(double x, double y)" */
|
|
return pow(x, y);
|
|
}
|
|
|
|
U_CAPI double U_EXPORT2
|
|
uprv_pow10(int32_t x)
|
|
{
|
|
return pow(10.0, (double)x);
|
|
}
|
|
|
|
U_CAPI double U_EXPORT2
|
|
uprv_fmax(double x, double y)
|
|
{
|
|
#if IEEE_754
|
|
int32_t lowBits;
|
|
|
|
/* first handle NaN*/
|
|
if(uprv_isNaN(x) || uprv_isNaN(y))
|
|
return uprv_getNaN();
|
|
|
|
/* check for -0 and 0*/
|
|
lowBits = *(uint32_t*) u_bottomNBytesOfDouble(&x, sizeof(uint32_t));
|
|
if(x == 0.0 && y == 0.0 && (lowBits & SIGN))
|
|
return y;
|
|
|
|
#endif
|
|
|
|
/* this should work for all flt point w/o NaN and Infpecial cases */
|
|
return (x > y ? x : y);
|
|
}
|
|
|
|
U_CAPI int32_t U_EXPORT2
|
|
uprv_max(int32_t x, int32_t y)
|
|
{
|
|
return (x > y ? x : y);
|
|
}
|
|
|
|
U_CAPI double U_EXPORT2
|
|
uprv_fmin(double x, double y)
|
|
{
|
|
#if IEEE_754
|
|
int32_t lowBits;
|
|
|
|
/* first handle NaN*/
|
|
if(uprv_isNaN(x) || uprv_isNaN(y))
|
|
return uprv_getNaN();
|
|
|
|
/* check for -0 and 0*/
|
|
lowBits = *(uint32_t*) u_bottomNBytesOfDouble(&y, sizeof(uint32_t));
|
|
if(x == 0.0 && y == 0.0 && (lowBits & SIGN))
|
|
return y;
|
|
|
|
#endif
|
|
|
|
/* this should work for all flt point w/o NaN and Inf special cases */
|
|
return (x > y ? y : x);
|
|
}
|
|
|
|
U_CAPI int32_t U_EXPORT2
|
|
uprv_min(int32_t x, int32_t y)
|
|
{
|
|
return (x > y ? y : x);
|
|
}
|
|
|
|
/**
|
|
* Truncates the given double.
|
|
* trunc(3.3) = 3.0, trunc (-3.3) = -3.0
|
|
* This is different than calling floor() or ceil():
|
|
* floor(3.3) = 3, floor(-3.3) = -4
|
|
* ceil(3.3) = 4, ceil(-3.3) = -3
|
|
*/
|
|
U_CAPI double U_EXPORT2
|
|
uprv_trunc(double d)
|
|
{
|
|
#if IEEE_754
|
|
int32_t lowBits;
|
|
|
|
/* handle error cases*/
|
|
if(uprv_isNaN(d))
|
|
return uprv_getNaN();
|
|
if(uprv_isInfinite(d))
|
|
return uprv_getInfinity();
|
|
|
|
lowBits = *(uint32_t*) u_bottomNBytesOfDouble(&d, sizeof(uint32_t));
|
|
if( (d == 0.0 && (lowBits & SIGN)) || d < 0)
|
|
return ceil(d);
|
|
else
|
|
return floor(d);
|
|
|
|
#else
|
|
return d >= 0 ? floor(d) : ceil(d);
|
|
|
|
#endif
|
|
}
|
|
|
|
/**
|
|
* Return the largest positive number that can be represented by an integer
|
|
* type of arbitrary bit length.
|
|
*/
|
|
U_CAPI double U_EXPORT2
|
|
uprv_maxMantissa(void)
|
|
{
|
|
return pow(2.0, DBL_MANT_DIG + 1.0) - 1.0;
|
|
}
|
|
|
|
/**
|
|
* Return the floor of the log base 10 of a given double.
|
|
* This method compensates for inaccuracies which arise naturally when
|
|
* computing logs, and always give the correct value. The parameter
|
|
* must be positive and finite.
|
|
* (Thanks to Alan Liu for supplying this function.)
|
|
*/
|
|
U_CAPI int16_t U_EXPORT2
|
|
uprv_log10(double d)
|
|
{
|
|
#ifdef OS400
|
|
/* We don't use the normal implementation because you can't underflow */
|
|
/* a double otherwise an underflow exception occurs */
|
|
return log10(d);
|
|
#else
|
|
/* The reason this routine is needed is that simply taking the*/
|
|
/* log and dividing by log10 yields a result which may be off*/
|
|
/* by 1 due to rounding errors. For example, the naive log10*/
|
|
/* of 1.0e300 taken this way is 299, rather than 300.*/
|
|
double alog10 = log(d) / log(10.0);
|
|
int16_t ailog10 = (int16_t) floor(alog10);
|
|
|
|
/* Positive logs could be too small, e.g. 0.99 instead of 1.0*/
|
|
if (alog10 > 0 && d >= pow(10.0, (double)(ailog10 + 1)))
|
|
++ailog10;
|
|
|
|
/* Negative logs could be too big, e.g. -0.99 instead of -1.0*/
|
|
else if (alog10 < 0 && d < pow(10.0, (double)(ailog10)))
|
|
--ailog10;
|
|
|
|
return ailog10;
|
|
#endif
|
|
}
|
|
|
|
U_CAPI double U_EXPORT2
|
|
uprv_log(double d)
|
|
{
|
|
return log(d);
|
|
}
|
|
|
|
#if 0
/* This isn't used. If it's readded, readd putiltst.c tests */

/**
 * Count the digits to the right of the decimal point in the "%+.9g"
 * rendering of x, adjusted by any exponent in that rendering.
 * Never returns a negative count.
 */
U_CAPI int32_t U_EXPORT2
uprv_digitsAfterDecimal(double x)
{
    char text[20];
    char *cursor = text;
    int32_t written, pointIndex, fractionDigits;
    int32_t exponent = 0;

    /* cheat and use the string-format routine to get a string representation */
    /* (it handles mathematical inaccuracy better than we can), then find out */
    /* how many characters are to the right of the decimal point */
    written = sprintf(text, "%+.9g", x);

    /* skip the leading sign, then the run of integer digits */
    do {
        ++cursor;
    } while (isdigit(*cursor));

    pointIndex = (int32_t)(cursor - text);
    fractionDigits = (int32_t)(written - pointIndex - 1);

    /* if the number's string representation is in scientific notation, find */
    /* the exponent and take it into account */
    cursor = uprv_strchr(text, 'e');
    if (cursor != 0) {
        int16_t expIndex = (int16_t)(cursor - text);

        fractionDigits -= written - expIndex;
        exponent = (int32_t)(atol(cursor + 1));
    }

    /* the string representation may still have spurious decimal digits in it, */
    /* so we cut off at the ninth digit to the right of the decimal, and have */
    /* to search backward from there to the first non-zero digit */
    if (fractionDigits > 9) {
        fractionDigits = 9;
        while (fractionDigits > 0 && text[pointIndex + fractionDigits] == '0') {
            --fractionDigits;
        }
    }

    fractionDigits -= exponent;
    return (fractionDigits < 0) ? 0 : fractionDigits;
}
#endif
|
|
|
|
/*---------------------------------------------------------------------------
|
|
Platform-specific Implementations
|
|
Try these, and if they don't work on your platform, then special case your
|
|
platform with new implementations.
|
|
---------------------------------------------------------------------------*/
|
|
|
|
/* Win32 time zone detection ------------------------------------------------ */
|
|
|
|
#ifdef WIN32
|
|
|
|
/*
|
|
This code attempts to detect the Windows time zone, as set in the
|
|
Windows Date and Time control panel. It attempts to work on
|
|
multiple flavors of Windows (9x, Me, NT, 2000, XP) and on localized
|
|
installs. It works by directly interrogating the registry and
|
|
comparing the data there with the data returned by the
|
|
GetTimeZoneInformation API, along with some other strategies. The
|
|
registry contains time zone data under one of two keys (depending on
|
|
the flavor of Windows):
|
|
|
|
HKLM\SOFTWARE\Microsoft\Windows\CurrentVersion\Time Zones\
|
|
HKLM\SOFTWARE\Microsoft\Windows NT\CurrentVersion\Time Zones\
|
|
|
|
Under this key are several subkeys, one for each time zone. These
|
|
subkeys are named "Pacific" on Win9x/Me and "Pacific Standard Time"
|
|
on WinNT/2k/XP. There are some other wrinkles; see the code for
|
|
details. The subkey name is NOT LOCALIZED, allowing us to support
|
|
localized installs.
|
|
|
|
Under the subkey are data values. We care about:
|
|
|
|
Std Standard time display name, localized
|
|
TZI Binary block of data
|
|
|
|
The TZI data is of particular interest. It contains the offset, two
|
|
more offsets for standard and daylight time, and the start and end
|
|
rules. This is the same data returned by the GetTimeZoneInformation
|
|
API. The API may modify the data on the way out, so we have to be
|
|
careful, but essentially we do a binary comparison against the TZI
|
|
blocks of various registry keys. When we find a match, we know what
|
|
time zone Windows is set to. Since the registry key is not
|
|
localized, we can then translate the key through a simple table
|
|
lookup into the corresponding ICU time zone.
|
|
|
|
This strategy doesn't always work because there are zones which
|
|
share an offset and rules, so more than one TZI block will match.
|
|
For example, both Tokyo and Seoul are at GMT+9 with no DST rules;
|
|
their TZI blocks are identical. For these cases, we fall back to a
|
|
name lookup. We attempt to match the display name as stored in the
|
|
registry for the current zone to the display name stored in the
|
|
registry for various Windows zones. By comparing the registry data
|
|
directly we avoid conversion complications.
|
|
|
|
Author: Alan Liu
|
|
Since: ICU 2.6
|
|
Based on original code by Carl Brown <cbrown@xnetinc.com>
|
|
*/
|
|
|
|
/**
|
|
* Layout of the binary registry data under the "TZI" key.
|
|
*/
|
|
typedef struct {
|
|
LONG Bias;
|
|
LONG StandardBias;
|
|
LONG DaylightBias; /* Tweaked by GetTimeZoneInformation */
|
|
SYSTEMTIME StandardDate;
|
|
SYSTEMTIME DaylightDate;
|
|
} TZI;
|
|
|
|
/** One row of the Windows-to-ICU zone table: an ICU zone ID and a Windows zone ID. */
typedef struct {
    const char* icuid;  /* ICU zone ID, e.g. "America/Los_Angeles" */
    const char* winid;  /* Windows zone ID, e.g. "Pacific" */
} WindowsICUMap;
|
|
|
|
/**
|
|
* Mapping between Windows zone IDs and ICU zone IDs. This list has
|
|
* been mechanically checked; all zone offsets match (most important)
|
|
* and city names match the display city names (where possible). The
|
|
* presence or absence of DST differs in some cases, but this is
|
|
* acceptable as long as the zone is semantically the same (which has
|
|
* been manually checked).
|
|
*
|
|
* Windows 9x/Me zone IDs are listed as "Pacific" rather than "Pacific
|
|
* Standard Time", which is seen in NT/2k/XP. This is fixed-up at
|
|
* runtime as needed. The one exception is "Mexico Standard Time 2",
|
|
* which is not present on Windows 9x/Me.
|
|
*
|
|
* Zones that are not unique under Offset+Rules should be grouped
|
|
* together for efficiency (see code below). In addition, rules MUST
|
|
* be grouped so that all zones of a single offset are together.
|
|
*
|
|
* Comments list S(tandard) or D(aylight), as declared by Windows,
|
|
* followed by the display name (data from Windows XP).
|
|
*
|
|
* NOTE: Etc/GMT+12 is CORRECT for offset GMT-12:00. Consult
|
|
* documentation elsewhere for an explanation.
|
|
*/
|
|
static const WindowsICUMap ZONE_MAP[] = {
|
|
"Etc/GMT+12", "Dateline", /* S (GMT-12:00) International Date Line West */
|
|
|
|
"Pacific/Apia", "Samoa", /* S (GMT-11:00) Midway Island, Samoa */
|
|
|
|
"Pacific/Honolulu", "Hawaiian", /* S (GMT-10:00) Hawaii */
|
|
|
|
"America/Anchorage", "Alaskan", /* D (GMT-09:00) Alaska */
|
|
|
|
"America/Los_Angeles", "Pacific", /* D (GMT-08:00) Pacific Time (US & Canada); Tijuana */
|
|
|
|
"America/Phoenix", "US Mountain", /* S (GMT-07:00) Arizona */
|
|
"America/Denver", "Mountain", /* D (GMT-07:00) Mountain Time (US & Canada) */
|
|
"America/Chihuahua", "Mexico Standard Time 2", /* D (GMT-07:00) Chihuahua, La Paz, Mazatlan */
|
|
|
|
"America/Managua", "Central America", /* S (GMT-06:00) Central America */
|
|
"America/Regina", "Canada Central", /* S (GMT-06:00) Saskatchewan */
|
|
"America/Mexico_City", "Mexico", /* D (GMT-06:00) Guadalajara, Mexico City, Monterrey */
|
|
"America/Chicago", "Central", /* D (GMT-06:00) Central Time (US & Canada) */
|
|
|
|
"America/Indianapolis", "US Eastern", /* S (GMT-05:00) Indiana (East) */
|
|
"America/Bogota", "SA Pacific", /* S (GMT-05:00) Bogota, Lima, Quito */
|
|
"America/New_York", "Eastern", /* D (GMT-05:00) Eastern Time (US & Canada) */
|
|
|
|
"America/Caracas", "SA Western", /* S (GMT-04:00) Caracas, La Paz */
|
|
"America/Santiago", "Pacific SA", /* D (GMT-04:00) Santiago */
|
|
"America/Halifax", "Atlantic", /* D (GMT-04:00) Atlantic Time (Canada) */
|
|
|
|
"America/St_Johns", "Newfoundland", /* D (GMT-03:30) Newfoundland */
|
|
|
|
"America/Buenos_Aires", "SA Eastern", /* S (GMT-03:00) Buenos Aires, Georgetown */
|
|
"America/Godthab", "Greenland", /* D (GMT-03:00) Greenland */
|
|
"America/Sao_Paulo", "E. South America", /* D (GMT-03:00) Brasilia */
|
|
|
|
"America/Noronha", "Mid-Atlantic", /* D (GMT-02:00) Mid-Atlantic */
|
|
|
|
"Atlantic/Cape_Verde", "Cape Verde", /* S (GMT-01:00) Cape Verde Is. */
|
|
"Atlantic/Azores", "Azores", /* D (GMT-01:00) Azores */
|
|
|
|
"Africa/Casablanca", "Greenwich", /* S (GMT) Casablanca, Monrovia */
|
|
"Europe/London", "GMT", /* D (GMT) Greenwich Mean Time : Dublin, Edinburgh, Lisbon, London */
|
|
|
|
"Africa/Lagos", "W. Central Africa", /* S (GMT+01:00) West Central Africa */
|
|
"Europe/Berlin", "W. Europe", /* D (GMT+01:00) Amsterdam, Berlin, Bern, Rome, Stockholm, Vienna */
|
|
"Europe/Paris", "Romance", /* D (GMT+01:00) Brussels, Copenhagen, Madrid, Paris */
|
|
"Europe/Sarajevo", "Central European", /* D (GMT+01:00) Sarajevo, Skopje, Warsaw, Zagreb */
|
|
"Europe/Belgrade", "Central Europe", /* D (GMT+01:00) Belgrade, Bratislava, Budapest, Ljubljana, Prague */
|
|
|
|
"Africa/Johannesburg", "South Africa", /* S (GMT+02:00) Harare, Pretoria */
|
|
"Asia/Jerusalem", "Israel", /* S (GMT+02:00) Jerusalem */
|
|
"Europe/Istanbul", "GTB", /* D (GMT+02:00) Athens, Istanbul, Minsk */
|
|
"Europe/Helsinki", "FLE", /* D (GMT+02:00) Helsinki, Kyiv, Riga, Sofia, Tallinn, Vilnius */
|
|
"Africa/Cairo", "Egypt", /* D (GMT+02:00) Cairo */
|
|
"Europe/Bucharest", "E. Europe", /* D (GMT+02:00) Bucharest */
|
|
|
|
"Africa/Nairobi", "E. Africa", /* S (GMT+03:00) Nairobi */
|
|
"Asia/Riyadh", "Arab", /* S (GMT+03:00) Kuwait, Riyadh */
|
|
"Europe/Moscow", "Russian", /* D (GMT+03:00) Moscow, St. Petersburg, Volgograd */
|
|
"Asia/Baghdad", "Arabic", /* D (GMT+03:00) Baghdad */
|
|
|
|
"Asia/Tehran", "Iran", /* D (GMT+03:30) Tehran */
|
|
|
|
"Asia/Muscat", "Arabian", /* S (GMT+04:00) Abu Dhabi, Muscat */
|
|
"Asia/Tbilisi", "Caucasus", /* D (GMT+04:00) Baku, Tbilisi, Yerevan */
|
|
|
|
"Asia/Kabul", "Afghanistan", /* S (GMT+04:30) Kabul */
|
|
|
|
"Asia/Karachi", "West Asia", /* S (GMT+05:00) Islamabad, Karachi, Tashkent */
|
|
"Asia/Yekaterinburg", "Ekaterinburg", /* D (GMT+05:00) Ekaterinburg */
|
|
|
|
"Asia/Calcutta", "India", /* S (GMT+05:30) Chennai, Kolkata, Mumbai, New Delhi */
|
|
|
|
"Asia/Katmandu", "Nepal", /* S (GMT+05:45) Kathmandu */
|
|
|
|
"Asia/Colombo", "Sri Lanka", /* S (GMT+06:00) Sri Jayawardenepura */
|
|
"Asia/Dhaka", "Central Asia", /* S (GMT+06:00) Astana, Dhaka */
|
|
"Asia/Novosibirsk", "N. Central Asia", /* D (GMT+06:00) Almaty, Novosibirsk */
|
|
|
|
"Asia/Rangoon", "Myanmar", /* S (GMT+06:30) Rangoon */
|
|
|
|
"Asia/Bangkok", "SE Asia", /* S (GMT+07:00) Bangkok, Hanoi, Jakarta */
|
|
"Asia/Krasnoyarsk", "North Asia", /* D (GMT+07:00) Krasnoyarsk */
|
|
|
|
"Australia/Perth", "W. Australia", /* S (GMT+08:00) Perth */
|
|
"Asia/Taipei", "Taipei", /* S (GMT+08:00) Taipei */
|
|
"Asia/Singapore", "Singapore", /* S (GMT+08:00) Kuala Lumpur, Singapore */
|
|
"Asia/Hong_Kong", "China", /* S (GMT+08:00) Beijing, Chongqing, Hong Kong, Urumqi */
|
|
"Asia/Irkutsk", "North Asia East", /* D (GMT+08:00) Irkutsk, Ulaan Bataar */
|
|
|
|
"Asia/Tokyo", "Tokyo", /* S (GMT+09:00) Osaka, Sapporo, Tokyo */
|
|
"Asia/Seoul", "Korea", /* S (GMT+09:00) Seoul */
|
|
"Asia/Yakutsk", "Yakutsk", /* D (GMT+09:00) Yakutsk */
|
|
|
|
"Australia/Darwin", "AUS Central", /* S (GMT+09:30) Darwin */
|
|
"Australia/Adelaide", "Cen. Australia", /* D (GMT+09:30) Adelaide */
|
|
|
|
"Pacific/Guam", "West Pacific", /* S (GMT+10:00) Guam, Port Moresby */
|
|
"Australia/Brisbane", "E. Australia", /* S (GMT+10:00) Brisbane */
|
|
"Asia/Vladivostok", "Vladivostok", /* D (GMT+10:00) Vladivostok */
|
|
"Australia/Hobart", "Tasmania", /* D (GMT+10:00) Hobart */
|
|
"Australia/Sydney", "AUS Eastern", /* D (GMT+10:00) Canberra, Melbourne, Sydney */
|
|
|
|
"Asia/Magadan", "Central Pacific", /* S (GMT+11:00) Magadan, Solomon Is., New Caledonia */
|
|
|
|
"Pacific/Fiji", "Fiji", /* S (GMT+12:00) Fiji, Kamchatka, Marshall Is. */
|
|
"Pacific/Auckland", "New Zealand", /* D (GMT+12:00) Auckland, Wellington */
|
|
|
|
"Pacific/Tongatapu", "Tonga", /* S (GMT+13:00) Nuku'alofa */
|
|
NULL, NULL
|
|
};
|
|
|
|
/** One row of the remapping table: a Windows zone ID and its alternate. */
typedef struct {
    const char* winid;     /* Windows zone ID to be replaced */
    const char* altwinid;  /* replacement ID, prefixed with '-' or '+' */
} WindowsZoneRemap;
|
|
|
|
/**
|
|
* If a lookup fails, we attempt to remap certain Windows ids to
|
|
* alternate Windows ids. If the alternate listed here begins with
|
|
* '-', we use it as is (without the '-'). If it begins with '+', we
|
|
* append a " Standard Time" if appropriate.
|
|
*/
|
|
static const WindowsZoneRemap ZONE_REMAP[] = {
|
|
"Central European", "-Warsaw",
|
|
"Central Europe", "-Prague Bratislava",
|
|
"China", "-Beijing",
|
|
|
|
"Greenwich", "+GMT",
|
|
"GTB", "+GFT",
|
|
"Arab", "+Saudi Arabia",
|
|
"SE Asia", "+Bangkok",
|
|
"AUS Eastern", "+Sydney",
|
|
NULL, NULL,
|
|
};
|
|
|
|
/**
 * Registry key paths, value names and key fragments used by the Windows
 * time zone detection code.
 */

/* Key path (note the trailing backslash). */
static const char CURRENT_ZONE_REGKEY[] = "SYSTEM\\CurrentControlSet\\Control\\TimeZoneInformation\\";

/* Value name. */
static const char STANDARD_NAME_REGKEY[] = "StandardName";

/* Suffix appended to zone ids; starts with a space. */
static const char STANDARD_TIME_REGKEY[] = " Standard Time";

/* Value names found under a zone subkey. */
static const char TZI_REGKEY[] = "TZI";
static const char STD_REGKEY[] = "Std";
|
|
|
|
/**
 * HKLM subkeys whose presence identifies the flavor of Windows.
 * The "GMT" zone subkey is probed specifically: it is present on NT, but
 * on XP has become "GMT Standard Time", and those two cases have to be
 * told apart. Entries are listed in flavor order; a system matching
 * neither entry is WIN_2K_XP_TYPE.
 */
static const char* const WIN_TYPE_PROBE_REGKEY[] = {
    /* WIN_9X_ME_TYPE */
    "SOFTWARE\\Microsoft\\Windows\\CurrentVersion\\Time Zones",

    /* WIN_NT_TYPE */
    "SOFTWARE\\Microsoft\\Windows NT\\CurrentVersion\\Time Zones\\GMT"

    /* otherwise: WIN_2K_XP_TYPE */
};
|
|
|
|
/**
 * Root subkeys (under HKLM) that hold the per-zone subkeys.
 * Index 0 is for WIN_9X_ME_TYPE; index 1 serves both WIN_NT_TYPE and
 * WIN_2K_XP_TYPE. Both paths end in a backslash so that a zone name can
 * be appended directly.
 */
static const char* const TZ_REGKEY[] = {
    /* WIN_9X_ME_TYPE */
    "SOFTWARE\\Microsoft\\Windows\\CurrentVersion\\Time Zones\\",

    /* WIN_NT_TYPE | WIN_2K_XP_TYPE */
    "SOFTWARE\\Microsoft\\Windows NT\\CurrentVersion\\Time Zones\\"
};
|
|
|
|
/**
 * Flavor of Windows as seen by the time zone code. This is not a real
 * OS version; it identifies how the time zone information is laid out
 * in the registry.
 */
enum {
    WIN_9X_ME_TYPE = 0,
    WIN_NT_TYPE    = 1,
    WIN_2K_XP_TYPE = 2
};
|
|
|
|
/**
|
|
* Auxiliary Windows time zone function. Attempts to open the given
|
|
* Windows time zone ID as a registry key. Returns ERROR_SUCCESS if
|
|
* successful. Caller must close the registry key. Handles
|
|
* variations in the resource layout in different flavors of Windows.
|
|
*
|
|
* @param hkey output parameter to receive opened registry key
|
|
* @param winid Windows zone ID, e.g., "Pacific", without the
|
|
* " Standard Time" suffix (if any). Special case "Mexico Standard Time 2"
|
|
* allowed.
|
|
* @param winType Windows flavor (WIN_9X_ME_TYPE, etc.)
|
|
* @return ERROR_SUCCESS upon success
|
|
*/
|
|
static LONG openTZRegKey(HKEY *hkey, const char* winid, int winType) {
|
|
LONG result;
|
|
char subKeyName[96];
|
|
char* name;
|
|
int i;
|
|
|
|
uprv_strcpy(subKeyName, TZ_REGKEY[(winType == WIN_9X_ME_TYPE) ? 0 : 1]);
|
|
name = &subKeyName[strlen(subKeyName)];
|
|
uprv_strcat(subKeyName, winid);
|
|
if (winType != WIN_9X_ME_TYPE) {
|
|
/* Don't modify "Mexico Standard Time 2", which does not occur
|
|
on WIN_9X_ME_TYPE. Also, if the type is WIN_NT_TYPE, then
|
|
in practice this means the GMT key is not followed by
|
|
" Standard Time", so don't append in that case. */
|
|
int isMexico2 = (winid[uprv_strlen(winid)- 1] == '2');
|
|
if (!isMexico2 &&
|
|
!(winType == WIN_NT_TYPE && uprv_strcmp(winid, "GMT") == 0)) {
|
|
uprv_strcat(subKeyName, STANDARD_TIME_REGKEY);
|
|
}
|
|
}
|
|
result = RegOpenKeyEx(HKEY_LOCAL_MACHINE,
|
|
subKeyName,
|
|
0,
|
|
KEY_QUERY_VALUE,
|
|
hkey);
|
|
|
|
if (result != ERROR_SUCCESS) {
|
|
/* If the primary lookup fails, try to remap the Windows zone
|
|
ID, according to the remapping table. */
|
|
for (i=0; ZONE_REMAP[i].winid; ++i) {
|
|
if (uprv_strcmp(winid, ZONE_REMAP[i].winid) == 0) {
|
|
uprv_strcpy(name, ZONE_REMAP[i].altwinid + 1);
|
|
if (*(ZONE_REMAP[i].altwinid) == '+' &&
|
|
winType != WIN_9X_ME_TYPE) {
|
|
uprv_strcat(subKeyName, STANDARD_TIME_REGKEY);
|
|
}
|
|
result = RegOpenKeyEx(HKEY_LOCAL_MACHINE,
|
|
subKeyName,
|
|
0,
|
|
KEY_QUERY_VALUE,
|
|
hkey);
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
/**
|
|
* Main Windows time zone detection function. Returns the Windows
|
|
* time zone, translated to an ICU time zone, or NULL upon failure.
|
|
*/
|
|
static const char* detectWindowsTimeZone() {
|
|
int winType;
|
|
LONG result;
|
|
HKEY hkey;
|
|
TZI tziKey;
|
|
TZI tziReg;
|
|
DWORD cbData = sizeof(TZI);
|
|
TIME_ZONE_INFORMATION apiTZI;
|
|
char stdName[32];
|
|
DWORD stdNameSize;
|
|
char stdRegName[64];
|
|
DWORD stdRegNameSize;
|
|
int firstMatch, lastMatch;
|
|
int j;
|
|
|
|
/* Detect the version of windows by trying to open a sequence of
|
|
probe keys. We don't use the OS version API because what we
|
|
really want to know is how the registry is laid out.
|
|
Specifically, is it 9x/Me or not, and is it "GMT" or "GMT
|
|
Standard Time". */
|
|
for (winType=0; winType<2; ++winType) {
|
|
result = RegOpenKeyEx(HKEY_LOCAL_MACHINE,
|
|
WIN_TYPE_PROBE_REGKEY[winType],
|
|
0,
|
|
KEY_QUERY_VALUE,
|
|
&hkey);
|
|
RegCloseKey(hkey);
|
|
if (result == ERROR_SUCCESS) {
|
|
break;
|
|
}
|
|
}
|
|
|
|
/* Obtain TIME_ZONE_INFORMATION from the API, and then convert it
|
|
to TZI. We could also interrogate the registry directly; we do
|
|
this below if needed. */
|
|
uprv_memset(&apiTZI, 0, sizeof(apiTZI));
|
|
GetTimeZoneInformation(&apiTZI);
|
|
tziKey.Bias = apiTZI.Bias;
|
|
uprv_memcpy((char *)&tziKey.StandardDate, (char*)&apiTZI.StandardDate,
|
|
sizeof(apiTZI.StandardDate));
|
|
uprv_memcpy((char *)&tziKey.DaylightDate, (char*)&apiTZI.DaylightDate,
|
|
sizeof(apiTZI.DaylightDate));
|
|
|
|
/* For each zone that can be identified by Offset+Rules, see if we
|
|
have a match. Continue scanning after finding a match,
|
|
recording the index of the first and the last match. We have
|
|
to do this because some zones are not unique under
|
|
Offset+Rules. */
|
|
firstMatch = lastMatch = -1;
|
|
for (j=0; ZONE_MAP[j].icuid; j++) {
|
|
result = openTZRegKey(&hkey, ZONE_MAP[j].winid, winType);
|
|
if (result == ERROR_SUCCESS) {
|
|
result = RegQueryValueEx(hkey,
|
|
TZI_REGKEY,
|
|
NULL,
|
|
NULL,
|
|
(LPBYTE)&tziReg,
|
|
&cbData);
|
|
}
|
|
RegCloseKey(hkey);
|
|
if (result == ERROR_SUCCESS) {
|
|
/* Assume that offsets are grouped together, and bail out
|
|
when we've scanned everything with a matching
|
|
offset. */
|
|
if (firstMatch >= 0 && tziKey.Bias != tziReg.Bias) {
|
|
break;
|
|
}
|
|
/* Windows alters the DaylightBias in some situations.
|
|
Using the bias and the rules suffices, so overwrite
|
|
these unreliable fields. */
|
|
tziKey.StandardBias = tziReg.StandardBias;
|
|
tziKey.DaylightBias = tziReg.DaylightBias;
|
|
if (uprv_memcmp((char *)&tziKey, (char*)&tziReg,
|
|
sizeof(tziKey)) == 0) {
|
|
if (firstMatch < 0) {
|
|
firstMatch = j;
|
|
}
|
|
lastMatch = j;
|
|
}
|
|
}
|
|
}
|
|
|
|
/* This should never happen; if it does it means our table doesn't
|
|
match Windows AT ALL, perhaps because this is post-XP? */
|
|
if (firstMatch < 0) {
|
|
return NULL;
|
|
}
|
|
|
|
if (firstMatch != lastMatch) {
|
|
/* Offset+Rules lookup yielded >= 2 matches. Try to match the
|
|
localized display name. Get the name from the registry
|
|
(not the API). This avoids conversion issues. Use the
|
|
standard name, since Windows modifies the daylight name to
|
|
match the standard name if there is no DST. */
|
|
result = RegOpenKeyEx(HKEY_LOCAL_MACHINE,
|
|
CURRENT_ZONE_REGKEY,
|
|
0,
|
|
KEY_QUERY_VALUE,
|
|
&hkey);
|
|
if (result == ERROR_SUCCESS) {
|
|
stdNameSize = sizeof(stdName);
|
|
result = RegQueryValueEx(hkey,
|
|
(LPTSTR)STANDARD_NAME_REGKEY,
|
|
NULL,
|
|
NULL,
|
|
(LPBYTE)stdName,
|
|
&stdNameSize);
|
|
RegCloseKey(hkey);
|
|
|
|
/* Scan through the Windows time zone data in the registry
|
|
again (just the range of zones with matching TZIs) and
|
|
look for a standard display name match. */
|
|
for (j=firstMatch; j<=lastMatch; j++) {
|
|
result = openTZRegKey(&hkey, ZONE_MAP[j].winid, winType);
|
|
if (result == ERROR_SUCCESS) {
|
|
stdRegNameSize = sizeof(stdRegName);
|
|
result = RegQueryValueEx(hkey,
|
|
(LPTSTR)STD_REGKEY,
|
|
NULL,
|
|
NULL,
|
|
(LPBYTE)stdRegName,
|
|
&stdRegNameSize);
|
|
}
|
|
RegCloseKey(hkey);
|
|
if (result == ERROR_SUCCESS &&
|
|
stdRegNameSize == stdNameSize &&
|
|
uprv_memcmp(stdName, stdRegName, stdNameSize) == 0) {
|
|
firstMatch = j; /* record the match */
|
|
break;
|
|
}
|
|
}
|
|
} else {
|
|
RegCloseKey(hkey); /* should never get here */
|
|
}
|
|
}
|
|
|
|
return ZONE_MAP[firstMatch].icuid;
|
|
}
|
|
|
|
#endif /*WIN32*/
|
|
|
|
/* Generic time zone layer -------------------------------------------------- */
|
|
|
|
/* Time zone utilities */
|
|
U_CAPI void U_EXPORT2
|
|
uprv_tzset()
|
|
{
|
|
#ifdef U_TZSET
|
|
U_TZSET();
|
|
#else
|
|
/* no initialization*/
|
|
#endif
|
|
}
|
|
|
|
U_CAPI int32_t U_EXPORT2
|
|
uprv_timezone()
|
|
{
|
|
#ifdef U_TIMEZONE
|
|
return U_TIMEZONE;
|
|
#else
|
|
time_t t, t1, t2;
|
|
struct tm tmrec;
|
|
UBool dst_checked;
|
|
int32_t tdiff = 0;
|
|
|
|
time(&t);
|
|
uprv_memcpy( &tmrec, localtime(&t), sizeof(tmrec) );
|
|
dst_checked = (tmrec.tm_isdst != 0); /* daylight savings time is checked*/
|
|
t1 = mktime(&tmrec); /* local time in seconds*/
|
|
uprv_memcpy( &tmrec, gmtime(&t), sizeof(tmrec) );
|
|
t2 = mktime(&tmrec); /* GMT (or UTC) in seconds*/
|
|
tdiff = t2 - t1;
|
|
/* imitate NT behaviour, which returns same timezone offset to GMT for
|
|
winter and summer*/
|
|
if (dst_checked)
|
|
tdiff += 3600;
|
|
return tdiff;
|
|
#endif
|
|
}
|
|
|
|
/* U_TZNAME is not required to be tzname.  When it is, the platforms
   listed below still need it to be declared explicitly here. */

#if defined(U_TZNAME) && (defined(U_IRIX) || defined(U_DARWIN) || defined(U_CYGWIN))
/* Declared as an array of pointers: RS6000 and others reject
   char **tzname. */
extern U_IMPORT char *U_TZNAME[];
#endif
|
|
|
|
#ifdef U_DARWIN /* For Mac OS X */
/* symbolic link that designates the active zone file */
#define TZZONELINK      "/etc/localtime"
/* directory prefix expected in front of the zone name in the link target */
#define TZZONEINFO      "/usr/share/zoneinfo/"
/* link target as read by uprv_tzname() */
static char *gTimeZoneBuffer = NULL; /* Heap allocated */
#endif
|
|
|
|
U_CAPI const char* U_EXPORT2
|
|
uprv_tzname(int n)
|
|
{
|
|
#ifdef WIN32
|
|
char* id = (char*) detectWindowsTimeZone();
|
|
if (id != NULL) {
|
|
return id;
|
|
}
|
|
#endif
|
|
|
|
#if defined(U_DARWIN)
|
|
int ret;
|
|
|
|
char *tzenv;
|
|
|
|
tzenv = getenv("TZFILE");
|
|
if (tzenv != NULL) {
|
|
return tzenv;
|
|
}
|
|
|
|
#if 0
|
|
/* TZ is often set to "PST8PDT" or similar, so we cannot use it. Alan */
|
|
tzenv = getenv("TZ");
|
|
if (tzenv != NULL) {
|
|
return tzenv;
|
|
}
|
|
#endif
|
|
|
|
/* Caller must handle threading issues */
|
|
if (gTimeZoneBuffer == NULL) {
|
|
gTimeZoneBuffer = (char *) uprv_malloc(MAXPATHLEN + 2);
|
|
|
|
ret = readlink(TZZONELINK, gTimeZoneBuffer, MAXPATHLEN + 2);
|
|
if (0 < ret) {
|
|
gTimeZoneBuffer[ret] = '\0';
|
|
if (uprv_strncmp(gTimeZoneBuffer, TZZONEINFO, sizeof(TZZONEINFO) - 1) == 0) {
|
|
return (gTimeZoneBuffer += sizeof(TZZONEINFO) - 1);
|
|
}
|
|
}
|
|
|
|
uprv_free(gTimeZoneBuffer);
|
|
gTimeZoneBuffer = NULL;
|
|
}
|
|
#endif
|
|
|
|
#ifdef U_TZNAME
|
|
return U_TZNAME[n];
|
|
#else
|
|
return "";
|
|
#endif
|
|
}
|
|
|
|
/* Get and set the ICU data directory --------------------------------------- */
|
|
|
|
/* Current ICU data directory; replaced by u_setDataDirectory() and
   released by putil_cleanup(). */
static char *gDataDirectory = NULL;

#if U_POSIX_LOCALE
/* Cached result of uprv_getDefaultLocaleID(); released by putil_cleanup(). */
static char *gCorrectedPOSIXLocale = NULL; /* Heap allocated */
#endif
|
|
|
|
UBool putil_cleanup(void)
|
|
{
|
|
if (gDataDirectory) {
|
|
uprv_free(gDataDirectory);
|
|
gDataDirectory = NULL;
|
|
}
|
|
#if U_POSIX_LOCALE
|
|
if (gCorrectedPOSIXLocale) {
|
|
uprv_free(gCorrectedPOSIXLocale);
|
|
gCorrectedPOSIXLocale = NULL;
|
|
}
|
|
#endif
|
|
return TRUE;
|
|
}
|
|
|
|
/*
|
|
* Set the data directory.
|
|
* Make a copy of the passed string, and set the global data dir to point to it.
|
|
* TODO: see bug #2849, regarding thread safety.
|
|
*/
|
|
U_CAPI void U_EXPORT2
|
|
u_setDataDirectory(const char *directory) {
|
|
char *newDataDir;
|
|
#if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
|
|
char *p;
|
|
#endif
|
|
int32_t length;
|
|
|
|
if(directory==NULL) {
|
|
directory = "";
|
|
}
|
|
length=(int32_t)uprv_strlen(directory);
|
|
newDataDir = (char *)uprv_malloc(length + 2);
|
|
uprv_strcpy(newDataDir, directory);
|
|
|
|
#if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
|
|
while(p = uprv_strchr(newDataDir, U_FILE_ALT_SEP_CHAR)) {
|
|
*p = U_FILE_SEP_CHAR;
|
|
}
|
|
#endif
|
|
|
|
umtx_lock(NULL);
|
|
if (gDataDirectory) {
|
|
uprv_free(gDataDirectory);
|
|
}
|
|
gDataDirectory = newDataDir;
|
|
umtx_unlock(NULL);
|
|
}
|
|
|
|
U_CAPI UBool U_EXPORT2
|
|
uprv_pathIsAbsolute(const char *path)
|
|
{
|
|
if(!path || !*path) {
|
|
return FALSE;
|
|
}
|
|
|
|
if(*path == U_FILE_SEP_CHAR) {
|
|
return TRUE;
|
|
}
|
|
|
|
#if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
|
|
if(*path == U_FILE_ALT_SEP_CHAR) {
|
|
return TRUE;
|
|
}
|
|
#endif
|
|
|
|
#if defined(WIN32)
|
|
if( (((path[0] >= 'A') && (path[0] <= 'Z')) ||
|
|
((path[0] >= 'a') && (path[0] <= 'z'))) &&
|
|
path[1] == ':' ) {
|
|
return TRUE;
|
|
}
|
|
#endif
|
|
|
|
return FALSE;
|
|
}
|
|
|
|
U_CAPI const char * U_EXPORT2
|
|
u_getDataDirectory(void) {
|
|
const char *path = NULL;
|
|
char pathBuffer[1024];
|
|
const char *dataDir;
|
|
|
|
/* if we have the directory, then return it immediately */
|
|
umtx_lock(NULL);
|
|
dataDir = gDataDirectory;
|
|
umtx_unlock(NULL);
|
|
|
|
if(dataDir) {
|
|
return dataDir;
|
|
}
|
|
|
|
/* we need to look for it */
|
|
pathBuffer[0] = 0; /* Shuts up compiler warnings about unreferenced */
|
|
/* variables when the code using it is ifdefed out */
|
|
# if !defined(XP_MAC)
|
|
/* first try to get the environment variable */
|
|
path=getenv("ICU_DATA");
|
|
# else /* XP_MAC */
|
|
{
|
|
OSErr myErr;
|
|
short vRef;
|
|
long dir,newDir;
|
|
int16_t volNum;
|
|
Str255 xpath;
|
|
FSSpec spec;
|
|
short len;
|
|
Handle full;
|
|
|
|
xpath[0]=0;
|
|
|
|
myErr = HGetVol(xpath, &volNum, &dir);
|
|
|
|
if(myErr == noErr) {
|
|
myErr = FindFolder(volNum, kApplicationSupportFolderType, TRUE, &vRef, &dir);
|
|
newDir=-1;
|
|
if (myErr == noErr) {
|
|
myErr = DirCreate(volNum,
|
|
dir,
|
|
"\pICU",
|
|
&newDir);
|
|
if( (myErr == noErr) || (myErr == dupFNErr) ) {
|
|
spec.vRefNum = volNum;
|
|
spec.parID = dir;
|
|
uprv_memcpy(spec.name, "\pICU", 4);
|
|
|
|
myErr = FSpGetFullPath(&spec, &len, &full);
|
|
if(full != NULL)
|
|
{
|
|
HLock(full);
|
|
uprv_memcpy(pathBuffer, ((char*)(*full)), len);
|
|
pathBuffer[len] = 0;
|
|
path = pathBuffer;
|
|
DisposeHandle(full);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
# endif
|
|
|
|
|
|
# if defined WIN32 && defined ICU_ENABLE_DEPRECATED_WIN_REGISTRY
|
|
/* next, try to read the path from the registry */
|
|
if(path==NULL || *path==0) {
|
|
HKEY key;
|
|
|
|
if(ERROR_SUCCESS==RegOpenKeyEx(HKEY_LOCAL_MACHINE, "SOFTWARE\\ICU\\Unicode\\Data", 0, KEY_QUERY_VALUE, &key)) {
|
|
DWORD type=REG_EXPAND_SZ, size=sizeof(pathBuffer);
|
|
|
|
if(ERROR_SUCCESS==RegQueryValueEx(key, "Path", NULL, &type, (unsigned char *)pathBuffer, &size) && size>1) {
|
|
if(type==REG_EXPAND_SZ) {
|
|
/* replace environment variable references by their values */
|
|
char temporaryPath[1024];
|
|
|
|
/* copy the path with variables to the temporary one */
|
|
uprv_memcpy(temporaryPath, pathBuffer, size);
|
|
|
|
/* do the replacement and store it in the pathBuffer */
|
|
size=ExpandEnvironmentStrings(temporaryPath, pathBuffer, sizeof(pathBuffer));
|
|
if(size>0 && size<sizeof(pathBuffer)) {
|
|
path=pathBuffer;
|
|
}
|
|
} else if(type==REG_SZ) {
|
|
path=pathBuffer;
|
|
}
|
|
}
|
|
RegCloseKey(key);
|
|
}
|
|
}
|
|
# endif
|
|
|
|
/* ICU_DATA_DIR may be set as a compile option */
|
|
# ifdef ICU_DATA_DIR
|
|
if(path==NULL || *path==0) {
|
|
path=ICU_DATA_DIR;
|
|
}
|
|
# endif
|
|
|
|
if(path==NULL) {
|
|
/* It looks really bad, set it to something. */
|
|
path = "";
|
|
}
|
|
|
|
u_setDataDirectory(path);
|
|
return gDataDirectory;
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/* Macintosh-specific locale information ------------------------------------ */
|
|
#ifdef XP_MAC
|
|
|
|
typedef struct {
|
|
int32_t script;
|
|
int32_t region;
|
|
int32_t lang;
|
|
int32_t date_region;
|
|
const char* posixID;
|
|
} mac_lc_rec;
|
|
|
|
/* TODO: update this table with a newer version from the
   www.unicode.org web page once one is available. */

/* Table value that matches any actual value (wildcard). */
#define MAC_LC_MAGIC_NUMBER (-5)
/* Initial value of lookup keys that have not been queried. */
#define MAC_LC_INIT_NUMBER  (-9)
|
|
|
|
static const mac_lc_rec mac_lc_recs[] = {
|
|
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 0, "en_US",
|
|
/* United States*/
|
|
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 1, "fr_FR",
|
|
/* France*/
|
|
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 2, "en_GB",
|
|
/* Great Britain*/
|
|
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 3, "de_DE",
|
|
/* Germany*/
|
|
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 4, "it_IT",
|
|
/* Italy*/
|
|
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 5, "nl_NL",
|
|
/* Metherlands*/
|
|
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 6, "fr_BE",
|
|
/* French for Belgium or Lxembourg*/
|
|
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 7, "sv_SE",
|
|
/* Sweden*/
|
|
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 9, "da_DK",
|
|
/* Denmark*/
|
|
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 10, "pt_PT",
|
|
/* Portugal*/
|
|
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 11, "fr_CA",
|
|
/* French Canada*/
|
|
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 13, "is_IS",
|
|
/* Israel*/
|
|
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 14, "ja_JP",
|
|
/* Japan*/
|
|
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 15, "en_AU",
|
|
/* Australia*/
|
|
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 16, "ar_AE",
|
|
/* the Arabic world (?)*/
|
|
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 17, "fi_FI",
|
|
/* Finland*/
|
|
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 18, "fr_CH",
|
|
/* French for Switzerland*/
|
|
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 19, "de_CH",
|
|
/* German for Switzerland*/
|
|
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 20, "el_GR",
|
|
/* Greece*/
|
|
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 21, "is_IS",
|
|
/* Iceland ===*/
|
|
/*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 22, "",*/
|
|
/* Malta ===*/
|
|
/*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 23, "",*/
|
|
/* Cyprus ===*/
|
|
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 24, "tr_TR",
|
|
/* Turkey ===*/
|
|
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 25, "sh_YU",
|
|
/* Croatian system for Yugoslavia*/
|
|
/*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 33, "",*/
|
|
/* Hindi system for India*/
|
|
/*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 34, "",*/
|
|
/* Pakistan*/
|
|
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 41, "lt_LT",
|
|
/* Lithuania*/
|
|
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 42, "pl_PL",
|
|
/* Poland*/
|
|
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 43, "hu_HU",
|
|
/* Hungary*/
|
|
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 44, "et_EE",
|
|
/* Estonia*/
|
|
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 45, "lv_LV",
|
|
/* Latvia*/
|
|
/*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 46, "",*/
|
|
/* Lapland [Ask Rich for the data. HS]*/
|
|
/*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 47, "",*/
|
|
/* Faeroe Islands*/
|
|
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 48, "fa_IR",
|
|
/* Iran*/
|
|
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 49, "ru_RU",
|
|
/* Russia*/
|
|
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 50, "en_IE",
|
|
/* Ireland*/
|
|
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 51, "ko_KR",
|
|
/* Korea*/
|
|
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 52, "zh_CN",
|
|
/* People's Republic of China*/
|
|
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 53, "zh_TW",
|
|
/* Taiwan*/
|
|
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 54, "th_TH",
|
|
/* Thailand*/
|
|
|
|
/* fallback is en_US*/
|
|
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER,
|
|
MAC_LC_MAGIC_NUMBER, "en_US"
|
|
};
|
|
|
|
#endif
|
|
|
|
#if U_POSIX_LOCALE
/*
 * Return just the POSIX id, whatever happens to be in it.
 * Sources, in order: the LC_ALL environment variable, the LANG
 * environment variable, setlocale(LC_ALL, NULL).  The value is
 * remembered in a function-local static, so the sources are consulted
 * only until one of them has produced a value.
 */
static const char *uprv_getPOSIXID(void)
{
    static const char* posixID = NULL;

    if (posixID == NULL) {
        posixID = getenv("LC_ALL");
    }
    if (posixID == NULL) {
        posixID = getenv("LANG");
    }
    if (posixID == NULL) {
        /*
         * On Solaris two different calls to setlocale can result in
         * different values.  Only get this value once.
         */
        posixID = setlocale(LC_ALL, NULL);
    }

    if (posixID == NULL) {
        /* Nothing worked.  Give it a nice value. */
        posixID = "en_US";
    }
    else if (uprv_strcmp("C", posixID) == 0
             || uprv_strchr(posixID, ' ') != NULL
             || uprv_strchr(posixID, '/') != NULL)
    {
        /* HPUX returns 'C C C C C C C' */
        /* Solaris can return /en_US/C/C/C/C/C on the second try. */
        /* Maybe we got some garbage.  Give it a nice value. */
        posixID = "en_US_POSIX";
    }
    return posixID;
}
#endif
|
|
|
|
/* NOTE: The caller should handle thread safety */
|
|
U_CAPI const char* U_EXPORT2
|
|
uprv_getDefaultLocaleID()
|
|
{
|
|
#if U_POSIX_LOCALE
|
|
/*
|
|
Note that: (a '!' means the ID is improper somehow)
|
|
LC_ALL ----> default_loc codepage
|
|
--------------------------------------------------------
|
|
ab.CD ab CD
|
|
ab@CD ab__CD -
|
|
ab@CD.EF ab__CD EF
|
|
|
|
ab_CD.EF@GH ab_CD_GH EF
|
|
|
|
Some 'improper' ways to do the same as above:
|
|
! ab_CD@GH.EF ab_CD_GH EF
|
|
! ab_CD.EF@GH.IJ ab_CD_GH EF
|
|
! ab_CD@ZZ.EF@GH.IJ ab_CD_GH EF
|
|
|
|
_CD@GH _CD_GH -
|
|
_CD.EF@GH _CD_GH EF
|
|
|
|
The variant cannot have dots in it.
|
|
The 'rightmost' variant (@xxx) wins.
|
|
The leftmost codepage (.xxx) wins.
|
|
*/
|
|
char *correctedPOSIXLocale = 0;
|
|
const char* posixID = uprv_getPOSIXID();
|
|
const char *p;
|
|
const char *q;
|
|
int32_t len;
|
|
|
|
/* Format: (no spaces)
|
|
ll [ _CC ] [ . MM ] [ @ VV]
|
|
|
|
l = lang, C = ctry, M = charmap, V = variant
|
|
*/
|
|
|
|
if (gCorrectedPOSIXLocale != NULL) {
|
|
return gCorrectedPOSIXLocale;
|
|
}
|
|
|
|
if ((p = uprv_strchr(posixID, '.')) != NULL) {
|
|
/* assume new locale can't be larger than old one? */
|
|
correctedPOSIXLocale = uprv_malloc(uprv_strlen(posixID));
|
|
uprv_strncpy(correctedPOSIXLocale, posixID, p-posixID);
|
|
correctedPOSIXLocale[p-posixID] = 0;
|
|
|
|
/* do not copy after the @ */
|
|
if ((p = uprv_strchr(correctedPOSIXLocale, '@')) != NULL) {
|
|
correctedPOSIXLocale[p-correctedPOSIXLocale] = 0;
|
|
}
|
|
}
|
|
|
|
/* Note that we scan the *uncorrected* ID. */
|
|
if ((p = uprv_strrchr(posixID, '@')) != NULL) {
|
|
if (correctedPOSIXLocale == NULL) {
|
|
correctedPOSIXLocale = uprv_malloc(uprv_strlen(posixID));
|
|
uprv_strncpy(correctedPOSIXLocale, posixID, p-posixID);
|
|
correctedPOSIXLocale[p-posixID] = 0;
|
|
}
|
|
p++;
|
|
|
|
/* Take care of any special cases here.. */
|
|
if (!uprv_strcmp(p, "nynorsk")) {
|
|
p = "NY";
|
|
|
|
/* Should we assume no_NO_NY instead of possible no__NY?
|
|
* if (!uprv_strcmp(correctedPOSIXLocale, "no")) {
|
|
* uprv_strcpy(correctedPOSIXLocale, "no_NO");
|
|
* }
|
|
*/
|
|
}
|
|
|
|
if (uprv_strchr(correctedPOSIXLocale,'_') == NULL) {
|
|
uprv_strcat(correctedPOSIXLocale, "__"); /* aa@b -> aa__b */
|
|
}
|
|
else {
|
|
uprv_strcat(correctedPOSIXLocale, "_"); /* aa_CC@b -> aa_CC_b */
|
|
}
|
|
|
|
if ((q = uprv_strchr(p, '.')) != NULL) {
|
|
/* How big will the resulting string be? */
|
|
len = (int32_t)(uprv_strlen(correctedPOSIXLocale) + (q-p));
|
|
uprv_strncat(correctedPOSIXLocale, p, q-p);
|
|
correctedPOSIXLocale[len] = 0;
|
|
}
|
|
else {
|
|
/* Anything following the @ sign */
|
|
uprv_strcat(correctedPOSIXLocale, p);
|
|
}
|
|
|
|
/* Should there be a map from 'no@nynorsk' -> no_NO_NY here?
|
|
* How about 'russian' -> 'ru'?
|
|
*/
|
|
}
|
|
|
|
/* Was a correction made? */
|
|
if (correctedPOSIXLocale != NULL) {
|
|
posixID = correctedPOSIXLocale;
|
|
}
|
|
else {
|
|
/* copy it, just in case the original pointer goes away. See j2395 */
|
|
correctedPOSIXLocale = (char *)uprv_malloc(uprv_strlen(posixID) + 1);
|
|
posixID = uprv_strcpy(correctedPOSIXLocale, posixID);
|
|
}
|
|
|
|
if (gCorrectedPOSIXLocale == NULL) {
|
|
gCorrectedPOSIXLocale = correctedPOSIXLocale;
|
|
correctedPOSIXLocale = NULL;
|
|
}
|
|
|
|
if (correctedPOSIXLocale != NULL) { /* Was already set - clean up. */
|
|
uprv_free(correctedPOSIXLocale);
|
|
}
|
|
|
|
return posixID;
|
|
|
|
#elif defined(WIN32)
|
|
UErrorCode status = U_ZERO_ERROR;
|
|
LCID id = GetThreadLocale();
|
|
const char* locID = uprv_convertToPosix(id, &status);
|
|
|
|
if (U_FAILURE(status)) {
|
|
locID = "en_US";
|
|
}
|
|
return locID;
|
|
|
|
#elif defined(XP_MAC)
|
|
int32_t script = MAC_LC_INIT_NUMBER;
|
|
/* = IntlScript(); or GetScriptManagerVariable(smSysScript);*/
|
|
int32_t region = MAC_LC_INIT_NUMBER;
|
|
/* = GetScriptManagerVariable(smRegionCode);*/
|
|
int32_t lang = MAC_LC_INIT_NUMBER;
|
|
/* = GetScriptManagerVariable(smScriptLang);*/
|
|
int32_t date_region = MAC_LC_INIT_NUMBER;
|
|
const char* posixID = 0;
|
|
int32_t count = sizeof(mac_lc_recs) / sizeof(mac_lc_rec);
|
|
int32_t i;
|
|
Intl1Hndl ih;
|
|
|
|
ih = (Intl1Hndl) GetIntlResource(1);
|
|
if (ih)
|
|
date_region = ((uint16_t)(*ih)->intl1Vers) >> 8;
|
|
|
|
for (i = 0; i < count; i++) {
|
|
if ( ((mac_lc_recs[i].script == MAC_LC_MAGIC_NUMBER)
|
|
|| (mac_lc_recs[i].script == script))
|
|
&& ((mac_lc_recs[i].region == MAC_LC_MAGIC_NUMBER)
|
|
|| (mac_lc_recs[i].region == region))
|
|
&& ((mac_lc_recs[i].lang == MAC_LC_MAGIC_NUMBER)
|
|
|| (mac_lc_recs[i].lang == lang))
|
|
&& ((mac_lc_recs[i].date_region == MAC_LC_MAGIC_NUMBER)
|
|
|| (mac_lc_recs[i].date_region == date_region))
|
|
)
|
|
{
|
|
posixID = mac_lc_recs[i].posixID;
|
|
break;
|
|
}
|
|
}
|
|
|
|
return posixID;
|
|
|
|
#elif defined(OS2)
|
|
char * locID;
|
|
|
|
locID = getenv("LC_ALL");
|
|
if (!locID || !*locID)
|
|
locID = getenv("LANG");
|
|
if (!locID || !*locID) {
|
|
locID = "en_US";
|
|
}
|
|
if (!stricmp(locID, "c") || !stricmp(locID, "posix") ||
|
|
!stricmp(locID, "univ"))
|
|
locID = "en_US_POSIX";
|
|
return locID;
|
|
|
|
#elif defined(OS400)
|
|
/* locales are process scoped and are by definition thread safe */
|
|
static char correctedLocale[64];
|
|
const char *localeID = getenv("LC_ALL");
|
|
char *p;
|
|
|
|
if (localeID == NULL)
|
|
localeID = getenv("LANG");
|
|
if (localeID == NULL)
|
|
localeID = setlocale(LC_ALL, NULL);
|
|
/* Make sure we have something... */
|
|
if (localeID == NULL)
|
|
return "en_US_POSIX";
|
|
|
|
/* Extract the locale name from the path. */
|
|
if((p = uprv_strrchr(localeID, '/')) != NULL)
|
|
{
|
|
/* Increment p to start of locale name. */
|
|
p++;
|
|
localeID = p;
|
|
}
|
|
|
|
/* Copy to work location. */
|
|
uprv_strcpy(correctedLocale, localeID);
|
|
|
|
/* Strip off the '.locale' extension. */
|
|
if((p = uprv_strchr(correctedLocale, '.')) != NULL) {
|
|
*p = 0;
|
|
}
|
|
|
|
/* Upper case the locale name. */
|
|
T_CString_toUpperCase(correctedLocale);
|
|
|
|
/* See if we are using the POSIX locale. Any of the
|
|
* following are equivalent and use the same QLGPGCMA
|
|
* (POSIX) locale.
|
|
*/
|
|
if ((uprv_strcmp("C", correctedLocale) == 0) ||
|
|
(uprv_strcmp("POSIX", correctedLocale) == 0) ||
|
|
(uprv_strcmp("QLGPGCMA", correctedLocale) == 0))
|
|
{
|
|
uprv_strcpy(correctedLocale, "en_US_POSIX");
|
|
}
|
|
else
|
|
{
|
|
int16_t LocaleLen;
|
|
|
|
/* Lower case the lang portion. */
|
|
for(p = correctedLocale; *p != 0 && *p != '_'; p++)
|
|
{
|
|
*p = uprv_tolower(*p);
|
|
}
|
|
|
|
/* Adjust for Euro. After '_E' add 'URO'. */
|
|
LocaleLen = uprv_strlen(correctedLocale);
|
|
if (correctedLocale[LocaleLen - 2] == '_' &&
|
|
correctedLocale[LocaleLen - 1] == 'E')
|
|
{
|
|
uprv_strcat(correctedLocale, "URO");
|
|
}
|
|
|
|
/* If using Lotus-based locale then convert to
|
|
* equivalent non Lotus.
|
|
*/
|
|
else if (correctedLocale[LocaleLen - 2] == '_' &&
|
|
correctedLocale[LocaleLen - 1] == 'L')
|
|
{
|
|
correctedLocale[LocaleLen - 2] = 0;
|
|
}
|
|
|
|
/* There are separate simplified and traditional
|
|
* locales called zh_HK_S and zh_HK_T.
|
|
*/
|
|
else if (uprv_strncmp(correctedLocale, "zh_HK", 5) == 0)
|
|
{
|
|
uprv_strcpy(correctedLocale, "zh_HK");
|
|
}
|
|
|
|
/* A special zh_CN_GBK locale...
|
|
*/
|
|
else if (uprv_strcmp(correctedLocale, "zh_CN_GBK") == 0)
|
|
{
|
|
uprv_strcpy(correctedLocale, "zh_CN");
|
|
}
|
|
|
|
}
|
|
|
|
return correctedLocale;
|
|
#endif
|
|
|
|
}
|
|
|
|
|
|
/*
 * Determines the name of the platform's default codepage.
 * Every branch returns either a string literal or a pointer into a
 * function-local static buffer, so the result stays valid after the
 * call; the buffers are rewritten if the function is called again.
 *
 * POSIX-locale platforms try, in order: the charmap part (between '.'
 * and an optional '@') of the POSIX locale ID, the charmap part of
 * setlocale(LC_CTYPE, NULL), nl_langinfo() where available, and
 * finally "US-ASCII".
 */
static const char*
int_getDefaultCodepage()
{
#if defined(OS400)
    uint32_t ccsid = 37; /* Default to ibm-37 */
    static char codepage[64];
    Qwc_JOBI0400_t jobinfo;
    Qus_EC_t error = { sizeof(Qus_EC_t) }; /* SPI error code */

    EPT_CALL(QUSRJOBI)(&jobinfo, sizeof(jobinfo), "JOBI0400",
        "*                         ", "                ", &error);

    if (error.Bytes_Available == 0) {
        if (jobinfo.Coded_Char_Set_ID != 0xFFFF) {
            ccsid = (uint32_t)jobinfo.Coded_Char_Set_ID;
        }
        else if (jobinfo.Default_Coded_Char_Set_Id != 0xFFFF) {
            ccsid = (uint32_t)jobinfo.Default_Coded_Char_Set_Id;
        }
        /* else use the default */
    }
    /* the argument is unsigned, so format it as such */
    sprintf(codepage,"ibm-%u", (unsigned int)ccsid);
    return codepage;

#elif defined(OS390)
    static char codepage[64];
    /* NOTE(review): the codeset name is copied without a length limit;
       this presumes nl_langinfo() returns a short name - confirm. */
    sprintf(codepage,"%s" UCNV_SWAP_LFNL_OPTION_STRING, nl_langinfo(CODESET));
    return codepage;

#elif defined(XP_MAC)
    return "ibm-1275"; /* TODO: Macintosh Roman. There must be a better way. fixme! */

#elif defined(WIN32)
    static char codepage[64];
    /* the argument is unsigned, so format it as such */
    sprintf(codepage, "windows-%u", (unsigned int)GetACP());
    return codepage;

#elif U_POSIX_LOCALE
    static char codesetName[100];
    char *name = NULL;
    char *euro = NULL;
    const char *localeName = NULL;

    uprv_memset(codesetName, 0, sizeof(codesetName));
    localeName = uprv_getPOSIXID();
    if (localeName != NULL && (name = (uprv_strchr(localeName, (int)'.'))) != NULL)
    {
        /* strip the locale name and look at the suffix only */
        name = uprv_strncpy(codesetName, name+1, sizeof(codesetName));
        codesetName[sizeof(codesetName)-1] = 0;
        if ((euro = (uprv_strchr(name, (int)'@'))) != NULL)
        {
           *euro = 0;
        }
        /* if we can find the codeset name, return that. */
        if (*name)
        {
            return name;
        }
    }

    /* otherwise, try CTYPE */
    if (*codesetName)
    {
        uprv_memset(codesetName, 0, sizeof(codesetName));
    }
    localeName = setlocale(LC_CTYPE, NULL);
    if (localeName != NULL && (name = (uprv_strchr(localeName, (int)'.'))) != NULL)
    {
        /* strip the locale name and look at the suffix only */
        name = uprv_strncpy(codesetName, name+1, sizeof(codesetName));
        codesetName[sizeof(codesetName)-1] = 0;
        if ((euro = (uprv_strchr(name, (int)'@'))) != NULL)
        {
           *euro = 0;
        }
        /* if we can find the codeset name from setlocale, return that. */
        if (*name)
        {
            return name;
        }
    }

    if (*codesetName)
    {
        uprv_memset(codesetName, 0, sizeof(codesetName));
    }
#if U_HAVE_NL_LANGINFO_CODESET
    {
        const char *codeset = nl_langinfo(U_NL_LANGINFO_CODESET);
        if (codeset != NULL) {
            uprv_strncpy(codesetName, codeset, sizeof(codesetName));
            codesetName[sizeof(codesetName)-1] = 0;
        }
    }
#endif
    if (*codesetName == 0)
    {
        /* if the table lookup failed, return US ASCII (ISO 646). */
        uprv_strcpy(codesetName, "US-ASCII");
    }
    return codesetName;
#else
    return "US-ASCII";
#endif
}
|
|
|
|
|
|
U_CAPI const char* U_EXPORT2
|
|
uprv_getDefaultCodepage()
|
|
{
|
|
static char const *name = NULL;
|
|
umtx_lock(NULL);
|
|
if (name == NULL) {
|
|
name = int_getDefaultCodepage();
|
|
}
|
|
umtx_unlock(NULL);
|
|
return name;
|
|
}
|
|
|
|
|
|
|
|
/* invariant-character handling --------------------------------------------- */
|
|
|
|
/*
|
|
* These maps for ASCII to/from EBCDIC map invariant characters (see utypes.h)
|
|
* appropriately for most EBCDIC codepages.
|
|
*
|
|
* They currently also map most other ASCII graphic characters,
|
|
* appropriately for codepages 37 and 1047.
|
|
* Exceptions: The characters for []^ have different codes in 37 & 1047.
|
|
* Both versions are mapped to ASCII.
|
|
*
|
|
* ASCII 37 1047
|
|
* [ 5B BA AD
|
|
* ] 5D BB BD
|
|
* ^ 5E B0 5F
|
|
*
|
|
* There are no mappings for variant characters from Unicode to EBCDIC.
|
|
*
|
|
* Currently, C0 control codes are also included in these maps.
|
|
* Exceptions: S/390 Open Edition swaps LF and NEL codes compared with other
|
|
* EBCDIC platforms; both codes (15 and 25) are mapped to ASCII LF (0A),
|
|
* but there is no mapping for ASCII LF back to EBCDIC.
|
|
*
|
|
* ASCII EBCDIC S/390-OE
|
|
* LF 0A 25 15
|
|
* NEL 85 15 25
|
|
*
|
|
* The maps below explicitly exclude the variant
|
|
* control and graphical characters that are in ASCII-based
|
|
* codepages at 0x80 and above.
|
|
* "No mapping" is expressed by mapping to a 00 byte.
|
|
*
|
|
* These tables do not establish a converter or a codepage.
|
|
*/
|
|
|
|
/*
 * EBCDIC byte -> ASCII code. Indexed by the EBCDIC byte value;
 * a 00 entry (other than at index 00) means "no mapping".
 * Each row below covers 16 consecutive EBCDIC codes.
 */
static const uint8_t asciiFromEbcdic[256]={
    /* 0x */ 0x00, 0x01, 0x02, 0x03, 0x00, 0x09, 0x00, 0x7f, 0x00, 0x00, 0x00, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
    /* 1x */ 0x10, 0x11, 0x12, 0x13, 0x00, 0x0a, 0x08, 0x00, 0x18, 0x19, 0x00, 0x00, 0x1c, 0x1d, 0x1e, 0x1f,
    /* 2x */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x17, 0x1b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x06, 0x07,
    /* 3x */ 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x14, 0x15, 0x00, 0x1a,

    /* 4x */ 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2e, 0x3c, 0x28, 0x2b, 0x7c,
    /* 5x */ 0x26, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x21, 0x24, 0x2a, 0x29, 0x3b, 0x5e,
    /* 6x */ 0x2d, 0x2f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2c, 0x25, 0x5f, 0x3e, 0x3f,
    /* 7x */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x3a, 0x23, 0x40, 0x27, 0x3d, 0x22,

    /* 8x */ 0x00, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 9x */ 0x00, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* Ax */ 0x00, 0x7e, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x00, 0x00, 0x00, 0x5b, 0x00, 0x00,
    /* Bx */ 0x5e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x5b, 0x5d, 0x00, 0x5d, 0x00, 0x00,

    /* Cx */ 0x7b, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* Dx */ 0x7d, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* Ex */ 0x5c, 0x00, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* Fx */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
|
|
|
|
/*
 * ASCII code -> EBCDIC byte. Indexed by the ASCII/Unicode code;
 * a 00 entry (other than at index 00) means "no mapping".
 * Codes 80..ff have no mapping at all; ASCII LF (0a) has none either.
 * Each row below covers 16 consecutive codes.
 */
static const uint8_t ebcdicFromAscii[256]={
    /* 0x */ 0x00, 0x01, 0x02, 0x03, 0x37, 0x2d, 0x2e, 0x2f, 0x16, 0x05, 0x00, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
    /* 1x */ 0x10, 0x11, 0x12, 0x13, 0x3c, 0x3d, 0x32, 0x26, 0x18, 0x19, 0x3f, 0x27, 0x1c, 0x1d, 0x1e, 0x1f,
    /* 2x */ 0x40, 0x00, 0x7f, 0x00, 0x00, 0x6c, 0x50, 0x7d, 0x4d, 0x5d, 0x5c, 0x4e, 0x6b, 0x60, 0x4b, 0x61,
    /* 3x */ 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0x7a, 0x5e, 0x4c, 0x7e, 0x6e, 0x6f,

    /* 4x */ 0x00, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6,
    /* 5x */ 0xd7, 0xd8, 0xd9, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0x00, 0x00, 0x00, 0x00, 0x6d,
    /* 6x */ 0x00, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96,
    /* 7x */ 0x97, 0x98, 0x99, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0x00, 0x00, 0x00, 0x00, 0x07,

    /* 8x */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 9x */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* Ax */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* Bx */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

    /* Cx */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* Dx */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* Ex */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* Fx */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
|
|
|
|
/*
|
|
* Bit sets indicating which characters of the ASCII repertoire
|
|
* (by ASCII/Unicode code) are "invariant".
|
|
* See utypes.h for more details.
|
|
*
|
|
* As invariant are considered the characters of the ASCII repertoire except
|
|
* for the following:
|
|
* 0a <line feed> (its bit is cleared in the 00..1f word of invariantChars)
*
* 21 '!' <exclamation mark>
|
|
* 23 '#' <number sign>
|
|
* 24 '$' <dollar sign>
|
|
*
|
|
* 40 '@' <commercial at>
|
|
*
|
|
* 5b '[' <left bracket>
|
|
* 5c '\' <backslash>
|
|
* 5d ']' <right bracket>
|
|
* 5e '^' <circumflex>
|
|
*
|
|
* 60 '`' <grave accent>
|
|
*
|
|
* 7b '{' <left brace>
|
|
* 7c '|' <vertical line>
|
|
* 7d '}' <right brace>
|
|
* 7e '~' <tilde>
|
|
*/
|
|
/*
 * One bit per code 00..7f, 32 codes per word, least significant bit first.
 * A set bit marks the code as an invariant character.
 */
static const uint32_t invariantChars[4]={
    0xfffffbff, /* 00..1f but not 0a */
    0xffffffe5, /* 20..3f but not 21 23 24 */
    0x87fffffe, /* 40..5f but not 40 5b..5e */
    0x87fffffe  /* 60..7f but not 60 7b..7e */
};

/*
 * Test an unsigned value (or one known to be non-negative) for being an
 * invariant character; the value is an ASCII-family code.
 * Codes above 7f are never invariant. Note: the argument is evaluated
 * more than once, so it must not have side effects.
 */
#define UCHAR_IS_INVARIANT(c) \
    (((c)<=0x7f) && (invariantChars[(c)>>5]&((uint32_t)1<<((c)&0x1f)))!=0)

/* Same test for signed types: negative values are rejected first. */
#define SCHAR_IS_INVARIANT(c) \
    ((0<=(c)) && UCHAR_IS_INVARIANT(c))
|
|
|
|
U_CAPI void U_EXPORT2
|
|
u_charsToUChars(const char *cs, UChar *us, int32_t length) {
|
|
UChar u;
|
|
uint8_t c;
|
|
UBool onlyInvariantChars;
|
|
|
|
/*
|
|
* Allow the entire ASCII repertoire to be mapped _to_ Unicode.
|
|
* For EBCDIC systems, this works for characters with codes from
|
|
* codepages 37 and 1047 or compatible.
|
|
*/
|
|
onlyInvariantChars=TRUE;
|
|
while(length>0) {
|
|
c=(uint8_t)(*cs++);
|
|
#if U_CHARSET_FAMILY==U_ASCII_FAMILY
|
|
u=(UChar)c;
|
|
#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
|
|
u=(UChar)asciiFromEbcdic[c];
|
|
#else
|
|
# error U_CHARSET_FAMILY is not valid
|
|
#endif
|
|
if(u==0 && c!=0) {
|
|
onlyInvariantChars=FALSE;
|
|
}
|
|
*us++=u;
|
|
--length;
|
|
}
|
|
U_ASSERT(onlyInvariantChars); /* only invariant chars? */
|
|
}
|
|
|
|
U_CAPI void U_EXPORT2
|
|
u_UCharsToChars(const UChar *us, char *cs, int32_t length) {
|
|
UChar u;
|
|
UBool onlyInvariantChars;
|
|
|
|
onlyInvariantChars=TRUE;
|
|
while(length>0) {
|
|
u=*us++;
|
|
if(!UCHAR_IS_INVARIANT(u)) {
|
|
onlyInvariantChars=FALSE;
|
|
u=0;
|
|
}
|
|
#if U_CHARSET_FAMILY==U_ASCII_FAMILY
|
|
*cs++=(char)u;
|
|
#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
|
|
*cs++=(char)ebcdicFromAscii[u];
|
|
#else
|
|
# error U_CHARSET_FAMILY is not valid
|
|
#endif
|
|
--length;
|
|
}
|
|
U_ASSERT(onlyInvariantChars); /* only invariant chars? */
|
|
}
|
|
|
|
U_CAPI UBool U_EXPORT2
|
|
uprv_isInvariantString(const char *s, int32_t length) {
|
|
uint8_t c;
|
|
|
|
for(;;) {
|
|
if(length<0) {
|
|
/* NUL-terminated */
|
|
c=(uint8_t)*s++;
|
|
if(c==0) {
|
|
break;
|
|
}
|
|
} else {
|
|
/* count length */
|
|
if(length==0) {
|
|
break;
|
|
}
|
|
--length;
|
|
c=(uint8_t)*s++;
|
|
if(c==0) {
|
|
continue; /* NUL is invariant */
|
|
}
|
|
}
|
|
/* c!=0 now, one branch below checks c==0 for variant characters */
|
|
|
|
/*
|
|
* no assertions here because these functions are legitimately called
|
|
* for strings with variant characters
|
|
*/
|
|
#if U_CHARSET_FAMILY==U_ASCII_FAMILY
|
|
if(!UCHAR_IS_INVARIANT(c)) {
|
|
return FALSE; /* found a variant char */
|
|
}
|
|
#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
|
|
c=asciiFromEbcdic[c];
|
|
if(c==0 || !UCHAR_IS_INVARIANT(c)) {
|
|
return FALSE; /* found a variant char */
|
|
}
|
|
#else
|
|
# error U_CHARSET_FAMILY is not valid
|
|
#endif
|
|
}
|
|
return TRUE;
|
|
}
|
|
|
|
U_CAPI UBool U_EXPORT2
|
|
uprv_isInvariantUString(const UChar *s, int32_t length) {
|
|
UChar c;
|
|
|
|
for(;;) {
|
|
if(length<0) {
|
|
/* NUL-terminated */
|
|
c=*s++;
|
|
if(c==0) {
|
|
break;
|
|
}
|
|
} else {
|
|
/* count length */
|
|
if(length==0) {
|
|
break;
|
|
}
|
|
--length;
|
|
c=*s++;
|
|
}
|
|
|
|
/*
|
|
* no assertions here because these functions are legitimately called
|
|
* for strings with variant characters
|
|
*/
|
|
if(!UCHAR_IS_INVARIANT(c)) {
|
|
return FALSE; /* found a variant char */
|
|
}
|
|
}
|
|
return TRUE;
|
|
}
|
|
|
|
/* UDataSwapFn implementations used in udataswp.c ------- */
|
|
|
|
/* convert ASCII to EBCDIC and verify that all characters are invariant */
|
|
U_CFUNC int32_t
|
|
uprv_ebcdicFromAscii(const UDataSwapper *ds,
|
|
const void *inData, int32_t length, void *outData,
|
|
UErrorCode *pErrorCode) {
|
|
const uint8_t *s;
|
|
uint8_t *t;
|
|
uint8_t c;
|
|
|
|
int32_t count;
|
|
|
|
if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
|
|
return 0;
|
|
}
|
|
if(ds==NULL || inData==NULL || length<0 || (length>0 && outData==NULL)) {
|
|
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
|
|
return 0;
|
|
}
|
|
|
|
/* setup and swapping */
|
|
s=(const uint8_t *)inData;
|
|
t=(uint8_t *)outData;
|
|
count=length;
|
|
while(count>0) {
|
|
c=*s++;
|
|
if(!UCHAR_IS_INVARIANT(c)) {
|
|
udata_printError(ds, "uprv_ebcdicFromAscii() string[%d] contains a variant character in position %d\n",
|
|
length, length-count);
|
|
*pErrorCode=U_INVALID_CHAR_FOUND;
|
|
return 0;
|
|
}
|
|
*t++=ebcdicFromAscii[c];
|
|
--count;
|
|
}
|
|
|
|
return length;
|
|
}
|
|
|
|
/* this function only checks and copies ASCII strings without conversion */
|
|
U_CFUNC int32_t
|
|
uprv_copyAscii(const UDataSwapper *ds,
|
|
const void *inData, int32_t length, void *outData,
|
|
UErrorCode *pErrorCode) {
|
|
const uint8_t *s;
|
|
uint8_t c;
|
|
|
|
int32_t count;
|
|
|
|
if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
|
|
return 0;
|
|
}
|
|
if(ds==NULL || inData==NULL || length<0 || (length>0 && outData==NULL)) {
|
|
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
|
|
return 0;
|
|
}
|
|
|
|
/* setup and checking */
|
|
s=(const uint8_t *)inData;
|
|
count=length;
|
|
while(count>0) {
|
|
c=*s++;
|
|
if(!UCHAR_IS_INVARIANT(c)) {
|
|
udata_printError(ds, "uprv_copyFromAscii() string[%d] contains a variant character in position %d\n",
|
|
length, length-count);
|
|
*pErrorCode=U_INVALID_CHAR_FOUND;
|
|
return 0;
|
|
}
|
|
--count;
|
|
}
|
|
|
|
if(length>0 && inData!=outData) {
|
|
uprv_memcpy(outData, inData, length);
|
|
}
|
|
|
|
return length;
|
|
}
|
|
|
|
/* convert EBCDIC to ASCII and verify that all characters are invariant */
|
|
U_CFUNC int32_t
|
|
uprv_asciiFromEbcdic(const UDataSwapper *ds,
|
|
const void *inData, int32_t length, void *outData,
|
|
UErrorCode *pErrorCode) {
|
|
const uint8_t *s;
|
|
uint8_t *t;
|
|
uint8_t c;
|
|
|
|
int32_t count;
|
|
|
|
if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
|
|
return 0;
|
|
}
|
|
if(ds==NULL || inData==NULL || length<0 || (length>0 && outData==NULL)) {
|
|
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
|
|
return 0;
|
|
}
|
|
|
|
/* setup and swapping */
|
|
s=(const uint8_t *)inData;
|
|
t=(uint8_t *)outData;
|
|
count=length;
|
|
while(count>0) {
|
|
c=*s++;
|
|
if(c!=0 && ((c=asciiFromEbcdic[c])==0 || !UCHAR_IS_INVARIANT(c))) {
|
|
udata_printError(ds, "uprv_asciiFromEbcdic() string[%d] contains a variant character in position %d\n",
|
|
length, length-count);
|
|
*pErrorCode=U_INVALID_CHAR_FOUND;
|
|
return 0;
|
|
}
|
|
*t++=c;
|
|
--count;
|
|
}
|
|
|
|
return length;
|
|
}
|
|
|
|
/* this function only checks and copies EBCDIC strings without conversion */
|
|
U_CFUNC int32_t
|
|
uprv_copyEbcdic(const UDataSwapper *ds,
|
|
const void *inData, int32_t length, void *outData,
|
|
UErrorCode *pErrorCode) {
|
|
const uint8_t *s;
|
|
uint8_t c;
|
|
|
|
int32_t count;
|
|
|
|
if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
|
|
return 0;
|
|
}
|
|
if(ds==NULL || inData==NULL || length<0 || (length>0 && outData==NULL)) {
|
|
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
|
|
return 0;
|
|
}
|
|
|
|
/* setup and checking */
|
|
s=(const uint8_t *)inData;
|
|
count=length;
|
|
while(count>0) {
|
|
c=*s++;
|
|
if(c!=0 && ((c=asciiFromEbcdic[c])==0 || !UCHAR_IS_INVARIANT(c))) {
|
|
udata_printError(ds, "uprv_copyEbcdic() string[%] contains a variant character in position %d\n",
|
|
length, length-count);
|
|
*pErrorCode=U_INVALID_CHAR_FOUND;
|
|
return 0;
|
|
}
|
|
--count;
|
|
}
|
|
|
|
if(length>0 && inData!=outData) {
|
|
uprv_memcpy(outData, inData, length);
|
|
}
|
|
|
|
return length;
|
|
}
|
|
|
|
/* compare invariant strings; variant characters compare less than others and unlike each other */
|
|
U_CFUNC int32_t
|
|
uprv_compareInvAscii(const UDataSwapper *ds,
|
|
const char *outString, int32_t outLength,
|
|
const UChar *localString, int32_t localLength) {
|
|
int32_t minLength;
|
|
UChar32 c1, c2;
|
|
uint8_t c;
|
|
|
|
if(outString==NULL || outLength<-1 || localString==NULL || localLength<-1) {
|
|
return 0;
|
|
}
|
|
|
|
if(outLength<0) {
|
|
outLength=(int32_t)uprv_strlen(outString);
|
|
}
|
|
if(localLength<0) {
|
|
localLength=u_strlen(localString);
|
|
}
|
|
|
|
minLength= outLength<localLength ? outLength : localLength;
|
|
|
|
while(minLength>0) {
|
|
c=(uint8_t)*outString++;
|
|
if(UCHAR_IS_INVARIANT(c)) {
|
|
c1=c;
|
|
} else {
|
|
c1=-1;
|
|
}
|
|
|
|
c2=*localString++;
|
|
if(!UCHAR_IS_INVARIANT(c2)) {
|
|
c1=-2;
|
|
}
|
|
|
|
if((c1-=c2)!=0) {
|
|
return c1;
|
|
}
|
|
|
|
--minLength;
|
|
}
|
|
|
|
/* strings start with same prefix, compare lengths */
|
|
return outLength-localLength;
|
|
}
|
|
|
|
U_CFUNC int32_t
|
|
uprv_compareInvEbcdic(const UDataSwapper *ds,
|
|
const char *outString, int32_t outLength,
|
|
const UChar *localString, int32_t localLength) {
|
|
int32_t minLength;
|
|
UChar32 c1, c2;
|
|
uint8_t c;
|
|
|
|
if(outString==NULL || outLength<-1 || localString==NULL || localLength<-1) {
|
|
return 0;
|
|
}
|
|
|
|
if(outLength<0) {
|
|
outLength=(int32_t)uprv_strlen(outString);
|
|
}
|
|
if(localLength<0) {
|
|
localLength=u_strlen(localString);
|
|
}
|
|
|
|
minLength= outLength<localLength ? outLength : localLength;
|
|
|
|
while(minLength>0) {
|
|
c=(uint8_t)*outString++;
|
|
if(c==0) {
|
|
c1=0;
|
|
} else if((c1=asciiFromEbcdic[c])!=0 && UCHAR_IS_INVARIANT(c1)) {
|
|
/* c1 is set */
|
|
} else {
|
|
c1=-1;
|
|
}
|
|
|
|
c2=*localString++;
|
|
if(!UCHAR_IS_INVARIANT(c2)) {
|
|
c1=-2;
|
|
}
|
|
|
|
if((c1-=c2)!=0) {
|
|
return c1;
|
|
}
|
|
|
|
--minLength;
|
|
}
|
|
|
|
/* strings start with same prefix, compare lengths */
|
|
return outLength-localLength;
|
|
}
|
|
|
|
/* end of platform-specific implementation -------------- */
|
|
|
|
/* version handling --------------------------------------------------------- */
|
|
|
|
U_CAPI void U_EXPORT2
|
|
u_versionFromString(UVersionInfo versionArray, const char *versionString) {
|
|
char *end;
|
|
uint16_t part=0;
|
|
|
|
if(versionArray==NULL) {
|
|
return;
|
|
}
|
|
|
|
if(versionString!=NULL) {
|
|
for(;;) {
|
|
versionArray[part]=(uint8_t)uprv_strtoul(versionString, &end, 10);
|
|
if(end==versionString || ++part==U_MAX_VERSION_LENGTH || *end!=U_VERSION_DELIMITER) {
|
|
break;
|
|
}
|
|
versionString=end+1;
|
|
}
|
|
}
|
|
|
|
while(part<U_MAX_VERSION_LENGTH) {
|
|
versionArray[part++]=0;
|
|
}
|
|
}
|
|
|
|
U_CAPI void U_EXPORT2
|
|
u_versionToString(UVersionInfo versionArray, char *versionString) {
|
|
uint16_t count, part;
|
|
uint8_t field;
|
|
|
|
if(versionString==NULL) {
|
|
return;
|
|
}
|
|
|
|
if(versionArray==NULL) {
|
|
versionString[0]=0;
|
|
return;
|
|
}
|
|
|
|
/* count how many fields need to be written */
|
|
for(count=4; count>0 && versionArray[count-1]==0; --count) {
|
|
}
|
|
|
|
if(count <= 1) {
|
|
count = 2;
|
|
}
|
|
|
|
/* write the first part */
|
|
/* write the decimal field value */
|
|
field=versionArray[0];
|
|
if(field>=100) {
|
|
*versionString++=(char)('0'+field/100);
|
|
field%=100;
|
|
}
|
|
if(field>=10) {
|
|
*versionString++=(char)('0'+field/10);
|
|
field%=10;
|
|
}
|
|
*versionString++=(char)('0'+field);
|
|
|
|
/* write the following parts */
|
|
for(part=1; part<count; ++part) {
|
|
/* write a dot first */
|
|
*versionString++=U_VERSION_DELIMITER;
|
|
|
|
/* write the decimal field value */
|
|
field=versionArray[part];
|
|
if(field>=100) {
|
|
*versionString++=(char)('0'+field/100);
|
|
field%=100;
|
|
}
|
|
if(field>=10) {
|
|
*versionString++=(char)('0'+field/10);
|
|
field%=10;
|
|
}
|
|
*versionString++=(char)('0'+field);
|
|
}
|
|
|
|
/* NUL-terminate */
|
|
*versionString=0;
|
|
}
|
|
|
|
U_CAPI void U_EXPORT2
|
|
u_getVersion(UVersionInfo versionArray) {
|
|
u_versionFromString(versionArray, U_ICU_VERSION);
|
|
}
|
|
|
|
/* u_errorName() ------------------------------------------------------------ */
|
|
|
|
static const char * const
|
|
_uErrorInfoName[U_ERROR_WARNING_LIMIT-U_ERROR_WARNING_START]={
|
|
"U_USING_FALLBACK_WARNING",
|
|
"U_USING_DEFAULT_WARNING",
|
|
"U_SAFECLONE_ALLOCATED_WARNING",
|
|
"U_STATE_OLD_WARNING",
|
|
"U_STRING_NOT_TERMINATED_WARNING",
|
|
"U_SORT_KEY_TOO_SHORT_WARNING",
|
|
"U_AMBIGUOUS_ALIAS_WARNING",
|
|
"U_DIFFERENT_UCA_VERSION"
|
|
};
|
|
|
|
static const char * const
|
|
_uTransErrorName[U_PARSE_ERROR_LIMIT - U_PARSE_ERROR_START]={
|
|
"U_BAD_VARIABLE_DEFINITION",
|
|
"U_MALFORMED_RULE",
|
|
"U_MALFORMED_SET",
|
|
"U_MALFORMED_SYMBOL_REFERENCE",
|
|
"U_MALFORMED_UNICODE_ESCAPE",
|
|
"U_MALFORMED_VARIABLE_DEFINITION",
|
|
"U_MALFORMED_VARIABLE_REFERENCE",
|
|
"U_MISMATCHED_SEGMENT_DELIMITERS",
|
|
"U_MISPLACED_ANCHOR_START",
|
|
"U_MISPLACED_CURSOR_OFFSET",
|
|
"U_MISPLACED_QUANTIFIER",
|
|
"U_MISSING_OPERATOR",
|
|
"U_MISSING_SEGMENT_CLOSE",
|
|
"U_MULTIPLE_ANTE_CONTEXTS",
|
|
"U_MULTIPLE_CURSORS",
|
|
"U_MULTIPLE_POST_CONTEXTS",
|
|
"U_TRAILING_BACKSLASH",
|
|
"U_UNDEFINED_SEGMENT_REFERENCE",
|
|
"U_UNDEFINED_VARIABLE",
|
|
"U_UNQUOTED_SPECIAL",
|
|
"U_UNTERMINATED_QUOTE",
|
|
"U_RULE_MASK_ERROR",
|
|
"U_MISPLACED_COMPOUND_FILTER",
|
|
"U_MULTIPLE_COMPOUND_FILTERS",
|
|
"U_INVALID_RBT_SYNTAX",
|
|
"U_INVALID_PROPERTY_PATTERN",
|
|
"U_MALFORMED_PRAGMA",
|
|
"U_UNCLOSED_SEGMENT",
|
|
"U_ILLEGAL_CHAR_IN_SEGMENT",
|
|
"U_VARIABLE_RANGE_EXHAUSTED",
|
|
"U_VARIABLE_RANGE_OVERLAP",
|
|
"U_ILLEGAL_CHARACTER",
|
|
"U_INTERNAL_TRANSLITERATOR_ERROR",
|
|
"U_INVALID_ID",
|
|
"U_INVALID_FUNCTION"
|
|
};
|
|
|
|
static const char * const
|
|
_uErrorName[U_STANDARD_ERROR_LIMIT]={
|
|
"U_ZERO_ERROR",
|
|
|
|
"U_ILLEGAL_ARGUMENT_ERROR",
|
|
"U_MISSING_RESOURCE_ERROR",
|
|
"U_INVALID_FORMAT_ERROR",
|
|
"U_FILE_ACCESS_ERROR",
|
|
"U_INTERNAL_PROGRAM_ERROR",
|
|
"U_MESSAGE_PARSE_ERROR",
|
|
"U_MEMORY_ALLOCATION_ERROR",
|
|
"U_INDEX_OUTOFBOUNDS_ERROR",
|
|
"U_PARSE_ERROR",
|
|
"U_INVALID_CHAR_FOUND",
|
|
"U_TRUNCATED_CHAR_FOUND",
|
|
"U_ILLEGAL_CHAR_FOUND",
|
|
"U_INVALID_TABLE_FORMAT",
|
|
"U_INVALID_TABLE_FILE",
|
|
"U_BUFFER_OVERFLOW_ERROR",
|
|
"U_UNSUPPORTED_ERROR",
|
|
"U_RESOURCE_TYPE_MISMATCH",
|
|
"U_ILLEGAL_ESCAPE_SEQUENCE",
|
|
"U_UNSUPPORTED_ESCAPE_SEQUENCE",
|
|
"U_NO_SPACE_AVAILABLE",
|
|
"U_CE_NOT_FOUND_ERROR",
|
|
"U_PRIMARY_TOO_LONG_ERROR",
|
|
"U_STATE_TOO_OLD_ERROR",
|
|
"U_TOO_MANY_ALIASES_ERROR",
|
|
"U_ENUM_OUT_OF_SYNC_ERROR",
|
|
"U_INVARIANT_CONVERSION_ERROR",
|
|
"U_INVALID_STATE_ERROR"
|
|
};
|
|
static const char * const
|
|
_uFmtErrorName[U_FMT_PARSE_ERROR_LIMIT - U_FMT_PARSE_ERROR_START] = {
|
|
"U_UNEXPECTED_TOKEN",
|
|
"U_MULTIPLE_DECIMAL_SEPARATORS",
|
|
"U_MULTIPLE_EXPONENTIAL_SYMBOLS",
|
|
"U_MALFORMED_EXPONENTIAL_PATTERN",
|
|
"U_MULTIPLE_PERCENT_SYMBOLS",
|
|
"U_MULTIPLE_PERMILL_SYMBOLS",
|
|
"U_MULTIPLE_PAD_SPECIFIERS",
|
|
"U_PATTERN_SYNTAX_ERROR",
|
|
"U_ILLEGAL_PAD_POSITION",
|
|
"U_UNMATCHED_BRACES",
|
|
"U_UNSUPPORTED_PROPERTY",
|
|
"U_UNSUPPORTED_ATTRIBUTE"
|
|
};
|
|
|
|
static const char * const
|
|
_uBrkErrorName[U_BRK_ERROR_LIMIT - U_BRK_ERROR_START] = {
|
|
"U_BRK_ERROR_START",
|
|
"U_BRK_INTERNAL_ERROR",
|
|
"U_BRK_HEX_DIGITS_EXPECTED",
|
|
"U_BRK_SEMICOLON_EXPECTED",
|
|
"U_BRK_RULE_SYNTAX",
|
|
"U_BRK_UNCLOSED_SET",
|
|
"U_BRK_ASSIGN_ERROR",
|
|
"U_BRK_VARIABLE_REDFINITION",
|
|
"U_BRK_MISMATCHED_PAREN",
|
|
"U_BRK_NEW_LINE_IN_QUOTED_STRING",
|
|
"U_BRK_UNDEFINED_VARIABLE",
|
|
"U_BRK_INIT_ERROR",
|
|
"U_BRK_RULE_EMPTY_SET",
|
|
"U_BRK_UNRECOGNIZED_OPTION",
|
|
"U_BRK_MALFORMED_RULE_TAG"
|
|
};
|
|
|
|
static const char * const
|
|
_uRegexErrorName[U_REGEX_ERROR_LIMIT - U_REGEX_ERROR_START] = {
|
|
"U_REGEX_ERROR_START",
|
|
"U_REGEX_INTERNAL_ERROR",
|
|
"U_REGEX_RULE_SYNTAX",
|
|
"U_REGEX_INVALID_STATE",
|
|
"U_REGEX_BAD_ESCAPE_SEQUENCE",
|
|
"U_REGEX_PROPERTY_SYNTAX",
|
|
"U_REGEX_UNIMPLEMENTED",
|
|
"U_REGEX_MISMATCHED_PAREN",
|
|
"U_REGEX_NUMBER_TOO_BIG",
|
|
"U_REGEX_BAD_INTERVAL",
|
|
"U_REGEX_MAX_LT_MIN",
|
|
"U_REGEX_INVALID_BACK_REF",
|
|
"U_REGEX_INVALID_FLAG",
|
|
"U_REGEX_LOOK_BEHIND_LIMIT",
|
|
"U_REGEX_SET_CONTAINS_STRING"
|
|
};
|
|
|
|
static const char * const
|
|
_uIDNAErrorName[U_IDNA_ERROR_LIMIT - U_IDNA_ERROR_START] = {
|
|
"U_IDNA_ERROR_START",
|
|
"U_IDNA_PROHIBITED_ERROR",
|
|
"U_IDNA_UNASSIGNED_ERROR",
|
|
"U_IDNA_CHECK_BIDI_ERROR",
|
|
"U_IDNA_STD3_ASCII_RULES_ERROR",
|
|
"U_IDNA_ACE_PREFIX_ERROR",
|
|
"U_IDNA_VERIFICATION_ERROR",
|
|
"U_IDNA_LABEL_TOO_LONG_ERROR"
|
|
};
|
|
|
|
U_CAPI const char * U_EXPORT2
|
|
u_errorName(UErrorCode code) {
|
|
if(U_ZERO_ERROR <= code && code < U_STANDARD_ERROR_LIMIT) {
|
|
return _uErrorName[code];
|
|
} else if(U_ERROR_WARNING_START <= code && code < U_ERROR_WARNING_LIMIT) {
|
|
return _uErrorInfoName[code - U_ERROR_WARNING_START];
|
|
} else if(U_PARSE_ERROR_START <= code && code < U_PARSE_ERROR_LIMIT){
|
|
return _uTransErrorName[code - U_PARSE_ERROR_START];
|
|
} else if(U_FMT_PARSE_ERROR_START <= code && code < U_FMT_PARSE_ERROR_LIMIT){
|
|
return _uFmtErrorName[code - U_FMT_PARSE_ERROR_START];
|
|
} else if (U_BRK_ERROR_START <= code && code < U_BRK_ERROR_LIMIT){
|
|
return _uBrkErrorName[code - U_BRK_ERROR_START];
|
|
} else if (U_REGEX_ERROR_START <= code && code < U_REGEX_ERROR_LIMIT) {
|
|
return _uRegexErrorName[code - U_REGEX_ERROR_START];
|
|
} else if( U_IDNA_ERROR_START <= code && code <= U_IDNA_ERROR_LIMIT) {
|
|
return _uIDNAErrorName[code - U_IDNA_ERROR_START];
|
|
} else {
|
|
return "[BOGUS UErrorCode]";
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Hey, Emacs, please set the following:
|
|
*
|
|
* Local Variables:
|
|
* indent-tabs-mode: nil
|
|
* End:
|
|
*
|
|
*/
|