ICU-6606 add #define option for >>default charset is UTF-8<<

X-SVN-Rev: 25544
This commit is contained in:
Markus Scherer 2009-03-11 03:16:35 +00:00
parent 20ddfafeb2
commit 93e35cf3db
7 changed files with 78 additions and 16 deletions

View File

@ -1572,6 +1572,7 @@ The leftmost codepage (.xxx) wins.
}
#if !U_CHARSET_IS_UTF8
#if U_POSIX_LOCALE
/*
Due to various platform differences, one platform may specify a charset,
@ -1806,6 +1807,7 @@ uprv_getDefaultCodepage()
umtx_unlock(NULL);
return name;
}
#endif /* !U_CHARSET_IS_UTF8 */
/* end of platform-specific implementation -------------- */

View File

@ -1,7 +1,7 @@
/*
********************************************************************
* COPYRIGHT:
* Copyright (c) 1996-2008, International Business Machines Corporation and
* Copyright (c) 1996-2009, International Business Machines Corporation and
* others. All Rights Reserved.
********************************************************************
*
@ -158,6 +158,8 @@ static UMTX cnvCacheMutex = NULL; /* Mutex for synchronizing cnv cache a
static const char **gAvailableConverters = NULL;
static uint16_t gAvailableConverterCount = 0;
#if !U_CHARSET_IS_UTF8
/* This contains the resolved converter name. So no further alias lookup is needed again. */
static char gDefaultConverterNameBuffer[UCNV_MAX_CONVERTER_NAME_LENGTH + 1]; /* +1 for NULL */
static const char *gDefaultConverterName = NULL;
@ -173,6 +175,7 @@ static const UConverterSharedData *gDefaultAlgorithmicSharedData = NULL;
/* Does gDefaultConverterName have a converter option and require extra parsing? */
static UBool gDefaultConverterContainsOption;
#endif /* !U_CHARSET_IS_UTF8 */
static const char DATA_TYPE[] = "cnv";
@ -201,10 +204,12 @@ static UBool U_CALLCONV ucnv_cleanup(void) {
/* Isn't called from flushCache because other threads may have preexisting references to the table. */
ucnv_flushAvailableConverterCache();
#if !U_CHARSET_IS_UTF8
gDefaultConverterName = NULL;
gDefaultConverterNameBuffer[0] = 0;
gDefaultConverterContainsOption = FALSE;
gDefaultAlgorithmicSharedData = NULL;
#endif
umtx_destroy(&cnvCacheMutex); /* Don't worry about destroying the mutex even */
/* if the hash table still exists. The mutex */
@ -707,6 +712,9 @@ ucnv_loadSharedData(const char *converterName, UConverterLookupData *lookup, UEr
/* In case "name" is NULL we want to open the default converter. */
if (converterName == NULL) {
#if U_CHARSET_IS_UTF8
return (UConverterSharedData *)converterData[UCNV_UTF8];
#else
/* Call ucnv_getDefaultName first to query the name from the OS. */
lookup->realName = ucnv_getDefaultName();
if (lookup->realName == NULL) {
@ -717,6 +725,7 @@ ucnv_loadSharedData(const char *converterName, UConverterLookupData *lookup, UEr
checkForAlgorithmic = FALSE;
mayContainOption = gDefaultConverterContainsOption;
/* the default converter name is already canonical */
#endif
}
else if((converterName[0] == 'U' ?
( converterName[1] == 'T' && converterName[2] == 'F') :
@ -1113,6 +1122,7 @@ ucnv_bld_getAvailableConverter(uint16_t n, UErrorCode *pErrorCode) {
/* default converter name --------------------------------------------------- */
#if !U_CHARSET_IS_UTF8
/*
Copy the canonical converter name.
ucnv_getDefaultName, which can call this function, must be thread safe.
@ -1147,7 +1157,7 @@ internalSetName(const char *name, UErrorCode *status) {
gDefaultConverterContainsOption = containsOption;
uprv_memcpy(gDefaultConverterNameBuffer, name, length);
gDefaultConverterNameBuffer[length]=0;
/* gDefaultConverterName MUST be the last global var set by this function. */
/* It is the variable checked in ucnv_getDefaultName() to see if initialization is required. */
gDefaultConverterName = gDefaultConverterNameBuffer;
@ -1156,6 +1166,7 @@ internalSetName(const char *name, UErrorCode *status) {
umtx_unlock(&cnvCacheMutex);
}
#endif
/*
* In order to be really thread-safe, the get function would have to take
@ -1167,6 +1178,9 @@ internalSetName(const char *name, UErrorCode *status) {
U_CAPI const char* U_EXPORT2
ucnv_getDefaultName() {
#if U_CHARSET_IS_UTF8
return "UTF-8";
#else
/* local variable to be thread-safe */
const char *name;
@ -1211,6 +1225,7 @@ ucnv_getDefaultName() {
}
return name;
#endif
}
/*
@ -1219,6 +1234,7 @@ See internalSetName or the API reference for details.
*/
U_CAPI void U_EXPORT2
ucnv_setDefaultName(const char *converterName) {
#if !U_CHARSET_IS_UTF8
if(converterName==NULL) {
/* reset to the default codepage */
gDefaultConverterName=NULL;
@ -1241,6 +1257,7 @@ ucnv_setDefaultName(const char *converterName) {
/* The close may make the current name go away. */
ucnv_close(cnv);
}
#endif
}
/* data swapping ------------------------------------------------------------ */

View File

@ -1,7 +1,7 @@
/*
******************************************************************************
*
* Copyright (C) 1997-2008, International Business Machines
* Copyright (C) 1997-2009, International Business Machines
* Corporation and others. All Rights Reserved.
*
******************************************************************************
@ -29,7 +29,7 @@
* \file
* \brief C API: Platform Utilities
*/
/** Define this to 1 if your platform supports IEEE 754 floating point,
to 0 if it does not. */
#ifndef IEEE_754
@ -93,6 +93,7 @@ U_STABLE const char* U_EXPORT2 u_getDataDirectory(void);
*/
U_STABLE void U_EXPORT2 u_setDataDirectory(const char *directory);
#if !U_CHARSET_IS_UTF8
/**
* Please use ucnv_getDefaultName() instead.
* Return the default codepage for this platform and locale.
@ -102,6 +103,7 @@ U_STABLE void U_EXPORT2 u_setDataDirectory(const char *directory);
* @internal
*/
U_INTERNAL const char* U_EXPORT2 uprv_getDefaultCodepage(void);
#endif
/**
* Please use uloc_getDefault() instead.

View File

@ -1,6 +1,6 @@
/*
**********************************************************************
* Copyright (C) 1999-2008, International Business Machines
* Copyright (C) 1999-2009, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* ucnv.h:
@ -1790,6 +1790,9 @@ ucnv_getCanonicalName(const char *alias, const char *standard, UErrorCode *pErro
* It is faster to pass a NULL argument to ucnv_open() in order to open the
* default converter.
*
* If U_CHARSET_IS_UTF8 is defined to 1 in utypes.h then this function
* always returns "UTF-8".
*
* @return returns the current default converter name.
* Storage owned by the library
* @see ucnv_setDefaultName
@ -1805,6 +1808,10 @@ ucnv_getDefaultName(void);
* should be called during application initialization. Most of the time, the
* results from ucnv_getDefaultName() or ucnv_open with a NULL string argument
* are sufficient for your application.
*
* If U_CHARSET_IS_UTF8 is defined to 1 in utypes.h then this function
* does nothing.
*
* @param name the converter name to be the default (must be known by ICU).
* @see ucnv_getDefaultName
* @system

View File

@ -1,6 +1,6 @@
/*
**********************************************************************
* Copyright (C) 2002-2008, International Business Machines
* Copyright (C) 2002-2009, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* file name: uconfig.h
@ -104,7 +104,11 @@
* ICU will not completely build with this switch turned on.
* This switch turns off all converters.
*
* You may want to use this together with U_CHARSET_IS_UTF8 defined to 1
* in utypes.h if char* strings in your environment are always in UTF-8.
*
* @stable ICU 3.2
* @see U_CHARSET_IS_UTF8
*/
#ifndef UCONFIG_NO_CONVERSION
# define UCONFIG_NO_CONVERSION 0

View File

@ -1,6 +1,6 @@
/*
**********************************************************************
* Copyright (C) 1996-2008, International Business Machines
* Copyright (C) 1996-2009, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
*
@ -139,6 +139,30 @@
# define U_CHARSET_FAMILY 0
#endif
/**
* \def U_CHARSET_IS_UTF8
*
* Build-time switch that hardcodes the default charset to UTF-8.
* Defaults to 0 unless it has already been defined before this point.
*
* When this is defined to 1:
* - ICU treats every non-invariant char*, StringPiece, std::string etc.
* as UTF-8 text, regardless of what the system API uses
* - some ICU code calls fast functions such as u_strFromUTF8()
* in place of the more general, heavier-weight conversion API (ucnv.h)
* - ucnv_getDefaultName() always returns "UTF-8"
* - ucnv_setDefaultName() is disabled and leaves the default charset unchanged
* - static builds of ICU are smaller
* - the UCONFIG_NO_CONVERSION build-time configuration option
* (see unicode/uconfig.h) is more usable, because more functionality
* stays available when it is turned on
*
* @draft ICU 4.2
* @see UCONFIG_NO_CONVERSION
*/
#if !defined(U_CHARSET_IS_UTF8)
#   define U_CHARSET_IS_UTF8 0
#endif
/*===========================================================================*/
/* ICUDATA naming scheme */
/*===========================================================================*/

View File

@ -1,6 +1,6 @@
/********************************************************************
* COPYRIGHT:
* Copyright (c) 1997-2008, International Business Machines Corporation and
* Copyright (c) 1997-2009, International Business Machines Corporation and
* others. All Rights Reserved.
********************************************************************/
/*****************************************************************************
@ -3335,11 +3335,11 @@ TestToUCountPending(){
#endif
}
static void TestOneDefaultNameChange(const char *name) {
static void TestOneDefaultNameChange(const char *name, const char *expected) {
UErrorCode status = U_ZERO_ERROR;
UConverter *cnv;
ucnv_setDefaultName(name);
if(strcmp(ucnv_getDefaultName(), name)==0)
if(strcmp(ucnv_getDefaultName(), expected)==0)
log_verbose("setDefaultName of %s works.\n", name);
else
log_err("setDefaultName of %s failed\n", name);
@ -3348,7 +3348,7 @@ static void TestOneDefaultNameChange(const char *name) {
log_err("opening the default converter of %s failed\n", name);
return;
}
if(strcmp(ucnv_getName(cnv, &status), name)==0)
if(strcmp(ucnv_getName(cnv, &status), expected)==0)
log_verbose("ucnv_getName of %s works.\n", name);
else
log_err("ucnv_getName of %s failed\n", name);
@ -3363,12 +3363,18 @@ static void TestDefaultName(void) {
log_verbose("getDefaultName returned %s\n", defaultName);
/*change the default name by setting it */
TestOneDefaultNameChange("UTF-8");
#if !UCONFIG_NO_LEGACY_CONVERSION
TestOneDefaultNameChange("ISCII,version=1");
TestOneDefaultNameChange("ISCII,version=2");
TestOneDefaultNameChange("UTF-8", "UTF-8");
#if U_CHARSET_IS_UTF8
TestOneDefaultNameChange("ISCII,version=1", "UTF-8");
TestOneDefaultNameChange("ISCII,version=2", "UTF-8");
TestOneDefaultNameChange("ISO-8859-1", "UTF-8");
#else
# if !UCONFIG_NO_LEGACY_CONVERSION
TestOneDefaultNameChange("ISCII,version=1", "ISCII,version=1");
TestOneDefaultNameChange("ISCII,version=2", "ISCII,version=2");
# endif
TestOneDefaultNameChange("ISO-8859-1", "ISO-8859-1");
#endif
TestOneDefaultNameChange("ISO-8859-1");
/*set the default name back*/
ucnv_setDefaultName(defaultName);