ICU-6606 add #define option for >>default charset is UTF-8<<

X-SVN-Rev: 25544
This commit is contained in:
Markus Scherer 2009-03-11 03:16:35 +00:00
parent 20ddfafeb2
commit 93e35cf3db
7 changed files with 78 additions and 16 deletions

View File

@ -1572,6 +1572,7 @@ The leftmost codepage (.xxx) wins.
} }
#if !U_CHARSET_IS_UTF8
#if U_POSIX_LOCALE #if U_POSIX_LOCALE
/* /*
Due to various platform differences, one platform may specify a charset, Due to various platform differences, one platform may specify a charset,
@ -1806,6 +1807,7 @@ uprv_getDefaultCodepage()
umtx_unlock(NULL); umtx_unlock(NULL);
return name; return name;
} }
#endif /* !U_CHARSET_IS_UTF8 */
/* end of platform-specific implementation -------------- */ /* end of platform-specific implementation -------------- */

View File

@ -1,7 +1,7 @@
/* /*
******************************************************************** ********************************************************************
* COPYRIGHT: * COPYRIGHT:
* Copyright (c) 1996-2008, International Business Machines Corporation and * Copyright (c) 1996-2009, International Business Machines Corporation and
* others. All Rights Reserved. * others. All Rights Reserved.
******************************************************************** ********************************************************************
* *
@ -158,6 +158,8 @@ static UMTX cnvCacheMutex = NULL; /* Mutex for synchronizing cnv cache a
static const char **gAvailableConverters = NULL; static const char **gAvailableConverters = NULL;
static uint16_t gAvailableConverterCount = 0; static uint16_t gAvailableConverterCount = 0;
#if !U_CHARSET_IS_UTF8
/* This contains the resolved converter name. So no further alias lookup is needed again. */ /* This contains the resolved converter name. So no further alias lookup is needed again. */
static char gDefaultConverterNameBuffer[UCNV_MAX_CONVERTER_NAME_LENGTH + 1]; /* +1 for NULL */ static char gDefaultConverterNameBuffer[UCNV_MAX_CONVERTER_NAME_LENGTH + 1]; /* +1 for NULL */
static const char *gDefaultConverterName = NULL; static const char *gDefaultConverterName = NULL;
@ -173,6 +175,7 @@ static const UConverterSharedData *gDefaultAlgorithmicSharedData = NULL;
/* Does gDefaultConverterName have a converter option and require extra parsing? */ /* Does gDefaultConverterName have a converter option and require extra parsing? */
static UBool gDefaultConverterContainsOption; static UBool gDefaultConverterContainsOption;
#endif /* !U_CHARSET_IS_UTF8 */
static const char DATA_TYPE[] = "cnv"; static const char DATA_TYPE[] = "cnv";
@ -201,10 +204,12 @@ static UBool U_CALLCONV ucnv_cleanup(void) {
/* Isn't called from flushCache because other threads may have preexisting references to the table. */ /* Isn't called from flushCache because other threads may have preexisting references to the table. */
ucnv_flushAvailableConverterCache(); ucnv_flushAvailableConverterCache();
#if !U_CHARSET_IS_UTF8
gDefaultConverterName = NULL; gDefaultConverterName = NULL;
gDefaultConverterNameBuffer[0] = 0; gDefaultConverterNameBuffer[0] = 0;
gDefaultConverterContainsOption = FALSE; gDefaultConverterContainsOption = FALSE;
gDefaultAlgorithmicSharedData = NULL; gDefaultAlgorithmicSharedData = NULL;
#endif
umtx_destroy(&cnvCacheMutex); /* Don't worry about destroying the mutex even */ umtx_destroy(&cnvCacheMutex); /* Don't worry about destroying the mutex even */
/* if the hash table still exists. The mutex */ /* if the hash table still exists. The mutex */
@ -707,6 +712,9 @@ ucnv_loadSharedData(const char *converterName, UConverterLookupData *lookup, UEr
/* In case "name" is NULL we want to open the default converter. */ /* In case "name" is NULL we want to open the default converter. */
if (converterName == NULL) { if (converterName == NULL) {
#if U_CHARSET_IS_UTF8
return (UConverterSharedData *)converterData[UCNV_UTF8];
#else
/* Call ucnv_getDefaultName first to query the name from the OS. */ /* Call ucnv_getDefaultName first to query the name from the OS. */
lookup->realName = ucnv_getDefaultName(); lookup->realName = ucnv_getDefaultName();
if (lookup->realName == NULL) { if (lookup->realName == NULL) {
@ -717,6 +725,7 @@ ucnv_loadSharedData(const char *converterName, UConverterLookupData *lookup, UEr
checkForAlgorithmic = FALSE; checkForAlgorithmic = FALSE;
mayContainOption = gDefaultConverterContainsOption; mayContainOption = gDefaultConverterContainsOption;
/* the default converter name is already canonical */ /* the default converter name is already canonical */
#endif
} }
else if((converterName[0] == 'U' ? else if((converterName[0] == 'U' ?
( converterName[1] == 'T' && converterName[2] == 'F') : ( converterName[1] == 'T' && converterName[2] == 'F') :
@ -1113,6 +1122,7 @@ ucnv_bld_getAvailableConverter(uint16_t n, UErrorCode *pErrorCode) {
/* default converter name --------------------------------------------------- */ /* default converter name --------------------------------------------------- */
#if !U_CHARSET_IS_UTF8
/* /*
Copy the canonical converter name. Copy the canonical converter name.
ucnv_getDefaultName must be thread safe, which can call this function. ucnv_getDefaultName must be thread safe, which can call this function.
@ -1156,6 +1166,7 @@ internalSetName(const char *name, UErrorCode *status) {
umtx_unlock(&cnvCacheMutex); umtx_unlock(&cnvCacheMutex);
} }
#endif
/* /*
* In order to be really thread-safe, the get function would have to take * In order to be really thread-safe, the get function would have to take
@ -1167,6 +1178,9 @@ internalSetName(const char *name, UErrorCode *status) {
U_CAPI const char* U_EXPORT2 U_CAPI const char* U_EXPORT2
ucnv_getDefaultName() { ucnv_getDefaultName() {
#if U_CHARSET_IS_UTF8
return "UTF-8";
#else
/* local variable to be thread-safe */ /* local variable to be thread-safe */
const char *name; const char *name;
@ -1211,6 +1225,7 @@ ucnv_getDefaultName() {
} }
return name; return name;
#endif
} }
/* /*
@ -1219,6 +1234,7 @@ See internalSetName or the API reference for details.
*/ */
U_CAPI void U_EXPORT2 U_CAPI void U_EXPORT2
ucnv_setDefaultName(const char *converterName) { ucnv_setDefaultName(const char *converterName) {
#if !U_CHARSET_IS_UTF8
if(converterName==NULL) { if(converterName==NULL) {
/* reset to the default codepage */ /* reset to the default codepage */
gDefaultConverterName=NULL; gDefaultConverterName=NULL;
@ -1241,6 +1257,7 @@ ucnv_setDefaultName(const char *converterName) {
/* The close may make the current name go away. */ /* The close may make the current name go away. */
ucnv_close(cnv); ucnv_close(cnv);
} }
#endif
} }
/* data swapping ------------------------------------------------------------ */ /* data swapping ------------------------------------------------------------ */

View File

@ -1,7 +1,7 @@
/* /*
****************************************************************************** ******************************************************************************
* *
* Copyright (C) 1997-2008, International Business Machines * Copyright (C) 1997-2009, International Business Machines
* Corporation and others. All Rights Reserved. * Corporation and others. All Rights Reserved.
* *
****************************************************************************** ******************************************************************************
@ -93,6 +93,7 @@ U_STABLE const char* U_EXPORT2 u_getDataDirectory(void);
*/ */
U_STABLE void U_EXPORT2 u_setDataDirectory(const char *directory); U_STABLE void U_EXPORT2 u_setDataDirectory(const char *directory);
#if !U_CHARSET_IS_UTF8
/** /**
* Please use ucnv_getDefaultName() instead. * Please use ucnv_getDefaultName() instead.
* Return the default codepage for this platform and locale. * Return the default codepage for this platform and locale.
@ -102,6 +103,7 @@ U_STABLE void U_EXPORT2 u_setDataDirectory(const char *directory);
* @internal * @internal
*/ */
U_INTERNAL const char* U_EXPORT2 uprv_getDefaultCodepage(void); U_INTERNAL const char* U_EXPORT2 uprv_getDefaultCodepage(void);
#endif
/** /**
* Please use uloc_getDefault() instead. * Please use uloc_getDefault() instead.

View File

@ -1,6 +1,6 @@
/* /*
********************************************************************** **********************************************************************
* Copyright (C) 1999-2008, International Business Machines * Copyright (C) 1999-2009, International Business Machines
* Corporation and others. All Rights Reserved. * Corporation and others. All Rights Reserved.
********************************************************************** **********************************************************************
* ucnv.h: * ucnv.h:
@ -1790,6 +1790,9 @@ ucnv_getCanonicalName(const char *alias, const char *standard, UErrorCode *pErro
* It is faster if you pass a NULL argument to ucnv_open the * It is faster if you pass a NULL argument to ucnv_open the
* default converter. * default converter.
* *
* If U_CHARSET_IS_UTF8 is defined to 1 in utypes.h then this function
* always returns "UTF-8".
*
* @return returns the current default converter name. * @return returns the current default converter name.
* Storage owned by the library * Storage owned by the library
* @see ucnv_setDefaultName * @see ucnv_setDefaultName
@ -1805,6 +1808,10 @@ ucnv_getDefaultName(void);
* should be called during application initialization. Most of the time, the * should be called during application initialization. Most of the time, the
* results from ucnv_getDefaultName() or ucnv_open with a NULL string argument * results from ucnv_getDefaultName() or ucnv_open with a NULL string argument
* are sufficient for your application. * are sufficient for your application.
*
* If U_CHARSET_IS_UTF8 is defined to 1 in utypes.h then this function
* does nothing.
*
* @param name the converter name to be the default (must be known by ICU). * @param name the converter name to be the default (must be known by ICU).
* @see ucnv_getDefaultName * @see ucnv_getDefaultName
* @system * @system

View File

@ -1,6 +1,6 @@
/* /*
********************************************************************** **********************************************************************
* Copyright (C) 2002-2008, International Business Machines * Copyright (C) 2002-2009, International Business Machines
* Corporation and others. All Rights Reserved. * Corporation and others. All Rights Reserved.
********************************************************************** **********************************************************************
* file name: uconfig.h * file name: uconfig.h
@ -104,7 +104,11 @@
* ICU will not completely build with this switch turned on. * ICU will not completely build with this switch turned on.
* This switch turns off all converters. * This switch turns off all converters.
* *
* You may want to use this together with U_CHARSET_IS_UTF8 defined to 1
* in utypes.h if char* strings in your environment are always in UTF-8.
*
* @stable ICU 3.2 * @stable ICU 3.2
* @see U_CHARSET_IS_UTF8
*/ */
#ifndef UCONFIG_NO_CONVERSION #ifndef UCONFIG_NO_CONVERSION
# define UCONFIG_NO_CONVERSION 0 # define UCONFIG_NO_CONVERSION 0

View File

@ -1,6 +1,6 @@
/* /*
********************************************************************** **********************************************************************
* Copyright (C) 1996-2008, International Business Machines * Copyright (C) 1996-2009, International Business Machines
* Corporation and others. All Rights Reserved. * Corporation and others. All Rights Reserved.
********************************************************************** **********************************************************************
* *
@ -139,6 +139,30 @@
# define U_CHARSET_FAMILY 0 # define U_CHARSET_FAMILY 0
#endif #endif
/**
* \def U_CHARSET_IS_UTF8
*
* Hardcode the default charset to UTF-8.
*
* If this is set to 1, then
* - ICU will assume that all non-invariant char*, StringPiece, std::string etc.
* contain UTF-8 text, regardless of what the system API uses
* - some ICU code will use fast functions like u_strFromUTF8()
* rather than the more general and more heavy-weight conversion API (ucnv.h)
* - ucnv_getDefaultName() always returns "UTF-8"
* - ucnv_setDefaultName() is disabled and will not change the default charset
* - static builds of ICU are smaller
* - more functionality is available with the UCONFIG_NO_CONVERSION build-time
* configuration option (see unicode/uconfig.h), which makes that option
* more usable
*
* @draft ICU 4.2
* @see UCONFIG_NO_CONVERSION
*/
#ifndef U_CHARSET_IS_UTF8
# define U_CHARSET_IS_UTF8 0
#endif
/*===========================================================================*/ /*===========================================================================*/
/* ICUDATA naming scheme */ /* ICUDATA naming scheme */
/*===========================================================================*/ /*===========================================================================*/

View File

@ -1,6 +1,6 @@
/******************************************************************** /********************************************************************
* COPYRIGHT: * COPYRIGHT:
* Copyright (c) 1997-2008, International Business Machines Corporation and * Copyright (c) 1997-2009, International Business Machines Corporation and
* others. All Rights Reserved. * others. All Rights Reserved.
********************************************************************/ ********************************************************************/
/***************************************************************************** /*****************************************************************************
@ -3335,11 +3335,11 @@ TestToUCountPending(){
#endif #endif
} }
static void TestOneDefaultNameChange(const char *name) { static void TestOneDefaultNameChange(const char *name, const char *expected) {
UErrorCode status = U_ZERO_ERROR; UErrorCode status = U_ZERO_ERROR;
UConverter *cnv; UConverter *cnv;
ucnv_setDefaultName(name); ucnv_setDefaultName(name);
if(strcmp(ucnv_getDefaultName(), name)==0) if(strcmp(ucnv_getDefaultName(), expected)==0)
log_verbose("setDefaultName of %s works.\n", name); log_verbose("setDefaultName of %s works.\n", name);
else else
log_err("setDefaultName of %s failed\n", name); log_err("setDefaultName of %s failed\n", name);
@ -3348,7 +3348,7 @@ static void TestOneDefaultNameChange(const char *name) {
log_err("opening the default converter of %s failed\n", name); log_err("opening the default converter of %s failed\n", name);
return; return;
} }
if(strcmp(ucnv_getName(cnv, &status), name)==0) if(strcmp(ucnv_getName(cnv, &status), expected)==0)
log_verbose("ucnv_getName of %s works.\n", name); log_verbose("ucnv_getName of %s works.\n", name);
else else
log_err("ucnv_getName of %s failed\n", name); log_err("ucnv_getName of %s failed\n", name);
@ -3363,12 +3363,18 @@ static void TestDefaultName(void) {
log_verbose("getDefaultName returned %s\n", defaultName); log_verbose("getDefaultName returned %s\n", defaultName);
/*change the default name by setting it */ /*change the default name by setting it */
TestOneDefaultNameChange("UTF-8"); TestOneDefaultNameChange("UTF-8", "UTF-8");
#if U_CHARSET_IS_UTF8
TestOneDefaultNameChange("ISCII,version=1", "UTF-8");
TestOneDefaultNameChange("ISCII,version=2", "UTF-8");
TestOneDefaultNameChange("ISO-8859-1", "UTF-8");
#else
# if !UCONFIG_NO_LEGACY_CONVERSION # if !UCONFIG_NO_LEGACY_CONVERSION
TestOneDefaultNameChange("ISCII,version=1"); TestOneDefaultNameChange("ISCII,version=1", "ISCII,version=1");
TestOneDefaultNameChange("ISCII,version=2"); TestOneDefaultNameChange("ISCII,version=2", "ISCII,version=2");
# endif
TestOneDefaultNameChange("ISO-8859-1", "ISO-8859-1");
#endif #endif
TestOneDefaultNameChange("ISO-8859-1");
/*set the default name back*/ /*set the default name back*/
ucnv_setDefaultName(defaultName); ucnv_setDefaultName(defaultName);