diff --git a/icu4c/source/common/putil.c b/icu4c/source/common/putil.c index 1e670b9afe..9ab015d6ab 100644 --- a/icu4c/source/common/putil.c +++ b/icu4c/source/common/putil.c @@ -1572,6 +1572,7 @@ The leftmost codepage (.xxx) wins. } +#if !U_CHARSET_IS_UTF8 #if U_POSIX_LOCALE /* Due to various platform differences, one platform may specify a charset, @@ -1806,6 +1807,7 @@ uprv_getDefaultCodepage() umtx_unlock(NULL); return name; } +#endif /* !U_CHARSET_IS_UTF8 */ /* end of platform-specific implementation -------------- */ diff --git a/icu4c/source/common/ucnv_bld.c b/icu4c/source/common/ucnv_bld.c index 0a448b280d..7d8d1d5ac5 100644 --- a/icu4c/source/common/ucnv_bld.c +++ b/icu4c/source/common/ucnv_bld.c @@ -1,7 +1,7 @@ /* ******************************************************************** * COPYRIGHT: - * Copyright (c) 1996-2008, International Business Machines Corporation and + * Copyright (c) 1996-2009, International Business Machines Corporation and * others. All Rights Reserved. ******************************************************************** * @@ -158,6 +158,8 @@ static UMTX cnvCacheMutex = NULL; /* Mutex for synchronizing cnv cache a static const char **gAvailableConverters = NULL; static uint16_t gAvailableConverterCount = 0; +#if !U_CHARSET_IS_UTF8 + /* This contains the resolved converter name. So no further alias lookup is needed again. */ static char gDefaultConverterNameBuffer[UCNV_MAX_CONVERTER_NAME_LENGTH + 1]; /* +1 for NULL */ static const char *gDefaultConverterName = NULL; @@ -173,6 +175,7 @@ static const UConverterSharedData *gDefaultAlgorithmicSharedData = NULL; /* Does gDefaultConverterName have a converter option and require extra parsing? */ static UBool gDefaultConverterContainsOption; +#endif /* !U_CHARSET_IS_UTF8 */ static const char DATA_TYPE[] = "cnv"; @@ -201,10 +204,12 @@ static UBool U_CALLCONV ucnv_cleanup(void) { /* Isn't called from flushCache because other threads may have preexisting references to the table. */ ucnv_flushAvailableConverterCache(); +#if !U_CHARSET_IS_UTF8 gDefaultConverterName = NULL; gDefaultConverterNameBuffer[0] = 0; gDefaultConverterContainsOption = FALSE; gDefaultAlgorithmicSharedData = NULL; +#endif umtx_destroy(&cnvCacheMutex); /* Don't worry about destroying the mutex even */ /* if the hash table still exists. The mutex */ @@ -707,6 +712,9 @@ ucnv_loadSharedData(const char *converterName, UConverterLookupData *lookup, UEr /* In case "name" is NULL we want to open the default converter. */ if (converterName == NULL) { +#if U_CHARSET_IS_UTF8 + return (UConverterSharedData *)converterData[UCNV_UTF8]; +#else /* Call ucnv_getDefaultName first to query the name from the OS. */ lookup->realName = ucnv_getDefaultName(); if (lookup->realName == NULL) { @@ -717,6 +725,7 @@ ucnv_loadSharedData(const char *converterName, UConverterLookupData *lookup, UEr checkForAlgorithmic = FALSE; mayContainOption = gDefaultConverterContainsOption; /* the default converter name is already canonical */ +#endif } else if((converterName[0] == 'U' ? ( converterName[1] == 'T' && converterName[2] == 'F') : @@ -1113,6 +1122,7 @@ ucnv_bld_getAvailableConverter(uint16_t n, UErrorCode *pErrorCode) { /* default converter name --------------------------------------------------- */ +#if !U_CHARSET_IS_UTF8 /* Copy the canonical converter name. ucnv_getDefaultName must be thread safe, which can call this function. @@ -1147,7 +1157,7 @@ internalSetName(const char *name, UErrorCode *status) { gDefaultConverterContainsOption = containsOption; uprv_memcpy(gDefaultConverterNameBuffer, name, length); gDefaultConverterNameBuffer[length]=0; - + /* gDefaultConverterName MUST be the last global var set by this function. */ /* It is the variable checked in ucnv_getDefaultName() to see if initialization is required. */ gDefaultConverterName = gDefaultConverterNameBuffer; @@ -1156,6 +1166,7 @@ internalSetName(const char *name, UErrorCode *status) { umtx_unlock(&cnvCacheMutex); } +#endif /* * In order to be really thread-safe, the get function would have to take @@ -1167,6 +1178,9 @@ internalSetName(const char *name, UErrorCode *status) { U_CAPI const char* U_EXPORT2 ucnv_getDefaultName() { +#if U_CHARSET_IS_UTF8 + return "UTF-8"; +#else /* local variable to be thread-safe */ const char *name; @@ -1211,6 +1225,7 @@ ucnv_getDefaultName() { } return name; +#endif } /* @@ -1219,6 +1234,7 @@ See internalSetName or the API reference for details. */ U_CAPI void U_EXPORT2 ucnv_setDefaultName(const char *converterName) { +#if !U_CHARSET_IS_UTF8 if(converterName==NULL) { /* reset to the default codepage */ gDefaultConverterName=NULL; @@ -1241,6 +1257,7 @@ ucnv_setDefaultName(const char *converterName) { /* The close may make the current name go away. */ ucnv_close(cnv); } +#endif } /* data swapping ------------------------------------------------------------ */ diff --git a/icu4c/source/common/unicode/putil.h b/icu4c/source/common/unicode/putil.h index 48b7817254..090b226f1d 100644 --- a/icu4c/source/common/unicode/putil.h +++ b/icu4c/source/common/unicode/putil.h @@ -1,7 +1,7 @@ /* ****************************************************************************** * -* Copyright (C) 1997-2008, International Business Machines +* Copyright (C) 1997-2009, International Business Machines * Corporation and others. All Rights Reserved. * ****************************************************************************** @@ -29,7 +29,7 @@ * \file * \brief C API: Platform Utilities */ - + /** Define this to 1 if your platform supports IEEE 754 floating point, to 0 if it does not. */ #ifndef IEEE_754 @@ -93,6 +93,7 @@ U_STABLE const char* U_EXPORT2 u_getDataDirectory(void); */ U_STABLE void U_EXPORT2 u_setDataDirectory(const char *directory); +#if !U_CHARSET_IS_UTF8 /** * Please use ucnv_getDefaultName() instead. * Return the default codepage for this platform and locale. @@ -102,6 +103,7 @@ U_STABLE void U_EXPORT2 u_setDataDirectory(const char *directory); * @internal */ U_INTERNAL const char* U_EXPORT2 uprv_getDefaultCodepage(void); +#endif /** * Please use uloc_getDefault() instead. diff --git a/icu4c/source/common/unicode/ucnv.h b/icu4c/source/common/unicode/ucnv.h index e5dafdcf83..236867979f 100644 --- a/icu4c/source/common/unicode/ucnv.h +++ b/icu4c/source/common/unicode/ucnv.h @@ -1,6 +1,6 @@ /* ********************************************************************** -* Copyright (C) 1999-2008, International Business Machines +* Copyright (C) 1999-2009, International Business Machines * Corporation and others. All Rights Reserved. ********************************************************************** * ucnv.h: @@ -1790,6 +1790,9 @@ ucnv_getCanonicalName(const char *alias, const char *standard, UErrorCode *pErro * It is faster if you pass a NULL argument to ucnv_open the * default converter. * + * If U_CHARSET_IS_UTF8 is defined to 1 in utypes.h then this function + * always returns "UTF-8". + * * @return returns the current default converter name. * Storage owned by the library * @see ucnv_setDefaultName @@ -1805,6 +1808,10 @@ ucnv_getDefaultName(void); * should be called during application initialization. Most of the time, the * results from ucnv_getDefaultName() or ucnv_open with a NULL string argument * is sufficient for your application. + * + * If U_CHARSET_IS_UTF8 is defined to 1 in utypes.h then this function + * does nothing. + * * @param name the converter name to be the default (must be known by ICU). * @see ucnv_getDefaultName * @system diff --git a/icu4c/source/common/unicode/uconfig.h b/icu4c/source/common/unicode/uconfig.h index f28ee3d83c..6521eed472 100644 --- a/icu4c/source/common/unicode/uconfig.h +++ b/icu4c/source/common/unicode/uconfig.h @@ -1,6 +1,6 @@ /* ********************************************************************** -* Copyright (C) 2002-2008, International Business Machines +* Copyright (C) 2002-2009, International Business Machines * Corporation and others. All Rights Reserved. ********************************************************************** * file name: uconfig.h @@ -104,7 +104,11 @@ * ICU will not completely build with this switch turned on. * This switch turns off all converters. * + * You may want to use this together with U_CHARSET_IS_UTF8 defined to 1 + * in utypes.h if char* strings in your environment are always in UTF-8. + * * @stable ICU 3.2 + * @see U_CHARSET_IS_UTF8 */ #ifndef UCONFIG_NO_CONVERSION # define UCONFIG_NO_CONVERSION 0 diff --git a/icu4c/source/common/unicode/utypes.h b/icu4c/source/common/unicode/utypes.h index 688b50ba56..cbf663a114 100644 --- a/icu4c/source/common/unicode/utypes.h +++ b/icu4c/source/common/unicode/utypes.h @@ -1,6 +1,6 @@ /* ********************************************************************** -* Copyright (C) 1996-2008, International Business Machines +* Copyright (C) 1996-2009, International Business Machines * Corporation and others. All Rights Reserved. ********************************************************************** * @@ -139,6 +139,30 @@ # define U_CHARSET_FAMILY 0 #endif +/** + * \def U_CHARSET_IS_UTF8 + * + * Hardcode the default charset to UTF-8. + * + * If this is set to 1, then + * - ICU will assume that all non-invariant char*, StringPiece, std::string etc. + * contain UTF-8 text, regardless of what the system API uses + * - some ICU code will use fast functions like u_strFromUTF8() + * rather than the more general and more heavy-weight conversion API (ucnv.h) + * - ucnv_getDefaultName() always returns "UTF-8" + * - ucnv_setDefaultName() is disabled and will not change the default charset + * - static builds of ICU are smaller + * - more functionality is available with the UCONFIG_NO_CONVERSION build-time + * configuration option (see unicode/uconfig.h) + * - the UCONFIG_NO_CONVERSION build option in uconfig.h is more usable + * + * @draft ICU 4.2 + * @see UCONFIG_NO_CONVERSION + */ +#ifndef U_CHARSET_IS_UTF8 +# define U_CHARSET_IS_UTF8 0 +#endif + /*===========================================================================*/ /* ICUDATA naming scheme */ /*===========================================================================*/ diff --git a/icu4c/source/test/cintltst/ccapitst.c b/icu4c/source/test/cintltst/ccapitst.c index 18accc4533..2e2349698f 100644 --- a/icu4c/source/test/cintltst/ccapitst.c +++ b/icu4c/source/test/cintltst/ccapitst.c @@ -1,6 +1,6 @@ /******************************************************************** * COPYRIGHT: - * Copyright (c) 1997-2008, International Business Machines Corporation and + * Copyright (c) 1997-2009, International Business Machines Corporation and * others. All Rights Reserved. ********************************************************************/ /***************************************************************************** @@ -3335,11 +3335,11 @@ TestToUCountPending(){ #endif } -static void TestOneDefaultNameChange(const char *name) { +static void TestOneDefaultNameChange(const char *name, const char *expected) { UErrorCode status = U_ZERO_ERROR; UConverter *cnv; ucnv_setDefaultName(name); - if(strcmp(ucnv_getDefaultName(), name)==0) + if(strcmp(ucnv_getDefaultName(), expected)==0) log_verbose("setDefaultName of %s works.\n", name); else log_err("setDefaultName of %s failed\n", name); @@ -3348,7 +3348,7 @@ static void TestOneDefaultNameChange(const char *name) { log_err("opening the default converter of %s failed\n", name); return; } - if(strcmp(ucnv_getName(cnv, &status), name)==0) + if(strcmp(ucnv_getName(cnv, &status), expected)==0) log_verbose("ucnv_getName of %s works.\n", name); else log_err("ucnv_getName of %s failed\n", name); @@ -3363,12 +3363,18 @@ static void TestDefaultName(void) { log_verbose("getDefaultName returned %s\n", defaultName); /*change the default name by setting it */ - TestOneDefaultNameChange("UTF-8"); -#if !UCONFIG_NO_LEGACY_CONVERSION - TestOneDefaultNameChange("ISCII,version=1"); - TestOneDefaultNameChange("ISCII,version=2"); + TestOneDefaultNameChange("UTF-8", "UTF-8"); +#if U_CHARSET_IS_UTF8 + TestOneDefaultNameChange("ISCII,version=1", "UTF-8"); + TestOneDefaultNameChange("ISCII,version=2", "UTF-8"); + TestOneDefaultNameChange("ISO-8859-1", "UTF-8"); +#else +# if !UCONFIG_NO_LEGACY_CONVERSION + TestOneDefaultNameChange("ISCII,version=1", "ISCII,version=1"); + TestOneDefaultNameChange("ISCII,version=2", "ISCII,version=2"); +# endif + TestOneDefaultNameChange("ISO-8859-1", "ISO-8859-1"); #endif - TestOneDefaultNameChange("ISO-8859-1"); /*set the default name back*/ ucnv_setDefaultName(defaultName);