diff --git a/icu4c/source/common/unicode/uscript.h b/icu4c/source/common/unicode/uscript.h index 943e2ea326..6759b4f294 100644 --- a/icu4c/source/common/unicode/uscript.h +++ b/icu4c/source/common/unicode/uscript.h @@ -1,6 +1,6 @@ /* ********************************************************************** - * Copyright (C) 1997-2014, International Business Machines + * Copyright (C) 1997-2015, International Business Machines * Corporation and others. All Rights Reserved. ********************************************************************** * @@ -21,25 +21,31 @@ * \file * \brief C API: Unicode Script Information */ - + /** * Constants for ISO 15924 script codes. * - * Many of these script codes - those from Unicode's ScriptNames.txt - - * are character property values for Unicode's Script property. - * See UAX #24 Script Names (http://www.unicode.org/reports/tr24/). + * The current set of script code constants supports at least all scripts + * that are encoded in the version of Unicode which ICU currently supports. + * The names of the constants are usually derived from the + * Unicode script property value aliases. + * See UAX #24 Unicode Script Property (http://www.unicode.org/reports/tr24/) + * and http://www.unicode.org/Public/UCD/latest/ucd/PropertyValueAliases.txt . * * Starting with ICU 3.6, constants for most ISO 15924 script codes - * are included (currently excluding private-use codes Qaaa..Qabx). - * For scripts for which there are codes in ISO 15924 but which are not - * used in the Unicode Character Database (UCD), there are no Unicode characters - * associated with those scripts. + * are included, for use with language tags, CLDR data, and similar. + * Some of those codes are not used in the Unicode Character Database (UCD). + * For example, there are no characters that have a UCD script property value of + * Hans or Hant. All Han ideographs have the Hani script property value in Unicode. 
* - * For example, there are no characters that have a UCD script code of - * Hans or Hant. All Han ideographs have the Hani script code. - * The Hans and Hant script codes are used with CLDR data. + * Private-use codes Qaaa..Qabx are not included. * - * ISO 15924 script codes are included for use with CLDR and similar. + * Starting with ICU 55, script codes are only added when their scripts + * have been or will certainly be encoded in Unicode, + * and have been assigned Unicode script property value aliases, + * to ensure that their script names are stable and match the names of the constants. + * Script codes like Latf and Aran that are not subject to separate encoding + * may be added at any time. * * @stable ICU 2.2 */ @@ -418,9 +424,16 @@ typedef enum UScriptCode { /** @stable ICU 54 */ USCRIPT_SIDDHAM = 166,/* Sidd */ - /* Private use codes from Qaaa - Qabx are not supported */ - - /** @stable ICU 2.2 */ + /** + * One higher than the last script code constant. + * + * There are constants for Unicode 7 script property values. + * There are constants for ISO 15924 script codes assigned on or before 2013-10-12. + * There are no constants for private use codes from Qaaa - Qabx + * except as used in the UCD. + * + * @stable ICU 2.2 + */ USCRIPT_CODE_LIMIT = 167 } UScriptCode; diff --git a/icu4c/source/data/unidata/changes.txt b/icu4c/source/data/unidata/changes.txt index 23f29bf2e3..37feb9a18e 100644 --- a/icu4c/source/data/unidata/changes.txt +++ b/icu4c/source/data/unidata/changes.txt @@ -1,4 +1,4 @@ -* Copyright (C) 2004-2014, International Business Machines +* Copyright (C) 2004-2015, International Business Machines * Corporation and others. All Rights Reserved. 
* * file name: changes.txt @@ -13,6 +13,38 @@ ---------------------------------------------------------------------------- *** +* New ISO 15924 script codes + +Starting with ICU 55, we do not add UScriptCode constants any more until their scripts +are encoded in Unicode, or can be assumed to be encoded in the next Unicode version. +Script enum constant names should follow the Unicode script property value aliases, +which are assigned only when the scripts are encoded. +When we add script codes early and guess their names wrong, we end up with confusing enum constants +and have sometimes had to add aliases. + +Exception: Script codes like Latf and Aran that are not subject to separate encoding +can be added at any time. + +Script codes not yet in ICU: http://www.unicode.org/iso15924/codechanges.html + +Added 2014-11-15, see http://bugs.icu-project.org/trac/ticket/11561 +- Adlm 166 Adlam +- Aran 161 Arabic (Nastaliq variant) +- Kitl 505 Khitan large script +- Kits 288 Khitan small script +- Marc 332 Marchen +- Osge 219 Osage + +Aran can be added as USCRIPT_ARABIC_NASTALIQ at any time. + +Adlam, Marchen, and Osage are expected to go into Unicode 9; +we should assign Unicode script property value aliases for them +soon after Unicode 8 is released, and add them in ICU 56. + +Khitan scripts will be encoded later. + +---------------------------------------------------------------------------- *** + Unicode 8.0 update for ICU ?? * UCA issue from 7.0