From 8bc9f647b8423fb94724124103d6d851ac25d4f9 Mon Sep 17 00:00:00 2001 From: Markus Scherer Date: Sat, 19 Apr 2003 00:21:15 +0000 Subject: [PATCH] ICU-2427 final Unicode 4 update X-SVN-Rev: 11595 --- icu4c/source/data/unidata/Blocks.txt | 3 +++ icu4c/source/data/unidata/DerivedCoreProperties.txt | 6 +++--- icu4c/source/data/unidata/DerivedJoiningGroup.txt | 10 ++++++---- icu4c/source/data/unidata/DerivedJoiningType.txt | 12 +++++++----- .../data/unidata/DerivedNormalizationProps.txt | 9 +++++---- icu4c/source/data/unidata/DerivedNumericValues.txt | 6 +++--- icu4c/source/data/unidata/EastAsianWidth.txt | 1 + icu4c/source/data/unidata/PropList.txt | 6 +++--- icu4c/source/data/unidata/Scripts.txt | 9 +++++---- icu4c/source/data/unidata/SpecialCasing.txt | 4 +++- icu4c/source/test/cintltst/cucdtst.c | 3 ++- 11 files changed, 41 insertions(+), 28 deletions(-) diff --git a/icu4c/source/data/unidata/Blocks.txt b/icu4c/source/data/unidata/Blocks.txt index dbb16527da..6dc2bd2fe0 100644 --- a/icu4c/source/data/unidata/Blocks.txt +++ b/icu4c/source/data/unidata/Blocks.txt @@ -2,6 +2,9 @@ # Correlated with Unicode 4.0 # Note: The casing of block names is not normative. # For example, "Basic Latin" and "BASIC LATIN" are equivalent. +# +# Code points not explicitly listed in this file are given the value No_Block. 
+# # Start Code..End Code; Block Name 0000..007F; Basic Latin 0080..00FF; Latin-1 Supplement diff --git a/icu4c/source/data/unidata/DerivedCoreProperties.txt b/icu4c/source/data/unidata/DerivedCoreProperties.txt index e70e77c2cc..d9fb00c9aa 100644 --- a/icu4c/source/data/unidata/DerivedCoreProperties.txt +++ b/icu4c/source/data/unidata/DerivedCoreProperties.txt @@ -1,11 +1,11 @@ # DerivedCoreProperties-4.0.0.txt -# Date: 2003-03-12, 23:52:06 GMT [MD] +# Date: 2003-03-20, 20:07:28 GMT [MD] # # Unicode Character Database: Derived Property Data # Generated algorithmically from the Unicode Character Database # For documentation, see UCD.html -# Note: Unassigned and Noncharacter codepoints are omitted, -# except when listing Noncharacter or Cn. +# Note: Unassigned and Noncharacter codepoints may be omitted +# if they have default property values. # ================================================ # ================================================ diff --git a/icu4c/source/data/unidata/DerivedJoiningGroup.txt b/icu4c/source/data/unidata/DerivedJoiningGroup.txt index 0899e4522d..85326cc0a0 100644 --- a/icu4c/source/data/unidata/DerivedJoiningGroup.txt +++ b/icu4c/source/data/unidata/DerivedJoiningGroup.txt @@ -1,16 +1,18 @@ # DerivedJoiningGroup-4.0.0.txt -# Date: 2003-02-20,17:13:55 GMT [MD] +# Date: 2003-04-05, 02:41:45 GMT [MD] # # Unicode Character Database: Derived Property Data # Generated algorithmically from the Unicode Character Database # For documentation, see UCD.html -# Note: Unassigned and Noncharacter codepoints are omitted, -# except when listing Noncharacter or Cn. +# Note: Unassigned and Noncharacter codepoints may be omitted +# if they have default property values. 
# ================================================ # ================================================ -# Joining Group (listing ArabicShaping.txt, field 2) +# Joining Group (listing ArabicShaping.txt, field 3) +# All code points not explicitly listed in this file have the property +# value: NO_JOINING_GROUP. # ================================================ 0639..063A ; AIN # Lo [2] ARABIC LETTER AIN..ARABIC LETTER GHAIN diff --git a/icu4c/source/data/unidata/DerivedJoiningType.txt b/icu4c/source/data/unidata/DerivedJoiningType.txt index e7970c97d7..34ebb52bd0 100644 --- a/icu4c/source/data/unidata/DerivedJoiningType.txt +++ b/icu4c/source/data/unidata/DerivedJoiningType.txt @@ -1,18 +1,19 @@ # DerivedJoiningType-4.0.0.txt -# Date: 2003-03-12, 23:52:29 GMT [MD] +# Date: 2003-04-05, 02:42:00 GMT [MD] # # Unicode Character Database: Derived Property Data # Generated algorithmically from the Unicode Character Database # For documentation, see UCD.html -# Note: Unassigned and Noncharacter codepoints are omitted, -# except when listing Noncharacter or Cn. +# Note: Unassigned and Noncharacter codepoints may be omitted +# if they have default property values. # ================================================ # ================================================ -# Joining Type (listing ArabicShaping.txt, field 1). +# Joining Type (listing ArabicShaping.txt, field 2). # Type T is derived, as described in ArabicShaping.txt -# All code points not listed here have the type U +# All code points not explicitly listed in this file have the property +# value: U. 
# ================================================ 0640 ; C # Lm ARABIC TATWEEL @@ -77,6 +78,7 @@ # ================================================ +# No values for L # Total code points: 0 diff --git a/icu4c/source/data/unidata/DerivedNormalizationProps.txt b/icu4c/source/data/unidata/DerivedNormalizationProps.txt index 27fc7df696..bbd05a5304 100644 --- a/icu4c/source/data/unidata/DerivedNormalizationProps.txt +++ b/icu4c/source/data/unidata/DerivedNormalizationProps.txt @@ -1,11 +1,11 @@ -# DerivedNormalizationProperties-4.0.0.txt -# Date: 2003-02-26,02:57:10 GMT [MD] +# DerivedNormalizationProps-4.0.0.txt +# Date: 2003-03-20, 20:07:31 GMT [MD] # # Unicode Character Database: Derived Property Data # Generated algorithmically from the Unicode Character Database # For documentation, see UCD.html -# Note: Unassigned and Noncharacter codepoints are omitted, -# except when listing Noncharacter or Cn. +# Note: Unassigned and Noncharacter codepoints may be omitted +# if they have default property values. # ================================================ # ================================================ @@ -14,6 +14,7 @@ # Generated from computing: b = NFKC(Fold(a)); c = NFKC(Fold(b)); # Then if (c != b) add the mapping from a to c to the set of # mappings that constitute the FC_NFKC_Closure list +# Uses the full case folding from CaseFolding.txt, without the T option. 
037A ; FNC; 0020 03B9 03D2 ; FNC; 03C5 diff --git a/icu4c/source/data/unidata/DerivedNumericValues.txt b/icu4c/source/data/unidata/DerivedNumericValues.txt index 433997f4b3..7aac3a55c2 100644 --- a/icu4c/source/data/unidata/DerivedNumericValues.txt +++ b/icu4c/source/data/unidata/DerivedNumericValues.txt @@ -1,11 +1,11 @@ # DerivedNumericValues-4.0.0.txt -# Date: 2003-03-12, 23:52:49 GMT [MD] +# Date: 2003-03-19, 01:18:09 GMT [MD] # # Unicode Character Database: Derived Property Data # Generated algorithmically from the Unicode Character Database # For documentation, see UCD.html -# Note: Unassigned and Noncharacter codepoints are omitted, -# except when listing Noncharacter or Cn. +# Note: Unassigned and Noncharacter codepoints may be omitted +# if they have default property values. # ================================================ diff --git a/icu4c/source/data/unidata/EastAsianWidth.txt b/icu4c/source/data/unidata/EastAsianWidth.txt index 8003c13024..cc5c5dd07f 100644 --- a/icu4c/source/data/unidata/EastAsianWidth.txt +++ b/icu4c/source/data/unidata/EastAsianWidth.txt @@ -736,6 +736,7 @@ FFFD;A 2A6D7..2F7FF;W 2F800..2FA1D;W 2FA1E..2FFFD;W +30000..3FFFD;W E0001;N E0020..E007F;N E0100..E01EF;A diff --git a/icu4c/source/data/unidata/PropList.txt b/icu4c/source/data/unidata/PropList.txt index d4b4dcea0b..90176ad019 100644 --- a/icu4c/source/data/unidata/PropList.txt +++ b/icu4c/source/data/unidata/PropList.txt @@ -1,10 +1,10 @@ # PropList-4.0.0.txt -# Date: 2003-03-14, 23:37:56 GMT [MD] +# Date: 2003-03-20, 20:07:40 GMT [MD] # # Unicode Character Database: Extended Properties # For documentation, see UCD.html -# Note: Unassigned and Noncharacter codepoints are omitted, -# except when listing Noncharacter or Cn. +# Note: Unassigned and Noncharacter codepoints may be omitted +# if they have default property values. 
# ================================================ # ================================================ diff --git a/icu4c/source/data/unidata/Scripts.txt b/icu4c/source/data/unidata/Scripts.txt index 4951e28fb3..696720aaa0 100644 --- a/icu4c/source/data/unidata/Scripts.txt +++ b/icu4c/source/data/unidata/Scripts.txt @@ -1,15 +1,16 @@ # Scripts-4.0.0.txt -# Date: 2003-03-14, 20:22:09 GMT [MD] +# Date: 2003-03-20, 20:07:48 GMT [MD] # # For documentation, see UCD.html -# Note: Unassigned and Noncharacter codepoints are omitted, -# except when listing Noncharacter or Cn. +# Note: Unassigned and Noncharacter codepoints may be omitted +# if they have default property values. # ================================================ # ================================================ # Script -# The value for all code points not explicitly listed in this file is COMMON. +# All code points not explicitly listed in this file have the property +# value: COMMON. # ================================================ 0041..005A ; LATIN # L& [26] LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER Z diff --git a/icu4c/source/data/unidata/SpecialCasing.txt b/icu4c/source/data/unidata/SpecialCasing.txt index 259e8147ed..34d1c61de3 100644 --- a/icu4c/source/data/unidata/SpecialCasing.txt +++ b/icu4c/source/data/unidata/SpecialCasing.txt @@ -1,5 +1,5 @@ # SpecialCasing-4.0.0.txt -# Date: 2003-02-25,23:12:50 GMT [MD] +# Date: 2003-03-14, 20:22:04 GMT [MD] # # Special Casing Properties # @@ -9,6 +9,8 @@ # characters where they are 1-1, and does not have locale-specific mappings.) # For more information, see the discussion of Case Mappings in the Unicode Standard. # +# All code points not listed in this file that do not have a simple case mapping
# ================================================================================ # Format # ================================================================================ diff --git a/icu4c/source/test/cintltst/cucdtst.c b/icu4c/source/test/cintltst/cucdtst.c index b594ed448a..1c19186e3f 100644 --- a/icu4c/source/test/cintltst/cucdtst.c +++ b/icu4c/source/test/cintltst/cucdtst.c @@ -2269,7 +2269,8 @@ TestAdditionalProperties() { { 0x1D198, UCHAR_EAST_ASIAN_WIDTH, U_EA_NEUTRAL }, { 0x20000, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE }, { 0x2F8C7, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE }, - { 0x3a5bd, UCHAR_EAST_ASIAN_WIDTH, U_EA_NEUTRAL }, + { 0x3a5bd, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE }, /* plane 3 got default W values in Unicode 4 */ + { 0x5a5bd, UCHAR_EAST_ASIAN_WIDTH, U_EA_NEUTRAL }, { 0xFEEEE, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS }, { 0x10EEEE, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS },