ICU-11574 near-final Unicode 8 data with new block API constants, code adjustments & fixes; svn merge --reintegrate branches/markus/uni80; plus Java port of C++ r37450 spoof data d6 update

X-SVN-Rev: 37474
This commit is contained in:
Markus Scherer 2015-05-28 23:17:23 +00:00
parent fc290db4c1
commit 88e2877fd6
25 changed files with 27388 additions and 28261 deletions

View File

@ -1,6 +1,6 @@
/*
*******************************************************************************
* Copyright (C) 2012-2014, International Business Machines
* Copyright (C) 2012-2015, International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
* CollationFCD.java, ported from collationfcd.h/.cpp
@ -200,9 +200,9 @@ public final class CollationFCD {
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0x36,0x33,0,0,0x37,0,0,0,0,0,0,0,0,
0x20,0,0,0,0,0,0x29,0x38,0,0x39,0x3a,0,0,0x3a,0x3b,0,
0,0,0,0,0,0x3c,0x3d,0x3e,0,0,0,0,0,0,0,0x17,
0,0,0,0x36,0x37,0,0,0x38,0,0,0,0,0,0,0,0,
0x20,0,0,0,0,0,0x29,0x39,0,0x3a,0x3b,0,0,0x3b,0x3c,0,
0,0,0,0,0,0x3d,0x3e,0x3f,0,0,0,0,0,0,0,0x17,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
@ -225,7 +225,7 @@ public final class CollationFCD {
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0x3f,0x40,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0x40,0x41,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
@ -242,9 +242,9 @@ public final class CollationFCD {
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0x41,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0x42,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0x42,0,0,0,0,0,0,0,0,0,0,0,0,0,0
0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0
};
private static final byte[] tcccIndex={
@ -331,9 +331,9 @@ public final class CollationFCD {
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0x66,0x60,0,0,0x67,0,0,0,0,0,0,0,0,
0x3a,0,0,0,0,0,0x43,0x68,0,0x69,0x6a,0,0,0x6a,0x6b,0,
0,0,0,0,0,0x6c,0x6d,0x6e,0,0,0,0,0,0,0,0x30,
0,0,0,0x66,0x67,0,0,0x68,0,0,0,0,0,0,0,0,
0x3a,0,0,0,0,0,0x43,0x69,0,0x6a,0x6b,0,0,0x6b,0x6c,0,
0,0,0,0,0,0x6d,0x6e,0x6f,0,0,0,0,0,0,0,0x30,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
@ -356,7 +356,7 @@ public final class CollationFCD {
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0x6f,0x70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0x70,0x71,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
@ -373,27 +373,27 @@ public final class CollationFCD {
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0x3c,0x71,0x72,0,0,0,0,0,
0,0,0,0,0,0,0,0,0x3c,0x72,0x73,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0x73,0,0,0,0,0,0,0,0,0,0,0,0,0,0
0,0xe,0,0,0,0,0,0,0,0,0,0,0,0,0,0
};
private static final int[] lcccBits={
0,0xffffffff,0xffff7fff,0xffff,0xf8,0xfffe0000,0xbfffffff,0xb6,0x7ff0000,0xfffff800,0x10000,0x9fc00000,0x3d9f,0x20000,0xffff0000,0x7ff,
0xff800,0xfbc00000,0x3eef,0xe000000,0xfffffff0,0x10000000,0x1e2000,0x2000,0x602000,0x400,0x7000000,0xf00,0x3000000,0x2a00000,0x3c3e0000,0xdf,
0xff800,0xfbc00000,0x3eef,0xe000000,0xfffffff8,0x10000000,0x1e2000,0x2000,0x602000,0x400,0x7000000,0xf00,0x3000000,0x2a00000,0x3c3e0000,0xdf,
0x40,0x6800000,0xe0000000,0x100000,0x20040000,0x200,0x1800000,0x9fe00001,0x3fff0000,0x10,0xc00,0xc0040,0x800000,0xfff70000,0x31021fd,0xf03fffff,
0x1fff0000,0x1ffe2,0x38000,0x80000000,0xfc00,0x6000000,0x3ff08000,0x30000,0x3ffff,0x3800,0x80000,1,0xc19d0000,2,0x400000,0x4000035,
0x4108000,0x40000000,0x3fff
0x1fff0000,0x1ffe2,0x38000,0x80000000,0xfc00,0x6000000,0x3ff08000,0xc0000000,0x30000,0x3ffff,0x3800,0x80000,1,0xc19d0000,2,0x400000,
0x4000035,0x4108000,0x40000000
};
private static final int[] tcccBits={
0,0xffffffff,0x3e7effbf,0xbe7effbf,0xfffcffff,0x7ef1ff3f,0xfff3f1f8,0x7fffff3f,0x18003,0xdfffe000,0xff31ffcf,0xcfffffff,0xfffc0,0xffff7fff,0xffff,0x1d760,
0x1fc00,0x187c00,0x200708b,0x2000000,0x708b0000,0xc00000,0xf8,0xfccf0006,0x33ffcfc,0xfffe0000,0xbfffffff,0xb6,0x7ff0000,0x7c,0xfffff800,0x10000,
0x9fc80005,0x3d9f,0x20000,0xffff0000,0x7ff,0xff800,0xfbc00000,0x3eef,0xe000000,0xfffffff0,0x10120200,0xff1e2000,0x10000000,0xb0002000,0x10480000,0x4e002000,
0x9fc80005,0x3d9f,0x20000,0xffff0000,0x7ff,0xff800,0xfbc00000,0x3eef,0xe000000,0xfffffff8,0x10120200,0xff1e2000,0x10000000,0xb0002000,0x10480000,0x4e002000,
0x2000,0x30002000,0x602100,0x24000400,0x7000000,0xf00,0x3000000,0x2a00000,0x3d7e0000,0xdf,0x40,0x6800000,0xe0000000,0x100000,0x20040000,0x200,
0x1800000,0x9fe00001,0x3fff0000,0x10,0xc00,0xc0040,0x800000,0xfff70000,0x31021fd,0xf03fffff,0xbffffff,0x3ffffff,0x3f3fffff,0xaaff3f3f,0x3fffffff,0x1fdfffff,
0xefcfffde,0x1fdc7fff,0x1fff0000,0x1ffe2,0x800,0xc000000,0x4000,0xe000,0x1210,0x50,0x292,0x333e005,0x333,0xf000,0x3c0f,0x38000,
0x80000000,0xfc00,0x55555000,0x36db02a5,0x46100000,0x47900000,0x3ff08000,0x30000,0x3ffff,0x3800,0x80000,1,0xc19d0000,2,0x400000,0x4000035,
0x4108000,0x5f7ffc00,0x7fdb,0x3fff
0x80000000,0xfc00,0x55555000,0x36db02a5,0x46100000,0x47900000,0x3ff08000,0xc0000000,0x30000,0x3ffff,0x3800,0x80000,1,0xc19d0000,2,0x400000,
0x4000035,0x4108000,0x5f7ffc00,0x7fdb
};
}

View File

@ -1,7 +1,7 @@
/*
*******************************************************************************
*
* Copyright (C) 1999-2014, International Business Machines
* Copyright (C) 1999-2015, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
@ -367,33 +367,53 @@ public final class CollationWeights {
middle.count=(int)((middle.end-middle.start)>>(8*(4-middleLength)))+1;
} else {
/* no middle range, eliminate overlaps */
/* reduce or remove the lower ranges that go beyond upperLimit */
for(int length=4; length>middleLength; --length) {
if(lower[length] != null && upper[length] != null &&
lower[length].count>0 && upper[length].count>0) {
long start=upper[length].start;
long end=lower[length].end;
// Note: The lowerEnd and upperStart weights are versions of
// lowerLimit and upperLimit (which are lowerLimit<upperLimit),
// truncated (still less-or-equal)
// and then with their last bytes changed to the
// maxByte (for lowerEnd) or minByte (for upperStart).
final long lowerEnd=lower[length].end;
final long upperStart=upper[length].start;
boolean merged=false;
if(end>=start || incWeight(end, length)==start) {
/* lower and upper ranges collide or are directly adjacent: merge these two and remove all shorter ranges */
start=lower[length].start;
end=lower[length].end=upper[length].end;
/*
* merging directly adjacent ranges needs to subtract the 0/1 gaps in between;
* it may result in a range with count>countBytes
*/
if(lowerEnd>upperStart) {
// These two lower and upper ranges collide.
// Since lowerLimit<upperLimit and lowerEnd and upperStart
// are versions with only their last bytes modified
// (and following ones removed/reset to 0),
// lowerEnd>upperStart is only possible
// if the leading bytes are equal
// and lastByte(lowerEnd)>lastByte(upperStart).
assert(truncateWeight(lowerEnd, length-1)==
truncateWeight(upperStart, length-1));
// Intersect these two ranges.
lower[length].end=upper[length].end;
lower[length].count=
getWeightTrail(end, length)-getWeightTrail(start, length)+1+
countBytes(length)*(getWeightByte(end, length-1)-getWeightByte(start, length-1));
getWeightTrail(lower[length].end, length)-
getWeightTrail(lower[length].start, length)+1;
// count might be <=0 in which case there is no room,
// and the range-collecting code below will ignore this range.
merged=true;
} else if(lowerEnd==upperStart) {
// Not possible, unless minByte==maxByte which is not allowed.
assert(minBytes[length]<maxBytes[length]);
} else /* lowerEnd<upperStart */ {
if(incWeight(lowerEnd, length)==upperStart) {
// Merge adjacent ranges.
lower[length].end=upper[length].end;
lower[length].count+=upper[length].count; // might be >countBytes
merged=true;
}
}
if(merged) {
// Remove all shorter ranges.
// There was no room available for them between the ranges we just merged.
upper[length].count=0;
while(--length>middleLength) {
if(lower[length] != null) {
lower[length].count = 0;
}
if(upper[length] != null) {
upper[length].count = 0;
}
lower[length]=upper[length]=null;
}
break;
}

View File

@ -1116,10 +1116,33 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
/** @stable ICU 54 */
public static final int WARANG_CITI_ID = 252; /*[118A0]*/
/* New blocks in Unicode 8.0 */
/** @stable ICU 56 */
public static final int AHOM_ID = 253; /*[11700]*/
/** @stable ICU 56 */
public static final int ANATOLIAN_HIEROGLYPHS_ID = 254; /*[14400]*/
/** @stable ICU 56 */
public static final int CHEROKEE_SUPPLEMENT_ID = 255; /*[AB70]*/
/** @stable ICU 56 */
public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E_ID = 256; /*[2B820]*/
/** @stable ICU 56 */
public static final int EARLY_DYNASTIC_CUNEIFORM_ID = 257; /*[12480]*/
/** @stable ICU 56 */
public static final int HATRAN_ID = 258; /*[108E0]*/
/** @stable ICU 56 */
public static final int MULTANI_ID = 259; /*[11280]*/
/** @stable ICU 56 */
public static final int OLD_HUNGARIAN_ID = 260; /*[10C80]*/
/** @stable ICU 56 */
public static final int SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS_ID = 261; /*[1F900]*/
/** @stable ICU 56 */
public static final int SUTTON_SIGNWRITING_ID = 262; /*[1D800]*/
/**
* @stable ICU 2.4
*/
public static final int COUNT = 253;
public static final int COUNT = 263;
// blocks objects ---------------------------------------------------
@ -2315,6 +2338,38 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
/** @stable ICU 54 */
public static final UnicodeBlock WARANG_CITI = new UnicodeBlock("WARANG_CITI", WARANG_CITI_ID); /*[118A0]*/
/* New blocks in Unicode 8.0 */
/** @stable ICU 56 */
public static final UnicodeBlock AHOM = new UnicodeBlock("AHOM", AHOM_ID); /*[11700]*/
/** @stable ICU 56 */
public static final UnicodeBlock ANATOLIAN_HIEROGLYPHS =
new UnicodeBlock("ANATOLIAN_HIEROGLYPHS", ANATOLIAN_HIEROGLYPHS_ID); /*[14400]*/
/** @stable ICU 56 */
public static final UnicodeBlock CHEROKEE_SUPPLEMENT =
new UnicodeBlock("CHEROKEE_SUPPLEMENT", CHEROKEE_SUPPLEMENT_ID); /*[AB70]*/
/** @stable ICU 56 */
public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E =
new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E",
CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E_ID); /*[2B820]*/
/** @stable ICU 56 */
public static final UnicodeBlock EARLY_DYNASTIC_CUNEIFORM =
new UnicodeBlock("EARLY_DYNASTIC_CUNEIFORM", EARLY_DYNASTIC_CUNEIFORM_ID); /*[12480]*/
/** @stable ICU 56 */
public static final UnicodeBlock HATRAN = new UnicodeBlock("HATRAN", HATRAN_ID); /*[108E0]*/
/** @stable ICU 56 */
public static final UnicodeBlock MULTANI = new UnicodeBlock("MULTANI", MULTANI_ID); /*[11280]*/
/** @stable ICU 56 */
public static final UnicodeBlock OLD_HUNGARIAN =
new UnicodeBlock("OLD_HUNGARIAN", OLD_HUNGARIAN_ID); /*[10C80]*/
/** @stable ICU 56 */
public static final UnicodeBlock SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS =
new UnicodeBlock("SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS",
SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS_ID); /*[1F900]*/
/** @stable ICU 56 */
public static final UnicodeBlock SUTTON_SIGNWRITING =
new UnicodeBlock("SUTTON_SIGNWRITING", SUTTON_SIGNWRITING_ID); /*[1D800]*/
/**
* @stable ICU 2.4
*/

View File

@ -647,7 +647,7 @@ public final class UScript {
*/
public static final int SAURASHTRA = 111;/* Saur */
/**
* ISO 15924 script code
* ISO 15924 script code for Sutton SignWriting
* @stable ICU 3.8
*/
public static final int SIGN_WRITING = 112;/* Sgnw */
@ -1249,12 +1249,12 @@ public final class UScript {
// tools/trunk/unicode/py/parsescriptmetadata.py
// or from icu/trunk/source/common/uscript_props.cpp
0x0040 | RECOMMENDED, // Zyyy
0x0308 | UNKNOWN, // Zinh
0x0308 | RECOMMENDED, // Zinh
0x0628 | RECOMMENDED | RTL, // Arab
0x0531 | RECOMMENDED | CASED, // Armn
0x0995 | RECOMMENDED, // Beng
0x3105 | RECOMMENDED | LB_LETTERS, // Bopo
0x13C4 | LIMITED_USE, // Cher
0x13C4 | LIMITED_USE | CASED, // Cher
0x03E2 | EXCLUSION | CASED, // Copt
0x042F | RECOMMENDED | CASED, // Cyrl
0x10414 | EXCLUSION | CASED, // Dsrt
@ -1278,7 +1278,7 @@ public final class UScript {
0x1826 | ASPIRATIONAL, // Mong
0x1000 | RECOMMENDED | LB_LETTERS, // Mymr
0x168F | EXCLUSION, // Ogam
0x10300 | EXCLUSION, // Ital
0x10308 | EXCLUSION, // Ital
0x0B15 | RECOMMENDED, // Orya
0x16A0 | EXCLUSION, // Runr
0x0D85 | RECOMMENDED, // Sinh
@ -1294,7 +1294,7 @@ public final class UScript {
0x1723 | EXCLUSION, // Hano
0x1743 | EXCLUSION, // Buhd
0x1763 | EXCLUSION, // Tagb
0x2800 | UNKNOWN, // Brai
0x280E | UNKNOWN, // Brai
0x10800 | EXCLUSION | RTL, // Cprt
0x1900 | LIMITED_USE, // Limb
0x10000 | EXCLUSION, // Linb
@ -1308,7 +1308,7 @@ public final class UScript {
0x10A00 | EXCLUSION | RTL, // Khar
0xA800 | LIMITED_USE, // Sylo
0x1980 | LIMITED_USE | LB_LETTERS, // Talu
0x2D30 | ASPIRATIONAL, // Tfng
0x2D5E | ASPIRATIONAL, // Tfng
0x103A0 | EXCLUSION, // Xpeo
0x1B05 | LIMITED_USE, // Bali
0x1BC0 | LIMITED_USE, // Batk
@ -1324,7 +1324,7 @@ public final class UScript {
0x5B57 | RECOMMENDED | LB_LETTERS, // Hans
0x5B57 | RECOMMENDED | LB_LETTERS, // Hant
0x16B1C | EXCLUSION, // Hmng
0,
0x10CA1 | EXCLUSION | RTL | CASED, // Hung
0,
0xA984 | LIMITED_USE, // Java
0xA90A | LIMITED_USE, // Kali
@ -1335,7 +1335,7 @@ public final class UScript {
0x0840 | LIMITED_USE | RTL, // Mand
0,
0x10980 | EXCLUSION | RTL, // Mero
0x07CA | LIMITED_USE | RTL, // Nkoo
0x07D8 | LIMITED_USE | RTL, // Nkoo
0x10C00 | EXCLUSION | RTL, // Orkh
0x1036B | EXCLUSION, // Perm
0xA840 | EXCLUSION, // Phag
@ -1352,7 +1352,7 @@ public final class UScript {
0x12000 | EXCLUSION, // Xsux
0,
0xFDD0 | UNKNOWN, // Zzzz
0x102A0 | EXCLUSION, // Cari
0x102B7 | EXCLUSION, // Cari
0x304B | RECOMMENDED | LB_LETTERS, // Jpan
0x1A20 | LIMITED_USE | LB_LETTERS, // Lana
0x10280 | EXCLUSION, // Lyci
@ -1360,7 +1360,7 @@ public final class UScript {
0x1C5A | LIMITED_USE, // Olck
0xA930 | EXCLUSION, // Rjng
0xA882 | LIMITED_USE, // Saur
0,
0x1D850 | EXCLUSION, // Sgnw
0x1B83 | LIMITED_USE, // Sund
0,
0xABC0 | LIMITED_USE, // Mtei
@ -1369,7 +1369,7 @@ public final class UScript {
0x11103 | LIMITED_USE, // Cakm
0xAC00 | RECOMMENDED, // Kore
0x11083 | EXCLUSION, // Kthi
0x10AD8 | EXCLUSION | RTL, // Mani
0x10AC1 | EXCLUSION | RTL, // Mani
0x10B60 | EXCLUSION | RTL, // Phli
0x10B8F | EXCLUSION | RTL, // Phlp
0,
@ -1379,7 +1379,7 @@ public final class UScript {
0,
0,
0xA6A0 | LIMITED_USE, // Bamu
0xA4D0 | LIMITED_USE, // Lisu
0xA4E8 | LIMITED_USE, // Lisu
0,
0x10A60 | EXCLUSION | RTL, // Sarb
0x16AE6 | EXCLUSION, // Bass
@ -1404,15 +1404,15 @@ public final class UScript {
0x11680 | EXCLUSION, // Takr
0,
0,
0,
0x14400 | EXCLUSION, // Hluw
0x11208 | EXCLUSION, // Khoj
0x11484 | EXCLUSION, // Tirh
0x10537 | EXCLUSION, // Aghb
0x11152 | EXCLUSION, // Mahj
0,
0,
0x11717 | EXCLUSION | LB_LETTERS, // Ahom
0x108F4 | EXCLUSION | RTL, // Hatr
0x1160E | EXCLUSION, // Modi
0,
0x1128F | EXCLUSION, // Mult
0x11AC0 | EXCLUSION, // Pauc
0x1158E | EXCLUSION, // Sidd
// End copy-paste from parsescriptmetadata.py

View File

@ -214,68 +214,66 @@ public class SpoofChecker {
@Deprecated
public static final UnicodeSet RECOMMENDED = new UnicodeSet(
"[\\u0030-\\u0039\\u0041-\\u005A\\u005F\\u0061-\\u007A\\u00C0-\\u00D6\\u00D8-\\u00F6" +
"\\u00F8-\\u0131\\u0134-\\u013E\\u0141-\\u0148\\u014A-\\u017E\\u01A0-\\u01A1" +
"\\u01AF-\\u01B0\\u01CD-\\u01DC\\u01DE-\\u01E3\\u01E6-\\u01F0\\u01F4-\\u01F5" +
"\\u01F8-\\u021B\\u021E-\\u021F\\u0226-\\u0233\\u0259\\u02BB-\\u02BC\\u02EC" +
"\\u0300-\\u0304\\u0306-\\u030C\\u030F-\\u0311\\u0313-\\u0314\\u031B\\u0323-\\u0328" +
"\\u032D-\\u032E\\u0330-\\u0331\\u0335\\u0338-\\u0339\\u0342\\u0345\\u037B-\\u037D" +
"\\u0386\\u0388-\\u038A\\u038C\\u038E-\\u03A1\\u03A3-\\u03CE\\u03FC-\\u045F" +
"\\u048A-\\u0529\\u052E-\\u052F\\u0531-\\u0556\\u0559\\u0561-\\u0586\\u05B4" +
"\\u05D0-\\u05EA\\u05F0-\\u05F2\\u0620-\\u063F\\u0641-\\u0655\\u0660-\\u0669" +
"\\u0670-\\u0672\\u0674\\u0679-\\u068D\\u068F-\\u06D3\\u06D5\\u06E5-\\u06E6" +
"\\u06EE-\\u06FC\\u06FF\\u0750-\\u07B1\\u08A0-\\u08AC\\u08B2\\u0901-\\u094D" +
"\\u094F-\\u0950\\u0956-\\u0957\\u0960-\\u0963\\u0966-\\u096F\\u0971-\\u0977" +
"\\u0979-\\u097F\\u0981-\\u0983\\u0985-\\u098C\\u098F-\\u0990\\u0993-\\u09A8" +
"\\u09AA-\\u09B0\\u09B2\\u09B6-\\u09B9\\u09BC-\\u09C4\\u09C7-\\u09C8\\u09CB-\\u09CE" +
"\\u09D7\\u09E0-\\u09E3\\u09E6-\\u09F1\\u0A01-\\u0A03\\u0A05-\\u0A0A\\u0A0F-\\u0A10" +
"\\u0A13-\\u0A28\\u0A2A-\\u0A30\\u0A32\\u0A35\\u0A38-\\u0A39\\u0A3C\\u0A3E-\\u0A42" +
"\\u0A47-\\u0A48\\u0A4B-\\u0A4D\\u0A5C\\u0A66-\\u0A74\\u0A81-\\u0A83\\u0A85-\\u0A8D" +
"\\u0A8F-\\u0A91\\u0A93-\\u0AA8\\u0AAA-\\u0AB0\\u0AB2-\\u0AB3\\u0AB5-\\u0AB9" +
"\\u0ABC-\\u0AC5\\u0AC7-\\u0AC9\\u0ACB-\\u0ACD\\u0AD0\\u0AE0-\\u0AE3\\u0AE6-\\u0AEF" +
"\\u0B01-\\u0B03\\u0B05-\\u0B0C\\u0B0F-\\u0B10\\u0B13-\\u0B28\\u0B2A-\\u0B30" +
"\\u0B32-\\u0B33\\u0B35-\\u0B39\\u0B3C-\\u0B43\\u0B47-\\u0B48\\u0B4B-\\u0B4D" +
"\\u00F8-\\u0131\\u0134-\\u013E\\u0141-\\u0148\\u014A-\\u017E\\u018F\\u01A0-\\u01A1" +
"\\u01AF-\\u01B0\\u01CD-\\u01DC\\u01DE-\\u01E3\\u01E6-\\u01F0\\u01F4-\\u01F5\\u01F8-\\u021B" +
"\\u021E-\\u021F\\u0226-\\u0233\\u0259\\u02BB-\\u02BC\\u02EC\\u0300-\\u0304\\u0306-\\u030C" +
"\\u030F-\\u0311\\u0313-\\u0314\\u031B\\u0323-\\u0328\\u032D-\\u032E\\u0330-\\u0331" +
"\\u0335\\u0338-\\u0339\\u0342\\u0345\\u037B-\\u037D\\u0386\\u0388-\\u038A\\u038C" +
"\\u038E-\\u03A1\\u03A3-\\u03CE\\u03FC-\\u045F\\u048A-\\u0529\\u052E-\\u052F\\u0531-\\u0556" +
"\\u0559\\u0561-\\u0586\\u05B4\\u05D0-\\u05EA\\u05F0-\\u05F2\\u0620-\\u063F\\u0641-\\u0655" +
"\\u0660-\\u0669\\u0670-\\u0672\\u0674\\u0679-\\u068D\\u068F-\\u06D3\\u06D5\\u06E5-\\u06E6" +
"\\u06EE-\\u06FC\\u06FF\\u0750-\\u07B1\\u08A0-\\u08AC\\u08B2\\u0901-\\u094D\\u094F-\\u0950" +
"\\u0956-\\u0957\\u0960-\\u0963\\u0966-\\u096F\\u0971-\\u0977\\u0979-\\u097F\\u0981-\\u0983" +
"\\u0985-\\u098C\\u098F-\\u0990\\u0993-\\u09A8\\u09AA-\\u09B0\\u09B2\\u09B6-\\u09B9" +
"\\u09BC-\\u09C4\\u09C7-\\u09C8\\u09CB-\\u09CE\\u09D7\\u09E0-\\u09E3\\u09E6-\\u09F1" +
"\\u0A01-\\u0A03\\u0A05-\\u0A0A\\u0A0F-\\u0A10\\u0A13-\\u0A28\\u0A2A-\\u0A30\\u0A32" +
"\\u0A35\\u0A38-\\u0A39\\u0A3C\\u0A3E-\\u0A42\\u0A47-\\u0A48\\u0A4B-\\u0A4D\\u0A5C" +
"\\u0A66-\\u0A74\\u0A81-\\u0A83\\u0A85-\\u0A8D\\u0A8F-\\u0A91\\u0A93-\\u0AA8\\u0AAA-\\u0AB0" +
"\\u0AB2-\\u0AB3\\u0AB5-\\u0AB9\\u0ABC-\\u0AC5\\u0AC7-\\u0AC9\\u0ACB-\\u0ACD\\u0AD0" +
"\\u0AE0-\\u0AE3\\u0AE6-\\u0AEF\\u0B01-\\u0B03\\u0B05-\\u0B0C\\u0B0F-\\u0B10\\u0B13-\\u0B28" +
"\\u0B2A-\\u0B30\\u0B32-\\u0B33\\u0B35-\\u0B39\\u0B3C-\\u0B43\\u0B47-\\u0B48\\u0B4B-\\u0B4D" +
"\\u0B56-\\u0B57\\u0B5F-\\u0B61\\u0B66-\\u0B6F\\u0B71\\u0B82-\\u0B83\\u0B85-\\u0B8A" +
"\\u0B8E-\\u0B90\\u0B92-\\u0B95\\u0B99-\\u0B9A\\u0B9C\\u0B9E-\\u0B9F\\u0BA3-\\u0BA4" +
"\\u0BA8-\\u0BAA\\u0BAE-\\u0BB9\\u0BBE-\\u0BC2\\u0BC6-\\u0BC8\\u0BCA-\\u0BCD" +
"\\u0BD0\\u0BD7\\u0BE6-\\u0BEF\\u0C01-\\u0C03\\u0C05-\\u0C0C\\u0C0E-\\u0C10" +
"\\u0C12-\\u0C28\\u0C2A-\\u0C33\\u0C35-\\u0C39\\u0C3D-\\u0C44\\u0C46-\\u0C48" +
"\\u0C4A-\\u0C4D\\u0C55-\\u0C56\\u0C60-\\u0C61\\u0C66-\\u0C6F\\u0C82-\\u0C83" +
"\\u0C85-\\u0C8C\\u0C8E-\\u0C90\\u0C92-\\u0CA8\\u0CAA-\\u0CB3\\u0CB5-\\u0CB9" +
"\\u0CBC-\\u0CC4\\u0CC6-\\u0CC8\\u0CCA-\\u0CCD\\u0CD5-\\u0CD6\\u0CE0-\\u0CE3" +
"\\u0CE6-\\u0CEF\\u0CF1-\\u0CF2\\u0D02-\\u0D03\\u0D05-\\u0D0C\\u0D0E-\\u0D10" +
"\\u0BA8-\\u0BAA\\u0BAE-\\u0BB9\\u0BBE-\\u0BC2\\u0BC6-\\u0BC8\\u0BCA-\\u0BCD\\u0BD0" +
"\\u0BD7\\u0BE6-\\u0BEF\\u0C01-\\u0C03\\u0C05-\\u0C0C\\u0C0E-\\u0C10\\u0C12-\\u0C28" +
"\\u0C2A-\\u0C33\\u0C35-\\u0C39\\u0C3D-\\u0C44\\u0C46-\\u0C48\\u0C4A-\\u0C4D\\u0C55-\\u0C56" +
"\\u0C60-\\u0C61\\u0C66-\\u0C6F\\u0C82-\\u0C83\\u0C85-\\u0C8C\\u0C8E-\\u0C90\\u0C92-\\u0CA8" +
"\\u0CAA-\\u0CB3\\u0CB5-\\u0CB9\\u0CBC-\\u0CC4\\u0CC6-\\u0CC8\\u0CCA-\\u0CCD\\u0CD5-\\u0CD6" +
"\\u0CE0-\\u0CE3\\u0CE6-\\u0CEF\\u0CF1-\\u0CF2\\u0D02-\\u0D03\\u0D05-\\u0D0C\\u0D0E-\\u0D10" +
"\\u0D12-\\u0D3A\\u0D3D-\\u0D43\\u0D46-\\u0D48\\u0D4A-\\u0D4E\\u0D57\\u0D60-\\u0D61" +
"\\u0D66-\\u0D6F\\u0D7A-\\u0D7F\\u0D82-\\u0D83\\u0D85-\\u0D8E\\u0D91-\\u0D96" +
"\\u0D9A-\\u0DA5\\u0DA7-\\u0DB1\\u0DB3-\\u0DBB\\u0DBD\\u0DC0-\\u0DC6\\u0DCA" +
"\\u0DCF-\\u0DD4\\u0DD6\\u0DD8-\\u0DDE\\u0DF2\\u0E01-\\u0E32\\u0E34-\\u0E3A" +
"\\u0E40-\\u0E4E\\u0E50-\\u0E59\\u0E81-\\u0E82\\u0E84\\u0E87-\\u0E88\\u0E8A" +
"\\u0E8D\\u0E94-\\u0E97\\u0E99-\\u0E9F\\u0EA1-\\u0EA3\\u0EA5\\u0EA7\\u0EAA-\\u0EAB" +
"\\u0EAD-\\u0EB2\\u0EB4-\\u0EB9\\u0EBB-\\u0EBD\\u0EC0-\\u0EC4\\u0EC6\\u0EC8-\\u0ECD" +
"\\u0ED0-\\u0ED9\\u0EDE-\\u0EDF\\u0F00\\u0F20-\\u0F29\\u0F35\\u0F37\\u0F3E-\\u0F42" +
"\\u0F44-\\u0F47\\u0F49-\\u0F4C\\u0F4E-\\u0F51\\u0F53-\\u0F56\\u0F58-\\u0F5B" +
"\\u0F5D-\\u0F68\\u0F6A-\\u0F6C\\u0F71-\\u0F72\\u0F74\\u0F7A-\\u0F80\\u0F82-\\u0F84" +
"\\u0F86-\\u0F92\\u0F94-\\u0F97\\u0F99-\\u0F9C\\u0F9E-\\u0FA1\\u0FA3-\\u0FA6" +
"\\u0D66-\\u0D6F\\u0D7A-\\u0D7F\\u0D82-\\u0D83\\u0D85-\\u0D8E\\u0D91-\\u0D96\\u0D9A-\\u0DA5" +
"\\u0DA7-\\u0DB1\\u0DB3-\\u0DBB\\u0DBD\\u0DC0-\\u0DC6\\u0DCA\\u0DCF-\\u0DD4\\u0DD6" +
"\\u0DD8-\\u0DDE\\u0DF2\\u0E01-\\u0E32\\u0E34-\\u0E3A\\u0E40-\\u0E4E\\u0E50-\\u0E59" +
"\\u0E81-\\u0E82\\u0E84\\u0E87-\\u0E88\\u0E8A\\u0E8D\\u0E94-\\u0E97\\u0E99-\\u0E9F" +
"\\u0EA1-\\u0EA3\\u0EA5\\u0EA7\\u0EAA-\\u0EAB\\u0EAD-\\u0EB2\\u0EB4-\\u0EB9\\u0EBB-\\u0EBD" +
"\\u0EC0-\\u0EC4\\u0EC6\\u0EC8-\\u0ECD\\u0ED0-\\u0ED9\\u0EDE-\\u0EDF\\u0F00\\u0F20-\\u0F29" +
"\\u0F35\\u0F37\\u0F3E-\\u0F42\\u0F44-\\u0F47\\u0F49-\\u0F4C\\u0F4E-\\u0F51\\u0F53-\\u0F56" +
"\\u0F58-\\u0F5B\\u0F5D-\\u0F68\\u0F6A-\\u0F6C\\u0F71-\\u0F72\\u0F74\\u0F7A-\\u0F80" +
"\\u0F82-\\u0F84\\u0F86-\\u0F92\\u0F94-\\u0F97\\u0F99-\\u0F9C\\u0F9E-\\u0FA1\\u0FA3-\\u0FA6" +
"\\u0FA8-\\u0FAB\\u0FAD-\\u0FB8\\u0FBA-\\u0FBC\\u0FC6\\u1000-\\u1049\\u1050-\\u109D" +
"\\u10C7\\u10CD\\u10D0-\\u10F0\\u10F7-\\u10FA\\u10FD-\\u10FF\\u1200-\\u1248" +
"\\u124A-\\u124D\\u1250-\\u1256\\u1258\\u125A-\\u125D\\u1260-\\u1288\\u128A-\\u128D" +
"\\u1290-\\u12B0\\u12B2-\\u12B5\\u12B8-\\u12BE\\u12C0\\u12C2-\\u12C5\\u12C8-\\u12D6" +
"\\u12D8-\\u1310\\u1312-\\u1315\\u1318-\\u135A\\u135D-\\u135F\\u1380-\\u138F" +
"\\u1780-\\u17A2\\u17A5-\\u17A7\\u17A9-\\u17B3\\u17B6-\\u17CA\\u17D2\\u17D7" +
"\\u17DC\\u17E0-\\u17E9\\u1E00-\\u1E99\\u1EBF\\u1F00-\\u1F15\\u1F18-\\u1F1D" +
"\\u1F20-\\u1F45\\u1F48-\\u1F4D\\u1F50-\\u1F57\\u1F59\\u1F5B\\u1F5D\\u1F5F-\\u1F70" +
"\\u1F72\\u1F74\\u1F76\\u1F78\\u1F7A\\u1F7C\\u1F80-\\u1FB4\\u1FB6-\\u1FBA" +
"\\u1FBC\\u1FC2-\\u1FC4\\u1FC6-\\u1FC8\\u1FCA\\u1FCC\\u1FD0-\\u1FD2\\u1FD6-\\u1FDA" +
"\\u1FE0-\\u1FE2\\u1FE4-\\u1FEA\\u1FEC\\u1FF2-\\u1FF4\\u1FF6-\\u1FF8\\u1FFA" +
"\\u1FFC\\u2D27\\u2D2D\\u2D80-\\u2D96\\u2DA0-\\u2DA6\\u2DA8-\\u2DAE\\u2DB0-\\u2DB6" +
"\\u2DB8-\\u2DBE\\u2DC0-\\u2DC6\\u2DC8-\\u2DCE\\u2DD0-\\u2DD6\\u2DD8-\\u2DDE" +
"\\u3005-\\u3007\\u3041-\\u3096\\u3099-\\u309A\\u309D-\\u309E\\u30A1-\\u30FA" +
"\\u30FC-\\u30FE\\u3105-\\u312D\\u31A0-\\u31BA\\u3400-\\u4DB5\\u4E00-\\u9FCC" +
"\\uA660-\\uA661\\uA674-\\uA67B\\uA67F\\uA69F\\uA717-\\uA71F\\uA788\\uA78D-\\uA78E" +
"\\uA790-\\uA793\\uA7A0-\\uA7AA\\uA7FA\\uA9E7-\\uA9FE\\uAA60-\\uAA76\\uAA7A-\\uAA7F" +
"\\uAB01-\\uAB06\\uAB09-\\uAB0E\\uAB11-\\uAB16\\uAB20-\\uAB26\\uAB28-\\uAB2E" +
"\\uAC00-\\uD7A3\\uFA0E-\\uFA0F\\uFA11\\uFA13-\\uFA14\\uFA1F\\uFA21\\uFA23-\\uFA24" +
"\\uFA27-\\uFA29\\U0001B000-\\U0001B001\\U00020000-\\U0002A6D6\\U0002A700-\\U0002B734" +
"\\U0002B740-\\U0002B81D]").freeze();
// Note: data from http://unicode.org/Public/security/latest/xidmodifications.txt version 7.0.0
"\\u10C7\\u10CD\\u10D0-\\u10F0\\u10F7-\\u10FA\\u10FD-\\u10FF\\u1200-\\u1248\\u124A-\\u124D" +
"\\u1250-\\u1256\\u1258\\u125A-\\u125D\\u1260-\\u1288\\u128A-\\u128D\\u1290-\\u12B0" +
"\\u12B2-\\u12B5\\u12B8-\\u12BE\\u12C0\\u12C2-\\u12C5\\u12C8-\\u12D6\\u12D8-\\u1310" +
"\\u1312-\\u1315\\u1318-\\u135A\\u135D-\\u135F\\u1380-\\u138F\\u1780-\\u17A2\\u17A5-\\u17A7" +
"\\u17A9-\\u17B3\\u17B6-\\u17CA\\u17D2\\u17D7\\u17DC\\u17E0-\\u17E9\\u1E00-\\u1E99" +
"\\u1E9E\\u1EA0-\\u1EF9\\u1F00-\\u1F15\\u1F18-\\u1F1D\\u1F20-\\u1F45\\u1F48-\\u1F4D" +
"\\u1F50-\\u1F57\\u1F59\\u1F5B\\u1F5D\\u1F5F-\\u1F70\\u1F72\\u1F74\\u1F76\\u1F78" +
"\\u1F7A\\u1F7C\\u1F80-\\u1FB4\\u1FB6-\\u1FBA\\u1FBC\\u1FC2-\\u1FC4\\u1FC6-\\u1FC8" +
"\\u1FCA\\u1FCC\\u1FD0-\\u1FD2\\u1FD6-\\u1FDA\\u1FE0-\\u1FE2\\u1FE4-\\u1FEA\\u1FEC" +
"\\u1FF2-\\u1FF4\\u1FF6-\\u1FF8\\u1FFA\\u1FFC\\u2D27\\u2D2D\\u2D80-\\u2D96\\u2DA0-\\u2DA6" +
"\\u2DA8-\\u2DAE\\u2DB0-\\u2DB6\\u2DB8-\\u2DBE\\u2DC0-\\u2DC6\\u2DC8-\\u2DCE\\u2DD0-\\u2DD6" +
"\\u2DD8-\\u2DDE\\u3005-\\u3007\\u3041-\\u3096\\u3099-\\u309A\\u309D-\\u309E\\u30A1-\\u30FA" +
"\\u30FC-\\u30FE\\u3105-\\u312D\\u31A0-\\u31BA\\u3400-\\u4DB5\\u4E00-\\u9FD5\\uA660-\\uA661" +
"\\uA674-\\uA67B\\uA67F\\uA69F\\uA717-\\uA71F\\uA788\\uA78D-\\uA78E\\uA790-\\uA793" +
"\\uA7A0-\\uA7AA\\uA7FA\\uA9E7-\\uA9FE\\uAA60-\\uAA76\\uAA7A-\\uAA7F\\uAB01-\\uAB06" +
"\\uAB09-\\uAB0E\\uAB11-\\uAB16\\uAB20-\\uAB26\\uAB28-\\uAB2E\\uAC00-\\uD7A3\\uFA0E-\\uFA0F" +
"\\uFA11\\uFA13-\\uFA14\\uFA1F\\uFA21\\uFA23-\\uFA24\\uFA27-\\uFA29\\U00020000-\\U0002A6D6" +
"\\U0002A700-\\U0002B734\\U0002B740-\\U0002B81D\\U0002B820-\\U0002CEA1]"
).freeze();
// Note: data from http://unicode.org/Public/security/latest/xidmodifications.txt version 8.0.0
// There is no tooling to generate this from the .txt file,
// copy the set contents from ICU4C source/i18n/uspoof.cpp recommendedPat.
// (Add '+' for string concatenation.)
/**
* Constants for the kinds of checks that USpoofChecker can perform. These values are used both to select the set of
@ -1086,12 +1084,34 @@ public class SpoofChecker {
SPUString smapString = stringPool.addString(mapString.toString());
// Add the char . string mapping to the appropriate table.
Hashtable<Integer, SPUString> table = matcher.start(3) >= 0 ? fSLTable
: matcher.start(4) >= 0 ? fSATable : matcher.start(5) >= 0 ? fMLTable
: matcher.start(6) >= 0 ? fMATable : null;
assert (table != null);
table.put(keyChar, smapString);
fKeySet.add(keyChar);
Hashtable<Integer, SPUString> table =
matcher.start(3) >= 0 ? fSLTable :
matcher.start(4) >= 0 ? fSATable :
matcher.start(5) >= 0 ? fMLTable :
matcher.start(6) >= 0 ? fMATable :
null;
assert (table != null);
// For Unicode 8, the SL, SA and ML tables have been discontinued.
// All input data from confusables.txt is tagged MA.
// ICU spoof check functions should ignore the specified table and always
// use this MA Data.
// For now, implement by populating the MA data into all four tables, and
// keep the multiple table implementation in place, in case it comes back
// at some time in the future.
// There is no run time size penalty to keeping the four table implementation -
// the data is shared when it's the same between tables.
if (table != fMATable) {
throw new ParseException("Confusables, line " + fLineNum + ": Table must be 'MA'.", 0);
}
// table.put(keyChar, smapString);
fSLTable.put(keyChar, smapString);
fSATable.put(keyChar, smapString);
fMLTable.put(keyChar, smapString);
fMATable.put(keyChar, smapString);
fKeySet.add(keyChar);
}
// Input data is now all parsed and collected.

View File

@ -155,6 +155,12 @@ public final class VersionInfo implements Comparable<VersionInfo>
*/
public static final VersionInfo UNICODE_7_0;
/**
* Unicode 8.0 version
* @stable ICU 56
*/
public static final VersionInfo UNICODE_8_0;
/**
* ICU4J current release version
* @stable ICU 2.8
@ -524,10 +530,11 @@ public final class VersionInfo implements Comparable<VersionInfo>
UNICODE_6_2 = getInstance(6, 2, 0, 0);
UNICODE_6_3 = getInstance(6, 3, 0, 0);
UNICODE_7_0 = getInstance(7, 0, 0, 0);
UNICODE_8_0 = getInstance(8, 0, 0, 0);
ICU_VERSION = getInstance(56, 0, 1, 0);
ICU_DATA_VERSION = getInstance(56, 0, 1, 0);
UNICODE_VERSION = UNICODE_7_0;
UNICODE_VERSION = UNICODE_8_0;
UCOL_RUNTIME_VERSION = getInstance(9);
UCOL_BUILDER_VERSION = getInstance(9);

View File

@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:d28829147bf60b0ba7aec19aaca36d2a2463d6a692657d66871dcb49e746feec
size 11844442
oid sha256:49e15d8a9e4ed0af649bc576eb05631b754373278842a03206f4700d2529dc5d
size 11888181

View File

@ -1,6 +1,6 @@
/*
*******************************************************************************
* Copyright (C) 2008-2014, International Business Machines Corporation and
* Copyright (C) 2008-2015, International Business Machines Corporation and
* others. All Rights Reserved.
*******************************************************************************
*/
@ -246,7 +246,11 @@ public class AlphabeticIndexTest extends TestFmwk {
collator.setStrength(Collator.IDENTICAL);
Collection<String> firsts = alphabeticIndex.getFirstCharactersInScripts();
// Verify that each script is represented exactly once.
UnicodeSet missingScripts = new UnicodeSet("[^[:sc=inherited:][:sc=unknown:][:sc=common:][:Script=Braille:]]");
// Exclude pseudo-scripts like Common (no letters).
// Exclude scripts like Braille and Sutton SignWriting
// because they only have symbols, not letters.
UnicodeSet missingScripts = new UnicodeSet(
"[^[:inherited:][:unknown:][:common:][:Braille:][:SignWriting:]]");
String last = "";
for (String index : firsts) {
if (collator.compare(last,index) >= 0) {
@ -1039,7 +1043,8 @@ public class AlphabeticIndexTest extends TestFmwk {
// bucketIndex = radical number, adjusted for simplified radicals in lower buckets.
int bucketIndex = index.getBucketIndex("\u4e5d");
assertEquals("getBucketIndex(U+4E5D)", 5, bucketIndex);
// radical 100, and there is a 90' since Unicode 8
bucketIndex = index.getBucketIndex("\u7527");
assertEquals("getBucketIndex(U+7527)", 100, bucketIndex);
assertEquals("getBucketIndex(U+7527)", 101, bucketIndex);
}
}

View File

@ -1,8 +1,8 @@
# BidiTest-7.0.0.txt
# Date: 2013-11-27, 09:54:24 GMT [MD]
# BidiTest-8.0.0.txt
# Date: 2014-12-16, 23:07:28 GMT [MD]
#
# Unicode Character Database
# Copyright (c) 1991-2013 Unicode, Inc.
# Copyright (c) 1991-2014 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
# For documentation, see http://www.unicode.org/reports/tr44/
#

View File

@ -1,5 +1,5 @@
# CompositionExclusions-7.0.0.txt
# Date: 2014-01-24, 15:00:00 GMT [KW, LI]
# CompositionExclusions-8.0.0.txt
# Date: 2015-02-19, 00:30:00 GMT [KW, LI]
#
# This file lists the characters for the Composition Exclusion Table
# defined in UAX #15, Unicode Normalization Forms.
@ -7,7 +7,7 @@
# This file is a normative contributory data file in the
# Unicode Character Database.
#
# Copyright (c) 1991-2014 Unicode, Inc.
# Copyright (c) 1991-2015 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
#
# For more information, see

View File

@ -1,10 +1,10 @@
# NormalizationCorrections-7.0.0.txt
# Date: 2013-12-17, 00:00:00 GMT [KW, LI]
# NormalizationCorrections-8.0.0.txt
# Date: 2015-03-07, 01:30:00 GMT [KW, LI]
#
# This file is a normative contributory data file in the
# Unicode Character Database.
#
# Copyright (c) 1991-2014 Unicode, Inc.
# Copyright (c) 1991-2015 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
#
# The normalization stability policy of the Unicode Consortium

View File

@ -1,8 +1,8 @@
# NormalizationTest-7.0.0.txt
# Date: 2013-11-27, 09:54:41 GMT [MD]
# NormalizationTest-8.0.0.txt
# Date: 2015-02-13, 13:30:27 GMT [MD]
#
# Unicode Character Database
# Copyright (c) 1991-2013 Unicode, Inc.
# Copyright (c) 1991-2015 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
# For documentation, see http://www.unicode.org/reports/tr44/
#
@ -17523,6 +17523,8 @@ FFEE;FFEE;FFEE;25CB;25CB;
0061 085A 059A 0316 302A 0062;0061 302A 085A 0316 059A 0062;0061 302A 085A 0316 059A 0062;0061 302A 085A 0316 059A 0062;0061 302A 085A 0316 059A 0062;
0061 059A 0316 302A 085B 0062;0061 302A 0316 085B 059A 0062;0061 302A 0316 085B 059A 0062;0061 302A 0316 085B 059A 0062;0061 302A 0316 085B 059A 0062;
0061 085B 059A 0316 302A 0062;0061 302A 085B 0316 059A 0062;0061 302A 085B 0316 059A 0062;0061 302A 085B 0316 059A 0062;0061 302A 085B 0316 059A 0062;
0061 059A 0316 302A 08E3 0062;0061 302A 0316 08E3 059A 0062;0061 302A 0316 08E3 059A 0062;0061 302A 0316 08E3 059A 0062;0061 302A 0316 08E3 059A 0062;
0061 08E3 059A 0316 302A 0062;0061 302A 08E3 0316 059A 0062;0061 302A 08E3 0316 059A 0062;0061 302A 08E3 0316 059A 0062;0061 302A 08E3 0316 059A 0062;
0061 0315 0300 05AE 08E4 0062;00E0 05AE 08E4 0315 0062;0061 05AE 0300 08E4 0315 0062;00E0 05AE 08E4 0315 0062;0061 05AE 0300 08E4 0315 0062;
0061 08E4 0315 0300 05AE 0062;0061 05AE 08E4 0300 0315 0062;0061 05AE 08E4 0300 0315 0062;0061 05AE 08E4 0300 0315 0062;0061 05AE 08E4 0300 0315 0062;
0061 0315 0300 05AE 08E5 0062;00E0 05AE 08E5 0315 0062;0061 05AE 0300 08E5 0315 0062;00E0 05AE 08E5 0315 0062;0061 05AE 0300 08E5 0315 0062;
@ -18135,6 +18137,8 @@ FFEE;FFEE;FFEE;25CB;25CB;
0061 A67C 0315 0300 05AE 0062;0061 05AE A67C 0300 0315 0062;0061 05AE A67C 0300 0315 0062;0061 05AE A67C 0300 0315 0062;0061 05AE A67C 0300 0315 0062;
0061 0315 0300 05AE A67D 0062;00E0 05AE A67D 0315 0062;0061 05AE 0300 A67D 0315 0062;00E0 05AE A67D 0315 0062;0061 05AE 0300 A67D 0315 0062;
0061 A67D 0315 0300 05AE 0062;0061 05AE A67D 0300 0315 0062;0061 05AE A67D 0300 0315 0062;0061 05AE A67D 0300 0315 0062;0061 05AE A67D 0300 0315 0062;
0061 0315 0300 05AE A69E 0062;00E0 05AE A69E 0315 0062;0061 05AE 0300 A69E 0315 0062;00E0 05AE A69E 0315 0062;0061 05AE 0300 A69E 0315 0062;
0061 A69E 0315 0300 05AE 0062;0061 05AE A69E 0300 0315 0062;0061 05AE A69E 0300 0315 0062;0061 05AE A69E 0300 0315 0062;0061 05AE A69E 0300 0315 0062;
0061 0315 0300 05AE A69F 0062;00E0 05AE A69F 0315 0062;0061 05AE 0300 A69F 0315 0062;00E0 05AE A69F 0315 0062;0061 05AE 0300 A69F 0315 0062;
0061 A69F 0315 0300 05AE 0062;0061 05AE A69F 0300 0315 0062;0061 05AE A69F 0300 0315 0062;0061 05AE A69F 0300 0315 0062;0061 05AE A69F 0300 0315 0062;
0061 0315 0300 05AE A6F0 0062;00E0 05AE A6F0 0315 0062;0061 05AE 0300 A6F0 0315 0062;00E0 05AE A6F0 0315 0062;0061 05AE 0300 A6F0 0315 0062;
@ -18245,6 +18249,10 @@ FFEE;FFEE;FFEE;25CB;25CB;
0061 FE2C 059A 0316 302A 0062;0061 302A FE2C 0316 059A 0062;0061 302A FE2C 0316 059A 0062;0061 302A FE2C 0316 059A 0062;0061 302A FE2C 0316 059A 0062;
0061 059A 0316 302A FE2D 0062;0061 302A 0316 FE2D 059A 0062;0061 302A 0316 FE2D 059A 0062;0061 302A 0316 FE2D 059A 0062;0061 302A 0316 FE2D 059A 0062;
0061 FE2D 059A 0316 302A 0062;0061 302A FE2D 0316 059A 0062;0061 302A FE2D 0316 059A 0062;0061 302A FE2D 0316 059A 0062;0061 302A FE2D 0316 059A 0062;
0061 0315 0300 05AE FE2E 0062;00E0 05AE FE2E 0315 0062;0061 05AE 0300 FE2E 0315 0062;00E0 05AE FE2E 0315 0062;0061 05AE 0300 FE2E 0315 0062;
0061 FE2E 0315 0300 05AE 0062;0061 05AE FE2E 0300 0315 0062;0061 05AE FE2E 0300 0315 0062;0061 05AE FE2E 0300 0315 0062;0061 05AE FE2E 0300 0315 0062;
0061 0315 0300 05AE FE2F 0062;00E0 05AE FE2F 0315 0062;0061 05AE 0300 FE2F 0315 0062;00E0 05AE FE2F 0315 0062;0061 05AE 0300 FE2F 0315 0062;
0061 FE2F 0315 0300 05AE 0062;0061 05AE FE2F 0300 0315 0062;0061 05AE FE2F 0300 0315 0062;0061 05AE FE2F 0300 0315 0062;0061 05AE FE2F 0300 0315 0062;
0061 059A 0316 302A 101FD 0062;0061 302A 0316 101FD 059A 0062;0061 302A 0316 101FD 059A 0062;0061 302A 0316 101FD 059A 0062;0061 302A 0316 101FD 059A 0062;
0061 101FD 059A 0316 302A 0062;0061 302A 101FD 0316 059A 0062;0061 302A 101FD 0316 059A 0062;0061 302A 101FD 0316 059A 0062;0061 302A 101FD 0316 059A 0062;
0061 059A 0316 302A 102E0 0062;0061 302A 0316 102E0 059A 0062;0061 302A 0316 102E0 059A 0062;0061 302A 0316 102E0 059A 0062;0061 302A 0316 102E0 059A 0062;
@ -18297,6 +18305,8 @@ FFEE;FFEE;FFEE;25CB;25CB;
0061 11173 3099 093C 0334 0062;0061 0334 11173 093C 3099 0062;0061 0334 11173 093C 3099 0062;0061 0334 11173 093C 3099 0062;0061 0334 11173 093C 3099 0062;
0061 05B0 094D 3099 111C0 0062;0061 3099 094D 111C0 05B0 0062;0061 3099 094D 111C0 05B0 0062;0061 3099 094D 111C0 05B0 0062;0061 3099 094D 111C0 05B0 0062;
0061 111C0 05B0 094D 3099 0062;0061 3099 111C0 094D 05B0 0062;0061 3099 111C0 094D 05B0 0062;0061 3099 111C0 094D 05B0 0062;0061 3099 111C0 094D 05B0 0062;
0061 3099 093C 0334 111CA 0062;0061 0334 093C 111CA 3099 0062;0061 0334 093C 111CA 3099 0062;0061 0334 093C 111CA 3099 0062;0061 0334 093C 111CA 3099 0062;
0061 111CA 3099 093C 0334 0062;0061 0334 111CA 093C 3099 0062;0061 0334 111CA 093C 3099 0062;0061 0334 111CA 093C 3099 0062;0061 0334 111CA 093C 3099 0062;
0061 05B0 094D 3099 11235 0062;0061 3099 094D 11235 05B0 0062;0061 3099 094D 11235 05B0 0062;0061 3099 094D 11235 05B0 0062;0061 3099 094D 11235 05B0 0062;
0061 11235 05B0 094D 3099 0062;0061 3099 11235 094D 05B0 0062;0061 3099 11235 094D 05B0 0062;0061 3099 11235 094D 05B0 0062;0061 3099 11235 094D 05B0 0062;
0061 3099 093C 0334 11236 0062;0061 0334 093C 11236 3099 0062;0061 0334 093C 11236 3099 0062;0061 0334 093C 11236 3099 0062;0061 0334 093C 11236 3099 0062;
@ -18347,6 +18357,8 @@ FFEE;FFEE;FFEE;25CB;25CB;
0061 116B6 05B0 094D 3099 0062;0061 3099 116B6 094D 05B0 0062;0061 3099 116B6 094D 05B0 0062;0061 3099 116B6 094D 05B0 0062;0061 3099 116B6 094D 05B0 0062;
0061 3099 093C 0334 116B7 0062;0061 0334 093C 116B7 3099 0062;0061 0334 093C 116B7 3099 0062;0061 0334 093C 116B7 3099 0062;0061 0334 093C 116B7 3099 0062;
0061 116B7 3099 093C 0334 0062;0061 0334 116B7 093C 3099 0062;0061 0334 116B7 093C 3099 0062;0061 0334 116B7 093C 3099 0062;0061 0334 116B7 093C 3099 0062;
0061 05B0 094D 3099 1172B 0062;0061 3099 094D 1172B 05B0 0062;0061 3099 094D 1172B 05B0 0062;0061 3099 094D 1172B 05B0 0062;0061 3099 094D 1172B 05B0 0062;
0061 1172B 05B0 094D 3099 0062;0061 3099 1172B 094D 05B0 0062;0061 3099 1172B 094D 05B0 0062;0061 3099 1172B 094D 05B0 0062;0061 3099 1172B 094D 05B0 0062;
0061 093C 0334 16AF0 0062;0061 0334 16AF0 093C 0062;0061 0334 16AF0 093C 0062;0061 0334 16AF0 093C 0062;0061 0334 16AF0 093C 0062;
0061 16AF0 093C 0334 0062;0061 16AF0 0334 093C 0062;0061 16AF0 0334 093C 0062;0061 16AF0 0334 093C 0062;0061 16AF0 0334 093C 0062;
0061 093C 0334 16AF1 0062;0061 0334 16AF1 093C 0062;0061 0334 16AF1 093C 0062;0061 0334 16AF1 093C 0062;0061 0334 16AF1 093C 0062;

View File

@ -1,5 +1,5 @@
# SpecialCasing-7.0.0.txt
# Date: 2014-03-18, 07:18:02 GMT [MD]
# SpecialCasing-8.0.0.txt
# Date: 2014-12-16, 23:08:04 GMT [MD]
#
# Unicode Character Database
# Copyright (c) 1991-2014 Unicode, Inc.

View File

@ -1,6 +1,6 @@
/**
*******************************************************************************
* Copyright (C) 1996-2014, International Business Machines Corporation and
* Copyright (C) 1996-2015, International Business Machines Corporation and
* others. All Rights Reserved.
*******************************************************************************
*/
@ -609,13 +609,13 @@ public class TestUScript extends TestFmwk {
String[] expectedLong = new String[]{
"Balinese", "Batak", "Blis", "Brahmi", "Cham", "Cirt", "Cyrs",
"Egyd", "Egyh", "Egyptian_Hieroglyphs",
"Geok", "Hans", "Hant", "Pahawh_Hmong", "Hung", "Inds",
"Geok", "Hans", "Hant", "Pahawh_Hmong", "Old_Hungarian", "Inds",
"Javanese", "Kayah_Li", "Latf", "Latg",
"Lepcha", "Linear_A", "Mandaic", "Maya", "Meroitic_Hieroglyphs",
"Nko", "Old_Turkic", "Old_Permic", "Phags_Pa", "Phoenician",
"Miao", "Roro", "Sara", "Syre", "Syrj", "Syrn", "Teng", "Vai", "Visp", "Cuneiform",
"Zxxx", "Unknown",
"Carian", "Jpan", "Tai_Tham", "Lycian", "Lydian", "Ol_Chiki", "Rejang", "Saurashtra", "Sgnw", "Sundanese",
"Carian", "Jpan", "Tai_Tham", "Lycian", "Lydian", "Ol_Chiki", "Rejang", "Saurashtra", "SignWriting", "Sundanese",
"Moon", "Meetei_Mayek",
/* new in ICU 4.0 */
"Imperial_Aramaic", "Avestan", "Chakma", "Kore",
@ -631,11 +631,11 @@ public class TestUScript extends TestFmwk {
/* new in ICU 4.8 */
"Afak", "Jurc", "Mro", "Nshu", "Sharada", "Sora_Sompeng", "Takri", "Tang", "Wole",
/* new in ICU 49 */
"Hluw", "Khojki", "Tirhuta",
"Anatolian_Hieroglyphs", "Khojki", "Tirhuta",
/* new in ICU 52 */
"Caucasian_Albanian", "Mahajani",
/* new in ICU 54 */
"Ahom", "Hatr", "Modi", "Mult", "Pau_Cin_Hau", "Siddham"
"Ahom", "Hatran", "Modi", "Multani", "Pau_Cin_Hau", "Siddham"
};
String[] expectedShort = new String[]{
"Bali", "Batk", "Blis", "Brah", "Cham", "Cirt", "Cyrs", "Egyd", "Egyh", "Egyp",

View File

@ -45,9 +45,9 @@ public final class UCharacterTest extends TestFmwk
// private variables =============================================
/**
* ICU4J data version number
*/
private final VersionInfo VERSION_ = VersionInfo.getInstance("7.0.0.0");
* Expected Unicode version.
*/
private final VersionInfo VERSION_ = VersionInfo.getInstance(8);
// constructor ===================================================

View File

@ -1,6 +1,6 @@
/*
*******************************************************************************
* Copyright (C) 2003-2014 International Business Machines Corporation and
* Copyright (C) 2003-2015 International Business Machines Corporation and
* others. All Rights Reserved.
*******************************************************************************
*/
@ -949,6 +949,7 @@ public class RBBITestMonkey extends TestFmwk {
// LB 22
if (fAL.contains(prevChar) && fIN.contains(thisChar) ||
fEX.contains(prevChar) && fIN.contains(thisChar) ||
fHL.contains(prevChar) && fIN.contains(thisChar) ||
fID.contains(prevChar) && fIN.contains(thisChar) ||
fIN.contains(prevChar) && fIN.contains(thisChar) ||
@ -1407,8 +1408,9 @@ public class RBBITestMonkey extends TestFmwk {
continue;
}
// Rule (7). Upper ATerm x Upper
if (fUpperSet.contains(c0) && fATermSet.contains(c1) && fUpperSet.contains(c2)) {
// Rule (7). (Upper | Lower) ATerm x Upper
if ((fUpperSet.contains(c0) || fLowerSet.contains(c0)) &&
fATermSet.contains(c1) && fUpperSet.contains(c2)) {
continue;
}

View File

@ -132,7 +132,7 @@ public class SpoofCheckerTest extends TestFmwk {
String stubConfusables =
"# Stub confusables data\n" +
"05AD ; 0596 ; SL # ( ֭ → ֖ ) HEBREW ACCENT DEHI → HEBREW ACCENT TIPEHA #\n";
"05AD ; 0596 ; MA # ( ֭ → ֖ ) HEBREW ACCENT DEHI → HEBREW ACCENT TIPEHA #\n";
// Verify that re-using a builder doesn't alter SpoofCheckers that were
// previously created by that builder. (The builder could modify data

View File

@ -1,14 +1,14 @@
/*
*
* (C) Copyright IBM Corp. 1998-2014. All Rights Reserved.
* (C) Copyright IBM Corp. 1998-2015. All Rights Reserved.
*
* WARNING: THIS FILE IS MACHINE GENERATED. DO NOT HAND EDIT IT UNLESS
* YOU REALLY KNOW WHAT YOU'RE DOING.
*
* Generated on: 04/08/2014 04:03:38 PM PDT
* Generated on: 04/27/2015 10:39:37 AM PDT
*/
ICU Version=53.1
ICU Version=56.0
Total Script=166
Total Language=71
Scripts={

View File

@ -1,6 +1,6 @@
/*
*******************************************************************************
* Copyright (C) 1998-2014, International Business Machines Corporation and
* Copyright (C) 1998-2015, International Business Machines Corporation and
* others. All Rights Reserved.
*******************************************************************************
*/
@ -8,11 +8,14 @@ package com.ibm.icu.dev.tool.layout;
import java.util.Date;
import com.ibm.icu.lang.UScript;
import com.ibm.icu.text.MessageFormat;
import com.ibm.icu.util.VersionInfo;
public class ScriptIDModuleWriter extends ScriptModuleWriter
{
private static final boolean WITH_STABLE_TAGS = false;
public ScriptIDModuleWriter(ScriptData scriptData, LanguageData languageData)
{
super(scriptData, languageData);
@ -50,7 +53,7 @@ public class ScriptIDModuleWriter extends ScriptModuleWriter
if(!initialheader){
output.println(format.format(args));
initialheader = true;
}else{
}else if(WITH_STABLE_TAGS) {
// Unicode API constants are "born stable".
format = new MessageFormat(scriptPreambleStable);
output.println(format.format(args));
@ -67,16 +70,21 @@ public class ScriptIDModuleWriter extends ScriptModuleWriter
output.print(script);
output.println(",");
if(script == UScript.INHERITED) {
output.println(" qaaiScriptCode = zinhScriptCode, /* manually added alias, for API stability */");
}
script++;
}
arrayListIndex++;
}
if(newScripts){//Processing newly added scripts
format = new MessageFormat(scriptPreambleStable);
Object args[] = { VersionInfo.ICU_VERSION };
output.println(format.format(args));
if(WITH_STABLE_TAGS) {
format = new MessageFormat(scriptPreambleStable);
Object args[] = { VersionInfo.ICU_VERSION };
output.println(format.format(args));
}
for (int script = previousTotalScripts+1; script <= totalScript; script += 1) {
output.print(" ");
output.print(scriptData.getTagLabel(script));
@ -133,7 +141,7 @@ public class ScriptIDModuleWriter extends ScriptModuleWriter
if(!initialheader){
output.println(format.format(args));
initialheader = true;
}else{
} else if(WITH_STABLE_TAGS) {
// Unicode API constants are "born stable".
format = new MessageFormat(languagePreambleStable);
output.println(format.format(args));
@ -156,10 +164,12 @@ public class ScriptIDModuleWriter extends ScriptModuleWriter
}
if(newLanguage){
//Processing newly added languages
format = new MessageFormat(languagePreambleStable);
Object args[] = { VersionInfo.ICU_VERSION };
output.println(format.format(args));
if(WITH_STABLE_TAGS) {
format = new MessageFormat(languagePreambleStable);
Object args[] = { VersionInfo.ICU_VERSION };
output.println(format.format(args));
}
for (int langauge = previousTotalLanguages+1; langauge <= totalLanguage; langauge += 1) {
output.print(" ");
output.print(languageData.getTagLabel(langauge).toLowerCase());
@ -190,7 +200,7 @@ public class ScriptIDModuleWriter extends ScriptModuleWriter
" * Constants for Unicode script values, generated using\n" +
" * ICU4J''s <code>UScript</code> class.\n" +
" *\n" +
" * @stable ICU {0}\n" +
" * @deprecated ICU 54. See '{@link icu::LayoutEngine}'\n" + // was " * @stable ICU {0}\n"
" */\n" +
"\n" +
"enum ScriptCodes '{'";
@ -209,7 +219,7 @@ public class ScriptIDModuleWriter extends ScriptModuleWriter
" * this is just a list of languages which the LayoutEngine\n" +
" * supports.\n" +
" *\n" +
" * @stable ICU {0}\n" +
" * @deprecated ICU 54. See '{@link icu::LayoutEngine}'\n" + // was " * @stable ICU {0}\n"
" */\n" +
"\n" +
"enum LanguageCodes '{'";

View File

@ -1,11 +1,13 @@
/*
*******************************************************************************
* Copyright (C) 1998-2006, International Business Machines Corporation and *
* others. All Rights Reserved. *
* Copyright (C) 1998-2015, International Business Machines Corporation and
* others. All Rights Reserved.
*******************************************************************************
*/
package com.ibm.icu.dev.tool.layout;
import com.ibm.icu.lang.UScript;
public class ScriptTagModuleWriter extends ScriptModuleWriter
{
public ScriptTagModuleWriter(ScriptData theScriptData, LanguageData theLanguageData)
@ -29,6 +31,39 @@ public class ScriptTagModuleWriter extends ScriptModuleWriter
output.print("' (");
output.print(data.getName(value));
output.println(") */");
if(kind.equals("Script")) {
switch(value) {
case UScript.BENGALI:
output.println("const LETag bng2ScriptTag = 0x626E6732; /* 'bng2' (BENGALI v.2) (manually added) */");
break;
case UScript.DEVANAGARI:
output.println("const LETag dev2ScriptTag = 0x64657632; /* 'dev2' (DEVANAGARI v.2) (manually added) */");
break;
case UScript.GUJARATI:
output.println("const LETag gjr2ScriptTag = 0x676A7232; /* 'gjr2' (GUJARATI v.2) (manually added) */");
break;
case UScript.GURMUKHI:
output.println("const LETag gur2ScriptTag = 0x67757232; /* 'gur2' (GURMUKHI v.2) (manually added) */");
break;
case UScript.KANNADA:
output.println("const LETag knd2ScriptTag = 0x6B6E6432; /* 'knd2' (KANNADA v.2) (manually added) */");
break;
case UScript.MALAYALAM:
output.println("const LETag mlm2ScriptTag = 0x6D6C6D32; /* 'mlm2' (MALAYALAM v.2) (manually added) */");
break;
case UScript.ORIYA:
output.println("const LETag ory2ScriptTag = 0x6F727932; /* 'ory2' (ORIYA v.2) (manually added) */");
break;
case UScript.TAMIL:
output.println("const LETag tml2ScriptTag = 0x746D6C32; /* 'tml2' (TAMIL v.2) (manually added) */");
break;
case UScript.TELUGU:
output.println("const LETag tel2ScriptTag = 0x74656C32; /* 'tel2' (TELUGU v.2) (manually added) */");
break;
default:
break;
}
}
}
}