ICU-8044 Spoof Detection data update to latest from Unicode.

X-SVN-Rev: 28838
This commit is contained in:
Andy Heninger 2010-10-15 00:01:43 +00:00
parent 5714200c0d
commit c05e310791
5 changed files with 45833 additions and 38262 deletions

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -77,7 +77,7 @@ void addUSpoofTest(TestNode** root)
/*
* Identifiers for verifying that spoof checking is minimally alive and working.
*/
const UChar goodLatin[] = {(UChar)0x75, (UChar)0x77, 0}; /* "uw", all ASCII */
const UChar goodLatin[] = {(UChar)0x75, (UChar)0x7a, 0}; /* "uz", all ASCII */
/* (not confusable) */
const UChar scMixed[] = {(UChar)0x73, (UChar)0x0441, 0}; /* "sc", with Cyrillic 'c' */
/* (mixed script, confusable) */
@ -95,8 +95,8 @@ const UChar lll_Latin_b[] = {(UChar)0xff29, (UChar)0x217c, (UChar)0x196, 0};
const UChar lll_Cyrl[] = {(UChar)0x0406, (UChar)0x04C0, (UChar)0x31, 0};
/* The skeleton transform for all of these 'lll' lookalikes is all ascii digit 1. */
const UChar lll_Skel[] = {(UChar)0x31, (UChar)0x31, (UChar)0x31, 0};
/* The skeleton transform for all of these 'lll' lookalikes is all lower case l. */
const UChar lll_Skel[] = {(UChar)0x6c, (UChar)0x6c, (UChar)0x6c, 0};
/* Provide better code coverage */
const char goodLatinUTF8[] = {0x75, 0x77, 0};

View File

@ -1,6 +1,6 @@
/*
**********************************************************************
* Copyright (C) 2009, International Business Machines Corporation
* Copyright (C) 2010, International Business Machines Corporation
* and others. All Rights Reserved.
**********************************************************************
*/
@ -98,7 +98,8 @@ void IntlTestSpoof::runIndexedTest( int32_t index, UBool exec, const char* &name
void IntlTestSpoof::testSpoofAPI() {
TEST_SETUP
UnicodeString s("uvw");
UnicodeString s("xyz"); // Many latin ranges are whole-script confusable with other scripts.
// If this test starts failing, consult confusablesWholeScript.txt
int32_t position = 666;
int32_t checkResults = uspoof_checkUnicodeString(sc, s, &position, &status);
TEST_ASSERT_SUCCESS(status);
@ -119,7 +120,7 @@ void IntlTestSpoof::testSpoofAPI() {
UnicodeString dest;
UnicodeString &retStr = uspoof_getSkeletonUnicodeString(sc, USPOOF_ANY_CASE, s, dest, &status);
TEST_ASSERT_SUCCESS(status);
TEST_ASSERT(UnicodeString("11100") == dest);
TEST_ASSERT(UnicodeString("lllOO") == dest);
TEST_ASSERT(&dest == &retStr);
TEST_TEARDOWN;
}
@ -142,15 +143,15 @@ void IntlTestSpoof::testSkeleton() {
TEST_SETUP
// A long "identifier" that will overflow implementation stack buffers, forcing heap allocations.
CHECK_SKELETON(SL, " A long 'identifier' that will overflow implementation stack buffers, forcing heap allocations."
" A long 'identifier' that will overflow implementation stack buffers, forcing heap allocations."
" A long 'identifier' that will overflow implementation stack buffers, forcing heap allocations."
" A long 'identifier' that will overflow implementation stack buffers, forcing heap allocations.",
CHECK_SKELETON(SL, " A 1ong \\u02b9identifier' that will overflow implementation stack buffers, forcing heap allocations."
" A 1ong 'identifier' that will overflow implementation stack buffers, forcing heap allocations."
" A 1ong 'identifier' that will overflow implementation stack buffers, forcing heap allocations."
" A 1ong 'identifier' that will overflow implementation stack buffers, forcing heap allocations.",
" A 1ong \\u02b9identifier\\u02b9 that wi11 overf1ow imp1ementation stack buffers, forcing heap a11ocations."
" A 1ong \\u02b9identifier\\u02b9 that wi11 overf1ow imp1ementation stack buffers, forcing heap a11ocations."
" A 1ong \\u02b9identifier\\u02b9 that wi11 overf1ow imp1ementation stack buffers, forcing heap a11ocations."
" A 1ong \\u02b9identifier\\u02b9 that wi11 overf1ow imp1ementation stack buffers, forcing heap a11ocations.")
" A long 'identifier' that vvill overflovv irnplernentation stack buffers, forcing heap allocations."
" A long 'identifier' that vvill overflovv irnplernentation stack buffers, forcing heap allocations."
" A long 'identifier' that vvill overflovv irnplernentation stack buffers, forcing heap allocations."
" A long 'identifier' that vvill overflovv irnplernentation stack buffers, forcing heap allocations.")
// FC5F ; FE74 0651 ; ML #* ARABIC LIGATURE SHADDA WITH KASRATAN ISOLATED FORM to
// ARABIC KASRATAN ISOLATED FORM, ARABIC SHADDA
@ -159,20 +160,23 @@ void IntlTestSpoof::testSkeleton() {
CHECK_SKELETON(SL, "\\uFC5F", " \\u064d\\u0651");
CHECK_SKELETON(SL, "nochange", "nochange");
CHECK_SKELETON(MA, "love", "1ove"); // lower case l to digit 1
CHECK_SKELETON(MA, "love", "love");
CHECK_SKELETON(MA, "1ove", "love"); // Digit 1 to letter l
CHECK_SKELETON(ML, "OOPS", "OOPS");
CHECK_SKELETON(MA, "OOPS", "00PS"); // Letter O to digit 0 in any case mode only
CHECK_SKELETON(ML, "00PS", "00PS"); // Digit 0 unchanged in lower case mode.
CHECK_SKELETON(MA, "OOPS", "OOPS");
CHECK_SKELETON(MA, "00PS", "OOPS"); // Digit 0 to letter O in any case mode only
CHECK_SKELETON(SL, "\\u059c", "\\u0301");
CHECK_SKELETON(SL, "\\u2A74", "\\u003A\\u003A\\u003D");
CHECK_SKELETON(SL, "\\u247E", "\\u0028\\u0031\\u0031\\u0029");
CHECK_SKELETON(SL, "\\u247E", "\\u0028\\u006C\\u006C\\u0029"); // "(ll)"
CHECK_SKELETON(SL, "\\uFDFB", "\\u062C\\u0644\\u0020\\u062C\\u0644\\u0627\\u0644\\u0647");
// This mapping exists in the ML and MA tables, does not exist in SL, SA
//0C83 ; 0C03 ;
CHECK_SKELETON(SL, "\\u0C83", "\\u0C83");
CHECK_SKELETON(SA, "\\u0C83", "\\u0C83");
CHECK_SKELETON(ML, "\\u0C83", "\\u0C03");
CHECK_SKELETON(MA, "\\u0C83", "\\u0C03");
CHECK_SKELETON(ML, "\\u0C83", "\\u0983");
CHECK_SKELETON(MA, "\\u0C83", "\\u0983");
// 0391 ; 0041 ;
// This mapping exists only in the MA table.
@ -188,12 +192,12 @@ void IntlTestSpoof::testSkeleton() {
CHECK_SKELETON(SL, "\\u13CF", "\\u13CF");
CHECK_SKELETON(SA, "\\u13CF", "\\u13CF");
// 0022 ; 02B9 02B9 ;
// 0022 ; 0027 0027 ;
// all tables.
CHECK_SKELETON(SL, "\\u0022", "\\u02B9\\u02B9");
CHECK_SKELETON(SA, "\\u0022", "\\u02B9\\u02B9");
CHECK_SKELETON(ML, "\\u0022", "\\u02B9\\u02B9");
CHECK_SKELETON(MA, "\\u0022", "\\u02B9\\u02B9");
CHECK_SKELETON(SL, "\\u0022", "\\u0027\\u0027");
CHECK_SKELETON(SA, "\\u0022", "\\u0027\\u0027");
CHECK_SKELETON(ML, "\\u0022", "\\u0027\\u0027");
CHECK_SKELETON(MA, "\\u0022", "\\u0027\\u0027");
TEST_TEARDOWN;
}

File diff suppressed because it is too large Load Diff