ICU-8044 Spoof Detection data update to latest from Unicode.
X-SVN-Rev: 28838
This commit is contained in:
parent
5714200c0d
commit
c05e310791
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -77,7 +77,7 @@ void addUSpoofTest(TestNode** root)
|
||||
/*
|
||||
* Identifiers for verifying that spoof checking is minimally alive and working.
|
||||
*/
|
||||
const UChar goodLatin[] = {(UChar)0x75, (UChar)0x77, 0}; /* "uw", all ASCII */
|
||||
const UChar goodLatin[] = {(UChar)0x75, (UChar)0x7a, 0}; /* "uz", all ASCII */
|
||||
/* (not confusable) */
|
||||
const UChar scMixed[] = {(UChar)0x73, (UChar)0x0441, 0}; /* "sc", with Cyrillic 'c' */
|
||||
/* (mixed script, confusable) */
|
||||
@ -95,8 +95,8 @@ const UChar lll_Latin_b[] = {(UChar)0xff29, (UChar)0x217c, (UChar)0x196, 0};
|
||||
|
||||
const UChar lll_Cyrl[] = {(UChar)0x0406, (UChar)0x04C0, (UChar)0x31, 0};
|
||||
|
||||
/* The skeleton transform for all of these 'lll' lookalikes is all ascii digit 1. */
|
||||
const UChar lll_Skel[] = {(UChar)0x31, (UChar)0x31, (UChar)0x31, 0};
|
||||
/* The skeleton transform for all of these 'lll' lookalikes is all lower case l. */
|
||||
const UChar lll_Skel[] = {(UChar)0x6c, (UChar)0x6c, (UChar)0x6c, 0};
|
||||
|
||||
/* Provide better code coverage */
|
||||
const char goodLatinUTF8[] = {0x75, 0x77, 0};
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 2009, International Business Machines Corporation
|
||||
* Copyright (C) 2010, International Business Machines Corporation
|
||||
* and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
*/
|
||||
@ -98,7 +98,8 @@ void IntlTestSpoof::runIndexedTest( int32_t index, UBool exec, const char* &name
|
||||
void IntlTestSpoof::testSpoofAPI() {
|
||||
|
||||
TEST_SETUP
|
||||
UnicodeString s("uvw");
|
||||
UnicodeString s("xyz"); // Many Latin ranges are whole-script confusable with other scripts.
|
||||
// If this test starts failing, consult confusablesWholeScript.txt
|
||||
int32_t position = 666;
|
||||
int32_t checkResults = uspoof_checkUnicodeString(sc, s, &position, &status);
|
||||
TEST_ASSERT_SUCCESS(status);
|
||||
@ -119,7 +120,7 @@ void IntlTestSpoof::testSpoofAPI() {
|
||||
UnicodeString dest;
|
||||
UnicodeString &retStr = uspoof_getSkeletonUnicodeString(sc, USPOOF_ANY_CASE, s, dest, &status);
|
||||
TEST_ASSERT_SUCCESS(status);
|
||||
TEST_ASSERT(UnicodeString("11100") == dest);
|
||||
TEST_ASSERT(UnicodeString("lllOO") == dest);
|
||||
TEST_ASSERT(&dest == &retStr);
|
||||
TEST_TEARDOWN;
|
||||
}
|
||||
@ -142,15 +143,15 @@ void IntlTestSpoof::testSkeleton() {
|
||||
|
||||
TEST_SETUP
|
||||
// A long "identifier" that will overflow implementation stack buffers, forcing heap allocations.
|
||||
CHECK_SKELETON(SL, " A long 'identifier' that will overflow implementation stack buffers, forcing heap allocations."
|
||||
" A long 'identifier' that will overflow implementation stack buffers, forcing heap allocations."
|
||||
" A long 'identifier' that will overflow implementation stack buffers, forcing heap allocations."
|
||||
" A long 'identifier' that will overflow implementation stack buffers, forcing heap allocations.",
|
||||
CHECK_SKELETON(SL, " A 1ong \\u02b9identifier' that will overflow implementation stack buffers, forcing heap allocations."
|
||||
" A 1ong 'identifier' that will overflow implementation stack buffers, forcing heap allocations."
|
||||
" A 1ong 'identifier' that will overflow implementation stack buffers, forcing heap allocations."
|
||||
" A 1ong 'identifier' that will overflow implementation stack buffers, forcing heap allocations.",
|
||||
|
||||
" A 1ong \\u02b9identifier\\u02b9 that wi11 overf1ow imp1ementation stack buffers, forcing heap a11ocations."
|
||||
" A 1ong \\u02b9identifier\\u02b9 that wi11 overf1ow imp1ementation stack buffers, forcing heap a11ocations."
|
||||
" A 1ong \\u02b9identifier\\u02b9 that wi11 overf1ow imp1ementation stack buffers, forcing heap a11ocations."
|
||||
" A 1ong \\u02b9identifier\\u02b9 that wi11 overf1ow imp1ementation stack buffers, forcing heap a11ocations.")
|
||||
" A long 'identifier' that vvill overflovv irnplernentation stack buffers, forcing heap allocations."
|
||||
" A long 'identifier' that vvill overflovv irnplernentation stack buffers, forcing heap allocations."
|
||||
" A long 'identifier' that vvill overflovv irnplernentation stack buffers, forcing heap allocations."
|
||||
" A long 'identifier' that vvill overflovv irnplernentation stack buffers, forcing heap allocations.")
|
||||
|
||||
// FC5F ; FE74 0651 ; ML #* ARABIC LIGATURE SHADDA WITH KASRATAN ISOLATED FORM to
|
||||
// ARABIC KASRATAN ISOLATED FORM, ARABIC SHADDA
|
||||
@ -159,20 +160,23 @@ void IntlTestSpoof::testSkeleton() {
|
||||
CHECK_SKELETON(SL, "\\uFC5F", " \\u064d\\u0651");
|
||||
|
||||
CHECK_SKELETON(SL, "nochange", "nochange");
|
||||
CHECK_SKELETON(MA, "love", "1ove"); // lower case l to digit 1
|
||||
CHECK_SKELETON(MA, "love", "love");
|
||||
CHECK_SKELETON(MA, "1ove", "love"); // Digit 1 to letter l
|
||||
CHECK_SKELETON(ML, "OOPS", "OOPS");
|
||||
CHECK_SKELETON(MA, "OOPS", "00PS"); // Letter O to digit 0 in any case mode only
|
||||
CHECK_SKELETON(ML, "00PS", "00PS"); // Digit 0 unchanged in lower case mode.
|
||||
CHECK_SKELETON(MA, "OOPS", "OOPS");
|
||||
CHECK_SKELETON(MA, "00PS", "OOPS"); // Digit 0 to letter O in any case mode only
|
||||
CHECK_SKELETON(SL, "\\u059c", "\\u0301");
|
||||
CHECK_SKELETON(SL, "\\u2A74", "\\u003A\\u003A\\u003D");
|
||||
CHECK_SKELETON(SL, "\\u247E", "\\u0028\\u0031\\u0031\\u0029");
|
||||
CHECK_SKELETON(SL, "\\u247E", "\\u0028\\u006C\\u006C\\u0029"); // "(ll)"
|
||||
CHECK_SKELETON(SL, "\\uFDFB", "\\u062C\\u0644\\u0020\\u062C\\u0644\\u0627\\u0644\\u0647");
|
||||
|
||||
// This mapping exists in the ML and MA tables, does not exist in SL, SA
|
||||
//0C83 ; 0C03 ;
|
||||
CHECK_SKELETON(SL, "\\u0C83", "\\u0C83");
|
||||
CHECK_SKELETON(SA, "\\u0C83", "\\u0C83");
|
||||
CHECK_SKELETON(ML, "\\u0C83", "\\u0C03");
|
||||
CHECK_SKELETON(MA, "\\u0C83", "\\u0C03");
|
||||
CHECK_SKELETON(ML, "\\u0C83", "\\u0983");
|
||||
CHECK_SKELETON(MA, "\\u0C83", "\\u0983");
|
||||
|
||||
// 0391 ; 0041 ;
|
||||
// This mapping exists only in the MA table.
|
||||
@ -188,12 +192,12 @@ void IntlTestSpoof::testSkeleton() {
|
||||
CHECK_SKELETON(SL, "\\u13CF", "\\u13CF");
|
||||
CHECK_SKELETON(SA, "\\u13CF", "\\u13CF");
|
||||
|
||||
// 0022 ; 02B9 02B9 ;
|
||||
// 0022 ; 0027 0027 ;
|
||||
// all tables.
|
||||
CHECK_SKELETON(SL, "\\u0022", "\\u02B9\\u02B9");
|
||||
CHECK_SKELETON(SA, "\\u0022", "\\u02B9\\u02B9");
|
||||
CHECK_SKELETON(ML, "\\u0022", "\\u02B9\\u02B9");
|
||||
CHECK_SKELETON(MA, "\\u0022", "\\u02B9\\u02B9");
|
||||
CHECK_SKELETON(SL, "\\u0022", "\\u0027\\u0027");
|
||||
CHECK_SKELETON(SA, "\\u0022", "\\u0027\\u0027");
|
||||
CHECK_SKELETON(ML, "\\u0022", "\\u0027\\u0027");
|
||||
CHECK_SKELETON(MA, "\\u0022", "\\u0027\\u0027");
|
||||
|
||||
TEST_TEARDOWN;
|
||||
}
|
||||
|
40159
icu4c/source/test/testdata/confusables.txt
vendored
40159
icu4c/source/test/testdata/confusables.txt
vendored
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user