From c4f6c43b863f1204011510963519ce5b160b656a Mon Sep 17 00:00:00 2001 From: Andy Heninger Date: Wed, 19 Feb 2014 23:53:30 +0000 Subject: [PATCH] ICU-10706 Spoof Check, bug fix & test for Identifier Restriction Level; update spoof checker one time initialization. X-SVN-Rev: 35175 --- icu4c/source/i18n/identifier_info.cpp | 69 ++++++++++++-------------- icu4c/source/i18n/identifier_info.h | 16 +----- icu4c/source/i18n/uspoof.cpp | 2 +- icu4c/source/test/intltest/itspoof.cpp | 33 +++++++++--- 4 files changed, 59 insertions(+), 61 deletions(-) diff --git a/icu4c/source/i18n/identifier_info.cpp b/icu4c/source/i18n/identifier_info.cpp index af2a19f24d..87ddda1a79 100644 --- a/icu4c/source/i18n/identifier_info.cpp +++ b/icu4c/source/i18n/identifier_info.cpp @@ -20,16 +20,17 @@ U_NAMESPACE_BEGIN #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) -static UMutex gInitMutex = U_MUTEX_INITIALIZER; -static UBool gStaticsAreInitialized = FALSE; +static UnicodeSet *ASCII; +static ScriptSet *JAPANESE; +static ScriptSet *CHINESE; +static ScriptSet *KOREAN; +static ScriptSet *CONFUSABLE_WITH_LATIN; +static UInitOnce gIdentifierInfoInitOnce = U_INITONCE_INITIALIZER; -UnicodeSet *IdentifierInfo::ASCII; -ScriptSet *IdentifierInfo::JAPANESE; -ScriptSet *IdentifierInfo::CHINESE; -ScriptSet *IdentifierInfo::KOREAN; -ScriptSet *IdentifierInfo::CONFUSABLE_WITH_LATIN; -UBool IdentifierInfo::cleanup() { +U_CDECL_BEGIN +static UBool U_CALLCONV +IdentifierInfo_cleanup(void) { delete ASCII; ASCII = NULL; delete JAPANESE; @@ -40,14 +41,30 @@ UBool IdentifierInfo::cleanup() { KOREAN = NULL; delete CONFUSABLE_WITH_LATIN; CONFUSABLE_WITH_LATIN = NULL; - gStaticsAreInitialized = FALSE; + gIdentifierInfoInitOnce.reset(); return TRUE; } -U_CDECL_BEGIN -static UBool U_CALLCONV -IdentifierInfo_cleanup(void) { - return IdentifierInfo::cleanup(); +static void U_CALLCONV +IdentifierInfo_init(UErrorCode &status) { + ASCII = new UnicodeSet(0, 0x7f); + JAPANESE = new ScriptSet(); + CHINESE = new ScriptSet(); + KOREAN = new ScriptSet(); + CONFUSABLE_WITH_LATIN = new ScriptSet(); + if (ASCII == NULL || JAPANESE == NULL || CHINESE == NULL || KOREAN == NULL + || CONFUSABLE_WITH_LATIN == NULL) { + status = U_MEMORY_ALLOCATION_ERROR; + return; + } + ASCII->freeze(); + JAPANESE->set(USCRIPT_LATIN, status).set(USCRIPT_HAN, status).set(USCRIPT_HIRAGANA, status) + .set(USCRIPT_KATAKANA, status); + CHINESE->set(USCRIPT_LATIN, status).set(USCRIPT_HAN, status).set(USCRIPT_BOPOMOFO, status); + KOREAN->set(USCRIPT_LATIN, status).set(USCRIPT_HAN, status).set(USCRIPT_HANGUL, status); + CONFUSABLE_WITH_LATIN->set(USCRIPT_CYRILLIC, status).set(USCRIPT_GREEK, status) + .set(USCRIPT_CHEROKEE, status); + ucln_i18n_registerCleanup(UCLN_I18N_IDENTIFIER_INFO, IdentifierInfo_cleanup); } U_CDECL_END @@ -55,33 +72,11 @@ U_CDECL_END IdentifierInfo::IdentifierInfo(UErrorCode &status): fIdentifier(NULL), fRequiredScripts(NULL), fScriptSetSet(NULL), fCommonAmongAlternates(NULL), fNumerics(NULL), fIdentifierProfile(NULL) { + umtx_initOnce(gIdentifierInfoInitOnce, &IdentifierInfo_init, status); if (U_FAILURE(status)) { return; } - { - Mutex lock(&gInitMutex); - if (!gStaticsAreInitialized) { - ASCII = new UnicodeSet(0, 0x7f); - JAPANESE = new ScriptSet(); - CHINESE = new ScriptSet(); - KOREAN = new ScriptSet(); - CONFUSABLE_WITH_LATIN = new ScriptSet(); - if (ASCII == NULL || JAPANESE == NULL || CHINESE == NULL || KOREAN == NULL - || CONFUSABLE_WITH_LATIN == NULL) { - status = U_MEMORY_ALLOCATION_ERROR; - return; - } - ASCII->freeze(); - JAPANESE->set(USCRIPT_LATIN, status).set(USCRIPT_HAN, status).set(USCRIPT_HIRAGANA, status) - .set(USCRIPT_KATAKANA, status); - CHINESE->set(USCRIPT_LATIN, status).set(USCRIPT_HAN, status).set(USCRIPT_BOPOMOFO, status); - KOREAN->set(USCRIPT_LATIN, status).set(USCRIPT_HAN, status).set(USCRIPT_HANGUL, status); - CONFUSABLE_WITH_LATIN->set(USCRIPT_CYRILLIC, status).set(USCRIPT_GREEK, status) - .set(USCRIPT_CHEROKEE, status); - ucln_i18n_registerCleanup(UCLN_I18N_IDENTIFIER_INFO, IdentifierInfo_cleanup); - gStaticsAreInitialized = TRUE; - } - } + fIdentifier = new UnicodeString(); fRequiredScripts = new ScriptSet(); fScriptSetSet = uhash_open(uhash_hashScriptSet, uhash_compareScriptSet, NULL, &status); diff --git a/icu4c/source/i18n/identifier_info.h b/icu4c/source/i18n/identifier_info.h index c7cab61792..ab0b8b23a8 100644 --- a/icu4c/source/i18n/identifier_info.h +++ b/icu4c/source/i18n/identifier_info.h @@ -1,6 +1,6 @@ /* ********************************************************************** -* Copyright (C) 2013, International Business Machines +* Copyright (C) 2014, International Business Machines * Corporation and others. All Rights Reserved. ********************************************************************** * @@ -171,11 +171,6 @@ class U_I18N_API IdentifierInfo : public UMemory { */ static UnicodeString &displayAlternates(UnicodeString &dest, const UHashtable *alternates, UErrorCode &status); - /** - * Static memory cleanup function. - * @internal - */ - static UBool cleanup(); private: IdentifierInfo & clear(); @@ -187,15 +182,6 @@ class U_I18N_API IdentifierInfo : public UMemory { ScriptSet *fCommonAmongAlternates; UnicodeSet *fNumerics; UnicodeSet *fIdentifierProfile; - - static UnicodeSet *ASCII; - static ScriptSet *JAPANESE; - static ScriptSet *CHINESE; - static ScriptSet *KOREAN; - static ScriptSet *CONFUSABLE_WITH_LATIN; - - - }; U_NAMESPACE_END diff --git a/icu4c/source/i18n/uspoof.cpp b/icu4c/source/i18n/uspoof.cpp index 23a9de844a..b9051b26b4 100644 --- a/icu4c/source/i18n/uspoof.cpp +++ b/icu4c/source/i18n/uspoof.cpp @@ -198,7 +198,7 @@ uspoof_setChecks(USpoofChecker *sc, int32_t checks, UErrorCode *status) { // Verify that the requested checks are all ones (bits) that // are acceptable, known values. - if (checks & ~USPOOF_ALL_CHECKS) { + if (checks & ~(USPOOF_ALL_CHECKS | USPOOF_AUX_INFO)) { *status = U_ILLEGAL_ARGUMENT_ERROR; return; } diff --git a/icu4c/source/test/intltest/itspoof.cpp b/icu4c/source/test/intltest/itspoof.cpp index 527f2f031d..7e5c8550fd 100644 --- a/icu4c/source/test/intltest/itspoof.cpp +++ b/icu4c/source/test/intltest/itspoof.cpp @@ -682,8 +682,9 @@ void IntlTestSpoof::testRestrictionLevel() { }; char msgBuffer[100]; - URestrictionLevel restrictionLevels[] = { USPOOF_ASCII, USPOOF_HIGHLY_RESTRICTIVE, - USPOOF_MODERATELY_RESTRICTIVE, USPOOF_MINIMALLY_RESTRICTIVE, USPOOF_UNRESTRICTIVE}; + URestrictionLevel restrictionLevels[] = { USPOOF_ASCII, USPOOF_SINGLE_SCRIPT_RESTRICTIVE, + USPOOF_HIGHLY_RESTRICTIVE, USPOOF_MODERATELY_RESTRICTIVE, USPOOF_MINIMALLY_RESTRICTIVE, + USPOOF_UNRESTRICTIVE}; UErrorCode status = U_ZERO_ERROR; IdentifierInfo idInfo(status); @@ -706,14 +707,30 @@ void IntlTestSpoof::testRestrictionLevel() { uspoof_setChecks(sc, USPOOF_RESTRICTION_LEVEL, &status); uspoof_setAllowedChars(sc, uspoof_getRecommendedSet(&status), &status); uspoof_setRestrictionLevel(sc, levelSetInSpoofChecker); - UBool actualValue = uspoof_checkUnicodeString(sc, testString, NULL, &status) != 0; - + int32_t actualValue = uspoof_checkUnicodeString(sc, testString, NULL, &status); + // we want to fail if the text is (say) MODERATE and the testLevel is ASCII - UBool expectedFailure = expectedLevel > levelSetInSpoofChecker || - !uspoof_getRecommendedUnicodeSet(&status)->containsAll(testString); - sprintf(msgBuffer, "testNum = %d, levelIndex = %d", testNum, levelIndex); - TEST_ASSERT_MSG(expectedFailure == actualValue, msgBuffer); + int32_t expectedValue = 0; + if (expectedLevel > levelSetInSpoofChecker) { + expectedValue |= USPOOF_RESTRICTION_LEVEL; + } + if (!uspoof_getRecommendedUnicodeSet(&status)->containsAll(testString)) { + expectedValue |= USPOOF_CHAR_LIMIT; + } + sprintf(msgBuffer, "testNum = %d, levelIndex = %d, expected = %#x, actual = %#x", + testNum, levelIndex, expectedValue, actualValue); + TEST_ASSERT_MSG(expectedValue == actualValue, msgBuffer); TEST_ASSERT_SUCCESS(status); + + // Run the same check again, with the Spoof Checker configured to return + // the actual restriction level. + uspoof_setChecks(sc, USPOOF_AUX_INFO | USPOOF_RESTRICTION_LEVEL, &status); + uspoof_setAllowedChars(sc, uspoof_getRecommendedSet(&status), &status); + uspoof_setRestrictionLevel(sc, levelSetInSpoofChecker); + int32_t result = uspoof_checkUnicodeString(sc, testString, NULL, &status); + TEST_ASSERT_SUCCESS(status); + TEST_ASSERT_EQ(expectedLevel, result & USPOOF_RESTRICTION_LEVEL_MASK); + TEST_ASSERT_EQ(expectedValue, result & USPOOF_ALL_CHECKS); uspoof_close(sc); } }