ICU-10706 Spoof Check, bug fix & test for Identifier Restriction Level; update spoof checker one time initialization.

X-SVN-Rev: 35175
This commit is contained in:
Andy Heninger 2014-02-19 23:53:30 +00:00
parent 5b67287476
commit c4f6c43b86
4 changed files with 59 additions and 61 deletions

View File

@ -20,16 +20,17 @@ U_NAMESPACE_BEGIN
#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
static UMutex gInitMutex = U_MUTEX_INITIALIZER;
static UBool gStaticsAreInitialized = FALSE;
static UnicodeSet *ASCII;
static ScriptSet *JAPANESE;
static ScriptSet *CHINESE;
static ScriptSet *KOREAN;
static ScriptSet *CONFUSABLE_WITH_LATIN;
static UInitOnce gIdentifierInfoInitOnce = U_INITONCE_INITIALIZER;
UnicodeSet *IdentifierInfo::ASCII;
ScriptSet *IdentifierInfo::JAPANESE;
ScriptSet *IdentifierInfo::CHINESE;
ScriptSet *IdentifierInfo::KOREAN;
ScriptSet *IdentifierInfo::CONFUSABLE_WITH_LATIN;
UBool IdentifierInfo::cleanup() {
U_CDECL_BEGIN
static UBool U_CALLCONV
IdentifierInfo_cleanup(void) {
delete ASCII;
ASCII = NULL;
delete JAPANESE;
@ -40,14 +41,30 @@ UBool IdentifierInfo::cleanup() {
KOREAN = NULL;
delete CONFUSABLE_WITH_LATIN;
CONFUSABLE_WITH_LATIN = NULL;
gStaticsAreInitialized = FALSE;
gIdentifierInfoInitOnce.reset();
return TRUE;
}
U_CDECL_BEGIN
static UBool U_CALLCONV
IdentifierInfo_cleanup(void) {
return IdentifierInfo::cleanup();
/**
 * One-time initializer for the shared, file-level sets used by IdentifierInfo
 * (ASCII, JAPANESE, CHINESE, KOREAN, CONFUSABLE_WITH_LATIN).
 *
 * Invoked through umtx_initOnce() from the IdentifierInfo constructor.
 *
 * @param status  Set to U_MEMORY_ALLOCATION_ERROR if any of the sets cannot be
 *                allocated; also receives any error reported by ScriptSet::set().
 */
static void U_CALLCONV
IdentifierInfo_init(UErrorCode &status) {
    // Register the cleanup hook before allocating anything. IdentifierInfo_cleanup
    // deletes each pointer (deleting NULL is harmless), so if one of the
    // allocations below fails, the objects that were successfully created are
    // still released at library cleanup instead of being leaked by the early return.
    ucln_i18n_registerCleanup(UCLN_I18N_IDENTIFIER_INFO, IdentifierInfo_cleanup);

    ASCII = new UnicodeSet(0, 0x7f);
    JAPANESE = new ScriptSet();
    CHINESE = new ScriptSet();
    KOREAN = new ScriptSet();
    CONFUSABLE_WITH_LATIN = new ScriptSet();
    if (ASCII == NULL || JAPANESE == NULL || CHINESE == NULL || KOREAN == NULL
            || CONFUSABLE_WITH_LATIN == NULL) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }

    // The ASCII set is never modified after this point.
    ASCII->freeze();

    // Script combinations, each built from the scripts listed in the calls below.
    JAPANESE->set(USCRIPT_LATIN, status).set(USCRIPT_HAN, status).set(USCRIPT_HIRAGANA, status)
            .set(USCRIPT_KATAKANA, status);
    CHINESE->set(USCRIPT_LATIN, status).set(USCRIPT_HAN, status).set(USCRIPT_BOPOMOFO, status);
    KOREAN->set(USCRIPT_LATIN, status).set(USCRIPT_HAN, status).set(USCRIPT_HANGUL, status);
    CONFUSABLE_WITH_LATIN->set(USCRIPT_CYRILLIC, status).set(USCRIPT_GREEK, status)
            .set(USCRIPT_CHEROKEE, status);
}
U_CDECL_END
@ -55,33 +72,11 @@ U_CDECL_END
IdentifierInfo::IdentifierInfo(UErrorCode &status):
fIdentifier(NULL), fRequiredScripts(NULL), fScriptSetSet(NULL),
fCommonAmongAlternates(NULL), fNumerics(NULL), fIdentifierProfile(NULL) {
umtx_initOnce(gIdentifierInfoInitOnce, &IdentifierInfo_init, status);
if (U_FAILURE(status)) {
return;
}
{
Mutex lock(&gInitMutex);
if (!gStaticsAreInitialized) {
ASCII = new UnicodeSet(0, 0x7f);
JAPANESE = new ScriptSet();
CHINESE = new ScriptSet();
KOREAN = new ScriptSet();
CONFUSABLE_WITH_LATIN = new ScriptSet();
if (ASCII == NULL || JAPANESE == NULL || CHINESE == NULL || KOREAN == NULL
|| CONFUSABLE_WITH_LATIN == NULL) {
status = U_MEMORY_ALLOCATION_ERROR;
return;
}
ASCII->freeze();
JAPANESE->set(USCRIPT_LATIN, status).set(USCRIPT_HAN, status).set(USCRIPT_HIRAGANA, status)
.set(USCRIPT_KATAKANA, status);
CHINESE->set(USCRIPT_LATIN, status).set(USCRIPT_HAN, status).set(USCRIPT_BOPOMOFO, status);
KOREAN->set(USCRIPT_LATIN, status).set(USCRIPT_HAN, status).set(USCRIPT_HANGUL, status);
CONFUSABLE_WITH_LATIN->set(USCRIPT_CYRILLIC, status).set(USCRIPT_GREEK, status)
.set(USCRIPT_CHEROKEE, status);
ucln_i18n_registerCleanup(UCLN_I18N_IDENTIFIER_INFO, IdentifierInfo_cleanup);
gStaticsAreInitialized = TRUE;
}
}
fIdentifier = new UnicodeString();
fRequiredScripts = new ScriptSet();
fScriptSetSet = uhash_open(uhash_hashScriptSet, uhash_compareScriptSet, NULL, &status);

View File

@ -1,6 +1,6 @@
/*
**********************************************************************
* Copyright (C) 2013, International Business Machines
* Copyright (C) 2014, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
*
@ -171,11 +171,6 @@ class U_I18N_API IdentifierInfo : public UMemory {
*/
static UnicodeString &displayAlternates(UnicodeString &dest, const UHashtable *alternates, UErrorCode &status);
/**
* Static memory cleanup function.
* @internal
*/
static UBool cleanup();
private:
IdentifierInfo & clear();
@ -187,15 +182,6 @@ class U_I18N_API IdentifierInfo : public UMemory {
ScriptSet *fCommonAmongAlternates;
UnicodeSet *fNumerics;
UnicodeSet *fIdentifierProfile;
static UnicodeSet *ASCII;
static ScriptSet *JAPANESE;
static ScriptSet *CHINESE;
static ScriptSet *KOREAN;
static ScriptSet *CONFUSABLE_WITH_LATIN;
};
U_NAMESPACE_END

View File

@ -198,7 +198,7 @@ uspoof_setChecks(USpoofChecker *sc, int32_t checks, UErrorCode *status) {
// Verify that the requested checks are all ones (bits) that
// are acceptable, known values.
if (checks & ~USPOOF_ALL_CHECKS) {
if (checks & ~(USPOOF_ALL_CHECKS | USPOOF_AUX_INFO)) {
*status = U_ILLEGAL_ARGUMENT_ERROR;
return;
}

View File

@ -682,8 +682,9 @@ void IntlTestSpoof::testRestrictionLevel() {
};
char msgBuffer[100];
URestrictionLevel restrictionLevels[] = { USPOOF_ASCII, USPOOF_HIGHLY_RESTRICTIVE,
USPOOF_MODERATELY_RESTRICTIVE, USPOOF_MINIMALLY_RESTRICTIVE, USPOOF_UNRESTRICTIVE};
URestrictionLevel restrictionLevels[] = { USPOOF_ASCII, USPOOF_SINGLE_SCRIPT_RESTRICTIVE,
USPOOF_HIGHLY_RESTRICTIVE, USPOOF_MODERATELY_RESTRICTIVE, USPOOF_MINIMALLY_RESTRICTIVE,
USPOOF_UNRESTRICTIVE};
UErrorCode status = U_ZERO_ERROR;
IdentifierInfo idInfo(status);
@ -706,14 +707,30 @@ void IntlTestSpoof::testRestrictionLevel() {
uspoof_setChecks(sc, USPOOF_RESTRICTION_LEVEL, &status);
uspoof_setAllowedChars(sc, uspoof_getRecommendedSet(&status), &status);
uspoof_setRestrictionLevel(sc, levelSetInSpoofChecker);
UBool actualValue = uspoof_checkUnicodeString(sc, testString, NULL, &status) != 0;
int32_t actualValue = uspoof_checkUnicodeString(sc, testString, NULL, &status);
// we want to fail if the text is (say) MODERATE and the testLevel is ASCII
UBool expectedFailure = expectedLevel > levelSetInSpoofChecker ||
!uspoof_getRecommendedUnicodeSet(&status)->containsAll(testString);
sprintf(msgBuffer, "testNum = %d, levelIndex = %d", testNum, levelIndex);
TEST_ASSERT_MSG(expectedFailure == actualValue, msgBuffer);
int32_t expectedValue = 0;
if (expectedLevel > levelSetInSpoofChecker) {
expectedValue |= USPOOF_RESTRICTION_LEVEL;
}
if (!uspoof_getRecommendedUnicodeSet(&status)->containsAll(testString)) {
expectedValue |= USPOOF_CHAR_LIMIT;
}
sprintf(msgBuffer, "testNum = %d, levelIndex = %d, expected = %#x, actual = %#x",
testNum, levelIndex, expectedValue, actualValue);
TEST_ASSERT_MSG(expectedValue == actualValue, msgBuffer);
TEST_ASSERT_SUCCESS(status);
// Run the same check again, with the Spoof Checker configured to return
// the actual restriction level.
uspoof_setChecks(sc, USPOOF_AUX_INFO | USPOOF_RESTRICTION_LEVEL, &status);
uspoof_setAllowedChars(sc, uspoof_getRecommendedSet(&status), &status);
uspoof_setRestrictionLevel(sc, levelSetInSpoofChecker);
int32_t result = uspoof_checkUnicodeString(sc, testString, NULL, &status);
TEST_ASSERT_SUCCESS(status);
TEST_ASSERT_EQ(expectedLevel, result & USPOOF_RESTRICTION_LEVEL_MASK);
TEST_ASSERT_EQ(expectedValue, result & USPOOF_ALL_CHECKS);
uspoof_close(sc);
}
}