ICU-4790 spoof cleanups

X-SVN-Rev: 25876
This commit is contained in:
Andy Heninger 2009-04-23 01:30:02 +00:00
parent 1c326702cc
commit b08b9e8625
4 changed files with 82 additions and 39 deletions

View File

@ -603,21 +603,16 @@ uspoof_checkUnicodeString(const USpoofChecker *sc,
* or whole script - are determined by the check options set for the
* USpoofChecker.
*
* TODO: expand on the following
* There are four possible types of comparisons:
* Mixed Script, Lower Case
* Mixed Script, Any Case
* Single Script, Lower Case
* Single Script, Any Case
* Which tests are performed is controlled by the flags
* The tests to be performed are controlled by the flags
* USPOOF_SINGLE_SCRIPT_CONFUSABLE
* USPOOF_MIXED_SCRIPT_CONFUSABLE
* One or both of these must be set.
* USPOOF_WHOLE_SCRIPT_CONFUSABLE
* At least one of these tests must be selected.
*
* USPOOF_ANY_CASE is a modifier. Choose it if the identifiers
* are case-sensitive and may be of mixed case.
* If identifiers are normalized to lower case for comparison or
* display to the user, do not select the ANY_CASE option.
* USPOOF_ANY_CASE is a modifier for the tests. Select it if the identifiers
* may be of mixed case.
* If identifiers are case folded for comparison and
* display to the user, do not select the USPOOF_ANY_CASE option.
*
*
* @param sc The USpoofChecker

View File

@ -551,6 +551,12 @@ uspoof_getSkeleton(const USpoofChecker *sc,
UChar *dest, int32_t destCapacity,
UErrorCode *status) {
// TODO: this function could be sped up a bit
// Skip the input normalization when not needed, work from callers data.
// Put the initial skeleton straight into the caller's destination buffer.
// It probably won't need normalization.
// But these would make the structure more complicated.
const SpoofImpl *This = SpoofImpl::validateThis(sc, *status);
if (U_FAILURE(*status)) {
return 0;
@ -582,8 +588,8 @@ uspoof_getSkeleton(const USpoofChecker *sc,
// NFKD transform of the user supplied input
UChar nfkdBuf[USPOOF_STACK_BUFFER_SIZE];
UChar *nfkdInput = nfkdBuf;
UChar nfkdStackBuf[USPOOF_STACK_BUFFER_SIZE];
UChar *nfkdInput = nfkdStackBuf;
int32_t normalizedLen = unorm_normalize(
s, length, UNORM_NFKD, 0, nfkdInput, USPOOF_STACK_BUFFER_SIZE, status);
if (*status == U_BUFFER_OVERFLOW_ERROR) {
@ -597,47 +603,67 @@ uspoof_getSkeleton(const USpoofChecker *sc,
nfkdInput, normalizedLen+1, status);
}
if (U_FAILURE(*status)) {
if (nfkdInput != nfkdStackBuf) {
uprv_free(nfkdInput);
}
return 0;
}
// buffer to hold the Unicode defined mappings for a single code point
// buffer to hold the Unicode defined skeleton mappings for a single code point
UChar buf[USPOOF_MAX_SKELETON_EXPANSION];
// Apply the mapping to the NFKD form string
// Apply the skeleton mapping to the NFKD normalized input string
// Accumulate the skeleton, possibly unnormalized, in a UnicodeString.
int32_t inputIndex = 0;
int32_t resultLen = 0;
UnicodeString skelStr;
while (inputIndex < normalizedLen) {
UChar32 c;
U16_NEXT(nfkdInput, inputIndex, normalizedLen, c);
int32_t replaceLen = This->confusableLookup(c, tableMask, buf);
if (resultLen + replaceLen < destCapacity) {
int i;
for (i=0; i<replaceLen; i++) {
dest[resultLen++] = buf[i];
}
} else {
// Storing the transformed string would overflow the dest buffer.
// Don't bother storing anything, just sum up the required buffer size.
// (We don't guarantee that a truncated buffer is filled to its end)
resultLen += replaceLen;
}
skelStr.append(buf, replaceLen);
}
if (resultLen < destCapacity) {
dest[resultLen] = 0;
} else if (resultLen == destCapacity) {
*status = U_STRING_NOT_TERMINATED_WARNING;
} else {
*status = U_BUFFER_OVERFLOW_ERROR;
}
if (nfkdInput != nfkdBuf) {
if (nfkdInput != nfkdStackBuf) {
uprv_free(nfkdInput);
}
return resultLen;
const UChar *result = skelStr.getBuffer();
int32_t resultLen = skelStr.length();
UChar *normedResult = NULL;
// Check the skeleton for NFKD, normalize it if needed.
// Unnormalized results should be very rare.
if (!unorm_isNormalized(result, resultLen, UNORM_NFKD, status)) {
normalizedLen = unorm_normalize(dest, resultLen, UNORM_NFKD, 0, NULL, 0, status);
UChar *normedResult = static_cast<UChar *>(uprv_malloc((normalizedLen+1)*sizeof(UChar)));
if (normedResult == NULL) {
*status = U_MEMORY_ALLOCATION_ERROR;
return 0;
}
unorm_normalize(result, resultLen, UNORM_NFKD, 0, normedResult, normalizedLen+1, status);
result = normedResult;
resultLen = normalizedLen;
}
// Copy the skeleton to the caller's buffer
if (U_SUCCESS(*status)) {
if (destCapacity == 0 || resultLen > destCapacity) {
*status = resultLen>destCapacity ? U_BUFFER_OVERFLOW_ERROR : U_STRING_NOT_TERMINATED_WARNING;
} else {
u_memcpy(dest, result, resultLen);
if (destCapacity > resultLen) {
dest[resultLen] = 0;
} else {
*status = U_STRING_NOT_TERMINATED_WARNING;
}
}
}
uprv_free(normedResult);
return resultLen;
}
U_CAPI UnicodeString & U_EXPORT2
uspoof_getSkeletonUnicodeString(const USpoofChecker *sc,
uint32_t type,

View File

@ -67,6 +67,12 @@ void IntlTestSpoof::runIndexedTest( int32_t index, UBool exec, const char* &name
testSkeleton();
}
break;
case 2:
name = "TestAreConfusable";
if (exec) {
testAreConfusable();
}
break;
default: name=""; break;
}
}
@ -199,4 +205,18 @@ void IntlTestSpoof::checkSkeleton(const USpoofChecker *sc, uint32_t type,
}
}
// Test of uspoof_areConfusableUnicodeString() on long input strings.
// s1 and s2 are identical except that s2 spells "will" as "wi11" (digit one
// in place of lower-case L) in its first and third sentences. The call is
// expected to return USPOOF_SINGLE_SCRIPT_CONFUSABLE and leave status successful.
void IntlTestSpoof::testAreConfusable() {
UErrorCode status = U_ZERO_ERROR;
// NOTE(review): TEST_SETUP / TEST_TEARDOWN are macros defined outside this view;
// presumably they create and release the USpoofChecker `sc` used below - confirm.
TEST_SETUP
// Each string is four 48-character sentences (192 characters in total). Per the
// literal text they are meant to be long enough to overflow internal stack
// buffers (presumably sized by USPOOF_STACK_BUFFER_SIZE - confirm).
UnicodeString s1("A long string that will overflow stack buffers. A long string that will overflow stack buffers. "
"A long string that will overflow stack buffers. A long string that will overflow stack buffers. ");
UnicodeString s2("A long string that wi11 overflow stack buffers. A long string that will overflow stack buffers. "
"A long string that wi11 overflow stack buffers. A long string that will overflow stack buffers. ");
// The two strings must be reported as single-script confusable with each other.
TEST_ASSERT_EQ(USPOOF_SINGLE_SCRIPT_CONFUSABLE, uspoof_areConfusableUnicodeString(sc, s1, s2, &status));
// The comparison itself must not have set an error code.
TEST_ASSERT_SUCCESS(status);
TEST_TEARDOWN;
}
#endif /* #if !UCONFIG_NO_SPOOF_DETECT*/

View File

@ -26,6 +26,8 @@ public:
void testSpoofAPI();
void testSkeleton();
void testAreConfusable();
// Internal function to run a single skeleton test case.
void checkSkeleton(const USpoofChecker *sc, uint32_t flags,