ICU-4790 spoof cleanups
X-SVN-Rev: 25876
This commit is contained in:
parent
1c326702cc
commit
b08b9e8625
@ -603,21 +603,16 @@ uspoof_checkUnicodeString(const USpoofChecker *sc,
|
||||
* or whole script - are determined by the check options set for the
|
||||
* USpoofChecker.
|
||||
*
|
||||
* TODO: expand on the following
|
||||
* There are four possible types of comarisons:
|
||||
* Mixed Script, Lower Case
|
||||
* Mixed Script, Any Case
|
||||
* Single Script, Lower Case
|
||||
* Single Script, Any Case
|
||||
* Which tests are performed is controlled by the flags
|
||||
* The tests to be performed are controlled by the flags
|
||||
* USPOOF_SINGLE_SCRIPT_CONFUSABLE
|
||||
* USPOOF_MIXED_SCRIPT_CONFUSABLE
|
||||
* One or both of these must be set.
|
||||
* USPOOF_WHOLE_SCRIPT_CONFUSABLE
|
||||
* At least one of these tests must be selected.
|
||||
*
|
||||
* USPOOF_ANY_CASE is a modifier. Choose it if the identifiers
|
||||
* are case-sensitive and may be of mixed case.
|
||||
* If identifiers are normalized to lower case for comparison or
|
||||
* display to the user, do not select the ANY_CASE option.
|
||||
* USPOOF_ANY_CASE is a modifier for the tests. Select it if the identifiers
|
||||
* may be of mixed case.
|
||||
* If identifiers are case folded for comparison and
|
||||
* display to the user, do not select the USPOOF_ANY_CASE option.
|
||||
*
|
||||
*
|
||||
* @param sc The USpoofChecker
|
||||
|
@ -551,6 +551,12 @@ uspoof_getSkeleton(const USpoofChecker *sc,
|
||||
UChar *dest, int32_t destCapacity,
|
||||
UErrorCode *status) {
|
||||
|
||||
// TODO: this function could be sped up a bit
|
||||
// Skip the input normalization when not needed, work from callers data.
|
||||
// Put the initial skeleton straight into the caller's destination buffer.
|
||||
// It probably won't need normalization.
|
||||
// But these would make the structure more complicated.
|
||||
|
||||
const SpoofImpl *This = SpoofImpl::validateThis(sc, *status);
|
||||
if (U_FAILURE(*status)) {
|
||||
return 0;
|
||||
@ -582,8 +588,8 @@ uspoof_getSkeleton(const USpoofChecker *sc,
|
||||
|
||||
// NFKD transform of the user supplied input
|
||||
|
||||
UChar nfkdBuf[USPOOF_STACK_BUFFER_SIZE];
|
||||
UChar *nfkdInput = nfkdBuf;
|
||||
UChar nfkdStackBuf[USPOOF_STACK_BUFFER_SIZE];
|
||||
UChar *nfkdInput = nfkdStackBuf;
|
||||
int32_t normalizedLen = unorm_normalize(
|
||||
s, length, UNORM_NFKD, 0, nfkdInput, USPOOF_STACK_BUFFER_SIZE, status);
|
||||
if (*status == U_BUFFER_OVERFLOW_ERROR) {
|
||||
@ -597,47 +603,67 @@ uspoof_getSkeleton(const USpoofChecker *sc,
|
||||
nfkdInput, normalizedLen+1, status);
|
||||
}
|
||||
if (U_FAILURE(*status)) {
|
||||
if (nfkdInput != nfkdStackBuf) {
|
||||
uprv_free(nfkdInput);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// buffer to hold the Unicode defined mappings for a single code point
|
||||
// buffer to hold the Unicode defined skeleton mappings for a single code point
|
||||
UChar buf[USPOOF_MAX_SKELETON_EXPANSION];
|
||||
|
||||
// Apply the mapping to the NFKD form string
|
||||
|
||||
// Apply the skeleton mapping to the NFKD normalized input string
|
||||
// Accumulate the skeleton, possibly unnormalized, in a UnicodeString.
|
||||
int32_t inputIndex = 0;
|
||||
int32_t resultLen = 0;
|
||||
UnicodeString skelStr;
|
||||
while (inputIndex < normalizedLen) {
|
||||
UChar32 c;
|
||||
U16_NEXT(nfkdInput, inputIndex, normalizedLen, c);
|
||||
int32_t replaceLen = This->confusableLookup(c, tableMask, buf);
|
||||
if (resultLen + replaceLen < destCapacity) {
|
||||
int i;
|
||||
for (i=0; i<replaceLen; i++) {
|
||||
dest[resultLen++] = buf[i];
|
||||
}
|
||||
} else {
|
||||
// Storing the transformed string would overflow the dest buffer.
|
||||
// Don't bother storing anything, just sum up the required buffer size.
|
||||
// (We dont guarantee that a truncated buffer is filled to it's end)
|
||||
resultLen += replaceLen;
|
||||
}
|
||||
skelStr.append(buf, replaceLen);
|
||||
}
|
||||
|
||||
if (resultLen < destCapacity) {
|
||||
dest[resultLen] = 0;
|
||||
} else if (resultLen == destCapacity) {
|
||||
*status = U_STRING_NOT_TERMINATED_WARNING;
|
||||
} else {
|
||||
*status = U_BUFFER_OVERFLOW_ERROR;
|
||||
}
|
||||
if (nfkdInput != nfkdBuf) {
|
||||
|
||||
if (nfkdInput != nfkdStackBuf) {
|
||||
uprv_free(nfkdInput);
|
||||
}
|
||||
return resultLen;
|
||||
|
||||
const UChar *result = skelStr.getBuffer();
|
||||
int32_t resultLen = skelStr.length();
|
||||
UChar *normedResult = NULL;
|
||||
|
||||
// Check the skeleton for NFKD, normalize it if needed.
|
||||
// Unnormalized results should be very rare.
|
||||
if (!unorm_isNormalized(result, resultLen, UNORM_NFKD, status)) {
|
||||
normalizedLen = unorm_normalize(dest, resultLen, UNORM_NFKD, 0, NULL, 0, status);
|
||||
UChar *normedResult = static_cast<UChar *>(uprv_malloc((normalizedLen+1)*sizeof(UChar)));
|
||||
if (normedResult == NULL) {
|
||||
*status = U_MEMORY_ALLOCATION_ERROR;
|
||||
return 0;
|
||||
}
|
||||
unorm_normalize(result, resultLen, UNORM_NFKD, 0, normedResult, normalizedLen+1, status);
|
||||
result = normedResult;
|
||||
resultLen = normalizedLen;
|
||||
}
|
||||
|
||||
// Copy the skeleton to the caller's buffer
|
||||
if (U_SUCCESS(*status)) {
|
||||
if (destCapacity == 0 || resultLen > destCapacity) {
|
||||
*status = resultLen>destCapacity ? U_BUFFER_OVERFLOW_ERROR : U_STRING_NOT_TERMINATED_WARNING;
|
||||
} else {
|
||||
u_memcpy(dest, result, resultLen);
|
||||
if (destCapacity > resultLen) {
|
||||
dest[resultLen] = 0;
|
||||
} else {
|
||||
*status = U_STRING_NOT_TERMINATED_WARNING;
|
||||
}
|
||||
}
|
||||
}
|
||||
uprv_free(normedResult);
|
||||
return resultLen;
|
||||
}
|
||||
|
||||
|
||||
|
||||
U_CAPI UnicodeString & U_EXPORT2
|
||||
uspoof_getSkeletonUnicodeString(const USpoofChecker *sc,
|
||||
uint32_t type,
|
||||
|
@ -67,6 +67,12 @@ void IntlTestSpoof::runIndexedTest( int32_t index, UBool exec, const char* &name
|
||||
testSkeleton();
|
||||
}
|
||||
break;
|
||||
case 2:
|
||||
name = "TestAreConfusable";
|
||||
if (exec) {
|
||||
testAreConfusable();
|
||||
}
|
||||
break;
|
||||
default: name=""; break;
|
||||
}
|
||||
}
|
||||
@ -199,4 +205,18 @@ void IntlTestSpoof::checkSkeleton(const USpoofChecker *sc, uint32_t type,
|
||||
}
|
||||
}
|
||||
|
||||
void IntlTestSpoof::testAreConfusable() {
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
TEST_SETUP
|
||||
UnicodeString s1("A long string that will overflow stack buffers. A long string that will overflow stack buffers. "
|
||||
"A long string that will overflow stack buffers. A long string that will overflow stack buffers. ");
|
||||
UnicodeString s2("A long string that wi11 overflow stack buffers. A long string that will overflow stack buffers. "
|
||||
"A long string that wi11 overflow stack buffers. A long string that will overflow stack buffers. ");
|
||||
TEST_ASSERT_EQ(USPOOF_SINGLE_SCRIPT_CONFUSABLE, uspoof_areConfusableUnicodeString(sc, s1, s2, &status));
|
||||
TEST_ASSERT_SUCCESS(status);
|
||||
|
||||
TEST_TEARDOWN;
|
||||
}
|
||||
|
||||
|
||||
#endif /* #if !UCONFIG_NO_SPOOF_DETECT*/
|
||||
|
@ -26,6 +26,8 @@ public:
|
||||
void testSpoofAPI();
|
||||
|
||||
void testSkeleton();
|
||||
|
||||
void testAreConfusable();
|
||||
|
||||
// Internal function to run a single skeleton test case.
|
||||
void checkSkeleton(const USpoofChecker *sc, uint32_t flags,
|
||||
|
Loading…
Reference in New Issue
Block a user