ICU-4790 spoof cleanups
X-SVN-Rev: 25876
This commit is contained in:
parent
1c326702cc
commit
b08b9e8625
@ -603,21 +603,16 @@ uspoof_checkUnicodeString(const USpoofChecker *sc,
|
|||||||
* or whole script - are determined by the check options set for the
|
* or whole script - are determined by the check options set for the
|
||||||
* USpoofChecker.
|
* USpoofChecker.
|
||||||
*
|
*
|
||||||
* TODO: expand on the following
|
* The tests to be performed are controlled by the flags
|
||||||
* There are four possible types of comparisons:
|
|
||||||
* Mixed Script, Lower Case
|
|
||||||
* Mixed Script, Any Case
|
|
||||||
* Single Script, Lower Case
|
|
||||||
* Single Script, Any Case
|
|
||||||
* Which tests are performed is controlled by the flags
|
|
||||||
* USPOOF_SINGLE_SCRIPT_CONFUSABLE
|
* USPOOF_SINGLE_SCRIPT_CONFUSABLE
|
||||||
* USPOOF_MIXED_SCRIPT_CONFUSABLE
|
* USPOOF_MIXED_SCRIPT_CONFUSABLE
|
||||||
* One or both of these must be set.
|
* USPOOF_WHOLE_SCRIPT_CONFUSABLE
|
||||||
|
* At least one of these tests must be selected.
|
||||||
*
|
*
|
||||||
* USPOOF_ANY_CASE is a modifier. Choose it if the identifiers
|
* USPOOF_ANY_CASE is a modifier for the tests. Select it if the identifiers
|
||||||
* are case-sensitive and may be of mixed case.
|
* may be of mixed case.
|
||||||
* If identifiers are normalized to lower case for comparison or
|
* If identifiers are case folded for comparison and
|
||||||
* display to the user, do not select the ANY_CASE option.
|
* display to the user, do not select the USPOOF_ANY_CASE option.
|
||||||
*
|
*
|
||||||
*
|
*
|
||||||
* @param sc The USpoofChecker
|
* @param sc The USpoofChecker
|
||||||
|
@ -551,6 +551,12 @@ uspoof_getSkeleton(const USpoofChecker *sc,
|
|||||||
UChar *dest, int32_t destCapacity,
|
UChar *dest, int32_t destCapacity,
|
||||||
UErrorCode *status) {
|
UErrorCode *status) {
|
||||||
|
|
||||||
|
// TODO: this function could be sped up a bit
|
||||||
|
// Skip the input normalization when not needed, work from caller's data.
|
||||||
|
// Put the initial skeleton straight into the caller's destination buffer.
|
||||||
|
// It probably won't need normalization.
|
||||||
|
// But these would make the structure more complicated.
|
||||||
|
|
||||||
const SpoofImpl *This = SpoofImpl::validateThis(sc, *status);
|
const SpoofImpl *This = SpoofImpl::validateThis(sc, *status);
|
||||||
if (U_FAILURE(*status)) {
|
if (U_FAILURE(*status)) {
|
||||||
return 0;
|
return 0;
|
||||||
@ -582,8 +588,8 @@ uspoof_getSkeleton(const USpoofChecker *sc,
|
|||||||
|
|
||||||
// NFKD transform of the user supplied input
|
// NFKD transform of the user supplied input
|
||||||
|
|
||||||
UChar nfkdBuf[USPOOF_STACK_BUFFER_SIZE];
|
UChar nfkdStackBuf[USPOOF_STACK_BUFFER_SIZE];
|
||||||
UChar *nfkdInput = nfkdBuf;
|
UChar *nfkdInput = nfkdStackBuf;
|
||||||
int32_t normalizedLen = unorm_normalize(
|
int32_t normalizedLen = unorm_normalize(
|
||||||
s, length, UNORM_NFKD, 0, nfkdInput, USPOOF_STACK_BUFFER_SIZE, status);
|
s, length, UNORM_NFKD, 0, nfkdInput, USPOOF_STACK_BUFFER_SIZE, status);
|
||||||
if (*status == U_BUFFER_OVERFLOW_ERROR) {
|
if (*status == U_BUFFER_OVERFLOW_ERROR) {
|
||||||
@ -597,47 +603,67 @@ uspoof_getSkeleton(const USpoofChecker *sc,
|
|||||||
nfkdInput, normalizedLen+1, status);
|
nfkdInput, normalizedLen+1, status);
|
||||||
}
|
}
|
||||||
if (U_FAILURE(*status)) {
|
if (U_FAILURE(*status)) {
|
||||||
|
if (nfkdInput != nfkdStackBuf) {
|
||||||
|
uprv_free(nfkdInput);
|
||||||
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
// buffer to hold the Unicode defined mappings for a single code point
|
// buffer to hold the Unicode defined skeleton mappings for a single code point
|
||||||
UChar buf[USPOOF_MAX_SKELETON_EXPANSION];
|
UChar buf[USPOOF_MAX_SKELETON_EXPANSION];
|
||||||
|
|
||||||
// Apply the mapping to the NFKD form string
|
// Apply the skeleton mapping to the NFKD normalized input string
|
||||||
|
// Accumulate the skeleton, possibly unnormalized, in a UnicodeString.
|
||||||
int32_t inputIndex = 0;
|
int32_t inputIndex = 0;
|
||||||
int32_t resultLen = 0;
|
UnicodeString skelStr;
|
||||||
while (inputIndex < normalizedLen) {
|
while (inputIndex < normalizedLen) {
|
||||||
UChar32 c;
|
UChar32 c;
|
||||||
U16_NEXT(nfkdInput, inputIndex, normalizedLen, c);
|
U16_NEXT(nfkdInput, inputIndex, normalizedLen, c);
|
||||||
int32_t replaceLen = This->confusableLookup(c, tableMask, buf);
|
int32_t replaceLen = This->confusableLookup(c, tableMask, buf);
|
||||||
if (resultLen + replaceLen < destCapacity) {
|
skelStr.append(buf, replaceLen);
|
||||||
int i;
|
|
||||||
for (i=0; i<replaceLen; i++) {
|
|
||||||
dest[resultLen++] = buf[i];
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
// Storing the transformed string would overflow the dest buffer.
|
|
||||||
// Don't bother storing anything, just sum up the required buffer size.
|
|
||||||
// (We don't guarantee that a truncated buffer is filled to its end)
|
|
||||||
resultLen += replaceLen;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (resultLen < destCapacity) {
|
if (nfkdInput != nfkdStackBuf) {
|
||||||
dest[resultLen] = 0;
|
|
||||||
} else if (resultLen == destCapacity) {
|
|
||||||
*status = U_STRING_NOT_TERMINATED_WARNING;
|
|
||||||
} else {
|
|
||||||
*status = U_BUFFER_OVERFLOW_ERROR;
|
|
||||||
}
|
|
||||||
if (nfkdInput != nfkdBuf) {
|
|
||||||
uprv_free(nfkdInput);
|
uprv_free(nfkdInput);
|
||||||
}
|
}
|
||||||
return resultLen;
|
|
||||||
|
const UChar *result = skelStr.getBuffer();
|
||||||
|
int32_t resultLen = skelStr.length();
|
||||||
|
UChar *normedResult = NULL;
|
||||||
|
|
||||||
|
// Check the skeleton for NFKD, normalize it if needed.
|
||||||
|
// Unnormalized results should be very rare.
|
||||||
|
if (!unorm_isNormalized(result, resultLen, UNORM_NFKD, status)) {
|
||||||
|
normalizedLen = unorm_normalize(dest, resultLen, UNORM_NFKD, 0, NULL, 0, status);
|
||||||
|
UChar *normedResult = static_cast<UChar *>(uprv_malloc((normalizedLen+1)*sizeof(UChar)));
|
||||||
|
if (normedResult == NULL) {
|
||||||
|
*status = U_MEMORY_ALLOCATION_ERROR;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
unorm_normalize(result, resultLen, UNORM_NFKD, 0, normedResult, normalizedLen+1, status);
|
||||||
|
result = normedResult;
|
||||||
|
resultLen = normalizedLen;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Copy the skeleton to the caller's buffer
|
||||||
|
if (U_SUCCESS(*status)) {
|
||||||
|
if (destCapacity == 0 || resultLen > destCapacity) {
|
||||||
|
*status = resultLen>destCapacity ? U_BUFFER_OVERFLOW_ERROR : U_STRING_NOT_TERMINATED_WARNING;
|
||||||
|
} else {
|
||||||
|
u_memcpy(dest, result, resultLen);
|
||||||
|
if (destCapacity > resultLen) {
|
||||||
|
dest[resultLen] = 0;
|
||||||
|
} else {
|
||||||
|
*status = U_STRING_NOT_TERMINATED_WARNING;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
uprv_free(normedResult);
|
||||||
|
return resultLen;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
U_CAPI UnicodeString & U_EXPORT2
|
U_CAPI UnicodeString & U_EXPORT2
|
||||||
uspoof_getSkeletonUnicodeString(const USpoofChecker *sc,
|
uspoof_getSkeletonUnicodeString(const USpoofChecker *sc,
|
||||||
uint32_t type,
|
uint32_t type,
|
||||||
|
@ -67,6 +67,12 @@ void IntlTestSpoof::runIndexedTest( int32_t index, UBool exec, const char* &name
|
|||||||
testSkeleton();
|
testSkeleton();
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
case 2:
|
||||||
|
name = "TestAreConfusable";
|
||||||
|
if (exec) {
|
||||||
|
testAreConfusable();
|
||||||
|
}
|
||||||
|
break;
|
||||||
default: name=""; break;
|
default: name=""; break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -199,4 +205,18 @@ void IntlTestSpoof::checkSkeleton(const USpoofChecker *sc, uint32_t type,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void IntlTestSpoof::testAreConfusable() {
|
||||||
|
UErrorCode status = U_ZERO_ERROR;
|
||||||
|
TEST_SETUP
|
||||||
|
UnicodeString s1("A long string that will overflow stack buffers. A long string that will overflow stack buffers. "
|
||||||
|
"A long string that will overflow stack buffers. A long string that will overflow stack buffers. ");
|
||||||
|
UnicodeString s2("A long string that wi11 overflow stack buffers. A long string that will overflow stack buffers. "
|
||||||
|
"A long string that wi11 overflow stack buffers. A long string that will overflow stack buffers. ");
|
||||||
|
TEST_ASSERT_EQ(USPOOF_SINGLE_SCRIPT_CONFUSABLE, uspoof_areConfusableUnicodeString(sc, s1, s2, &status));
|
||||||
|
TEST_ASSERT_SUCCESS(status);
|
||||||
|
|
||||||
|
TEST_TEARDOWN;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
#endif /* #if !UCONFIG_NO_SPOOF_DETECT*/
|
#endif /* #if !UCONFIG_NO_SPOOF_DETECT*/
|
||||||
|
@ -26,6 +26,8 @@ public:
|
|||||||
void testSpoofAPI();
|
void testSpoofAPI();
|
||||||
|
|
||||||
void testSkeleton();
|
void testSkeleton();
|
||||||
|
|
||||||
|
void testAreConfusable();
|
||||||
|
|
||||||
// Internal function to run a single skeleton test case.
|
// Internal function to run a single skeleton test case.
|
||||||
void checkSkeleton(const USpoofChecker *sc, uint32_t flags,
|
void checkSkeleton(const USpoofChecker *sc, uint32_t flags,
|
||||||
|
Loading…
Reference in New Issue
Block a user