ICU-3077 Canonical search is now performed for the full set of accents. Also added more detailed out-of-bounds checks.

X-SVN-Rev: 12594
This commit is contained in:
Syn Wee Quek 2003-07-03 19:17:35 +00:00
parent 7a91e9d8df
commit 9ebff88b24
2 changed files with 16 additions and 7 deletions

View File

@ -1288,7 +1288,7 @@ int32_t doNextCanonicalPrefixMatch(UStringSearch *strsrch,
int32_t accentsindex[INITIAL_ARRAY_SIZE_]; int32_t accentsindex[INITIAL_ARRAY_SIZE_];
int32_t accentsize = getUnblockedAccentIndex(accents, int32_t accentsize = getUnblockedAccentIndex(accents,
accentsindex); accentsindex);
int32_t count = (2 << (accentsize - 1)) - 2; int32_t count = (2 << (accentsize - 1)) - 1;
UChar buffer[INITIAL_ARRAY_SIZE_]; UChar buffer[INITIAL_ARRAY_SIZE_];
UCollationElements *coleiter = strsrch->utilIter; UCollationElements *coleiter = strsrch->utilIter;
while (U_SUCCESS(*status) && count > 0) { while (U_SUCCESS(*status) && count > 0) {
@ -1560,8 +1560,8 @@ UBool doNextCanonicalMatch(UStringSearch *strsrch,
int32_t accentsindex[INITIAL_ARRAY_SIZE_]; int32_t accentsindex[INITIAL_ARRAY_SIZE_];
int32_t size = getUnblockedAccentIndex(accents, accentsindex); int32_t size = getUnblockedAccentIndex(accents, accentsindex);
// 2 power n - 1 minus the full set of accents // 2 power n - 1 plus the full set of accents
int32_t count = (2 << (size - 1)) - 2; int32_t count = (2 << (size - 1)) - 1;
while (U_SUCCESS(*status) && count > 0) { while (U_SUCCESS(*status) && count > 0) {
UChar *rearrange = strsrch->canonicalSuffixAccents; UChar *rearrange = strsrch->canonicalSuffixAccents;
// copy the base characters // copy the base characters
@ -1984,7 +1984,7 @@ int32_t doPreviousCanonicalSuffixMatch(UStringSearch *strsrch,
int32_t accentsindex[INITIAL_ARRAY_SIZE_]; int32_t accentsindex[INITIAL_ARRAY_SIZE_];
int32_t accentsize = getUnblockedAccentIndex(accents, int32_t accentsize = getUnblockedAccentIndex(accents,
accentsindex); accentsindex);
int32_t count = (2 << (accentsize - 1)) - 2; int32_t count = (2 << (accentsize - 1)) - 1;
UChar buffer[INITIAL_ARRAY_SIZE_]; UChar buffer[INITIAL_ARRAY_SIZE_];
UCollationElements *coleiter = strsrch->utilIter; UCollationElements *coleiter = strsrch->utilIter;
while (U_SUCCESS(*status) && count > 0) { while (U_SUCCESS(*status) && count > 0) {
@ -2217,8 +2217,8 @@ UBool doPreviousCanonicalMatch(UStringSearch *strsrch,
int32_t accentsindex[INITIAL_ARRAY_SIZE_]; int32_t accentsindex[INITIAL_ARRAY_SIZE_];
int32_t size = getUnblockedAccentIndex(accents, accentsindex); int32_t size = getUnblockedAccentIndex(accents, accentsindex);
// 2 power n - 1 minus the full set of accents // 2 power n - 1 plus the full set of accents
int32_t count = (2 << (size - 1)) - 2; int32_t count = (2 << (size - 1)) - 1;
while (U_SUCCESS(*status) && count > 0) { while (U_SUCCESS(*status) && count > 0) {
UChar *rearrange = strsrch->canonicalPrefixAccents; UChar *rearrange = strsrch->canonicalPrefixAccents;
// copy the base characters // copy the base characters
@ -2989,9 +2989,16 @@ U_CAPI int32_t U_EXPORT2 usearch_next(UStringSearch *strsrch,
} }
if (U_FAILURE(*status)) { if (U_FAILURE(*status)) {
// because of the backwards iteration, we might have detected a
// match too far in front
return USEARCH_DONE; return USEARCH_DONE;
} }
if (search->matchedIndex < offset) {
setMatchNotFound(strsrch);
return USEARCH_DONE;
}
return search->matchedIndex; return search->matchedIndex;
} }
} }

View File

@ -271,6 +271,8 @@ static const SearchData BASICCANONICAL[] = {
NULL, {0, -1}, {5}}, NULL, {0, -1}, {5}},
{"a\\u0300\\u0325b\\u0300\\u0325c \\u0325b\\u0300 \\u0300b\\u0325", {"a\\u0300\\u0325b\\u0300\\u0325c \\u0325b\\u0300 \\u0300b\\u0325",
"\\u0300b\\u0325", NULL, UCOL_TERTIARY, NULL, {1, 12, -1}, {5, 3}}, "\\u0300b\\u0325", NULL, UCOL_TERTIARY, NULL, {1, 12, -1}, {5, 3}},
{"\\u00c4\\u0323", "A\\u0323\\u0308", NULL, UCOL_TERTIARY, NULL, {0, -1}, {2}},
{"\\u0308\\u0323", "\\u0323\\u0308", NULL, UCOL_TERTIARY, NULL, {0, -1}, {2}},
{NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}} {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}}
}; };