ICU-3077 Canonical search is now performed for the full set of accents. Also added more detailed out-of-bounds checks.
X-SVN-Rev: 12594
This commit is contained in:
parent
7a91e9d8df
commit
9ebff88b24
@ -1288,7 +1288,7 @@ int32_t doNextCanonicalPrefixMatch(UStringSearch *strsrch,
|
|||||||
int32_t accentsindex[INITIAL_ARRAY_SIZE_];
|
int32_t accentsindex[INITIAL_ARRAY_SIZE_];
|
||||||
int32_t accentsize = getUnblockedAccentIndex(accents,
|
int32_t accentsize = getUnblockedAccentIndex(accents,
|
||||||
accentsindex);
|
accentsindex);
|
||||||
int32_t count = (2 << (accentsize - 1)) - 2;
|
int32_t count = (2 << (accentsize - 1)) - 1;
|
||||||
UChar buffer[INITIAL_ARRAY_SIZE_];
|
UChar buffer[INITIAL_ARRAY_SIZE_];
|
||||||
UCollationElements *coleiter = strsrch->utilIter;
|
UCollationElements *coleiter = strsrch->utilIter;
|
||||||
while (U_SUCCESS(*status) && count > 0) {
|
while (U_SUCCESS(*status) && count > 0) {
|
||||||
@ -1560,8 +1560,8 @@ UBool doNextCanonicalMatch(UStringSearch *strsrch,
|
|||||||
int32_t accentsindex[INITIAL_ARRAY_SIZE_];
|
int32_t accentsindex[INITIAL_ARRAY_SIZE_];
|
||||||
int32_t size = getUnblockedAccentIndex(accents, accentsindex);
|
int32_t size = getUnblockedAccentIndex(accents, accentsindex);
|
||||||
|
|
||||||
// 2 power n - 1 minus the full set of accents
|
// 2 power n - 1 plus the full set of accents
|
||||||
int32_t count = (2 << (size - 1)) - 2;
|
int32_t count = (2 << (size - 1)) - 1;
|
||||||
while (U_SUCCESS(*status) && count > 0) {
|
while (U_SUCCESS(*status) && count > 0) {
|
||||||
UChar *rearrange = strsrch->canonicalSuffixAccents;
|
UChar *rearrange = strsrch->canonicalSuffixAccents;
|
||||||
// copy the base characters
|
// copy the base characters
|
||||||
@ -1984,7 +1984,7 @@ int32_t doPreviousCanonicalSuffixMatch(UStringSearch *strsrch,
|
|||||||
int32_t accentsindex[INITIAL_ARRAY_SIZE_];
|
int32_t accentsindex[INITIAL_ARRAY_SIZE_];
|
||||||
int32_t accentsize = getUnblockedAccentIndex(accents,
|
int32_t accentsize = getUnblockedAccentIndex(accents,
|
||||||
accentsindex);
|
accentsindex);
|
||||||
int32_t count = (2 << (accentsize - 1)) - 2;
|
int32_t count = (2 << (accentsize - 1)) - 1;
|
||||||
UChar buffer[INITIAL_ARRAY_SIZE_];
|
UChar buffer[INITIAL_ARRAY_SIZE_];
|
||||||
UCollationElements *coleiter = strsrch->utilIter;
|
UCollationElements *coleiter = strsrch->utilIter;
|
||||||
while (U_SUCCESS(*status) && count > 0) {
|
while (U_SUCCESS(*status) && count > 0) {
|
||||||
@ -2217,8 +2217,8 @@ UBool doPreviousCanonicalMatch(UStringSearch *strsrch,
|
|||||||
int32_t accentsindex[INITIAL_ARRAY_SIZE_];
|
int32_t accentsindex[INITIAL_ARRAY_SIZE_];
|
||||||
int32_t size = getUnblockedAccentIndex(accents, accentsindex);
|
int32_t size = getUnblockedAccentIndex(accents, accentsindex);
|
||||||
|
|
||||||
// 2 power n - 1 minus the full set of accents
|
// 2 power n - 1 plus the full set of accents
|
||||||
int32_t count = (2 << (size - 1)) - 2;
|
int32_t count = (2 << (size - 1)) - 1;
|
||||||
while (U_SUCCESS(*status) && count > 0) {
|
while (U_SUCCESS(*status) && count > 0) {
|
||||||
UChar *rearrange = strsrch->canonicalPrefixAccents;
|
UChar *rearrange = strsrch->canonicalPrefixAccents;
|
||||||
// copy the base characters
|
// copy the base characters
|
||||||
@ -2989,9 +2989,16 @@ U_CAPI int32_t U_EXPORT2 usearch_next(UStringSearch *strsrch,
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (U_FAILURE(*status)) {
|
if (U_FAILURE(*status)) {
|
||||||
|
// bcos of the backwards iteration, we might have detected a
|
||||||
|
// match too far front
|
||||||
return USEARCH_DONE;
|
return USEARCH_DONE;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (search->matchedIndex < offset) {
|
||||||
|
setMatchNotFound(strsrch);
|
||||||
|
return USEARCH_DONE;
|
||||||
|
}
|
||||||
|
|
||||||
return search->matchedIndex;
|
return search->matchedIndex;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -271,6 +271,8 @@ static const SearchData BASICCANONICAL[] = {
|
|||||||
NULL, {0, -1}, {5}},
|
NULL, {0, -1}, {5}},
|
||||||
{"a\\u0300\\u0325b\\u0300\\u0325c \\u0325b\\u0300 \\u0300b\\u0325",
|
{"a\\u0300\\u0325b\\u0300\\u0325c \\u0325b\\u0300 \\u0300b\\u0325",
|
||||||
"\\u0300b\\u0325", NULL, UCOL_TERTIARY, NULL, {1, 12, -1}, {5, 3}},
|
"\\u0300b\\u0325", NULL, UCOL_TERTIARY, NULL, {1, 12, -1}, {5, 3}},
|
||||||
|
{"\\u00c4\\u0323", "A\\u0323\\u0308", NULL, UCOL_TERTIARY, NULL, {0, -1}, {2}},
|
||||||
|
{"\\u0308\\u0323", "\\u0323\\u0308", NULL, UCOL_TERTIARY, NULL, {0, -1}, {2}},
|
||||||
{NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}}
|
{NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user