ICU-3109 port of the Thai collation bug fix
X-SVN-Rev: 12657
This commit is contained in:
parent
68ef0bde89
commit
9372fb574c
@ -20,6 +20,8 @@ package com.ibm.icu.dev.test.collator;
|
||||
import com.ibm.icu.dev.test.*;
|
||||
import com.ibm.icu.text.*;
|
||||
import java.util.Locale;
|
||||
import java.util.Comparator;
|
||||
import java.util.Arrays;
|
||||
import java.io.*;
|
||||
|
||||
public class CollationThaiTest extends TestFmwk {
|
||||
@ -75,7 +77,7 @@ public class CollationThaiTest extends TestFmwk {
|
||||
|
||||
Collator coll = null;
|
||||
try {
|
||||
coll = Collator.getInstance(new Locale("th", "TH", ""));
|
||||
coll = getThaiCollator();
|
||||
} catch (Exception e) {
|
||||
errln("Error: could not construct Thai collator");
|
||||
return;
|
||||
@ -152,7 +154,7 @@ public class CollationThaiTest extends TestFmwk {
|
||||
public void TestDictionary() {
|
||||
Collator coll = null;
|
||||
try {
|
||||
coll = Collator.getInstance(new Locale("th", "TH", ""));
|
||||
coll = getThaiCollator();
|
||||
} catch (Exception e) {
|
||||
errln("Error: could not construct Thai collator");
|
||||
return;
|
||||
@ -252,6 +254,43 @@ public class CollationThaiTest extends TestFmwk {
|
||||
logln("Words checked: " + wordCount);
|
||||
}
|
||||
|
||||
public void TestInvalidThai()
|
||||
{
|
||||
String tests[] = { "\u0E44\u0E01\u0E44\u0E01",
|
||||
"\u0E44\u0E01\u0E01\u0E44",
|
||||
"\u0E01\u0E44\u0E01\u0E44",
|
||||
"\u0E01\u0E01\u0E44\u0E44",
|
||||
"\u0E44\u0E44\u0E01\u0E01",
|
||||
"\u0E01\u0E44\u0E44\u0E01",
|
||||
};
|
||||
|
||||
RuleBasedCollator collator;
|
||||
StrCmp comparator;
|
||||
try {
|
||||
collator = getThaiCollator();
|
||||
comparator = new StrCmp();
|
||||
} catch (Exception e) {
|
||||
errln("Error: could not construct Thai collator");
|
||||
return;
|
||||
}
|
||||
|
||||
Arrays.sort(tests, comparator);
|
||||
|
||||
for (int i = 0; i < tests.length; i ++)
|
||||
{
|
||||
for (int j = i + 1; j < tests.length; j ++) {
|
||||
if (collator.compare(tests[i], tests[j]) > 0) {
|
||||
// inconsistency ordering found!
|
||||
errln("Inconsistent ordering between strings " + i
|
||||
+ " and " + j);
|
||||
}
|
||||
}
|
||||
CollationElementIterator iterator
|
||||
= collator.getCollationElementIterator(tests[i]);
|
||||
CollationIteratorTest.backAndForth(this, iterator);
|
||||
}
|
||||
}
|
||||
|
||||
private static final byte BOM[] = {(byte) 0xEF, (byte) 0xBB, (byte) 0xBF};
|
||||
|
||||
private byte savedBytes[]= new byte[BOM.length];
|
||||
@ -343,4 +382,36 @@ public class CollationThaiTest extends TestFmwk {
|
||||
target += "]";
|
||||
return target;
|
||||
}
|
||||
|
||||
// private inner class -------------------------------------------------
|
||||
|
||||
private static final class StrCmp implements Comparator
|
||||
{
|
||||
public int compare(Object string1, Object string2)
|
||||
{
|
||||
return collator.compare(string1, string2);
|
||||
}
|
||||
|
||||
StrCmp() throws Exception
|
||||
{
|
||||
collator = getThaiCollator();
|
||||
}
|
||||
|
||||
Collator collator;
|
||||
}
|
||||
|
||||
// private data members ------------------------------------------------
|
||||
|
||||
private static RuleBasedCollator m_collator_;
|
||||
|
||||
// private methods -----------------------------------------------------
|
||||
|
||||
private static RuleBasedCollator getThaiCollator() throws Exception
|
||||
{
|
||||
if (m_collator_ == null) {
|
||||
m_collator_ = (RuleBasedCollator)Collator.getInstance(
|
||||
new Locale("th", "TH", ""));
|
||||
}
|
||||
return m_collator_;
|
||||
}
|
||||
}
|
@ -355,15 +355,41 @@ public final class CollationElementIterator
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (m_bufferOffset_ < 0 && m_source_.getIndex() != 0) {
|
||||
if (m_bufferOffset_ < 0 && m_source_.getIndex() != 0
|
||||
&& isThaiPreVowel(peekCharacter(-1))) {
|
||||
// we now rearrange unconditionally
|
||||
if (isThaiPreVowel(m_source_.previous())) {
|
||||
backupInternalState(m_utilSpecialBackUp_);
|
||||
// we have to check if the previous character is also Thai
|
||||
// if not, we can just set the result
|
||||
// we have already determined that the normalization
|
||||
// buffer is empty
|
||||
m_source_.previous();
|
||||
if (m_source_.getIndex() == 0
|
||||
|| !isThaiPreVowel(peekCharacter(-1))) {
|
||||
result = CE_THAI_;
|
||||
}
|
||||
else {
|
||||
result = m_collator_.m_trie_.getLeadValue(ch);
|
||||
else {
|
||||
// previous is also reordered
|
||||
// we need to go back as long as they are being
|
||||
// reordered
|
||||
// count over the range of reorderable characters
|
||||
// and see
|
||||
// if there is an even or odd number of them
|
||||
// if even, we should not reorder.
|
||||
// If odd we should reorder.
|
||||
int noReordered = 1; // the one we already detected
|
||||
while (m_source_.getIndex() != 0
|
||||
&& isThaiPreVowel(m_source_.previous())) {
|
||||
noReordered ++;
|
||||
}
|
||||
if ((noReordered & 1) != 0) {
|
||||
// odd number of reorderables
|
||||
result = CE_THAI_;
|
||||
} else {
|
||||
result = m_collator_.m_trie_.getLeadValue(ch);
|
||||
}
|
||||
}
|
||||
m_source_.next();
|
||||
updateInternalState(m_utilSpecialBackUp_);
|
||||
}
|
||||
else {
|
||||
result = m_collator_.m_trie_.getLeadValue(ch);
|
||||
@ -1862,6 +1888,12 @@ public final class CollationElementIterator
|
||||
collator.m_expansion_[++ offset];
|
||||
}
|
||||
}
|
||||
// in case of one element expansion, we
|
||||
// want to immediately return CEpos
|
||||
if (m_CEBufferSize_ == 1) {
|
||||
m_CEBufferSize_ = 0;
|
||||
m_CEBufferOffset_ = 0;
|
||||
}
|
||||
return m_CEBuffer_[0];
|
||||
}
|
||||
|
||||
@ -2526,4 +2558,26 @@ public final class CollationElementIterator
|
||||
}
|
||||
return cp + NON_CJK_OFFSET_; // non-CJK
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a character from the source string at a given offset
|
||||
* Handles both normal and iterative cases.
|
||||
* No error checking - caller beware!
|
||||
* @param offset offset from current position which character is to be
|
||||
* retrieved
|
||||
* @return character at current position + offset
|
||||
*/
|
||||
private char peekCharacter(int offset)
|
||||
{
|
||||
if (offset != 0) {
|
||||
int currentoffset = m_source_.getIndex();
|
||||
m_source_.setIndex(currentoffset + offset);
|
||||
char result = m_source_.current();
|
||||
m_source_.setIndex(currentoffset);
|
||||
return result;
|
||||
}
|
||||
else {
|
||||
return m_source_.current();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user