2002-10-05 01:28:58 +00:00
|
|
|
/**
|
|
|
|
*******************************************************************************
|
|
|
|
* Copyright (C) 1996-2001, International Business Machines Corporation and *
|
|
|
|
* others. All Rights Reserved. *
|
|
|
|
*******************************************************************************
|
|
|
|
*
|
|
|
|
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateThaiBreaks-old.java,v $
|
2005-03-04 02:50:26 +00:00
|
|
|
* $Date: 2005/03/04 02:50:26 $
|
|
|
|
* $Revision: 1.2 $
|
2002-10-05 01:28:58 +00:00
|
|
|
*
|
|
|
|
*******************************************************************************
|
|
|
|
*/
|
|
|
|
|
|
|
|
package com.ibm.text.UCD;
|
|
|
|
import java.io.*;
|
|
|
|
import com.ibm.text.utility.*;
|
2005-03-04 02:50:26 +00:00
|
|
|
import com.ibm.icu.text.UnicodeSet;
|
2002-10-05 01:28:58 +00:00
|
|
|
import java.util.*;
|
|
|
|
|
|
|
|
public class GenerateThaiBreaks {
|
|
|
|
public static void main(String [] args) throws IOException {
|
|
|
|
|
|
|
|
BufferedReader br = new BufferedReader(
|
|
|
|
new InputStreamReader(
|
|
|
|
new FileInputStream("\\icu4j\\src\\data\\thai6.ucs"), "UnicodeLittle"));
|
|
|
|
try {
|
|
|
|
Main.setUCD();
|
|
|
|
UnicodeSet ignorables = new UnicodeSet("[:M:]");
|
|
|
|
ignorables.retain(0x0E00, 0x0E7F); // just Thai block
|
|
|
|
ignorables.add(0x0E40, 0x0E44); // add logical order exception
|
|
|
|
ignorables.add(0, ' '); // add controls
|
|
|
|
ignorables.add('.');
|
|
|
|
|
|
|
|
UnicodeSet initials = new UnicodeSet();
|
|
|
|
UnicodeSet finals = new UnicodeSet();
|
|
|
|
UnicodeSet medials = new UnicodeSet();
|
|
|
|
while (true) {
|
|
|
|
String line = br.readLine();
|
|
|
|
if (line == null) break;
|
|
|
|
int end;
|
|
|
|
|
|
|
|
// find final consonant
|
|
|
|
for (int i = line.length() - 1; ; --i) {
|
|
|
|
char c = line.charAt(i);
|
|
|
|
if (!ignorables.contains(c)) {
|
|
|
|
finals.add(c);
|
|
|
|
end = i;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
boolean haveFirst = false;
|
|
|
|
for (int i = 0; i < end; ++i) {
|
|
|
|
char c = line.charAt(i);
|
|
|
|
if (ignorables.contains(c)) continue;
|
|
|
|
if (!haveFirst) {
|
|
|
|
initials.add(c);
|
|
|
|
haveFirst = true;
|
|
|
|
} else {
|
|
|
|
medials.add(c);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
initials.removeAll(medials);
|
|
|
|
finals.removeAll(medials);
|
|
|
|
Utility.showSetNames("initials: ", initials, false, Main.ucd);
|
|
|
|
Utility.showSetNames("finals: ", finals, false, Main.ucd);
|
|
|
|
Utility.showSetNames("medials: ", medials, false, Main.ucd);
|
|
|
|
} finally {
|
|
|
|
br.close();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|