/** ******************************************************************************* * Copyright (C) 1996-2001, International Business Machines Corporation and * * others. All Rights Reserved. * ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateThaiBreaks-old.java,v $ * $Date: 2005/03/04 02:50:26 $ * $Revision: 1.2 $ * ******************************************************************************* */ package com.ibm.text.UCD; import java.io.*; import com.ibm.text.utility.*; import com.ibm.icu.text.UnicodeSet; import java.util.*; public class GenerateThaiBreaks { public static void main(String [] args) throws IOException { BufferedReader br = new BufferedReader( new InputStreamReader( new FileInputStream("\\icu4j\\src\\data\\thai6.ucs"), "UnicodeLittle")); try { Main.setUCD(); UnicodeSet ignorables = new UnicodeSet("[:M:]"); ignorables.retain(0x0E00, 0x0E7F); // just Thai block ignorables.add(0x0E40, 0x0E44); // add logical order exception ignorables.add(0, ' '); // add controls ignorables.add('.'); UnicodeSet initials = new UnicodeSet(); UnicodeSet finals = new UnicodeSet(); UnicodeSet medials = new UnicodeSet(); while (true) { String line = br.readLine(); if (line == null) break; int end; // find final consonant for (int i = line.length() - 1; ; --i) { char c = line.charAt(i); if (!ignorables.contains(c)) { finals.add(c); end = i; break; } } boolean haveFirst = false; for (int i = 0; i < end; ++i) { char c = line.charAt(i); if (ignorables.contains(c)) continue; if (!haveFirst) { initials.add(c); haveFirst = true; } else { medials.add(c); } } } initials.removeAll(medials); finals.removeAll(medials); Utility.showSetNames("initials: ", initials, false, Main.ucd); Utility.showSetNames("finals: ", finals, false, Main.ucd); Utility.showSetNames("medials: ", medials, false, Main.ucd); } finally { br.close(); } } }