/* ***************************************************************************************** * Copyright (C) 1997-1999, International Business Machines * Corporation and others. All Rights Reserved. ***************************************************************************************** * * File WDBKTBL.CPP * * Modification History: * * Date Name Description * 02/18/97 aliu Converted from OpenClass. Made statics const. ***************************************************************************************** */ // ***************************************************************************** // This file was generated from the java source file WordBreakTable.java // ***************************************************************************** #include "wdbktbl.h" // ***************************************************************************** // class WordBreakTable // // The word break table implements a state machine that leads to the next // transition state from the current one and is used by BreakIterator for // character, word or sentence. To better illustrate the use of transition // tables, the following example shows a very simplified version of the // word break table that deals with only kNB (not a blank char) and kB // (a blank char) character categories. The state machine of the word break // table would look like, // // Diagram 1 : the state machine for kNB and kB // // kNB // ---- // kNB +----+/ \ // ------>|SI+1| | // / +----+<----/ // +----+ kB| kNB +-------+ // 0->|stop| V -------------> |SI_stop| // +----+\------>+----+/ +-------+ // |SI+2|<----\ // kB +----+ | // \----/ // kB // // Table 1 : flattened state table for Diagram 1 // --------------------------------------------- // States kB kNB // 0 stop stop // 1 SI+2 SI+1 // 2 SI+2 SI_stop // // In the table, SI+n shows where the characters will be "marked" and led // to a different state if necessary. For example, consider the string // "This is a test.". // Iterating through the string shows the following, // (stop)->'T'(SI+1)->'h'(SI+1)->'i'(SI+1)->'s'(SI+1)->' '(SI+2)->i(SI_stop) // When a (SI_stop) is reached, we know that we have found a word break right // after ' '. // // The actual char, word and sentence break data is a lot more complicated // than the above. The character type showed here is only limited to kNB // and kB for ease of demonstration. All the break tables are essentially // a flattened state table of their orginal state machine diagrams. // // ***************************************************************************** // ------------------------------------- WordBreakTable::WordBreakTable(int32_t cols, const WordBreakTable::Node data[], int32_t data_length) : fData(data), fData_length(data_length), fCols(cols) { } // ------------------------------------- const WordBreakTable::Node WordBreakTable::kMark_mask = (WordBreakTable::Node)0x80; const WordBreakTable::Node WordBreakTable::kIndex_mask = (WordBreakTable::Node)0x7F; const WordBreakTable::Node WordBreakTable::kInitial_state = 1; const WordBreakTable::Node WordBreakTable::kEnd_state = 0; //eof