/* ***************************************************************************************** * * * COPYRIGHT: * * (C) Copyright Taligent, Inc., 1997 * * (C) Copyright International Business Machines Corporation, 1997-1998 * * Licensed Material - Program-Property of IBM - All Rights Reserved. * * US Government Users Restricted Rights - Use, duplication, or disclosure * * restricted by GSA ADP Schedule Contract with IBM Corp. * * * ***************************************************************************************** * * File CHBKDAT.CPP * * Modification History: * * Date Name Description * 02/18/97 aliu Converted from OpenClass. * Recoded kRawMapping table for Unicode::getType() type codes. * Made static data members const where appropriate. * 03/25/97 aliu Moved into TextBoundaryData; no longer a subclass. * 04/15/97 aliu Worked around bug in AIX xlC compiler which occurs if static * arrays contain const elements. * 05/06/97 aliu Made SpecialMapping an array of objects instead of pointers, * to help out non-compliant compilers. * 08/14/98 helena Sync-up JDK1.2. * 07/12/99 helena HPUX 11 CC port. ***************************************************************************************** */ // ***************************************************************************** // This file was generated from the java source file CharacterBreakData.java // ***************************************************************************** #include "txtbdat.h" #include "wdbktbl.h" #include "unicdcm.h" // ***************************************************************************** // class CharacterBreakData // The following tables contain the transition state data for character break. // Take forward data for example, the state machine looks like, // Diagram 1 : the forward state machine for accent and base // // accent // ---- // accent +----+/ \ // -------> |SI+2| | // / +----+<----/ // +----+ | base +-------+ // 0->|stop| +-----------------> |SI_stop| // +----+\-------> +----+--------------> +-------+ // base |SI+2| base // +----+ // ^ \ // | | // \----/ // accent // // ***************************************************************************** // The forward transition states of character boundary data. TextBoundaryData::Node TextBoundaryData::kCharacterForwardData[] = { // acct base cr lf // cho jung jong EOS kStop, kStop, kStop, kStop, kStop, kStop, kStop, kStop, // 1 kSI_2, kSI_2, kSI_3, kSI_7, kSI_4, kSI_5, kSI_6, kSI_Stop, // 2 kSI_2, kSI_Stop, kSI_Stop, kSI_Stop, kSI_Stop, kSI_Stop, kSI_Stop, kSI_Stop, // 3 kSI_Stop, kSI_Stop, kSI_Stop, kSI_7, kSI_Stop, kSI_Stop, kSI_Stop, kSI_Stop, // 4 kSI_2, kSI_Stop, kSI_Stop, kSI_Stop, kSI_4, kSI_5, kSI_6, kSI_Stop, // 5 kSI_2, kSI_Stop, kSI_Stop, kSI_Stop, kSI_Stop, kSI_5, kSI_6, kSI_Stop, // 6 kSI_2, kSI_Stop, kSI_Stop, kSI_Stop, kSI_Stop, kSI_Stop, kSI_6, kSI_Stop, // 7 kSI_Stop, kSI_Stop, kSI_Stop, kSI_Stop, kSI_Stop, kSI_Stop, kSI_Stop, kSI_Stop }; const int32_t TextBoundaryData::kCharacterForwardData_length = sizeof(TextBoundaryData::kCharacterForwardData) / sizeof(TextBoundaryData::kCharacterForwardData[0]); WordBreakTable* TextBoundaryData::kCharacterForward = new WordBreakTable(kCharacterCol_count, kCharacterForwardData, kCharacterForwardData_length); // ***************************************************************************** // // Diagram 2 : the backward state machine for accent and base // // accent // ---- // accent +----+/ \ // -------> |SI+1| | // / +----+<----/ // +----+ | base +-------+ // 0->|stop| +-----------------> |SI_stop| // +----+\-----------------------------> +-------+ // base // // ***************************************************************************** // The backward transition states of character boundary data. TextBoundaryData::Node TextBoundaryData::kCharacterBackwardData[] = { // acct base cr lf // cho jung jong EOS kStop, kStop, kStop, kStop, kStop, kStop, kStop, kStop, // 1 kSI_1, kSI_Stop, kSI_Stop, kSI_1, kSI_Stop, kSI_1, kSI_1, kSI_Stop }; const int32_t TextBoundaryData::kCharacterBackwardData_length = sizeof(TextBoundaryData::kCharacterBackwardData) / sizeof(TextBoundaryData::kCharacterBackwardData[0]); WordBreakTable* TextBoundaryData::kCharacterBackward = new WordBreakTable(kCharacterCol_count, kCharacterBackwardData, kCharacterBackwardData_length); // The character type mapping of the break table. TextBoundaryData::Type TextBoundaryData::kCharacterRawMapping[] = { // Re-coded to match Unicode 2 types [LIU] kBaseForm, // UNASSIGNED = 0, kBaseForm, // UPPERCASE_LETTER = 1, kBaseForm, // LOWERCASE_LETTER = 2, kBaseForm, // TITLECASE_LETTER = 3, kBaseForm, // MODIFIER_LETTER = 4, kBaseForm, // OTHER_LETTER = 5, kAccent_diacritic, // NON_SPACING_MARK = 6, kAccent_diacritic, // ENCLOSING_MARK = 7, kBaseForm, // COMBINING_SPACING_MARK = 8, kBaseForm, // DECIMAL_DIGIT_NUMBER = 9, kBaseForm, // LETTER_NUMBER = 10, kBaseForm, // OTHER_NUMBER = 11, kBaseForm, // SPACE_SEPARATOR = 12, kBaseForm, // LINE_SEPARATOR = 13, kBaseForm, // PARAGRAPH_SEPARATOR = 14, kBaseForm, // CONTROL = 15, kBaseForm, // FORMAT = 16, kBaseForm, // PRIVATE_USE = 17, kBaseForm, // SURROGATE = 18, kBaseForm, // DASH_PUNCTUATION = 19, kBaseForm, // START_PUNCTUATION = 20, kBaseForm, // END_PUNCTUATION = 21, kBaseForm, // CONNECTOR_PUNCTUATION = 22, kBaseForm, // OTHER_PUNCTUATION = 23, kBaseForm, // MATH_SYMBOL = 24, kBaseForm, // CURRENCY_SYMBOL = 25, kBaseForm, // MODIFIER_SYMBOL = 26, kBaseForm, // OTHER_SYMBOL = 27, kBaseForm // UNDEFINED = 28 }; const int32_t TextBoundaryData::kCharacterRawMapping_length = sizeof(TextBoundaryData::kCharacterRawMapping) / sizeof(TextBoundaryData::kCharacterRawMapping[0]); SpecialMapping TextBoundaryData::kCharacterExceptionChar[] = { SpecialMapping(TextBoundaryData::ASCII_LINEFEED, TextBoundaryData::kBaseLF), SpecialMapping(TextBoundaryData::ASCII_CARRIAGE_RETURN, TextBoundaryData::kBaseCR), SpecialMapping(TextBoundaryData::HANGUL_CHOSEONG_LOW, TextBoundaryData::HANGUL_CHOSEONG_HIGH, TextBoundaryData::kChoseong), SpecialMapping(TextBoundaryData::HANGUL_JUNGSEONG_LOW, TextBoundaryData::HANGUL_JUNGSEONG_HIGH, TextBoundaryData::kJungseong), SpecialMapping(TextBoundaryData::HANGUL_JONGSEONG_LOW, TextBoundaryData::HANGUL_JONGSEONG_HIGH, TextBoundaryData::kJongseong), SpecialMapping(TextBoundaryData::PUNCTUATION_LINE_SEPARATOR, TextBoundaryData::PUNCTUATION_PARAGRAPH_SEPARATOR, TextBoundaryData::kBaseLF), SpecialMapping(TextBoundaryData::END_OF_STRING, TextBoundaryData::kEOS) }; const int32_t TextBoundaryData::kCharacterExceptionChar_length = sizeof(TextBoundaryData::kCharacterExceptionChar) / sizeof(TextBoundaryData::kCharacterExceptionChar[0]); const bool_t TextBoundaryData::kCharacterExceptionFlags[] = { FALSE, // kNonCharacter = 0, FALSE, // kUppercaseLetter = 1, FALSE, // kLowercaseLetter = 2, FALSE, // kTitlecaseLetter = 3, FALSE, // kModifierLetter = 4, TRUE, // kOtherLetter = 5, FALSE, // kNonSpacingMark = 6, FALSE, // kEnclosingMark = 7, FALSE, // kCombiningSpacingMark = 8, FALSE, // kDecimalNumber = 9, FALSE, // kLetterNumber = 10, FALSE, // kOtherNumber = 11, FALSE, // kSpaceSeparator = 12, TRUE, // kLineSeparator = 13, TRUE, // kParagraphSeparator = 14, TRUE, // kControlCharacter = 15, FALSE, // kFormatCharacter = 16, FALSE, // kPrivateUseCharacter = 17, FALSE, // kSurrogate = 18, FALSE, // kDashPunctuation = 19, FALSE, // kOpenPunctuation = 20, FALSE, // kClosePunctuation = 21, FALSE, // kConnectorPunctuation = 22, FALSE, // kOtherPunctuation = 23, FALSE, // kMathSymbol = 24, FALSE, // kCurrencySymbol = 25, FALSE, // kModifierSymbol = 26, FALSE, // kOtherSymbol = 27 FALSE // UNDEFINED = 28, }; TextBoundaryData::Type TextBoundaryData::kCharacterAsciiValues[] = { // null soh stx etx eot enq ask bell kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, // bs ht lf vt ff cr so si kBaseForm, kBaseForm, kBaseLF, kBaseForm, kBaseForm, kBaseCR, kBaseForm, kBaseForm, // dle dc1 dc2 dc3 dc4 nak syn etb kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, // can em sub esc fs gs rs us kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, // sp ! " # $ % & ' kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, // ( ) * + , - . / kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, // 0 1 2 3 4 5 6 7 kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, // 8 9 : ; < = > ? kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, // @ A B C D E F G kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, // H I J K L M N O kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, // P Q R S T U V W kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, // X Y Z [ \ ] ^ _ kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, // ` a b c d e f g kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, // h i j k l m n o kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, // p q r s t u v w kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, // x y z { | } ~ del kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, // ctrl ctrl ctrl ctrl ctrl ctrl ctrl ctrl kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, // ctrl ctrl ctrl ctrl ctrl ctrl ctrl ctrl kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, // ctrl ctrl ctrl ctrl ctrl ctrl ctrl ctrl kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, // ctrl ctrl ctrl ctrl ctrl ctrl ctrl ctrl kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, // nbsp ¡ ¢ £ ¤ ¥ ¦ kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, // ¨ © ª « ¬ ­ ® ¯ kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, // ° ± ² ³ ´ µ ¶ · kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, // ¸ ¹ º » ¼ ½ ¾ ¿ kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, // À Á Â Ã Ä Å Æ Ç kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, // È É Ê Ë Ì Í Î Ï kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, // Ð Ñ Ò Ó Ô Õ Ö × kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, // Ø Ù Ú Û Ü Ý Þ ß kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, // à á â ã ä å æ ç kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, // è é ê ë ì í î ï kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, // ð ñ ò ó ô õ ö ÷ kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, // ø ù ú û ü ý þ ÿ kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm, kBaseForm }; UnicodeClassMapping* TextBoundaryData::kCharacterMap = new UnicodeClassMapping(kCharacterRawMapping, kCharacterRawMapping_length, kCharacterExceptionChar, kCharacterExceptionChar_length, kCharacterExceptionFlags, kCharacterAsciiValues ); /** * This is the single instance of TextBoundaryData containing character * break data. */ const TextBoundaryData TextBoundaryData::kCharacterBreakData(TextBoundaryData::kCharacterForward, TextBoundaryData::kCharacterBackward, TextBoundaryData::kCharacterMap); //eof