scuffed-code/icu4c/source/i18n/txtbdat.cpp
1999-11-23 01:30:04 +00:00

156 lines
11 KiB
C++

/*
*****************************************************************************************
* Copyright (C) 1997-1999, International Business Machines
* Corporation and others. All Rights Reserved.
*****************************************************************************************
*
* File TXTBDAT.CPP
*
* Modification History:
*
* Date Name Description
* 02/18/97 aliu Converted from OpenClass.
* Made static data members const where appropriate.
* 05/06/97 aliu Made kSI, kStop, and kSI_Stop into #defines to help out
* non-compliant compilers.
*****************************************************************************************
*/
#include "txtbdat.h"
// *****************************************************************************
// class TextBoundaryData
// *****************************************************************************
// The following definitions were removed and replaced by #defines because of compiler problems.
//const TextBoundaryData::Node TextBoundaryData::kSI = 0x80;
//const TextBoundaryData::Node TextBoundaryData::kStop = 0;
//const TextBoundaryData::Node TextBoundaryData::kSI_Stop = kSI + kStop;
// The following Unicode characters may need special mappings in a particular
// text boundary.
// Control characters and punctuation whose values lie in the 7-bit ASCII
// range (0x0003 through 0x003F).
const UChar TextBoundaryData::ASCII_END_OF_TEXT           = static_cast<UChar>(0x0003);
const UChar TextBoundaryData::ASCII_HORIZONTAL_TABULATION = static_cast<UChar>(0x0009);
const UChar TextBoundaryData::ASCII_LINEFEED              = static_cast<UChar>(0x000A);
const UChar TextBoundaryData::ASCII_VERTICAL_TABULATION   = static_cast<UChar>(0x000B);
const UChar TextBoundaryData::ASCII_FORM_FEED             = static_cast<UChar>(0x000C);
const UChar TextBoundaryData::ASCII_CARRIAGE_RETURN       = static_cast<UChar>(0x000D);
const UChar TextBoundaryData::ASCII_SPACE                 = static_cast<UChar>(0x0020);
const UChar TextBoundaryData::ASCII_EXCLAMATION_MARK      = static_cast<UChar>(0x0021);
const UChar TextBoundaryData::ASCII_QUOTATION_MARK        = static_cast<UChar>(0x0022);
const UChar TextBoundaryData::ASCII_NUMBER_SIGN           = static_cast<UChar>(0x0023);
const UChar TextBoundaryData::ASCII_DOLLAR_SIGN           = static_cast<UChar>(0x0024);
const UChar TextBoundaryData::ASCII_PERCENT               = static_cast<UChar>(0x0025);
const UChar TextBoundaryData::ASCII_AMPERSAND             = static_cast<UChar>(0x0026);
const UChar TextBoundaryData::ASCII_APOSTROPHE            = static_cast<UChar>(0x0027);
const UChar TextBoundaryData::ASCII_COMMA                 = static_cast<UChar>(0x002C);
const UChar TextBoundaryData::ASCII_FULL_STOP             = static_cast<UChar>(0x002E);
const UChar TextBoundaryData::ASCII_COLON                 = static_cast<UChar>(0x003A);
const UChar TextBoundaryData::ASCII_SEMICOLON             = static_cast<UChar>(0x003B);
const UChar TextBoundaryData::ASCII_QUESTION_MARK         = static_cast<UChar>(0x003F);
// Characters with values 0x00A0 through 0x00B0. Note that the first four
// carry an ASCII_ prefix in their names even though their values are above
// 0x7F; the names are part of the class interface and are kept as they are.
const UChar TextBoundaryData::ASCII_NONBREAKING_SPACE = static_cast<UChar>(0x00A0);
const UChar TextBoundaryData::ASCII_CENT_SIGN         = static_cast<UChar>(0x00A2);
const UChar TextBoundaryData::ASCII_POUND_SIGN        = static_cast<UChar>(0x00A3);
const UChar TextBoundaryData::ASCII_YEN_SIGN          = static_cast<UChar>(0x00A5);
const UChar TextBoundaryData::LATIN1_SOFTHYPHEN       = static_cast<UChar>(0x00AD);
const UChar TextBoundaryData::LATIN1_DEGREE_SIGN      = static_cast<UChar>(0x00B0);
// Arabic numeric punctuation.
const UChar TextBoundaryData::ARABIC_PERCENT_SIGN      = static_cast<UChar>(0x066A);
const UChar TextBoundaryData::ARABIC_DECIMAL_SEPARATOR = static_cast<UChar>(0x066B);
// Low/high bounds of the three Hangul jamo ranges (choseong, jungseong,
// jongseong). The three ranges are contiguous: 0x1100 through 0x11FF.
const UChar TextBoundaryData::HANGUL_CHOSEONG_LOW   = static_cast<UChar>(0x1100);
const UChar TextBoundaryData::HANGUL_CHOSEONG_HIGH  = static_cast<UChar>(0x115F);
const UChar TextBoundaryData::HANGUL_JUNGSEONG_LOW  = static_cast<UChar>(0x1160);
const UChar TextBoundaryData::HANGUL_JUNGSEONG_HIGH = static_cast<UChar>(0x11A7);
const UChar TextBoundaryData::HANGUL_JONGSEONG_LOW  = static_cast<UChar>(0x11A8);
const UChar TextBoundaryData::HANGUL_JONGSEONG_HIGH = static_cast<UChar>(0x11FF);
// Spaces, hyphens, separators, per-mille/prime signs and degree symbols with
// values 0x2007 through 0x2109.
const UChar TextBoundaryData::FIGURE_SPACE                    = static_cast<UChar>(0x2007);
const UChar TextBoundaryData::NONBREAKING_HYPHEN              = static_cast<UChar>(0x2011);
const UChar TextBoundaryData::PUNCTUATION_HYPHENATION_POINT   = static_cast<UChar>(0x2027);
const UChar TextBoundaryData::PUNCTUATION_LINE_SEPARATOR      = static_cast<UChar>(0x2028);
const UChar TextBoundaryData::PUNCTUATION_PARAGRAPH_SEPARATOR = static_cast<UChar>(0x2029);
const UChar TextBoundaryData::PER_MILLE_SIGN                  = static_cast<UChar>(0x2030);
const UChar TextBoundaryData::PER_TEN_THOUSAND_SIGN           = static_cast<UChar>(0x2031);
const UChar TextBoundaryData::PRIME                           = static_cast<UChar>(0x2032);
const UChar TextBoundaryData::DOUBLE_PRIME                    = static_cast<UChar>(0x2033);
const UChar TextBoundaryData::TRIPLE_PRIME                    = static_cast<UChar>(0x2034);
const UChar TextBoundaryData::DEGREE_CELSIUS                  = static_cast<UChar>(0x2103);
const UChar TextBoundaryData::DEGREE_FAHRENHEIT               = static_cast<UChar>(0x2109);
// Ideographic punctuation and the ideographic iteration mark.
const UChar TextBoundaryData::PUNCTUATION_IDEOGRAPHIC_COMMA     = static_cast<UChar>(0x3001);
const UChar TextBoundaryData::PUNCTUATION_IDEOGRAPHIC_FULL_STOP = static_cast<UChar>(0x3002);
const UChar TextBoundaryData::IDEOGRAPHIC_ITERATION_MARK        = static_cast<UChar>(0x3005);
// Hiragana letters (small and regular forms), followed by the voiced /
// semi-voiced sound marks and the Hiragana iteration marks
// (0x3041 through 0x309E).
const UChar TextBoundaryData::HIRAGANA_LETTER_SMALL_A        = static_cast<UChar>(0x3041);
const UChar TextBoundaryData::HIRAGANA_LETTER_A              = static_cast<UChar>(0x3042);
const UChar TextBoundaryData::HIRAGANA_LETTER_SMALL_I        = static_cast<UChar>(0x3043);
const UChar TextBoundaryData::HIRAGANA_LETTER_I              = static_cast<UChar>(0x3044);
const UChar TextBoundaryData::HIRAGANA_LETTER_SMALL_U        = static_cast<UChar>(0x3045);
const UChar TextBoundaryData::HIRAGANA_LETTER_U              = static_cast<UChar>(0x3046);
const UChar TextBoundaryData::HIRAGANA_LETTER_SMALL_E        = static_cast<UChar>(0x3047);
const UChar TextBoundaryData::HIRAGANA_LETTER_E              = static_cast<UChar>(0x3048);
const UChar TextBoundaryData::HIRAGANA_LETTER_SMALL_O        = static_cast<UChar>(0x3049);
const UChar TextBoundaryData::HIRAGANA_LETTER_O              = static_cast<UChar>(0x304A);
const UChar TextBoundaryData::HIRAGANA_LETTER_DI             = static_cast<UChar>(0x3062);
const UChar TextBoundaryData::HIRAGANA_LETTER_SMALL_TU       = static_cast<UChar>(0x3063);
const UChar TextBoundaryData::HIRAGANA_LETTER_TU             = static_cast<UChar>(0x3064);
const UChar TextBoundaryData::HIRAGANA_LETTER_MO             = static_cast<UChar>(0x3082);
const UChar TextBoundaryData::HIRAGANA_LETTER_SMALL_YA       = static_cast<UChar>(0x3083);
const UChar TextBoundaryData::HIRAGANA_LETTER_YA             = static_cast<UChar>(0x3084);
const UChar TextBoundaryData::HIRAGANA_LETTER_SMALL_YU       = static_cast<UChar>(0x3085);
const UChar TextBoundaryData::HIRAGANA_LETTER_YU             = static_cast<UChar>(0x3086);
const UChar TextBoundaryData::HIRAGANA_LETTER_SMALL_YO       = static_cast<UChar>(0x3087);
const UChar TextBoundaryData::HIRAGANA_LETTER_YO             = static_cast<UChar>(0x3088);
const UChar TextBoundaryData::HIRAGANA_LETTER_RO             = static_cast<UChar>(0x308D);
const UChar TextBoundaryData::HIRAGANA_LETTER_SMALL_WA       = static_cast<UChar>(0x308E);
const UChar TextBoundaryData::HIRAGANA_LETTER_WA             = static_cast<UChar>(0x308F);
const UChar TextBoundaryData::HIRAGANA_LETTER_VU             = static_cast<UChar>(0x3094);
const UChar TextBoundaryData::COMBINING_KATAKANA_HIRAGANA_VOICED_SOUND_MARK = static_cast<UChar>(0x3099);
const UChar TextBoundaryData::HIRAGANA_SEMIVOICED_SOUND_MARK = static_cast<UChar>(0x309C);
const UChar TextBoundaryData::HIRAGANA_ITERATION_MARK        = static_cast<UChar>(0x309D);
const UChar TextBoundaryData::HIRAGANA_VOICED_ITERATION_MARK = static_cast<UChar>(0x309E);
// Katakana letters (small and regular forms), followed by the prolonged
// sound mark and the Katakana iteration marks (0x30A1 through 0x30FE).
const UChar TextBoundaryData::KATAKANA_LETTER_SMALL_A                = static_cast<UChar>(0x30A1);
const UChar TextBoundaryData::KATAKANA_LETTER_A                      = static_cast<UChar>(0x30A2);
const UChar TextBoundaryData::KATAKANA_LETTER_SMALL_I                = static_cast<UChar>(0x30A3);
const UChar TextBoundaryData::KATAKANA_LETTER_I                      = static_cast<UChar>(0x30A4);
const UChar TextBoundaryData::KATAKANA_LETTER_SMALL_U                = static_cast<UChar>(0x30A5);
const UChar TextBoundaryData::KATAKANA_LETTER_U                      = static_cast<UChar>(0x30A6);
const UChar TextBoundaryData::KATAKANA_LETTER_SMALL_E                = static_cast<UChar>(0x30A7);
const UChar TextBoundaryData::KATAKANA_LETTER_E                      = static_cast<UChar>(0x30A8);
const UChar TextBoundaryData::KATAKANA_LETTER_SMALL_O                = static_cast<UChar>(0x30A9);
const UChar TextBoundaryData::KATAKANA_LETTER_O                      = static_cast<UChar>(0x30AA);
const UChar TextBoundaryData::KATAKANA_LETTER_DI                     = static_cast<UChar>(0x30C2);
const UChar TextBoundaryData::KATAKANA_LETTER_SMALL_TU               = static_cast<UChar>(0x30C3);
const UChar TextBoundaryData::KATAKANA_LETTER_TU                     = static_cast<UChar>(0x30C4);
const UChar TextBoundaryData::KATAKANA_LETTER_MO                     = static_cast<UChar>(0x30E2);
const UChar TextBoundaryData::KATAKANA_LETTER_SMALL_YA               = static_cast<UChar>(0x30E3);
const UChar TextBoundaryData::KATAKANA_LETTER_YA                     = static_cast<UChar>(0x30E4);
const UChar TextBoundaryData::KATAKANA_LETTER_SMALL_YU               = static_cast<UChar>(0x30E5);
const UChar TextBoundaryData::KATAKANA_LETTER_YU                     = static_cast<UChar>(0x30E6);
const UChar TextBoundaryData::KATAKANA_LETTER_SMALL_YO               = static_cast<UChar>(0x30E7);
const UChar TextBoundaryData::KATAKANA_LETTER_YO                     = static_cast<UChar>(0x30E8);
const UChar TextBoundaryData::KATAKANA_LETTER_RO                     = static_cast<UChar>(0x30ED);
const UChar TextBoundaryData::KATAKANA_LETTER_SMALL_WA               = static_cast<UChar>(0x30EE);
const UChar TextBoundaryData::KATAKANA_LETTER_WA                     = static_cast<UChar>(0x30EF);
const UChar TextBoundaryData::KATAKANA_LETTER_VU                     = static_cast<UChar>(0x30F4);
const UChar TextBoundaryData::KATAKANA_LETTER_SMALL_KA               = static_cast<UChar>(0x30F5);
const UChar TextBoundaryData::KATAKANA_LETTER_SMALL_KE               = static_cast<UChar>(0x30F6);
const UChar TextBoundaryData::KATAKANA_LETTER_VA                     = static_cast<UChar>(0x30F7);
const UChar TextBoundaryData::KATAKANA_LETTER_VO                     = static_cast<UChar>(0x30FA);
const UChar TextBoundaryData::KATAKANA_HIRAGANA_PROLONGED_SOUND_MARK = static_cast<UChar>(0x30FC);
const UChar TextBoundaryData::KATAKANA_ITERATION_MARK                = static_cast<UChar>(0x30FD);
const UChar TextBoundaryData::KATAKANA_VOICED_ITERATION_MARK         = static_cast<UChar>(0x30FE);
// Range bounds for Han ideographs, Hangul syllables and the CJK
// compatibility area, plus the zero-width non-breaking space (0xFEFF).
const UChar TextBoundaryData::UNICODE_LOW_BOUND_HAN                 = static_cast<UChar>(0x4E00);
const UChar TextBoundaryData::UNICODE_HIGH_BOUND_HAN                = static_cast<UChar>(0x9FA5);
const UChar TextBoundaryData::HANGUL_SYL_LOW                        = static_cast<UChar>(0xAC00);
const UChar TextBoundaryData::HANGUL_SYL_HIGH                       = static_cast<UChar>(0xD7A3);
const UChar TextBoundaryData::CJK_COMPATIBILITY_F900                = static_cast<UChar>(0xF900);
const UChar TextBoundaryData::CJK_COMPATIBILITY_FA2D                = static_cast<UChar>(0xFA2D);
const UChar TextBoundaryData::UNICODE_ZERO_WIDTH_NON_BREAKING_SPACE = static_cast<UChar>(0xFEFF);
// Fullwidth forms of the sentence-ending punctuation marks.
const UChar TextBoundaryData::FULLWIDTH_EXCLAMATION_MARK = static_cast<UChar>(0xFF01);
const UChar TextBoundaryData::FULLWIDTH_FULL_STOP        = static_cast<UChar>(0xFF0E);
const UChar TextBoundaryData::FULLWIDTH_QUESTION_MARK    = static_cast<UChar>(0xFF1F);
// By internal convention, SimpleTextBoundary uses the not-a-Unicode value
// 0xFFFF to signify the end of the string when looking up the proper state
// transition for the end of the string.
const UChar TextBoundaryData::END_OF_STRING = static_cast<UChar>(0xFFFF);
//eof