ICU-2015 version 3.2, needed by genpname

X-SVN-Rev: 10109
2002-10-30 18:40:28 +00:00 · 2002-10-30 18:40:28 +00:00 · e1ce0aca2b
commit e1ce0aca2b
parent 3f2b839107
2 changed files with 435 additions and 0 deletions
--- a/icu4c/source/data/unidata/PropertyAliases.txt
+++ b/icu4c/source/data/unidata/PropertyAliases.txt
@@ -0,0 +1,132 @@
+# PropertyAliases-3.2.0.txt
+# Date: 2002-03-19,23:31:21 GMT [MD]
+#
+# This file contains aliases for properties used in the UCD.
+# These names can be used for XML formats of UCD data, for regular-expression
+# property tests, and other programmatic textual descriptions of Unicode data.
+# The names are not normative, except where they correspond to normative
+# properties in the UCD. For information on which properties are normative,
+# see UnicodeCharacterDatabase.html.
+#
+# The names may be translated in appropriate environments, and additional
+# aliases may be useful.
+#
+# FORMAT
+#
+# Each line has two fields, separated by semicolons.
+#
+# First Field: The first field is an abbreviated name for the property
+#
+# Second Field: The second field is a long name
+#
+# With loose matching of property names, the case distinctions, whitespace,
+# and '_' are ignored.
+#
+# NOTE: Currently there is at most one abbreviated name and one long name for
+# each property. However, in the future additional aliases
+# may be added. In such a case, the first line for the property
+# would have the preferred alias for output.
+#
+# NOTE: The property value names are NOT unique across properties, especially
+# with loose matches. For example,
+#
+# AL means Arabic_Letter for the Bidi_Class property, and
+# AL means Above_Left for the Canonical_Combining_Class property, and
+# AL means Alphabetic for the Line_Break property.
+#
+# In addition, some property names may be the same as some property value names.
+#
+# The combination of property value and property name is, however, unique.
+# For more information, see UTR #18: Unicode Regular Expression Guidelines
+# ================================================
+
+
+# ================================================
+# Non-enumerated Properties
+# ================================================
+age       ; Age
+blk       ; Block
+bmg       ; Bidi_Mirroring_Glyph
+cf        ; Case_Folding
+dm        ; Decomposition_Mapping
+FC_NFKC   ; FC_NFKC_Closure
+isc       ; ISO_Comment
+lc        ; Lowercase_Mapping
+na        ; Name
+na1       ; Unicode_1_Name
+nv        ; Numeric_Value
+scc       ; Special_Case_Condition
+sfc       ; Simple_Case_Folding
+slc       ; Simple_Lowercase_Mapping
+stc       ; Simple_Titlecase_Mapping
+suc       ; Simple_Uppercase_Mapping
+tc        ; Titlecase_Mapping
+uc        ; Uppercase_Mapping
+
+# ================================================
+# Enumerated Non-Binary Properties
+# ================================================
+bc        ; Bidi_Class
+ccc       ; Canonical_Combining_Class
+dt        ; Decomposition_Type
+ea        ; East_Asian_Width
+gc        ; General_Category
+jg        ; Joining_Group
+jt        ; Joining_Type
+lb        ; Line_Break
+NFC_QC    ; NFC_Quick_Check
+NFKC_QC   ; NFKC_Quick_Check
+nt        ; Numeric_Type
+sc        ; Script
+
+# ================================================
+# Binary Properties
+# ================================================
+AHex      ; ASCII_Hex_Digit
+Alpha     ; Alphabetic
+Bidi_C    ; Bidi_Control
+Bidi_M    ; Bidi_Mirrored
+CE        ; Composition_Exclusion
+Comp_Ex   ; Full_Composition_Exclusion
+Dash      ; Dash
+Dep       ; Deprecated
+DI        ; Default_Ignorable_Code_Point
+Dia       ; Diacritic
+Ext       ; Extender
+Gr_Base   ; Grapheme_Base
+Gr_Ext    ; Grapheme_Extend
+Gr_Link   ; Grapheme_Link
+Hex       ; Hex_Digit
+Hyphen    ; Hyphen
+IDC       ; ID_Continue
+Ideo      ; Ideographic
+IDS       ; ID_Start
+IDSB      ; IDS_Binary_Operator
+IDST      ; IDS_Trinary_Operator
+Join_C    ; Join_Control
+LOE       ; Logical_Order_Exception
+Lower     ; Lowercase
+Math      ; Math
+NBrk      ; Non_Break
+NChar     ; Noncharacter_Code_Point
+NFD_QC    ; NFD_Quick_Check
+NFKD_QC   ; NFKD_Quick_Check
+OAlpha    ; Other_Alphabetic
+ODI       ; Other_Default_Ignorable_Code_Point
+OGr_Ext   ; Other_Grapheme_Extend
+OLower    ; Other_Lowercase
+OMath     ; Other_Math
+OUpper    ; Other_Uppercase
+QMark     ; Quotation_Mark
+Radical   ; Radical
+SD        ; Soft_Dotted
+Term      ; Terminal_Punctuation
+UIdeo     ; Unified_Ideograph
+Upper     ; Uppercase
+WSpace    ; White_Space
+XIDC      ; XID_Continue
+XIDS      ; XID_Start
+XO_NFC    ; Expands_On_NFC
+XO_NFD    ; Expands_On_NFD
+XO_NFKC   ; Expands_On_NFKC
+XO_NFKD   ; Expands_On_NFKD
--- a/icu4c/source/data/unidata/PropertyValueAliases.txt
+++ b/icu4c/source/data/unidata/PropertyValueAliases.txt
@@ -0,0 +1,303 @@
+# PropertyValueAliases-3.2.0.txt
+# Date: 2002-03-19,23:31:21 GMT [MD]
+#
+# This file contains aliases for property values used in the UCD.
+# These names can be used for XML formats of UCD data, for regular-expression
+# property tests, and other programmatic textual descriptions of Unicode data.
+# The names are not normative, except where they correspond to normative property
+# values in the UCD. For information on which properties are normative, see
+# UnicodeCharacterDatabase.html.
+#
+# The names may be translated in appropriate environments, and additional
+# aliases may be useful.
+#
+# FORMAT
+#
+# Each line describes a property value name.
+# This consists of three fields, separated by semicolons.
+#
+# First Field: The first field describes the property for which that
+# property value name is used.
+# There is one special pseudo-property: "qc" stands for any quick-check property
+#
+# Second Field: The second field is an abbreviated name.
+# If there is no abbreviated name available, the field is marked with "n/a".
+#
+# Third Field: The third field is a long name.
+#
+# In the case of ccc, there are 4 fields. The second field is numeric, the third
+# is abbreviated, and the fourth is long.
+#
+# With loose matching of property names, the case distinctions, whitespace,
+# and '_' are ignored.
+#
+# NOTE: The Block property values are in Blocks.txt, and not repeated here.
+# For more information on the use of blocks, see UTR #18: Unicode Regular
+# Expression Guidelines
+#
+# NOTE: Currently there is at most one abbreviated name and one long name for
+# each property value. However, in the future additional aliases
+# may be added. In such a case, the first line for the property value
+# would have the preferred alias for output.
+#
+# NOTE: The property value names are NOT unique across properties, especially
+# with loose matches. For example,
+# AL means Arabic_Letter for the Bidi_Class property, and
+# AL means Above_Left for the Canonical_Combining_Class property, and
+# AL means Alphabetic for the Line_Break property.
+#
+# In addition, some property names may be the same as some property value names:
+# sc means the Script property, and
+# Sc means the General_Category property value Currency_Symbol (Sc)
+#
+# The combination of property value and property name is, however, unique.
+# For more information, see UTR #18: Unicode Regular Expression Guidelines
+# ================================================
+
+
+bc ; AL        ; Arabic_Letter
+bc ; AN        ; Arabic_Number
+bc ; B         ; Paragraph_Separator
+bc ; BN        ; Boundary_Neutral
+bc ; CS        ; Common_Separator
+bc ; EN        ; European_Number
+bc ; ES        ; European_Separator
+bc ; ET        ; European_Terminator
+bc ; L         ; Left_To_Right
+bc ; LRE       ; Left_To_Right_Embedding
+bc ; LRO       ; Left_To_Right_Override
+bc ; NSM       ; Nonspacing_Mark
+bc ; ON        ; Other_Neutral
+bc ; PDF       ; Pop_Directional_Format
+bc ; R         ; Right_To_Left
+bc ; RLE       ; Right_To_Left_Embedding
+bc ; RLO       ; Right_To_Left_Override
+bc ; S         ; Segment_Separator
+bc ; WS        ; White_Space
+
+ccc;   0; NR   ; Not_Reordered
+ccc;   1; OV   ; Overlay
+ccc; 202; ATBL ; Attached_Below_Left
+ccc; 216; ATAR ; Attached_Above_Right
+ccc; 218; BL   ; Below_Left
+ccc; 220; B    ; Below
+ccc; 222; BR   ; Below_Right
+ccc; 224; L    ; Left
+ccc; 226; R    ; Right
+ccc; 228; AL   ; Above_Left
+ccc; 230; A    ; Above
+ccc; 232; AR   ; Above_Right
+ccc; 233; DB   ; Double_Below
+ccc; 234; DA   ; Double_Above
+ccc; 240; IS   ; Iota_Subscript
+ccc;   7; NK   ; Nukta
+ccc;   8; KV   ; Kana_Voicing
+ccc;   9; VR   ; Virama
+
+dt ; can       ; canonical
+dt ; com       ; compat
+dt ; enc       ; circle
+dt ; fin       ; final
+dt ; font      ; font
+dt ; fra       ; fraction
+dt ; init      ; initial
+dt ; iso       ; isolated
+dt ; med       ; medial
+dt ; n/a       ; none
+dt ; nar       ; narrow
+dt ; nb        ; noBreak
+dt ; sml       ; small
+dt ; sqr       ; square
+dt ; sub       ; sub
+dt ; sup       ; super
+dt ; vert      ; vertical
+dt ; wide      ; wide
+
+ea ; A         ; Ambiguous
+ea ; F         ; Fullwidth
+ea ; H         ; Halfwidth
+ea ; N         ; Neutral
+ea ; Na        ; Narrow
+ea ; W         ; Wide
+
+gc ; C         ; Other                            # Cc | Cf | Cn | Co | Cs
+gc ; Cc        ; Control
+gc ; Cf        ; Format
+gc ; Cn        ; Unassigned
+gc ; Co        ; Private_Use
+gc ; Cs        ; Surrogate
+gc ; L         ; Letter                           # Ll | Lm | Lo | Lt | Lu
+gc ; LC        ; Cased_Letter                     # Ll | Lt | Lu
+gc ; Ll        ; Lowercase_Letter
+gc ; Lm        ; Modifier_Letter
+gc ; Lo        ; Other_Letter
+gc ; Lt        ; Titlecase_Letter
+gc ; Lu        ; Uppercase_Letter
+gc ; M         ; Mark                             # Mc | Me | Mn
+gc ; Mc        ; Spacing_Mark
+gc ; Me        ; Enclosing_Mark
+gc ; Mn        ; Nonspacing_Mark
+gc ; N         ; Number                           # Nd | Nl | No
+gc ; Nd        ; Decimal_Number
+gc ; Nl        ; Letter_Number
+gc ; No        ; Other_Number
+gc ; P         ; Punctuation                      # Pc | Pd | Pe | Pf | Pi | Po | Ps
+gc ; Pc        ; Connector_Punctuation
+gc ; Pd        ; Dash_Punctuation
+gc ; Pe        ; Close_Punctuation
+gc ; Pf        ; Final_Punctuation
+gc ; Pi        ; Initial_Punctuation
+gc ; Po        ; Other_Punctuation
+gc ; Ps        ; Open_Punctuation
+gc ; S         ; Symbol                           # Sc | Sk | Sm | So
+gc ; Sc        ; Currency_Symbol
+gc ; Sk        ; Modifier_Symbol
+gc ; Sm        ; Math_Symbol
+gc ; So        ; Other_Symbol
+gc ; Z         ; Separator                        # Zl | Zp | Zs
+gc ; Zl        ; Line_Separator
+gc ; Zp        ; Paragraph_Separator
+gc ; Zs        ; Space_Separator
+
+jg ; n/a       ; AIN
+jg ; n/a       ; ALAPH
+jg ; n/a       ; ALEF
+jg ; n/a       ; BEH
+jg ; n/a       ; BETH
+jg ; n/a       ; DAL
+jg ; n/a       ; DALATH_RISH
+jg ; n/a       ; E
+jg ; n/a       ; FEH
+jg ; n/a       ; FINAL_SEMKATH
+jg ; n/a       ; GAF
+jg ; n/a       ; GAMAL
+jg ; n/a       ; HAH
+jg ; n/a       ; HAMZA_ON_HEH_GOAL
+jg ; n/a       ; HE
+jg ; n/a       ; HEH
+jg ; n/a       ; HEH_GOAL
+jg ; n/a       ; HETH
+jg ; n/a       ; KAF
+jg ; n/a       ; KAPH
+jg ; n/a       ; KNOTTED_HEH
+jg ; n/a       ; LAM
+jg ; n/a       ; LAMADH
+jg ; n/a       ; MEEM
+jg ; n/a       ; MIM
+jg ; n/a       ; NO_JOINING_GROUP
+jg ; n/a       ; NOON
+jg ; n/a       ; NUN
+jg ; n/a       ; PE
+jg ; n/a       ; QAF
+jg ; n/a       ; QAPH
+jg ; n/a       ; REH
+jg ; n/a       ; REVERSED_PE
+jg ; n/a       ; SAD
+jg ; n/a       ; SADHE
+jg ; n/a       ; SEEN
+jg ; n/a       ; SEMKATH
+jg ; n/a       ; SHIN
+jg ; n/a       ; SWASH_KAF
+jg ; n/a       ; SYRIAC_WAW
+jg ; n/a       ; TAH
+jg ; n/a       ; TAW
+jg ; n/a       ; TEH_MARBUTA
+jg ; n/a       ; TETH
+jg ; n/a       ; WAW
+jg ; n/a       ; YEH
+jg ; n/a       ; YEH_BARREE
+jg ; n/a       ; YEH_WITH_TAIL
+jg ; n/a       ; YUDH
+jg ; n/a       ; YUDH_HE
+jg ; n/a       ; ZAIN
+
+jt ; C         ; Join_Causing
+jt ; D         ; Dual_Joining
+jt ; L         ; Left_Joining
+jt ; R         ; Right_Joining
+jt ; T         ; Transparent
+jt ; U         ; Non_Joining
+
+lb ; AI        ; Ambiguous
+lb ; AL        ; Alphabetic
+lb ; B2        ; Break_Both
+lb ; BA        ; Break_After
+lb ; BB        ; Break_Before
+lb ; BK        ; Mandatory_Break
+lb ; CB        ; Contingent_Break
+lb ; CL        ; Close_Punctuation
+lb ; CM        ; Combining_Mark
+lb ; CR        ; Carriage_Return
+lb ; EX        ; Exclamation
+lb ; GL        ; Glue
+lb ; HY        ; Hyphen
+lb ; ID        ; Ideographic
+lb ; IN        ; Inseperable
+lb ; IS        ; Infix_Numeric
+lb ; LF        ; Line_Feed
+lb ; NS        ; Nonstarter
+lb ; NU        ; Numeric
+lb ; OP        ; Open_Punctuation
+lb ; PO        ; Postfix_Numeric
+lb ; PR        ; Prefix_Numeric
+lb ; QU        ; Quotation
+lb ; SA        ; Complex_Context
+lb ; SG        ; Surrogate
+lb ; SP        ; Space
+lb ; SY        ; Break_Symbols
+lb ; XX        ; Unknown
+lb ; ZW        ; ZWSpace
+
+nt ; de        ; Decimal
+nt ; di        ; Digit
+nt ; n/a       ; None
+nt ; nu        ; Numeric
+
+qc ; M         ; Maybe
+qc ; N         ; No
+qc ; Y         ; Yes
+
+sc ; Arab      ; Arabic
+sc ; Armn      ; Armenian
+sc ; Beng      ; Bengali
+sc ; Bopo      ; Bopomofo
+sc ; Buhd      ; Buhid
+sc ; Cans      ; Canadian_Aboriginal
+sc ; Cher      ; Cherokee
+sc ; Cyrl      ; Cyrillic
+sc ; Deva      ; Devanagari
+sc ; Dsrt      ; Deseret
+sc ; Ethi      ; Ethiopic
+sc ; Geor      ; Georgian
+sc ; Goth      ; Gothic
+sc ; Grek      ; Greek
+sc ; Gujr      ; Gujarati
+sc ; Guru      ; Gurmukhi
+sc ; Hang      ; Hangul
+sc ; Hani      ; Han
+sc ; Hano      ; Hanunoo
+sc ; Hebr      ; Hebrew
+sc ; Hira      ; Hiragana
+sc ; Ital      ; Old_Italic
+sc ; Kana      ; Katakana
+sc ; Khmr      ; Khmer
+sc ; Knda      ; Kannada
+sc ; Laoo      ; Lao
+sc ; Latn      ; Latin
+sc ; Mlym      ; Malayalam
+sc ; Mong      ; Mongolian
+sc ; Mymr      ; Myanmar
+sc ; Ogam      ; Ogham
+sc ; Orya      ; Oriya
+sc ; Qaai      ; Inherited
+sc ; Runr      ; Runic
+sc ; Sinh      ; Sinhala
+sc ; Syrc      ; Syriac
+sc ; Tagb      ; Tagbanwa
+sc ; Taml      ; Tamil
+sc ; Telu      ; Telugu
+sc ; Tglg      ; Tagalog
+sc ; Thaa      ; Thaana
+sc ; Thai      ; Thai
+sc ; Tibt      ; Tibetan
+sc ; Yiii      ; Yi
+sc ; Zyyy      ; Common