scuffed-code/icu4c/source/data/unidata/PropertyValueAliases.txt
2002-10-30 18:40:28 +00:00

304 lines
9.0 KiB
Plaintext

# PropertyValueAliases-3.2.0.txt
# Date: 2002-03-19,23:31:21 GMT [MD]
#
# This file contains aliases for property values used in the UCD.
# These names can be used for XML formats of UCD data, for regular-expression
# property tests, and other programmatic textual descriptions of Unicode data.
# The names are not normative, except where they correspond to normative property
# values in the UCD. For information on which properties are normative, see
# UnicodeCharacterDatabase.html.
#
# The names may be translated in appropriate environments, and additional
# aliases may be useful.
#
# FORMAT
#
# Each line describes a property value name.
# This consists of three fields, separated by semicolons.
#
# First Field: The first field describes the property for which that
# property value name is used.
# There is one special pseudo-property: "qc" stands for any quick-check property.
#
# Second Field: The second field is an abbreviated name.
# If there is no abbreviated name available, the field is marked with "n/a".
#
# Third Field: The third field is a long name.
#
# In the case of ccc, there are 4 fields. The second field is numeric, the third
# is abbreviated, and the fourth is long.
#
# With loose matching of property names, the case distinctions, whitespace,
# and '_' are ignored.
#
# NOTE: The Block property values are in Blocks.txt, and not repeated here.
# For more information on the use of blocks, see UTR #18: Unicode Regular Expression Guidelines
#
# NOTE: Currently there is at most one abbreviated name and one long name for
# each property value. However, in the future additional aliases
# may be added. In such a case, the first line for the property value
# would have the preferred alias for output.
#
# NOTE: The property value names are NOT unique across properties, especially
# with loose matches. For example,
# AL means Arabic_Letter for the Bidi_Class property, and
# AL means Above_Left for the Combining_Class property, and
# AL means Alphabetic for the Line_Break property.
#
# In addition, some property names may be the same as some property value names:
# cc means Combining_Class property, and
# cc means the General_Category property value Control (cc)
#
# The combination of property value and property name is, however, unique.
# For more information, see UTR #18: Unicode Regular Expression Guidelines
# ================================================
bc ; AL ; Arabic_Letter
bc ; AN ; Arabic_Number
bc ; B ; Paragraph_Separator
bc ; BN ; Boundary_Neutral
bc ; CS ; Common_Separator
bc ; EN ; European_Number
bc ; ES ; European_Separator
bc ; ET ; European_Terminator
bc ; L ; Left_To_Right
bc ; LRE ; Left_To_Right_Embedding
bc ; LRO ; Left_To_Right_Override
bc ; NSM ; Nonspacing_Mark
bc ; ON ; Other_Neutral
bc ; PDF ; Pop_Directional_Format
bc ; R ; Right_To_Left
bc ; RLE ; Right_To_Left_Embedding
bc ; RLO ; Right_To_Left_Override
bc ; S ; Segment_Separator
bc ; WS ; White_Space
ccc; 0; NR ; Not_Reordered
ccc; 1; OV ; Overlay
ccc; 202; ATBL ; Attached_Below_Left
ccc; 216; ATAR ; Attached_Above_Right
ccc; 218; BL ; Below_Left
ccc; 220; B ; Below
ccc; 222; BR ; Below_Right
ccc; 224; L ; Left
ccc; 226; R ; Right
ccc; 228; AL ; Above_Left
ccc; 230; A ; Above
ccc; 232; AR ; Above_Right
ccc; 233; DB ; Double_Below
ccc; 234; DA ; Double_Above
ccc; 240; IS ; Iota_Subscript
ccc; 7; NK ; Nukta
ccc; 8; KV ; Kana_Voicing
ccc; 9; VR ; Virama
dt ; can ; canonical
dt ; com ; compat
dt ; enc ; circle
dt ; fin ; final
dt ; font ; font
dt ; fra ; fraction
dt ; init ; initial
dt ; iso ; isolated
dt ; med ; medial
dt ; n/a ; none
dt ; nar ; narrow
dt ; nb ; noBreak
dt ; sml ; small
dt ; sqr ; square
dt ; sub ; sub
dt ; sup ; super
dt ; vert ; vertical
dt ; wide ; wide
ea ; A ; Ambiguous
ea ; F ; Fullwidth
ea ; H ; Halfwidth
ea ; N ; Neutral
ea ; Na ; Narrow
ea ; W ; Wide
gc ; C ; Other # Cc | Cf | Cn | Co | Cs
gc ; Cc ; Control
gc ; Cf ; Format
gc ; Cn ; Unassigned
gc ; Co ; Private_Use
gc ; Cs ; Surrogate
gc ; L ; Letter # Ll | Lm | Lo | Lt | Lu
gc ; LC ; Cased_Letter # Ll | Lt | Lu
gc ; Ll ; Lowercase_Letter
gc ; Lm ; Modifier_Letter
gc ; Lo ; Other_Letter
gc ; Lt ; Titlecase_Letter
gc ; Lu ; Uppercase_Letter
gc ; M ; Mark # Mc | Me | Mn
gc ; Mc ; Spacing_Mark
gc ; Me ; Enclosing_Mark
gc ; Mn ; Nonspacing_Mark
gc ; N ; Number # Nd | Nl | No
gc ; Nd ; Decimal_Number
gc ; Nl ; Letter_Number
gc ; No ; Other_Number
gc ; P ; Punctuation # Pc | Pd | Pe | Pf | Pi | Po | Ps
gc ; Pc ; Connector_Punctuation
gc ; Pd ; Dash_Punctuation
gc ; Pe ; Close_Punctuation
gc ; Pf ; Final_Punctuation
gc ; Pi ; Initial_Punctuation
gc ; Po ; Other_Punctuation
gc ; Ps ; Open_Punctuation
gc ; S ; Symbol # Sc | Sk | Sm | So
gc ; Sc ; Currency_Symbol
gc ; Sk ; Modifier_Symbol
gc ; Sm ; Math_Symbol
gc ; So ; Other_Symbol
gc ; Z ; Separator # Zl | Zp | Zs
gc ; Zl ; Line_Separator
gc ; Zp ; Paragraph_Separator
gc ; Zs ; Space_Separator
jg ; n/a ; AIN
jg ; n/a ; ALAPH
jg ; n/a ; ALEF
jg ; n/a ; BEH
jg ; n/a ; BETH
jg ; n/a ; DAL
jg ; n/a ; DALATH_RISH
jg ; n/a ; E
jg ; n/a ; FEH
jg ; n/a ; FINAL_SEMKATH
jg ; n/a ; GAF
jg ; n/a ; GAMAL
jg ; n/a ; HAH
jg ; n/a ; HAMZA_ON_HEH_GOAL
jg ; n/a ; HE
jg ; n/a ; HEH
jg ; n/a ; HEH_GOAL
jg ; n/a ; HETH
jg ; n/a ; KAF
jg ; n/a ; KAPH
jg ; n/a ; KNOTTED_HEH
jg ; n/a ; LAM
jg ; n/a ; LAMADH
jg ; n/a ; MEEM
jg ; n/a ; MIM
jg ; n/a ; NO_JOINING_GROUP
jg ; n/a ; NOON
jg ; n/a ; NUN
jg ; n/a ; PE
jg ; n/a ; QAF
jg ; n/a ; QAPH
jg ; n/a ; REH
jg ; n/a ; REVERSED_PE
jg ; n/a ; SAD
jg ; n/a ; SADHE
jg ; n/a ; SEEN
jg ; n/a ; SEMKATH
jg ; n/a ; SHIN
jg ; n/a ; SWASH_KAF
jg ; n/a ; SYRIAC_WAW
jg ; n/a ; TAH
jg ; n/a ; TAW
jg ; n/a ; TEH_MARBUTA
jg ; n/a ; TETH
jg ; n/a ; WAW
jg ; n/a ; YEH
jg ; n/a ; YEH_BARREE
jg ; n/a ; YEH_WITH_TAIL
jg ; n/a ; YUDH
jg ; n/a ; YUDH_HE
jg ; n/a ; ZAIN
jt ; C ; Join_Causing
jt ; D ; Dual_Joining
jt ; L ; Left_Joining
jt ; R ; Right_Joining
jt ; T ; Transparent
jt ; U ; Non_Joining
lb ; AI ; Ambiguous
lb ; AL ; Alphabetic
lb ; B2 ; Break_Both
lb ; BA ; Break_After
lb ; BB ; Break_Before
lb ; BK ; Mandatory_Break
lb ; CB ; Contingent_Break
lb ; CL ; Close_Punctuation
lb ; CM ; Combining_Mark
lb ; CR ; Carriage_Return
lb ; EX ; Exclamation
lb ; GL ; Glue
lb ; HY ; Hyphen
lb ; ID ; Ideographic
lb ; IN ; Inseperable
lb ; IS ; Infix_Numeric
lb ; LF ; Line_Feed
lb ; NS ; Nonstarter
lb ; NU ; Numeric
lb ; OP ; Open_Punctuation
lb ; PO ; Postfix_Numeric
lb ; PR ; Prefix_Numeric
lb ; QU ; Quotation
lb ; SA ; Complex_Context
lb ; SG ; Surrogate
lb ; SP ; Space
lb ; SY ; Break_Symbols
lb ; XX ; Unknown
lb ; ZW ; ZWSpace
nt ; de ; Decimal
nt ; di ; Digit
nt ; n/a ; None
nt ; nu ; Numeric
qc ; M ; Maybe
qc ; N ; No
qc ; Y ; Yes
sc ; Arab ; Arabic
sc ; Armn ; Armenian
sc ; Beng ; Bengali
sc ; Bopo ; Bopomofo
sc ; Buhd ; Buhid
sc ; Cans ; Canadian_Aboriginal
sc ; Cher ; Cherokee
sc ; Cyrl ; Cyrillic
sc ; Deva ; Devanagari
sc ; Dsrt ; Deseret
sc ; Ethi ; Ethiopic
sc ; Geor ; Georgian
sc ; Goth ; Gothic
sc ; Grek ; Greek
sc ; Gujr ; Gujarati
sc ; Guru ; Gurmukhi
sc ; Hang ; Hangul
sc ; Hani ; Han
sc ; Hano ; Hanunoo
sc ; Hebr ; Hebrew
sc ; Hira ; Hiragana
sc ; Ital ; Old_Italic
sc ; Kana ; Katakana
sc ; Khmr ; Khmer
sc ; Knda ; Kannada
sc ; Laoo ; Lao
sc ; Latn ; Latin
sc ; Mlym ; Malayalam
sc ; Mong ; Mongolian
sc ; Mymr ; Myanmar
sc ; Ogam ; Ogham
sc ; Orya ; Oriya
sc ; Qaai ; Inherited
sc ; Runr ; Runic
sc ; Sinh ; Sinhala
sc ; Syrc ; Syriac
sc ; Tagb ; Tagbanwa
sc ; Taml ; Tamil
sc ; Telu ; Telugu
sc ; Tglg ; Tagalog
sc ; Thaa ; Thaana
sc ; Thai ; Thai
sc ; Tibt ; Tibetan
sc ; Yiii ; Yi
sc ; Zyyy ; Common