68 lines
2.7 KiB
Plaintext
68 lines
2.7 KiB
Plaintext
|
# ================================================================================
|
||
|
# Conditional mappings
|
||
|
# ================================================================================
|
||
|
|
||
|
# Special case for final form of sigma
|
||
|
|
||
|
03A3; 03C2; 03A3; 03A3; FINAL_SIGMA; # GREEK CAPITAL LETTER SIGMA
|
||
|
|
||
|
# Note: the following cases for non-final are already in the UnicodeData file.
|
||
|
|
||
|
# 03A3; 03C3; 03A3; 03A3; # GREEK CAPITAL LETTER SIGMA
|
||
|
# 03C3; 03C3; 03A3; 03A3; # GREEK SMALL LETTER SIGMA
|
||
|
# 03C2; 03C2; 03A3; 03A3; # GREEK SMALL LETTER FINAL SIGMA
|
||
|
|
||
|
# Note: the following cases are not included, since they would case-fold in lowercasing
|
||
|
|
||
|
# 03C3; 03C2; 03A3; 03A3; FINAL_SIGMA; # GREEK SMALL LETTER SIGMA
|
||
|
# 03C2; 03C3; 03A3; 03A3; NOT_FINAL_SIGMA; # GREEK SMALL LETTER FINAL SIGMA
|
||
|
|
||
|
# ================================================================================
|
||
|
# Locale-sensitive mappings
|
||
|
# ================================================================================
|
||
|
|
||
|
# Lithuanian
|
||
|
|
||
|
# Lithuanian retains the dot in a lowercase i when followed by accents.
|
||
|
|
||
|
# Remove DOT ABOVE after "i" with upper or titlecase
|
||
|
|
||
|
0307; 0307; ; ; lt AFTER_i; # COMBINING DOT ABOVE
|
||
|
|
||
|
# Introduce an explicit dot above when lowercasing capital I's and J's
|
||
|
# whenever there are more accents above
|
||
|
# (of the accents used in Lithuanian: grave, acute, tilde above, and ogonek)
|
||
|
|
||
|
0049; 0069 0307; 0049; 0049; lt MORE_ABOVE; # LATIN CAPITAL LETTER I
|
||
|
004A; 006A 0307; 004A; 004A; lt MORE_ABOVE; # LATIN CAPITAL LETTER J
|
||
|
012E; 012F 0307; 012E; 012E; lt MORE_ABOVE; # LATIN CAPITAL LETTER I WITH OGONEK
|
||
|
00CC; 0069 0307 0300; 00CC; 00CC; lt; # LATIN CAPITAL LETTER I WITH GRAVE
|
||
|
00CD; 0069 0307 0301; 00CD; 00CD; lt; # LATIN CAPITAL LETTER I WITH ACUTE
|
||
|
0128; 0069 0307 0303; 0128; 0128; lt; # LATIN CAPITAL LETTER I WITH TILDE
|
||
|
|
||
|
# ================================================================================
|
||
|
|
||
|
# Turkish and Azeri
|
||
|
|
||
|
# I and i-dotless; I-dot and i are case pairs in Turkish and Azeri
|
||
|
# The following rules handle those cases.
|
||
|
|
||
|
# When lowercasing, remove dot_above in the sequence I + dot_above, which will turn into i.
|
||
|
# This matches the behavior of the canonically equivalent I-dot_above
|
||
|
|
||
|
0307; ; 0307; 0307; tr AFTER_I; # COMBINING DOT ABOVE
0307; ; 0307; 0307; az AFTER_I; # COMBINING DOT ABOVE
|
||
|
# When lowercasing, unless an I is before a dot_above, it turns into a dotless i.
|
||
|
|
||
|
0049; 0131; 0049; 0049; tr NOT_BEFORE_DOT; # LATIN CAPITAL LETTER I
|
||
|
0049; 0131; 0049; 0049; az NOT_BEFORE_DOT; # LATIN CAPITAL LETTER I
|
||
|
|
||
|
# When uppercasing, i turns into a dotted capital I
|
||
|
|
||
|
0069; 0069; 0130; 0130; tr; # LATIN SMALL LETTER I
|
||
|
0069; 0069; 0130; 0130; az; # LATIN SMALL LETTER I
|
||
|
|
||
|
# Note: the following cases are already in the UnicodeData file.
|
||
|
|
||
|
# 0131; 0131; 0049; 0049; tr; # LATIN SMALL LETTER DOTLESS I
|
||
|
# 0130; 0069; 0130; 0130; tr; # LATIN CAPITAL LETTER I WITH DOT ABOVE
|