f82d62a85c
X-SVN-Rev: 39273
154 lines
6.6 KiB
Plaintext
154 lines
6.6 KiB
Plaintext
# © 2016 and later: Unicode, Inc. and others.
|
||
# License & terms of use: http://www.unicode.org/copyright.html#License
|
||
#
|
||
# File: ThaiLogical_Latin.txt
|
||
# Generated from CLDR
|
||
#
|
||
|
||
# Thai-Latin
|
||
# This set of rules follows ISO 11940
|
||
# see http://homepage.mac.com/sirbinks/pdf/Thai.r2.pdf
|
||
# except that that document does not mention an implicit vowel, so we use o\u0323
|
||
#
|
||
# The transcription is fairly ugly, so we ought to also do the UNGEGN version
|
||
# see: http://www.eki.ee/wgrs/rom1_th.pdf
|
||
# and probably make that the main variant.
|
||
#
|
||
# Note: this is an internal file. NFD/NFC normalization is handled externally, in the index.
|
||
# The insertion of spaces between words, the reversal of the vowels
|
||
# and the conversion of space to semicolon are done *outside* of these rules.
|
||
# So as far as these rules are concerned, the vowels are in logical order!
|
||
# insert implicit vowel (and remove it going the other way)
|
||
# COMMENTED out: the implicit vowel positions cannot be predicted algorithmically
|
||
#$consonant = [ก-ฮ];
|
||
#$vowel = [ะ-\u0E3Aเ-ไ\u0E47];
|
||
#{ ( $consonant ) } [^$vowel \uE000] → | $1 \uE000 ;
|
||
#\uE000 → o\u0323 ;
|
||
# ← o\u0323 ;
|
||
# Helper sets keyed on the canonical combining class (ccc):
#   $notAbove : any character whose ccc is neither 0 nor "above"
#   $notBelow : any character whose ccc is neither 0 nor "below"
# The "backward case" rules below use ($notAbove*) to capture marks that sit
# between a base letter and its above accent (\u0304 or \u0309).
# $notBelow is not referenced by any rule visible in this file; it appears only
# in the commented-out $notHAccent sketch.
$notAbove = [^\p{ccc=0}\p{ccc=above}] ;
|
||
$notBelow = [^\p{ccc=0}\p{ccc=below}] ;
|
||
# Consonants
|
||
# Warning: the 'h's need to be handled carefully!
|
||
# What we really want to say is the following, but we can't
|
||
# $notHAccent = !($notAbove* \u0304 | $notBelow* \u0323) ;
|
||
# Since the only accents we care about that could cause problems are free-standing accents below, we use instead:
|
||
# Sets used as the right-hand context ("} ...") of the kh / ph / ch / th rules:
#   $freeStandingBelow : \u0325 (COMBINING RING BELOW)
#   $hAccent           : \u0304 (COMBINING MACRON) and \u0323 (COMBINING DOT BELOW),
#                        the two accents the h rules (h\u0304, h\u0323) attach to h
#   $notHAccent0       : one character that is in neither of the two sets above
#   $notHAccent1       : a $freeStandingBelow character followed by one character
#                        that is not in $hAccent
$freeStandingBelow = [\u0325 ];
|
||
$hAccent = [ \u0304 \u0323];
|
||
$notHAccent0 = [^$freeStandingBelow$hAccent];
|
||
$notHAccent1 = $freeStandingBelow [^$hAccent];
|
||
# Consonant rules.
# Operators: "→" is forward only (Thai to Latin), "←" is backward only (Latin to
# Thai), "↔" defines both directions.
# Rule order matters: within each group the accented and digraph romanizations
# (e.g. k\u0304h, k\u0323h, kh) are listed before the bare letter (k).
# "Backward case" rules: the Latin side matches base letter, any run of
# $notAbove marks (captured as $1), then the accent. The Thai side emits the Thai
# letter, and "|" puts the cursor before the re-emitted $1 so that the captured
# marks are still processed by subsequent rule matches.
# "xh } $notHAccentN" rules: the digraph is only matched when the text that
# follows matches the context set; the context itself is not consumed.
ห → h\u0304 ; # THAI CHARACTER HO HIP
|
||
ห | $1 ← h ($notAbove*) \u0304; # backward case, account for reordering
|
||
ฮ ↔ h\u0323 ; # THAI CHARACTER HO NOKHUK
|
||
ข ↔ k\u0304h ; # THAI CHARACTER KHO KHAI
|
||
ฃ ↔ k\u0323\u0304h ; # THAI CHARACTER KHO KHUAT
|
||
ฅ ↔ kʹh ; # THAI CHARACTER KHO KHON
|
||
ฆ ↔ k\u0323h ; # THAI CHARACTER KHO RAKHANG
|
||
ค ← kh } $notHAccent1 ; # THAI CHARACTER KHO KHWAI
|
||
ค ↔ kh } $notHAccent0 ; # THAI CHARACTER KHO KHWAI
|
||
ก ↔ k ; # THAI CHARACTER KO KAI
|
||
ภ ↔ p\u0323h ; # THAI CHARACTER PHO SAMPHAO
|
||
ผ ↔ p\u0304h ; # THAI CHARACTER PHO PHUNG
|
||
พ ← ph } $notHAccent1 ; # THAI CHARACTER PHO PHAN
|
||
พ ↔ ph } $notHAccent0 ; # THAI CHARACTER PHO PHAN
|
||
ป ↔ p ; # THAI CHARACTER PO PLA
|
||
ฉ ↔ c\u0304h ; # THAI CHARACTER CHO CHING
|
||
ฌ ↔ c\u0323h ; # THAI CHARACTER CHO CHOE
|
||
ช ← ch } $notHAccent1 ; # THAI CHARACTER CHO CHANG
|
||
ช ↔ ch } $notHAccent0 ; # THAI CHARACTER CHO CHANG
|
||
จ ↔ c ; # THAI CHARACTER CHO CHAN
|
||
ฐ ↔ t\u0323\u0304h ; # THAI CHARACTER THO THAN
|
||
ฑ ↔ t\u0331h ; # THAI CHARACTER THO NANGMONTHO
|
||
ฒ ↔ tʹh ; # THAI CHARACTER THO PHUTHAO
|
||
ถ ↔ t\u0304h ; # THAI CHARACTER THO THUNG
|
||
ธ ↔ t\u0323h ; # THAI CHARACTER THO THONG
|
||
ท ← th } $notHAccent1 ; # THAI CHARACTER THO THAHAN
|
||
ท ↔ th } $notHAccent0 ; # THAI CHARACTER THO THAHAN
|
||
#Note: TO PATAK deviates from ISO since t-dotunder + h would be ambiguous. So it uses vertical tick.
|
||
ฏ ↔ t\u0329 ; # THAI CHARACTER TO PATAK
|
||
ต ↔ t ; # THAI CHARACTER TO TAO
|
||
# since there is no singleton g (generated), don't worry about that.
|
||
ง ↔ ng ; # THAI CHARACTER NGO NGU
|
||
ณ ↔ n\u0323 ; # THAI CHARACTER NO NEN
|
||
น ↔ n ; # THAI CHARACTER NO NU
|
||
ญ ↔ y\u0323 ; # THAI CHARACTER YO YING
|
||
ฎ ↔ d\u0323 ; # THAI CHARACTER DO CHADA
|
||
ด ↔ d ; # THAI CHARACTER DO DEK
|
||
บ ↔ b ; # THAI CHARACTER BO BAIMAI
|
||
ฝ ↔ f\u0304 ; # THAI CHARACTER FO FA
|
||
ฝ | $1 ← f ($notAbove*) \u0304; # backward case, account for reordering
|
||
ม ↔ m ; # THAI CHARACTER MO MA
|
||
ย ↔ y ; # THAI CHARACTER YO YAK
|
||
ร ↔ r ; # THAI CHARACTER RO RUA
|
||
ฤ ↔ v ; # THAI CHARACTER RU
|
||
ฦ ↔ ł ; # THAI CHARACTER LU
|
||
ว ↔ w ; # THAI CHARACTER WO WAEN
|
||
ศ ↔ s\u0323\u0304 ; # THAI CHARACTER SO SALA***
|
||
ศ | $1 ← s \u0323 ($notAbove*) \u0304; # backward case, account for reordering
|
||
ษ ↔ s\u0304ʹ ; # THAI CHARACTER SO RUSI
|
||
ส → s\u0304 ; # THAI CHARACTER SO SUA***
|
||
ส | $1 ← s ($notAbove*) \u0304; # backward case, account for reordering
|
||
ฬ ↔ l\u0323 ; # THAI CHARACTER LO CHULA
|
||
ล ↔ l ; # THAI CHARACTER LO LING
|
||
ฟ ↔ f ; # THAI CHARACTER FO FAN
|
||
อ ↔ x ; # THAI CHARACTER O ANG
|
||
ซ ↔ s ; # THAI CHARACTER SO SO
|
||
# vowels
|
||
# Vowels, tone marks, other signs, digits and punctuation.
# As in the consonant rules, each "backward case" rule lets the Latin-to-Thai
# direction accept marks (captured by ($notAbove*) as $1) between the base letter
# and its \u0304 / \u0309 accent; "|" leaves the cursor before the re-emitted $1.
\u0E31 ↔ a\u0323 ; # THAI CHARACTER MAI HAN-AKAT
|
||
า → a\u0304 ; # THAI CHARACTER SARA AA
|
||
า | $1 ← a ($notAbove*) \u0304; # backward case, account for reordering
|
||
# We deviate from ISO for SARA AM for disambiguation
|
||
ำ → a \u0309; # THAI CHARACTER SARA AM
|
||
ำ | $1 ← a ($notAbove*) \u0309 ; # backward case, account for reordering
|
||
ะ ↔ a ; # THAI CHARACTER SARA A
|
||
\u0E35 ↔ i\u0304 ; # THAI CHARACTER SARA II
|
||
\u0E35 | $1 ← i ($notAbove*) \u0304 ; # backward case, account for reordering
|
||
\u0E37 ↔ u\u0323\u0304 ; # THAI CHARACTER SARA UEE
|
||
\u0E37 | $1 ← u \u0323 ($notAbove*) \u0304 ; # backward case, account for reordering
|
||
\u0E36 ↔ u\u0323 ; # THAI CHARACTER SARA UE
|
||
\u0E39 ↔ u\u0304 ; # THAI CHARACTER SARA UU
|
||
\u0E39 | $1 ← u ($notAbove*) \u0304 ; # backward case, account for reordering
|
||
\u0E38 ↔ u ; # THAI CHARACTER SARA U
|
||
ฯ ↔ ‡ ; # THAI CHARACTER PAIYANNOI
|
||
# ฿ ↔ XXX ; # THAI CURRENCY SYMBOL BAHT
|
||
เ ↔ e ; # THAI CHARACTER SARA E
|
||
แ ↔ æ ; # THAI CHARACTER SARA AE
|
||
โ ↔ o ; # THAI CHARACTER SARA O
|
||
ใ ↔ ı ; # THAI CHARACTER SARA AI MAIMUAN
|
||
ไ ↔ i\u0323 ; # THAI CHARACTER SARA AI MAIMALAI
|
||
ๅ ↔ ɨ ; # THAI CHARACTER LAKKHANGYAO
|
||
\u0E47 ↔ \u0306 ; # THAI CHARACTER MAITAIKHU
|
||
\u0E48 ↔ \u0300 ; # THAI CHARACTER MAI EK
|
||
\u0E49 ↔ \u0302 ; # THAI CHARACTER MAI THO
|
||
\u0E4A ↔ \u0301 ; # THAI CHARACTER MAI TRI
|
||
\u0E4B ↔ \u030C ; # THAI CHARACTER MAI CHATTAWA
|
||
\u0E4C ↔ \u0312 ; # THAI CHARACTER THANTHAKHAT
|
||
\u0E4E ↔ '~' ; # THAI CHARACTER YAMAKKAN
|
||
# We deviate from ISO for disambiguation
|
||
\u0E4D ↔ \u030A ; # THAI CHARACTER NIKHAHIT
|
||
๏ ↔ '§' ; # THAI CHARACTER FONGMAN
|
||
๐ ↔ 0 ; # THAI DIGIT ZERO
|
||
๑ ↔ 1 ; # THAI DIGIT ONE
|
||
๒ ↔ 2 ; # THAI DIGIT TWO
|
||
๓ ↔ 3 ; # THAI DIGIT THREE
|
||
๔ ↔ 4 ; # THAI DIGIT FOUR
|
||
๕ ↔ 5 ; # THAI DIGIT FIVE
|
||
๖ ↔ 6 ; # THAI DIGIT SIX
|
||
๗ ↔ 7 ; # THAI DIGIT SEVEN
|
||
๘ ↔ 8 ; # THAI DIGIT EIGHT
|
||
๙ ↔ 9 ; # THAI DIGIT NINE
|
||
๚ ↔ '||' ; # THAI CHARACTER ANGKHANKHU
|
||
๛ ↔ » ; # THAI CHARACTER KHOMUT
|
||
ๆ ↔ « ; # THAI CHARACTER MAIYAMOK
|
||
# moved down to make shorter first
# NOTE(review): presumably this refers to SARA I (plain "i") being listed after
# the i\u0304 and i\u0323 rules so the longer Latin sequences match first; confirm.
|
||
#Note: PHINTHU deviates from ISO since underring causes canonical problems. So it uses spacing tick below.
|
||
\u0E3A ↔ ˌ ; # THAI CHARACTER PHINTHU
|
||
\u0E34 ↔ i ; # THAI CHARACTER SARA I
|
||
# fallbacks
|
||
# Backward-only (Latin to Thai) fallbacks for Latin letters not consumed by an
# earlier rule. Each rule rewrites the letter on the right of "←" to the Latin
# letter on the left; the leading "|" leaves the cursor before that replacement,
# so it is matched again by the rules above (e.g. g becomes k, then k becomes ก).
# NOTE(review): a bare h (one not consumed by the h\u0304 / h\u0323 or digraph
# rules) is rewritten to k here; confirm that this is the intended fallback.
| k ← g ;
|
||
| k ← h ;
|
||
| c ← j ;
|
||
| k ← q ;
|
||
| s ← z ;
|
||
# "lower" is named only in the parenthesized (inverse-direction) slot, so per ICU
# rule syntax lowercasing applies only when running Latin to Thai.
:: (lower);
|
||
|