61607c2773
X-SVN-Rev: 38848
186 lines
4.8 KiB
Plaintext
186 lines
4.8 KiB
Plaintext
# ***************************************************************************
|
||
# Copyright (C) 2016 and later: Unicode, Inc. and others.
|
||
# License & terms of use: http://www.unicode.org/copyright.html
|
||
# *
|
||
# * Copyright (C) 2004-2016, International Business Machines
|
||
# * Corporation; Unicode, Inc.; and others. All Rights Reserved.
|
||
# *
|
||
# ***************************************************************************
|
||
# File: sat_Olck_sat_FONIPA.txt
|
||
# Generated from CLDR
|
||
#
|
||
|
||
# Santali (Ol Chiki) → Santali (International Phonetic Alphabet)
|
||
# Output
|
||
# ------
|
||
# m mː n nː ɳ ɳː ɲ ɲː ŋ ŋː
|
||
# p pʰ pʼ b bʰ t tʰ tʼ d dʰ ʈ ʈʰ ɖ ɖʰ c cʰ cʼ k kʰ kʼ ɡ ʔ
|
||
# s sː h
|
||
# d\u0361ʒ
|
||
# ɽ r
|
||
# l lː
|
||
# w wː w\u0303 w\u0303ː
|
||
#
|
||
# i iː ĩ ĩː u uː ũ ũː
|
||
# e eː ẽ ẽː ə əː ə\u0303 ə\u0303ː o oː õ õː
|
||
# ɛ ɛː ɛ\u0303 ɛ\u0303ː ɔ ɔː ɔ\u0303 ɔ\u0303ː
|
||
# a aː ã ãː
|
||
# References
|
||
# ----------
|
||
# [1] Michael Everson: Final proposal to encode the Ol Chiki script
|
||
# in the UCS. ISO/IEC JTC1/SC2/WG2 Working Group Document N2984R,
|
||
# September 21, 2005. http://std.dkuug.dk/jtc1/sc2/wg2/docs/n2984.pdf
|
||
#
|
||
# [2] George L. Campbell: Compendium of the World's Languages.
|
||
# Volume 2: Ladakhi to Zuni. ISBN 0-415-20297-3. Taylor & Francis, 2000.
|
||
# Pages 1454 to 1458.
|
||
# Notes
|
||
# -----
|
||
# According to [1] (page 3), ᱽ can only follow the four ejective
|
||
# consonants ᱵ /pʼ/, ᱡ /cʼ/, ᱫ /tʼ/, and ᱜ /kʼ/; these become
|
||
# ᱵᱽ /b/, ᱫᱽ /d/, ᱡᱽ /d\u0361ʒ/, and ᱜᱽ /ɡ/. In online texts, however,
|
||
# we have occasionally encountered ᱽ following non-ejective plosives,
|
||
# for example after ᱯ /p/. These might possibly be typos. Our rules
|
||
# try to be resilient and handle ᱯᱽ as /b/.
|
||
#
|
||
# According to [1] (page 2), U+1C7C PHAARKAA follows the four “glottal”
|
||
# consonants ᱵ /pʼ/, ᱡ /cʼ/, ᱫ /tʼ/, and ᱜ /kʼ/ (these are actually
|
||
# ejective, not glottal). In online texts, however, we have frequently
|
||
# encountered ᱼ following non-ejective consonants.
|
||
# Context set: any character whose General_Category is L (letter) or
# M (mark). The consonant rules further down use it as a before-context,
# written "$inword {x}", so that those rules only apply when the consonant
# is preceded by a letter or a mark.
$inword = [[:L:][:M:]];
|
||
# Some online texts use a decomposed form of U+1C7A MU-GAAHLAA TTUDDAG.
# Either order of the two component marks is folded into the single
# character ᱺ, so later rules only have to deal with the composed form.
|
||
ᱹᱸ → ᱺ ;
|
||
ᱸᱹ → ᱺ ;
|
||
# Pass boundary: the rules after this line run as a separate pass over the
# result of the rules above.
::null();
|
||
# To simplify the rules below, enforce a uniform ordering of marks.
# After this pass ᱻ and ᱼ always come after ᱹ, ᱸ or ᱺ, never before them.
|
||
ᱻᱹ → ᱹᱻ ;
|
||
ᱻᱸ → ᱸᱻ ;
|
||
ᱻᱺ → ᱺᱻ ;
|
||
ᱼᱹ → ᱹᱼ ;
|
||
ᱼᱸ → ᱸᱼ ;
|
||
ᱼᱺ → ᱺᱼ ;
|
||
# Pass boundary (see above).
::null();
|
||
# Some online texts use U+1C7C PHAARKAA instead of U+1C7B RELAA for indicating
|
||
# long phonemes, presumably because the graphemes look similar in some fonts.
|
||
# Since phaarkaa is used for voicing ejectives and plosives (which cannot
|
||
# be lengthened), we rewrite phaarkaa to relaa.
|
||
# Only the text inside { } is rewritten; everything before it is context.
# Context: one of the listed letters, optionally followed by any number of
# the marks ᱹ, ᱸ, ᱺ. In that position ᱼ (phaarkaa) is replaced by ᱻ (relaa).
[ᱚᱟᱤᱩᱮᱳᱶᱢᱝᱞᱱ] [ᱹᱸᱺ]* {ᱼ} → ᱻ ;
|
||
# Pass boundary: the letter-to-IPA rules below run on the rewritten text.
::null();
|
||
# ᱚ: vowel /ɔ/. Longer letter+mark sequences are listed before shorter ones
# so that the more specific rule is tried before the bare letter. ᱸ and ᱺ
# both give the nasalized vowel, ᱻ gives the long vowel, and ᱹ leaves the
# vowel quality unchanged.
ᱚᱹᱻ → ɔː ;
|
||
ᱚᱹ → ɔ ;
|
||
ᱚᱸᱻ → ɔ\u0303ː ;
|
||
ᱚᱸ → ɔ\u0303 ;
|
||
ᱚᱺᱻ → ɔ\u0303ː ;
|
||
ᱚᱺ → ɔ\u0303 ;
|
||
ᱚᱻ → ɔː ;
|
||
ᱚ → ɔ ;
|
||
# ᱛ: /t/. A following ᱼ is consumed without changing the sound, ᱷ gives the
# aspirated /tʰ/, and ᱽ gives the voiced /d/.
ᱛᱼ → t ;
|
||
ᱛᱷ → tʰ ;
|
||
ᱛᱽ → d ;
|
||
# NOTE(review): this voices ᱛ whenever it is preceded by a letter or mark.
# The header notes list only ᱵ, ᱡ, ᱫ and ᱜ as ejectives, and the other plain
# stops (ᱠ, ᱪ, ᱯ, ᱴ) have no $inword rule — confirm this rule is intended.
$inword {ᱛ} → d ;
|
||
ᱛ → t ;
|
||
# ᱜ: ejective /kʼ/. A following ᱼ keeps it ejective, ᱷ gives /kʰ/, ᱽ gives
# the voiced /ɡ/. Without a modifier it is /ɡ/ when preceded by a letter or
# mark and /kʼ/ otherwise.
ᱜᱼ → kʼ ;
|
||
ᱜᱷ → kʰ ;
|
||
ᱜᱽ → ɡ ;
|
||
$inword {ᱜ} → ɡ ;
|
||
ᱜ → kʼ ;
|
||
# ᱝ: nasal /ŋ/, long when followed by ᱻ.
ᱝᱻ → ŋː ;
|
||
ᱝ → ŋ ;
|
||
# ᱞ: /l/, long when followed by ᱻ.
ᱞᱻ → lː ;
|
||
ᱞ → l ;
|
||
# ᱟ: vowel /a/. With ᱹ the vowel becomes /ə/, with ᱸ it is nasalized /ã/,
# and with ᱺ it is nasalized /ə/. A trailing ᱻ makes any of these long.
# Longer sequences are listed first so they are tried before the bare letter.
ᱟᱹᱻ → əː ;
|
||
ᱟᱹ → ə ;
|
||
ᱟᱸᱻ → ãː ;
|
||
ᱟᱸ → ã ;
|
||
ᱟᱺᱻ → ə\u0303ː ;
|
||
ᱟᱺ → ə\u0303 ;
|
||
ᱟᱻ → aː ;
|
||
ᱟ → a ;
|
||
# ᱠ: /k/. A following ᱼ is consumed without changing the sound, ᱷ gives
# /kʰ/, and ᱽ gives the voiced /ɡ/.
ᱠᱼ → k ;
|
||
ᱠᱷ → kʰ ;
|
||
ᱠᱽ → ɡ ;
|
||
ᱠ → k ;
|
||
# ᱡ: ejective /cʼ/. A following ᱼ keeps it ejective, ᱷ gives /cʰ/, ᱽ gives
# the voiced affricate. Without a modifier it is voiced when preceded by a
# letter or mark and /cʼ/ otherwise.
ᱡᱼ → cʼ ;
|
||
ᱡᱷ → cʰ ;
|
||
ᱡᱽ → d\u0361ʒ ;
|
||
$inword {ᱡ} → d\u0361ʒ ;
|
||
ᱡ → cʼ ;
|
||
# ᱢ: nasal /m/, long when followed by ᱻ.
ᱢᱻ → mː ;
|
||
ᱢ → m ;
|
||
# According to [1], ᱣ is sometimes /v/ and sometimes /w/.
|
||
# TODO: Find out if there is a rule for this.
# NOTE(review): the header's output inventory lists wː, but there is no
# ᱣᱻ rule here, so a ᱻ after ᱣ is not handled by this group — confirm.
|
||
ᱣᱸ → w\u0303 ;
|
||
ᱣ → w ;
|
||
# ᱤ: vowel /i/. ᱸ and ᱺ both give the nasalized vowel, ᱻ gives the long
# vowel, and ᱹ leaves the vowel quality unchanged. Longer sequences are
# listed first so they are tried before the bare letter.
ᱤᱹᱻ → iː ;
|
||
ᱤᱹ → i ;
|
||
ᱤᱸᱻ → ĩː ;
|
||
ᱤᱸ → ĩ ;
|
||
ᱤᱺᱻ → ĩː ;
|
||
ᱤᱺ → ĩ ;
|
||
ᱤᱻ → iː ;
|
||
ᱤ → i ;
|
||
# ᱥ: /s/, long when followed by ᱻ.
ᱥᱻ → sː ;
|
||
ᱥ → s ;
|
||
# According to [1], ᱦ is sometimes /h/ and sometimes /ʔ/.
|
||
# TODO: Find out if there is a rule for this.
# Until then ᱦ is always transcribed as /h/.
|
||
ᱦ → h ;
|
||
# ᱧ: nasal /ɲ/, long when followed by ᱻ.
ᱧᱻ → ɲː ;
|
||
ᱧ → ɲ ;
|
||
# ᱨ: /r/. A following ᱻ is consumed but does not lengthen the sound.
ᱨᱻ → r ;
|
||
ᱨ → r ;
|
||
# ᱩ: vowel /u/. ᱸ and ᱺ both give the nasalized vowel, ᱻ gives the long
# vowel, and ᱹ leaves the vowel quality unchanged. Longer sequences are
# listed first so they are tried before the bare letter.
ᱩᱹᱻ → uː ;
|
||
ᱩᱹ → u ;
|
||
ᱩᱸᱻ → ũː ;
|
||
ᱩᱸ → ũ ;
|
||
ᱩᱺᱻ → ũː ;
|
||
ᱩᱺ → ũ ;
|
||
ᱩᱻ → uː ;
|
||
ᱩ → u ;
|
||
# ᱪ: /c/. A following ᱼ is consumed without changing the sound, ᱷ gives
# /cʰ/, and ᱽ gives the voiced affricate.
ᱪᱼ → c ;
|
||
ᱪᱷ → cʰ ;
|
||
ᱪᱽ → d\u0361ʒ ;
|
||
ᱪ → c ;
|
||
# ᱫ: ejective /tʼ/. A following ᱼ keeps it ejective and ᱽ gives the voiced
# /d/. Without a modifier it is /d/ when preceded by a letter or mark and
# /tʼ/ otherwise.
ᱫᱼ → tʼ ;
|
||
# NOTE(review): the header's output inventory lists dʰ, but no rule in this
# file produces it; ᱫᱷ is mapped to tʰ here whereas ᱵᱷ is mapped to bʰ.
# Confirm whether dʰ was intended for ᱫᱷ.
ᱫᱷ → tʰ ;
|
||
ᱫᱽ → d ;
|
||
$inword {ᱫ} → d ;
|
||
ᱫ → tʼ ;
|
||
# ᱬ: nasal /ɳ/, long when followed by ᱻ.
ᱬᱻ → ɳː ;
|
||
ᱬ → ɳ ;
|
||
# TODO: ᱵᱷᱭᱨᱚᱵ → bʰhrɔb seems unlikely; would be good to verify.
# For now ᱭ is transcribed as /h/, the same output as ᱦ.
|
||
ᱭ → h ;
|
||
# ᱮ: vowel /e/. With ᱹ the vowel becomes /ɛ/, with ᱸ it is nasalized /ẽ/,
# and with ᱺ it is nasalized /ɛ/. A trailing ᱻ makes any of these long.
# Longer sequences are listed first so they are tried before the bare letter.
ᱮᱹᱻ → ɛː ;
|
||
ᱮᱹ → ɛ ;
|
||
ᱮᱺᱻ → ɛ\u0303ː ;
|
||
ᱮᱺ → ɛ\u0303 ;
|
||
ᱮᱸᱻ → ẽː ;
|
||
ᱮᱸ → ẽ ;
|
||
ᱮᱻ → eː ;
|
||
ᱮ → e ;
|
||
# ᱯ: /p/. A following ᱼ is consumed without changing the sound, ᱷ gives
# /pʰ/, and ᱽ gives the voiced /b/ (see the header notes on ᱯᱽ).
ᱯᱼ → p ;
|
||
ᱯᱷ → pʰ ;
|
||
ᱯᱽ → b ;
|
||
ᱯ → p ;
|
||
# ᱰ: /ɖ/, aspirated /ɖʰ/ when followed by ᱷ.
ᱰᱷ → ɖʰ ;
|
||
ᱰ → ɖ ;
|
||
# ᱱ: nasal /n/, long when followed by ᱻ.
ᱱᱻ → nː ;
|
||
ᱱ → n ;
|
||
# ᱲ: /ɽ/. A following ᱻ is consumed but does not lengthen the sound.
ᱲᱻ → ɽ ;
|
||
ᱲ → ɽ ;
|
||
# ᱳ: vowel /o/. ᱸ gives the nasalized vowel and ᱻ the long vowel. Longer
# sequences are listed first so they are tried before the bare letter.
ᱳᱸᱻ → õː ;
|
||
ᱳᱸ → õ ;
|
||
ᱳᱻ → oː ;
|
||
ᱳ → o ;
|
||
# ᱴ: /ʈ/. A following ᱼ is consumed without changing the sound, ᱷ gives
# /ʈʰ/, and ᱽ gives the voiced /ɖ/.
ᱴᱼ → ʈ ;
|
||
ᱴᱷ → ʈʰ ;
|
||
ᱴᱽ → ɖ ;
|
||
ᱴ → ʈ ;
|
||
# ᱵ: ejective /pʼ/. A following ᱼ keeps it ejective, ᱷ gives /bʰ/, ᱽ gives
# the voiced /b/. Without a modifier it is /b/ when preceded by a letter or
# mark and /pʼ/ otherwise.
ᱵᱼ → pʼ ;
|
||
ᱵᱷ → bʰ ;
|
||
ᱵᱽ → b ;
|
||
$inword {ᱵ} → b ;
|
||
ᱵ → pʼ ;
|
||
# ᱶ: nasalized /w/, long when followed by ᱻ.
ᱶᱻ → w\u0303ː ;
|
||
ᱶ → w\u0303 ;
|
||
|