2016-06-30 23:41:56 +00:00
|
|
|
|
# © 2016 and later: Unicode, Inc. and others.
|
|
|
|
|
# License & terms of use: http://www.unicode.org/copyright.html#License
|
2016-09-19 05:09:40 +00:00
|
|
|
|
#
|
2016-02-05 03:37:50 +00:00
|
|
|
|
# File: si_si_FONIPA.txt
|
2016-09-19 05:09:40 +00:00
|
|
|
|
# Generated from CLDR
|
2016-02-05 03:37:50 +00:00
|
|
|
|
#
|
|
|
|
|
|
|
|
|
|
# Sinhala pronunciation rules
|
|
|
|
|
#
|
|
|
|
|
# Output
|
|
|
|
|
# k ɡ ŋ ᵑɡ c ɟ ɲ ʈ ɖ ⁿɖ t d n ⁿd p b m ᵐb j r l w ʃ s z h f
|
|
|
|
|
# ə əː a aː æ æː i iː u uː e eː o oː
|
|
|
|
|
#
|
|
|
|
|
# References
|
|
|
|
|
# [1] Asanka Wasala, Ruvan Weerasinghe, and Kumudu Gamage:
|
|
|
|
|
# Sinhala Grapheme-to-Phoneme Conversion and Rules for Schwa Epenthesis.
|
|
|
|
|
# Proceedings of the COLING/ACL 2006 Main Conference Poster Sessions,
|
|
|
|
|
# pages 890–897. http://www.aclweb.org/anthology/P06-2114
|
|
|
|
|
# Simplify ya + yansaya to plain ya after a consonant.
# The text before "{" is context only: a consonant (U+0D9A..U+0DC6), the
# virama U+0DCA and an optional ZWJ (U+200D). Only the ය + virama + ය
# sequence after "{" is rewritten.
[\u0D9A-\u0DC6] \u0DCA (\u200D)? { ය\u0DCAය → ය;
|
|
|
|
|
# Delete ZWNJ and ZWJ to simplify further processing.
# Both rules have an empty right-hand side, so the character is removed.
\u200C → ;  # ZWNJ
\u200D → ;  # ZWJ
|
|
|
|
|
# Insert a schwa after every consonant that is not followed by a dependent vowel
# or virama.
# "::Null;" starts a new pass, so the rules above have been applied to the
# whole text before this one runs.
::Null;
# The consonant is captured as $1 and re-emitted followed by ə. The text after
# "}" is look-ahead context only: any character outside U+0DCA..U+0DDF,
# U+0DF2 and U+0DF3.
([\u0D9A-\u0DC6]) } [^\u0DCA-\u0DDF \u0DF2\u0DF3] → $1 ə;
|
|
|
|
|
# Pronunciation rules proper.
::Null;

# fප is an alternative spelling of ෆ.
# This occurs e.g. in ඩේව\u0DD2ඩ\u0DCA කොපර\u0DCAfප\u0DD3ල\u0DCAඩ\u0DCA (David Copperfield)
# [see http://bradshawofthefuture.blogspot.com/2013/02/f.html].
[Ff]ප → f;

# zස is seemingly the only way to unambiguously indicate a voiced /z/ sound.
# This occurs in e.g. ඇල\u0DCAzසය\u0DD2ම' රෝගය (Alzheimer's disease)
# [see https://si.wikipedia.org/wiki/ඇල\u0DCAzසය\u0DD2ම%27_රෝගය]
# or in zස\u0DD3බ\u0DCAරා (zebra) [see https://si.wikipedia.org/wiki/zස\u0DD3බ\u0DCAරා].
[Zz]ස → z;

# Anusvaraya, and the Latin letter "o" that is commonly typed in its place.
ං → ŋ;
o → ŋ; # common substitution for anusvaraya

# Visarga before a consonant: rewrite as consonant + virama + consonant, i.e.
# double the consonant. The "|" places the cursor before the replacement so
# that the consonant rules of this pass still transliterate the output.
ඃ ([\u0D9A-\u0DC6]) → | $1 \u0DCA $1; # TODO: check which consonants geminate
# Visarga in any other position.
ඃ → h;
|
|
|
|
|
# Independent vowel letters. Long vowels are written with the IPA length
# mark "ː".
අ → a;
ආ → aː;
ඇ → æ;
ඈ → æː;
ඉ → i;
ඊ → iː;
උ → u;
ඌ → uː;
# NOTE(review): the short letter yields "ri" but the long one yields "ruː";
# confirm that the differing vowel quality is intended.
ඍ → ri;
ඎ → ruː;
ඏ → ilu;
ඐ → iluː;
එ → e;
ඒ → eː;
ඓ → aj;
ඔ → o;
ඕ → oː;
ඖ → aw; # TODO: check if this is correct
|
|
|
|
|
# Consonant letters. Several letters share one output symbol (for example
# ක and ඛ both yield k), so the mapping is many-to-one.
ක → k;
ඛ → k;
ග → ɡ;
ඝ → ɡ;
ඞ → ŋ;
ඟ → ᵑɡ;
ච → c;
ඡ → c;
ජ → ɟ;
ඣ → ɟ;
ඤ → ɲ;
ඥ → kɲ; # TODO: double-check
ඦ → ɟ;
ට → ʈ;
ඨ → ʈ;
ඩ → ɖ;
ඪ → ɖ;
ණ → n;
ඬ → ⁿɖ;
ත → t;
ථ → t;
ද → d;
ධ → d;
න → n;
ඳ → ⁿd;
ප → p;
ඵ → p;
බ → b;
භ → b;
ම → m;
ඹ → ᵐb;
ය → j;
ර → r;
ල → l;
ව → w;
ශ → ʃ;
ෂ → ʃ;
ස → s;
හ → h;
ළ → l;
ෆ → f;
|
|
|
|
|
# Virama and dependent vowel signs. The earlier schwa-insertion pass adds no
# ə before any of these characters, so the vowel written here directly follows
# the consonant, and a virama leaves the consonant bare.
\u0DCA → ; # delete virama
ා → aː;
ැ → æ;
ෑ → æː;
\u0DD2 → i;
\u0DD3 → iː;
\u0DD4 → u;
\u0DD6 → uː;
ෘ → ru;
ෙ → e;
ේ → eː;
ෛ → aj;
ො → o;
ෝ → oː;
ෞ → aw; # TODO: check if this is correct
ෟ → lu;
ෲ → ruː;
ෳ → luː;
|
|
|
|
|
# Heuristics for turning /ə/ into /a/. Based on [1].
# $c: the consonant symbols emitted by the pronunciation rules. Braces
# enclose multi-character strings such as ᵑɡ so that each counts as one
# set member.
$c=[k ɡ ŋ {ᵑɡ} c ɟ ɲ ʈ ɖ {ⁿɖ} t d n {ⁿd} p b m {ᵐb} j r l w ʃ s z h f];
# $s: any character that is not a letter; the rules below use it as
# word-boundary context.
$s=[:^L:];
|
|
|
|
|
# Rule #1
# Schwa after the first consonant, or first two consonants, of a word
# (the left context starts with the non-letter $s) becomes /a/. Rules are
# tried in order, so the exceptions listed first keep the schwa by
# rewriting ə to itself.
::Null;
$s sv { ə → ə; # exception (a)
$s k { ə } r → ə; # exception (b)
$s $c { ə } $s → ə; # exception (c)
$s $c $c { ə → a;
$s $c { ə → a;
|
|
|
|
|
# Rule #2
# Vowel after a consonant + r cluster when another consonant follows.
::Null;
$c r { ə } $c → a; # clause (a) and (b)
# Keeps /a/ before h; listed before clause (c) so that clause (c) does not
# reduce it to ə.
$c r { a } h → a; # clause (d), exception
$c r { a } $c → ə; # clause (c)
|
|
|
|
|
# Rule #3
# The paper is unclear about what this rule means. The interpretation here
# assumes that "preceded" in the paper is a typo and should be read "followed".
::Null;
# A schwa directly after h becomes /a/ when that h itself follows one of
# a, e, æ, o or ə.
[a e æ o ə] h { ə → a;
|
|
|
|
|
# Rules #4 through #7
# All of these decide on a schwa by looking at what follows it (the text
# after "}" is look-ahead context and is not consumed).
::Null;
ə } $c $c → a; # Rule #4
# Listed before Rule #5 so that a schwa before word-final r, b, ɖ or ʈ is kept.
ə } [rbɖʈ] $s → ə; # Rule #5 exception
ə } $c $s → a; # Rule #5
ə } ji $s → a; # Rule #6
k { ə } [rl] u → a; # Rule #7
|
|
|
|
|
# Rule #8
# Note that the paper doesn't say explicitly that this rule should be
# anchored at the beginning of a word, but the remarks before the rules
# seem to imply this.
# Every rule here therefore starts with the non-letter context $s followed
# by k, and rewrites the vowel(s) after that k.
::Null;
$s k { a } l[aeo]ːj → ə; # Typo in paper: /j/ was /y/.
$s k { a } le[mh][ui] → ə;
$s k { alə } h[ui] → əle;
$s k { a } lə → ə;
|
|
|
|
|
# Diphthongs
::Null;
# Collapse a run of three or more w into exactly two.
www+ → ww; # යෞව\u0DCAවන
# After the listed vowels, "wu" is reduced to the glide w.
[i {iː} e {eː} æ {æː} o {oː} a {aː}] { wu → w;
əji → aj;
iji → iː; # perhaps: ij
# After the listed vowels, "ji" is reduced to the glide j.
[u {uː} e {eː} æ {æː} o {oː} a {aː}] { ji → j;
|
|
|
|
|
|