# [page metadata, not part of the rules] c70ebe3ce1 · X-SVN-Rev: 40193 · 196 lines · 5.4 KiB · Plaintext
# © 2016 and later: Unicode, Inc. and others.
# License & terms of use: http://www.unicode.org/copyright.html#License
#
# File: cy_cy_FONIPA.txt
# Generated from CLDR
#

# Transformation from Welsh (cy) to its IPA transcription (cy_FONIPA).
# Based on description of Northern Welsh in:
#
# http://en.wikipedia.org/wiki/Welsh_orthography
# http://en.wikipedia.org/wiki/Welsh_phonology
#
# Note that these rules are NOT complete: to be complete we would have to know
# the morphological analysis of the word. For example, final ‹au› is pronounced
# /a/ if it is the noun plural marker, otherwise it is /aɨ/. Similarly in
# “llongyfarch” (‘congratulating’), the morphological decomposition — “llon +
# cyfarch” — is needed to know that the ‹ng› is pronounced as /ŋg/, not as
# /ŋ/.
#
# Author: Richard Sproat
# Input normalization, applied before any phonetic rule:
#   1. lowercase the text,
#   2. compose it to NFC (so accented letters are single code points),
#   3. delete ‹’› and every character with the Punctuation property.
::Lower;
::NFC;
[’ [:P:]] → ;
# Class definitions
# Word edge, used as context by the stress, ‹y› and vowel-length rules.
# NOTE(review): unescaped whitespace inside a set is normally skipped by the
# rule parser, so this set may hold only the text-boundary anchor and no
# literal space -- confirm; a literal space would be written \u0020.
$end = [$ ];
# Both orthographic and phonetic vowels
$vowel = [aeiouwyâêîôûŵŷɑɨəɛɪɔʊ];
# Consonants; phones spelled with more than one code point are written as
# {…} strings.
# W is a placeholder for the glide -- see below
$cons = [
    m {m\u0325} n {n\u0325} ŋ {ŋ\u030A}
    p b t d k ɡ
    f v θ ð s ʃ h χ
    l ɬ r {r\u0325}
    {d\u0361ʒ} g W w j
];
# Preprocessing of letters that sometimes occur
# Each is respelled with a letter that the consonant rules of the next pass
# know how to transcribe (e.g. ‹k› becomes ‹c›, which that pass turns into /k/).
k → c;
v → f;
x → s;
z → s;
# End of pass: the respelled text is re-read from the start by the next pass.
::Null;
# Consonant transductions:
# Longer graphemes are listed before shorter ones so that a trigraph or
# digraph is matched before a rule for its first letter could apply.
# Trigraphs
ngh → ŋ\u030A;
# Digraphs
ch → χ;
dd → ð;
ff → f;
ll → ɬ;
mh → m\u0325;
nh → n\u0325;
ng → ŋ;
ph → f;
rh → r\u0325;
th → θ;
# Monographs
b → b;
c → k;
d → d;
f → v;
g → ɡ;
h → h;
j → d\u0361ʒ; # Loan words
l → l;
m → m;
n → n;
p → p;
r → r;
s → s;
t → t;
::Null;
# Transduce ‹si› to /ʃ/ before vowels
# The following vowel is context only: it is required for the match but is
# not consumed or rewritten by this rule.
si} $vowel → ʃ;
::Null;
# Treatment of glides.
# First transduce ‹i›, ‹w› to glides prior to vowels. With ‹w› we want to
# do this also before /r,l/ after /ɡ/ (from Proto-Celtic *w) e.g. “gwlad”,
# “gwraig”. However the “after g” environment must allow for the following
# possibilities:
#
# ɡ → ŋ via nasal mutation
# ɡ → 0 via soft mutation
#
# The glide /w/ is written as the placeholder W for now: plain ‹w› is a
# member of $vowel and is rewritten by the vowel rules later on, so the glide
# has to be kept apart from it until those rules have run.
{i} $vowel → j;
{w} $vowel → W; # Temporary register
[ɡŋ] {w} [rl] $vowel → W; # Plain or nasal mutation environment
^ {w} [rl] $vowel → W; # Soft mutation at the beginning of a word
# Transduce accented ‹ẃ› to ‹w›: this is used to indicate when a ‹w› that would
# normally be expected to be a glide, is instead a vowel:
ẃ → w;
::Null;
# Stress placement, needed for vowel quality/quantity prediction
# Basic rule of stress in Welsh is to place it on the penult,
# except of course in monosyllables.
# In both rules the stress mark is inserted directly in front of the vowel(s)
# of the stressed syllable; $1 re-emits the matched text unchanged.
{($vowel+ $cons+ $vowel+ $cons*)} $end → ˈ $1; ## Polysyllabic words
$end $cons* {($vowel+ $cons*)} $end → ˈ $1; ## Monosyllabic words
::Null;
# Transduction of vowels
# The first rule above overgenerates streams of stress marks. The rule below
# cleans that up.
ˈ+ → ˈ;
# Diphthongs
# Deal with ‹y› first since we also need to lengthen the /ɨ/ if that is in the
# correct environment for lengthening.
# ‹y› is /ɨ/ in final syllable, otherwise /ə/
# "Final syllable" is expressed as: only consonants may stand between the
# vowel and $end. The context-restricted rules must precede the general ones.
yw } $cons* $end → ɨu;
yw → əu;
y} $cons* $end → ɨ;
y → ə;
::Null;
# Diphthongs in long environment
# Every rule here requires the stress mark immediately before the diphthong;
# the mark and the following consonant/edge are context only.
# Final, or before word-final s
ˈ { ɨu } s? $end → ɨːu;
ˈ { aw } s? $end → ɑːu;
ˈ { ew } s? $end → eːu;
ˈ { oe } s? $end → ɔːɨ;
ˈ { ou } s? $end → ɔːɨ;
ˈ { wy } s? $end → uːɨ;
# before b, ch, d, dd, g, f, ff, th followed by the end of a word
# or a vowel
ˈ { ɨu } [bχdðɡvfθ] $end → ɨːu;
ˈ { aw } [bχdðɡvfθ] $end → ɑːu;
ˈ { ew } [bχdðɡvfθ] $end → eːu;
ˈ { oe } [bχdðɡvfθ] $end → ɔːɨ;
ˈ { ou } [bχdðɡvfθ] $end → ɔːɨ;
ˈ { wy } [bχdðɡvfθ] $end → uːɨ;
ˈ { ɨu } [bχdðɡvfθ] $vowel → ɨːu;
ˈ { aw } [bχdðɡvfθ] $vowel → ɑːu;
ˈ { ew } [bχdðɡvfθ] $vowel → eːu;
ˈ { oe } [bχdðɡvfθ] $vowel → ɔːɨ;
ˈ { ou } [bχdðɡvfθ] $vowel → ɔːɨ;
ˈ { wy } [bχdðɡvfθ] $vowel → uːɨ;
# Diphthongs in other environments
# These context-free rules apply to any vowel pair not already claimed by a
# more specific rule listed earlier in this pass.
ae → ɑːɨ;
ai → ai;
au → aɨ; ## As plural ending /a/, but we can't predict this
aw → au;
ei → əi;
eu → əɨ;
ew → ɛu;
ey → əɨ;
iw → ɪu;
oe → ɔɨ;
oi → ɔi;
ou → ɔɨ;
uw → ɨu;
wy → ʊɨ;
# Long environments
# Single vowels directly after the stress mark are lengthened in the
# environments below; the mark and the right-hand context are not consumed.
# Final, or before word-final s
ˈ { ɨ } s? $end → ɨː;
ˈ { a } s? $end → ɑː;
ˈ { e } s? $end → eː;
ˈ { i } s? $end → iː;
ˈ { o } s? $end → oː;
ˈ { u } s? $end → ɨː;
ˈ { w } s? $end → uː;
# before b, ch, d, dd, g, f, ff, th followed by the end of a word
# or a vowel
ˈ { ɨ } [bχdðɡvfθ] $end → ɨː;
ˈ { a } [bχdðɡvfθ] $end → ɑː;
ˈ { e } [bχdðɡvfθ] $end → eː;
ˈ { i } [bχdðɡvfθ] $end → iː;
ˈ { o } [bχdðɡvfθ] $end → oː;
ˈ { u } [bχdðɡvfθ] $end → ɨː;
ˈ { w } [bχdðɡvfθ] $end → uː;
ˈ { ɨ } [bχdðɡvfθ] $vowel → ɨː;
ˈ { a } [bχdðɡvfθ] $vowel → ɑː;
ˈ { e } [bχdðɡvfθ] $vowel → eː;
ˈ { i } [bχdðɡvfθ] $vowel → iː;
ˈ { o } [bχdðɡvfθ] $vowel → oː;
ˈ { u } [bχdðɡvfθ] $vowel → ɨː;
ˈ { w } [bχdðɡvfθ] $vowel → uː;
# Short environments
# Fallback for every vowel letter that none of the earlier rules in this
# pass rewrote.
a → a;
e → ɛ;
i → ɪ;
o → ɔ;
u → ɨ\u031E;
w → ʊ;
::Null;
# Turn the glide placeholder W back into /w/. This is safe only now: the
# rules that rewrite vowel ‹w› ran in the previous pass.
W → w;
# Finally, deal with vowels that are marked as long with a circumflex
# (“to bach”). Do this last because we don't want the other vowel
# changes messing this up.
â → ɑː;
ê → eː;
î → iː;
ô → oː;
û → ɨː;
ŵ → uː;
ŷ → ɨː;
::Null;
# Move IPA stress marker to start of syllable.
# The stress pass placed ˈ directly before the vowel. Shift it left across
# the material captured in $1: one consonant (or w), then optionally a
# liquid, then optionally j and w.
([$cons w] [l ɬ r {r\u0325}]? j? w?) ˈ → ˈ $1;