scuffed-code/icu4c/data/translit/lgreek.txt
Alan Liu 6c1640e9d4 ICU-199 rule fixes from Mark
X-SVN-Rev: 636
2000-01-18 20:12:46 +00:00

377 lines
14 KiB
Plaintext

//--------------------------------------------------------------------
// Copyright (C) 1999, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// Date Name Description
// 11/17/99 aliu Creation.
//--------------------------------------------------------------------
// Latin-Greek
// Table "lgreek" with a single "Rule" entry: a sequence of quoted strings
// that first define variables ("name=value;", referenced as {name}) and
// then list the Latin<->Greek transliteration rules. Rule order matters
// (several rules below are explicitly placed before shorter ones).
lgreek {
Rule {
// ==============================================
// Modern Greek Transliteration Rules
//
// This transliterates modern Greek characters, but using rules
// that are traditional for Ancient Greek, and
// thus more closely resemble Greek words that have become part
// of English. It differs from the official Greek
// transliteration, which is more phonetic (since
// most modern Greek vowels, for example, have
// degenerated simply to sound like "ee").
//
// There are only a few tricky parts.
// 1. eta and omega don't map directly to Latin vowels,
// so we use a macron on e and o, and some
// other combinations if they are accented.
// 2. The accented-with-diaeresis i and y are substituted too.
// 3. Some letters use digraphs, like "ph". While typical,
// they need some special handling.
// 4. A gamma before a gamma or a few other letters is
// transliterated as an "n", as in "Anglo".
// 5. An ypsilon after a vowel is a "u", as in
// "Mouseio". Otherwise it is a "y", as in "Physikon".
// 6. The construction of the rules is made simpler by making sure
// that most rules for lowercase letters exactly correspond to the
// rules for uppercase letters, *except* for the case of the letters
// in the rule itself. That way, after modifying the uppercase rules,
// you can just copy, paste, and "set to lowercase" to get
// the rules for lowercase letters!
//
// Notation as used in this file (Latin is on the left-hand side of
// every rule, Greek on the right-hand side):
// a > b; rule applied only in the Latin-to-Greek direction
// a < b; rule applied only in the Greek-to-Latin direction
// a <> b; rule applied in both directions
// (...) surrounding context that must be present next to the text
// being converted (see items 4 and 5 above)
// '' a literal apostrophe, used as a separator in the Latin text
// ==============================================
// ==============================================
// Variables, used to make the rules more comprehensible
// and for conditionals.
// ==============================================
// Latin Letters
"E-MACRON=\u0112;"
"e-macron=\u0113;"
"O-MACRON=\u014C;"
"o-macron=\u014D;"
"Y-UMLAUT=\u0178;"
"y-umlaut=\u00FF;"
// Disabled alternative: the accented forms written with a real combining
// acute accent (U+0301). Kept for reference; the single-character
// equivalents below are what is actually in effect.
//! // with real accents.
//! + "E-MACRON-ACUTE=\u0112\u0301;"
//! + "e-macron-acute=\u0113\u0301;"
//! + "O-MACRON-ACUTE=\u014C\u0301;"
//! + "o-macron-acute=\u014D\u0301;"
//! + "y-umlaut-acute=\u00FF\u0301;"
//! + "\u00ef-acute=\u00ef\u0301;"
//! + "\u00fc-acute=\u00fc\u0301;"
//! //
// Single-letter equivalents: each "macron + acute" / "umlaut + acute"
// combination is represented by one precomposed circumflex letter
// (U+00CA/U+00EA = E/e circumflex, U+00D4/U+00F4 = O/o circumflex,
// U+0177 = y circumflex, U+00EE = i circumflex, U+00FB = u circumflex).
// Only lowercase variants exist for the last three.
"E-MACRON-ACUTE=\u00CA;"
"e-macron-acute=\u00EA;"
"O-MACRON-ACUTE=\u00D4;"
"o-macron-acute=\u00F4;"
"y-umlaut-acute=\u0177;"
"\u00ef-acute=\u00EE;"
"\u00fc-acute=\u00FB;"
// Greek Letters
// Uppercase base letters
"ALPHA=\u0391;"
"BETA=\u0392;"
"GAMMA=\u0393;"
"DELTA=\u0394;"
"EPSILON=\u0395;"
"ZETA=\u0396;"
"ETA=\u0397;"
"THETA=\u0398;"
"IOTA=\u0399;"
"KAPPA=\u039A;"
"LAMBDA=\u039B;"
"MU=\u039C;"
"NU=\u039D;"
"XI=\u039E;"
"OMICRON=\u039F;"
"PI=\u03A0;"
"RHO=\u03A1;"
"SIGMA=\u03A3;"
"TAU=\u03A4;"
"YPSILON=\u03A5;"
"PHI=\u03A6;"
"CHI=\u03A7;"
"PSI=\u03A8;"
"OMEGA=\u03A9;"
// Uppercase accented vowels: a trailing "+" in a variable name marks
// the accented form of the letter.
"ALPHA+=\u0386;"
"EPSILON+=\u0388;"
"ETA+=\u0389;"
"IOTA+=\u038A;"
"OMICRON+=\u038C;"
"YPSILON+=\u038E;"
"OMEGA+=\u038F;"
// Uppercase vowels with diaeresis
"IOTA_DIAERESIS=\u03AA;"
"YPSILON_DIAERESIS=\u03AB;"
// Lowercase base letters
"alpha=\u03B1;"
"beta=\u03B2;"
"gamma=\u03B3;"
"delta=\u03B4;"
"epsilon=\u03B5;"
"zeta=\u03B6;"
"eta=\u03B7;"
"theta=\u03B8;"
"iota=\u03B9;"
"kappa=\u03BA;"
"lambda=\u03BB;"
"mu=\u03BC;"
"nu=\u03BD;"
"xi=\u03BE;"
"omicron=\u03BF;"
"pi=\u03C0;"
"rho=\u03C1;"
"sigma=\u03C3;"
"tau=\u03C4;"
"ypsilon=\u03C5;"
"phi=\u03C6;"
"chi=\u03C7;"
"psi=\u03C8;"
"omega=\u03C9;"
// Lowercase variant forms: accented vowels, vowels with diaeresis,
// vowels with diaeresis plus accent, and the final form of sigma.
"alpha+=\u03AC;"
"epsilon+=\u03AD;"
"eta+=\u03AE;"
"iota+=\u03AF;"
"omicron+=\u03CC;"
"ypsilon+=\u03CD;"
"omega+=\u03CE;"
"iota_diaeresis=\u03CA;"
"ypsilon_diaeresis=\u03CB;"
"iota_diaeresis+=\u0390;"
"ypsilon_diaeresis+=\u03B0;"
// Note: for sigma the "+" does not mean "accented"; sigma+ is the
// word-final sigma (U+03C2), which has no uppercase counterpart.
"sigma+=\u03C2;"
// Variables for conditional mappings
// Use lowercase for all variable names, to allow cut/paste below.
// NOTE(review): the leading '~' in the "letter" set is presumably a
// literal tilde, so that the "~s" marker used by the sigma rules counts
// as a letter - confirm it is not interpreted as a set operator.
"letter=[~[:Lu:][:Ll:]];"
"lower=[[:Ll:]];"
// Latin vowels after which "c" is mapped to "s" instead of "k"
// (see the "Extra English Letters" rules).
"softener=[eiyEIY];"
// Every Latin and Greek vowel, in all cases and accented forms; used as
// the preceding context that turns an ypsilon into "u" rather than "y".
"vowel=[aeiouAEIOU"
"{ALPHA}{EPSILON}{ETA}{IOTA}{OMICRON}{YPSILON}{OMEGA}"
"{ALPHA+}{EPSILON+}{ETA+}{IOTA+}{OMICRON+}{YPSILON+}{OMEGA+}"
"{IOTA_DIAERESIS}{YPSILON_DIAERESIS}"
"{alpha}{epsilon}{eta}{iota}{omicron}{ypsilon}{omega}"
"{alpha+}{epsilon+}{eta+}{iota+}{omicron+}{ypsilon+}{omega+}"
"{iota_diaeresis}{ypsilon_diaeresis}"
"{iota_diaeresis+}{ypsilon_diaeresis+}"
"];"
// n-gamma: Latin letters before which an "n" stands for a gamma.
// gamma-n: Greek letters before which a gamma is written as "n".
"n-gamma=[GKXCgkxc];"
"gamma-n=[{GAMMA}{KAPPA}{CHI}{XI}{gamma}{kappa}{chi}{xi}];"
// Either case of Latin "p"; used to detect a pi directly before a sigma.
"pp=[Pp];"
// ==============================================
// Rules
// ==============================================
// The following are special titlecases, and should
// not be copied when duplicating the lowercase
// ==============================================
// Titlecase digraphs: an uppercase theta/phi/chi followed by a lowercase
// letter is written "Th"/"Ph"/"Ch" rather than "TH"/"PH"/"CH".
// NOTE(review): the "(" before {lower} has no matching ")" in these
// three rules, unlike the "(...)" contexts elsewhere in the file -
// confirm the rule parser treats the closing parenthesis as optional.
"Th <> {THETA}({lower};"
"Ph <> {PHI}({lower};"
"Ch <> {CHI}({lower};"
//masked: + "Ps<{PHI}({lower};"
// Because there are no uppercase forms for final sigma,
// we had to move all the sigma rules up here.
// Remember to insert ' to preserve round trip for double letters.
// We don't need to do this for the digraphs with h,
// since h is not created when mapping back from Greek.
// Use special form for s
"''S <> ({pp}) {SIGMA} ;" // handle PS
"S <> {SIGMA};"
// The following are a bit tricky. 's' takes two forms in Greek:
// final or non-final.
// We use ~s to represent the abnormal form: final before letter
// or non-final before non-letter.
// We use 's to separate p and s (otherwise ps is one letter: psi).
// So, we break out the following forms:
// Greek-to-Latin direction
"''s < ({pp}) {sigma} ({letter});"
"s < {sigma} ({letter});"
"~s < {sigma} ;"
"~s < {sigma+} ({letter});"
"''s < ({pp}) {sigma+} ;"
"s < {sigma+} ;"
// Latin-to-Greek direction
"~s ({letter}) > {sigma+};"
"~s > {sigma};"
"''s ({letter}) > {sigma};"
"''s > {sigma+};"
"s ({letter}) > {sigma};"
"s > {sigma+};"
// Because there are no uppercase forms, we had to move these up too.
// Typing variants: i or y followed by a double quote and a backquote
// produce the letter with diaeresis and accent.
"i\"`>{iota_diaeresis+};"
"y\"`>{ypsilon_diaeresis+};"
"{\u00ef-acute} <> {iota_diaeresis+};"
// NOTE(review): "{vowel})" below has a closing parenthesis but no
// opening one, unlike "({vowel})" in the ypsilon rules further down -
// confirm the rule parser treats the opening parenthesis as optional.
"{\u00fc-acute} <> {vowel}){ypsilon_diaeresis+};"
"{y-umlaut-acute} <> {ypsilon_diaeresis+};"
// ==============================================
// Uppercase Forms.
// To make lowercase forms, just copy and lowercase below
// ==============================================
// Typing variants, in case the keyboard doesn't have accents:
// a backquote after the letter stands for the accent, a double quote
// for the diaeresis, and a doubled E or O for eta or omega.
"A`>{ALPHA+};"
"E`>{EPSILON+};"
"EE`>{ETA+};"
"EE>{ETA};"
"I`>{IOTA+};"
"O`>{OMICRON+};"
"OO`>{OMEGA+};"
"OO>{OMEGA};"
"I\">{IOTA_DIAERESIS};"
"Y\">{YPSILON_DIAERESIS};"
// Basic Letters
"A<>{ALPHA};"
"\u00c1<>{ALPHA+};"
"B<>{BETA};"
// Gamma before gamma, kappa, chi or xi is written "N" (tricky part 4).
"N ({n-gamma}) <> {GAMMA} ({gamma-n});"
"G<>{GAMMA};"
"D<>{DELTA};"
// An epsilon following an E/e gets a leading apostrophe so that the
// pair is not read back as "EE" (eta).
"''E <> ([Ee]){EPSILON};" // handle EE
"E<>{EPSILON};"
"\u00c9<>{EPSILON+};"
"Z<>{ZETA};"
"{E-MACRON-ACUTE}<>{ETA+};"
"{E-MACRON}<>{ETA};"
"TH<>{THETA};"
"I<>{IOTA};"
"\u00cd<>{IOTA+};"
"\u00cf<>{IOTA_DIAERESIS};"
"K<>{KAPPA};"
"L<>{LAMBDA};"
"M<>{MU};"
// A real nu before gamma, kappa, chi or xi gets a trailing apostrophe
// so that it is not confused with the "N" that stands for gamma.
"N'' <> {NU} ({gamma-n});"
"N<>{NU};"
"X<>{XI};"
// An omicron following an O/o gets a leading apostrophe so that the
// pair is not read back as "OO" (omega).
"''O <> ([Oo]) {OMICRON};" // handle OO
"O<>{OMICRON};"
"\u00d3<>{OMICRON+};"
"PH<>{PHI};" // needs ordering before P
"PS<>{PSI};" // needs ordering before P
"P<>{PI};"
"R<>{RHO};"
"T<>{TAU};"
// Ypsilon after a vowel is "U"; otherwise it is "Y" (tricky part 5).
"U <> ({vowel}) {YPSILON};"
"\u00da <> ({vowel}) {YPSILON+};"
"\u00dc <> ({vowel}) {YPSILON_DIAERESIS};"
"Y<>{YPSILON};"
"\u00dd<>{YPSILON+};"
"{Y-UMLAUT}<>{YPSILON_DIAERESIS};"
"CH<>{CHI};"
"{O-MACRON-ACUTE}<>{OMEGA+};"
"{O-MACRON}<>{OMEGA};"
// Extra English Letters. Mapped for completeness.
// These are one-directional (Latin side only) and rewrite the letter
// into another Latin letter or digraph that has a Greek mapping above.
// NOTE(review): the "|" presumably marks where processing resumes, so
// that the replacement text is itself converted by the rules above -
// confirm against the rule syntax documentation.
"C({softener})>|S;"
"C>|K;"
"F>|PH;"
"H>|CH;"
"J>|I;"
"Q>|K;"
"V>|U;"
"W>|U;"
// ==============================================
// Lowercase Forms. Just copy above and lowercase
// ==============================================
// Typing variants, in case the keyboard doesn't have accents
"a`>{alpha+};"
"e`>{epsilon+};"
"ee`>{eta+};"
"ee>{eta};"
"i`>{iota+};"
"o`>{omicron+};"
"oo`>{omega+};"
"oo>{omega};"
"i\">{iota_diaeresis};"
"y\">{ypsilon_diaeresis};"
// Basic letters
"a<>{alpha};"
"\u00e1<>{alpha+};"
"b<>{beta};"
// Gamma before gamma, kappa, chi or xi is written "n" (tricky part 4).
"n ({n-gamma}) <> {gamma} ({gamma-n});"
"g<>{gamma};"
"d<>{delta};"
"''e <> ([Ee]){epsilon};" // handle ee
"e<>{epsilon};"
"\u00e9<>{epsilon+};"
"z<>{zeta};"
"{e-macron-acute}<>{eta+};"
"{e-macron}<>{eta};"
"th<>{theta};"
"i<>{iota};"
"\u00ed<>{iota+};"
"\u00ef<>{iota_diaeresis};"
"k<>{kappa};"
"l<>{lambda};"
"m<>{mu};"
// A real nu before gamma, kappa, chi or xi gets a trailing apostrophe.
"n'' <> {nu} ({gamma-n});"
"n<>{nu};"
"x<>{xi};"
"''o <> ([Oo]) {omicron};" // handle oo
"o<>{omicron};"
"\u00f3<>{omicron+};"
"ph<>{phi};" // needs ordering before p
"ps<>{psi};" // needs ordering before p
"p<>{pi};"
"r<>{rho};"
"t<>{tau};"
// Ypsilon after a vowel is "u"; otherwise it is "y" (tricky part 5).
"u <> ({vowel}){ypsilon};"
"\u00fa <> ({vowel}){ypsilon+};"
"\u00fc <> ({vowel}){ypsilon_diaeresis};"
"y<>{ypsilon};"
"\u00fd<>{ypsilon+};"
"{y-umlaut}<>{ypsilon_diaeresis};"
"ch<>{chi};"
"{o-macron-acute}<>{omega+};"
"{o-macron}<>{omega};"
// Extra English letters. Mapped for completeness (lowercase copies of
// the one-directional uppercase rules above).
"c({softener})>|s;"
"c>|k;"
"f>|ph;"
"h>|ch;"
"j>|i;"
"q>|k;"
"v>|u;"
"w>|u;"
// ====================================
// Normal final rule: remove '
// (currently disabled - the rule below is commented out, so stray
// apostrophes in Latin input are not removed by this rule set)
// ====================================
//+ "''>;"
}
}