ec927f3702
X-SVN-Rev: 1372
377 lines
14 KiB
Plaintext
377 lines
14 KiB
Plaintext
//--------------------------------------------------------------------
|
|
// Copyright (C) 1999, International Business Machines
|
|
// Corporation and others. All Rights Reserved.
|
|
//--------------------------------------------------------------------
|
|
// Date Name Description
|
|
// 11/17/99 aliu Creation.
|
|
//--------------------------------------------------------------------
|
|
|
|
// Latin-Greek
|
|
|
|
lgreek {
|
|
Rule {
|
|
// ==============================================
|
|
// Modern Greek Transliteration Rules
|
|
//
|
|
// This transliterates modern Greek characters, but using rules
|
|
// that are traditional for Ancient Greek, and
|
|
// thus more resemble Greek words that have become part
|
|
// of English. It differs from the official Greek
|
|
// transliteration, which is more phonetic (since
|
|
// most modern Greek vowels, for example, have
|
|
// degenerated simply to sound like "ee").
|
|
//
|
|
// There are only a few tricky parts.
|
|
// 1. eta and omega don't map directly to Latin vowels,
|
|
// so we use a macron on e and o, and some
|
|
// other combinations if they are accented.
|
|
// 2. The accented, diaeresis i and y are substituted too.
|
|
// 3. Some letters use digraphs, like "ph". While typical,
|
|
// they need some special handling.
|
|
// 4. A gamma before a gamma or a few other letters is
|
|
// transliterated as an "n", as in "Anglo"
|
|
// 5. An ypsilon after a vowel is a "u", as in
|
|
// "Mouseio". Otherwise it is a "y" as in "Physikon"
|
|
// 6. The construction of the rules is made simpler by making sure
|
|
// that most rules for lowercase letters exactly correspond to the
|
|
// rules for uppercase letters, *except* for the case of the letters
|
|
// in the rule itself. That way, after modifying the uppercase rules,
|
|
// you can just copy, paste, and "set to lowercase" to get
|
|
// the rules for lowercase letters!
|
|
// ==============================================
|
|
|
|
// ==============================================
|
|
// Variables, used to make the rules more comprehensible
|
|
// and for conditionals.
|
|
// ==============================================
|
|
|
|
// Latin Letters
|
|
|
|
"E-MACRON=\u0112;"
|
|
"e-macron=\u0113;"
|
|
"O-MACRON=\u014C;"
|
|
"o-macron=\u014D;"
|
|
"Y-UMLAUT=\u0178;"
|
|
"y-umlaut=\u00FF;"
|
|
|
|
//! // with real accents.
|
|
//! + "E-MACRON-ACUTE=\u0112\u0301;"
|
|
//! + "e-macron-acute=\u0113\u0301;"
|
|
//! + "O-MACRON-ACUTE=\u014C\u0301;"
|
|
//! + "o-macron-acute=\u014D\u0301;"
|
|
//! + "y-umlaut-acute=\u00FF\u0301;"
|
|
//! + "\u00ef-acute=\u00ef\u0301;"
|
|
//! + "\u00fc-acute=\u00fc\u0301;"
|
|
//! //
|
|
|
|
// single letter equivalents
|
|
|
|
"E-MACRON-ACUTE=\u00CA;"
|
|
"e-macron-acute=\u00EA;"
|
|
"O-MACRON-ACUTE=\u00D4;"
|
|
"o-macron-acute=\u00F4;"
|
|
"y-umlaut-acute=\u0177;"
|
|
"\u00ef-acute=\u00EE;"
|
|
"\u00fc-acute=\u00FB;"
|
|
|
|
// Greek Letters
|
|
|
|
"ALPHA=\u0391;"
|
|
"BETA=\u0392;"
|
|
"GAMMA=\u0393;"
|
|
"DELTA=\u0394;"
|
|
"EPSILON=\u0395;"
|
|
"ZETA=\u0396;"
|
|
"ETA=\u0397;"
|
|
"THETA=\u0398;"
|
|
"IOTA=\u0399;"
|
|
"KAPPA=\u039A;"
|
|
"LAMBDA=\u039B;"
|
|
"MU=\u039C;"
|
|
"NU=\u039D;"
|
|
"XI=\u039E;"
|
|
"OMICRON=\u039F;"
|
|
"PI=\u03A0;"
|
|
"RHO=\u03A1;"
|
|
"SIGMA=\u03A3;"
|
|
"TAU=\u03A4;"
|
|
"YPSILON=\u03A5;"
|
|
"PHI=\u03A6;"
|
|
"CHI=\u03A7;"
|
|
"PSI=\u03A8;"
|
|
"OMEGA=\u03A9;"
|
|
|
|
"ALPHA+=\u0386;"
|
|
"EPSILON+=\u0388;"
|
|
"ETA+=\u0389;"
|
|
"IOTA+=\u038A;"
|
|
"OMICRON+=\u038C;"
|
|
"YPSILON+=\u038E;"
|
|
"OMEGA+=\u038F;"
|
|
"IOTA_DIAERESIS=\u03AA;"
|
|
"YPSILON_DIAERESIS=\u03AB;"
|
|
|
|
"alpha=\u03B1;"
|
|
"beta=\u03B2;"
|
|
"gamma=\u03B3;"
|
|
"delta=\u03B4;"
|
|
"epsilon=\u03B5;"
|
|
"zeta=\u03B6;"
|
|
"eta=\u03B7;"
|
|
"theta=\u03B8;"
|
|
"iota=\u03B9;"
|
|
"kappa=\u03BA;"
|
|
"lambda=\u03BB;"
|
|
"mu=\u03BC;"
|
|
"nu=\u03BD;"
|
|
"xi=\u03BE;"
|
|
"omicron=\u03BF;"
|
|
"pi=\u03C0;"
|
|
"rho=\u03C1;"
|
|
"sigma=\u03C3;"
|
|
"tau=\u03C4;"
|
|
"ypsilon=\u03C5;"
|
|
"phi=\u03C6;"
|
|
"chi=\u03C7;"
|
|
"psi=\u03C8;"
|
|
"omega=\u03C9;"
|
|
|
|
// Accented and diaeresis forms, plus final sigma (sigma+)
|
|
|
|
"alpha+=\u03AC;"
|
|
"epsilon+=\u03AD;"
|
|
"eta+=\u03AE;"
|
|
"iota+=\u03AF;"
|
|
"omicron+=\u03CC;"
|
|
"ypsilon+=\u03CD;"
|
|
"omega+=\u03CE;"
|
|
"iota_diaeresis=\u03CA;"
|
|
"ypsilon_diaeresis=\u03CB;"
|
|
"iota_diaeresis+=\u0390;"
|
|
"ypsilon_diaeresis+=\u03B0;"
|
|
"sigma+=\u03C2;"
|
|
|
|
// Variables for conditional mappings
|
|
|
|
// Use lowercase for all variable names, to allow cut/paste below.
|
|
|
|
"letter=[~[:Lu:][:Ll:]];"
|
|
"lower=[[:Ll:]];"
|
|
"softener=[eiyEIY];"
|
|
"vowel=[aeiouAEIOU"
|
|
"{ALPHA}{EPSILON}{ETA}{IOTA}{OMICRON}{YPSILON}{OMEGA}"
|
|
"{ALPHA+}{EPSILON+}{ETA+}{IOTA+}{OMICRON+}{YPSILON+}{OMEGA+}"
|
|
"{IOTA_DIAERESIS}{YPSILON_DIAERESIS}"
|
|
"{alpha}{epsilon}{eta}{iota}{omicron}{ypsilon}{omega}"
|
|
"{alpha+}{epsilon+}{eta+}{iota+}{omicron+}{ypsilon+}{omega+}"
|
|
"{iota_diaeresis}{ypsilon_diaeresis}"
|
|
"{iota_diaeresis+}{ypsilon_diaeresis+}"
|
|
"];"
|
|
"n-gamma=[GKXCgkxc];"
|
|
"gamma-n=[{GAMMA}{KAPPA}{CHI}{XI}{gamma}{kappa}{chi}{xi}];"
|
|
"pp=[Pp];"
|
|
|
|
// ==============================================
|
|
// Rules
|
|
// ==============================================
|
|
// The following are special titlecase rules: an uppercase Greek letter that
|
|
// is followed by a lowercase letter maps to a titlecase digraph (Th, Ph, Ch).
// They should not be copied when duplicating the uppercase rules as lowercase.
|
|
// ==============================================
|
|
|
|
"Th <> {THETA}({lower});"
|
|
"Ph <> {PHI}({lower});"
|
|
"Ch <> {CHI}({lower});"
|
|
//masked: + "Ps<{PSI}({lower});"
|
|
|
|
// Because there are no uppercase forms for final sigma,
|
|
// we had to move all the sigma rules up here.
|
|
|
|
// Remember to insert ' to preserve round trip for double letters.
|
|
// We don't need to do this for the digraphs with h,
|
|
// since it is not created when mapping back from Greek.
|
|
|
|
// use special form for s
|
|
|
|
"''S <> ({pp}) {SIGMA} ;" // handle PS
|
|
"S <> {SIGMA};"
|
|
|
|
// The following are a bit tricky. 's' takes two forms in greek
|
|
// final or non final.
|
|
// We use ~s to represent the abnormal form: final before letter
|
|
// or non-final before non-letter.
|
|
// We use 's to separate p and s (otherwise ps is one letter)
|
|
// so, we break out the following forms:
|
|
|
|
"''s < ({pp}) {sigma} ({letter});"
|
|
"s < {sigma} ({letter});"
|
|
"~s < {sigma} ;"
|
|
|
|
"~s < {sigma+} ({letter});"
|
|
"''s < ({pp}) {sigma+} ;"
|
|
"s < {sigma+} ;"
|
|
|
|
"~s ({letter}) > {sigma+};"
|
|
"~s > {sigma};"
|
|
"''s ({letter}) > {sigma};"
|
|
"''s > {sigma+};"
|
|
"s ({letter}) > {sigma};"
|
|
"s > {sigma+};"
|
|
|
|
// Because there are no uppercase forms of the diaeresis+acute letters,
// these rules had to be moved up here too (they are not duplicated below).
|
|
|
|
"i\"`>{iota_diaeresis+};"
|
|
"y\"`>{ypsilon_diaeresis+};"
|
|
|
|
"{\u00ef-acute} <> {iota_diaeresis+};"
|
|
// After a vowel the ypsilon form is written with a u-based letter, matching
// the ({vowel}) context used by the plain ypsilon rules further down.
"{\u00fc-acute} <> ({vowel}){ypsilon_diaeresis+};"
|
|
"{y-umlaut-acute} <> {ypsilon_diaeresis+};"
|
|
|
|
// ==============================================
|
|
// Uppercase Forms.
|
|
// To make lowercase forms, just copy and lowercase below
|
|
// ==============================================
|
|
|
|
// Typing variants, in case the keyboard doesn't have accents
|
|
|
|
"A`>{ALPHA+};"
|
|
"E`>{EPSILON+};"
|
|
"EE`>{ETA+};"
|
|
"EE>{ETA};"
|
|
"I`>{IOTA+};"
|
|
"O`>{OMICRON+};"
|
|
"OO`>{OMEGA+};"
|
|
"OO>{OMEGA};"
|
|
"I\">{IOTA_DIAERESIS};"
|
|
"Y\">{YPSILON_DIAERESIS};"
|
|
|
|
// Basic Letters
|
|
|
|
"A<>{ALPHA};"
|
|
"\u00c1<>{ALPHA+};"
|
|
"B<>{BETA};"
|
|
"N ({n-gamma}) <> {GAMMA} ({gamma-n});"
|
|
"G<>{GAMMA};"
|
|
"D<>{DELTA};"
|
|
"''E <> ([Ee]){EPSILON};" // handle EE
|
|
"E<>{EPSILON};"
|
|
"\u00c9<>{EPSILON+};"
|
|
"Z<>{ZETA};"
|
|
"{E-MACRON-ACUTE}<>{ETA+};"
|
|
"{E-MACRON}<>{ETA};"
|
|
"TH<>{THETA};"
|
|
"I<>{IOTA};"
|
|
"\u00cd<>{IOTA+};"
|
|
"\u00cf<>{IOTA_DIAERESIS};"
|
|
"K<>{KAPPA};"
|
|
"L<>{LAMBDA};"
|
|
"M<>{MU};"
|
|
"N'' <> {NU} ({gamma-n});"
|
|
"N<>{NU};"
|
|
"X<>{XI};"
|
|
"''O <> ([Oo]) {OMICRON};" // handle OO
|
|
"O<>{OMICRON};"
|
|
"\u00d3<>{OMICRON+};"
|
|
"PH<>{PHI};" // needs ordering before P
|
|
"PS<>{PSI};" // needs ordering before P
|
|
"P<>{PI};"
|
|
"R<>{RHO};"
|
|
"T<>{TAU};"
|
|
"U <> ({vowel}) {YPSILON};"
|
|
"\u00da <> ({vowel}) {YPSILON+};"
|
|
"\u00dc <> ({vowel}) {YPSILON_DIAERESIS};"
|
|
"Y<>{YPSILON};"
|
|
"\u00dd<>{YPSILON+};"
|
|
"{Y-UMLAUT}<>{YPSILON_DIAERESIS};"
|
|
"CH<>{CHI};"
|
|
"{O-MACRON-ACUTE}<>{OMEGA+};"
|
|
"{O-MACRON}<>{OMEGA};"
|
|
|
|
// Extra English Letters. Mapped for completeness
|
|
|
|
"C({softener})>|S;"
|
|
"C>|K;"
|
|
"F>|PH;"
|
|
"H>|CH;"
|
|
"J>|I;"
|
|
"Q>|K;"
|
|
"V>|U;"
|
|
"W>|U;"
|
|
|
|
// ==============================================
|
|
// Lowercase Forms. Just copy above and lowercase
|
|
// ==============================================
|
|
|
|
// typing variants, in case the keyboard doesn't have accents
|
|
|
|
"a`>{alpha+};"
|
|
"e`>{epsilon+};"
|
|
"ee`>{eta+};"
|
|
"ee>{eta};"
|
|
"i`>{iota+};"
|
|
"o`>{omicron+};"
|
|
"oo`>{omega+};"
|
|
"oo>{omega};"
|
|
"i\">{iota_diaeresis};"
|
|
"y\">{ypsilon_diaeresis};"
|
|
|
|
// basic letters
|
|
|
|
"a<>{alpha};"
|
|
"\u00e1<>{alpha+};"
|
|
"b<>{beta};"
|
|
"n ({n-gamma}) <> {gamma} ({gamma-n});"
|
|
"g<>{gamma};"
|
|
"d<>{delta};"
|
|
"''e <> ([Ee]){epsilon};" // handle EE
|
|
"e<>{epsilon};"
|
|
"\u00e9<>{epsilon+};"
|
|
"z<>{zeta};"
|
|
"{e-macron-acute}<>{eta+};"
|
|
"{e-macron}<>{eta};"
|
|
"th<>{theta};"
|
|
"i<>{iota};"
|
|
"\u00ed<>{iota+};"
|
|
"\u00ef<>{iota_diaeresis};"
|
|
"k<>{kappa};"
|
|
"l<>{lambda};"
|
|
"m<>{mu};"
|
|
"n'' <> {nu} ({gamma-n});"
|
|
"n<>{nu};"
|
|
"x<>{xi};"
|
|
"''o <> ([Oo]) {omicron};" // handle OO
|
|
"o<>{omicron};"
|
|
"\u00f3<>{omicron+};"
|
|
"ph<>{phi};" // needs ordering before p
|
|
"ps<>{psi};" // needs ordering before p
|
|
"p<>{pi};"
|
|
"r<>{rho};"
|
|
"t<>{tau};"
|
|
"u <> ({vowel}){ypsilon};"
|
|
"\u00fa <> ({vowel}){ypsilon+};"
|
|
"\u00fc <> ({vowel}){ypsilon_diaeresis};"
|
|
"y<>{ypsilon};"
|
|
"\u00fd<>{ypsilon+};"
|
|
"{y-umlaut}<>{ypsilon_diaeresis};"
|
|
"ch<>{chi};"
|
|
"{o-macron-acute}<>{omega+};"
|
|
"{o-macron}<>{omega};"
|
|
|
|
// extra english letters. mapped for completeness
|
|
|
|
"c({softener})>|s;"
|
|
"c>|k;"
|
|
"f>|ph;"
|
|
"h>|ch;"
|
|
"j>|i;"
|
|
"q>|k;"
|
|
"v>|u;"
|
|
"w>|u;"
|
|
|
|
// ====================================
|
|
// Normal final rule: remove '
|
|
// ====================================
|
|
|
|
//+ "''>;"
|
|
}
|
|
}
|