//-------------------------------------------------------------------- // Copyright (C) 1999, International Business Machines // Corporation and others. All Rights Reserved. //-------------------------------------------------------------------- // Date Name Description // 11/17/99 aliu Creation. //-------------------------------------------------------------------- // Latin-Greek lgreek { Rule { // ============================================== // Modern Greek Transliteration Rules // // This transliterates modern Greek characters, but using rules // that are traditional for Ancient Greek, and // thus more resemble Greek words that have become part // of English. It differs from the official Greek // transliteration, which is more phonetic (since // most modern Greek vowels, for example, have // degenerated simply to sound like "ee"). // // There are only a few tricky parts. // 1. eta and omega don't map directly to Latin vowels, // so we use a macron on e and o, and some // other combinations if they are accented. // 2. The accented, diaeresis i and y are substituted too. // 3. Some letters use digraphs, like "ph". While typical, // they need some special handling. // 4. A gamma before a gamma or a few other letters is // transliterated as an "n", as in "Anglo" // 5. An ypsilon after a vowel is a "u", as in // "Mouseio". Otherwise it is a "y" as in "Physikon" // 6. The construction of the rules is made simpler by making sure // that most rules for lowercase letters exactly correspond to the // rules for uppercase letters, *except* for the case of the letters // in the rule itself. That way, after modifying the uppercase rules, // you can just copy, paste, and "set to lowercase" to get // the rules for lowercase letters! // ============================================== // ============================================== // Variables, used to make the rules more comprehensible // and for conditionals. // ============================================== // Latin Letters "E-MACRON=\u0112;" "e-macron=\u0113;" "O-MACRON=\u014C;" "o-macron=\u014D;" "Y-UMLAUT=\u0178;" "y-umlaut=\u00FF;" //! // with real accents. //! + "E-MACRON-ACUTE=\u0112\u0301;" //! + "e-macron-acute=\u0113\u0301;" //! + "O-MACRON-ACUTE=\u014C\u0301;" //! + "o-macron-acute=\u014D\u0301;" //! + "y-umlaut-acute=\u00FF\u0301;" //! + "\u00ef-acute=\u00ef\u0301;" //! + "\u00fc-acute=\u00fc\u0301;" //! // // single letter equivalents "E-MACRON-ACUTE=\u00CA;" "e-macron-acute=\u00EA;" "O-MACRON-ACUTE=\u00D4;" "o-macron-acute=\u00F4;" "y-umlaut-acute=\u0177;" "\u00ef-acute=\u00EE;" "\u00fc-acute=\u00FB;" // Greek Letters "ALPHA=\u0391;" "BETA=\u0392;" "GAMMA=\u0393;" "DELTA=\u0394;" "EPSILON=\u0395;" "ZETA=\u0396;" "ETA=\u0397;" "THETA=\u0398;" "IOTA=\u0399;" "KAPPA=\u039A;" "LAMBDA=\u039B;" "MU=\u039C;" "NU=\u039D;" "XI=\u039E;" "OMICRON=\u039F;" "PI=\u03A0;" "RHO=\u03A1;" "SIGMA=\u03A3;" "TAU=\u03A4;" "YPSILON=\u03A5;" "PHI=\u03A6;" "CHI=\u03A7;" "PSI=\u03A8;" "OMEGA=\u03A9;" "ALPHA+=\u0386;" "EPSILON+=\u0388;" "ETA+=\u0389;" "IOTA+=\u038A;" "OMICRON+=\u038C;" "YPSILON+=\u038E;" "OMEGA+=\u038F;" "IOTA_DIAERESIS=\u03AA;" "YPSILON_DIAERESIS=\u03AB;" "alpha=\u03B1;" "beta=\u03B2;" "gamma=\u03B3;" "delta=\u03B4;" "epsilon=\u03B5;" "zeta=\u03B6;" "eta=\u03B7;" "theta=\u03B8;" "iota=\u03B9;" "kappa=\u03BA;" "lambda=\u03BB;" "mu=\u03BC;" "nu=\u03BD;" "xi=\u03BE;" "omicron=\u03BF;" "pi=\u03C0;" "rho=\u03C1;" "sigma=\u03C3;" "tau=\u03C4;" "ypsilon=\u03C5;" "phi=\u03C6;" "chi=\u03C7;" "psi=\u03C8;" "omega=\u03C9;" //forms "alpha+=\u03AC;" "epsilon+=\u03AD;" "eta+=\u03AE;" "iota+=\u03AF;" "omicron+=\u03CC;" "ypsilon+=\u03CD;" "omega+=\u03CE;" "iota_diaeresis=\u03CA;" "ypsilon_diaeresis=\u03CB;" "iota_diaeresis+=\u0390;" "ypsilon_diaeresis+=\u03B0;" "sigma+=\u03C2;" // Variables for conditional mappings // Use lowercase for all variable names, to allow cut/paste below. "letter=[~[:Lu:][:Ll:]];" "lower=[[:Ll:]];" "softener=[eiyEIY];" "vowel=[aeiouAEIOU" "{ALPHA}{EPSILON}{ETA}{IOTA}{OMICRON}{YPSILON}{OMEGA}" "{ALPHA+}{EPSILON+}{ETA+}{IOTA+}{OMICRON+}{YPSILON+}{OMEGA+}" "{IOTA_DIAERESIS}{YPSILON_DIAERESIS}" "{alpha}{epsilon}{eta}{iota}{omicron}{ypsilon}{omega}" "{alpha+}{epsilon+}{eta+}{iota+}{omicron+}{ypsilon+}{omega+}" "{iota_diaeresis}{ypsilon_diaeresis}" "{iota_diaeresis+}{ypsilon_diaeresis+}" "];" "n-gamma=[GKXCgkxc];" "gamma-n=[{GAMMA}{KAPPA}{CHI}{XI}{gamma}{kappa}{chi}{xi}];" "pp=[Pp];" // ============================================== // Rules // ============================================== // The following are special titlecases, and should // not be copied when duplicating the lowercase // ============================================== "Th <> {THETA}({lower};" "Ph <> {PHI}({lower};" "Ch <> {CHI}({lower};" //masked: + "Ps<{PHI}({lower};" // Because there is no uppercase forms for final sigma, // we had to move all the sigma rules up here. // Remember to insert ' to preserve round trip, for double letters // don't need to do this for the digraphs with h, // since it is not created when mapping back from greek // use special form for s "''S <> ({pp}) {SIGMA} ;" // handle PS "S <> {SIGMA};" // The following are a bit tricky. 's' takes two forms in greek // final or non final. // We use ~s to represent the abnormal form: final before letter // or non-final before non-letter. // We use 's to separate p and s (otherwise ps is one letter) // so, we break out the following forms: "''s < ({pp}) {sigma} ({letter});" "s < {sigma} ({letter});" "~s < {sigma} ;" "~s < {sigma+} ({letter});" "''s < ({pp}) {sigma+} ;" "s < {sigma+} ;" "~s ({letter}) > {sigma+};" "~s > {sigma};" "''s ({letter}) > {sigma};" "''s > {sigma+};" "s ({letter}) > {sigma};" "s > {sigma+};" // because there are no uppercase forms, had to move these up too. "i\"`>{iota_diaeresis+};" "y\"`>{ypsilon_diaeresis+};" "{\u00ef-acute} <> {iota_diaeresis+};" "{\u00fc-acute} <> {vowel}){ypsilon_diaeresis+};" "{y-umlaut-acute} <> {ypsilon_diaeresis+};" // ============================================== // Uppercase Forms. // To make lowercase forms, just copy and lowercase below // ============================================== // Typing variants, in case the keyboard doesn't have accents "A`>{ALPHA+};" "E`>{EPSILON+};" "EE`>{ETA+};" "EE>{ETA};" "I`>{IOTA+};" "O`>{OMICRON+};" "OO`>{OMEGA+};" "OO>{OMEGA};" "I\">{IOTA_DIAERESIS};" "Y\">{YPSILON_DIAERESIS};" // Basic Letters "A<>{ALPHA};" "\u00c1<>{ALPHA+};" "B<>{BETA};" "N ({n-gamma}) <> {GAMMA} ({gamma-n});" "G<>{GAMMA};" "D<>{DELTA};" "''E <> ([Ee]){EPSILON};" // handle EE "E<>{EPSILON};" "\u00c9<>{EPSILON+};" "Z<>{ZETA};" "{E-MACRON-ACUTE}<>{ETA+};" "{E-MACRON}<>{ETA};" "TH<>{THETA};" "I<>{IOTA};" "\u00cd<>{IOTA+};" "\u00cf<>{IOTA_DIAERESIS};" "K<>{KAPPA};" "L<>{LAMBDA};" "M<>{MU};" "N'' <> {NU} ({gamma-n});" "N<>{NU};" "X<>{XI};" "''O <> ([Oo]) {OMICRON};" // handle OO "O<>{OMICRON};" "\u00d3<>{OMICRON+};" "PH<>{PHI};" // needs ordering before P "PS<>{PSI};" // needs ordering before P "P<>{PI};" "R<>{RHO};" "T<>{TAU};" "U <> ({vowel}) {YPSILON};" "\u00da <> ({vowel}) {YPSILON+};" "\u00dc <> ({vowel}) {YPSILON_DIAERESIS};" "Y<>{YPSILON};" "\u00dd<>{YPSILON+};" "{Y-UMLAUT}<>{YPSILON_DIAERESIS};" "CH<>{CHI};" "{O-MACRON-ACUTE}<>{OMEGA+};" "{O-MACRON}<>{OMEGA};" // Extra English Letters. Mapped for completeness "C({softener})>|S;" "C>|K;" "F>|PH;" "H>|CH;" "J>|I;" "Q>|K;" "V>|U;" "W>|U;" // ============================================== // Lowercase Forms. Just copy above and lowercase // ============================================== // typing variants, in case the keyboard doesn't have accents "a`>{alpha+};" "e`>{epsilon+};" "ee`>{eta+};" "ee>{eta};" "i`>{iota+};" "o`>{omicron+};" "oo`>{omega+};" "oo>{omega};" "i\">{iota_diaeresis};" "y\">{ypsilon_diaeresis};" // basic letters "a<>{alpha};" "\u00e1<>{alpha+};" "b<>{beta};" "n ({n-gamma}) <> {gamma} ({gamma-n});" "g<>{gamma};" "d<>{delta};" "''e <> ([Ee]){epsilon};" // handle EE "e<>{epsilon};" "\u00e9<>{epsilon+};" "z<>{zeta};" "{e-macron-acute}<>{eta+};" "{e-macron}<>{eta};" "th<>{theta};" "i<>{iota};" "\u00ed<>{iota+};" "\u00ef<>{iota_diaeresis};" "k<>{kappa};" "l<>{lambda};" "m<>{mu};" "n'' <> {nu} ({gamma-n});" "n<>{nu};" "x<>{xi};" "''o <> ([Oo]) {omicron};" // handle OO "o<>{omicron};" "\u00f3<>{omicron+};" "ph<>{phi};" // needs ordering before p "ps<>{psi};" // needs ordering before p "p<>{pi};" "r<>{rho};" "t<>{tau};" "u <> ({vowel}){ypsilon};" "\u00fa <> ({vowel}){ypsilon+};" "\u00fc <> ({vowel}){ypsilon_diaeresis};" "y<>{ypsilon};" "\u00fd<>{ypsilon+};" "{y-umlaut}<>{ypsilon_diaeresis};" "ch<>{chi};" "{o-macron-acute}<>{omega+};" "{o-macron}<>{omega};" // extra english letters. mapped for completeness "c({softener})>|s;" "c>|k;" "f>|ph;" "h>|ch;" "j>|i;" "q>|k;" "v>|u;" "w>|u;" // ==================================== // Normal final rule: remove ' // ==================================== //+ "''>;" } }