// -*- Coding: utf-8; -*-
//--------------------------------------------------------------------
// Copyright (c) 1999-2002, International Business Machines
// Corporation and others.  All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpicurules.bat
// Source: ../../../impl/data/Transliterator_Arabic_Latin.txt
// Date: Sat Jul 27 10:31:01 2002
//--------------------------------------------------------------------

// Arabic_Latin
//
// Resource entry "t_Arab_Latn": a "Rule" entry whose body is a sequence of
// quoted strings, one transliteration rule per string.
// NOTE(review): the header marks this file as generated output, so changes
// presumably belong in the listed Source file rather than here -- confirm.
//
// Notation used by the rules below (ICU transliterator rule syntax; see the
// ICU User Guide for the authoritative description):
//   a <> b   rule used in both directions (Arabic -> Latin and Latin -> Arabic)
//   a >  b   rule used in the forward direction only
//   a <  b   rule used in the reverse direction only
//   |        cursor position in the rule's output
//   }        the text after it is context (matched but not replaced)
//   :: X (Y) X is applied in the forward direction, Y in the reverse direction
// The order of the rules matters: an earlier rule can mask a later one.

t_Arab_Latn {
  Rule {

    //--------------------------------------------------------------------
    //--------------------------------------------------------------------
    //--------------------------------------------------------------------
    // Generally follows UNGEGN
    // Occasionally deviates in the direction of ISO 233
    //   a) where required for disambiguation.
    //   b) with underdot instead of cedilla for letter like SAD, since
    //      those are explicitly in Unicode for transliteration.
    //   c) with extra non-Arabic-language letters, like PEH
    // Does *not* do assimilation of "al", nor hyphenation.
    // While it could be done, we need to determine whether a prefix "al" could
    // occur other than as the definite article (since no space is used).

    // Forward filter: the Arabic script plus the explicitly listed characters.
    // NOTE: the set starts with an invisible U+200E (LEFT-TO-RIGHT MARK)
    // immediately before the superscript n; do not drop it when editing.
    ":: [[:Arabic:] [‎ⁿ\u060C\u061B\u061F\u0640\u064B-\u0655\u0660-\u066C\u06F0-\u06F9\uFDFC]] ;"

    // Normalization: NFKD going forward, NFC going in reverse.
    ":: NFKD (NFC);"

    // Combining marks attached to Latin output so that distinct Arabic
    // characters stay distinct after transliteration.
    // NOTE(review): the literal marks appear to be U+0331 ($disambig),
    // U+0330 ($disambig2) and U+0323 ($under), which matches the \u0323 and
    // \u0330-\u0331 entries of the reverse filter at the end -- confirm
    // against the source file.
    "$disambig = ̱ ;"
    "$disambig2 = ̰ ;"
    "$under = ̣ ;"

    // Characters whose canonical combining class is neither 0 nor 230;
    // used by the reverse-only TEH MARBUTA rule below.
    "$notAbove = [[:^ccc=0:]&[:^ccc=230:]];"

    // non-letters

    "٫ <> '.' $disambig ;" // ARABIC DECIMAL SEPARATOR
    "٬ <> ',' $disambig ;" // ARABIC THOUSANDS SEPARATOR
    // ٭ <> ; # ARABIC FIVE POINTED STAR // no need to transliterate
    "، <> ',' ;" // ARABIC COMMA
    "؛ <> ';' ;" // ARABIC SEMICOLON
    "؟ <> '?' ;" // ARABIC QUESTION MARK
    "٪ <> '%' ;" // ARABIC PERCENT SIGN

    // The extended digits carry $disambig so that they map back to
    // themselves rather than to the plain Arabic-Indic digits that follow.
    "۰ <> 0 $disambig ;" // EXTENDED ARABIC-INDIC DIGIT ZERO
    "۱ <> 1 $disambig ;" // EXTENDED ARABIC-INDIC DIGIT ONE
    "۲ <> 2 $disambig ;" // EXTENDED ARABIC-INDIC DIGIT TWO
    "۳ <> 3 $disambig ;" // EXTENDED ARABIC-INDIC DIGIT THREE
    "۴ <> 4 $disambig ;" // EXTENDED ARABIC-INDIC DIGIT FOUR
    "۵ <> 5 $disambig ;" // EXTENDED ARABIC-INDIC DIGIT FIVE
    "۶ <> 6 $disambig ;" // EXTENDED ARABIC-INDIC DIGIT SIX
    "۷ <> 7 $disambig ;" // EXTENDED ARABIC-INDIC DIGIT SEVEN
    "۸ <> 8 $disambig ;" // EXTENDED ARABIC-INDIC DIGIT EIGHT
    "۹ <> 9 $disambig ;" // EXTENDED ARABIC-INDIC DIGIT NINE
    "٠ <> 0 ;" // ARABIC-INDIC DIGIT ZERO
    "١ <> 1 ;" // ARABIC-INDIC DIGIT ONE
    "٢ <> 2 ;" // ARABIC-INDIC DIGIT TWO
    "٣ <> 3 ;" // ARABIC-INDIC DIGIT THREE
    "٤ <> 4 ;" // ARABIC-INDIC DIGIT FOUR
    "٥ <> 5 ;" // ARABIC-INDIC DIGIT FIVE
    "٦ <> 6 ;" // ARABIC-INDIC DIGIT SIX
    "٧ <> 7 ;" // ARABIC-INDIC DIGIT SEVEN
    "٨ <> 8 ;" // ARABIC-INDIC DIGIT EIGHT
    "٩ <> 9 ;" // ARABIC-INDIC DIGIT NINE

    // letters

    // long vowels
    // (short vowel mark followed by its letter; listed before the
    // single-character rules for the same marks and letters further down)

    "َا<> ā ;" // ARABIC FATHA, ARABIC LETTER ALEF
    "ُو <> ū ;" // ARABIC DAMMA, ARABIC LETTER WAW
    "ِي <> ī ;" // ARABIC KASRA, ARABIC LETTER YEH

    // longer items moved here to prevent masking
    // (these Latin forms begin with a letter that also has its own
    // single-letter rule further down, e.g. "t h" here versus "t" below)

    "ث <> t h $disambig ;" // ARABIC LETTER THEH
    "ذ <> d h $disambig ;" // ARABIC LETTER THAL
    "ش <> s h $disambig ;" // ARABIC LETTER SHEEN
    "ص <> s $under ;" // ARABIC LETTER SAD
    "ض <> d $under ;" // ARABIC LETTER DAD
    "ط <> t $under ;" // ARABIC LETTER TAH
    "ظ <> z $under ;" // ARABIC LETTER ZAH
    "غ <> g h $disambig ;" // ARABIC LETTER GHAIN

    // WARNING: special case
    // NOTE(review): part of the original wording of this comment appears to
    // have been lost. What remains says that the Latin sequence will be
    // canonically reordered, so on the return we have to skip over (but
    // preserve) the half-ring below (or others like it).
    // ةٕ < ẗ̹ ; # LATIN SMALL LETTER T, COMBINING RIGHT HALF RING BELOW, COMBINING DIAERESIS

    "ة <> t \u0308 ;" // ARABIC LETTER TEH MARBUTA
    // Reverse only: "t", one or more $notAbove marks, then the diaeresis.
    // The marks are captured as $1 and kept after the cursor.
    "ة | $1 < t ($notAbove+) \u0308 ;" // ARABIC LETTER TEH MARBUTA

    // non-Arabic language

    "ژ <> z h $disambig ;" // ARABIC LETTER JEH
    "ڭ <> n $disambig g ;" // ARABIC LETTER NG
    "ۋ <> v $disambig ;" // ARABIC LETTER VE
    "ی <> y $disambig2 ;" // ARABIC LETTER FARSI YEH

    // Arabic language

    "ء <> ʾ ;" // ARABIC LETTER HAMZA
    "ا <> a $under;" // ARABIC LETTER ALEF
    "ب <> b ;" // ARABIC LETTER BEH
    "ت <> t ;" // ARABIC LETTER TEH
    "ج <> j ;" // ARABIC LETTER JEEM
    "ح <> h $under ;" // ARABIC LETTER HAH
    "خ <> k h $disambig ;" // ARABIC LETTER KHAH
    "د <> d ;" // ARABIC LETTER DAL
    "ر <> r ;" // ARABIC LETTER REH
    "ز <> z ;" // ARABIC LETTER ZAIN
    "س <> s ;" // ARABIC LETTER SEEN
    "ع <> ʿ ;" // ARABIC LETTER AIN
    // Forward only, with an empty right-hand side: nothing is output for
    // the tatweel, and no reverse rule produces one.
    "ـ > ;" // ARABIC TATWEEL
    "ف <> f ;" // ARABIC LETTER FEH
    "ق <> q ;" // ARABIC LETTER QAF
    "ك <> k ;" // ARABIC LETTER KAF
    "ل <> l ;" // ARABIC LETTER LAM
    "م <> m ;" // ARABIC LETTER MEEM
    "ن <> n ;" // ARABIC LETTER NOON
    "ه <> h ;" // ARABIC LETTER HEH
    "و <> w ;" // ARABIC LETTER WAW
    "ى <> y $disambig ;" // ARABIC LETTER ALEF MAKSURA
    "ي <> y ;" // ARABIC LETTER YEH
    "ً <> aⁿ ;" // ARABIC FATHATAN
    "ٌ <> uⁿ ;" // ARABIC DAMMATAN
    "ٍ <> iⁿ ;" // ARABIC KASRATAN
    "َ <> a ;" // ARABIC FATHA
    "ُ <> u ;" // ARABIC DAMMA
    "ِ <> i ;" // ARABIC KASRA
    "ّ <> ̃ ;" // ARABIC SHADDA
    "ْ <> ̊ ;" // ARABIC SUKUN

    // special combining marks

    "ٓ <> ̂ ;" // ARABIC MADDAH ABOVE
    "ٔ <> ̉ ;" // ARABIC HAMZA ABOVE
    "ٕ <> ̹ ;" // ARABIC HAMZA BELOW

    // Some non-Arabic language (not in UNGEGN)

    "پ <> p ;" // ARABIC LETTER PEH
    "چ <> c h $disambig ;" // ARABIC LETTER TCHEH
    "ڤ <> v ;" // ARABIC LETTER VEH
    // ڥ <> v $disambig ; # ARABIC LETTER FEH WITH THREE DOTS BELOW
    // ڢ <> f $disambig ; # ARABIC LETTER FEH WITH DOT MOVED BELOW
    "گ <> g ;" // ARABIC LETTER GAF

    // fallbacks
    // Reverse-only rewrites of Latin text that no rule above produces.
    // Each output starts with the cursor, so the replacement text is itself
    // matched against the rules again. "c" before e, i or y becomes "s";
    // any other "c" becomes "k".
    // NOTE: the last fallback's right-hand side starts with an invisible
    // U+200E (LEFT-TO-RIGHT MARK) before the superscript n.

    "| s < c } [eiy];"
    "| k < c ;"
    "| i < e ;"
    "| u < o ;"
    "| ks < x ;"
    "| n < ‎ⁿ;"

    // Reverse direction only: lowercase the Latin input.
    ":: (lower) ;"

    // Normalization: NFC going forward, NFD going in reverse.
    "::NFC (NFD);"

    // Reverse filter: the Latin script plus the punctuation, ASCII digits,
    // modifier letters and combining marks that the rules above emit.
    ":: ( [[:Latin:] [%,.0-9;?\u02BE-\u02BF\u0302-\u0304\u0308-\u030A\u0323\u0330-\u0331\u0339\u037E]] );"
  }
}