diff --git a/icu4c/data/translit_Bengali_InterIndic.txt b/icu4c/data/translit_Bengali_InterIndic.txt index 87f5f2848a..75ed4c6ab1 100644 --- a/icu4c/data/translit_Bengali_InterIndic.txt +++ b/icu4c/data/translit_Bengali_InterIndic.txt @@ -6,7 +6,7 @@ // THIS IS A MACHINE-GENERATED FILE // Tool: dumpICUrules.bat // Source: ../../text/resources/Transliterator_Bengali_InterIndic.txt -// Date: Thu Oct 25 22:17:21 2001 +// Date: Sat Nov 10 17:25:41 2001 //-------------------------------------------------------------------- // Bengali_InterIndic @@ -104,8 +104,8 @@ translit_Bengali_InterIndic { "\u09ed>\ue06d;" // DIGIT SEVEN "\u09ee>\ue06e;" // DIGIT EIGHT "\u09ef>\ue06f;" // DIGIT NINE -// \u09f0>; # UNMAPPED Bengali-InterIndic: LETTER RA WITH MIDDLE DIAGONAL -// \u09f1>; # UNMAPPED Bengali-InterIndic: LETTER RA WITH LOWER DIAGONAL +"\u09f0>\ue0f0;" // UNMAPPED Bengali-InterIndic: LETTER RA WITH MIDDLE DIAGONAL +"\u09f1>\ue0f1;" // UNMAPPED Bengali-InterIndic: LETTER RA WITH LOWER DIAGONAL // \u09f2>; # UNMAPPED Bengali-InterIndic: RUPEE MARK // \u09f3>; # UNMAPPED Bengali-InterIndic: RUPEE SIGN // \u09f4>; # UNMAPPED Bengali-InterIndic: CURRENCY NUMERATOR ONE diff --git a/icu4c/data/translit_Devanagari_InterIndic.txt b/icu4c/data/translit_Devanagari_InterIndic.txt index 9e05f58512..bdfd011671 100644 --- a/icu4c/data/translit_Devanagari_InterIndic.txt +++ b/icu4c/data/translit_Devanagari_InterIndic.txt @@ -6,7 +6,7 @@ // THIS IS A MACHINE-GENERATED FILE // Tool: dumpICUrules.bat // Source: ../../text/resources/Transliterator_Devanagari_InterIndic.txt -// Date: Thu Oct 25 22:17:21 2001 +// Date: Sat Nov 10 17:25:41 2001 //-------------------------------------------------------------------- // Devanagari_InterIndic @@ -50,11 +50,11 @@ translit_Devanagari_InterIndic { "\u090b>\ue00b;" // LETTER VOCALIC R "\u090c>\ue00c;" // LETTER VOCALIC L "\u090d>\ue00d;" // LETTER CANDRA E (For representing English sounds) -//\u090e>\ue00e; # UNMAPPED LETTER SHORT E(For Southern 
Scripts) + "\u090e>\ue00e;" // UNMAPPED LETTER SHORT E(For Southern Scripts) "\u090f>\ue00f;" // LETTER E "\u0910>\ue010;" // LETTER AI "\u0911>\ue011;" // LETTER CANDRA O (For representing English sounds) -//\u0912>\ue012; # UNMAPPED LETTER SHORT O (For Southern Scripts) + "\u0912>\ue012;" // UNMAPPED LETTER SHORT O (For Southern Scripts) "\u0913>\ue013;" // LETTER O "\u0914>\ue014;" // LETTER AU "\u0915>\ue015;" // LETTER KA @@ -85,10 +85,10 @@ translit_Devanagari_InterIndic { "\u092e>\ue02e;" // LETTER MA "\u092f>\ue02f;" // LETTER YA "\u0930>\ue030;" // LETTER RA - //\u0931>\ue031; # UNMAPPED LETTER RRA (Eyelash RA for Southern scripts) + "\u0931>\ue031;" // UNMAPPED LETTER RRA (Eyelash RA for Southern scripts) "\u0932>\ue032;" // LETTER LA "\u0933>\ue033;" // LETTER LLA - //\u0934>\ue034; # UNMAPPED LETTER LLLA (LLLA for Southern scripts) + "\u0934>\ue034;" // UNMAPPED LETTER LLLA (LLLA for Southern scripts) "\u0935>\ue035;" // LETTER VA "\u0936>\ue036;" // LETTER SHA "\u0937>\ue037;" // LETTER SSA @@ -104,11 +104,11 @@ translit_Devanagari_InterIndic { "\u0943>\ue043;" // VOWEL SIGN VOCALIC R "\u0944>\ue044;" // VOWEL SIGN VOCALIC RR "\u0945>\ue045;" // VOWEL SIGN CANDRA E -//\u0946>\ue046; # UNMAPPED VOWEL SIGN SHORT E + "\u0946>\ue046;" // UNMAPPED VOWEL SIGN SHORT E "\u0947>\ue047;" // VOWEL SIGN E "\u0948>\ue048;" // VOWEL SIGN AI "\u0949>\ue049;" // VOWEL SIGN CANDRA O -//\u094a>\ue04a; # UNMAPPED VOWEL SIGN SHORT O + "\u094a>\ue04a;" // UNMAPPED VOWEL SIGN SHORT O "\u094b>\ue04b;" // VOWEL SIGN O "\u094c>\ue04c;" // VOWEL SIGN AU "\u094d>\ue04d;" // SIGN VIRAMA diff --git a/icu4c/data/translit_Gujarati_InterIndic.txt b/icu4c/data/translit_Gujarati_InterIndic.txt index 4a9caaee81..1de781768a 100644 --- a/icu4c/data/translit_Gujarati_InterIndic.txt +++ b/icu4c/data/translit_Gujarati_InterIndic.txt @@ -6,7 +6,7 @@ // THIS IS A MACHINE-GENERATED FILE // Tool: dumpICUrules.bat // Source: ../../text/resources/Transliterator_Gujarati_InterIndic.txt -// Date: 
Thu Oct 25 22:17:21 2001 +// Date: Sat Nov 10 17:25:41 2001 //-------------------------------------------------------------------- // Gujarati_InterIndic @@ -41,10 +41,10 @@ translit_Gujarati_InterIndic { "\u0a89>\ue009;" // LETTER U "\u0a8a>\ue00a;" // LETTER UU "\u0a8b>\ue00b;" // LETTER VOCALIC R -// \u0a8d>; # UNMAPPED Gujarati-InterIndic: VOWEL CANDRA E +"\u0a8d>\ue00d;" // UNMAPPED Gujarati-InterIndic: VOWEL CANDRA E "\u0a8f>\ue00f;" // LETTER E "\u0a90>\ue010;" // LETTER AI -// \u0a91>; # UNMAPPED Gujarati-InterIndic: VOWEL CANDRA O +"\u0a91>\ue011;" // UNMAPPED Gujarati-InterIndic: VOWEL CANDRA O "\u0a93>\ue013;" // LETTER O "\u0a94>\ue014;" // LETTER AU "\u0a95>\ue015;" // LETTER KA diff --git a/icu4c/data/translit_Gurmukhi_InterIndic.txt b/icu4c/data/translit_Gurmukhi_InterIndic.txt index 7033480bca..c9b3748c07 100644 --- a/icu4c/data/translit_Gurmukhi_InterIndic.txt +++ b/icu4c/data/translit_Gurmukhi_InterIndic.txt @@ -6,7 +6,7 @@ // THIS IS A MACHINE-GENERATED FILE // Tool: dumpICUrules.bat // Source: ../../text/resources/Transliterator_Gurmukhi_InterIndic.txt -// Date: Thu Oct 25 22:17:21 2001 +// Date: Sat Nov 10 17:25:41 2001 //-------------------------------------------------------------------- // Gurmukhi_InterIndic @@ -101,11 +101,11 @@ translit_Gurmukhi_InterIndic { "\u0a6d>\ue06d;" // DIGIT SEVEN "\u0a6e>\ue06e;" // DIGIT EIGHT "\u0a6f>\ue06f;" // DIGIT NINE -// \u0a70>; # UNMAPPED Gurmukhi-InterIndic: TIPPI -// \u0a71>; # UNMAPPED Gurmukhi-InterIndic: ADDAK -// \u0a72>; # UNMAPPED Gurmukhi-InterIndic: IRI -// \u0a73>; # UNMAPPED Gurmukhi-InterIndic: URA -// \u0a74>; # UNMAPPED Gurmukhi-InterIndic: EK ONKAR +"\u0a70>\ue070;" // TIPPI +"\u0a71>\ue071;" // ADDAK +"\u0a72>\ue072;" // IRI +"\u0a73>\ue073;" // URA +"\u0a74>\ue074;" // EK ONKAR // :: NFC (NFD) ; // eof } diff --git a/icu4c/data/translit_InterIndic_Bengali.txt b/icu4c/data/translit_InterIndic_Bengali.txt index 261ca20648..d50509dfd5 100644 --- 
a/icu4c/data/translit_InterIndic_Bengali.txt +++ b/icu4c/data/translit_InterIndic_Bengali.txt @@ -6,7 +6,7 @@ // THIS IS A MACHINE-GENERATED FILE // Tool: dumpICUrules.bat // Source: ../../text/resources/Transliterator_InterIndic_Bengali.txt -// Date: Thu Oct 25 22:17:21 2001 +// Date: Sat Nov 10 17:25:41 2001 //-------------------------------------------------------------------- // InterIndic_Bengali @@ -42,9 +42,13 @@ translit_InterIndic_Bengali { "\ue00a>\u098a;" // LETTER UU "\ue00b>\u098b;" // LETTER VOCALIC R "\ue00c>\u098c;" // LETTER VOCALIC L -// \ue00f>; # UNMAPPED InterIndic-Bengali: LETTER EE (\u098f = LETTER E) +"\ue00d>\u098f;" // FALLBACK +"\ue00e>\u098f;" // FALLBACK +"\ue00f>\u098f;" // LETTER E "\ue010>\u0990;" // LETTER AI -// \ue013>; # UNMAPPED InterIndic-Bengali: LETTER OO (\u0993 = LETTER O) +"\ue011>\u0993;" // FALLBACK +"\ue012>\u0993;" // FALLBACK +"\ue013>\u0993;" // LETTER O "\ue014>\u0994;" // LETTER AU "\ue015>\u0995;" // LETTER KA "\ue016>\u0996;" // LETTER KHA @@ -74,6 +78,7 @@ translit_InterIndic_Bengali { "\ue02e>\u09ae;" // LETTER MA "\ue02f>\u09af;" // LETTER YA "\ue030>\u09b0;" // LETTER RA +"\ue031>\u09b0\u09bc;" // FALLBACK to RA "\ue032>\u09b2;" // LETTER LA "\ue033>\u09b2;" // REMAP (indicExceptions.txt): \u09b3>\u09b2 = LETTER LLA>LETTER LA "\ue034>\u09b2;" // REMAP (indicExceptions.txt): \u09b4>\u09b2 = LETTER LLLA>LETTER LA @@ -83,7 +88,7 @@ translit_InterIndic_Bengali { "\ue038>\u09b8;" // LETTER SA "\ue039>\u09b9;" // LETTER HA "\ue03c>\u09bc;" // SIGN NUKTA -// \ue03d>; # UNMAPPED InterIndic-Bengali: SIGN AVAGRAHA +"\ue03d>;" // FALLBACK BLOW AWAY AVAGRAHA "\ue03e>\u09be;" // VOWEL SIGN AA "\ue03f>\u09bf;" // VOWEL SIGN I "\ue040>\u09c0;" // VOWEL SIGN II @@ -92,19 +97,23 @@ translit_InterIndic_Bengali { "\ue043>\u09c3;" // VOWEL SIGN VOCALIC R "\ue044>\u09c4;" // VOWEL SIGN VOCALIC RR "\ue045>\u09c7;" // REMAP (indicExceptions.txt): \u09c5>\u09c7 = VOWEL SIGN CANDRA E>VOWEL SIGN E -// \ue047>; # UNMAPPED 
InterIndic-Bengali: VOWEL SIGN EE (\u09c7 = VOWEL SIGN E) +"\ue046>\u09c7;" // FALLBACK +"\ue047>\u09c7;" // VOWEL SIGN E "\ue048>\u09c8;" // VOWEL SIGN AI "\ue049>\u09cb;" // REMAP (indicExceptions.txt): \u09c9>\u09cb = VOWEL SIGN CANDRA O>VOWEL SIGN O -// \ue04b>; # UNMAPPED InterIndic-Bengali: VOWEL SIGN OO (\u09cb = VOWEL SIGN O) +"\ue04a>\u09cb;" // FALLBACK +"\ue04b>\u09cb;" // VOWEL SIGN O "\ue04c>\u09cc;" // VOWEL SIGN AU "\ue04d>\u09cd;" // SIGN VIRAMA -// \ue050>; # UNMAPPED InterIndic-Bengali: OM +"\ue050>\u0993\u0982;" // InterIndic-Bengali: OM // \ue055>; # UNMAPPED InterIndic-Bengali: LENGTH MARK "\ue056>\u09c8;" // REMAP (indicExceptions.txt): \u09d6>\u09c8 = AI LENGTH MARK>VOWEL SIGN AI "\ue057>\u09d7;" // AU LENGTH MARK +"\ue058>\u0995;" // FALLBACK "\ue059>\u0996;" // REMAP (indicExceptions.txt): \u09d9>\u0996 = LETTER KHHA>LETTER KHA "\ue05a>\u0997;" // REMAP (indicExceptions.txt): \u09da>\u0997 = LETTER GHHA>LETTER GA "\ue05b>\u099c;" // REMAP (indicExceptions.txt): \u09db>\u099c = LETTER ZA>LETTER JA +"\ue05c>\u09Dc;" // FALLBACK "\ue05d>\u09dd;" // LETTER RHA "\ue05e>\u09ab;" // REMAP (indicExceptions.txt): \u09de>\u09ab = LETTER FA>LETTER PHA "\ue05f>\u09df;" // LETTER YYA @@ -123,11 +132,9 @@ translit_InterIndic_Bengali { "\ue06e>\u09ee;" // DIGIT EIGHT "\ue06f>\u09ef;" // DIGIT NINE "\ue0fa>\u09fa;" // ISSHAR -"\ue00f>\u098f;" // LETTER E -"\ue013>\u0993;" // LETTER O -"\ue031>\u09dc;" // LETTER RRA -"\ue047>\u09c7;" // VOWEL SIGN E -"\ue04b>\u09cb;" // VOWEL SIGN O + + + // :: NFC (NFD) ; // eof } diff --git a/icu4c/data/translit_InterIndic_Devanagari.txt b/icu4c/data/translit_InterIndic_Devanagari.txt index a38a82090d..140c6bd01a 100644 --- a/icu4c/data/translit_InterIndic_Devanagari.txt +++ b/icu4c/data/translit_InterIndic_Devanagari.txt @@ -6,7 +6,7 @@ // THIS IS A MACHINE-GENERATED FILE // Tool: dumpICUrules.bat // Source: ../../text/resources/Transliterator_InterIndic_Devanagari.txt -// Date: Thu Oct 25 22:17:21 2001 +// Date: Sat Nov 
10 17:25:41 2001 //-------------------------------------------------------------------- // InterIndic_Devanagari @@ -43,6 +43,25 @@ translit_InterIndic_Devanagari { "\ue022\ue03c > \u095d;" //\ue05d LETTER RHA (pronounced RRHA) "\ue02b\ue03c > \u095e;" //\ue05e LETTER FA "\ue02f\ue03c > \u095f;" //\ue05f LETTER YYA + + //Decomposed compatibility transliterations + "\ue012\ue057>\u0914;" // FALLBACK FOR TAMIL AU + "\u0030 > \u0966;" // FALLBACK FOR TAMIL + + "\ue046\ue056 > \u0948;" // FALLBACK FOR TELUGU + "\ue055>;" // FALLBACK BLOW AWAY KANNADA AND TELUGU LENGTH MARK + "\ue056>;" // FALLBACK BLOW AWAY KANNADA AND TELUGU AI LENGTH MARK + "\ue057>;" // FALLBACK BLOW AWAY TAMIL AU LENGTH MARK + "\ue070>;" // FALLBACK BLOW AWAY GURMUKHI + "\ue071>;" // FALLBACK BLOW AWAY GURMUKHI + "\ue072>;" // FALLBACK BLOW AWAY GURMUKHI + "\ue073>;" // FALLBACK BLOW AWAY GURMUKHI + "\ue074>;" // FALLBACK BLOW AWAY GURMUKHI + + //Decomposed compatibility transliterations + "\ue032\ue03C > \u0933;" // FALLBACK FOR GURMUKHI + "\ue038\ue03c > \u0936;" // FALLBACK FOR GURMUKHI + "\ue001 > \u0901;" // SIGN CANDRABINDU "\ue002 > \u0902;" // SIGN ANUSVARA "\ue003 > \u0903;" // SIGN VISARGA @@ -55,13 +74,13 @@ translit_InterIndic_Devanagari { "\ue00b > \u090b;" // LETTER VOCALIC R "\ue00c > \u090c;" // LETTER VOCALIC L "\ue00d > \u090d;" // LETTER CANDRA E (For representing English sounds) -//\ue00e > \u090e; # UNMAPPED LETTER SHORT E(For Southern Scripts) - "\ue00e > \u090f;" + "\ue00e > \u090e;" // UNMAPPED LETTER SHORT E(For Southern Scripts) + //\ue00e > \u090f; "\ue00f > \u090f;" // LETTER E "\ue010 > \u0910;" // LETTER AI "\ue011 > \u0911;" // LETTER CANDRA O (For representing English sounds) -//\ue012 > \u0912; # UNMAPPED LETTER SHORT O (For Southern Scripts) - "\ue012 > \u0913;" + "\ue012 > \u0912;" // UNMAPPED LETTER SHORT O (For Southern Scripts) + //\ue012 > \u0913; "\ue013 > \u0913;" // LETTER O "\ue014 > \u0914;" // LETTER AU "\ue015 > \u0915;" // LETTER KA @@ -92,12 +111,12 
@@ translit_InterIndic_Devanagari { "\ue02e > \u092e;" // LETTER MA "\ue02f > \u092f;" // LETTER YA "\ue030 > \u0930;" // LETTER RA - //\ue031 > \u0931; # LETTER RRA (Eyelash RA for Southern scripts) - "\ue031 > \u0930;" + "\ue031 > \u0931;" // LETTER RRA (Eyelash RA for Southern scripts) + //\ue031 > \u0930; "\ue032 > \u0932;" // LETTER LA "\ue033 > \u0933;" // LETTER LLA - //\ue034 > \u0934; # LETTER LLLA (LLLA for Southern scripts) - "\ue034 > \u0933;" + "\ue034 > \u0934;" // LETTER LLLA (LLLA for Southern scripts) + //\ue034 > \u0933; "\ue035 > \u0935;" // LETTER VA "\ue036 > \u0936;" // LETTER SHA "\ue037 > \u0937;" // LETTER SSA @@ -113,13 +132,13 @@ translit_InterIndic_Devanagari { "\ue043 > \u0943;" // VOWEL SIGN VOCALIC R "\ue044 > \u0944;" // VOWEL SIGN VOCALIC RR "\ue045 > \u0945;" // VOWEL SIGN CANDRA E -//\ue046 > \u0946; # UNMAPPED VOWEL SIGN SHORT E - "\ue046 > \u0947;" + "\ue046 > \u0946;" // UNMAPPED VOWEL SIGN SHORT E + //\ue046 > \u0947; "\ue047 > \u0947;" // VOWEL SIGN E "\ue048 > \u0948;" // VOWEL SIGN AI "\ue049 > \u0949;" // VOWEL SIGN CANDRA O -//\ue04a > \u094a; # UNMAPPED VOWEL SIGN SHORT O - "\ue04a > \u094b;" + "\ue04a > \u094a;" // UNMAPPED VOWEL SIGN SHORT O + //\ue04a > \u094b; "\ue04b > \u094b;" // VOWEL SIGN O "\ue04c > \u094c;" // VOWEL SIGN AU "\ue04d > \u094d;" // SIGN VIRAMA @@ -140,8 +159,8 @@ translit_InterIndic_Devanagari { "\ue061 > \u0961;" // LETTER VOCALIC LL "\ue062 > \u0962;" // VOWEL SIGN VOCALIC L "\ue063 > \u0963;" // VOWEL SIGN VOCALIC LL -// > ; \u0964 # UNMAPPED Devanagari-InterIndic: DANDA -// > ; \u0965 # UNMAPPED Devanagari-InterIndic: DOUBLE DANDA + "\ue064 > \u0964;" // UNMAPPED Devanagari-InterIndic: DANDA + "\ue065 > \u0965;" // UNMAPPED Devanagari-InterIndic: DOUBLE DANDA "\ue066 > \u0966;" // DIGIT ZERO "\ue067 > \u0967;" // DIGIT ONE "\ue068 > \u0968;" // DIGIT TWO @@ -152,6 +171,10 @@ translit_InterIndic_Devanagari { "\ue06d > \u096d;" // DIGIT SEVEN "\ue06e > \u096e;" // DIGIT EIGHT "\ue06f > \u096f;" 
// DIGIT NINE + "\ue0f0 > \u0930;" // FALLBACK RA + "\ue0f1 > \u0930;" // FALLBACK RA + + // \u0970 # UNMAPPED Devanagari-InterIndic: ABBREVIATION SIGN // :: NFC; // eof diff --git a/icu4c/data/translit_InterIndic_Gujarati.txt b/icu4c/data/translit_InterIndic_Gujarati.txt index 25789ce7e1..142c2f092d 100644 --- a/icu4c/data/translit_InterIndic_Gujarati.txt +++ b/icu4c/data/translit_InterIndic_Gujarati.txt @@ -6,7 +6,7 @@ // THIS IS A MACHINE-GENERATED FILE // Tool: dumpICUrules.bat // Source: ../../text/resources/Transliterator_InterIndic_Gujarati.txt -// Date: Thu Oct 25 22:17:21 2001 +// Date: Sat Nov 10 17:25:41 2001 //-------------------------------------------------------------------- // InterIndic_Gujarati @@ -42,9 +42,13 @@ translit_InterIndic_Gujarati { "\ue00a>\u0a8a;" // LETTER UU "\ue00b>\u0a8b;" // LETTER VOCALIC R "\ue00c>\u0ab2\u0ac3;" // REMAP (indicExceptions.txt): \u0a8c>\u0ab2\u0ac3 = LETTER VOCALIC L>LETTER LA.VOWEL SIGN VOCALIC R -// \ue00f>; # UNMAPPED InterIndic-Gujarati: LETTER EE (\u0a8f = LETTER E) +"\ue00d>\u0a8d;" // GUJARATI VOWEL CANDRA E +"\ue00e>\u0a8f;" // FALLBACK +"\ue00f>\u0a8f;" // InterIndic-Gujarati: LETTER EE (\u0a8f = LETTER E) "\ue010>\u0a90;" // LETTER AI -// \ue013>; # UNMAPPED InterIndic-Gujarati: LETTER OO (\u0a93 = LETTER O) +"\ue011>\u0a91;" // FALLBACK +"\ue012>\u0a93;" // FALLBACK +"\ue013>\u0a93;" // UNMAPPED InterIndic-Gujarati: LETTER OO (\u0a93 = LETTER O) "\ue014>\u0a94;" // LETTER AU "\ue015>\u0a95;" // LETTER KA "\ue016>\u0a96;" // LETTER KHA @@ -66,7 +70,7 @@ translit_InterIndic_Gujarati { "\ue026>\u0aa6;" // LETTER DA "\ue027>\u0aa7;" // LETTER DHA "\ue028>\u0aa8;" // LETTER NA -"\ue029>\u0aa8;" // REMAP (indicExceptions.txt): \u0aa9>\u0aa8 = LETTER NNNA>LETTER NA +"\ue029>\u0aa8\u0abc;" // FALLBACK to NA+NUKTA "\ue02a>\u0aaa;" // LETTER PA "\ue02b>\u0aab;" // LETTER PHA "\ue02c>\u0aac;" // LETTER BA @@ -74,9 +78,10 @@ translit_InterIndic_Gujarati { "\ue02e>\u0aae;" // LETTER MA "\ue02f>\u0aaf;" // LETTER YA 
"\ue030>\u0ab0;" // LETTER RA +"\ue031>\u0ab0\u0abc;" // FALLBACK "\ue032>\u0ab2;" // LETTER LA "\ue033>\u0ab3;" // LETTER LLA -"\ue034>\u0ab3;" // REMAP (indicExceptions.txt): \u0ab4>\u0ab3 = LETTER LLLA>LETTER LLA +"\ue034>\u0ab3\u0abc;" // LETTER LLLA>LETTER LLA+NUKTA "\ue035>\u0ab5;" // LETTER VA "\ue036>\u0ab6;" // LETTER SHA "\ue037>\u0ab7;" // LETTER SSA @@ -92,19 +97,23 @@ translit_InterIndic_Gujarati { "\ue043>\u0ac3;" // VOWEL SIGN VOCALIC R "\ue044>\u0ac4;" // VOWEL SIGN VOCALIC RR "\ue045>\u0ac5;" // VOWEL SIGN CANDRA E -// \ue047>; # UNMAPPED InterIndic-Gujarati: VOWEL SIGN EE (\u0ac7 = VOWEL SIGN E) +"\ue046>\u0ac7;" // FALLBACK +"\ue047>\u0ac7;" // InterIndic-Gujarati: VOWEL SIGN EE (\u0ac7 = VOWEL SIGN E) "\ue048>\u0ac8;" // VOWEL SIGN AI "\ue049>\u0ac9;" // VOWEL SIGN CANDRA O -// \ue04b>; # UNMAPPED InterIndic-Gujarati: VOWEL SIGN OO (\u0acb = VOWEL SIGN O) +"\ue04a>\u0acb;" // FALLBACK +"\ue04b>\u0acb;" // UNMAPPED InterIndic-Gujarati: VOWEL SIGN OO (\u0acb = VOWEL SIGN O) "\ue04c>\u0acc;" // VOWEL SIGN AU "\ue04d>\u0acd;" // SIGN VIRAMA "\ue050>\u0ad0;" // OM -// \ue055>; # UNMAPPED InterIndic-Gujarati: LENGTH MARK +//\ue055>; # UNMAPPED InterIndic-Gujarati: LENGTH MARK "\ue056>\u0ac8;" // REMAP (indicExceptions.txt): \u0ad6>\u0ac8 = AI LENGTH MARK>VOWEL SIGN AI "\ue057>\u0acc;" // REMAP (indicExceptions.txt): \u0ad7>\u0acc = AU LENGTH MARK>VOWEL SIGN AU +"\ue058>\u0a95\u0abc;" // FALLBACK "\ue059>\u0a96\u0abc;" // REMAP (indicExceptions.txt): \u0ad9>\u0a96\u0abc = LETTER KHHA>LETTER KHA.SIGN NUKTA "\ue05a>\u0a97\u0abc;" // REMAP (indicExceptions.txt): \u0ada>\u0a97\u0abc = LETTER GHHA>LETTER GA.SIGN NUKTA "\ue05b>\u0a9c\u0abc;" // REMAP (indicExceptions.txt): \u0adb>\u0a9c\u0abc = LETTER ZA>LETTER JA.SIGN NUKTA +"\ue05c>\u0aa1\u0abc;" // FALLBACK "\ue05d>\u0aa2\u0abc;" // REMAP (indicExceptions.txt): \u0add>\u0aa2\u0abc = LETTER RHA>LETTER DDHA.SIGN NUKTA "\ue05e>\u0aab\u0abc;" // REMAP (indicExceptions.txt): \u0ade>\u0aab\u0abc = LETTER 
FA>LETTER PHA.SIGN NUKTA "\ue05f>\u0aaf\u0abc;" // REMAP (indicExceptions.txt): \u0adf>\u0aaf\u0abc = LETTER YYA>LETTER YA.SIGN NUKTA @@ -122,12 +131,7 @@ translit_InterIndic_Gujarati { "\ue06d>\u0aed;" // DIGIT SEVEN "\ue06e>\u0aee;" // DIGIT EIGHT "\ue06f>\u0aef;" // DIGIT NINE -// \ue080>; # UNMAPPED InterIndic-Gujarati: ISSHAR -"\ue00f>\u0a8f;" // LETTER E -"\ue013>\u0a93;" // LETTER O -// \ue083>; # UNMAPPED InterIndic-Gujarati: LETTER RRA (\u0a83 = SIGN VISARGA) -"\ue047>\u0ac7;" // VOWEL SIGN E -"\ue04b>\u0acb;" // VOWEL SIGN O +//\ue080>; # UNMAPPED InterIndic-Gujarati: ISSHAR // :: NFC (NFD) ; // eof } diff --git a/icu4c/data/translit_InterIndic_Gurmukhi.txt b/icu4c/data/translit_InterIndic_Gurmukhi.txt index e6efc08469..a01bc99e15 100644 --- a/icu4c/data/translit_InterIndic_Gurmukhi.txt +++ b/icu4c/data/translit_InterIndic_Gurmukhi.txt @@ -6,7 +6,7 @@ // THIS IS A MACHINE-GENERATED FILE // Tool: dumpICUrules.bat // Source: ../../text/resources/Transliterator_InterIndic_Gurmukhi.txt -// Date: Thu Oct 25 22:17:21 2001 +// Date: Sat Nov 10 17:25:41 2001 //-------------------------------------------------------------------- // InterIndic_Gurmukhi @@ -32,8 +32,8 @@ translit_InterIndic_Gurmukhi { // InterIndic-Gurmukhi //:: NFD (NFC) ; "\ue001>\u0a02;" // REMAP (indicExceptions.txt): \u0a01>\u0a02 = SIGN CANDRABINDU>SIGN BINDI -// \ue002>; # UNMAPPED InterIndic-Gurmukhi: SIGN ANUSVARA (\u0a02 = SIGN BINDI) -// \ue003>; # UNMAPPED InterIndic-Gurmukhi: SIGN VISARGA +"\ue002>;" // FALLBACK BLOW AWAY SIGN ANUSVARA (\u0a02 = SIGN BINDI) +"\ue003>;" // FALLBACK BLOW AWAY SIGN VISARGA "\ue005>\u0a05;" // LETTER A "\ue006>\u0a06;" // LETTER AA "\ue007>\u0a07;" // LETTER I @@ -41,9 +41,13 @@ translit_InterIndic_Gurmukhi { "\ue009>\u0a09;" // LETTER U "\ue00a>\u0a0a;" // LETTER UU "\ue00b>\u0a30\u0a3f;" // REMAP (indicExceptions.txt): \u0a0b>\u0a30\u0a3f = LETTER VOCALIC R>LETTER RA.VOWEL SIGN I -"\ue00c>\u0a07;" // REMAP (indicExceptions.txt): \u0a0c>\u0a07 = LETTER 
VOCALIC L>LETTER I +"\ue00c>\u0a33;" // FALLBACK +"\ue00d>\u0a0f;" // FALLBACK +"\ue00e>\u0a0f;" // FALLBACK "\ue00f>\u0a0f;" // LETTER EE "\ue010>\u0a10;" // LETTER AI +"\ue011>\u0a13;" // FALLBACK +"\ue012>\u0a13;" // FALLBACK "\ue013>\u0a13;" // LETTER OO "\ue014>\u0a14;" // LETTER AU "\ue015>\u0a15;" // LETTER KA @@ -66,7 +70,7 @@ translit_InterIndic_Gurmukhi { "\ue026>\u0a26;" // LETTER DA "\ue027>\u0a27;" // LETTER DHA "\ue028>\u0a28;" // LETTER NA -"\ue029>\u0a28;" // REMAP (indicExceptions.txt): \u0a29>\u0a28 = LETTER NNNA>LETTER NA +"\ue029>\u0a28\u0a3c;" // REMAP (indicExceptions.txt): \u0a29>\u0a28 = LETTER NNNA>LETTER NA "\ue02a>\u0a2a;" // LETTER PA "\ue02b>\u0a2b;" // LETTER PHA "\ue02c>\u0a2c;" // LETTER BA @@ -74,6 +78,7 @@ translit_InterIndic_Gurmukhi { "\ue02e>\u0a2e;" // LETTER MA "\ue02f>\u0a2f;" // LETTER YA "\ue030>\u0a30;" // LETTER RA +"\ue031>\u0a30\u0a3c;" // FALLBACK LETTER RA+NUKTA "\ue032>\u0a32;" // LETTER LA "\ue033>\u0a33;" // LETTER LLA "\ue034>\u0a33;" // REMAP (indicExceptions.txt): \u0a34>\u0a33 = LETTER LLLA>LETTER LLA @@ -83,33 +88,37 @@ translit_InterIndic_Gurmukhi { "\ue038>\u0a38;" // LETTER SA "\ue039>\u0a39;" // LETTER HA "\ue03c>\u0a3c;" // SIGN NUKTA -// \ue03d>; # UNMAPPED InterIndic-Gurmukhi: SIGN AVAGRAHA +"\ue03d>;" // FALLBACK BLOW AWAY SIGN AVAGRAHA "\ue03e>\u0a3e;" // VOWEL SIGN AA "\ue03f>\u0a3f;" // VOWEL SIGN I "\ue040>\u0a40;" // VOWEL SIGN II "\ue041>\u0a41;" // VOWEL SIGN U "\ue042>\u0a42;" // VOWEL SIGN UU -// \ue043>; # UNMAPPED InterIndic-Gurmukhi: VOWEL SIGN VOCALIC R -// \ue044>; # UNMAPPED InterIndic-Gurmukhi: VOWEL SIGN VOCALIC RR +"\ue043>;" // FALLBACK BLOW AWAY VOWEL SIGN VOCALIC R +"\ue044>;" // FALLBACK BLOW AWAY VOWEL SIGN VOCALIC RR "\ue045>\u0a48;" // REMAP (indicExceptions.txt): \u0a45>\u0a48 = VOWEL SIGN CANDRA E>VOWEL SIGN AI +"\ue046>\u0a47;" // FALLBACK "\ue047>\u0a47;" // VOWEL SIGN EE "\ue048>\u0a48;" // VOWEL SIGN AI "\ue049>\u0a4c;" // REMAP (indicExceptions.txt): \u0a49>\u0a4c = 
VOWEL SIGN CANDRA O>VOWEL SIGN AU +"\ue04a>\u0a4b;" // FALLBACK "\ue04b>\u0a4b;" // VOWEL SIGN OO "\ue04c>\u0a4c;" // VOWEL SIGN AU "\ue04d>\u0a4d;" // SIGN VIRAMA -// \ue050>; # UNMAPPED InterIndic-Gurmukhi: OM -// \ue055>; # UNMAPPED InterIndic-Gurmukhi: LENGTH MARK +"\ue050>\u0a0f\u0a02;" // FALLBACK for OM: \u0a0f is LETTER EE (OO is \u0a13) - NOTE(review): confirm EE+BINDI is intended +"\ue055>;" // FALLBACK BLOW AWAY LENGTH MARK "\ue056>\u0a48;" // REMAP (indicExceptions.txt): \u0a56>\u0a48 = AI LENGTH MARK>VOWEL SIGN AI "\ue057>\u0a4c;" // REMAP (indicExceptions.txt): \u0a57>\u0a4c = AU LENGTH MARK>VOWEL SIGN AU +"\ue058>\u0a15\u0a3c;" // FALLBACK KA+NUKTA "\ue059>\u0a59;" // LETTER KHHA "\ue05a>\u0a5a;" // LETTER GHHA "\ue05b>\u0a5b;" // LETTER ZA +"\ue05c>\u0a5c;" // LETTER RRA "\ue05d>\u0a22\u0a3c;" // REMAP (indicExceptions.txt): \u0a5d>\u0a22\u0a3c = LETTER RHA>LETTER DDHA.SIGN NUKTA "\ue05e>\u0a5e;" // LETTER FA -"\ue05f>\u0a2f;" // REMAP (indicExceptions.txt): \u0a5f>\u0a2f = LETTER YYA>LETTER YA +"\ue05f>\u0a2f\u0a3c;" // REMAP (indicExceptions.txt): \u0a5f>\u0a2f = LETTER YYA>LETTER YA "\ue060>\u0a30\u0a3f;" // REMAP (indicExceptions.txt): \u0a60>\u0a30\u0a3f = LETTER VOCALIC RR>LETTER RA.VOWEL SIGN I -"\ue061>\u0a08\u0a3c;" // REMAP (indicExceptions.txt): \u0a61>\u0a08\u0a3c = LETTER VOCALIC LL>LETTER II.SIGN NUKTA +"\ue061>\u0a33\u0a3c;" // FALLBACK LETTER VOCALIC LL>LETTER LLA.SIGN NUKTA "\ue062>\u0a3f\u0a3c;" // REMAP (indicExceptions.txt): \u0a62>\u0a3f\u0a3c = VOWEL SIGN VOCALIC L>VOWEL SIGN I.SIGN NUKTA "\ue063>\u0a40\u0a3c;" // REMAP (indicExceptions.txt): \u0a63>\u0a40\u0a3c = VOWEL SIGN VOCALIC LL>VOWEL SIGN II.SIGN NUKTA "\ue066>\u0a66;" // DIGIT ZERO @@ -122,12 +131,16 @@ translit_InterIndic_Gurmukhi { "\ue06d>\u0a6d;" // DIGIT SEVEN "\ue06e>\u0a6e;" // DIGIT EIGHT "\ue06f>\u0a6f;" // DIGIT NINE -// \ue080>; # UNMAPPED InterIndic-Gurmukhi: ISSHAR -// \ue081>; # UNMAPPED InterIndic-Gurmukhi: LETTER E -// \ue082>; # UNMAPPED InterIndic-Gurmukhi: LETTER O (\u0a02 = SIGN BINDI) -"\ue05c>\u0a5c;" // LETTER RRA -// \ue084>; # UNMAPPED InterIndic-Gurmukhi: 
VOWEL SIGN E -// \ue085>; # UNMAPPED InterIndic-Gurmukhi: VOWEL SIGN O (\u0a05 = LETTER A) +"\ue070>\u0a70;" // TIPPI +"\ue071>\u0a71;" // ADDAK +"\ue072>\u0a72;" // IRI +"\ue073>\u0a73;" // URA +"\ue074>\u0a74;" // EK ONKAR +"\ue080>;" // FALLBACK BLOW AWAY ISSHAR +"\ue081>;" // FALLBACK BLOW AWAY LETTER E +"\ue082>;" // FALLBACK BLOW AWAY LETTER O (\u0a02 = SIGN BINDI) +"\ue084>;" // FALLBACK BLOW AWAY VOWEL SIGN E +"\ue085>;" // FALLBACK BLOW AWAY VOWEL SIGN O (\u0a05 = LETTER A) // :: NFC (NFD) ; // eof } diff --git a/icu4c/data/translit_InterIndic_Kannada.txt b/icu4c/data/translit_InterIndic_Kannada.txt index d70621e63a..81c3e3347d 100644 --- a/icu4c/data/translit_InterIndic_Kannada.txt +++ b/icu4c/data/translit_InterIndic_Kannada.txt @@ -6,7 +6,7 @@ // THIS IS A MACHINE-GENERATED FILE // Tool: dumpICUrules.bat // Source: ../../text/resources/Transliterator_InterIndic_Kannada.txt -// Date: Thu Oct 25 22:17:21 2001 +// Date: Sat Nov 10 17:25:41 2001 //-------------------------------------------------------------------- // InterIndic_Kannada @@ -42,8 +42,12 @@ translit_InterIndic_Kannada { "\ue00a>\u0c8a;" // LETTER UU "\ue00b>\u0c8b;" // LETTER VOCALIC R "\ue00c>\u0c8c;" // LETTER VOCALIC L +"\ue00d>\u0c8e;" // FALLBACK +"\ue00e>\u0c8e;" // LETTER E "\ue00f>\u0c8f;" // LETTER EE "\ue010>\u0c90;" // LETTER AI +"\ue011>\u0c92;" // FALLBACK +"\ue012>\u0c92;" // LETTER O "\ue013>\u0c93;" // LETTER OO "\ue014>\u0c94;" // LETTER AU "\ue015>\u0c95;" // LETTER KA @@ -74,6 +78,7 @@ translit_InterIndic_Kannada { "\ue02e>\u0cae;" // LETTER MA "\ue02f>\u0caf;" // LETTER YA "\ue030>\u0cb0;" // LETTER RA +"\ue031>\u0cb1;" // LETTER RRA "\ue032>\u0cb2;" // LETTER LA "\ue033>\u0cb3;" // LETTER LLA "\ue034>\u0cb3;" // REMAP (indicExceptions.txt): \u0cb4>\u0cb3 = LETTER LLLA>LETTER LLA @@ -82,8 +87,10 @@ translit_InterIndic_Kannada { "\ue037>\u0cb7;" // LETTER SSA "\ue038>\u0cb8;" // LETTER SA "\ue039>\u0cb9;" // LETTER HA -// \ue03c>; # UNMAPPED InterIndic-Kannada: SIGN NUKTA 
-// \ue03d>; # UNMAPPED InterIndic-Kannada: SIGN AVAGRAHA + +"\ue03c>;" // FALLBACK BLOW AWAY NUKTA +"\ue03d>;" // FALLBACK BLOW AWAY AVAGRAHA + "\ue03e>\u0cbe;" // VOWEL SIGN AA "\ue03f>\u0cbf;" // VOWEL SIGN I "\ue040>\u0cc0;" // VOWEL SIGN II @@ -92,9 +99,11 @@ translit_InterIndic_Kannada { "\ue043>\u0cc3;" // VOWEL SIGN VOCALIC R "\ue044>\u0cc4;" // VOWEL SIGN VOCALIC RR "\ue045>\u0cc6;" // REMAP (indicExceptions.txt): \u0cc5>\u0cc6 = VOWEL SIGN CANDRA E>VOWEL SIGN E +"\ue046>\u0cc6;" // VOWEL SIGN E "\ue047>\u0cc7;" // VOWEL SIGN EE "\ue048>\u0cc8;" // VOWEL SIGN AI "\ue049>\u0cca;" // REMAP (indicExceptions.txt): \u0cc9>\u0cca = VOWEL SIGN CANDRA O>VOWEL SIGN O +"\ue04a>\u0cca;" // VOWEL SIGN O "\ue04b>\u0ccb;" // VOWEL SIGN OO "\ue04c>\u0ccc;" // VOWEL SIGN AU "\ue04d>\u0ccd;" // SIGN VIRAMA @@ -102,9 +111,11 @@ translit_InterIndic_Kannada { "\ue055>\u0cd5;" // LENGTH MARK "\ue056>\u0cd6;" // AI LENGTH MARK "\ue057>\u0ccc;" // REMAP (indicExceptions.txt): \u0cd7>\u0ccc = AU LENGTH MARK>VOWEL SIGN AU +"\ue058>\u0c95;" // FALLBACK "\ue059>\u0c96;" // REMAP (indicExceptions.txt): \u0cd9>\u0c96 = LETTER KHHA>LETTER KHA "\ue05a>\u0c97;" // REMAP (indicExceptions.txt): \u0cda>\u0c97 = LETTER GHHA>LETTER GA "\ue05b>\u0c9c;" // REMAP (indicExceptions.txt): \u0cdb>\u0c9c = LETTER ZA>LETTER JA +"\ue05c>\u0ca2;" // FALLBACK "\ue05d>\u0ca2;" // REMAP (indicExceptions.txt): \u0cdd>\u0ca2 = LETTER RHA>LETTER DDHA "\ue05e>\u0cde;" // LETTER FA "\ue05f>\u0caf;" // REMAP (indicExceptions.txt): \u0cdf>\u0caf = LETTER YYA>LETTER YA @@ -123,11 +134,6 @@ translit_InterIndic_Kannada { "\ue06e>\u0cee;" // DIGIT EIGHT "\ue06f>\u0cef;" // DIGIT NINE // \ue080>; # UNMAPPED InterIndic-Kannada: ISSHAR -"\ue00e>\u0c8e;" // LETTER E -"\ue012>\u0c92;" // LETTER O -"\ue031>\u0cb1;" // LETTER RRA -"\ue046>\u0cc6;" // VOWEL SIGN E -"\ue04a>\u0cca;" // VOWEL SIGN O // :: NFC (NFD) ; // eof } diff --git a/icu4c/data/translit_InterIndic_Latin.txt b/icu4c/data/translit_InterIndic_Latin.txt index 
e4d9a0eea7..235399bd3b 100644 --- a/icu4c/data/translit_InterIndic_Latin.txt +++ b/icu4c/data/translit_InterIndic_Latin.txt @@ -6,7 +6,7 @@ // THIS IS A MACHINE-GENERATED FILE // Tool: dumpICUrules.bat // Source: ../../text/resources/Transliterator_InterIndic_Latin.txt -// Date: Thu Oct 25 22:17:21 2001 +// Date: Sat Nov 10 17:25:41 2001 //-------------------------------------------------------------------- // InterIndic_Latin @@ -102,7 +102,7 @@ translit_InterIndic_Latin { "$virama=\ue04d;" // \u094e Reserved // \u094f Reserved -//\u0950>\ue050; # OM + "$om=\ue050;" // OM // \u0951>; # UNMAPPED STRESS SIGN UDATTA // \u0952>; # UNMAPPED STRESS SIGN ANUDATTA // \u0953>; # UNMAPPED GRAVE ACCENT @@ -253,6 +253,9 @@ translit_InterIndic_Latin { "$ra$virama}$ha>r'';" "$ra}$x>r;" "$ra>ra;" + "$rra$virama}$ha>r\u0331'';" + "$rra}$x>r\u0331;" + "$rra>r\u0331a;" "$la$virama}$ha>l'';" "$la}$x>l;" "$la>la;" @@ -282,8 +285,8 @@ translit_InterIndic_Latin { "$ena > n\u0331a ;" "$uka}$x > q ;" "$uka > qa ;" - "$ukha}$x > k\u0323 ;" - "$ukha > k\u0323a ;" + "$ukha}$x > k\u0331h\u0331 ;" + "$ukha > k\u0331h\u0331a ;" "$ugha}$x > g\u0307 ;" "$ugha > g\u0307a ;" "$ujha}$x > z ;" @@ -292,8 +295,8 @@ translit_InterIndic_Latin { "$udha > r\u0323ha;" "$uddha}$x> r\u0323 ;" "$uddha > r\u0323a ;" - "$ufa}$x > f\u0323 ;" - "$ufa > f\u0323a ;" + "$ufa}$x > f ;" + "$ufa > fa ;" // dependent vowels (should never occur except following consonants) "$aa > a\u0304 ;" "$ai > ai ;" @@ -315,33 +318,33 @@ translit_InterIndic_Latin { "$se > e ;" "$so > o ;" // independent vowels (when following consonants) - "a}$waa > ''a\u0304 ;" - "$z}$waa > ''a\u0304 ;" - "a}$wai > ''ai ;" - "$z}$wai > ''ai ;" - "a}$wau > ''au ;" - "$z}$wau > ''au ;" - "a}$wii > ''i\u0304 ;" - "$z}$wii > ''i\u0304 ;" - "a}$wi > ''i ;" - "$z}$wi > ''i ;" - "a}$wuu > ''u\u0304 ;" - "$z}$wuu > ''u\u0304 ;" - "a}$wu > ''u ;" - "$z}$wu > ''u ;" - "$z}$wrr > ''r\u0325\u0304 ;" - "$z}$wr > ''r\u0325 ;" - "$z}$wll > ''l\u0325\u0304 ;" - 
"$z}$wl > ''l\u0325 ;" - "$z}$we > ''e\u0304 ;" - "$z}$wo > ''o\u0304 ;" - "a}$wa > ''a ;" - "$z}$wa > ''a ;" + "a{$waa > ''a\u0304 ;" + "$z{$waa > ''a\u0304 ;" + "a{$wai > ''ai ;" + "$z{$wai > ''ai ;" + "a{$wau > ''au ;" + "$z{$wau > ''au ;" + "a{$wii > ''i\u0304 ;" + "$z{$wii > ''i\u0304 ;" + "a{$wi > ''i ;" + "$z{$wi > ''i ;" + "a{$wuu > ''u\u0304 ;" + "$z{$wuu > ''u\u0304 ;" + "a{$wu > ''u ;" + "$z{$wu > ''u ;" + "$z{$wrr > ''r\u0325\u0304 ;" + "$z{$wr > ''r\u0325 ;" + "$z{$wll > ''l\u0325\u0304 ;" + "$z{$wl > ''l\u0325 ;" + "$z{$we > ''e\u0304 ;" + "$z{$wo > ''o\u0304 ;" + "a{$wa > ''a ;" + "$z{$wa > ''a ;" //extra vowels - "$z}$wce > ''e\u0306 ;" - "$z}$wco > ''o\u0306 ;" - "$z}$wse > ''e ;" - "$z}$wso > ''o ;" + "$z{$wce > ''e\u0306 ;" + "$z{$wco > ''o\u0306 ;" + "$z{$wse > ''e ;" + "$z{$wso > ''o ;" // independent vowels (otherwise) "$waa > a\u0304 ;" "$wai > ai ;" @@ -362,6 +365,7 @@ translit_InterIndic_Latin { "$wco > o\u0306 ;" "$wse > e ;" "$wso > o ;" + "$om > ''om ;" //stress marks "$avagraha > \u0315;" "$chandrabindu$anusvara>'-'\u0303;" diff --git a/icu4c/data/translit_InterIndic_Malayalam.txt b/icu4c/data/translit_InterIndic_Malayalam.txt index c28edd196a..30037e8b60 100644 --- a/icu4c/data/translit_InterIndic_Malayalam.txt +++ b/icu4c/data/translit_InterIndic_Malayalam.txt @@ -6,7 +6,7 @@ // THIS IS A MACHINE-GENERATED FILE // Tool: dumpICUrules.bat // Source: ../../text/resources/Transliterator_InterIndic_Malayalam.txt -// Date: Thu Oct 25 22:17:21 2001 +// Date: Sat Nov 10 17:25:41 2001 //-------------------------------------------------------------------- // InterIndic_Malayalam @@ -42,8 +42,12 @@ translit_InterIndic_Malayalam { "\ue00a>\u0d0a;" // LETTER UU "\ue00b>\u0d0b;" // LETTER VOCALIC R "\ue00c>\u0d0c;" // LETTER VOCALIC L +"\ue00d>\u0d0e;" // FALLBACK LETTER E +"\ue00e>\u0d0e;" // LETTER E "\ue00f>\u0d0f;" // LETTER EE "\ue010>\u0d10;" // LETTER AI +"\ue011>\u0d12;" // FALLBACK TO O +"\ue012>\u0d12;" // LETTER O "\ue013>\u0d13;" // 
LETTER OO "\ue014>\u0d14;" // LETTER AU "\ue015>\u0d15;" // LETTER KA @@ -74,6 +78,7 @@ translit_InterIndic_Malayalam { "\ue02e>\u0d2e;" // LETTER MA "\ue02f>\u0d2f;" // LETTER YA "\ue030>\u0d30;" // LETTER RA +"\ue031>\u0d31;" // LETTER RRA "\ue032>\u0d32;" // LETTER LA "\ue033>\u0d33;" // LETTER LLA "\ue034>\u0d34;" // LETTER LLLA @@ -82,36 +87,42 @@ translit_InterIndic_Malayalam { "\ue037>\u0d37;" // LETTER SSA "\ue038>\u0d38;" // LETTER SA "\ue039>\u0d39;" // LETTER HA -// \ue03c>; # UNMAPPED InterIndic-Malayalam: SIGN NUKTA -// \ue03d>; # UNMAPPED InterIndic-Malayalam: SIGN AVAGRAHA + +"\ue03c>;" // FALLBACK BLOW AWAY NUKTA +"\ue03d>;" // FALLBACK BLOW AWAY AVAGRAHA + "\ue03e>\u0d3e;" // VOWEL SIGN AA "\ue03f>\u0d3f;" // VOWEL SIGN I "\ue040>\u0d40;" // VOWEL SIGN II "\ue041>\u0d41;" // VOWEL SIGN U "\ue042>\u0d42;" // VOWEL SIGN UU "\ue043>\u0d43;" // VOWEL SIGN VOCALIC R -// \ue044>; # UNMAPPED InterIndic-Malayalam: VOWEL SIGN VOCALIC RR +"\ue044>;" // FALLBACK BLOW AWAY VOWEL SIGN VOCALIC RR "\ue045>\u0d3e;" // REMAP (indicExceptions.txt): \u0d45>\u0d3e = VOWEL SIGN CANDRA E>VOWEL SIGN AA +"\ue046>\u0d46;" // VOWEL SIGN E "\ue047>\u0d47;" // VOWEL SIGN EE "\ue048>\u0d48;" // VOWEL SIGN AI "\ue049>\u0d4b;" // REMAP (indicExceptions.txt): \u0d49>\u0d4b = VOWEL SIGN CANDRA O>VOWEL SIGN OO +"\ue04a>\u0d4a;" // VOWEL SIGN O "\ue04b>\u0d4b;" // VOWEL SIGN OO "\ue04c>\u0d4c;" // VOWEL SIGN AU "\ue04d>\u0d4d;" // SIGN VIRAMA -// \ue050>; # UNMAPPED InterIndic-Malayalam: OM -// \ue055>; # UNMAPPED InterIndic-Malayalam: LENGTH MARK +"\ue050>\u0d13\u0d02;" // UNMAPPED InterIndic-Malayalam: OM +"\ue055>;" // FALLBACK BLOW AWAY LENGTH MARK "\ue056>\u0d48;" // REMAP (indicExceptions.txt): \u0d56>\u0d48 = AI LENGTH MARK>VOWEL SIGN AI "\ue057>\u0d57;" // AU LENGTH MARK +"\ue058>\u0d15;" // FALLBACK "\ue059>\u0d16;" // REMAP (indicExceptions.txt): \u0d59>\u0d16 = LETTER KHHA>LETTER KHA "\ue05a>\u0d17;" // REMAP (indicExceptions.txt): \u0d5a>\u0d17 = LETTER GHHA>LETTER GA 
"\ue05b>\u0d1c;" // REMAP (indicExceptions.txt): \u0d5b>\u0d1c = LETTER ZA>LETTER JA "\ue05d>\u0d22;" // REMAP (indicExceptions.txt): \u0d5d>\u0d22 = LETTER RHA>LETTER DDHA +"\ue05c>\u0d21;" // FALLBACK "\ue05e>\u0d2b;" // REMAP (indicExceptions.txt): \u0d5e>\u0d2b = LETTER FA>LETTER PHA "\ue05f>\u0d2f;" // REMAP (indicExceptions.txt): \u0d5f>\u0d2f = LETTER YYA>LETTER YA "\ue060>\u0d60;" // LETTER VOCALIC RR "\ue061>\u0d61;" // LETTER VOCALIC LL -// \ue062>; # UNMAPPED InterIndic-Malayalam: VOWEL SIGN VOCALIC L -// \ue063>; # UNMAPPED InterIndic-Malayalam: VOWEL SIGN VOCALIC LL +"\ue062>;" // FALLBACK BLOW AWAY VOWEL SIGN VOCALIC L +"\ue063>;" // FALLBACK BLOW AWAY VOWEL SIGN VOCALIC LL "\ue066>\u0d66;" // DIGIT ZERO "\ue067>\u0d67;" // DIGIT ONE "\ue068>\u0d68;" // DIGIT TWO @@ -123,11 +134,11 @@ translit_InterIndic_Malayalam { "\ue06e>\u0d6e;" // DIGIT EIGHT "\ue06f>\u0d6f;" // DIGIT NINE // \ue080>; # UNMAPPED InterIndic-Malayalam: ISSHAR -"\ue00e>\u0d0e;" // LETTER E -"\ue012>\u0d12;" // LETTER O -"\ue031>\u0d31;" // LETTER RRA -"\ue046>\u0d46;" // VOWEL SIGN E -"\ue04a>\u0d4a;" // VOWEL SIGN O + + + + + // :: NFC (NFD) ; // eof } diff --git a/icu4c/data/translit_InterIndic_Oriya.txt b/icu4c/data/translit_InterIndic_Oriya.txt index ee8fc159de..3a03a72823 100644 --- a/icu4c/data/translit_InterIndic_Oriya.txt +++ b/icu4c/data/translit_InterIndic_Oriya.txt @@ -6,7 +6,7 @@ // THIS IS A MACHINE-GENERATED FILE // Tool: dumpICUrules.bat // Source: ../../text/resources/Transliterator_InterIndic_Oriya.txt -// Date: Thu Oct 25 22:17:21 2001 +// Date: Sat Nov 10 17:25:41 2001 //-------------------------------------------------------------------- // InterIndic_Oriya @@ -42,9 +42,13 @@ translit_InterIndic_Oriya { "\ue00a>\u0b0a;" // LETTER UU "\ue00b>\u0b0b;" // LETTER VOCALIC R "\ue00c>\u0b0c;" // LETTER VOCALIC L -// \ue00f>; # UNMAPPED InterIndic-Oriya: LETTER EE (\u0b0f = LETTER E) +"\ue00d>\u0b0f;" // FALLBACK +"\ue00e>\u0b0f;" // FALLBACK +"\ue00f>\u0b0f;" // LETTER 
E "\ue010>\u0b10;" // LETTER AI -// \ue013>; # UNMAPPED InterIndic-Oriya: LETTER OO (\u0b13 = LETTER O) +"\ue011>\u0b13;" // FALLBACK +"\ue012>\u0b13;" // FALLBACK +"\ue013>\u0b13;" // UNMAPPED InterIndic-Oriya: LETTER OO (\u0b13 = LETTER O) "\ue014>\u0b14;" // LETTER AU "\ue015>\u0b15;" // LETTER KA "\ue016>\u0b16;" // LETTER KHA @@ -66,7 +70,7 @@ translit_InterIndic_Oriya { "\ue026>\u0b26;" // LETTER DA "\ue027>\u0b27;" // LETTER DHA "\ue028>\u0b28;" // LETTER NA -"\ue029>\u0b28;" // REMAP (indicExceptions.txt): \u0b29>\u0b28 = LETTER NNNA>LETTER NA +"\ue029>\u0b28\u0b3c;" // REMAP (indicExceptions.txt): \u0b29>\u0b28 = LETTER NNNA>LETTER NA "\ue02a>\u0b2a;" // LETTER PA "\ue02b>\u0b2b;" // LETTER PHA "\ue02c>\u0b2c;" // LETTER BA @@ -74,9 +78,10 @@ translit_InterIndic_Oriya { "\ue02e>\u0b2e;" // LETTER MA "\ue02f>\u0b2f;" // LETTER YA "\ue030>\u0b30;" // LETTER RA +"\ue031>\u0b5c;" // LETTER RRA "\ue032>\u0b32;" // LETTER LA "\ue033>\u0b33;" // LETTER LLA -"\ue034>\u0b33;" // REMAP (indicExceptions.txt): \u0b34>\u0b33 = LETTER LLLA>LETTER LLA +"\ue034>\u0b33\u0b3c;" // REMAP (indicExceptions.txt): \u0b34>\u0b33 = LETTER LLLA>LETTER LLA "\ue035>\u0b2c;" // REMAP (indicExceptions.txt): \u0b35>\u0b2c = LETTER VA>LETTER BA "\ue036>\u0b36;" // LETTER SHA "\ue037>\u0b37;" // LETTER SSA @@ -91,11 +96,13 @@ translit_InterIndic_Oriya { "\ue042>\u0b42;" // VOWEL SIGN UU "\ue043>\u0b43;" // VOWEL SIGN VOCALIC R "\ue044>\u0b43\u0b3c;" // REMAP (indicExceptions.txt): \u0b44>\u0b43\u0b3c = VOWEL SIGN VOCALIC RR>VOWEL SIGN VOCALIC R.SIGN NUKTA -"\ue045>\u0b47;" // REMAP (indicExceptions.txt): \u0b45>\u0b47 = VOWEL SIGN CANDRA E>VOWEL SIGN E -// \ue047>; # UNMAPPED InterIndic-Oriya: VOWEL SIGN EE (\u0b47 = VOWEL SIGN E) +"\ue045>\u0b47;" // FALLBACK +"\ue046>\u0b47;" // FALLBACK +"\ue047>\u0b47;" // VOWEL SIGN E "\ue048>\u0b48;" // VOWEL SIGN AI -"\ue049>\u0b4b;" // REMAP (indicExceptions.txt): \u0b49>\u0b4b = VOWEL SIGN CANDRA O>VOWEL SIGN O -// \ue04b>; # UNMAPPED 
InterIndic-Oriya: VOWEL SIGN OO (\u0b4b = VOWEL SIGN O) +"\ue049>\u0b4b;" // FALLBACK +"\ue04a>\u0b4b;" // FALLBACK +"\ue04b>\u0b4b;" // VOWEL SIGN O "\ue04c>\u0b4c;" // VOWEL SIGN AU "\ue04d>\u0b4d;" // SIGN VIRAMA "\ue050>\u0b13\u0b01;" // REMAP (indicExceptions.txt): \u0b50>\u0b13\u0b01 = OM>LETTER O.SIGN CANDRABINDU @@ -103,8 +110,10 @@ translit_InterIndic_Oriya { "\ue056>\u0b56;" // AI LENGTH MARK "\ue057>\u0b57;" // AU LENGTH MARK "\ue059>\u0b16\u0b3c;" // REMAP (indicExceptions.txt): \u0b59>\u0b16\u0b3c = LETTER KHHA>LETTER KHA.SIGN NUKTA +"\ue058>\u0b15\u0b3c;" // FALLBACK "\ue05a>\u0b17\u0b3c;" // REMAP (indicExceptions.txt): \u0b5a>\u0b17\u0b3c = LETTER GHHA>LETTER GA.SIGN NUKTA "\ue05b>\u0b1c\u0b3c;" // REMAP (indicExceptions.txt): \u0b5b>\u0b1c\u0b3c = LETTER ZA>LETTER JA.SIGN NUKTA +"\ue05c>\u0b21\u0b3c;" // FALLBACK "\ue05d>\u0b5d;" // LETTER RHA "\ue05e>\u0b2b\u0b3c;" // REMAP (indicExceptions.txt): \u0b5e>\u0b2b\u0b3c = LETTER FA>LETTER PHA.SIGN NUKTA "\ue05f>\u0b5f;" // LETTER YYA @@ -123,11 +132,8 @@ translit_InterIndic_Oriya { "\ue06e>\u0b6e;" // DIGIT EIGHT "\ue06f>\u0b6f;" // DIGIT NINE "\ue070>\u0b70;" // ISSHAR -"\ue00e>\u0b0f;" // LETTER E -"\ue013>\u0b13;" // LETTER O -"\ue031>\u0b5c;" // LETTER RRA -"\ue047>\u0b47;" // VOWEL SIGN E -"\ue04b>\u0b4b;" // VOWEL SIGN O + + // :: NFC (NFD) ; // eof } diff --git a/icu4c/data/translit_InterIndic_Tamil.txt b/icu4c/data/translit_InterIndic_Tamil.txt index 1c6d1ba9a2..780fb029b4 100644 --- a/icu4c/data/translit_InterIndic_Tamil.txt +++ b/icu4c/data/translit_InterIndic_Tamil.txt @@ -6,7 +6,7 @@ // THIS IS A MACHINE-GENERATED FILE // Tool: dumpICUrules.bat // Source: ../../text/resources/Transliterator_InterIndic_Tamil.txt -// Date: Thu Oct 25 22:17:21 2001 +// Date: Sat Nov 10 17:25:41 2001 //-------------------------------------------------------------------- // InterIndic_Tamil @@ -31,7 +31,7 @@ translit_InterIndic_Tamil { //-------------------------------------------------------------------- // 
InterIndic-Tamil //:: NFD (NFC) ; -// \ue001>; # UNMAPPED InterIndic-Tamil: SIGN CANDRABINDU +"\ue001>\u0b82;" // FALLBACK SIGN CANDRABINDU "\ue002>\u0b82;" // SIGN ANUSVARA "\ue003>\u0b83;" // SIGN VISARGA "\ue005>\u0b85;" // LETTER A @@ -42,8 +42,12 @@ translit_InterIndic_Tamil { "\ue00a>\u0b8a;" // LETTER UU "\ue00b>\u0bb0\u0bbf;" // REMAP (indicExceptions.txt): \u0b8b>\u0bb0\u0bbf = LETTER VOCALIC R>LETTER RA.VOWEL SIGN I "\ue00c>\u0b87;" // REMAP (indicExceptions.txt): \u0b8c>\u0b87 = LETTER VOCALIC L>LETTER I +"\ue00d>\u0b8f;" // FALLBACK +"\ue00e>\u0b8e;" // LETTER E "\ue00f>\u0b8f;" // LETTER EE "\ue010>\u0b90;" // LETTER AI +"\ue011>\u0b92;" // FALLBACK +"\ue012>\u0b92;" // LETTER O "\ue013>\u0b93;" // LETTER OO "\ue014>\u0b94;" // LETTER AU "\ue015>\u0b95;" // LETTER KA @@ -74,6 +78,7 @@ translit_InterIndic_Tamil { "\ue02e>\u0bae;" // LETTER MA "\ue02f>\u0baf;" // LETTER YA "\ue030>\u0bb0;" // LETTER RA +"\ue031>\u0bb1;" // LETTER RRA "\ue032>\u0bb2;" // LETTER LA "\ue033>\u0bb3;" // LETTER LLA "\ue034>\u0bb4;" // LETTER LLLA @@ -82,8 +87,10 @@ translit_InterIndic_Tamil { "\ue037>\u0bb7;" // LETTER SSA "\ue038>\u0bb8;" // LETTER SA "\ue039>\u0bb9;" // LETTER HA -// \ue03c>; # UNMAPPED InterIndic-Tamil: SIGN NUKTA -// \ue03d>; # UNMAPPED InterIndic-Tamil: SIGN AVAGRAHA + +"\ue03c>;" // FALLBACK BLOW AWAY NUKTA +"\ue03d>;" // FALLBACK BLOW AWAY AVAGRAHA + "\ue03e>\u0bbe;" // VOWEL SIGN AA "\ue03f>\u0bbf;" // VOWEL SIGN I "\ue040>\u0bc0;" // VOWEL SIGN II @@ -92,27 +99,32 @@ translit_InterIndic_Tamil { "\ue043>\u0bcd\u0bb0\u0bbf;" // REMAP (indicExceptions.txt): \u0bc3>\u0bcd\u0bb0\u0bbf = VOWEL SIGN VOCALIC R>SIGN VIRAMA.LETTER RA.VOWEL SIGN I "\ue044>\u0bcd\u0bb0\u0bbf;" // REMAP (indicExceptions.txt): \u0bc4>\u0bcd\u0bb0\u0bbf = VOWEL SIGN VOCALIC RR>SIGN VIRAMA.LETTER RA.VOWEL SIGN I "\ue045>\u0bbe;" // REMAP (indicExceptions.txt): \u0bc5>\u0bbe = VOWEL SIGN CANDRA E>VOWEL SIGN AA +"\ue046>\u0bc6;" // VOWEL SIGN E "\ue047>\u0bc7;" // VOWEL SIGN EE 
"\ue048>\u0bc8;" // VOWEL SIGN AI "\ue049>\u0bbe;" // REMAP (indicExceptions.txt): \u0bc9>\u0bbe = VOWEL SIGN CANDRA O>VOWEL SIGN AA +"\ue04a>\u0bca;" // VOWEL SIGN O "\ue04b>\u0bcb;" // VOWEL SIGN OO "\ue04c>\u0bcc;" // VOWEL SIGN AU "\ue04d>\u0bcd;" // SIGN VIRAMA "\ue050>\u0b93\u0bae\u0bcd;" // REMAP (indicExceptions.txt): \u0bd0>\u0b93\u0bae\u0bcd = OM>LETTER OO.LETTER MA.SIGN VIRAMA + // \ue055>; # UNMAPPED InterIndic-Tamil: LENGTH MARK "\ue056>\u0bc8;" // REMAP (indicExceptions.txt): \u0bd6>\u0bc8 = AI LENGTH MARK>VOWEL SIGN AI "\ue057>\u0bd7;" // AU LENGTH MARK +"\ue058>\u0b95;" // FALLBACK "\ue059>\u0b95;" // REMAP (indicExceptions.txt): \u0bd9>\u0b95 = LETTER KHHA>LETTER KA "\ue05a>\u0b95;" // REMAP (indicExceptions.txt): \u0bda>\u0b95 = LETTER GHHA>LETTER KA "\ue05b>\u0b9c;" // REMAP (indicExceptions.txt): \u0bdb>\u0b9c = LETTER ZA>LETTER JA +"\ue05c>\u0ba4;" // FALLBACK "\ue05d>\u0b9f;" // REMAP (indicExceptions.txt): \u0bdd>\u0b9f = LETTER RHA>LETTER TTA "\ue05e>\u0baa;" // REMAP (indicExceptions.txt): \u0bde>\u0baa = LETTER FA>LETTER PA "\ue05f>\u0baf;" // REMAP (indicExceptions.txt): \u0bdf>\u0baf = LETTER YYA>LETTER YA "\ue060>\u0bb0\u0bbf;" // REMAP (indicExceptions.txt): \u0be0>\u0bb0\u0bbf = LETTER VOCALIC RR>LETTER RA.VOWEL SIGN I "\ue061>\u0b88;" // REMAP (indicExceptions.txt): \u0be1>\u0b88 = LETTER VOCALIC LL>LETTER II -// \ue062>; # UNMAPPED InterIndic-Tamil: VOWEL SIGN VOCALIC L -// \ue063>; # UNMAPPED InterIndic-Tamil: VOWEL SIGN VOCALIC LL -// \ue066>; # UNMAPPED InterIndic-Tamil: DIGIT ZERO +"\ue062>\u0bbf;"// FALLBACK VOWEL SIGN VOCALIC L +"\ue063>\u0bc0;"// FALLBACK VOWEL SIGN VOCALIC LL +"\ue066>\u0030;" // FALLBACK DIGIT ZERO "\ue067>\u0be7;" // DIGIT ONE "\ue068>\u0be8;" // DIGIT TWO "\ue069>\u0be9;" // DIGIT THREE @@ -123,11 +135,6 @@ translit_InterIndic_Tamil { "\ue06e>\u0bee;" // DIGIT EIGHT "\ue06f>\u0bef;" // DIGIT NINE // \ue080>; # UNMAPPED InterIndic-Tamil: ISSHAR -"\ue00e>\u0b8e;" // LETTER E -"\ue012>\u0b92;" // LETTER O 
-"\ue031>\u0bb1;" // LETTER RRA -"\ue046>\u0bc6;" // VOWEL SIGN E -"\ue04a>\u0bca;" // VOWEL SIGN O // :: NFC (NFD) ; // eof } diff --git a/icu4c/data/translit_InterIndic_Telugu.txt b/icu4c/data/translit_InterIndic_Telugu.txt index 24145b786a..7ff58f3e2f 100644 --- a/icu4c/data/translit_InterIndic_Telugu.txt +++ b/icu4c/data/translit_InterIndic_Telugu.txt @@ -6,7 +6,7 @@ // THIS IS A MACHINE-GENERATED FILE // Tool: dumpICUrules.bat // Source: ../../text/resources/Transliterator_InterIndic_Telugu.txt -// Date: Thu Oct 25 22:17:21 2001 +// Date: Sat Nov 10 17:25:41 2001 //-------------------------------------------------------------------- // InterIndic_Telugu @@ -42,8 +42,12 @@ translit_InterIndic_Telugu { "\ue00a>\u0c0a;" // LETTER UU "\ue00b>\u0c0b;" // LETTER VOCALIC R "\ue00c>\u0c0c;" // LETTER VOCALIC L +"\ue00d>\u0c0E;" // FALLBACK MAPPING +"\ue00e>\u0c0E;" // LETTER E "\ue00f>\u0c0f;" // LETTER EE "\ue010>\u0c10;" // LETTER AI +"\ue011>\u0c12;" // FALLBACK MAPPING +"\ue012>\u0c12;" // LETTER O "\ue013>\u0c13;" // LETTER OO "\ue014>\u0c14;" // LETTER AU "\ue015>\u0c15;" // LETTER KA @@ -82,8 +86,10 @@ translit_InterIndic_Telugu { "\ue037>\u0c37;" // LETTER SSA "\ue038>\u0c38;" // LETTER SA "\ue039>\u0c39;" // LETTER HA -// \ue03c>; # UNMAPPED InterIndic-Telugu: SIGN NUKTA -// \ue03d>; # UNMAPPED InterIndic-Telugu: SIGN AVAGRAHA + +"\ue03c>;" // FALLBACK BLOW AWAY NUKTA +"\ue03d>;" // FALLBACK BLOW AWAY AVAGRAHA + "\ue03e>\u0c3e;" // VOWEL SIGN AA "\ue03f>\u0c3f;" // VOWEL SIGN I "\ue040>\u0c40;" // VOWEL SIGN II @@ -102,9 +108,11 @@ translit_InterIndic_Telugu { "\ue055>\u0c55;" // LENGTH MARK "\ue056>\u0c56;" // AI LENGTH MARK "\ue057>\u0c4c;" // REMAP (indicExceptions.txt): \u0c57>\u0c4c = AU LENGTH MARK>VOWEL SIGN AU +"\ue058>\u0c15;" // REMAP "\ue059>\u0c16;" // REMAP (indicExceptions.txt): \u0c59>\u0c16 = LETTER KHHA>LETTER KHA "\ue05a>\u0c17;" // REMAP (indicExceptions.txt): \u0c5a>\u0c17 = LETTER GHHA>LETTER GA "\ue05b>\u0c1c;" // REMAP 
(indicExceptions.txt): \u0c5b>\u0c1c = LETTER ZA>LETTER JA +"\ue05c>\u0c22;" // REMAP "\ue05d>\u0c22;" // REMAP (indicExceptions.txt): \u0c5d>\u0c22 = LETTER RHA>LETTER DDHA "\ue05e>\u0c2b;" // REMAP (indicExceptions.txt): \u0c5e>\u0c2b = LETTER FA>LETTER PHA "\ue05f>\u0c2f;" // REMAP (indicExceptions.txt): \u0c5f>\u0c2f = LETTER YYA>LETTER YA @@ -123,8 +131,7 @@ translit_InterIndic_Telugu { "\ue06e>\u0c6e;" // DIGIT EIGHT "\ue06f>\u0c6f;" // DIGIT NINE // \ue080>; # UNMAPPED InterIndic-Telugu: ISSHAR -"\ue00e>\u0c0e;" // LETTER E -"\ue012>\u0c12;" // LETTER O + "\ue031>\u0c31;" // LETTER RRA "\ue046>\u0c46;" // VOWEL SIGN E "\ue04a>\u0c4a;" // VOWEL SIGN O diff --git a/icu4c/data/translit_Kannada_InterIndic.txt b/icu4c/data/translit_Kannada_InterIndic.txt index 587e66d0d7..9f445b99a5 100644 --- a/icu4c/data/translit_Kannada_InterIndic.txt +++ b/icu4c/data/translit_Kannada_InterIndic.txt @@ -6,7 +6,7 @@ // THIS IS A MACHINE-GENERATED FILE // Tool: dumpICUrules.bat // Source: ../../text/resources/Transliterator_Kannada_InterIndic.txt -// Date: Thu Oct 25 22:17:21 2001 +// Date: Sat Nov 10 17:25:41 2001 //-------------------------------------------------------------------- // Kannada_InterIndic diff --git a/icu4c/data/translit_Latin_InterIndic.txt b/icu4c/data/translit_Latin_InterIndic.txt index b7c91cf855..0fadbe3ff8 100644 --- a/icu4c/data/translit_Latin_InterIndic.txt +++ b/icu4c/data/translit_Latin_InterIndic.txt @@ -6,7 +6,7 @@ // THIS IS A MACHINE-GENERATED FILE // Tool: dumpICUrules.bat // Source: ../../text/resources/Transliterator_Latin_InterIndic.txt -// Date: Thu Oct 25 22:17:21 2001 +// Date: Sat Nov 10 17:25:41 2001 //-------------------------------------------------------------------- // Latin_InterIndic @@ -102,7 +102,7 @@ translit_Latin_InterIndic { "$virama=\ue04d;" // \u094e Reserved // \u094f Reserved -//\u0950>\ue050; # OM + "$om = \ue050;" // OM // \u0951>; # UNMAPPED STRESS SIGN UDATTA // \u0952>; # UNMAPPED STRESS SIGN ANUDATTA // \u0953>; # 
UNMAPPED GRAVE ACCENT @@ -166,19 +166,19 @@ translit_Latin_InterIndic { "'-'h\u0323>$visarga;" "mm>$anusvara;" "x>$visarga;" - "aa>$waa;" +// convert to independent forms at start of word or syllable: +// e.g. keai -> {ka}{e}{wai}; k'ai -> {ka}{wai}; (ai) -> ({wai}) "a\u0304>$waa;" "ai>$wai;" "au>$wau;" - "ii>$wii;" "i\u0304>$wii;" "i>$wi;" - "uu>$wuu;" + //uu>$wuu; "u\u0304>$wuu;" "u>$wu;" - "rrh>$wrr;" + //rrh>$wrr; "r\u0325\u0304>$wrr;" - "rh>$wr;" + //rh>$wr; "r\u0325>$wr;" "l\u0325\u0304>$wll;" "lh>$wl;l\u0325>$wl;" @@ -188,6 +188,7 @@ translit_Latin_InterIndic { "e\u0306>$wce;" "o\u0306>$wco;" "e>$wse;" + "''om>$om;" "o>$wso;" "n}na > $na|$virama;" "n\u0307}[kg] > $anusvara;" @@ -199,6 +200,15 @@ translit_Latin_InterIndic { "m}[pbm] > $anusvara;" "n} [yrlvsh] > $anusvara;" "'-'m\u0307 > $anusvara;" + + //urdu compatibility + "q>$uka|$virama;" + "k\u0331h\u0331>$ukha |$virama;" + "g\u0307> $ugha | $virama;" + "z > $ujha |$virama;" + "f > $ufa|$virama;" + + // dev "y\u0307>$uya|$virama;" "l\u0331>$ela|$virama;" "n\u0331>$ena|$virama;" @@ -213,7 +223,6 @@ translit_Latin_InterIndic { "d\u0323>$dda|$virama;" "kh>$kha|$virama;" "k>$ka|$virama;" - "q>$ka|$virama;" "gh>$gha|$virama;" "g>$ga|$virama;" "ch>$cha|$virama;" @@ -234,36 +243,37 @@ translit_Latin_InterIndic { "b>$ba|$virama;" "m>$ma|$virama;" "y>$ya|$virama;" + "r\u0331>$rra|$virama;" "r>$ra|$virama;" "l\u0323a>$lla;" "l>$la|$virama;" "v>$va|$virama;" - "f>$va|$virama;" "w>$va|$virama;" "sh>$sha|$virama;" "ss>$ssa|$virama;" "s\u0323>$ssa|$virama;" "s\u0301>$sha|$virama;" "s>$sa|$virama;" - "z>$sa|$virama;" "h>$ha|$virama;" "'.'>$danda;" "$danda'.'>$doubleDanda;" "$depVowelAbove{'~'>$anusvara;" "$depVowelBelow{'~'>$chandrabindu;" - "$virama aa>$aa;" +// convert to dependent forms after consonant with no vowel: +// e.g. 
kai -> {ka}{virama}ai -> {ka}{ai} + //$virama aa>$aa; "$virama a\u0304>$aa;" "$virama ai>$ai;" "$virama au>$au;" "$virama ii>$ii;" "$virama i\u0304>$ii;" "$virama i>$i;" - "$virama uu>$uu;" + //$virama uu>$uu; "$virama u\u0304>$uu;" "$virama u>$u;" - "$virama rrh>$rrh;" + //$virama rrh>$rrh; "$virama r\u0325\u0304>$rrh;" - "$virama rh>$rh;" + //$virama rh>$rh; "$virama r\u0325a>$rh;" "$virama r\u0325>$rh;" "$virama l\u0325\u0304>$llh;" @@ -276,22 +286,23 @@ translit_Latin_InterIndic { "$virama o\u0306>$co;" "$virama e>$se;" "$virama o>$so;" - "$virama''aa>$waa;" +// otherwise convert independent forms when separated by ': k'ai -> {ka}{virama}{wai} + //$virama''aa>$waa; "$virama''a\u0304>$waa;" "$virama''ai>$wai;" "$virama''au>$wau;" - "$virama''ii>$wii;" + //$virama''ii>$wii; "$virama''i\u0304>$wii;" "$virama''i>$wi;" - "$virama''uu>$wuu;" + //$virama''uu>$wuu; "$virama''u\u0304>$wuu;" "$virama''u>$wu;" - "$virama''rrh>$wrr;" + //$virama''rrh>$wrr; "$virama''r\u0325\u0304>$wrr;" - "$virama''rh>$wr;" + //$virama''rh>$wr; "$virama''r\u0325>$wr;" "$virama''l\u0325\u0304>$wll;" - "$virama''lh>$wl;" + //$virama''lh>$wl; "$virama''l\u0325>$wl;" "$virama''e\u0304>$we;" "$virama''o\u0304>$wo;" diff --git a/icu4c/data/translit_Malayalam_InterIndic.txt b/icu4c/data/translit_Malayalam_InterIndic.txt index 6fca3b1342..0818f1d055 100644 --- a/icu4c/data/translit_Malayalam_InterIndic.txt +++ b/icu4c/data/translit_Malayalam_InterIndic.txt @@ -6,7 +6,7 @@ // THIS IS A MACHINE-GENERATED FILE // Tool: dumpICUrules.bat // Source: ../../text/resources/Transliterator_Malayalam_InterIndic.txt -// Date: Thu Oct 25 22:17:22 2001 +// Date: Sat Nov 10 17:25:42 2001 //-------------------------------------------------------------------- // Malayalam_InterIndic diff --git a/icu4c/data/translit_Oriya_InterIndic.txt b/icu4c/data/translit_Oriya_InterIndic.txt index 7a5b247388..ca81e9acc4 100644 --- a/icu4c/data/translit_Oriya_InterIndic.txt +++ b/icu4c/data/translit_Oriya_InterIndic.txt @@ -6,7 
+6,7 @@ // THIS IS A MACHINE-GENERATED FILE // Tool: dumpICUrules.bat // Source: ../../text/resources/Transliterator_Oriya_InterIndic.txt -// Date: Thu Oct 25 22:17:22 2001 +// Date: Sat Nov 10 17:25:42 2001 //-------------------------------------------------------------------- // Oriya_InterIndic diff --git a/icu4c/data/translit_Tamil_InterIndic.txt b/icu4c/data/translit_Tamil_InterIndic.txt index dbc2aee910..b64602ed87 100644 --- a/icu4c/data/translit_Tamil_InterIndic.txt +++ b/icu4c/data/translit_Tamil_InterIndic.txt @@ -6,7 +6,7 @@ // THIS IS A MACHINE-GENERATED FILE // Tool: dumpICUrules.bat // Source: ../../text/resources/Transliterator_Tamil_InterIndic.txt -// Date: Thu Oct 25 22:17:22 2001 +// Date: Sat Nov 10 17:25:42 2001 //-------------------------------------------------------------------- // Tamil_InterIndic @@ -31,6 +31,7 @@ translit_Tamil_InterIndic { //-------------------------------------------------------------------- // Tamil-InterIndic //:: NFD (NFC) ; +"\u0bc6\u0bd7>\ue04c;" "\u0b82>\ue002;" // SIGN ANUSVARA "\u0b83>\ue003;" // SIGN VISARGA "\u0b85>\ue005;" // LETTER A diff --git a/icu4c/data/translit_Telugu_InterIndic.txt b/icu4c/data/translit_Telugu_InterIndic.txt index e7cded3f0f..7308ab6b43 100644 --- a/icu4c/data/translit_Telugu_InterIndic.txt +++ b/icu4c/data/translit_Telugu_InterIndic.txt @@ -6,7 +6,7 @@ // THIS IS A MACHINE-GENERATED FILE // Tool: dumpICUrules.bat // Source: ../../text/resources/Transliterator_Telugu_InterIndic.txt -// Date: Thu Oct 25 22:17:22 2001 +// Date: Sat Nov 10 17:25:42 2001 //-------------------------------------------------------------------- // Telugu_InterIndic diff --git a/icu4c/source/data/translit/t_Beng_InterIndic.txt b/icu4c/source/data/translit/t_Beng_InterIndic.txt index 87f5f2848a..75ed4c6ab1 100644 --- a/icu4c/source/data/translit/t_Beng_InterIndic.txt +++ b/icu4c/source/data/translit/t_Beng_InterIndic.txt @@ -6,7 +6,7 @@ // THIS IS A MACHINE-GENERATED FILE // Tool: dumpICUrules.bat // Source: 
../../text/resources/Transliterator_Bengali_InterIndic.txt -// Date: Thu Oct 25 22:17:21 2001 +// Date: Sat Nov 10 17:25:41 2001 //-------------------------------------------------------------------- // Bengali_InterIndic @@ -104,8 +104,8 @@ translit_Bengali_InterIndic { "\u09ed>\ue06d;" // DIGIT SEVEN "\u09ee>\ue06e;" // DIGIT EIGHT "\u09ef>\ue06f;" // DIGIT NINE -// \u09f0>; # UNMAPPED Bengali-InterIndic: LETTER RA WITH MIDDLE DIAGONAL -// \u09f1>; # UNMAPPED Bengali-InterIndic: LETTER RA WITH LOWER DIAGONAL +"\u09f0>\ue0f0;" // UNMAPPED Bengali-InterIndic: LETTER RA WITH MIDDLE DIAGONAL +"\u09f1>\ue0f1;" // UNMAPPED Bengali-InterIndic: LETTER RA WITH LOWER DIAGONAL // \u09f2>; # UNMAPPED Bengali-InterIndic: RUPEE MARK // \u09f3>; # UNMAPPED Bengali-InterIndic: RUPEE SIGN // \u09f4>; # UNMAPPED Bengali-InterIndic: CURRENCY NUMERATOR ONE diff --git a/icu4c/source/data/translit/t_Deva_InterIndic.txt b/icu4c/source/data/translit/t_Deva_InterIndic.txt index 9e05f58512..bdfd011671 100644 --- a/icu4c/source/data/translit/t_Deva_InterIndic.txt +++ b/icu4c/source/data/translit/t_Deva_InterIndic.txt @@ -6,7 +6,7 @@ // THIS IS A MACHINE-GENERATED FILE // Tool: dumpICUrules.bat // Source: ../../text/resources/Transliterator_Devanagari_InterIndic.txt -// Date: Thu Oct 25 22:17:21 2001 +// Date: Sat Nov 10 17:25:41 2001 //-------------------------------------------------------------------- // Devanagari_InterIndic @@ -50,11 +50,11 @@ translit_Devanagari_InterIndic { "\u090b>\ue00b;" // LETTER VOCALIC R "\u090c>\ue00c;" // LETTER VOCALIC L "\u090d>\ue00d;" // LETTER CANDRA E (For representing English sounds) -//\u090e>\ue00e; # UNMAPPED LETTER SHORT E(For Southern Scripts) + "\u090e>\ue00e;" // UNMAPPED LETTER SHORT E(For Southern Scripts) "\u090f>\ue00f;" // LETTER E "\u0910>\ue010;" // LETTER AI "\u0911>\ue011;" // LETTER CANDRA O (For representing English sounds) -//\u0912>\ue012; # UNMAPPED LETTER SHORT O (For Southern Scripts) + "\u0912>\ue012;" // UNMAPPED LETTER SHORT O 
(For Southern Scripts) "\u0913>\ue013;" // LETTER O "\u0914>\ue014;" // LETTER AU "\u0915>\ue015;" // LETTER KA @@ -85,10 +85,10 @@ translit_Devanagari_InterIndic { "\u092e>\ue02e;" // LETTER MA "\u092f>\ue02f;" // LETTER YA "\u0930>\ue030;" // LETTER RA - //\u0931>\ue031; # UNMAPPED LETTER RRA (Eyelash RA for Southern scripts) + "\u0931>\ue031;" // UNMAPPED LETTER RRA (Eyelash RA for Southern scripts) "\u0932>\ue032;" // LETTER LA "\u0933>\ue033;" // LETTER LLA - //\u0934>\ue034; # UNMAPPED LETTER LLLA (LLLA for Southern scripts) + "\u0934>\ue034;" // UNMAPPED LETTER LLLA (LLLA for Southern scripts) "\u0935>\ue035;" // LETTER VA "\u0936>\ue036;" // LETTER SHA "\u0937>\ue037;" // LETTER SSA @@ -104,11 +104,11 @@ translit_Devanagari_InterIndic { "\u0943>\ue043;" // VOWEL SIGN VOCALIC R "\u0944>\ue044;" // VOWEL SIGN VOCALIC RR "\u0945>\ue045;" // VOWEL SIGN CANDRA E -//\u0946>\ue046; # UNMAPPED VOWEL SIGN SHORT E + "\u0946>\ue046;" // UNMAPPED VOWEL SIGN SHORT E "\u0947>\ue047;" // VOWEL SIGN E "\u0948>\ue048;" // VOWEL SIGN AI "\u0949>\ue049;" // VOWEL SIGN CANDRA O -//\u094a>\ue04a; # UNMAPPED VOWEL SIGN SHORT O + "\u094a>\ue04a;" // UNMAPPED VOWEL SIGN SHORT O "\u094b>\ue04b;" // VOWEL SIGN O "\u094c>\ue04c;" // VOWEL SIGN AU "\u094d>\ue04d;" // SIGN VIRAMA diff --git a/icu4c/source/data/translit/t_Gujr_InterIndic.txt b/icu4c/source/data/translit/t_Gujr_InterIndic.txt index 4a9caaee81..1de781768a 100644 --- a/icu4c/source/data/translit/t_Gujr_InterIndic.txt +++ b/icu4c/source/data/translit/t_Gujr_InterIndic.txt @@ -6,7 +6,7 @@ // THIS IS A MACHINE-GENERATED FILE // Tool: dumpICUrules.bat // Source: ../../text/resources/Transliterator_Gujarati_InterIndic.txt -// Date: Thu Oct 25 22:17:21 2001 +// Date: Sat Nov 10 17:25:41 2001 //-------------------------------------------------------------------- // Gujarati_InterIndic @@ -41,10 +41,10 @@ translit_Gujarati_InterIndic { "\u0a89>\ue009;" // LETTER U "\u0a8a>\ue00a;" // LETTER UU "\u0a8b>\ue00b;" // LETTER VOCALIC R 
-// \u0a8d>; # UNMAPPED Gujarati-InterIndic: VOWEL CANDRA E +"\u0a8d>\ue00d;" // UNMAPPED Gujarati-InterIndic: VOWEL CANDRA E "\u0a8f>\ue00f;" // LETTER E "\u0a90>\ue010;" // LETTER AI -// \u0a91>; # UNMAPPED Gujarati-InterIndic: VOWEL CANDRA O +"\u0a91>\ue011;" // UNMAPPED Gujarati-InterIndic: VOWEL CANDRA O "\u0a93>\ue013;" // LETTER O "\u0a94>\ue014;" // LETTER AU "\u0a95>\ue015;" // LETTER KA diff --git a/icu4c/source/data/translit/t_Guru_InterIndic.txt b/icu4c/source/data/translit/t_Guru_InterIndic.txt index 7033480bca..c9b3748c07 100644 --- a/icu4c/source/data/translit/t_Guru_InterIndic.txt +++ b/icu4c/source/data/translit/t_Guru_InterIndic.txt @@ -6,7 +6,7 @@ // THIS IS A MACHINE-GENERATED FILE // Tool: dumpICUrules.bat // Source: ../../text/resources/Transliterator_Gurmukhi_InterIndic.txt -// Date: Thu Oct 25 22:17:21 2001 +// Date: Sat Nov 10 17:25:41 2001 //-------------------------------------------------------------------- // Gurmukhi_InterIndic @@ -101,11 +101,11 @@ translit_Gurmukhi_InterIndic { "\u0a6d>\ue06d;" // DIGIT SEVEN "\u0a6e>\ue06e;" // DIGIT EIGHT "\u0a6f>\ue06f;" // DIGIT NINE -// \u0a70>; # UNMAPPED Gurmukhi-InterIndic: TIPPI -// \u0a71>; # UNMAPPED Gurmukhi-InterIndic: ADDAK -// \u0a72>; # UNMAPPED Gurmukhi-InterIndic: IRI -// \u0a73>; # UNMAPPED Gurmukhi-InterIndic: URA -// \u0a74>; # UNMAPPED Gurmukhi-InterIndic: EK ONKAR +"\u0a70>\ue070;" // TIPPI +"\u0a71>\ue071;" // ADDAK +"\u0a72>\ue072;" // IRI +"\u0a73>\ue073;" // URA +"\u0a74>\ue074;" // EK ONKAR // :: NFC (NFD) ; // eof } diff --git a/icu4c/source/data/translit/t_InterIndic_Beng.txt b/icu4c/source/data/translit/t_InterIndic_Beng.txt index 261ca20648..d50509dfd5 100644 --- a/icu4c/source/data/translit/t_InterIndic_Beng.txt +++ b/icu4c/source/data/translit/t_InterIndic_Beng.txt @@ -6,7 +6,7 @@ // THIS IS A MACHINE-GENERATED FILE // Tool: dumpICUrules.bat // Source: ../../text/resources/Transliterator_InterIndic_Bengali.txt -// Date: Thu Oct 25 22:17:21 2001 +// Date: Sat Nov 10 
17:25:41 2001 //-------------------------------------------------------------------- // InterIndic_Bengali @@ -42,9 +42,13 @@ translit_InterIndic_Bengali { "\ue00a>\u098a;" // LETTER UU "\ue00b>\u098b;" // LETTER VOCALIC R "\ue00c>\u098c;" // LETTER VOCALIC L -// \ue00f>; # UNMAPPED InterIndic-Bengali: LETTER EE (\u098f = LETTER E) +"\ue00d>\u098f;" // FALLBACK +"\ue00e>\u098f;" // FALLBACK +"\ue00f>\u098f;" // LETTER E "\ue010>\u0990;" // LETTER AI -// \ue013>; # UNMAPPED InterIndic-Bengali: LETTER OO (\u0993 = LETTER O) +"\ue011>\u0993;" // FALLBACK +"\ue012>\u0993;" // FALLBACK +"\ue013>\u0993;" // LETTER O "\ue014>\u0994;" // LETTER AU "\ue015>\u0995;" // LETTER KA "\ue016>\u0996;" // LETTER KHA @@ -74,6 +78,7 @@ translit_InterIndic_Bengali { "\ue02e>\u09ae;" // LETTER MA "\ue02f>\u09af;" // LETTER YA "\ue030>\u09b0;" // LETTER RA +"\ue031>\u09b0\u09bc;" // FALLBACK to RA "\ue032>\u09b2;" // LETTER LA "\ue033>\u09b2;" // REMAP (indicExceptions.txt): \u09b3>\u09b2 = LETTER LLA>LETTER LA "\ue034>\u09b2;" // REMAP (indicExceptions.txt): \u09b4>\u09b2 = LETTER LLLA>LETTER LA @@ -83,7 +88,7 @@ translit_InterIndic_Bengali { "\ue038>\u09b8;" // LETTER SA "\ue039>\u09b9;" // LETTER HA "\ue03c>\u09bc;" // SIGN NUKTA -// \ue03d>; # UNMAPPED InterIndic-Bengali: SIGN AVAGRAHA +"\ue03d>;" // FALLBACK BLOW AWAY AVAGRAHA "\ue03e>\u09be;" // VOWEL SIGN AA "\ue03f>\u09bf;" // VOWEL SIGN I "\ue040>\u09c0;" // VOWEL SIGN II @@ -92,19 +97,23 @@ translit_InterIndic_Bengali { "\ue043>\u09c3;" // VOWEL SIGN VOCALIC R "\ue044>\u09c4;" // VOWEL SIGN VOCALIC RR "\ue045>\u09c7;" // REMAP (indicExceptions.txt): \u09c5>\u09c7 = VOWEL SIGN CANDRA E>VOWEL SIGN E -// \ue047>; # UNMAPPED InterIndic-Bengali: VOWEL SIGN EE (\u09c7 = VOWEL SIGN E) +"\ue046>\u09c7;" // FALLBACK +"\ue047>\u09c7;" // VOWEL SIGN E "\ue048>\u09c8;" // VOWEL SIGN AI "\ue049>\u09cb;" // REMAP (indicExceptions.txt): \u09c9>\u09cb = VOWEL SIGN CANDRA O>VOWEL SIGN O -// \ue04b>; # UNMAPPED InterIndic-Bengali: VOWEL SIGN OO 
(\u09cb = VOWEL SIGN O) +"\ue04a>\u09cb;" // FALLBACK +"\ue04b>\u09cb;" // VOWEL SIGN O "\ue04c>\u09cc;" // VOWEL SIGN AU "\ue04d>\u09cd;" // SIGN VIRAMA -// \ue050>; # UNMAPPED InterIndic-Bengali: OM +"\ue050>\u0993\u0982;" // InterIndic-Bengali: OM // \ue055>; # UNMAPPED InterIndic-Bengali: LENGTH MARK "\ue056>\u09c8;" // REMAP (indicExceptions.txt): \u09d6>\u09c8 = AI LENGTH MARK>VOWEL SIGN AI "\ue057>\u09d7;" // AU LENGTH MARK +"\ue058>\u0995;" // FALLBACK "\ue059>\u0996;" // REMAP (indicExceptions.txt): \u09d9>\u0996 = LETTER KHHA>LETTER KHA "\ue05a>\u0997;" // REMAP (indicExceptions.txt): \u09da>\u0997 = LETTER GHHA>LETTER GA "\ue05b>\u099c;" // REMAP (indicExceptions.txt): \u09db>\u099c = LETTER ZA>LETTER JA +"\ue05c>\u09Dc;" // FALLBACK "\ue05d>\u09dd;" // LETTER RHA "\ue05e>\u09ab;" // REMAP (indicExceptions.txt): \u09de>\u09ab = LETTER FA>LETTER PHA "\ue05f>\u09df;" // LETTER YYA @@ -123,11 +132,9 @@ translit_InterIndic_Bengali { "\ue06e>\u09ee;" // DIGIT EIGHT "\ue06f>\u09ef;" // DIGIT NINE "\ue0fa>\u09fa;" // ISSHAR -"\ue00f>\u098f;" // LETTER E -"\ue013>\u0993;" // LETTER O -"\ue031>\u09dc;" // LETTER RRA -"\ue047>\u09c7;" // VOWEL SIGN E -"\ue04b>\u09cb;" // VOWEL SIGN O + + + // :: NFC (NFD) ; // eof } diff --git a/icu4c/source/data/translit/t_InterIndic_Deva.txt b/icu4c/source/data/translit/t_InterIndic_Deva.txt index a38a82090d..140c6bd01a 100644 --- a/icu4c/source/data/translit/t_InterIndic_Deva.txt +++ b/icu4c/source/data/translit/t_InterIndic_Deva.txt @@ -6,7 +6,7 @@ // THIS IS A MACHINE-GENERATED FILE // Tool: dumpICUrules.bat // Source: ../../text/resources/Transliterator_InterIndic_Devanagari.txt -// Date: Thu Oct 25 22:17:21 2001 +// Date: Sat Nov 10 17:25:41 2001 //-------------------------------------------------------------------- // InterIndic_Devanagari @@ -43,6 +43,25 @@ translit_InterIndic_Devanagari { "\ue022\ue03c > \u095d;" //\ue05d LETTER RHA (pronounced RRHA) "\ue02b\ue03c > \u095e;" //\ue05e LETTER FA "\ue02f\ue03c > \u095f;" 
//\ue05f LETTER YYA + + //Decomposed compatibility transliterations + "\ue012\ue057>\u0914;" // FALLBACK FOR TAMIL AU + "\u0030 > \u0966;" // FALLBACK FOR TAMIL + + "\ue046\ue056 > \u0948;" // FALLBACK FOR TELUGU + "\ue055>;" // FALLBACK BLOW AWAY KANNADA AND TELUGU LENGTH MARK + "\ue056>;" // FALLBACK BLOW AWAY KANNADA AND TELUGU AI LENGTH MARK + "\ue057>;" // FALLBACK BLOW AWAY TAMIL AU LENGTH MARK + "\ue070>;" // FALLBACK BLOW AWAY GURMUKHI + "\ue071>;" // FALLBACK BLOW AWAY GURMUKHI + "\ue072>;" // FALLBACK BLOW AWAY GURMUKHI + "\ue073>;" // FALLBACK BLOW AWAY GURMUKHI + "\ue074>;" // FALLBACK BLOW AWAY GURMUKHI + + //Decomposed compatibility transliterations + "\ue032\ue03C > \u0933;" // FALLBACK FOR GURMUKHI + "\ue038\ue03c > \u0936;" // FALLBACK FOR GURMUKHI + "\ue001 > \u0901;" // SIGN CANDRABINDU "\ue002 > \u0902;" // SIGN ANUSVARA "\ue003 > \u0903;" // SIGN VISARGA @@ -55,13 +74,13 @@ translit_InterIndic_Devanagari { "\ue00b > \u090b;" // LETTER VOCALIC R "\ue00c > \u090c;" // LETTER VOCALIC L "\ue00d > \u090d;" // LETTER CANDRA E (For representing English sounds) -//\ue00e > \u090e; # UNMAPPED LETTER SHORT E(For Southern Scripts) - "\ue00e > \u090f;" + "\ue00e > \u090e;" // UNMAPPED LETTER SHORT E(For Southern Scripts) + //\ue00e > \u090f; "\ue00f > \u090f;" // LETTER E "\ue010 > \u0910;" // LETTER AI "\ue011 > \u0911;" // LETTER CANDRA O (For representing English sounds) -//\ue012 > \u0912; # UNMAPPED LETTER SHORT O (For Southern Scripts) - "\ue012 > \u0913;" + "\ue012 > \u0912;" // UNMAPPED LETTER SHORT O (For Southern Scripts) + //\ue012 > \u0913; "\ue013 > \u0913;" // LETTER O "\ue014 > \u0914;" // LETTER AU "\ue015 > \u0915;" // LETTER KA @@ -92,12 +111,12 @@ translit_InterIndic_Devanagari { "\ue02e > \u092e;" // LETTER MA "\ue02f > \u092f;" // LETTER YA "\ue030 > \u0930;" // LETTER RA - //\ue031 > \u0931; # LETTER RRA (Eyelash RA for Southern scripts) - "\ue031 > \u0930;" + "\ue031 > \u0931;" // LETTER RRA (Eyelash RA for Southern scripts) + 
//\ue031 > \u0930; "\ue032 > \u0932;" // LETTER LA "\ue033 > \u0933;" // LETTER LLA - //\ue034 > \u0934; # LETTER LLLA (LLLA for Southern scripts) - "\ue034 > \u0933;" + "\ue034 > \u0934;" // LETTER LLLA (LLLA for Southern scripts) + //\ue034 > \u0933; "\ue035 > \u0935;" // LETTER VA "\ue036 > \u0936;" // LETTER SHA "\ue037 > \u0937;" // LETTER SSA @@ -113,13 +132,13 @@ translit_InterIndic_Devanagari { "\ue043 > \u0943;" // VOWEL SIGN VOCALIC R "\ue044 > \u0944;" // VOWEL SIGN VOCALIC RR "\ue045 > \u0945;" // VOWEL SIGN CANDRA E -//\ue046 > \u0946; # UNMAPPED VOWEL SIGN SHORT E - "\ue046 > \u0947;" + "\ue046 > \u0946;" // UNMAPPED VOWEL SIGN SHORT E + //\ue046 > \u0947; "\ue047 > \u0947;" // VOWEL SIGN E "\ue048 > \u0948;" // VOWEL SIGN AI "\ue049 > \u0949;" // VOWEL SIGN CANDRA O -//\ue04a > \u094a; # UNMAPPED VOWEL SIGN SHORT O - "\ue04a > \u094b;" + "\ue04a > \u094a;" // UNMAPPED VOWEL SIGN SHORT O + //\ue04a > \u094b; "\ue04b > \u094b;" // VOWEL SIGN O "\ue04c > \u094c;" // VOWEL SIGN AU "\ue04d > \u094d;" // SIGN VIRAMA @@ -140,8 +159,8 @@ translit_InterIndic_Devanagari { "\ue061 > \u0961;" // LETTER VOCALIC LL "\ue062 > \u0962;" // VOWEL SIGN VOCALIC L "\ue063 > \u0963;" // VOWEL SIGN VOCALIC LL -// > ; \u0964 # UNMAPPED Devanagari-InterIndic: DANDA -// > ; \u0965 # UNMAPPED Devanagari-InterIndic: DOUBLE DANDA + "\ue064 > \u0964;" // UNMAPPED Devanagari-InterIndic: DANDA + "\ue065 > \u0965;" // UNMAPPED Devanagari-InterIndic: DOUBLE DANDA "\ue066 > \u0966;" // DIGIT ZERO "\ue067 > \u0967;" // DIGIT ONE "\ue068 > \u0968;" // DIGIT TWO @@ -152,6 +171,10 @@ translit_InterIndic_Devanagari { "\ue06d > \u096d;" // DIGIT SEVEN "\ue06e > \u096e;" // DIGIT EIGHT "\ue06f > \u096f;" // DIGIT NINE + "\ue0f0 > \u0930;" // FALLBACK RA + "\ue0f1 > \u0930;" // FALLBACK RA + + // \u0970 # UNMAPPED Devanagari-InterIndic: ABBREVIATION SIGN // :: NFC; // eof diff --git a/icu4c/source/data/translit/t_InterIndic_Gujr.txt b/icu4c/source/data/translit/t_InterIndic_Gujr.txt index 
25789ce7e1..142c2f092d 100644 --- a/icu4c/source/data/translit/t_InterIndic_Gujr.txt +++ b/icu4c/source/data/translit/t_InterIndic_Gujr.txt @@ -6,7 +6,7 @@ // THIS IS A MACHINE-GENERATED FILE // Tool: dumpICUrules.bat // Source: ../../text/resources/Transliterator_InterIndic_Gujarati.txt -// Date: Thu Oct 25 22:17:21 2001 +// Date: Sat Nov 10 17:25:41 2001 //-------------------------------------------------------------------- // InterIndic_Gujarati @@ -42,9 +42,13 @@ translit_InterIndic_Gujarati { "\ue00a>\u0a8a;" // LETTER UU "\ue00b>\u0a8b;" // LETTER VOCALIC R "\ue00c>\u0ab2\u0ac3;" // REMAP (indicExceptions.txt): \u0a8c>\u0ab2\u0ac3 = LETTER VOCALIC L>LETTER LA.VOWEL SIGN VOCALIC R -// \ue00f>; # UNMAPPED InterIndic-Gujarati: LETTER EE (\u0a8f = LETTER E) +"\ue00d>\u0a8d;" // GUJARATI VOWEL CANDRA E +"\ue00e>\u0a8f;" // FALLBACK +"\ue00f>\u0a8f;" // InterIndic-Gujarati: LETTER EE (\u0a8f = LETTER E) "\ue010>\u0a90;" // LETTER AI -// \ue013>; # UNMAPPED InterIndic-Gujarati: LETTER OO (\u0a93 = LETTER O) +"\ue011>\u0a91;" // FALLBACK +"\ue012>\u0a93;" // FALLBACK +"\ue013>\u0a93;" // UNMAPPED InterIndic-Gujarati: LETTER OO (\u0a93 = LETTER O) "\ue014>\u0a94;" // LETTER AU "\ue015>\u0a95;" // LETTER KA "\ue016>\u0a96;" // LETTER KHA @@ -66,7 +70,7 @@ translit_InterIndic_Gujarati { "\ue026>\u0aa6;" // LETTER DA "\ue027>\u0aa7;" // LETTER DHA "\ue028>\u0aa8;" // LETTER NA -"\ue029>\u0aa8;" // REMAP (indicExceptions.txt): \u0aa9>\u0aa8 = LETTER NNNA>LETTER NA +"\ue029>\u0aa8\u0abc;" // FALLBACK to NA+NUKTA "\ue02a>\u0aaa;" // LETTER PA "\ue02b>\u0aab;" // LETTER PHA "\ue02c>\u0aac;" // LETTER BA @@ -74,9 +78,10 @@ translit_InterIndic_Gujarati { "\ue02e>\u0aae;" // LETTER MA "\ue02f>\u0aaf;" // LETTER YA "\ue030>\u0ab0;" // LETTER RA +"\ue031>\u0ab0\u0abc;" // FALLBACK "\ue032>\u0ab2;" // LETTER LA "\ue033>\u0ab3;" // LETTER LLA -"\ue034>\u0ab3;" // REMAP (indicExceptions.txt): \u0ab4>\u0ab3 = LETTER LLLA>LETTER LLA +"\ue034>\u0ab3\u0abc;" // LETTER LLLA>LETTER 
LLA+NUKTA "\ue035>\u0ab5;" // LETTER VA "\ue036>\u0ab6;" // LETTER SHA "\ue037>\u0ab7;" // LETTER SSA @@ -92,19 +97,23 @@ translit_InterIndic_Gujarati { "\ue043>\u0ac3;" // VOWEL SIGN VOCALIC R "\ue044>\u0ac4;" // VOWEL SIGN VOCALIC RR "\ue045>\u0ac5;" // VOWEL SIGN CANDRA E -// \ue047>; # UNMAPPED InterIndic-Gujarati: VOWEL SIGN EE (\u0ac7 = VOWEL SIGN E) +"\ue046>\u0ac7;" // FALLBACK +"\ue047>\u0ac7;" // InterIndic-Gujarati: VOWEL SIGN EE (\u0ac7 = VOWEL SIGN E) "\ue048>\u0ac8;" // VOWEL SIGN AI "\ue049>\u0ac9;" // VOWEL SIGN CANDRA O -// \ue04b>; # UNMAPPED InterIndic-Gujarati: VOWEL SIGN OO (\u0acb = VOWEL SIGN O) +"\ue04a>\u0acb;" // FALLBACK +"\ue04b>\u0acb;" // UNMAPPED InterIndic-Gujarati: VOWEL SIGN OO (\u0acb = VOWEL SIGN O) "\ue04c>\u0acc;" // VOWEL SIGN AU "\ue04d>\u0acd;" // SIGN VIRAMA "\ue050>\u0ad0;" // OM -// \ue055>; # UNMAPPED InterIndic-Gujarati: LENGTH MARK +//\ue055>; # UNMAPPED InterIndic-Gujarati: LENGTH MARK "\ue056>\u0ac8;" // REMAP (indicExceptions.txt): \u0ad6>\u0ac8 = AI LENGTH MARK>VOWEL SIGN AI "\ue057>\u0acc;" // REMAP (indicExceptions.txt): \u0ad7>\u0acc = AU LENGTH MARK>VOWEL SIGN AU +"\ue058>\u0a95\u0abc;" // FALLBACK "\ue059>\u0a96\u0abc;" // REMAP (indicExceptions.txt): \u0ad9>\u0a96\u0abc = LETTER KHHA>LETTER KHA.SIGN NUKTA "\ue05a>\u0a97\u0abc;" // REMAP (indicExceptions.txt): \u0ada>\u0a97\u0abc = LETTER GHHA>LETTER GA.SIGN NUKTA "\ue05b>\u0a9c\u0abc;" // REMAP (indicExceptions.txt): \u0adb>\u0a9c\u0abc = LETTER ZA>LETTER JA.SIGN NUKTA +"\ue05c>\u0aa1\u0abc;" // FALLBACK "\ue05d>\u0aa2\u0abc;" // REMAP (indicExceptions.txt): \u0add>\u0aa2\u0abc = LETTER RHA>LETTER DDHA.SIGN NUKTA "\ue05e>\u0aab\u0abc;" // REMAP (indicExceptions.txt): \u0ade>\u0aab\u0abc = LETTER FA>LETTER PHA.SIGN NUKTA "\ue05f>\u0aaf\u0abc;" // REMAP (indicExceptions.txt): \u0adf>\u0aaf\u0abc = LETTER YYA>LETTER YA.SIGN NUKTA @@ -122,12 +131,7 @@ translit_InterIndic_Gujarati { "\ue06d>\u0aed;" // DIGIT SEVEN "\ue06e>\u0aee;" // DIGIT EIGHT "\ue06f>\u0aef;" 
// DIGIT NINE -// \ue080>; # UNMAPPED InterIndic-Gujarati: ISSHAR -"\ue00f>\u0a8f;" // LETTER E -"\ue013>\u0a93;" // LETTER O -// \ue083>; # UNMAPPED InterIndic-Gujarati: LETTER RRA (\u0a83 = SIGN VISARGA) -"\ue047>\u0ac7;" // VOWEL SIGN E -"\ue04b>\u0acb;" // VOWEL SIGN O +//\ue080>; # UNMAPPED InterIndic-Gujarati: ISSHAR // :: NFC (NFD) ; // eof } diff --git a/icu4c/source/data/translit/t_InterIndic_Guru.txt b/icu4c/source/data/translit/t_InterIndic_Guru.txt index e6efc08469..a01bc99e15 100644 --- a/icu4c/source/data/translit/t_InterIndic_Guru.txt +++ b/icu4c/source/data/translit/t_InterIndic_Guru.txt @@ -6,7 +6,7 @@ // THIS IS A MACHINE-GENERATED FILE // Tool: dumpICUrules.bat // Source: ../../text/resources/Transliterator_InterIndic_Gurmukhi.txt -// Date: Thu Oct 25 22:17:21 2001 +// Date: Sat Nov 10 17:25:41 2001 //-------------------------------------------------------------------- // InterIndic_Gurmukhi @@ -32,8 +32,8 @@ translit_InterIndic_Gurmukhi { // InterIndic-Gurmukhi //:: NFD (NFC) ; "\ue001>\u0a02;" // REMAP (indicExceptions.txt): \u0a01>\u0a02 = SIGN CANDRABINDU>SIGN BINDI -// \ue002>; # UNMAPPED InterIndic-Gurmukhi: SIGN ANUSVARA (\u0a02 = SIGN BINDI) -// \ue003>; # UNMAPPED InterIndic-Gurmukhi: SIGN VISARGA +"\ue002>;" // FALLBACK BLOW AWAY SIGN ANUSVARA (\u0a02 = SIGN BINDI) +"\ue003>;" // FALLBACK BLOW AWAY SIGN VISARGA "\ue005>\u0a05;" // LETTER A "\ue006>\u0a06;" // LETTER AA "\ue007>\u0a07;" // LETTER I @@ -41,9 +41,13 @@ translit_InterIndic_Gurmukhi { "\ue009>\u0a09;" // LETTER U "\ue00a>\u0a0a;" // LETTER UU "\ue00b>\u0a30\u0a3f;" // REMAP (indicExceptions.txt): \u0a0b>\u0a30\u0a3f = LETTER VOCALIC R>LETTER RA.VOWEL SIGN I -"\ue00c>\u0a07;" // REMAP (indicExceptions.txt): \u0a0c>\u0a07 = LETTER VOCALIC L>LETTER I +"\ue00c>\u0a33;" // FALLBACK +"\ue00d>\u0a0f;" // FALLBACK +"\ue00e>\u0a0f;" // FALLBACK "\ue00f>\u0a0f;" // LETTER EE "\ue010>\u0a10;" // LETTER AI +"\ue011>\u0a13;" // FALLBACK +"\ue012>\u0a13;" // FALLBACK "\ue013>\u0a13;" // 
LETTER OO "\ue014>\u0a14;" // LETTER AU "\ue015>\u0a15;" // LETTER KA @@ -66,7 +70,7 @@ translit_InterIndic_Gurmukhi { "\ue026>\u0a26;" // LETTER DA "\ue027>\u0a27;" // LETTER DHA "\ue028>\u0a28;" // LETTER NA -"\ue029>\u0a28;" // REMAP (indicExceptions.txt): \u0a29>\u0a28 = LETTER NNNA>LETTER NA +"\ue029>\u0a28\u0a3c;" // REMAP (indicExceptions.txt): \u0a29>\u0a28 = LETTER NNNA>LETTER NA "\ue02a>\u0a2a;" // LETTER PA "\ue02b>\u0a2b;" // LETTER PHA "\ue02c>\u0a2c;" // LETTER BA @@ -74,6 +78,7 @@ translit_InterIndic_Gurmukhi { "\ue02e>\u0a2e;" // LETTER MA "\ue02f>\u0a2f;" // LETTER YA "\ue030>\u0a30;" // LETTER RA +"\ue031>\u0a30\u0a3c;" // FALLBACK LETTER RA+NUKTA "\ue032>\u0a32;" // LETTER LA "\ue033>\u0a33;" // LETTER LLA "\ue034>\u0a33;" // REMAP (indicExceptions.txt): \u0a34>\u0a33 = LETTER LLLA>LETTER LLA @@ -83,33 +88,37 @@ translit_InterIndic_Gurmukhi { "\ue038>\u0a38;" // LETTER SA "\ue039>\u0a39;" // LETTER HA "\ue03c>\u0a3c;" // SIGN NUKTA -// \ue03d>; # UNMAPPED InterIndic-Gurmukhi: SIGN AVAGRAHA +"\ue03d>;" // FALLBACK BLOW AWAY SIGN AVAGRAHA "\ue03e>\u0a3e;" // VOWEL SIGN AA "\ue03f>\u0a3f;" // VOWEL SIGN I "\ue040>\u0a40;" // VOWEL SIGN II "\ue041>\u0a41;" // VOWEL SIGN U "\ue042>\u0a42;" // VOWEL SIGN UU -// \ue043>; # UNMAPPED InterIndic-Gurmukhi: VOWEL SIGN VOCALIC R -// \ue044>; # UNMAPPED InterIndic-Gurmukhi: VOWEL SIGN VOCALIC RR +"\ue043>;" // FALLBACK BLOW AWAY VOWEL SIGN VOCALIC R +"\ue044>;" // FALLBACK BLOW AWAY VOWEL SIGN VOCALIC RR "\ue045>\u0a48;" // REMAP (indicExceptions.txt): \u0a45>\u0a48 = VOWEL SIGN CANDRA E>VOWEL SIGN AI +"\ue046>\u0a47;" // FALLBACK "\ue047>\u0a47;" // VOWEL SIGN EE "\ue048>\u0a48;" // VOWEL SIGN AI "\ue049>\u0a4c;" // REMAP (indicExceptions.txt): \u0a49>\u0a4c = VOWEL SIGN CANDRA O>VOWEL SIGN AU +"\ue04a>\u0a4b;" // FALLBACK "\ue04b>\u0a4b;" // VOWEL SIGN OO "\ue04c>\u0a4c;" // VOWEL SIGN AU "\ue04d>\u0a4d;" // SIGN VIRAMA -// \ue050>; # UNMAPPED InterIndic-Gurmukhi: OM -// \ue055>; # UNMAPPED
InterIndic-Gurmukhi: LENGTH MARK +"\ue050>\u0a0f\u0a02;" // FALLBACK to OO+BINDI : OM (NOTE(review): \u0a0f is LETTER EE; LETTER OO is \u0a13 -- confirm intended target) +"\ue055>;" // FALLBACK BLOW AWAY LENGTH MARK "\ue056>\u0a48;" // REMAP (indicExceptions.txt): \u0a56>\u0a48 = AI LENGTH MARK>VOWEL SIGN AI "\ue057>\u0a4c;" // REMAP (indicExceptions.txt): \u0a57>\u0a4c = AU LENGTH MARK>VOWEL SIGN AU +"\ue058>\u0a15\u0a3c;" // FALLBACK KA+NUKTA "\ue059>\u0a59;" // LETTER KHHA "\ue05a>\u0a5a;" // LETTER GHHA "\ue05b>\u0a5b;" // LETTER ZA +"\ue05c>\u0a5c;" // LETTER RRA "\ue05d>\u0a22\u0a3c;" // REMAP (indicExceptions.txt): \u0a5d>\u0a22\u0a3c = LETTER RHA>LETTER DDHA.SIGN NUKTA "\ue05e>\u0a5e;" // LETTER FA -"\ue05f>\u0a2f;" // REMAP (indicExceptions.txt): \u0a5f>\u0a2f = LETTER YYA>LETTER YA +"\ue05f>\u0a2f\u0a3c;" // REMAP (indicExceptions.txt): \u0a5f>\u0a2f = LETTER YYA>LETTER YA "\ue060>\u0a30\u0a3f;" // REMAP (indicExceptions.txt): \u0a60>\u0a30\u0a3f = LETTER VOCALIC RR>LETTER RA.VOWEL SIGN I -"\ue061>\u0a08\u0a3c;" // REMAP (indicExceptions.txt): \u0a61>\u0a08\u0a3c = LETTER VOCALIC LL>LETTER II.SIGN NUKTA +"\ue061>\u0a33\u0a3c;" // "\ue062>\u0a3f\u0a3c;" // REMAP (indicExceptions.txt): \u0a62>\u0a3f\u0a3c = VOWEL SIGN VOCALIC L>VOWEL SIGN I.SIGN NUKTA "\ue063>\u0a40\u0a3c;" // REMAP (indicExceptions.txt): \u0a63>\u0a40\u0a3c = VOWEL SIGN VOCALIC LL>VOWEL SIGN II.SIGN NUKTA "\ue066>\u0a66;" // DIGIT ZERO @@ -122,12 +131,16 @@ translit_InterIndic_Gurmukhi { "\ue06d>\u0a6d;" // DIGIT SEVEN "\ue06e>\u0a6e;" // DIGIT EIGHT "\ue06f>\u0a6f;" // DIGIT NINE -// \ue080>; # UNMAPPED InterIndic-Gurmukhi: ISSHAR -// \ue081>; # UNMAPPED InterIndic-Gurmukhi: LETTER E -// \ue082>; # UNMAPPED InterIndic-Gurmukhi: LETTER O (\u0a02 = SIGN BINDI) -"\ue05c>\u0a5c;" // LETTER RRA -// \ue084>; # UNMAPPED InterIndic-Gurmukhi: VOWEL SIGN E -// \ue085>; # UNMAPPED InterIndic-Gurmukhi: VOWEL SIGN O (\u0a05 = LETTER A) +"\ue070>\u0a70;" // TIPPI +"\ue071>\u0a71;" // ADDAK +"\ue072>\u0a72;" // IRI +"\ue073>\u0a73;" // URA +"\ue074>\u0a74;" // EK ONKAR +"\ue080>;" //
FALLBACK BLOW AWAY ISSHAR +"\ue081>;" // FALLBACK BLOW AWAY LETTER E +"\ue082>;" // FALLBACK BLOW AWAY LETTER O (\u0a02 = SIGN BINDI) +"\ue084>;" // FALLBACK BLOW AWAY VOWEL SIGN E +"\ue085>;" // FALLBACK BLOW AWAY VOWEL SIGN O (\u0a05 = LETTER A) // :: NFC (NFD) ; // eof } diff --git a/icu4c/source/data/translit/t_InterIndic_Knda.txt b/icu4c/source/data/translit/t_InterIndic_Knda.txt index d70621e63a..81c3e3347d 100644 --- a/icu4c/source/data/translit/t_InterIndic_Knda.txt +++ b/icu4c/source/data/translit/t_InterIndic_Knda.txt @@ -6,7 +6,7 @@ // THIS IS A MACHINE-GENERATED FILE // Tool: dumpICUrules.bat // Source: ../../text/resources/Transliterator_InterIndic_Kannada.txt -// Date: Thu Oct 25 22:17:21 2001 +// Date: Sat Nov 10 17:25:41 2001 //-------------------------------------------------------------------- // InterIndic_Kannada @@ -42,8 +42,12 @@ translit_InterIndic_Kannada { "\ue00a>\u0c8a;" // LETTER UU "\ue00b>\u0c8b;" // LETTER VOCALIC R "\ue00c>\u0c8c;" // LETTER VOCALIC L +"\ue00d>\u0c8e;" // LETTER E +"\ue00e>\u0c8e;" // FALLBACK "\ue00f>\u0c8f;" // LETTER EE "\ue010>\u0c90;" // LETTER AI +"\ue011>\u0c92;" // FALLBACK +"\ue012>\u0c92;" // LETTER O "\ue013>\u0c93;" // LETTER OO "\ue014>\u0c94;" // LETTER AU "\ue015>\u0c95;" // LETTER KA @@ -74,6 +78,7 @@ translit_InterIndic_Kannada { "\ue02e>\u0cae;" // LETTER MA "\ue02f>\u0caf;" // LETTER YA "\ue030>\u0cb0;" // LETTER RA +"\ue031>\u0cb1;" // LETTER RRA "\ue032>\u0cb2;" // LETTER LA "\ue033>\u0cb3;" // LETTER LLA "\ue034>\u0cb3;" // REMAP (indicExceptions.txt): \u0cb4>\u0cb3 = LETTER LLLA>LETTER LLA @@ -82,8 +87,10 @@ translit_InterIndic_Kannada { "\ue037>\u0cb7;" // LETTER SSA "\ue038>\u0cb8;" // LETTER SA "\ue039>\u0cb9;" // LETTER HA -// \ue03c>; # UNMAPPED InterIndic-Kannada: SIGN NUKTA -// \ue03d>; # UNMAPPED InterIndic-Kannada: SIGN AVAGRAHA + +"\ue03c>;" // FALLBACK BLOW AWAY NUKTA +"\ue03d>;" // FALLBACK BLOW AWAY AVAGRAHA + "\ue03e>\u0cbe;" // VOWEL SIGN AA "\ue03f>\u0cbf;" // VOWEL SIGN I 
"\ue040>\u0cc0;" // VOWEL SIGN II @@ -92,9 +99,11 @@ translit_InterIndic_Kannada { "\ue043>\u0cc3;" // VOWEL SIGN VOCALIC R "\ue044>\u0cc4;" // VOWEL SIGN VOCALIC RR "\ue045>\u0cc6;" // REMAP (indicExceptions.txt): \u0cc5>\u0cc6 = VOWEL SIGN CANDRA E>VOWEL SIGN E +"\ue046>\u0cc6;" // VOWEL SIGN E "\ue047>\u0cc7;" // VOWEL SIGN EE "\ue048>\u0cc8;" // VOWEL SIGN AI "\ue049>\u0cca;" // REMAP (indicExceptions.txt): \u0cc9>\u0cca = VOWEL SIGN CANDRA O>VOWEL SIGN O +"\ue04a>\u0cca;" // VOWEL SIGN O "\ue04b>\u0ccb;" // VOWEL SIGN OO "\ue04c>\u0ccc;" // VOWEL SIGN AU "\ue04d>\u0ccd;" // SIGN VIRAMA @@ -102,9 +111,11 @@ translit_InterIndic_Kannada { "\ue055>\u0cd5;" // LENGTH MARK "\ue056>\u0cd6;" // AI LENGTH MARK "\ue057>\u0ccc;" // REMAP (indicExceptions.txt): \u0cd7>\u0ccc = AU LENGTH MARK>VOWEL SIGN AU +"\ue058>\u0c95;" // FALLBACK "\ue059>\u0c96;" // REMAP (indicExceptions.txt): \u0cd9>\u0c96 = LETTER KHHA>LETTER KHA "\ue05a>\u0c97;" // REMAP (indicExceptions.txt): \u0cda>\u0c97 = LETTER GHHA>LETTER GA "\ue05b>\u0c9c;" // REMAP (indicExceptions.txt): \u0cdb>\u0c9c = LETTER ZA>LETTER JA +"\ue05c>\u0ca2;" // FALLBACK "\ue05d>\u0ca2;" // REMAP (indicExceptions.txt): \u0cdd>\u0ca2 = LETTER RHA>LETTER DDHA "\ue05e>\u0cde;" // LETTER FA "\ue05f>\u0caf;" // REMAP (indicExceptions.txt): \u0cdf>\u0caf = LETTER YYA>LETTER YA @@ -123,11 +134,6 @@ translit_InterIndic_Kannada { "\ue06e>\u0cee;" // DIGIT EIGHT "\ue06f>\u0cef;" // DIGIT NINE // \ue080>; # UNMAPPED InterIndic-Kannada: ISSHAR -"\ue00e>\u0c8e;" // LETTER E -"\ue012>\u0c92;" // LETTER O -"\ue031>\u0cb1;" // LETTER RRA -"\ue046>\u0cc6;" // VOWEL SIGN E -"\ue04a>\u0cca;" // VOWEL SIGN O // :: NFC (NFD) ; // eof } diff --git a/icu4c/source/data/translit/t_InterIndic_Latn.txt b/icu4c/source/data/translit/t_InterIndic_Latn.txt index e4d9a0eea7..235399bd3b 100644 --- a/icu4c/source/data/translit/t_InterIndic_Latn.txt +++ b/icu4c/source/data/translit/t_InterIndic_Latn.txt @@ -6,7 +6,7 @@ // THIS IS A MACHINE-GENERATED FILE // 
Tool: dumpICUrules.bat // Source: ../../text/resources/Transliterator_InterIndic_Latin.txt -// Date: Thu Oct 25 22:17:21 2001 +// Date: Sat Nov 10 17:25:41 2001 //-------------------------------------------------------------------- // InterIndic_Latin @@ -102,7 +102,7 @@ translit_InterIndic_Latin { "$virama=\ue04d;" // \u094e Reserved // \u094f Reserved -//\u0950>\ue050; # OM + "$om=\ue050;" // OM // \u0951>; # UNMAPPED STRESS SIGN UDATTA // \u0952>; # UNMAPPED STRESS SIGN ANUDATTA // \u0953>; # UNMAPPED GRAVE ACCENT @@ -253,6 +253,9 @@ translit_InterIndic_Latin { "$ra$virama}$ha>r'';" "$ra}$x>r;" "$ra>ra;" + "$rra$virama}$ha>r\u0331'';" + "$rra}$x>r\u0331;" + "$rra>r\u0331a;" "$la$virama}$ha>l'';" "$la}$x>l;" "$la>la;" @@ -282,8 +285,8 @@ translit_InterIndic_Latin { "$ena > n\u0331a ;" "$uka}$x > q ;" "$uka > qa ;" - "$ukha}$x > k\u0323 ;" - "$ukha > k\u0323a ;" + "$ukha}$x > k\u0331h\u0331 ;" + "$ukha > k\u0331h\u0331a ;" "$ugha}$x > g\u0307 ;" "$ugha > g\u0307a ;" "$ujha}$x > z ;" @@ -292,8 +295,8 @@ translit_InterIndic_Latin { "$udha > r\u0323ha;" "$uddha}$x> r\u0323 ;" "$uddha > r\u0323a ;" - "$ufa}$x > f\u0323 ;" - "$ufa > f\u0323a ;" + "$ufa}$x > f ;" + "$ufa > fa ;" // dependent vowels (should never occur except following consonants) "$aa > a\u0304 ;" "$ai > ai ;" @@ -315,33 +318,33 @@ translit_InterIndic_Latin { "$se > e ;" "$so > o ;" // independent vowels (when following consonants) - "a}$waa > ''a\u0304 ;" - "$z}$waa > ''a\u0304 ;" - "a}$wai > ''ai ;" - "$z}$wai > ''ai ;" - "a}$wau > ''au ;" - "$z}$wau > ''au ;" - "a}$wii > ''i\u0304 ;" - "$z}$wii > ''i\u0304 ;" - "a}$wi > ''i ;" - "$z}$wi > ''i ;" - "a}$wuu > ''u\u0304 ;" - "$z}$wuu > ''u\u0304 ;" - "a}$wu > ''u ;" - "$z}$wu > ''u ;" - "$z}$wrr > ''r\u0325\u0304 ;" - "$z}$wr > ''r\u0325 ;" - "$z}$wll > ''l\u0325\u0304 ;" - "$z}$wl > ''l\u0325 ;" - "$z}$we > ''e\u0304 ;" - "$z}$wo > ''o\u0304 ;" - "a}$wa > ''a ;" - "$z}$wa > ''a ;" + "a{$waa > ''a\u0304 ;" + "$z{$waa > ''a\u0304 ;" + "a{$wai > ''ai ;" + 
"$z{$wai > ''ai ;" + "a{$wau > ''au ;" + "$z{$wau > ''au ;" + "a{$wii > ''i\u0304 ;" + "$z{$wii > ''i\u0304 ;" + "a{$wi > ''i ;" + "$z{$wi > ''i ;" + "a{$wuu > ''u\u0304 ;" + "$z{$wuu > ''u\u0304 ;" + "a{$wu > ''u ;" + "$z{$wu > ''u ;" + "$z{$wrr > ''r\u0325\u0304 ;" + "$z{$wr > ''r\u0325 ;" + "$z{$wll > ''l\u0325\u0304 ;" + "$z{$wl > ''l\u0325 ;" + "$z{$we > ''e\u0304 ;" + "$z{$wo > ''o\u0304 ;" + "a{$wa > ''a ;" + "$z{$wa > ''a ;" //extra vowels - "$z}$wce > ''e\u0306 ;" - "$z}$wco > ''o\u0306 ;" - "$z}$wse > ''e ;" - "$z}$wso > ''o ;" + "$z{$wce > ''e\u0306 ;" + "$z{$wco > ''o\u0306 ;" + "$z{$wse > ''e ;" + "$z{$wso > ''o ;" // independent vowels (otherwise) "$waa > a\u0304 ;" "$wai > ai ;" @@ -362,6 +365,7 @@ translit_InterIndic_Latin { "$wco > o\u0306 ;" "$wse > e ;" "$wso > o ;" + "$om > ''om ;" //stress marks "$avagraha > \u0315;" "$chandrabindu$anusvara>'-'\u0303;" diff --git a/icu4c/source/data/translit/t_InterIndic_Mlym.txt b/icu4c/source/data/translit/t_InterIndic_Mlym.txt index c28edd196a..30037e8b60 100644 --- a/icu4c/source/data/translit/t_InterIndic_Mlym.txt +++ b/icu4c/source/data/translit/t_InterIndic_Mlym.txt @@ -6,7 +6,7 @@ // THIS IS A MACHINE-GENERATED FILE // Tool: dumpICUrules.bat // Source: ../../text/resources/Transliterator_InterIndic_Malayalam.txt -// Date: Thu Oct 25 22:17:21 2001 +// Date: Sat Nov 10 17:25:41 2001 //-------------------------------------------------------------------- // InterIndic_Malayalam @@ -42,8 +42,12 @@ translit_InterIndic_Malayalam { "\ue00a>\u0d0a;" // LETTER UU "\ue00b>\u0d0b;" // LETTER VOCALIC R "\ue00c>\u0d0c;" // LETTER VOCALIC L +"\ue00d>\u0d0e;" // FALLLBACK LETTER E +"\ue00e>\u0d0e;" // LETTER E "\ue00f>\u0d0f;" // LETTER EE "\ue010>\u0d10;" // LETTER AI +"\ue011>\u0d12;" // FALLBACK TO O +"\ue012>\u0d12;" // LETTER O "\ue013>\u0d13;" // LETTER OO "\ue014>\u0d14;" // LETTER AU "\ue015>\u0d15;" // LETTER KA @@ -74,6 +78,7 @@ translit_InterIndic_Malayalam { "\ue02e>\u0d2e;" // LETTER MA "\ue02f>\u0d2f;" // 
LETTER YA "\ue030>\u0d30;" // LETTER RA +"\ue031>\u0d31;" // LETTER RRA "\ue032>\u0d32;" // LETTER LA "\ue033>\u0d33;" // LETTER LLA "\ue034>\u0d34;" // LETTER LLLA @@ -82,36 +87,42 @@ translit_InterIndic_Malayalam { "\ue037>\u0d37;" // LETTER SSA "\ue038>\u0d38;" // LETTER SA "\ue039>\u0d39;" // LETTER HA -// \ue03c>; # UNMAPPED InterIndic-Malayalam: SIGN NUKTA -// \ue03d>; # UNMAPPED InterIndic-Malayalam: SIGN AVAGRAHA + +"\ue03c>;" // FALLBACK BLOW AWAY NUKTA +"\ue03d>;" // FALLBACK BLOW AWAY AVAGRAHA + "\ue03e>\u0d3e;" // VOWEL SIGN AA "\ue03f>\u0d3f;" // VOWEL SIGN I "\ue040>\u0d40;" // VOWEL SIGN II "\ue041>\u0d41;" // VOWEL SIGN U "\ue042>\u0d42;" // VOWEL SIGN UU "\ue043>\u0d43;" // VOWEL SIGN VOCALIC R -// \ue044>; # UNMAPPED InterIndic-Malayalam: VOWEL SIGN VOCALIC RR +"\ue044>;" // FALLBACK BLOW AWAY VOWEL SIGN VOCALIC RR "\ue045>\u0d3e;" // REMAP (indicExceptions.txt): \u0d45>\u0d3e = VOWEL SIGN CANDRA E>VOWEL SIGN AA +"\ue046>\u0d46;" // VOWEL SIGN E "\ue047>\u0d47;" // VOWEL SIGN EE "\ue048>\u0d48;" // VOWEL SIGN AI "\ue049>\u0d4b;" // REMAP (indicExceptions.txt): \u0d49>\u0d4b = VOWEL SIGN CANDRA O>VOWEL SIGN OO +"\ue04a>\u0d4a;" // VOWEL SIGN O "\ue04b>\u0d4b;" // VOWEL SIGN OO "\ue04c>\u0d4c;" // VOWEL SIGN AU "\ue04d>\u0d4d;" // SIGN VIRAMA -// \ue050>; # UNMAPPED InterIndic-Malayalam: OM -// \ue055>; # UNMAPPED InterIndic-Malayalam: LENGTH MARK +"\ue050>\u0d13\u0d02;" // UNMAPPED InterIndic-Malayalam: OM +"\ue055>;" // FALLBACK BLOW AWAY LENGTH MARK "\ue056>\u0d48;" // REMAP (indicExceptions.txt): \u0d56>\u0d48 = AI LENGTH MARK>VOWEL SIGN AI "\ue057>\u0d57;" // AU LENGTH MARK +"\ue058>\u0d15;" // FALLBACK "\ue059>\u0d16;" // REMAP (indicExceptions.txt): \u0d59>\u0d16 = LETTER KHHA>LETTER KHA "\ue05a>\u0d17;" // REMAP (indicExceptions.txt): \u0d5a>\u0d17 = LETTER GHHA>LETTER GA "\ue05b>\u0d1c;" // REMAP (indicExceptions.txt): \u0d5b>\u0d1c = LETTER ZA>LETTER JA "\ue05d>\u0d22;" // REMAP (indicExceptions.txt): \u0d5d>\u0d22 = LETTER RHA>LETTER DDHA 
+"\ue05c>\u0d21;" // FALLBACK "\ue05e>\u0d2b;" // REMAP (indicExceptions.txt): \u0d5e>\u0d2b = LETTER FA>LETTER PHA "\ue05f>\u0d2f;" // REMAP (indicExceptions.txt): \u0d5f>\u0d2f = LETTER YYA>LETTER YA "\ue060>\u0d60;" // LETTER VOCALIC RR "\ue061>\u0d61;" // LETTER VOCALIC LL -// \ue062>; # UNMAPPED InterIndic-Malayalam: VOWEL SIGN VOCALIC L -// \ue063>; # UNMAPPED InterIndic-Malayalam: VOWEL SIGN VOCALIC LL +"\ue062>;" // FALLBACK BLOW AWAY VOWEL SIGN VOCALIC L +"\ue063>;" // FALLBACK BLOW AWAY VOWEL SIGN VOCALIC LL "\ue066>\u0d66;" // DIGIT ZERO "\ue067>\u0d67;" // DIGIT ONE "\ue068>\u0d68;" // DIGIT TWO @@ -123,11 +134,11 @@ translit_InterIndic_Malayalam { "\ue06e>\u0d6e;" // DIGIT EIGHT "\ue06f>\u0d6f;" // DIGIT NINE // \ue080>; # UNMAPPED InterIndic-Malayalam: ISSHAR -"\ue00e>\u0d0e;" // LETTER E -"\ue012>\u0d12;" // LETTER O -"\ue031>\u0d31;" // LETTER RRA -"\ue046>\u0d46;" // VOWEL SIGN E -"\ue04a>\u0d4a;" // VOWEL SIGN O + + + + + // :: NFC (NFD) ; // eof } diff --git a/icu4c/source/data/translit/t_InterIndic_Orya.txt b/icu4c/source/data/translit/t_InterIndic_Orya.txt index ee8fc159de..3a03a72823 100644 --- a/icu4c/source/data/translit/t_InterIndic_Orya.txt +++ b/icu4c/source/data/translit/t_InterIndic_Orya.txt @@ -6,7 +6,7 @@ // THIS IS A MACHINE-GENERATED FILE // Tool: dumpICUrules.bat // Source: ../../text/resources/Transliterator_InterIndic_Oriya.txt -// Date: Thu Oct 25 22:17:21 2001 +// Date: Sat Nov 10 17:25:41 2001 //-------------------------------------------------------------------- // InterIndic_Oriya @@ -42,9 +42,13 @@ translit_InterIndic_Oriya { "\ue00a>\u0b0a;" // LETTER UU "\ue00b>\u0b0b;" // LETTER VOCALIC R "\ue00c>\u0b0c;" // LETTER VOCALIC L -// \ue00f>; # UNMAPPED InterIndic-Oriya: LETTER EE (\u0b0f = LETTER E) +"\ue00d>\u0b0f;" // FALLBACK +"\ue00e>\u0b0f;" // FALLBACK +"\ue00f>\u0b0f;" // LETTER E "\ue010>\u0b10;" // LETTER AI -// \ue013>; # UNMAPPED InterIndic-Oriya: LETTER OO (\u0b13 = LETTER O) +"\ue011>\u0b13;" // FALLBACK 
+"\ue012>\u0b13;" // FALLBACK +"\ue013>\u0b13;" // UNMAPPED InterIndic-Oriya: LETTER OO (\u0b13 = LETTER O) "\ue014>\u0b14;" // LETTER AU "\ue015>\u0b15;" // LETTER KA "\ue016>\u0b16;" // LETTER KHA @@ -66,7 +70,7 @@ translit_InterIndic_Oriya { "\ue026>\u0b26;" // LETTER DA "\ue027>\u0b27;" // LETTER DHA "\ue028>\u0b28;" // LETTER NA -"\ue029>\u0b28;" // REMAP (indicExceptions.txt): \u0b29>\u0b28 = LETTER NNNA>LETTER NA +"\ue029>\u0b28\u0b3c;" // REMAP (indicExceptions.txt): \u0b29>\u0b28 = LETTER NNNA>LETTER NA "\ue02a>\u0b2a;" // LETTER PA "\ue02b>\u0b2b;" // LETTER PHA "\ue02c>\u0b2c;" // LETTER BA @@ -74,9 +78,10 @@ translit_InterIndic_Oriya { "\ue02e>\u0b2e;" // LETTER MA "\ue02f>\u0b2f;" // LETTER YA "\ue030>\u0b30;" // LETTER RA +"\ue031>\u0b5c;" // LETTER RRA "\ue032>\u0b32;" // LETTER LA "\ue033>\u0b33;" // LETTER LLA -"\ue034>\u0b33;" // REMAP (indicExceptions.txt): \u0b34>\u0b33 = LETTER LLLA>LETTER LLA +"\ue034>\u0b33\u0b3c;" // REMAP (indicExceptions.txt): \u0b34>\u0b33 = LETTER LLLA>LETTER LLA "\ue035>\u0b2c;" // REMAP (indicExceptions.txt): \u0b35>\u0b2c = LETTER VA>LETTER BA "\ue036>\u0b36;" // LETTER SHA "\ue037>\u0b37;" // LETTER SSA @@ -91,11 +96,13 @@ translit_InterIndic_Oriya { "\ue042>\u0b42;" // VOWEL SIGN UU "\ue043>\u0b43;" // VOWEL SIGN VOCALIC R "\ue044>\u0b43\u0b3c;" // REMAP (indicExceptions.txt): \u0b44>\u0b43\u0b3c = VOWEL SIGN VOCALIC RR>VOWEL SIGN VOCALIC R.SIGN NUKTA -"\ue045>\u0b47;" // REMAP (indicExceptions.txt): \u0b45>\u0b47 = VOWEL SIGN CANDRA E>VOWEL SIGN E -// \ue047>; # UNMAPPED InterIndic-Oriya: VOWEL SIGN EE (\u0b47 = VOWEL SIGN E) +"\ue045>\u0b47;" // FALLBACK +"\ue046>\u0b47;" // FALLBACK +"\ue047>\u0b47;" // VOWEL SIGN E "\ue048>\u0b48;" // VOWEL SIGN AI -"\ue049>\u0b4b;" // REMAP (indicExceptions.txt): \u0b49>\u0b4b = VOWEL SIGN CANDRA O>VOWEL SIGN O -// \ue04b>; # UNMAPPED InterIndic-Oriya: VOWEL SIGN OO (\u0b4b = VOWEL SIGN O) +"\ue049>\u0b4b;" // FALLBACK +"\ue04a>\u0b4b;" // FALLBACK +"\ue04b>\u0b4b;" // VOWEL 
SIGN O "\ue04c>\u0b4c;" // VOWEL SIGN AU "\ue04d>\u0b4d;" // SIGN VIRAMA "\ue050>\u0b13\u0b01;" // REMAP (indicExceptions.txt): \u0b50>\u0b13\u0b01 = OM>LETTER O.SIGN CANDRABINDU @@ -103,8 +110,10 @@ translit_InterIndic_Oriya { "\ue056>\u0b56;" // AI LENGTH MARK "\ue057>\u0b57;" // AU LENGTH MARK "\ue059>\u0b16\u0b3c;" // REMAP (indicExceptions.txt): \u0b59>\u0b16\u0b3c = LETTER KHHA>LETTER KHA.SIGN NUKTA +"\ue058>\u0b15\u0b3c;" // FALLBACK "\ue05a>\u0b17\u0b3c;" // REMAP (indicExceptions.txt): \u0b5a>\u0b17\u0b3c = LETTER GHHA>LETTER GA.SIGN NUKTA "\ue05b>\u0b1c\u0b3c;" // REMAP (indicExceptions.txt): \u0b5b>\u0b1c\u0b3c = LETTER ZA>LETTER JA.SIGN NUKTA +"\ue05c>\u0b21\u0b3c;" // FALLBACK "\ue05d>\u0b5d;" // LETTER RHA "\ue05e>\u0b2b\u0b3c;" // REMAP (indicExceptions.txt): \u0b5e>\u0b2b\u0b3c = LETTER FA>LETTER PHA.SIGN NUKTA "\ue05f>\u0b5f;" // LETTER YYA @@ -123,11 +132,8 @@ translit_InterIndic_Oriya { "\ue06e>\u0b6e;" // DIGIT EIGHT "\ue06f>\u0b6f;" // DIGIT NINE "\ue070>\u0b70;" // ISSHAR -"\ue00e>\u0b0f;" // LETTER E -"\ue013>\u0b13;" // LETTER O -"\ue031>\u0b5c;" // LETTER RRA -"\ue047>\u0b47;" // VOWEL SIGN E -"\ue04b>\u0b4b;" // VOWEL SIGN O + + // :: NFC (NFD) ; // eof } diff --git a/icu4c/source/data/translit/t_InterIndic_Taml.txt b/icu4c/source/data/translit/t_InterIndic_Taml.txt index 1c6d1ba9a2..780fb029b4 100644 --- a/icu4c/source/data/translit/t_InterIndic_Taml.txt +++ b/icu4c/source/data/translit/t_InterIndic_Taml.txt @@ -6,7 +6,7 @@ // THIS IS A MACHINE-GENERATED FILE // Tool: dumpICUrules.bat // Source: ../../text/resources/Transliterator_InterIndic_Tamil.txt -// Date: Thu Oct 25 22:17:21 2001 +// Date: Sat Nov 10 17:25:41 2001 //-------------------------------------------------------------------- // InterIndic_Tamil @@ -31,7 +31,7 @@ translit_InterIndic_Tamil { //-------------------------------------------------------------------- // InterIndic-Tamil //:: NFD (NFC) ; -// \ue001>; # UNMAPPED InterIndic-Tamil: SIGN CANDRABINDU +"\ue001>\u0b82;" //
FALLBACK SIGN CANDRABINDU "\ue002>\u0b82;" // SIGN ANUSVARA "\ue003>\u0b83;" // SIGN VISARGA "\ue005>\u0b85;" // LETTER A @@ -42,8 +42,12 @@ translit_InterIndic_Tamil { "\ue00a>\u0b8a;" // LETTER UU "\ue00b>\u0bb0\u0bbf;" // REMAP (indicExceptions.txt): \u0b8b>\u0bb0\u0bbf = LETTER VOCALIC R>LETTER RA.VOWEL SIGN I "\ue00c>\u0b87;" // REMAP (indicExceptions.txt): \u0b8c>\u0b87 = LETTER VOCALIC L>LETTER I +"\ue00d>\u0b8f;" // FALLBACK +"\ue00e>\u0b8e;" // LETTER E "\ue00f>\u0b8f;" // LETTER EE "\ue010>\u0b90;" // LETTER AI +"\ue011>\u0b92;" // FALLBACK +"\ue012>\u0b92;" // LETTER O "\ue013>\u0b93;" // LETTER OO "\ue014>\u0b94;" // LETTER AU "\ue015>\u0b95;" // LETTER KA @@ -74,6 +78,7 @@ translit_InterIndic_Tamil { "\ue02e>\u0bae;" // LETTER MA "\ue02f>\u0baf;" // LETTER YA "\ue030>\u0bb0;" // LETTER RA +"\ue031>\u0bb1;" // LETTER RRA "\ue032>\u0bb2;" // LETTER LA "\ue033>\u0bb3;" // LETTER LLA "\ue034>\u0bb4;" // LETTER LLLA @@ -82,8 +87,10 @@ translit_InterIndic_Tamil { "\ue037>\u0bb7;" // LETTER SSA "\ue038>\u0bb8;" // LETTER SA "\ue039>\u0bb9;" // LETTER HA -// \ue03c>; # UNMAPPED InterIndic-Tamil: SIGN NUKTA -// \ue03d>; # UNMAPPED InterIndic-Tamil: SIGN AVAGRAHA + +"\ue03c>;" // FALLBACK BLOW AWAY NUKTA +"\ue03d>;" // FALLBACK BLOW AWAY AVAGRAHA + "\ue03e>\u0bbe;" // VOWEL SIGN AA "\ue03f>\u0bbf;" // VOWEL SIGN I "\ue040>\u0bc0;" // VOWEL SIGN II @@ -92,27 +99,32 @@ translit_InterIndic_Tamil { "\ue043>\u0bcd\u0bb0\u0bbf;" // REMAP (indicExceptions.txt): \u0bc3>\u0bcd\u0bb0\u0bbf = VOWEL SIGN VOCALIC R>SIGN VIRAMA.LETTER RA.VOWEL SIGN I "\ue044>\u0bcd\u0bb0\u0bbf;" // REMAP (indicExceptions.txt): \u0bc4>\u0bcd\u0bb0\u0bbf = VOWEL SIGN VOCALIC RR>SIGN VIRAMA.LETTER RA.VOWEL SIGN I "\ue045>\u0bbe;" // REMAP (indicExceptions.txt): \u0bc5>\u0bbe = VOWEL SIGN CANDRA E>VOWEL SIGN AA +"\ue046>\u0bc6;" // VOWEL SIGN E "\ue047>\u0bc7;" // VOWEL SIGN EE "\ue048>\u0bc8;" // VOWEL SIGN AI "\ue049>\u0bbe;" // REMAP (indicExceptions.txt): \u0bc9>\u0bbe = VOWEL SIGN CANDRA 
O>VOWEL SIGN AA +"\ue04a>\u0bca;" // VOWEL SIGN O "\ue04b>\u0bcb;" // VOWEL SIGN OO "\ue04c>\u0bcc;" // VOWEL SIGN AU "\ue04d>\u0bcd;" // SIGN VIRAMA "\ue050>\u0b93\u0bae\u0bcd;" // REMAP (indicExceptions.txt): \u0bd0>\u0b93\u0bae\u0bcd = OM>LETTER OO.LETTER MA.SIGN VIRAMA + // \ue055>; # UNMAPPED InterIndic-Tamil: LENGTH MARK "\ue056>\u0bc8;" // REMAP (indicExceptions.txt): \u0bd6>\u0bc8 = AI LENGTH MARK>VOWEL SIGN AI "\ue057>\u0bd7;" // AU LENGTH MARK +"\ue058>\u0b95;" // FALLBACK "\ue059>\u0b95;" // REMAP (indicExceptions.txt): \u0bd9>\u0b95 = LETTER KHHA>LETTER KA "\ue05a>\u0b95;" // REMAP (indicExceptions.txt): \u0bda>\u0b95 = LETTER GHHA>LETTER KA "\ue05b>\u0b9c;" // REMAP (indicExceptions.txt): \u0bdb>\u0b9c = LETTER ZA>LETTER JA +"\ue05c>\u0ba4;" // FALLBACK "\ue05d>\u0b9f;" // REMAP (indicExceptions.txt): \u0bdd>\u0b9f = LETTER RHA>LETTER TTA "\ue05e>\u0baa;" // REMAP (indicExceptions.txt): \u0bde>\u0baa = LETTER FA>LETTER PA "\ue05f>\u0baf;" // REMAP (indicExceptions.txt): \u0bdf>\u0baf = LETTER YYA>LETTER YA "\ue060>\u0bb0\u0bbf;" // REMAP (indicExceptions.txt): \u0be0>\u0bb0\u0bbf = LETTER VOCALIC RR>LETTER RA.VOWEL SIGN I "\ue061>\u0b88;" // REMAP (indicExceptions.txt): \u0be1>\u0b88 = LETTER VOCALIC LL>LETTER II -// \ue062>; # UNMAPPED InterIndic-Tamil: VOWEL SIGN VOCALIC L -// \ue063>; # UNMAPPED InterIndic-Tamil: VOWEL SIGN VOCALIC LL -// \ue066>; # UNMAPPED InterIndic-Tamil: DIGIT ZERO +"\ue062>\u0bbf;"// FALLBACK VOWEL SIGN VOCALIC L +"\ue063>\u0bc0;"// FALLBACK VOWEL SIGN VOCALIC LL +"\ue066>\u0030;" // FALLBACK DIGIT ZERO "\ue067>\u0be7;" // DIGIT ONE "\ue068>\u0be8;" // DIGIT TWO "\ue069>\u0be9;" // DIGIT THREE @@ -123,11 +135,6 @@ translit_InterIndic_Tamil { "\ue06e>\u0bee;" // DIGIT EIGHT "\ue06f>\u0bef;" // DIGIT NINE // \ue080>; # UNMAPPED InterIndic-Tamil: ISSHAR -"\ue00e>\u0b8e;" // LETTER E -"\ue012>\u0b92;" // LETTER O -"\ue031>\u0bb1;" // LETTER RRA -"\ue046>\u0bc6;" // VOWEL SIGN E -"\ue04a>\u0bca;" // VOWEL SIGN O // :: NFC (NFD) ; 
// eof } diff --git a/icu4c/source/data/translit/t_InterIndic_Telu.txt b/icu4c/source/data/translit/t_InterIndic_Telu.txt index 24145b786a..7ff58f3e2f 100644 --- a/icu4c/source/data/translit/t_InterIndic_Telu.txt +++ b/icu4c/source/data/translit/t_InterIndic_Telu.txt @@ -6,7 +6,7 @@ // THIS IS A MACHINE-GENERATED FILE // Tool: dumpICUrules.bat // Source: ../../text/resources/Transliterator_InterIndic_Telugu.txt -// Date: Thu Oct 25 22:17:21 2001 +// Date: Sat Nov 10 17:25:41 2001 //-------------------------------------------------------------------- // InterIndic_Telugu @@ -42,8 +42,12 @@ translit_InterIndic_Telugu { "\ue00a>\u0c0a;" // LETTER UU "\ue00b>\u0c0b;" // LETTER VOCALIC R "\ue00c>\u0c0c;" // LETTER VOCALIC L +"\ue00d>\u0c0E;" // FALLBACK MAPPING +"\ue00e>\u0c0E;" // LETTER E "\ue00f>\u0c0f;" // LETTER EE "\ue010>\u0c10;" // LETTER AI +"\ue011>\u0c12;" // FALLBACK MAPPING +"\ue012>\u0c12;" // LETTER O "\ue013>\u0c13;" // LETTER OO "\ue014>\u0c14;" // LETTER AU "\ue015>\u0c15;" // LETTER KA @@ -82,8 +86,10 @@ translit_InterIndic_Telugu { "\ue037>\u0c37;" // LETTER SSA "\ue038>\u0c38;" // LETTER SA "\ue039>\u0c39;" // LETTER HA -// \ue03c>; # UNMAPPED InterIndic-Telugu: SIGN NUKTA -// \ue03d>; # UNMAPPED InterIndic-Telugu: SIGN AVAGRAHA + +"\ue03c>;" // FALLBACK BLOW AWAY NUKTA +"\ue03d>;" // FALLBACK BLOW AWAY AVAGRAHA + "\ue03e>\u0c3e;" // VOWEL SIGN AA "\ue03f>\u0c3f;" // VOWEL SIGN I "\ue040>\u0c40;" // VOWEL SIGN II @@ -102,9 +108,11 @@ translit_InterIndic_Telugu { "\ue055>\u0c55;" // LENGTH MARK "\ue056>\u0c56;" // AI LENGTH MARK "\ue057>\u0c4c;" // REMAP (indicExceptions.txt): \u0c57>\u0c4c = AU LENGTH MARK>VOWEL SIGN AU +"\ue058>\u0c15;" // REMAP "\ue059>\u0c16;" // REMAP (indicExceptions.txt): \u0c59>\u0c16 = LETTER KHHA>LETTER KHA "\ue05a>\u0c17;" // REMAP (indicExceptions.txt): \u0c5a>\u0c17 = LETTER GHHA>LETTER GA "\ue05b>\u0c1c;" // REMAP (indicExceptions.txt): \u0c5b>\u0c1c = LETTER ZA>LETTER JA +"\ue05c>\u0c22;" // REMAP "\ue05d>\u0c22;" //
REMAP (indicExceptions.txt): \u0c5d>\u0c22 = LETTER RHA>LETTER DDHA "\ue05e>\u0c2b;" // REMAP (indicExceptions.txt): \u0c5e>\u0c2b = LETTER FA>LETTER PHA "\ue05f>\u0c2f;" // REMAP (indicExceptions.txt): \u0c5f>\u0c2f = LETTER YYA>LETTER YA @@ -123,8 +131,7 @@ translit_InterIndic_Telugu { "\ue06e>\u0c6e;" // DIGIT EIGHT "\ue06f>\u0c6f;" // DIGIT NINE // \ue080>; # UNMAPPED InterIndic-Telugu: ISSHAR -"\ue00e>\u0c0e;" // LETTER E -"\ue012>\u0c12;" // LETTER O + "\ue031>\u0c31;" // LETTER RRA "\ue046>\u0c46;" // VOWEL SIGN E "\ue04a>\u0c4a;" // VOWEL SIGN O diff --git a/icu4c/source/data/translit/t_Knda_InterIndic.txt b/icu4c/source/data/translit/t_Knda_InterIndic.txt index 587e66d0d7..9f445b99a5 100644 --- a/icu4c/source/data/translit/t_Knda_InterIndic.txt +++ b/icu4c/source/data/translit/t_Knda_InterIndic.txt @@ -6,7 +6,7 @@ // THIS IS A MACHINE-GENERATED FILE // Tool: dumpICUrules.bat // Source: ../../text/resources/Transliterator_Kannada_InterIndic.txt -// Date: Thu Oct 25 22:17:21 2001 +// Date: Sat Nov 10 17:25:41 2001 //-------------------------------------------------------------------- // Kannada_InterIndic diff --git a/icu4c/source/data/translit/t_Latn_InterIndic.txt b/icu4c/source/data/translit/t_Latn_InterIndic.txt index b7c91cf855..0fadbe3ff8 100644 --- a/icu4c/source/data/translit/t_Latn_InterIndic.txt +++ b/icu4c/source/data/translit/t_Latn_InterIndic.txt @@ -6,7 +6,7 @@ // THIS IS A MACHINE-GENERATED FILE // Tool: dumpICUrules.bat // Source: ../../text/resources/Transliterator_Latin_InterIndic.txt -// Date: Thu Oct 25 22:17:21 2001 +// Date: Sat Nov 10 17:25:41 2001 //-------------------------------------------------------------------- // Latin_InterIndic @@ -102,7 +102,7 @@ translit_Latin_InterIndic { "$virama=\ue04d;" // \u094e Reserved // \u094f Reserved -//\u0950>\ue050; # OM + "$om = \ue050;" // OM // \u0951>; # UNMAPPED STRESS SIGN UDATTA // \u0952>; # UNMAPPED STRESS SIGN ANUDATTA // \u0953>; # UNMAPPED GRAVE ACCENT @@ -166,19 +166,19 @@ 
translit_Latin_InterIndic { "'-'h\u0323>$visarga;" "mm>$anusvara;" "x>$visarga;" - "aa>$waa;" +// convert to independent forms at start of word or syllable: +// e.g. keai -> {ka}{e}{wai}; k'ai -> {ka}{wai}; (ai) -> ({wai}) "a\u0304>$waa;" "ai>$wai;" "au>$wau;" - "ii>$wii;" "i\u0304>$wii;" "i>$wi;" - "uu>$wuu;" + //uu>$wuu; "u\u0304>$wuu;" "u>$wu;" - "rrh>$wrr;" + //rrh>$wrr; "r\u0325\u0304>$wrr;" - "rh>$wr;" + //rh>$wr; "r\u0325>$wr;" "l\u0325\u0304>$wll;" "lh>$wl;l\u0325>$wl;" @@ -188,6 +188,7 @@ translit_Latin_InterIndic { "e\u0306>$wce;" "o\u0306>$wco;" "e>$wse;" + "''om>$om;" "o>$wso;" "n}na > $na|$virama;" "n\u0307}[kg] > $anusvara;" @@ -199,6 +200,15 @@ translit_Latin_InterIndic { "m}[pbm] > $anusvara;" "n} [yrlvsh] > $anusvara;" "'-'m\u0307 > $anusvara;" + + //urdu compatibility + "q>$uka|$virama;" + "k\u0331h\u0331>$ukha |$virama;" + "g\u0307> $ugha | $virama;" + "z > $ujha |$virama;" + "f > $ufa|$virama;" + + // dev "y\u0307>$uya|$virama;" "l\u0331>$ela|$virama;" "n\u0331>$ena|$virama;" @@ -213,7 +223,6 @@ translit_Latin_InterIndic { "d\u0323>$dda|$virama;" "kh>$kha|$virama;" "k>$ka|$virama;" - "q>$ka|$virama;" "gh>$gha|$virama;" "g>$ga|$virama;" "ch>$cha|$virama;" @@ -234,36 +243,37 @@ translit_Latin_InterIndic { "b>$ba|$virama;" "m>$ma|$virama;" "y>$ya|$virama;" + "r\u0331>$rra|$virama;" "r>$ra|$virama;" "l\u0323a>$lla;" "l>$la|$virama;" "v>$va|$virama;" - "f>$va|$virama;" "w>$va|$virama;" "sh>$sha|$virama;" "ss>$ssa|$virama;" "s\u0323>$ssa|$virama;" "s\u0301>$sha|$virama;" "s>$sa|$virama;" - "z>$sa|$virama;" "h>$ha|$virama;" "'.'>$danda;" "$danda'.'>$doubleDanda;" "$depVowelAbove{'~'>$anusvara;" "$depVowelBelow{'~'>$chandrabindu;" - "$virama aa>$aa;" +// convert to dependent forms after consonant with no vowel: +// e.g. 
kai -> {ka}{virama}ai -> {ka}{ai} + //$virama aa>$aa; "$virama a\u0304>$aa;" "$virama ai>$ai;" "$virama au>$au;" "$virama ii>$ii;" "$virama i\u0304>$ii;" "$virama i>$i;" - "$virama uu>$uu;" + //$virama uu>$uu; "$virama u\u0304>$uu;" "$virama u>$u;" - "$virama rrh>$rrh;" + //$virama rrh>$rrh; "$virama r\u0325\u0304>$rrh;" - "$virama rh>$rh;" + //$virama rh>$rh; "$virama r\u0325a>$rh;" "$virama r\u0325>$rh;" "$virama l\u0325\u0304>$llh;" @@ -276,22 +286,23 @@ translit_Latin_InterIndic { "$virama o\u0306>$co;" "$virama e>$se;" "$virama o>$so;" - "$virama''aa>$waa;" +// otherwise convert independent forms when separated by ': k'ai -> {ka}{virama}{wai} + //$virama''aa>$waa; "$virama''a\u0304>$waa;" "$virama''ai>$wai;" "$virama''au>$wau;" - "$virama''ii>$wii;" + //$virama''ii>$wii; "$virama''i\u0304>$wii;" "$virama''i>$wi;" - "$virama''uu>$wuu;" + //$virama''uu>$wuu; "$virama''u\u0304>$wuu;" "$virama''u>$wu;" - "$virama''rrh>$wrr;" + //$virama''rrh>$wrr; "$virama''r\u0325\u0304>$wrr;" - "$virama''rh>$wr;" + //$virama''rh>$wr; "$virama''r\u0325>$wr;" "$virama''l\u0325\u0304>$wll;" - "$virama''lh>$wl;" + //$virama''lh>$wl; "$virama''l\u0325>$wl;" "$virama''e\u0304>$we;" "$virama''o\u0304>$wo;" diff --git a/icu4c/source/data/translit/t_Mlym_InterIndic.txt b/icu4c/source/data/translit/t_Mlym_InterIndic.txt index 6fca3b1342..0818f1d055 100644 --- a/icu4c/source/data/translit/t_Mlym_InterIndic.txt +++ b/icu4c/source/data/translit/t_Mlym_InterIndic.txt @@ -6,7 +6,7 @@ // THIS IS A MACHINE-GENERATED FILE // Tool: dumpICUrules.bat // Source: ../../text/resources/Transliterator_Malayalam_InterIndic.txt -// Date: Thu Oct 25 22:17:22 2001 +// Date: Sat Nov 10 17:25:42 2001 //-------------------------------------------------------------------- // Malayalam_InterIndic diff --git a/icu4c/source/data/translit/t_Orya_InterIndic.txt b/icu4c/source/data/translit/t_Orya_InterIndic.txt index 7a5b247388..ca81e9acc4 100644 --- a/icu4c/source/data/translit/t_Orya_InterIndic.txt +++ 
b/icu4c/source/data/translit/t_Orya_InterIndic.txt @@ -6,7 +6,7 @@ // THIS IS A MACHINE-GENERATED FILE // Tool: dumpICUrules.bat // Source: ../../text/resources/Transliterator_Oriya_InterIndic.txt -// Date: Thu Oct 25 22:17:22 2001 +// Date: Sat Nov 10 17:25:42 2001 //-------------------------------------------------------------------- // Oriya_InterIndic diff --git a/icu4c/source/data/translit/t_Taml_InterIndic.txt b/icu4c/source/data/translit/t_Taml_InterIndic.txt index dbc2aee910..b64602ed87 100644 --- a/icu4c/source/data/translit/t_Taml_InterIndic.txt +++ b/icu4c/source/data/translit/t_Taml_InterIndic.txt @@ -6,7 +6,7 @@ // THIS IS A MACHINE-GENERATED FILE // Tool: dumpICUrules.bat // Source: ../../text/resources/Transliterator_Tamil_InterIndic.txt -// Date: Thu Oct 25 22:17:22 2001 +// Date: Sat Nov 10 17:25:42 2001 //-------------------------------------------------------------------- // Tamil_InterIndic @@ -31,6 +31,7 @@ translit_Tamil_InterIndic { //-------------------------------------------------------------------- // Tamil-InterIndic //:: NFD (NFC) ; +"\u0bc6\u0bd7>\ue04c;" "\u0b82>\ue002;" // SIGN ANUSVARA "\u0b83>\ue003;" // SIGN VISARGA "\u0b85>\ue005;" // LETTER A diff --git a/icu4c/source/data/translit/t_Telu_InterIndic.txt b/icu4c/source/data/translit/t_Telu_InterIndic.txt index e7cded3f0f..7308ab6b43 100644 --- a/icu4c/source/data/translit/t_Telu_InterIndic.txt +++ b/icu4c/source/data/translit/t_Telu_InterIndic.txt @@ -6,7 +6,7 @@ // THIS IS A MACHINE-GENERATED FILE // Tool: dumpICUrules.bat // Source: ../../text/resources/Transliterator_Telugu_InterIndic.txt -// Date: Thu Oct 25 22:17:22 2001 +// Date: Sat Nov 10 17:25:42 2001 //-------------------------------------------------------------------- // Telugu_InterIndic