ICU-2022 Fix Gurmukhi and other Indic transliterators

X-SVN-Rev: 9348
This commit is contained in:
Ram Viswanadha 2002-07-25 21:20:26 +00:00
parent 77ffea72e9
commit ea7823ca07
14 changed files with 124 additions and 49 deletions

View File

@ -3,8 +3,8 @@
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/impl/data/Attic/Transliterator_Bengali_InterIndic.txt,v $
# $Date: 2002/03/02 00:27:26 $
# $Revision: 1.8 $
# $Date: 2002/07/25 21:20:26 $
# $Revision: 1.9 $
#--------------------------------------------------------------------
# Bengali-InterIndic
@ -88,8 +88,8 @@
\u09ED>\uE06D; # DIGIT SEVEN
\u09EE>\uE06E; # DIGIT EIGHT
\u09EF>\uE06F; # DIGIT NINE
\u09F0>\uE070; # UNMAPPED Bengali-InterIndic: LETTER RA WITH MIDDLE DIAGONAL
\u09F1>\uE071; # UNMAPPED Bengali-InterIndic: LETTER RA WITH LOWER DIAGONAL
\u09F0>; # UNMAPPED Bengali-InterIndic: LETTER RA WITH MIDDLE DIAGONAL
\u09F1>; # UNMAPPED Bengali-InterIndic: LETTER RA WITH LOWER DIAGONAL
\u09F2>; # UNMAPPED Bengali-InterIndic: RUPEE MARK
\u09F3>; # UNMAPPED Bengali-InterIndic: RUPEE SIGN
\u09F4>; # UNMAPPED Bengali-InterIndic: CURRENCY NUMERATOR ONE
@ -98,7 +98,7 @@
\u09F7>; # UNMAPPED Bengali-InterIndic: CURRENCY NUMERATOR FOUR
\u09F8>; # UNMAPPED Bengali-InterIndic: CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR
\u09F9>; # UNMAPPED Bengali-InterIndic: CURRENCY DENOMINATOR SIXTEEN
\u09FA>\uE070; # ISSHAR
\u09FA>; # ISSHAR
\u0964>\ue064; # DANDA
\u0965>\ue065; # DOUBLE DANDA
# :: NFC (NFD) ;

View File

@ -3,8 +3,8 @@
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/impl/data/Attic/Transliterator_Gurmukhi_InterIndic.txt,v $
# $Date: 2002/03/02 00:27:27 $
# $Revision: 1.9 $
# $Date: 2002/07/25 21:20:26 $
# $Revision: 1.10 $
#--------------------------------------------------------------------
# Gurmukhi-InterIndic
@ -17,13 +17,14 @@
#\u0A32\u0A3C>\uE033; # LETTER LLA
#\u0A2B\u0A3C>\uE05E; # LETTER FA
\u0A02>\uE001; # REMAP (indicExceptions.txt): \u0a01>\u0a02 = SIGN CANDRABINDU>SIGN BINDI
\u0A02>\uE002; # SIGN BINDI
\u0A05>\uE005; # LETTER A
\u0A06>\uE006; # LETTER AA
\u0A07>\uE007; # LETTER I
\u0A08>\uE008; # LETTER II
\u0A09>\uE009; # LETTER U
\u0A0A>\uE00A; # LETTER UU
\u0A0C>; # UNMAPPED
\u0A0F>\uE00F; # LETTER EE
\u0A10>\uE010; # LETTER AI
\u0A13>\uE013; # LETTER OO
@ -56,9 +57,11 @@
\u0A2F>\uE02F; # LETTER YA
\u0A30>\uE030; # LETTER RA
\u0A32>\uE032; # LETTER LA
\u0a33>\uE033; # FALLBACK
\u0A35>\uE035; # LETTER VA
\u0a36>\ue036;
\u0A38\u0a3c>\ue036;
\u0A37>\uE036;
\u0A38>\uE038; # LETTER SA
\u0A39>\uE039; # LETTER HA
\u0A3C>\uE03C; # SIGN NUKTA

View File

@ -3,8 +3,8 @@
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/impl/data/Attic/Transliterator_InterIndic_Bengali.txt,v $
# $Date: 2002/03/02 00:27:27 $
# $Revision: 1.7 $
# $Date: 2002/07/25 21:20:26 $
# $Revision: 1.8 $
#--------------------------------------------------------------------
# InterIndic-Bengali
@ -115,14 +115,20 @@
\uE06D>\u09ED; # DIGIT SEVEN
\uE06E>\u09EE; # DIGIT EIGHT
\uE06F>\u09EF; # DIGIT NINE
\uE070>;
\uE070>\u09F0; # UNMAPPED Bengali-InterIndic: LETTER RA WITH MIDDLE DIAGONAL
\uE071>;
\uE0FA>\u09FA; # ISSHAR
0 > \u09E6; # FALLBACK FOR TAMIL
1 > \u09E7;
\ue072>;
\ue073>;
\ue074>;
\ue075>\u09F5;
\ue076>\u09F6;
\ue077>\u09F7;
\ue078>\u09F8;
\ue079>\u09F9;
\ue07A>\u09FA; # ISSHAR
# :: NFC (NFD) ;
# eof

View File

@ -3,8 +3,8 @@
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/impl/data/Attic/Transliterator_InterIndic_Devanagari.txt,v $
# $Date: 2002/03/02 00:27:26 $
# $Revision: 1.6 $
# $Date: 2002/07/25 21:20:26 $
# $Revision: 1.7 $
#--------------------------------------------------------------------
# InterIndic-Devanagari
@ -149,6 +149,12 @@
\ue0f0 > \u0930; # FALLBACK RA
\ue0f1 > \u0930; # FALLBACK RA
\ue075>;
\ue076>;
\ue077>;
\ue078>;
\ue079>;
\ue07A>;
# \u0970 # UNMAPPED Devanagari-InterIndic: ABBREVIATION SIGN
# :: NFC;

View File

@ -3,8 +3,8 @@
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/impl/data/Attic/Transliterator_InterIndic_Gujarati.txt,v $
# $Date: 2002/03/02 00:27:27 $
# $Revision: 1.7 $
# $Date: 2002/07/25 21:20:26 $
# $Revision: 1.8 $
#--------------------------------------------------------------------
# InterIndic-Gujarati
@ -120,6 +120,12 @@
\ue072>;
\ue073>;
\ue074>;
\ue075>;
\ue076>;
\ue077>;
\ue078>;
\ue079>;
\ue07A>;
0 > \u0ae6; # FALLBACK FOR TAMIL
1 > \u0ae7;
\ue0f0 > \u0ab0; # FALLBACK RA

View File

@ -3,14 +3,24 @@
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/impl/data/Attic/Transliterator_InterIndic_Gurmukhi.txt,v $
# $Date: 2002/03/02 00:27:27 $
# $Revision: 1.7 $
# $Date: 2002/07/25 21:20:26 $
# $Revision: 1.8 $
#--------------------------------------------------------------------
# InterIndic-Gurmukhi
#:: NFD (NFC) ;
\ue001>\u0a02; # REMAP (indicExceptions.txt): \u0a01>\u0a02 = SIGN CANDRABINDU>SIGN BINDI
\ue002>; # FALLBACK BLOW AWAY SIGN ANUSVARA (\u0a02 = SIGN BINDI)
$vowel = [\u0A05-\u0A14 \u0A3e-\u0A4D];
$consonant = [\u0A15-\u0A39];
\ue001>; # FALLBACK BLOW AWAY SIGN CANDRABINDU
#rules for BINDI
# Anusvara is equivalent to BINDI when preceded by a vowel
$vowel{\ue002>\u0a02; # SIGN ANUSVARA (\u0a02 = SIGN BINDI)
# else is equivalent to TIPPI
$consonant{\ue002>\u0a70; # SIGN TIPPI
\ue002>\u0a02;
\ue003>; # FALLBACK BLOW AWAY SIGN VISARGA
\ue005>\u0a05; # LETTER A
\ue006>\u0a06; # LETTER AA
@ -115,11 +125,17 @@
\ue06d>\u0a6d; # DIGIT SEVEN
\ue06e>\u0a6e; # DIGIT EIGHT
\ue06f>\u0a6f; # DIGIT NINE
\ue070>\u0a70; # TIPPI
\ue071>\u0a71; # ADDAK
\ue072>\u0a72; # IRI
\ue073>\u0a73; # URA
\ue074>\u0a74; # EK ONKAR
\ue070>; # TIPPI
\ue071>; # ADDAK
\ue072>; # IRI
\ue073>; # URA
\ue074>; # EK ONKAR
\ue075>;
\ue076>;
\ue077>;
\ue078>;
\ue079>;
\ue07A>;
\ue080>; # FALLBACK BLOW AWAY ISSHAR
\ue081>; # FALLBACK BLOW AWAY LETTER E
\ue082>; # FALLBACK BLOW AWAY LETTER O (\u0a02 = SIGN BINDI)

View File

@ -3,8 +3,8 @@
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/impl/data/Attic/Transliterator_InterIndic_Kannada.txt,v $
# $Date: 2002/03/02 00:27:27 $
# $Revision: 1.8 $
# $Date: 2002/07/25 21:20:26 $
# $Revision: 1.9 $
#--------------------------------------------------------------------
# InterIndic-Kannada
@ -125,6 +125,12 @@
\ue072>;
\ue073>;
\ue074>;
\ue075>;
\ue076>;
\ue077>;
\ue078>;
\ue079>;
\ue07A>;
0 > \u0ce6; # FALLBACK FOR TAMIL
1 > \u0ce7;
\ue0f0 > \u0cb0; # FALLBACK RA

View File

@ -3,8 +3,8 @@
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/impl/data/Attic/Transliterator_InterIndic_Latin.txt,v $
# $Date: 2002/03/02 00:27:26 $
# $Revision: 1.6 $
# $Date: 2002/07/25 21:20:26 $
# $Revision: 1.7 $
#--------------------------------------------------------------------
# InterIndic-Latin
@ -177,6 +177,7 @@
$ena }$x > n\u0331 ;
$ena$virama > n\u0331 ;
$ena > n\u0331a ;
$uka > qa ;
$ka$nukta }$x > q ;
$ka$nukta$virama > q ;
$ka$nukta > qa ;
@ -185,9 +186,12 @@
$kha$nukta > k\u0331h\u0331a ;
$ukha$virama > k\u0331h\u0331;
$ukha > k\u0331h\u0331a;
$ugha > g\u0307a ;
$ga$nukta }$x > g\u0307 ;
$ga$nukta$virama > g\u0307 ;
$ga$nukta > g\u0307a ;
$ujha > za ;
$ja$nukta }$x > z ;
$ja$nukta$virama > z ;
$ja$nukta > za ;
@ -198,7 +202,8 @@
$uddha}$x > r\u0323 ;
$uddha$virama > r\u0323 ;
$uddha > r\u0323a;
$udha > r\u0323a ;
$dda$nukta}$x > r\u0323 ;
$dda$nukta$virama > r\u0323 ;
$dda$nukta > r\u0323a ;

View File

@ -3,8 +3,8 @@
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/impl/data/Attic/Transliterator_InterIndic_Malayalam.txt,v $
# $Date: 2002/03/02 00:27:27 $
# $Revision: 1.7 $
# $Date: 2002/07/25 21:20:26 $
# $Revision: 1.8 $
#--------------------------------------------------------------------
# InterIndic-Malayalam
@ -125,6 +125,12 @@
\ue072>;
\ue073>;
\ue074>;
\ue075>;
\ue076>;
\ue077>;
\ue078>;
\ue079>;
\ue07A>;
0 > \u0d66; # FALLBACK FOR TAMIL
1 > \u0d67;
\ue0f0 > \u0d30; # FALLBACK RA

View File

@ -3,8 +3,8 @@
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/impl/data/Attic/Transliterator_InterIndic_Oriya.txt,v $
# $Date: 2002/03/02 00:27:26 $
# $Revision: 1.7 $
# $Date: 2002/07/25 21:20:26 $
# $Revision: 1.8 $
#--------------------------------------------------------------------
# InterIndic-Oriya
@ -115,11 +115,17 @@
\ue06d>\u0b6d; # DIGIT SEVEN
\ue06e>\u0b6e; # DIGIT EIGHT
\ue06f>\u0b6f; # DIGIT NINE
\ue070>\u0b70; # ISSHAR
\ue070>; # ISSHAR
\ue071>;
\ue072>;
\ue073>;
\ue074>;
\ue075>;
\ue076>;
\ue077>;
\ue078>;
\ue079>;
\ue07A>;
0 > \u0b66; # FALLBACK FOR TAMIL
1 > \u0b67;
\ue0f0 > \u0b30; # FALLBACK RA

View File

@ -3,8 +3,8 @@
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/impl/data/Attic/Transliterator_InterIndic_Tamil.txt,v $
# $Date: 2002/03/02 00:27:26 $
# $Revision: 1.8 $
# $Date: 2002/07/25 21:20:26 $
# $Revision: 1.9 $
#--------------------------------------------------------------------
# InterIndic-Tamil
@ -134,12 +134,17 @@
\ue080>; # UNMAPPED InterIndic-Tamil: ISSHAR
\ue070>;
\ue071>;
\ue072>;
\ue070>\u0BF0;
\ue071>\u0BF2;
\ue072>\u0BF1;
\ue073>;
\ue074>;
\ue075>;
\ue076>;
\ue077>;
\ue078>;
\ue079>;
\ue07A>;
\ue0f0 > \u0bb0; # FALLBACK RA
\ue0f1 > \u0bb0; # FALLBACK RA
# :: NFC (NFD) ;

View File

@ -3,8 +3,8 @@
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/impl/data/Attic/Transliterator_InterIndic_Telugu.txt,v $
# $Date: 2002/03/02 00:27:27 $
# $Revision: 1.7 $
# $Date: 2002/07/25 21:20:26 $
# $Revision: 1.8 $
#--------------------------------------------------------------------
# InterIndic-Telugu
@ -126,6 +126,13 @@
\ue072>;
\ue073>;
\ue074>;
\ue075>;
\ue076>;
\ue077>;
\ue078>;
\ue079>;
\ue07A>;
0 > \u0c66; # FALLBACK FOR TAMIL
1 > \u0c67;
\ue0f0 > \u0c30; # FALLBACK RA

View File

@ -3,8 +3,8 @@
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/impl/data/Attic/Transliterator_Tamil_InterIndic.txt,v $
# $Date: 2002/03/02 00:27:27 $
# $Revision: 1.7 $
# $Date: 2002/07/25 21:20:26 $
# $Revision: 1.8 $
#--------------------------------------------------------------------
# Tamil-InterIndic
@ -28,6 +28,7 @@
\u0B90>\uE010; # LETTER AI
\u0B92>\uE012; # LETTER O
\u0B93>\uE013; # LETTER OO
\u0B94>\uE014; # LETTER AU
\u0B95>\uE015; # LETTER KA
\u0B99>\uE019; # LETTER NGA
\u0B9A>\uE01A; # LETTER CA

View File

@ -3,8 +3,8 @@
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/impl/data/Attic/Transliterator_Telugu_InterIndic.txt,v $
# $Date: 2002/03/02 00:27:27 $
# $Revision: 1.5 $
# $Date: 2002/07/25 21:20:26 $
# $Revision: 1.6 $
#--------------------------------------------------------------------
# Telugu-InterIndic
@ -80,6 +80,8 @@
\u0C56>\uE056; # AI LENGTH MARK
\u0C60>\uE060; # LETTER VOCALIC RR
\u0C61>\uE061; # LETTER VOCALIC LL
\u0C64>\uE064;
\u0C65>\uE065;
\u0C66>\uE066; # DIGIT ZERO
\u0C67>\uE067; # DIGIT ONE
\u0C68>\uE068; # DIGIT TWO