// ******************************************************************************* // * // * Copyright (C) 1997-2001, International Business Machines // * Corporation and others. All Rights Reserved. // * // ******************************************************************************* // Date Name Description // 11/17/99 aliu Added support for transliterators. // Please search for 'DUP FIXME' - duplicate lang/country names commented out root { Version { "3.1.1" } // each variant name that occurs in locales should be listed with %% and a display string "%%EURO" { "Euro" } // Euro variant display name "%%B" { "Bokm\u00e5l" } // Norwegian variant display name "%%NY" { "Nynorsk" } // Norwegian variant display name "%%AL" { "\u00C5land" } // Aland variant display name "%%POSIX" { "POSIX" } // variants for collation (traditional may also be used for traditional chinese) "%%PHONEBOOK" { "Phonebook Order" } "%%PINYIN" { "Pinyin Order" } "%%TRADITIONAL" { "Traditional" } "%%STROKE" { "Stroke Order" } "%%DIRECT" { "Direct Order" } // this is a special tag that makes genrb include UCARules.txt for collation "%%UCARULES" { "UCARules.txt"} // UCARules // no collation elements any more // CollationElements { // Version { "1.0" } // Override { "FALSE" } // Sequence { "" } // } // Formats for the display name of a locale, for a list of // items, and for composing two items in a list into one item. // The list patterns are used in the variant name and in the // full display name. // // This is the language-neutral form of this resource. // LocaleNamePatterns { { "{0,choice,0#|1#{1}|2#{1} ({2})}", // Display name "{0,choice,0#|1#{1}|2#{1},{2}|3#{1},{2},{3}}", // List "{0},{1}" // List composition } } //------------------------------------------------------------ // BEGIN Transliterator support //------------------------------------------------------------ // See also icu/data/translit/index.txt TransliteratorNamePattern { // Format for the display name of a Transliterator. 
// This is the language-neutral form of this resource.
        "{0,choice,0#|1#{1}|2#{1}-{2}}" // Display name
    }

    //------------------------------------------------------------
    // END Transliterator support
    //------------------------------------------------------------

    //------------------------------------------------------------
    // BEGIN BreakIterator support
    //------------------------------------------------------------

    // default rules for finding character boundaries: a break is placed
    // after every character unless one of the "keep together" rules
    // below matches
    CharacterBreakRules {
        // ignore non-spacing marks and enclosing marks (since we never
        // put a break before ignore characters, this keeps combining
        // accents with the base characters they modify)
        "$ignore=[[:Mn:][:Me:]];"

        // other category definitions
        "choseong=[\u1100-\u115f];"
        "jungseong=[\u1160-\u11a7];"
        "jongseong=[\u11a8-\u11ff];"
        "surr-hi=[\ud800-\udbff];"
        "surr-lo=[\udc00-\udfff];"

        // break after every character, except as follows:
        ".;"

        // keep CRLF sequences together
        "\r\n;"

        // keep surrogate pairs together
        "{surr-hi}{surr-lo};"

        // keep Hangul syllables spelled out using conjoining jamo together
        "{choseong}*{jungseong}*{jongseong}*;"

        // various additions for Hindi support
        "nukta=[\u093c];"
        "danda=[\u0964\u0965];"
        "virama=[\u094d];"
        "devVowelSign=[\u093e-\u094c\u0962\u0963];"
        "devConsonant=[\u0915-\u0939];"
        "devNuktaConsonant=[\u0958-\u095f];"
        "devCharEnd=[\u0902\u0903\u0951-\u0954];"
        "zwj=[\u200d];"
        // devCAMN: a consonant optionally followed by a nukta
        "devCAMN=({devConsonant}{nukta}?);"
        // devConsonant1: either a nukta consonant or a devCAMN
        "devConsonant1=({devNuktaConsonant}|{devCAMN});"
        // devConjunct: an optional consonant + virama (+ optional ZWJ)
        // prefix followed by a consonant
        "devConjunct=(({devConsonant1}{virama}{zwj}?)?{devConsonant1});"
        // keep a conjunct together with its optional vowel sign and
        // optional devCharEnd character
        "{devConjunct}{devVowelSign}?{devCharEnd}?;"
        // keep a danda together with a nukta that follows it
        "{danda}{nukta};"
    }

    // default rules for finding word boundaries
    WordBreakRules {
        // ignore non-spacing marks, enclosing marks, and format characters,
        // all of which should not influence the algorithm
        "$ignore=[[:Mn:][:Me:][:Cf:]];"

        // Hindi phrase separator, kanji, katakana, hiragana, CJK diacriticals,
        // other letters, and digits
        "danda=[\u0964\u0965];"
        "kanji=[\u3005\u4e00-\u9fa5\uf900-\ufa2d];"
        "kata=[\u3099-\u309c\u30a1-\u30fe];"
"hira=[\u3041-\u309e\u30fc];"
        "let=[[[:L:][:Mc:]]-[{kanji}{kata}{hira}]];"
        "dgt=[:N:];"

        // punctuation that can occur in the middle of a word: currently
        // dashes, apostrophes, quotation marks, and periods
        "mid-word=[[:Pd:]\u00ad\u2027\\\"\\\'\\.];"

        // punctuation that can occur in the middle of a number: currently
        // apostrophes, quotation marks, periods, commas, and the Arabic
        // decimal point
        "mid-num=[\\\"\\\'\\,\u066b\\.];"

        // punctuation that can occur at the beginning of a number: currently
        // the period, the number sign, and all currency symbols except the cents sign
        "pre-num=[[[:Sc:]-[\u00a2]]\\#\\.];"

        // punctuation that can occur at the end of a number: currently
        // the percent, per-thousand, per-ten-thousand, and Arabic percent
        // signs, the cents sign, and the ampersand
        "post-num=[\\%\\&\u00a2\u066a\u2030\u2031];"

        // line separators: currently LF, FF, PS, and LS
        "ls=[\n\u000c\u2028\u2029];"

        // whitespace: all space separators and the tab character
        "ws=[[:Zs:]\t];"

        // a word is a sequence of letters that may contain internal
        // punctuation, as long as it begins and ends with a letter and
        // never contains two punctuation marks in a row
        // (an optional trailing danda is kept with the word)
        "word=({let}+({mid-word}{let}+)*{danda}?);"

        // a number is a sequence of digits that may contain internal
        // punctuation, as long as it begins and ends with a digit and
        // never contains two punctuation marks in a row.
        "number=({dgt}+({mid-num}{dgt}+)*);"

        // break after every character, with the following exceptions
        // (this will cause punctuation marks that aren't considered
        // part of words or numbers to be treated as words unto themselves)
        ".;"

        // keep together any sequence of contiguous words and numbers
        // (including just one of either), plus an optional trailing
        // number-suffix character
        "{word}?({number}{word})*({number}{post-num}?)?;"

        // keep together any sequence of contiguous words and numbers
        // that starts with a number-prefix character and a number,
        // and may end with a number-suffix character
        "{pre-num}({number}{word})*({number}{post-num}?)?;"

        // keep together runs of whitespace (optionally with a single trailing
        // line separator or CRLF sequence)
        "{ws}*\r?{ls}?;"

        // keep together runs of Katakana
        "{kata}*;"

        // keep together runs of Hiragana
        "{hira}*;"

        // keep together runs of Kanji
        "{kanji}*;"
    }

    // default rules for determining legal line-breaking positions
    LineBreakRules {
        // ignore non-spacing marks, enclosing marks, and format characters
        "$ignore=[[:Mn:][:Me:][:Cf:]];"

        // Hindi phrase separators
        "danda=[\u0964\u0965];"

        // characters that always cause a break: ETX, tab, LF, FF, LS, and PS
        "break=[\u0003\t\n\f\u2028\u2029];"

        // characters that always prevent a break: the non-breaking space
        // and similar characters
        "nbsp=[\u00a0\u2007\u2011\ufeff];"

        // whitespace: space separators and control characters, except for
        // CR and the other characters mentioned above
        "space=[[[:Zs:][:Cc:]]-[{nbsp}{break}\r]];"

        // dashes: dash punctuation and the discretionary hyphen, except for
        // non-breaking hyphens
        "dash=[[[:Pd:]\u00ad]-[{nbsp}]];"

        // characters that stick to a word if they precede it: currency symbols
        // (except the cents sign) and starting punctuation
        "pre-word=[[[:Sc:]-[\u00a2]][:Ps:]\\\"\\\'];"

        // characters that stick to a word if they follow it: ending punctuation,
        // other punctuation that usually occurs at the end of a sentence,
        // small Kana characters, some CJK diacritics, etc.
        // (the definition is one string written as five adjacent literals)
        "post-word=[[:Pe:]\\!\\\"\\\'\\%\\.\\,\\:\\;\\?\u00a2\u00b0\u066a\u2030-\u2034"
        "\u2103\u2105\u2109\u3001\u3002\u3005\u3041\u3043\u3045\u3047\u3049\u3063"
        "\u3083\u3085\u3087\u308e\u3099-\u309e\u30a1\u30a3\u30a5\u30a7\u30a9"
        "\u30c3\u30e3\u30e5\u30e7\u30ee\u30f5\u30f6\u30fc-\u30fe\uff01\uff0c"
        "\uff0e\uff1f];"

        // Kanji: actually includes both Kanji and Kana, except for small Kana and
        // CJK diacritics
        "kanji=[[\u4e00-\u9fa5\uf900-\ufa2d\u3041-\u3094\u30a1-\u30fa]-[{post-word}{$ignore}]];"

        // digits
        "digit=[[:Nd:][:No:]];"

        // punctuation that can occur in the middle of a number: periods and commas
        "mid-num=[\\.\\,];"

        // everything not mentioned above, plus the quote marks (which are both
        // {pre-word}, {post-word}, and {char})
        "char=[^{break}{space}{dash}{kanji}{nbsp}{$ignore}{pre-word}{post-word}{mid-num}{danda}\r\\\"\\\'];"

        // a "number" is a run of prefix characters and dashes, followed by one or
        // more digits with isolated number-punctuation characters interspersed
        "number=([{pre-word}{dash}]*{digit}+({mid-num}{digit}+)*);"

        // the basic core of a word can be either a "number" as defined above, a single
        // "Kanji" character, or a run of any number of not-explicitly-mentioned
        // characters (this includes Latin letters)
        "word-core=([{pre-word}{char}]*|{kanji}|{number});"

        // a word may end with an optional suffix that can be either a run of one or
        // more dashes or a run of word-suffix characters, followed by an optional
        // run of whitespace
        "word-suffix=(({dash}+|{post-word}*){space}*);"

        // a word, thus, is an optional run of word-prefix characters, followed by
        // a word core and a word suffix (the syntax of {word-core} and {word-suffix}
        // actually allows either of them to match the empty string, putting a break
        // between things like ")(" or "aaa(aaa"
        "word=({pre-word}*{word-core}{word-suffix});"

        // finally, the rule that does the work: Keep together any run of words that
        // are joined by runs of one or more non-breaking spaces. Also keep a trailing
        // line-break character or CRLF combination with the word. (line separators
        // "win" over nbsp's)
        "{word}({nbsp}+{word})*\r?{break}?;"
    }

    // default rules for finding sentence boundaries
    SentenceBreakRules {
        // ignore non-spacing marks, enclosing marks, and format characters
        "$ignore=[[:Mn:][:Me:][:Cf:]];"

        // lowercase letters
        "lc=[:Ll:];"

        // uppercase Latin letters
        "ucLatin=[A-Z];"

        // whitespace (line separators are treated as whitespace)
        "space=[\t\r\f\n\u2028[:Zs:]];"

        // punctuation which may occur at the beginning of a sentence: "starting
        // punctuation" and quotation marks
        "start=[[:Ps:]\\\"\\\'];"

        // punctuation which may occur at the end of a sentence: "ending punctuation"
        // and quotation marks
        "end=[[:Pe:]\\\"\\\'];"

        // digits
        "digit=[:N:];"

        // characters that unambiguously signal the end of a sentence
        "term=[\\!\\?\u3002\uff01\uff1f];"

        // periods, which MAY signal the end of a sentence
        "period=[\\.\uff0e];"

        // characters that may occur at the beginning of a sentence: basically anything
        // not mentioned above (lowercase letters and digits are specifically excluded)
        "sent-start=[^{lc}{ucLatin}{space}{start}{end}{digit}{term}{period}\u2029{$ignore}];"

        // Hindi phrase separator
        "danda=[\u0964\u0965];"

        // always break sentences after paragraph separators
        ".*?\u2029?;"

        // always break after a danda, if it's followed by whitespace
        ".*?{danda}{space}*;"

        // if you see a period, skip over additional periods and ending punctuation
        // and if the next character is a paragraph separator, break after the
        // paragraph separator
        ".*?{period}[{period}{end}]*{space}*\u2029;"

        // if you see a period, skip over additional periods and ending punctuation,
        // followed by optional whitespace, followed by optional starting punctuation,
        // and if the next character is something that can start a sentence
        // (basically, a capital letter), then put the sentence break between the
        // whitespace and the opening punctuation
".*?{period}[{period}{end}]*{space}*/({start}*{sent-start}|{start}+{ucLatin});"

        // same as above, except that there's a sentence break before a Latin capital
        // letter only if there's at least one space after the period
        ".*?{period}[{period}{end}]*{space}+/{ucLatin};"

        // if you see a sentence-terminating character, skip over any additional
        // terminators, periods, or ending punctuation, followed by any whitespace,
        // followed by a SINGLE optional paragraph separator, and put the break there
        ".*?{term}[{term}{period}{end}]*{space}*\u2029?;"

        // The following rules are here to aid in backwards iteration. The automatically
        // generated backwards state table will rewind to the beginning of the
        // paragraph all the time (or all the way to the beginning of the document
        // if the document doesn't use the Unicode PS character) because the only
        // unambiguous character pairs are those involving paragraph separators.
        // These specify a few more unambiguous breaking situations.

        // if you see a sentence-starting character, followed by starting punctuation
        // (remember, we're iterating backwards), followed by a run of at least one
        // whitespace character, followed by an optional run of ending punctuation,
        // followed by a period, this is a safe place to turn around
        "![{sent-start}{ucLatin}]{start}*{space}+{end}*{period};"

        // if you see a letter or a digit, followed by an optional run of
        // starting punctuation, followed by an optional run of whitespace,
        // followed by an optional run of ending punctuation, followed by
        // a sentence terminator, this is a safe place to turn around
        "![{sent-start}{lc}{digit}]{start}*{space}*{end}*{term};"
    }

    //------------------------------------------------------------
    // END BreakIterator support
    //------------------------------------------------------------

    AmPmMarkers {
        "AM",
        "PM",
    }

    // display names keyed by two-letter country code; entries are listed
    // roughly alphabetically by display name rather than by code
    Countries {
        AF { "Afghanistan" }
        AL { "Albania" }
        DZ { "Algeria" }
        AS { "American Samoa" }
        AD { "Andorra" }
        AO { "Angola" }
        AI { "Anguilla" }
        AR { "Argentina" }
        AM { "Armenia" }
        AW { "Aruba" }
        AU { "Australia" }
        AT { "Austria" }
        AZ { "Azerbaijan" }
        BS { "Bahamas" }
        BH { "Bahrain" }
        BD { "Bangladesh" }
        BB { "Barbados" }
        BY { "Belarus" }
        BE { "Belgium" }
        BZ { "Belize" }
        BJ { "Benin" }
        BM { "Bermuda" }
        BT { "Bhutan" }
        BO { "Bolivia" }
        BA { "Bosnia and Herzegovina" }
        BW { "Botswana" }
        BR { "Brazil" }
        BN { "Brunei" }
        BG { "Bulgaria" }
        BF { "Burkina Faso" }
        BI { "Burundi" }
        KH { "Cambodia" }
        CM { "Cameroon" }
        CA { "Canada" }
        CV { "Cape Verde" }
        CF { "Central African Republic" }
        TD { "Chad" }
        CL { "Chile" }
        CN { "China" }
        CO { "Colombia" }
        KM { "Comoros" }
        CG { "Congo" }
        CR { "Costa Rica" }
        CI { "C\u00F4te d'Ivoire" }
        HR { "Croatia" }
        CU { "Cuba" }
        CY { "Cyprus" }
        CZ { "Czech Republic" }
        DK { "Denmark" }
        DJ { "Djibouti" }
        DM { "Dominica" }
        DO { "Dominican Republic" }
        TP { "East Timor" }
        EC { "Ecuador" }
        EG { "Egypt" }
        SV { "El Salvador" }
        GQ { "Equatorial Guinea" }
        ER { "Eritrea" }
        EE { "Estonia" }
        ET { "Ethiopia" }
        FJ { "Fiji" }
        FI { "Finland" }
        FO { "Faroe Islands" } // http://www.din.de/gremien/nas/nabd/iso3166ma/codlstp1/en_listp1.html
        FR { "France" }
        GF { "French Guiana" }
        GL { "Greenland" } // http://www.din.de/gremien/nas/nabd/iso3166ma/codlstp1/en_listp1.html
        PF { "French Polynesia" }
        TF { "French Southern Territories" }
        GA { "Gabon" }
        GM { "Gambia" }
        GE { "Georgia" }
        DE { "Germany" }
        GH { "Ghana" }
        GR { "Greece" }
        GP { "Guadeloupe" }
        GU { "Guam" }
        GT { "Guatemala" }
        GN { "Guinea" }
        GW { "Guinea-Bissau" }
        GY { "Guyana" }
        HT { "Haiti" }
        HN { "Honduras" }
        HK { "Hong Kong S.A.R., China" }
        HU { "Hungary" }
        IS { "Iceland" }
        IN { "India" }
        ID { "Indonesia" }
        IR { "Iran" }
        IQ { "Iraq" }
        IE { "Ireland" }
        IL { "Israel" }
        IT { "Italy" }
        JM { "Jamaica" }
        JP { "Japan" }
        JO { "Jordan" }
        KZ { "Kazakhstan" }
        KE { "Kenya" }
        KI { "Kiribati" }
        KP { "North Korea" }
        KR { "South Korea" }
        KW { "Kuwait" }
        KG { "Kyrgyzstan" }
        LA { "Laos" }
        LV { "Latvia" }
        LB { "Lebanon" }
        LS { "Lesotho" }
        LR { "Liberia" }
        LY { "Libya" }
        LI { "Liechtenstein" }
        LT { "Lithuania" }
LU { "Luxembourg" }
        MK { "Macedonia" }
        MG { "Madagascar" }
        MY { "Malaysia" }
        ML { "Mali" }
        MT { "Malta" }
        MP { "Northern Mariana Islands" }
        MH { "Marshall Islands" }
        MQ { "Martinique" }
        MR { "Mauritania" }
        MU { "Mauritius" }
        YT { "Mayotte" }
        MX { "Mexico" }
        FM { "Micronesia" }
        MD { "Moldova" }
        MC { "Monaco" }
        MN { "Mongolia" }
        MS { "Montserrat" }
        MA { "Morocco" }
        MZ { "Mozambique" }
        MM { "Myanmar" }
        NA { "Namibia" }
        NP { "Nepal" }
        NL { "Netherlands" }
        AN { "Netherlands Antilles" }
        NC { "New Caledonia" }
        NZ { "New Zealand" }
        NI { "Nicaragua" }
        NE { "Niger" }
        NG { "Nigeria" }
        NU { "Niue" }
        NO { "Norway" }
        OM { "Oman" }
        PK { "Pakistan" }
        PA { "Panama" }
        PG { "Papua New Guinea" }
        PY { "Paraguay" }
        PE { "Peru" }
        PH { "Philippines" }
        PL { "Poland" }
        PT { "Portugal" }
        PR { "Puerto Rico" }
        QA { "Qatar" }
        RO { "Romania" }
        RU { "Russia" }
        RW { "Rwanda" }
        SA { "Saudi Arabia" }
        SN { "Senegal" }
        SP { "Serbia" }
        SC { "Seychelles" }
        SL { "Sierra Leone" }
        SG { "Singapore" }
        SK { "Slovakia" }
        SI { "Slovenia" }
        SO { "Somalia" }
        ZA { "South Africa" }
        ES { "Spain" }
        LK { "Sri Lanka" }
        SD { "Sudan" }
        SR { "Suriname" }
        SZ { "Swaziland" }
        SE { "Sweden" }
        CH { "Switzerland" }
        SY { "Syria" }
        TW { "Taiwan" }
        TJ { "Tajikistan" }
        TZ { "Tanzania" }
        TH { "Thailand" }
        TG { "Togo" }
        TK { "Tokelau" }
        TO { "Tonga" }
        TT { "Trinidad and Tobago" }
        TN { "Tunisia" }
        TR { "Turkey" }
        TM { "Turkmenistan" }
        UG { "Uganda" }
        UA { "Ukraine" }
        AE { "United Arab Emirates" }
        GB { "United Kingdom" }
        US { "United States" }
        UY { "Uruguay" }
        UZ { "Uzbekistan" }
        VU { "Vanuatu" }
        VA { "Vatican" }
        VE { "Venezuela" }
        VN { "Vietnam" }
        VG { "British Virgin Islands" }
        VI { "U.S. Virgin Islands" }
        EH { "Western Sahara" }
        YE { "Yemen" }
        YU { "Yugoslavia" }
        ZR { "Zaire" }
        ZM { "Zambia" }
        ZW { "Zimbabwe" }
    }

    // three entries; the first is the generic currency sign U+00A4
    // NOTE(review): presumably symbol, international code, and monetary
    // decimal separator override — confirm against the consumer
    CurrencyElements {
        "\u00A4",
        "XXX",
        "",
    }

    // NOTE(review): presumably first day of week and minimal days in the
    // first week — confirm against the Calendar loader
    DateTimeElements {
        "1",
        "1",
    }

    // NOTE(review): the nine entries look like four time patterns, four
    // date patterns, and the pattern that combines time {0} with date {1}
    // — confirm the expected order against the DateFormat loader
    DateTimePatterns {
        "h:mm:ss a z",
        "h:mm:ss a z",
        "h:mm:ss a",
        "h:mm a",
        "EEEE, MMMM d, yyyy",
        "MMMM d, yyyy",
        "MMM d, yyyy",
        "M/d/yy",
        "{1} {0}",
    }

    DayAbbreviations {
        "Sun",
        "Mon",
        "Tue",
        "Wed",
        "Thu",
        "Fri",
        "Sat",
    }

    DayNames {
        "Sunday",
        "Monday",
        "Tuesday",
        "Wednesday",
        "Thursday",
        "Friday",
        "Saturday",
    }

    Eras {
        "BC",
        "AD",
    }

    // English display names keyed by two- and three-letter language code.
    // Names follow the ISO 639-2 English name list; display strings carry
    // no leading or trailing whitespace.
    Languages {
        aa { "Afar" }
        ab { "Abkhazian" }
        ace { "Achinese" }
        ach { "Acoli" }
        ada { "Adangme" }
        ae { "Avestan" }
        af { "Afrikaans" }
        afa { "Afro-Asiatic (Other)" }
        afh { "Afrihili" }
        aka { "Akan" }
        akk { "Akkadian" }
        ale { "Aleut" }
        alg { "Algonquian Languages" }
        am { "Amharic" }
        ang { "English, Old (ca.450-1100)" }
        apa { "Apache Languages" }
        ar { "Arabic" }
        arc { "Aramaic" }
        arn { "Araucanian" }
        arp { "Arapaho" }
        art { "Artificial (Other)" }
        arw { "Arawak" }
        as { "Assamese" }
        ath { "Athapaskan Languages" }
        aus { "Australian Languages" }
        ava { "Avaric" }
        awa { "Awadhi" }
        ay { "Aymara" }
        az { "Azerbaijani" }
        ba { "Bashkir" }
        bad { "Banda" }
        bai { "Bamileke Languages" }
        bal { "Baluchi" }
        bam { "Bambara" }
        ban { "Balinese" }
        bas { "Basa" }
        bat { "Baltic (Other)" }
        be { "Belarusian" }
        bej { "Beja" }
        bem { "Bemba" }
        ber { "Berber" }
        bg { "Bulgarian" }
        bh { "Bihari" }
        bho { "Bhojpuri" }
        bi { "Bislama" }
        bik { "Bikol" }
        bin { "Bini" }
        bla { "Siksika" }
        bn { "Bengali" }
        bnt { "Bantu" }
        bo { "Tibetan" }
        br { "Breton" }
        bra { "Braj" }
        bs { "Bosnian" }
        btk { "Batak" }
        bua { "Buriat" }
        bug { "Buginese" }
        ca { "Catalan" }
        cad { "Caddo" }
        cai { "Central American Indian (Other)" }
        car { "Carib" }
        cau { "Caucasian (Other)" }
        ce { "Chechen" }
        ceb { "Cebuano" }
        cel { "Celtic (Other)" }
        ch { "Chamorro" }
        chb { "Chibcha" }
        chg { "Chagatai" }
        chk { "Chuukese" }
        chm { "Mari" }
        chn { "Chinook Jargon" }
        cho { "Choctaw" }
        chp { "Chipewyan" }
        chr { "Cherokee" }
        chy { "Cheyenne" }
        cmc { "Chamic Languages" }
        co { "Corsican" }
        cop { "Coptic" }
        cpe { "Creoles and Pidgins, English-based (Other)" }
        cpf { "Creoles and Pidgins, French-based (Other)" }
        cre { "Cree" }
        crp { "Creoles and Pidgins (Other)" }
        cs { "Czech" }
        cu { "Church Slavic" }
        cus { "Cushitic (Other)" }
        cv { "Chuvash" }
        cy { "Welsh" }
        da { "Danish" }
        dak { "Dakota" }
        day { "Dayak" }
        de { "German" }
        del { "Delaware" }
        den { "Slave" }
        dgr { "Dogrib" }
        din { "Dinka" }
        div { "Divehi" }
        doi { "Dogri" }
        dra { "Dravidian (Other)" }
        dua { "Duala" }
        dum { "Dutch, Middle (ca. 1050-1350)" }
        dyu { "Dyula" }
        dz { "Bhutani" }
        dzo { "Dzongkha" }
        efi { "Efik" }
        egy { "Egyptian (Ancient)" }
        eka { "Ekajuk" }
        el { "Greek" } // Modern (1453-)
        elx { "Elamite" }
        en { "English" }
        enm { "English, Middle (1100-1500)" }
        eo { "Esperanto" }
        es { "Spanish" }
        et { "Estonian" }
        eu { "Basque" }
        ewe { "Ewe" }
        ewo { "Ewondo" }
        fa { "Persian" }
        fan { "Fang" }
        fat { "Fanti" }
        fi { "Finnish" }
        fiu { "Finno - Ugrian" }
        fj { "Fiji" }
        fo { "Faeroese" }
        fon { "Fon" }
        fr { "French" }
        frm { "French, Middle (ca.1400-1600)" }
        fro { "French, Old (842-ca.1400)" }
        ful { "Fulah" }
        fur { "Friulian" }
        fy { "Frisian" }
        ga { "Irish" }
        gaa { "Ga" }
        gay { "Gayo" }
        gba { "Gbaya" }
        gd { "Scots Gaelic" }
        gem { "Germanic (Other)" }
        gil { "Gilbertese" }
        gl { "Gallegan" }
        gla { "Gaelic (Scots)" }
        gmh { "German, Middle High (ca.1050-1500)" }
        gn { "Guarani" }
        goh { "German, Old High (ca.750-1050)" }
        gon { "Gondi" }
        gor { "Gorontalo" }
        got { "Gothic" }
        grb { "Grebo" }
        grc { "Greek, Ancient (to 1453)" }
        gu { "Gujarati" }
        gv { "Manx" }
        gwi { "Gwich'in" }
        hai { "Haida" }
        hau { "Hausa" }
        haw { "Hawaiian" }
        he { "Hebrew" }
        hi { "Hindi" }
        him { "Himachali" }
        hit { "Hittite" }
        hmn { "Hmong" }
        ho { "Hiri Motu" }
        hr { "Croatian" }
        hu { "Hungarian" }
        hup { "Hupa" }
        hy { "Armenian" }
        hz { "Herero" }
        ia { "Interlingua" }
        iba { "Iban" }
        ibo { "Igbo" }
        id { "Indonesian" }
        ie { "Interlingue" }
        ijo { "Ijo" }
        ik { "Inupiak" }
        ilo { "Iloko" }
        inc { "Indic (Other)" }
        ine { "Indo-European" }
        ira { "Iranian" }
        iro { "Iroquoian Languages" }
        is { "Icelandic" }
        it { "Italian" }
        iu { "Inuktitut" }
        iw { "Hebrew" }
        ja { "Japanese" }
        jpr { "Judeo-Persian" }
        jrb { "Judeo-Arabic" }
        jw { "Javanese" }
        ka { "Georgian" }
        kaa { "Kara-Kalpak" }
        kab { "Kabyle" }
        kac { "Kachin" }
        kam { "Kamba" }
        kar { "Karen" }
        kau { "Kanuri" }
        kaw { "Kawi" }
        kha { "Khasi" }
        khi { "Khoisan" }
        kho { "Khotanese" }
        ki { "Kikuyu" }
        kk { "Kazakh" }
        kl { "Kalaallisut" }
        km { "Khmer" }
        kmb { "Kimbundu" }
        kn { "Kannada" }
        ko { "Korean" }
        kok { "Konkani" }
        kon { "Kongo" }
        kos { "Kosraean" }
        kpe { "Kpelle" }
        kro { "Kru" }
        kru { "Kurukh" }
        ks { "Kashmiri" }
        ku { "Kurdish" }
        kum { "Kumyk" }
        kut { "Kutenai" }
        kv { "Komi" }
        kw { "Cornish" }
        ky { "Kirghiz" }
        la { "Latin" }
        lad { "Ladino" }
        lah { "Lahnda" }
        lam { "Lamba" }
        lb { "Letzeburgesch" }
        lez { "Lezghian" }
        lin { "Lingala" }
        lit { "Lithuanian" }
        ln { "Lingala" }
        lo { "Lao" }
        lol { "Mongo" }
        loz { "Lozi" }
        lt { "Lithuanian" }
        lua { "Luba-Lulua" }
        lub { "Luba-Katanga" }
        lug { "Ganda" }
        lui { "Luiseno" }
        lun { "Lunda" }
        luo { "Luo" }
        lus { "Lushai" }
        lv { "Latvian (Lettish)" }
        mad { "Madurese" }
        mag { "Magahi" }
        mai { "Maithili" }
        mak { "Makasar" }
        man { "Mandingo" }
        map { "Austronesian" }
        mas { "Masai" }
        mdr { "Mandar" }
        men { "Mende" }
        mg { "Malagasy" }
        mga { "Irish, Middle (900-1200)" }
        mh { "Marshall" }
        mi { "Maori" }
        mic { "Mic-Mac" }
        min { "Minangkabau" }
        mis { "Miscellaneous Languages" }
        mk { "Macedonian" }
        mkh { "Mon-Khmer (Other)" }
        ml { "Malayalam" }
        mn { "Mongolian" }
        mnc { "Manchu" }
        mni { "Manipuri" }
        mno { "Manobo Languages" }
        mo { "Moldavian" }
        moh { "Mohawk" }
        mos { "Mossi" }
        mr { "Marathi" }
        ms { "Malay" }
        mt { "Maltese" }
        mul { "Multiple Languages" }
        mun { "Munda Languages" }
        mus { "Creek" }
        mwr { "Marwari" }
        my { "Burmese" }
        myn { "Mayan" }
        na { "Nauru" }
        nah { "Nahuatl" }
        nai { "North American Indian (Other)" }
        nb { "Norwegian Bokm\u00e5l" }
        nd { "Ndebele, North" }
        nds { "Low German; Low Saxon" }
        ne { "Nepali" }
        new { "Newari" }
        ng { "Ndonga" }
        nia { "Nias" }
        nic { "Niger - Kordofanian" }
        niu { "Niuean" }
        nl { "Dutch" }
        nn { "Norwegian Nynorsk" }
        no { "Norwegian" }
        non { "Norse, Old" }
        nr { "Ndebele, South" }
        nso { "Sotho, Northern" }
        nub { "Nubian Languages" }
        nv { "Navajo" }
        ny { "Chichewa; Nyanja" }
        nym { "Nyamwezi" }
        nyo { "Nyoro" }
        nzi { "Nzima" }
        oc { "Proven\u00E7al; Occitan (post 1500)" }
        oji { "Ojibwa" }
        om { "Oromo (Afan)" }
        or { "Oriya" }
        os { "Ossetic" }
        osa { "Osage" }
        ota { "Turkish (Ottoman Empire)" }
        oto { "Otomian Languages" }
        pa { "Punjabi" }
        paa { "Papuan (Other)" }
        pag { "Pangasinan" }
        pal { "Pahlavi" }
        pam { "Pampanga" }
        pap { "Papiamento" }
        pau { "Palauan" }
        peo { "Persian, Old (ca.600-400 B.C.)" }
        phi { "Philippine (Other)" }
        phn { "Phoenician" }
        pi { "Pali" }
        pl { "Polish" }
        pon { "Pohnpeian" }
        pra { "Prakrit Languages" }
        pro { "Proven\u00E7al, Old (to 1500)" }
        ps { "Pashto (Pushto)" }
        pt { "Portuguese" }
        qu { "Quechua" }
        raj { "Rajasthani" }
        rap { "Rapanui" }
        rar { "Rarotongan" }
        rm { "Rhaeto-Romance" }
        rn { "Rundi" }
        ro { "Romanian" }
        roa { "Romance (Other)" }
        rom { "Romany" }
        ru { "Russian" }
        rw { "Kinyarwanda" }
        sa { "Sanskrit" }
        sad { "Sandawe" }
        sah { "Yakut" }
        sai { "South American Indian (Other)" }
        sal { "Salishan" }
        sam { "Samaritan" }
        sas { "Sasak" }
        sat { "Santali" }
        sc { "Sardinian" }
        sco { "Scots" }
        sd { "Sindhi" }
        se { "Northern Sami" }
        // http://lcweb.loc.gov/standards/iso639-2/englangn.html
        // should provide the correct language code soon
        // se { "Sami Languages" } // DUP FIXME
        sel { "Selkup" }
        sem { "Semitic" }
        sg { "Sango" }
        sga { "Irish, Old (to 900)" }
        sgn { "Sign Languages" }
        sh { "Serbo-Croatian" }
        shn { "Shan" }
        si { "Sinhalese" }
        sid { "Sidamo" }
        sio { "Siouan Languages" }
        sit { "Sino-Tibetan (Other)" }
        sk { "Slovak" }
        sl { "Slovenian" }
        sm { "Samoan" }
        sn { "Shona" }
        snk { "Soninke" }
        so { "Somali" }
        sog { "Sogdian" }
        son { "Songhai" }
        sq { "Albanian" }
        sr { "Serbian" }
        srr { "Serer" }
        ss { "Swati" }
        ssa { "Nilo-Saharan" }
        st { "Sotho, Southern" }
        su { "Sundanese" }
        suk { "Sukuma" }
        sus { "Susu" }
        sux { "Sumerian" }
        sv { "Swedish" }
        sw { "Swahili" }
        syr { "Syriac" }
        ta { "Tamil" }
        tai { "Tai (Other)" }
        te { "Telugu" }
        tem { "Timne" }
        ter { "Tereno" }
        tet { "Tetum" }
        tg { "Tajik" }
        th { "Thai" }
        tig { "Tigre" }
        tir { "Tigrinya" }
        tiv { "Tiv" }
        tk { "Turkmen" }
        tl { "Tagalog" }
        tli { "Tlingit" }
        tmh { "Tamashek" }
        tn { "Tswana" }
        tog { "Tonga (Nyasa)" }
        ton { "Tongan (Tonga Islands)" }
        tr { "Turkish" }
        ts { "Tsonga" }
        tsi { "Tsimshian" }
        tt { "Tatar" }
        tum { "Tumbuka" }
        tur { "Turkish" }
        tut { "Altaic (Other)" }
        tvl { "Tuvalu" }
        tw { "Twi" }
        ty { "Tahitian" }
        tyv { "Tuvinian" }
        ug { "Uighur" }
        uga { "Ugaritic" }
        uk { "Ukrainian" }
        umb { "Umbundu" }
        und { "Undetermined" }
        ur { "Urdu" }
        uz { "Uzbek" }
        vai { "Vai" }
        ven { "Venda" }
        vi { "Vietnamese" }
        vo { "Volapuk" }
        vot { "Votic" }
        wak { "Wakashan Languages" }
        wal { "Walamo" }
        war { "Waray" }
        was { "Washo" }
        wen { "Sorbian Languages" }
        wo { "Wolof" }
        xh { "Xhosa" }
        yao { "Yao" }
        yap { "Yapese" }
        yi { "Yiddish" }
        yor { "Yoruba" }
        ypk { "Yupik Languages" }
        za { "Zhuang" }
        zap { "Zapotec" }
        zen { "Zenaga" }
        zh { "Chinese" }
        znd { "Zande" }
        zu { "Zulu" }
        zun { "Zuni" }
    }

    LocaleID { "0000" }

    LocaleString { "en" }

    // twelve abbreviations followed by an empty thirteenth entry
    MonthAbbreviations {
        "Jan",
        "Feb",
        "Mar",
        "Apr",
        "May",
        "Jun",
        "Jul",
        "Aug",
        "Sep",
        "Oct",
        "Nov",
        "Dec",
        "",
    }

    // twelve names followed by an empty thirteenth entry
    MonthNames {
        "January",
        "February",
        "March",
        "April",
        "May",
        "June",
        "July",
        "August",
        "September",
        "October",
        "November",
        "December",
        "",
    }

    // NOTE(review): eleven positional number-format symbols; the order
    // looks like decimal separator, grouping separator, pattern separator,
    // percent, zero digit, digit placeholder, minus, exponent, per-mille,
    // infinity, NaN — confirm against the DecimalFormatSymbols loader
    NumberElements {
        ".",
        ",",
        ";",
        "%",
        "0",
        "#",
        "-",
        "E",
        "\u2030",
        "\u221E",
        "\uFFFD",
    }

    // NOTE(review): presumably decimal, currency, percent, and scientific
    // patterns in that order — confirm against the NumberFormat loader
    NumberPatterns {
        "#,##0.###;-#,##0.###",
        "\u00A4 #,##0.00;-\u00A4 #,##0.00",
        "#,##0%",
        "#E0"
    }

    ShortCountry { "" }

    ShortLanguage { "eng" }

    localPatternChars { "GyMdkHmsSEDFwWahKzYe" }

    // one six-element row per zone: zone ID, a long name and abbreviation,
    // a second long name and abbreviation (the daylight forms wherever the
    // two pairs differ), and an exemplar city
    zoneStrings {
        {
            "PST",
            "Pacific Standard Time",
            "PST",
            "Pacific Daylight Time",
            "PDT",
            "San Francisco",
        }
        {
            "MST",
            "Mountain Standard Time",
            "MST",
            "Mountain Daylight Time",
            "MDT",
            "Denver",
        }
        {
"PNT", "Mountain Standard Time", "MST", "Mountain Standard Time", "MST", "Phoenix", } { "CST", "Central Standard Time", "CST", "Central Daylight Time", "CDT", "Chicago", } { "EST", "Eastern Standard Time", "EST", "Eastern Daylight Time", "EDT", "New York", } { "IET", "Eastern Standard Time", "EST", "Eastern Standard Time", "EST", "Indianapolis", } { "PRT", "Atlantic Standard Time", "AST", "Atlantic Daylight Time", "ADT", "Halifax", } { "HST", "Hawaii Standard Time", "HST", "Hawaii Daylight Time", "HDT", "Honolulu", } { "AST", "Alaska Standard Time", "AST", "Alaska Daylight Time", "ADT", "Anchorage", } } LocaleScript{ "Latn", } //------------------------------------------------------------ // Rule Based Number Format Support //------------------------------------------------------------ // * Spellout rules for U.S. English. This rule set has two variants: // * %simplified is a set of rules showing the simple method of spelling // * out numbers in English: 289 is formatted as "two hundred eighty-nine". // * %default uses a more complicated algorithm to format // * numbers in a more natural way: 289 is formatted as "two hundred AND // * eighty-nine" and commas are inserted between the thousands groups for // * values above 100,000. SpelloutRules { // This rule set shows the normal simple formatting rules for English "%simplified:\n" // negative number rule. This rule is used to format negative // numbers. The result of formatting the number's absolute // value is placed where the >> is. " -x: minus >>;\n" // faction rule. This rule is used for formatting numbers // with fractional parts. The result of formatting the // number's integral part is substituted for the <<, and // the result of formatting the number's fractional part // (one digit at a time, e.g., 0.123 is "zero point one two // three") replaces the >>. 
" x.x: << point >>;\n" // the rules for the values from 0 to 19 are simply the // words for those numbers " zero; one; two; three; four; five; six; seven; eight; nine;\n" " ten; eleven; twelve; thirteen; fourteen; fifteen; sixteen;\n" " seventeen; eighteen; nineteen;\n" // beginning at 20, we use the >> to mark the position where // the result of formatting the number's ones digit. Thus, // we only need a new rule at every multiple of 10. Text in // backets is omitted if the value being formatted is an // even multiple of 10. " 20: twenty[->>];\n" " 30: thirty[->>];\n" " 40: forty[->>];\n" " 50: fifty[->>];\n" " 60: sixty[->>];\n" " 70: seventy[->>];\n" " 80: eighty[->>];\n" " 90: ninety[->>];\n" // beginning at 100, we can use << to mark the position where // the result of formatting the multiple of 100 is to be // inserted. Notice also that the meaning of >> has shifted: // here, it refers to both the ones place and the tens place. // The meanings of the << and >> tokens depend on the base value // of the rule. A rule's divisor is (usually) the highest // power of 10 that is less than or equal to the rule's base // value. The value being formatted is divided by the rule's // divisor, and the integral quotient is used to get the text // for <<, while the remainder is used to produce the text // for >>. Again, text in brackets is omitted if the value // being formatted is an even multiple of the rule's divisor // (in this case, an even multiple of 100) " 100: << hundred[ >>];\n" // The rules for the higher numbers work the same way as the // rule for 100: Again, the << and >> tokens depend on the // rule's divisor, which for all these rules is also the rule's // base value. To group by thousand, we simply don't have any // rules between 1,000 and 1,000,000. " 1000: << thousand[ >>];\n" " 1,000,000: << million[ >>];\n" " 1,000,000,000: << billion[ >>];\n" " 1,000,000,000,000: << trillion[ >>];\n" // overflow rule. 
This rule specifies that values of a // quadrillion or more are shown in numerals rather than words. // The == token means to format (with new rules) the value // being formatted by this rule and place the result where // the == is. The #,##0 inside the == signs is a // DecimalFormat pattern. It specifies that the value should // be formatted with a DecimalFormat object, and that it // should be formatted with no decimal places, at least one // digit, and a thousands separator. " 1,000,000,000,000,000: =#,##0=;\n" // %default is a more elaborate form of %simplified; It is basically // the same, except that it introduces "and" before the ones digit // when appropriate (basically, between the tens and ones digits) and // separates the thousands groups with commas in values over 100,000. "%default:\n" // negative-number and fraction rules. These are the same // as those for %simplified, but have to be stated here too // because this is an entry point " -x: minus >>;\n" " x.x: << point >>;\n" // just use %simplified for values below 100 " =%simplified=;\n" // for values from 100 to 9,999 use %%and to decide whether or // not to interpose the "and" " 100: << hundred[ >%%and>];\n" " 1000: << thousand[ >%%and>];\n" // for values of 100,000 and up, use %%commas to interpose the // commas in the right places (and also to interpose the "and") " 100,000>>: << thousand[>%%commas>];\n" " 1,000,000: << million[>%%commas>];\n" " 1,000,000,000: << billion[>%%commas>];\n" " 1,000,000,000,000: << trillion[>%%commas>];\n" " 1,000,000,000,000,000: =#,##0=;\n" // if the value passed to this rule set is greater than 100, don't // add the "and"; if it's less than 100, add "and" before the last // digits "%%and:\n" " and =%default=;\n" " 100: =%default=;\n" // this rule set is used to place the commas "%%commas:\n" // for values below 100, add "and" (the apostrophe at the // beginning is ignored, but causes the space that follows it // to be significant: this is necessary because the 
// rules calling %%commas don't put a space before it)
    " ' and =%default=;\n"
    // a comma follows the thousands (or whatever came before the
    // hundreds)
    " 100: , =%default=;\n"
    // a comma follows the millions (or whatever comes before the
    // thousands)
    " 1000: , <%default< thousand, >%default>;\n"
    // and so on...
    " 1,000,000: , =%default=;"
    // %%lenient-parse is not a set of number-formatting rules at all;
    // it is a set of collation rules.  In lenient-parse mode a Collator
    // compares fragments of the text being parsed with the text in the
    // rules, which allows more leeway when matching.  These rules tell
    // the formatter to ignore commas when parsing (spaces are already
    // ignored, which is why the space is the reference point; hyphens
    // are ignored as well, so "twenty one" and "twenty-one" parse
    // identically)
    "%%lenient-parse:\n"
    // " & ' ' , ',' ;\n"
    " &\u0000 << ' ' << ',' << '-'; \n"
}

// * This rule set appends an English ordinal abbreviation to a number.
// * For example, 2 is formatted as "2nd".  Parsing doesn't work with
// * this rule set.  To parse, use DecimalFormat on the numeral.
OrdinalRules {
    // %main writes the numeral using the DecimalFormat pattern #,##0
    // and then asks %%abbrev for the abbreviation that follows it
    "%main:\n"
    " =#,##0==%%abbrev=;\n"
    // %%abbrev supplies the abbreviation itself
    "%%abbrev:\n"
    // the abbreviations for 0, 1, 2, 3 and 4; the last one stays in
    // force up to the next rule, so everything from 4 to 19 ends in "th"
    " th; st; nd; rd; th;\n"
    // from 20 on, the cycle repeats every 10 (13 is "13th", but 23 and
    // 33 are "23rd" and "33rd"), so all but the ones digit is ignored
    // when selecting the abbreviation
    " 20: >>;\n"
    // from 100 on, the whole cycle repeats: only the tens and ones
    // digits are considered when picking an abbreviation
    " 100: >>;\n"
}

// * This rule set formats a number of seconds in sexagesimal notation
// * (i.e., hours, minutes, and seconds).  %with-words formats it with
// * words (3,740 is "1 hour, 2 minutes, 20 seconds") and %in-numerals
// * formats it entirely in numerals (3,740 is "1:02:20").
DurationRules {
    // entry point for formatting a duration with unit words
    "%with-words:\n"
    // singular and plural forms of "second"
    " 0 seconds; 1 second; =0= seconds;\n"
    // from 60 seconds up, %%min formats the minutes; the bracketed
    // seconds part is left out on an exact multiple of the divisor
    " 60/60: <%%min<[, >>];\n"
    // from 3,600 seconds up, %%hr formats the hours
    // (the ">>>" below causes the number of minutes to be shown
    // even when there are zero minutes)
    " 3600/60: <%%hr<[, >>>];\n"
    // singular and plural forms of "minute"
    "%%min:\n"
    " 0 minutes; 1 minute; =0= minutes;\n"
    // singular and plural forms of "hour"
    "%%hr:\n"
    " 0 hours; 1 hour; =0= hours;\n"

    // entry point for formatting a duration in numerals
    "%in-numerals:\n"
    // values below 60 seconds are shown with "sec."
    " =0= sec.;\n"
    // higher values are shown with colons: %%min-sec handles values
    // below 3,600 seconds...
    " 60: =%%min-sec=;\n"
    // ...and %%hr-min-sec handles values of 3,600 seconds and above
    " 3600: =%%hr-min-sec=;\n"
    // with this rule set, values of less than 10 minutes show without
    // a leading zero on the minutes
    "%%min-sec:\n"
    " 0: :=00=;\n"
    " 60/60: <0<>>;\n"
    // this rule set is used for values of 3,600 or more.  Minutes are
    // always shown, and always shown with two digits
    "%%hr-min-sec:\n"
    " 0: :=00=;\n"
    " 60/60: <00<>>;\n"
    " 3600/60: <#,##0<:>>>;\n"
    // the lenient-parse rules allow several different characters to be
    // used as delimiters between hours, minutes, and seconds
    "%%lenient-parse:\n"
    " & ':' = '.' = ' ' = '-';\n"
}

// Display names for scripts, keyed by script code.
Scripts {
    ARAB { "Arabic" }
    ARMN { "Armenian" }
    BENG { "Bengali" }
    BOPO { "Bopomofo" }
    // the script is named "Syllabics", not "Symbols"
    CANS { "Unified Canadian Aboriginal Syllabics" }
    CHER { "Cherokee" }
    CYRL { "Cyrillic" }
    DEVA { "Devanagari" }
    DSRT { "Deseret" }
    ETHI { "Ethiopic" }
    GEOR { "Georgian" }
    GOTH { "Gothic" }
    GREK { "Greek" }
    GUJR { "Gujarati" }
    GURU { "Gurmukhi" }
    HANG { "Hangul" }
    HANI { "Han" }
    HEBR { "Hebrew" }
    HIRA { "Hiragana" }
    // shown to users, so spelled as a name rather than an identifier
    ITAL { "Old Italic" }
    KANA { "Katakana" }
    KHMR { "Khmer" }
    KNDA { "Kannada" }
    LAO { "Lao" }
    LATN { "Latin" }
    MLYM { "Malayalam" }
    MONG { "Mongolian" }
    MYMR { "Myanmar" }
    OGAM { "Ogham" }
    ORYA { "Oriya" }
    QAAC { "Coptic" }
    QAAI { "Inherited" }
    RUNR { "Runic" }
    SINH { "Sinhala" }
    SYRC { "Syriac" }
    TAML { "Tamil" }
    TELU { "Telugu" }
    THAA { "Thana" }
    THAI { "Thai" }
    TIBT { "Tibetan" }
    YIII { "Yi" }
    ZYYY { "Common" }
}
} // end of root