// *******************************************************************************
// *
// * Copyright (C) 1997-2000, International Business Machines
// * Corporation and others.  All Rights Reserved.
// *
// *******************************************************************************
// Date        Name        Description
// 11/17/99    aliu        Added support for transliterators.

// Language-neutral ("default") resource bundle.  It carries the locale display
// name patterns, the transliterator name pattern, the default break-iterator
// rule sets, and English-language fallback data (country and language names,
// date/time and number formatting elements, and time zone strings).
default {
    Version { "3" }

    "%%EURO" { "Euro" }         // Euro variant display name
    "%%B" { "Bokm\u00e5l" }     // Norwegian variant display name
    "%%NY" { "Nynorsk" }        // Norwegian variant display name

    // Formats for the display name of a locale, for a list of
    // items, and for composing two items in a list into one item.
    // The list patterns are used in the variant name and in the
    // full display name.
    //
    // This is the language-neutral form of this resource.
    //
    LocaleNamePatterns {
        {
            "{0,choice,0#|1#{1}|2#{1} ({2})}",              // Display name
            "{0,choice,0#|1#{1}|2#{1},{2}|3#{1},{2},{3}}",  // List
            "{0},{1}"                                       // List composition
        }
    }

    //------------------------------------------------------------
    // BEGIN Transliterator support
    //------------------------------------------------------------
    // See also icu/data/translit/index.txt

    TransliteratorNamePattern {
        // Format for the display name of a Transliterator.
        // This is the language-neutral form of this resource.
        "{0,choice,0#|1#{1}|2#{1}-{2}}" // Display name
    }

    //------------------------------------------------------------
    // END Transliterator support
    //------------------------------------------------------------

    //------------------------------------------------------------
    // BEGIN BreakIterator support
    //------------------------------------------------------------

    // default rules for finding character boundaries
    // (the adjacent string literals below are written without separators,
    // one rule or definition per literal)
    CharacterBreakRules {
        // ignore non-spacing marks and enclosing marks (since we never
        // put a break before ignore characters, this keeps combining
        // accents with the base characters they modify)
        "$ignore=[[:Mn:][:Me:]];"

        // other category definitions
        "choseong=[\u1100-\u115f];"
        "jungseong=[\u1160-\u11a7];"
        "jongseong=[\u11a8-\u11ff];"
        "surr-hi=[\ud800-\udbff];"
        "surr-lo=[\udc00-\udfff];"

        // break after every character, except as follows:
        ".;"

        // keep CRLF sequences together
        "\r\n;"

        // keep surrogate pairs together
        "{surr-hi}{surr-lo};"

        // keep Hangul syllables spelled out using conjoining jamo together
        "{choseong}*{jungseong}*{jongseong}*;"

        // various additions for Hindi support
        "nukta=[\u093c];"
        "danda=[\u0964\u0965];"
        "virama=[\u094d];"
        "devVowelSign=[\u093e-\u094c\u0962\u0963];"
        "devConsonant=[\u0915-\u0939];"
        "devNuktaConsonant=[\u0958-\u095f];"
        "devCharEnd=[\u0902\u0903\u0951-\u0954];"
        "zwj=[\u200d];"
        "devCAMN=({devConsonant}{nukta}?);"
        "devConsonant1=({devNuktaConsonant}|{devCAMN});"
        "devConjunct=(({devConsonant1}{virama}{zwj}?)?{devConsonant1});"
        "{devConjunct}{devVowelSign}?{devCharEnd}?;"
        "{danda}{nukta};"
    }

    // default rules for finding word boundaries
    WordBreakRules {
        // ignore non-spacing marks, enclosing marks, and format characters,
        // all of which should not influence the algorithm
        "$ignore=[[:Mn:][:Me:][:Cf:]];"

        // Hindi phrase separator, kanji, katakana, hiragana, CJK diacriticals,
        // other letters, and digits
        "danda=[\u0964\u0965];"
        "kanji=[\u3005\u4e00-\u9fa5\uf900-\ufa2d];"
        "kata=[\u3099-\u309c\u30a1-\u30fe];"
        "hira=[\u3041-\u309e\u30fc];"
        "let=[[[:L:][:Mc:]]-[{kanji}{kata}{hira}]];"
        "dgt=[:N:];"

        // punctuation that can occur in the middle of a word: currently
        // dashes, apostrophes, quotation marks, and periods
        "mid-word=[[:Pd:]\u00ad\u2027\\\"\\\'\\.];"

        // punctuation that can occur in the middle of a number: currently
        // apostrophes, quotation marks, periods, commas, and the Arabic
        // decimal point
        "mid-num=[\\\"\\\'\\,\u066b\\.];"

        // punctuation that can occur at the beginning of a number: currently
        // the period, the number sign, and all currency symbols except the cents sign
        "pre-num=[[[:Sc:]-[\u00a2]]\\#\\.];"

        // punctuation that can occur at the end of a number: currently
        // the percent, per-thousand, per-ten-thousand, and Arabic percent
        // signs, the cents sign, and the ampersand
        "post-num=[\\%\\&\u00a2\u066a\u2030\u2031];"

        // line separators: currently LF, FF, PS, and LS
        "ls=[\n\u000c\u2028\u2029];"

        // whitespace: all space separators and the tab character
        "ws=[[:Zs:]\t];"

        // a word is a sequence of letters that may contain internal
        // punctuation, as long as it begins and ends with a letter and
        // never contains two punctuation marks in a row
        "word=({let}+({mid-word}{let}+)*{danda}?);"

        // a number is a sequence of digits that may contain internal
        // punctuation, as long as it begins and ends with a digit and
        // never contains two punctuation marks in a row.
        "number=({dgt}+({mid-num}{dgt}+)*);"

        // break after every character, with the following exceptions
        // (this will cause punctuation marks that aren't considered
        // part of words or numbers to be treated as words unto themselves)
        ".;"

        // keep together any sequence of contiguous words and numbers
        // (including just one of either), plus an optional trailing
        // number-suffix character
        "{word}?({number}{word})*({number}{post-num}?)?;"

        // keep together any sequence of contiguous words and numbers
        // that starts with a number-prefix character and a number,
        // and may end with a number-suffix character
        "{pre-num}({number}{word})*({number}{post-num}?)?;"

        // keep together runs of whitespace (optionally with a single trailing
        // line separator or CRLF sequence)
        "{ws}*\r?{ls}?;"

        // keep together runs of Katakana
        "{kata}*;"

        // keep together runs of Hiragana
        "{hira}*;"

        // keep together runs of Kanji
        "{kanji}*;"
    }

    // default rules for determining legal line-breaking positions
    LineBreakRules {
        // ignore non-spacing marks, enclosing marks, and format characters
        "$ignore=[[:Mn:][:Me:][:Cf:]];"

        // Hindi phrase separators
        "danda=[\u0964\u0965];"

        // characters that always cause a break: ETX, tab, LF, FF, LS, and PS
        "break=[\u0003\t\n\f\u2028\u2029];"

        // characters that always prevent a break: the non-breaking space
        // and similar characters
        "nbsp=[\u00a0\u2007\u2011\ufeff];"

        // whitespace: space separators and control characters, except for
        // CR and the other characters mentioned above
        "space=[[[:Zs:][:Cc:]]-[{nbsp}{break}\r]];"

        // dashes: dash punctuation and the discretionary hyphen, except for
        // non-breaking hyphens
        "dash=[[[:Pd:]\u00ad]-[{nbsp}]];"

        // characters that stick to a word if they precede it: currency symbols
        // (except the cents sign) and starting punctuation
        "pre-word=[[[:Sc:]-[\u00a2]][:Ps:]\\\"\\\'];"

        // characters that stick to a word if they follow it: ending punctuation,
        // other punctuation that usually occurs at the end of a sentence,
        // small Kana characters, some CJK diacritics, etc.
        // (one set definition, continued across five adjacent string literals)
        "post-word=[[:Pe:]\\!\\\"\\\'\\%\\.\\,\\:\\;\\?\u00a2\u00b0\u066a\u2030-\u2034"
        "\u2103\u2105\u2109\u3001\u3002\u3005\u3041\u3043\u3045\u3047\u3049\u3063"
        "\u3083\u3085\u3087\u308e\u3099-\u309e\u30a1\u30a3\u30a5\u30a7\u30a9"
        "\u30c3\u30e3\u30e5\u30e7\u30ee\u30f5\u30f6\u30fc-\u30fe\uff01\uff0c"
        "\uff0e\uff1f];"

        // Kanji: actually includes both Kanji and Kana, except for small Kana and
        // CJK diacritics
        "kanji=[[\u4e00-\u9fa5\uf900-\ufa2d\u3041-\u3094\u30a1-\u30fa]-[{post-word}{$ignore}]];"

        // digits
        "digit=[[:Nd:][:No:]];"

        // punctuation that can occur in the middle of a number: periods and commas
        "mid-num=[\\.\\,];"

        // everything not mentioned above, plus the quote marks (which are both
        // {pre-word}, {post-word}, and {char})
        "char=[^{break}{space}{dash}{kanji}{nbsp}{$ignore}{pre-word}{post-word}{mid-num}{danda}\r\\\"\\\'];"

        // a "number" is a run of prefix characters and dashes, followed by one or
        // more digits with isolated number-punctuation characters interspersed
        "number=([{pre-word}{dash}]*{digit}+({mid-num}{digit}+)*);"

        // the basic core of a word can be either a "number" as defined above, a single
        // "Kanji" character, or a run of any number of not-explicitly-mentioned
        // characters (this includes Latin letters)
        "word-core=([{pre-word}{char}]*|{kanji}|{number});"

        // a word may end with an optional suffix that may be either a run of one or
        // more dashes or a run of word-suffix characters, followed by an optional
        // run of whitespace
        "word-suffix=(({dash}+|{post-word}*){space}*);"

        // a word, thus, is an optional run of word-prefix characters, followed by
        // a word core and a word suffix (the syntax of {word-core} and {word-suffix}
        // actually allows either of them to match the empty string, putting a break
        // between things like ")(" or "aaa(aaa"
        "word=({pre-word}*{word-core}{word-suffix});"

        // finally, the rule that does the work: Keep together any run of words that
        // are joined by runs of one or more non-breaking spaces.  Also keep a trailing
        // line-break character or CRLF combination with the word.  (line separators
        // "win" over nbsp's)
        "{word}({nbsp}+{word})*\r?{break}?;"
    }

    // default rules for finding sentence boundaries
    SentenceBreakRules {
        // ignore non-spacing marks, enclosing marks, and format characters
        "$ignore=[[:Mn:][:Me:][:Cf:]];"

        // lowercase letters
        "lc=[:Ll:];"

        // uppercase Latin letters
        "ucLatin=[A-Z];"

        // whitespace (line separators are treated as whitespace)
        "space=[\t\r\f\n\u2028[:Zs:]];"

        // punctuation which may occur at the beginning of a sentence: "starting
        // punctuation" and quotation marks
        "start=[[:Ps:]\\\"\\\'];"

        // punctuation which may occur at the end of a sentence: "ending punctuation"
        // and quotation marks
        "end=[[:Pe:]\\\"\\\'];"

        // digits
        "digit=[:N:];"

        // characters that unambiguously signal the end of a sentence
        "term=[\\!\\?\u3002\uff01\uff1f];"

        // periods, which MAY signal the end of a sentence
        "period=[\\.\uff0e];"

        // characters that may occur at the beginning of a sentence: basically anything
        // not mentioned above (lowercase letters and digits are specifically excluded)
        "sent-start=[^{lc}{ucLatin}{space}{start}{end}{digit}{term}{period}\u2029{$ignore}];"

        // Hindi phrase separator
        "danda=[\u0964\u0965];"

        // always break sentences after paragraph separators
        ".*?\u2029?;"

        // always break after a danda, if it's followed by whitespace
        ".*?{danda}{space}*;"

        // if you see a period, skip over additional periods and ending punctuation
        // and if the next character is a paragraph separator, break after the
        // paragraph separator
        ".*?{period}[{period}{end}]*{space}*\u2029;"

        // if you see a period, skip over additional periods and ending punctuation,
        // followed by optional whitespace, followed by optional starting punctuation,
        // and if the next character is something that can start a sentence
        // (basically, a capital letter), then put the sentence break between the
        // whitespace and the opening punctuation
        ".*?{period}[{period}{end}]*{space}*/({start}*{sent-start}|{start}+{ucLatin});"

        // same as above, except that there's a sentence break before a Latin capital
        // letter only if there's at least one space after the period
        ".*?{period}[{period}{end}]*{space}+/{ucLatin};"

        // if you see a sentence-terminating character, skip over any additional
        // terminators, periods, or ending punctuation, followed by any whitespace,
        // followed by a SINGLE optional paragraph separator, and put the break there
        ".*?{term}[{term}{period}{end}]*{space}*\u2029?;"

        // The following rules are here to aid in backwards iteration.  The automatically
        // generated backwards state table will rewind to the beginning of the
        // paragraph all the time (or all the way to the beginning of the document
        // if the document doesn't use the Unicode PS character) because the only
        // unambiguous character pairs are those involving paragraph separators.
        // These specify a few more unambiguous breaking situations.

        // if you see a sentence-starting character, followed by starting punctuation
        // (remember, we're iterating backwards), followed by a run of whitespace
        // (at least one character), followed by an optional run of ending
        // punctuation, followed by a period, this is a safe place to turn around
        "![{sent-start}{ucLatin}]{start}*{space}+{end}*{period};"

        // if you see a letter or a digit, followed by an optional run of
        // starting punctuation, followed by an optional run of whitespace,
        // followed by an optional run of ending punctuation, followed by
        // a sentence terminator, this is a safe place to turn around
        "![{sent-start}{lc}{digit}]{start}*{space}*{end}*{term};"
    }

    //------------------------------------------------------------
    // END BreakIterator support
    //------------------------------------------------------------

    AmPmMarkers {
        "AM",
        "PM",
    }

    // English display names, keyed by two-letter country code
    Countries {
        AF { "Afghanistan" }
        AL { "Albania" }
        DZ { "Algeria" }
        AD { "Andorra" }
        AO { "Angola" }
        AI { "Anguilla" }
        AR { "Argentina" }
        AM { "Armenia" }
        AW { "Aruba" }
        AU { "Australia" }
        AT { "Austria" }
        AZ { "Azerbaijan" }
        BS { "Bahamas" }
        BH { "Bahrain" }
        BD { "Bangladesh" }
        BB { "Barbados" }
        BY { "Belarus" }
        BE { "Belgium" }
        BZ { "Belize" }
        BJ { "Benin" }
        BM { "Bermuda" }
        BT { "Bhutan" }
        BO { "Bolivia" }
        BA { "Bosnia and Herzegovina" }
        BW { "Botswana" }
        BR { "Brazil" }
        BN { "Brunei" }
        BG { "Bulgaria" }
        BF { "Burkina Faso" }
        BI { "Burundi" }
        KH { "Cambodia" }
        CM { "Cameroon" }
        CA { "Canada" }
        CV { "Cape Verde" }
        CF { "Central African Republic" }
        TD { "Chad" }
        CL { "Chile" }
        CN { "China" }
        CO { "Colombia" }
        KM { "Comoros" }
        CG { "Congo" }
        CR { "Costa Rica" }
        CI { "C\u00F4te d'Ivoire" }
        HR { "Croatia" }
        CU { "Cuba" }
        CY { "Cyprus" }
        CZ { "Czech Republic" }
        DK { "Denmark" }
        DJ { "Djibouti" }
        DM { "Dominica" }
        DO { "Dominican Republic" }
        TP { "East Timor" }
        EC { "Ecuador" }
        EG { "Egypt" }
        SV { "El Salvador" }
        GQ { "Equatorial Guinea" }
        ER { "Eritrea" }
        EE { "Estonia" }
        ET { "Ethiopia" }
        FJ { "Fiji" }
        FI { "Finland" }
        FR { "France" }
        GF { "French Guiana" }
        PF { "French Polynesia" }
        TF { "French Southern Territories" }
        GA { "Gabon" }
        GM { "Gambia" }
        GE { "Georgia" }
        DE { "Germany" }
        GH { "Ghana" }
        GR { "Greece" }
        GP { "Guadeloupe" }
        GT { "Guatemala" }
        GN { "Guinea" }
        GW { "Guinea-Bissau" }
        GY { "Guyana" }
        HT { "Haiti" }
        HN { "Honduras" }
        HK { "Hong Kong" }
        HU { "Hungary" }
        IS { "Iceland" }
        IN { "India" }
        ID { "Indonesia" }
        IR { "Iran" }
        IQ { "Iraq" }
        IE { "Ireland" }
        IL { "Israel" }
        IT { "Italy" }
        JM { "Jamaica" }
        JP { "Japan" }
        JO { "Jordan" }
        KZ { "Kazakhstan" }
        KE { "Kenya" }
        KI { "Kiribati" }
        KP { "North Korea" }
        KR { "South Korea" }
        KW { "Kuwait" }
        KG { "Kyrgyzstan" }
        LA { "Laos" }
        LV { "Latvia" }
        LB { "Lebanon" }
        LS { "Lesotho" }
        LR { "Liberia" }
        LY { "Libya" }
        LI { "Liechtenstein" }
        LT { "Lithuania" }
        LU { "Luxembourg" }
        MK { "Macedonia" }
        MG { "Madagascar" }
        MY { "Malaysia" }
        ML { "Mali" }
        MT { "Malta" }
        MQ { "Martinique" }
        MR { "Mauritania" }
        MU { "Mauritius" }
        YT { "Mayotte" }
        MX { "Mexico" }
        FM { "Micronesia" }
        MD { "Moldova" }
        MC { "Monaco" }
        MN { "Mongolia" }
        MS { "Montserrat" }
        MA { "Morocco" }
        MZ { "Mozambique" }
        MM { "Myanmar" }
        NA { "Namibia" }
        NP { "Nepal" }
        NL { "Netherlands" }
        AN { "Netherlands Antilles" }
        NC { "New Caledonia" }
        NZ { "New Zealand" }
        NI { "Nicaragua" }
        NE { "Niger" }
        NG { "Nigeria" }
        NU { "Niue" }
        NO { "Norway" }
        OM { "Oman" }
        PK { "Pakistan" }
        PA { "Panama" }
        PG { "Papua New Guinea" }
        PY { "Paraguay" }
        PE { "Peru" }
        PH { "Philippines" }
        PL { "Poland" }
        PT { "Portugal" }
        PR { "Puerto Rico" }
        QA { "Qatar" }
        RO { "Romania" }
        RU { "Russia" }
        RW { "Rwanda" }
        SA { "Saudi Arabia" }
        SN { "Senegal" }
        SP { "Serbia" }
        SC { "Seychelles" }
        SL { "Sierra Leone" }
        SG { "Singapore" }
        SK { "Slovakia" }
        SI { "Slovenia" }
        SO { "Somalia" }
        ZA { "South Africa" }
        ES { "Spain" }
        LK { "Sri Lanka" }
        SD { "Sudan" }
        SR { "Suriname" }
        SZ { "Swaziland" }
        SE { "Sweden" }
        CH { "Switzerland" }
        SY { "Syria" }
        TW { "Taiwan" }
        TJ { "Tajikistan" }
        TZ { "Tanzania" }
        TH { "Thailand" }
        TG { "Togo" }
        TK { "Tokelau" }
        TO { "Tonga" }
        TT { "Trinidad and Tobago" }
        TN { "Tunisia" }
        TR { "Turkey" }
        TM { "Turkmenistan" }
        UG { "Uganda" }
        UA { "Ukraine" }
        AE { "United Arab Emirates" }
        GB { "United Kingdom" }
        US { "United States" }
        UY { "Uruguay" }
        UZ { "Uzbekistan" }
        VU { "Vanuatu" }
        VA { "Vatican" }
        VE { "Venezuela" }
        VN { "Vietnam" }
        VG { "British Virgin Islands" }
        VI { "U.S. Virgin Islands" }
        EH { "Western Sahara" }
        YE { "Yemen" }
        YU { "Yugoslavia" }
        ZR { "Zaire" }
        ZM { "Zambia" }
        ZW { "Zimbabwe" }
    }

    CurrencyElements {
        "\u00A4",
        "XXX",
        "",
    }

    DateTimeElements {
        "1",
        "1",
    }

    // four time patterns, four date patterns, then the pattern that combines
    // a time ({0}) and a date ({1}) -- NOTE(review): presumably ordered
    // full/long/medium/short within each group; confirm against the consumer
    DateTimePatterns {
        "h:mm:ss a z",
        "h:mm:ss a z",
        "h:mm:ss a",
        "h:mm a",
        "EEEE, MMMM d, yyyy",
        "MMMM d, yyyy",
        "MMM d, yyyy",
        "M/d/yy",
        "{1} {0}",
    }

    DayAbbreviations {
        "Sun",
        "Mon",
        "Tue",
        "Wed",
        "Thu",
        "Fri",
        "Sat",
    }

    DayNames {
        "Sunday",
        "Monday",
        "Tuesday",
        "Wednesday",
        "Thursday",
        "Friday",
        "Saturday",
    }

    Eras {
        "BC",
        "AD",
    }

    // English display names, keyed by two-letter language code (several codes
    // appear in both an older and a newer form: he/iw, id/in, ji/yi)
    Languages {
        ab { "Abkhazian" }
        aa { "Afar" }
        af { "Afrikaans" }
        sq { "Albanian" }
        am { "Amharic" }
        ar { "Arabic" }
        hy { "Armenian" }
        as { "Assamese" }
        ay { "Aymara" }
        az { "Azerbaijani" }
        ba { "Bashkir" }
        eu { "Basque" }
        bn { "Bengali" }
        dz { "Bhutani" }
        bh { "Bihari" }
        bi { "Bislama" }
        br { "Breton" }
        bg { "Bulgarian" }
        my { "Burmese" }
        be { "Byelorussian" }
        km { "Cambodian" }
        ca { "Catalan" }
        zh { "Chinese" }
        co { "Corsican" }
        hr { "Croatian" }
        cs { "Czech" }
        da { "Danish" }
        nl { "Dutch" }
        en { "English" }
        eo { "Esperanto" }
        et { "Estonian" }
        fo { "Faeroese" }
        fj { "Fiji" }
        fi { "Finnish" }
        fr { "French" }
        fy { "Frisian" }
        gl { "Galician" }
        ka { "Georgian" }
        de { "German" }
        el { "Greek" }
        kl { "Greenlandic" }
        gn { "Guarani" }
        gu { "Gujarati" }
        ha { "Hausa" }
        he { "Hebrew" }
        iw { "Hebrew" }
        hi { "Hindi" }
        hu { "Hungarian" }
        is { "Icelandic" }
        id { "Indonesian" }
        in { "Indonesian" }
        ia { "Interlingua" }
        ie { "Interlingue" }
        iu { "Inuktitut" }
        ik { "Inupiak" }
        ga { "Irish" }
        it { "Italian" }
        ja { "Japanese" }
        jw { "Javanese" }
        kn { "Kannada" }
        ks { "Kashmiri" }
        kk { "Kazakh" }
        rw { "Kinyarwanda" }
        ky { "Kirghiz" }
        rn { "Kirundi" }
        ko { "Korean" }
        ku { "Kurdish" }
        lo { "Laothian" }
        la { "Latin" }
        lv { "Latvian (Lettish)" }
        ln { "Lingala" }
        lt { "Lithuanian" }
        mk { "Macedonian" }
        mg { "Malagasy" }
        ms { "Malay" }
        ml { "Malayalam" }
        mt { "Maltese" }
        mi { "Maori" }
        mr { "Marathi" }
        mo { "Moldavian" }
        mn { "Mongolian" }
        na { "Nauru" }
        ne { "Nepali" }
        no { "Norwegian" }
        oc { "Occitan" }
        or { "Oriya" }
        om { "Oromo (Afan)" }
        ps { "Pashto (Pushto)" }
        fa { "Persian" }
        pl { "Polish" }
        pt { "Portuguese" }
        pa { "Punjabi" }
        qu { "Quechua" }
        rm { "Rhaeto-Romance" }
        ro { "Romanian" }
        ru { "Russian" }
        sm { "Samoan" }
        sg { "Sango" }
        sa { "Sanskrit" }
        gd { "Scots Gaelic" }
        sr { "Serbian" }
        sh { "Serbo-Croatian" }
        st { "Sesotho" }
        tn { "Setswana" }
        sn { "Shona" }
        sd { "Sindhi" }
        si { "Singhalese" }
        ss { "Siswati" }
        sk { "Slovak" }
        sl { "Slovenian" }
        so { "Somali" }
        es { "Spanish" }
        su { "Sundanese" }
        sw { "Swahili" }
        sv { "Swedish" }
        tl { "Tagalog" }
        tg { "Tajik" }
        ta { "Tamil" }
        tt { "Tatar" }
        te { "Telugu" }
        th { "Thai" }
        bo { "Tibetan" }
        ti { "Tigrinya" }
        to { "Tonga" }
        ts { "Tsonga" }
        tr { "Turkish" }
        tk { "Turkmen" }
        tw { "Twi" }
        ug { "Uighur" }
        uk { "Ukrainian" }
        ur { "Urdu" }
        uz { "Uzbek" }
        vi { "Vietnamese" }
        vo { "Volapuk" }
        cy { "Welsh" }
        wo { "Wolof" }
        xh { "Xhosa" }
        ji { "Yiddish" }
        yi { "Yiddish" }
        yo { "Yoruba" }
        za { "Zhuang" }
        zu { "Zulu" }
    }

    LocaleID { "0409" }

    LocaleString { "en" }

    // twelve month abbreviations followed by an empty thirteenth entry
    MonthAbbreviations {
        "Jan",
        "Feb",
        "Mar",
        "Apr",
        "May",
        "Jun",
        "Jul",
        "Aug",
        "Sep",
        "Oct",
        "Nov",
        "Dec",
        "",
    }

    // twelve month names followed by an empty thirteenth entry
    MonthNames {
        "January",
        "February",
        "March",
        "April",
        "May",
        "June",
        "July",
        "August",
        "September",
        "October",
        "November",
        "December",
        "",
    }

    NumberElements {
        ".",
        ",",
        ";",
        "%",
        "0",
        "#",
        "-",
        "E",
        "\u2030",
        "\u221E",
        "\uFFFD",
    }

    NumberPatterns {
        "#,##0.###;-#,##0.###",
        "\u00A4 #,##0.00;-\u00A4 #,##0.00",
        "#,##0%",
    }

    ShortCountry { "" }

    ShortLanguage { "eng" }

    localPatternChars { "GyMdkHmsSEDFwWahKz" }

    // one row per zone; as the rows below show, each row holds a zone ID,
    // a long and a short name, a second long and short name (the daylight
    // names where the zone has them), and a city
    zoneStrings {
        {
            "PST",
            "Pacific Standard Time",
            "PST",
            "Pacific Daylight Time",
            "PDT",
            "San Francisco",
        }
        {
            "MST",
            "Mountain Standard Time",
            "MST",
            "Mountain Daylight Time",
            "MDT",
            "Denver",
        }
        {
            "PNT",
            "Mountain Standard Time",
            "MST",
            "Mountain Standard Time",
            "MST",
            "Phoenix",
        }
        {
            "CST",
            "Central Standard Time",
            "CST",
            "Central Daylight Time",
            "CDT",
            "Chicago",
        }
        {
            "EST",
            "Eastern Standard Time",
            "EST",
            "Eastern Daylight Time",
            "EDT",
            "New York",
        }
        {
            "IET",
            "Eastern Standard Time",
            "EST",
            "Eastern Standard Time",
            "EST",
            "Indianapolis",
        }
        {
            "PRT",
            "Atlantic Standard Time",
            "AST",
            "Atlantic Daylight Time",
            "ADT",
            "Halifax",
        }
        {
            "HST",
            "Hawaii Standard Time",
            "HST",
            "Hawaii Daylight Time",
            "HDT",
            "Honolulu",
        }
        {
            "AST",
            "Alaska Standard Time",
            "AST",
            "Alaska Daylight Time",
            "ADT",
            "Anchorage",
        }
    }
}