scuffed-code/icu4c/source/data/locales/root.txt

// *******************************************************************************
// *
// *   Copyright (C) 1997-2001, International Business Machines
// *   Corporation and others.  All Rights Reserved.
// *
// *******************************************************************************
//  Date        Name        Description
//  11/17/99    aliu        Added support for transliterators.

// Please search for 'DUP FIXME' - duplicate lang/country names commented out


root {
    // NOTE(review): presumably the version of this locale data — confirm what consumes it
    Version { "3.1.1" }

    // Each variant name that occurs in a locale ID should be listed here,
    // prefixed with %%, together with its display string.
    "%%EURO" { "Euro" }         // Euro variant display name
    "%%B"    { "Bokm\u00e5l" }  // Norwegian (Bokmal) variant display name
    "%%NY"   { "Nynorsk" }      // Norwegian (Nynorsk) variant display name
    "%%AL"   { "\u00C5land" }   // Aland variant display name

    // POSIX variant display name
    "%%POSIX" { "POSIX" }

    // Variants for collation ("traditional" may also be used for Traditional Chinese)
    "%%PHONEBOOK" { "Phonebook Order" }
    "%%PINYIN" { "Pinyin Order" }
    "%%TRADITIONAL" { "Traditional" }
    "%%STROKE" { "Stroke Order" }
    "%%DIRECT" { "Direct Order" }

    // This is a special tag, not a display name: it makes genrb include
    // UCARules.txt for collation.
    "%%UCARULES" { "UCARules.txt"} // UCARules

    // no collation elements any more
    // CollationElements {
    //	    Version { "1.0" }
    //		Override { "FALSE" }
    //		Sequence { "" }
    // }
    // Formats for the display name of a locale, for a list of
    // items, and for composing two items in a list into one item.
    // The list patterns are used in the variant name and in the
    // full display name.
    //
    // This is the language-neutral form of this resource.
    //
    LocaleNamePatterns {
         // Three message-format patterns, in this order.  In the first two,
         // argument {0} is a count that selects (via "choice") which of the
         // alternatives is used; a count of 0 yields the empty string.
         {
            // count 1: just {1}; count 2: {1} followed by {2} in parentheses
            "{0,choice,0#|1#{1}|2#{1} ({2})}", // Display name
            // count 1..3: that many items joined with commas (no spaces)
            "{0,choice,0#|1#{1}|2#{1},{2}|3#{1},{2},{3}}", // List
            // joins two items (or already-composed lists) with a comma
            "{0},{1}" // List composition
         }
    }

    //------------------------------------------------------------
    // BEGIN Transliterator support
    //------------------------------------------------------------

    // See also icu/data/translit/index.txt

    TransliteratorNamePattern {
        // Format for the display name of a Transliterator.
        // This is the language-neutral form of this resource.
        // Argument {0} is a count selecting the form: 0 yields the empty
        // string, 1 yields {1}, and 2 yields {1} and {2} joined by a hyphen.
        "{0,choice,0#|1#{1}|2#{1}-{2}}" // Display name
    }

    //------------------------------------------------------------
    // END Transliterator support
    //------------------------------------------------------------

    //------------------------------------------------------------
    // BEGIN BreakIterator support
    //------------------------------------------------------------

    CharacterBreakRules {
        // Rules for finding character (grapheme-like) boundaries.  The adjacent
        // string literals below form one rule description; each statement ends
        // with ';' and is either a "name=[set]" / "name=(expr)" definition or a
        // pattern describing a sequence that must be kept together.

        // ignore non-spacing marks and enclosing marks (since we never
        // put a break before ignore characters, this keeps combining
        // accents with the base characters they modify)
        "$ignore=[[:Mn:][:Me:]];"

        // other category definitions: the three ranges of conjoining Hangul
        // jamo, and the high and low halves of a surrogate pair
        "choseong=[\u1100-\u115f];"
        "jungseong=[\u1160-\u11a7];"
        "jongseong=[\u11a8-\u11ff];"
        "surr-hi=[\ud800-\udbff];"
        "surr-lo=[\udc00-\udfff];"

        // break after every character, except as follows:
        ".;"

        // keep CRLF sequences together
        "\r\n;"

        // keep surrogate pairs together
        "{surr-hi}{surr-lo};"

        // keep Hangul syllables spelled out using conjoining jamo together
        "{choseong}*{jungseong}*{jongseong}*;"

        // various additions for Hindi support (Devanagari character classes)
        "nukta=[\u093c];"
        "danda=[\u0964\u0965];"
        "virama=[\u094d];"
        "devVowelSign=[\u093e-\u094c\u0962\u0963];"
        "devConsonant=[\u0915-\u0939];"
        "devNuktaConsonant=[\u0958-\u095f];"
        "devCharEnd=[\u0902\u0903\u0951-\u0954];"
        "zwj=[\u200d];"

        // a consonant optionally followed by a nukta
        "devCAMN=({devConsonant}{nukta}?);"
        // either a single-code-point nukta consonant or a consonant (+ nukta)
        "devConsonant1=({devNuktaConsonant}|{devCAMN});"
        // a consonant, optionally preceded by one "consonant + virama"
        // (with an optional zero-width joiner after the virama)
        "devConjunct=(({devConsonant1}{virama}{zwj}?)?{devConsonant1});"

        // keep a conjunct together with an optional vowel sign and an
        // optional trailing character from devCharEnd
        "{devConjunct}{devVowelSign}?{devCharEnd}?;"
        // keep a danda together with a following nukta
        "{danda}{nukta};"
    }

    // default rules for finding word boundaries
    WordBreakRules {
        // The adjacent string literals below form one rule description; each
        // statement ends with ';' and is either a named definition or a
        // pattern describing a sequence that must be kept together.

        // ignore non-spacing marks, enclosing marks, and format characters,
        // all of which should not influence the algorithm
        "$ignore=[[:Mn:][:Me:][:Cf:]];"

        // Hindi phrase separator, kanji, katakana, hiragana, CJK diacriticals,
        // other letters, and digits
        "danda=[\u0964\u0965];"
        "kanji=[\u3005\u4e00-\u9fa5\uf900-\ufa2d];"
        "kata=[\u3099-\u309c\u30a1-\u30fe];"
        "hira=[\u3041-\u309e\u30fc];"
        "let=[[[:L:][:Mc:]]-[{kanji}{kata}{hira}]];"
        "dgt=[:N:];"

        // punctuation that can occur in the middle of a word: currently
        // dashes, the soft hyphen (U+00AD), the hyphenation point (U+2027),
        // apostrophes, quotation marks, and periods
        "mid-word=[[:Pd:]\u00ad\u2027\\\"\\\'\\.];"

        // punctuation that can occur in the middle of a number: currently
        // apostrophes, quotation marks, periods, commas, and the Arabic
        // decimal point
        "mid-num=[\\\"\\\'\\,\u066b\\.];"

        // punctuation that can occur at the beginning of a number: currently
        // the period, the number sign, and all currency symbols except the cents sign
        "pre-num=[[[:Sc:]-[\u00a2]]\\#\\.];"

        // punctuation that can occur at the end of a number: currently
        // the percent, per-thousand, per-ten-thousand, and Arabic percent
        // signs, the cents sign, and the ampersand
        "post-num=[\\%\\&\u00a2\u066a\u2030\u2031];"

        // line separators: currently LF, FF, PS, and LS
        "ls=[\n\u000c\u2028\u2029];"

        // whitespace: all space separators and the tab character
        "ws=[[:Zs:]\t];"

        // a word is a sequence of letters that may contain internal
        // punctuation, as long as it begins and ends with a letter and
        // never contains two punctuation marks in a row
        // (a single trailing danda is also allowed)
        "word=({let}+({mid-word}{let}+)*{danda}?);"

        // a number is a sequence of digits that may contain internal
        // punctuation, as long as it begins and ends with a digit and
        // never contains two punctuation marks in a row.
        "number=({dgt}+({mid-num}{dgt}+)*);"

        // break after every character, with the following exceptions
        // (this will cause punctuation marks that aren't considered
        // part of words or numbers to be treated as words unto themselves)
        ".;"

        // keep together any sequence of contiguous words and numbers
        // (including just one of either), plus an optional trailing
        // number-suffix character
        "{word}?({number}{word})*({number}{post-num}?)?;"

        // keep together any sequence of contiguous words and numbers
        // that starts with a number-prefix character and a number,
        // and may end with a number-suffix character
        "{pre-num}({number}{word})*({number}{post-num}?)?;"

        // keep together runs of whitespace (optionally with a single trailing
        // line separator or CRLF sequence)
        "{ws}*\r?{ls}?;"

        // keep together runs of Katakana
        "{kata}*;"

        // keep together runs of Hiragana
        "{hira}*;"

        // keep together runs of Kanji
        "{kanji}*;"
    }

    // default rules for determining legal line-breaking positions
    LineBreakRules {
        // The adjacent string literals below form one rule description; each
        // statement ends with ';' and is either a named definition or a
        // pattern describing a sequence that must be kept together.

        // ignore non-spacing marks, enclosing marks, and format characters
        "$ignore=[[:Mn:][:Me:][:Cf:]];"

        // Hindi phrase separators
        "danda=[\u0964\u0965];"

        // characters that always cause a break: ETX, tab, LF, FF, LS, and PS
        "break=[\u0003\t\n\f\u2028\u2029];"

        // characters that always prevent a break: the non-breaking space
        // and similar characters
        "nbsp=[\u00a0\u2007\u2011\ufeff];"

        // whitespace: space separators and control characters, except for
        // CR and the other characters mentioned above
        "space=[[[:Zs:][:Cc:]]-[{nbsp}{break}\r]];"

        // dashes: dash punctuation and the discretionary hyphen, except for
        // non-breaking hyphens
        "dash=[[[:Pd:]\u00ad]-[{nbsp}]];"

        // characters that stick to a word if they precede it: currency symbols
        // (except the cents sign), starting punctuation, and the quote marks
        "pre-word=[[[:Sc:]-[\u00a2]][:Ps:]\\\"\\\'];"

        // characters that stick to a word if they follow it: ending punctuation,
        // other punctuation that usually occurs at the end of a sentence,
        // small Kana characters, some CJK diacritics, etc.
        // (one set definition, split across several adjacent string literals)
        "post-word=[[:Pe:]\\!\\\"\\\'\\%\\.\\,\\:\\;\\?\u00a2\u00b0\u066a\u2030-\u2034"
                "\u2103\u2105\u2109\u3001\u3002\u3005\u3041\u3043\u3045\u3047\u3049\u3063"
                "\u3083\u3085\u3087\u308e\u3099-\u309e\u30a1\u30a3\u30a5\u30a7\u30a9"
                "\u30c3\u30e3\u30e5\u30e7\u30ee\u30f5\u30f6\u30fc-\u30fe\uff01\uff0c"
                "\uff0e\uff1f];"

        // Kanji: actually includes both Kanji and Kana, except for small Kana and
        // CJK diacritics
        "kanji=[[\u4e00-\u9fa5\uf900-\ufa2d\u3041-\u3094\u30a1-\u30fa]-[{post-word}{$ignore}]];"

        // digits
        "digit=[[:Nd:][:No:]];"

        // punctuation that can occur in the middle of a number: periods and commas
        "mid-num=[\\.\\,];"

        // everything not mentioned above, plus the quote marks (which are
        // <pre-word>, <post-word>, and <char> all at once)
        // NOTE(review): as written, the quote marks sit inside the negated set,
        // which reads as excluding them from char rather than adding them —
        // confirm against the rule compiler's set syntax
        "char=[^{break}{space}{dash}{kanji}{nbsp}{$ignore}{pre-word}{post-word}{mid-num}{danda}\r\\\"\\\'];"

        // a "number" is a run of prefix characters and dashes, followed by one or
        // more digits with isolated number-punctuation characters interspersed
        "number=([{pre-word}{dash}]*{digit}+({mid-num}{digit}+)*);"

        // the basic core of a word can be either a "number" as defined above, a single
        // "Kanji" character, or a run of any number of not-explicitly-mentioned
        // characters (this includes Latin letters)
        "word-core=([{pre-word}{char}]*|{kanji}|{number});"

        // a word may end with an optional suffix that can be either a run of one or
        // more dashes or a run of word-suffix characters, followed by an optional
        // run of whitespace
        "word-suffix=(({dash}+|{post-word}*){space}*);"

        // a word, thus, is an optional run of word-prefix characters, followed by
        // a word core and a word suffix (the syntax of <word-core> and <word-suffix>
        // actually allows either of them to match the empty string, putting a break
        // between things like ")(" or "aaa(aaa")
        "word=({pre-word}*{word-core}{word-suffix});"

        // finally, the rule that does the work: Keep together any run of words that
        // are joined by runs of one or more non-breaking characters ({nbsp}).  Also
        // keep a trailing line-break character or CRLF combination with the word.
        // (line separators "win" over nbsp's)
        "{word}({nbsp}+{word})*\r?{break}?;"
    }

    // default rules for finding sentence boundaries
    SentenceBreakRules {
        // The adjacent string literals below form one rule description; each
        // statement ends with ';'.  In the patterns, '/' marks where the break
        // goes when it is not at the end of the match (see the comments on the
        // individual rules), and rules starting with '!' are the ones the
        // comments below describe as aiding backwards iteration.

        // ignore non-spacing marks, enclosing marks, and format characters
        "$ignore=[[:Mn:][:Me:][:Cf:]];"

        // lowercase letters
        "lc=[:Ll:];"

        // uppercase Latin letters
        "ucLatin=[A-Z];"

        // whitespace (line separators are treated as whitespace)
        "space=[\t\r\f\n\u2028[:Zs:]];"

        // punctuation which may occur at the beginning of a sentence: "starting
        // punctuation" and quotation marks
        "start=[[:Ps:]\\\"\\\'];"

        // punctuation which may occur at the end of a sentence: "ending punctuation"
        // and quotation marks
        "end=[[:Pe:]\\\"\\\'];"

        // digits
        "digit=[:N:];"

        // characters that unambiguously signal the end of a sentence
        "term=[\\!\\?\u3002\uff01\uff1f];"

        // periods, which MAY signal the end of a sentence
        "period=[\\.\uff0e];"

        // characters that may occur at the beginning of a sentence: basically anything
        // not mentioned above (lowercase letters and digits are specifically excluded)
        "sent-start=[^{lc}{ucLatin}{space}{start}{end}{digit}{term}{period}\u2029{$ignore}];"

        // Hindi phrase separator
        "danda=[\u0964\u0965];"

        // always break sentences after paragraph separators
        ".*?\u2029?;"

        // always break after a danda, if it's followed by whitespace
        ".*?{danda}{space}*;"

        // if you see a period, skip over additional periods and ending punctuation
        // and if the next character is a paragraph separator, break after the
        // paragraph separator
        ".*?{period}[{period}{end}]*{space}*\u2029;"

        // if you see a period, skip over additional periods and ending punctuation,
        // followed by optional whitespace, followed by optional starting punctuation,
        // and if the next character is something that can start a sentence
        // (basically, a capital letter), then put the sentence break between the
        // whitespace and the opening punctuation
        ".*?{period}[{period}{end}]*{space}*/({start}*{sent-start}|{start}+{ucLatin});"

        // same as above, except that there's a sentence break before a Latin capital
        // letter only if there's at least one space after the period
        ".*?{period}[{period}{end}]*{space}+/{ucLatin};"

        // if you see a sentence-terminating character, skip over any additional
        // terminators, periods, or ending punctuation, followed by any whitespace,
        // followed by a SINGLE optional paragraph separator, and put the break there
        ".*?{term}[{term}{period}{end}]*{space}*\u2029?;"

        // The following rules are here to aid in backwards iteration.  The automatically
        // generated backwards state table will rewind to the beginning of the
        // paragraph all the time (or all the way to the beginning of the document
        // if the document doesn't use the Unicode PS character) because the only
        // unambiguous character pairs are those involving paragraph separators.
        // These specify a few more unambiguous breaking situations.

        // if you see a sentence-starting character (or an uppercase Latin letter),
        // followed by an optional run of starting punctuation (remember, we're
        // iterating backwards), followed by a run of at least one whitespace
        // character, followed by an optional run of ending punctuation, followed
        // by a period, this is a safe place to turn around
        "![{sent-start}{ucLatin}]{start}*{space}+{end}*{period};"

        // if you see a letter or a digit, followed by an optional run of
        // starting punctuation, followed by an optional run of whitespace,
        // followed by an optional run of ending punctuation, followed by
        // a sentence terminator, this is a safe place to turn around
        "![{sent-start}{lc}{digit}]{start}*{space}*{end}*{term};"
    }

    //------------------------------------------------------------
    // END BreakIterator support
    //------------------------------------------------------------

    AmPmMarkers {
            // Two-element array: the AM marker first, then the PM marker.
            "AM",
            "PM",
    }
    Countries {
        // Display names for countries and territories, keyed by two-letter
        // code.  This is the root (fallback) form of the table.  Entries are
        // ordered roughly alphabetically by display name, not by code.
        AF { "Afghanistan" }
        AL { "Albania" }
        DZ { "Algeria" }
        AS { "American Samoa" }
        AD { "Andorra" }
        AO { "Angola" }
        AI { "Anguilla" }
        AR { "Argentina" }
        AM { "Armenia" }
        AW { "Aruba" }
        AU { "Australia" }
        AT { "Austria" }
        AZ { "Azerbaijan" }
        BS { "Bahamas" }
        BH { "Bahrain" }
        BD { "Bangladesh" }
        BB { "Barbados" }
        BY { "Belarus" }
        BE { "Belgium" }
        BZ { "Belize" }
        BJ { "Benin" }
        BM { "Bermuda" }
        BT { "Bhutan" }
        BO { "Bolivia" }
        BA { "Bosnia and Herzegovina" }
        BW { "Botswana" }
        BR { "Brazil" }
        BN { "Brunei" }
        BG { "Bulgaria" }
        BF { "Burkina Faso" }
        BI { "Burundi" }
        KH { "Cambodia" }
        CM { "Cameroon" }
        CA { "Canada" }
        CV { "Cape Verde" }
        CF { "Central African Republic" }
        TD { "Chad" }
        CL { "Chile" }
        CN { "China" }
        CO { "Colombia" }
        KM { "Comoros" }
        CG { "Congo" }
        CR { "Costa Rica" }
        CI { "C\u00F4te d'Ivoire" }
        HR { "Croatia" }
        CU { "Cuba" }
        CY { "Cyprus" }
        CZ { "Czech Republic" }
        DK { "Denmark" }
        DJ { "Djibouti" }
        DM { "Dominica" }
        DO { "Dominican Republic" }
        TP { "East Timor" }
        EC { "Ecuador" }
        EG { "Egypt" }
        SV { "El Salvador" }
        GQ { "Equatorial Guinea" }
        ER { "Eritrea" }
        EE { "Estonia" }
        ET { "Ethiopia" }
        FJ { "Fiji" }
        FI { "Finland" }
        // FO and GL (below) sit out of alphabetical order relative to their neighbors
        FO { "Faroe Islands" } // http://www.din.de/gremien/nas/nabd/iso3166ma/codlstp1/en_listp1.html
        FR { "France" }
        GF { "French Guiana" }
        GL { "Greenland" } // http://www.din.de/gremien/nas/nabd/iso3166ma/codlstp1/en_listp1.html
        PF { "French Polynesia" }
        TF { "French Southern Territories" }
        GA { "Gabon" }
        GM { "Gambia" }
        GE { "Georgia" }
        DE { "Germany" }
        GH { "Ghana" }
        GR { "Greece" }
        GP { "Guadeloupe" }
        GU { "Guam" }
        GT { "Guatemala" }
        GN { "Guinea" }
        GW { "Guinea-Bissau" }
        GY { "Guyana" }
        HT { "Haiti" }
        HN { "Honduras" }
        HK { "Hong Kong S.A.R., China" }
        HU { "Hungary" }
        IS { "Iceland" }
        IN { "India" }
        ID { "Indonesia" }
        IR { "Iran" }
        IQ { "Iraq" }
        IE { "Ireland" }
        IL { "Israel" }
        IT { "Italy" }
        JM { "Jamaica" }
        JP { "Japan" }
        JO { "Jordan" }
        KZ { "Kazakhstan" }
        KE { "Kenya" }
        KI { "Kiribati" }
        KP { "North Korea" }
        KR { "South Korea" }
        KW { "Kuwait" }
        KG { "Kyrgyzstan" }
        LA { "Laos" }
        LV { "Latvia" }
        LB { "Lebanon" }
        LS { "Lesotho" }
        LR { "Liberia" }
        LY { "Libya" }
        LI { "Liechtenstein" }
        LT { "Lithuania" }
        LU { "Luxembourg" }
        MK { "Macedonia" }
        MG { "Madagascar" }
        MY { "Malaysia" }
        ML { "Mali" }
        MT { "Malta" }
        MP { "Northern Mariana Islands"}
        MH { "Marshall Islands"}
        MQ { "Martinique" }
        MR { "Mauritania" }
        MU { "Mauritius" }
        YT { "Mayotte" }
        MX { "Mexico" }
        FM { "Micronesia" }
        MD { "Moldova" }
        MC { "Monaco" }
        MN { "Mongolia" }
        MS { "Montserrat" }
        MA { "Morocco" }
        MZ { "Mozambique" }
        MM { "Myanmar" }
        NA { "Namibia" }
        NP { "Nepal" }
        NL { "Netherlands" }
        AN { "Netherlands Antilles" }
        NC { "New Caledonia" }
        NZ { "New Zealand" }
        NI { "Nicaragua" }
        NE { "Niger" }
        NG { "Nigeria" }
        NU { "Niue" }
        NO { "Norway" }
        OM { "Oman" }
        PK { "Pakistan" }
        PA { "Panama" }
        PG { "Papua New Guinea" }
        PY { "Paraguay" }
        PE { "Peru" }
        PH { "Philippines" }
        PL { "Poland" }
        PT { "Portugal" }
        PR { "Puerto Rico" }
        QA { "Qatar" }
        RO { "Romania" }
        RU { "Russia" }
        RW { "Rwanda" }
        SA { "Saudi Arabia" }
        SN { "Senegal" }
        // NOTE(review): SP does not look like an assigned ISO 3166 alpha-2 code;
        // presumably a legacy key — confirm before relying on it
        SP { "Serbia" }
        SC { "Seychelles" }
        SL { "Sierra Leone" }
        SG { "Singapore" }
        SK { "Slovakia" }
        SI { "Slovenia" }
        SO { "Somalia" }
        ZA { "South Africa" }
        ES { "Spain" }
        LK { "Sri Lanka" }
        SD { "Sudan" }
        SR { "Suriname" }
        SZ { "Swaziland" }
        SE { "Sweden" }
        CH { "Switzerland" }
        SY { "Syria" }
        TW { "Taiwan" }
        TJ { "Tajikistan" }
        TZ { "Tanzania" }
        TH { "Thailand" }
        TG { "Togo" }
        TK { "Tokelau" }
        TO { "Tonga" }
        TT { "Trinidad and Tobago" }
        TN { "Tunisia" }
        TR { "Turkey" }
        TM { "Turkmenistan" }
        UG { "Uganda" }
        UA { "Ukraine" }
        AE { "United Arab Emirates" }
        GB { "United Kingdom" }
        US { "United States" }
        UY { "Uruguay" }
        UZ { "Uzbekistan" }
        VU { "Vanuatu" }
        VA { "Vatican" }
        VE { "Venezuela" }
        VN { "Vietnam" }
        VG { "British Virgin Islands" }
        VI { "U.S. Virgin Islands" }
        EH { "Western Sahara" }
        YE { "Yemen" }
        YU { "Yugoslavia" }
        ZR { "Zaire" }
        ZM { "Zambia" }
        ZW { "Zimbabwe" }
    }
    CurrencyElements {
            // Three-element array.  \u00A4 is the generic currency sign.
            // NOTE(review): presumably [currency symbol, international currency
            // code, monetary decimal separator], with "" meaning "none
            // specified" — confirm against the code that reads this resource
            "\u00A4",
            "XXX",
            "",
    }
    DateTimeElements {
            // Two-element array of numeric strings.
            // NOTE(review): presumably [first day of week, minimal days in
            // first week] — confirm against the code that reads this resource
            "1",
            "1",
    }
    DateTimePatterns {
            // Nine patterns; position in the array is significant.
            //   entries 0-3: time-of-day patterns, longest to shortest
            //                (entries 0 and 1 are identical here)
            //   entries 4-7: date patterns, longest to shortest
            //   entry 8:     combines two formatted pieces, {1} then {0}
            // NOTE(review): presumably the four lengths are full/long/medium/short
            // and entry 8 is "date time" — confirm against the consuming code
            "h:mm:ss a z",
            "h:mm:ss a z",
            "h:mm:ss a",
            "h:mm a",
            "EEEE, MMMM d, yyyy",
            "MMMM d, yyyy",
            "MMM d, yyyy",
            "M/d/yy",
            "{1} {0}",
    }
    DayAbbreviations {
            // Abbreviated day-of-week names; seven entries, starting with Sunday,
            // in the same order as DayNames.
            "Sun",
            "Mon",
            "Tue",
            "Wed",
            "Thu",
            "Fri",
            "Sat",
    }
    DayNames {
            // Full day-of-week names; seven entries, starting with Sunday,
            // in the same order as DayAbbreviations.
            "Sunday",
            "Monday",
            "Tuesday",
            "Wednesday",
            "Thursday",
            "Friday",
            "Saturday",
    }
    Eras {
            // Two-element array of era names: "BC" first, then "AD".
            "BC",
            "AD",
    }
    Languages {
        aa  { "Afar" }
        ab  { "Abkhazian" }
        ace { "Achinese" }
        ach { "Acoli" }
        ada { "Adangme" }
        ae  { "Avestan" }
        af  { "Afrikaans" }
        afa { "Afro-Asiatic (Other)" }
        afh { "Afrihili" }
        aka { "Akan" }
        akk { "Akkadian" }
        ale { "Aleut" }
        alg {  "Algonquian Languages" }
        am  { "Amharic" }
        ang { "English, Old (ca.450-1100)" }
        apa { "Apache Languages"}
        ar  { "Arabic" }
        arc { "Aramaic" }
        arn { "Araucanian" }
        arp { "Arapaho" }
        art { "Artificial (Other)" }
        arw { "Arawak" }
        as  { "Assamese" }
        ath { "Athapaskan Languages" }
        aus { "Australian Languages" }
        ava { "Avaric" }
        awa { "Awadhi" }
        ay  { "Aymara" }
        az  { "Azerbaijani" }
        ba  { "Bashkir" }
        bad { "Banda" }
        bai { "Bamileke Languages" }
        bal { "Baluchi" }
        bam { "Bambara" }
        ban { "Balinese" }
        bas { "Basa" }
        bat { "Baltic (Other)" }
        be  { "Belarusian" }
        bej { "Beja" }
        bem { "Bemba" }
        ber { "Berber" }
        bg  { "Bulgarian" }
        bh  { "Bihari" }
        bho { "Bhojpuri" }
        bi  { "Bislama" }
        bik { "Bikol" }
        bin { "Bini" }
        bla { "Siksika" }
        bn  { "Bengali" }
        bnt { "Bantu" }
        bo  { "Tibetan" }
        br  { "Breton" }
        bra { "Braj" }
        bs  { "Bosnian"}
        btk { "Batak" }
        bua { "Buriat" }
        bug { "Buginese" }
        ca  { "Catalan" }
        cad { "Caddo" }
        cai { "Central American Indian (Other)" }
        car { "Carib" }
        cau { "Caucasian (Other)" }
        ce  { "Chechen" }
        ceb { "Cebuano" }
        cel { "Celtic (Other)" }
        ch  { "Chamorro" }
        chb { "Chibcha" }
        chg { "Chagatai" }
        chk { "Chuukese" }
        chm { "Mari" }
        chn { "Chinook Jargon" }
        cho { "Choctaw" }
        chp { "Chipewyan" }
        chr { "Cherokee" }
        chy { "Cheyenne" }
        cmc { "Chamic Languages" }
        co  { "Corsican" }
        cop { "Coptic" }
        cpe { "Creoles and Pidgins, English-based (Other)" }
        cpf { "Creoles and Pidgins, French-based (Other)" }
        cre { "Cree" }
        crp { "Creoles and Pidgins (Other)" }
        cs  { "Czech" }
        cu  { "Church Slavic" }
        cus { "Cushitic (Other)" }
        cv  { "Chuvash" }
        cy  { "Welsh" }
        da  { "Danish" }
        dak { "Dakota" }
        day { "Dayak" }
        de  { "German" }
        del { "Delaware" }
        den { "Slave" }
        dgr { "Dogrib" }
        din { "Dinka" }
        div { "Divehi" }
        doi { "Dogri" }
        dra { "Dravidian (Other)" }
        dua { "Duala" }
        dum { "Dutch, Middle (ca. 1050-1350)" }
        dyu { "Dyula" }
        dz  { "Bhutani" }
        dzo { "Dzongkha" }
        efi { "Efik" }
        egy { "Egyptian (Ancient)" }
        eka { "Ekajuk" }
        el  { "Greek" } //  Modern (1453-)
        elx { "Elamite" }
        en  { "English" }
        enm { "English, Middle (1100-1500)" }
        eo  { "Esperanto" }
        es  { "Spanish" }
        et  { "Estonian" }
        eu  { "Basque" }
        ewe { "Ewe" }
        ewo { "Ewondo" }
        fa  { "Persian" }
        fan { "Fang" }
        fat { "Fanti" }
        fi  { "Finnish" }
        fiu { "Finno-Ugrian" }
        fj  { "Fiji" }
        fo  { "Faeroese" }
        fon { "Fon" }
        fr  { "French" }
        frm { "French, Middle (ca.1400-1600)" }
        fro { "French, Old (842-ca.1400)" }
        ful { "Fulah" }
        fur { "Friulian" }
        fy  { "Frisian" }
        ga  { "Irish" }
        gaa { "Ga" }
        gay { "Gayo" }
        gba {"Gbaya" }
        gd  { "Scots Gaelic" }
        gem { "Germanic (Other)" }
        gil { "Gilbertese" }
		gl  { "Gallegan" }
        gla { "Gaelic (Scots)" }
        gmh { "German, Middle High (ca.1050-1500)" }
        gn  { "Guarani" }
        goh { "German, Old High (ca.750-1050)" }
        gon { "Gondi" }
        gor { "Gorontalo" }
        got { "Gothic" }
        grb { "Grebo" }
        grc { "Greek, Ancient (to 1453)" }
        gu  { "Gujarati" }
        gv  { "Manx" }
        gwi { "Gwich'in" }
        hai { "Haida" }
        hau { "Hausa" }
        haw { "Hawaiian" }
        he  { "Hebrew" }
        hi  { "Hindi" }
        him { "Himachali" }
        hit { "Hittite" }
        hmn { "Hmong" }
        ho  { "Hiri Motu" }
        hr  { "Croatian" }
        hu  { "Hungarian" }
        hup { "Hupa" }
        hy  { "Armenian" }
        hz  { "Herero" }
        ia  { "Interlingua" }
        iba { "Iban" }
        ibo { "Igbo" }
        id  { "Indonesian" }
        ie  { "Interlingue" }
        ijo { "Ijo" }
        ik  { "Inupiak" }
        ilo { "Iloko" }
        inc { "Indic (Other)" }
        ine { "Indo-European" }
        ira { "Iranian" }
        iro { "Iroquoian Languages" }
        is  { "Icelandic" }
        it  { "Italian" }
        iu  { "Inuktitut" }
        iw  { "Hebrew" }
        ja  { "Japanese" }
        jpr { "Judeo-Persian" }
        jrb { "Judeo-Arabic" }
        jw  { "Javanese" }
        ka  { "Georgian" }
        kaa { "Kara-Kalpak" }
        kab { "Kabyle" }
        kac { "Kachin" }
        kam { "Kamba" }
        kar { "Karen" }
        kau { "Kanuri" }
        kaw { "Kawi" }
        kha { "Khasi" }
        khi { "Khoisan" }
        kho { "Khotanese" }
        ki  { "Kikuyu" }
        kk  { "Kazakh" }
        kl  { "Kalaallisut" }
        km  { "Khmer" }
        kmb { "Kimbundu" }
        kn  { "Kannada" }
        ko  { "Korean" }
        kok { "Konkani" }
        kon { "Kongo" }
        kos { "Kosraean" }
        kpe { "Kpelle" }
        kro { "Kru" }
        kru { "Kurukh" }
        ks  { "Kashmiri" }
        ku  { "Kurdish" }
        kum { "Kumyk" }
        kut { "Kutenai" }
        kv  { "Komi" }
        kw  { "Cornish" }
        ky  { "Kirghiz" }
        la  { "Latin" }
        lad { "Ladino" }
        lah { "Lahnda" }
        lam { "Lamba" }
        lb  { "Letzeburgesch" }
        lez { "Lezghian" }
        lin { "Lingala" }
        lit { "Lithuanian" }
        ln  { "Lingala" }
        lo  { "Lao" }
        lol { "Mongo" }
        loz { "Lozi" }
        lt  { "Lithuanian" }
        lua { "Luba-Lulua" }
        lub { "Luba-Katanga" }
        lug { "Ganda" }
        lui { "Luiseno" }
        lun { "Lunda" }
        luo { "Luo" }
        lus { "Lushai" }
        lv  { "Latvian (Lettish)" }
        mad { "Madurese" }
        mag { "Magahi" }
        mai { "Maithili" }
        mak { "Makasar" }
        man { "Mandingo" }
        map { "Austronesian" }
        mas { "Masai" }
        mdr { "Mandar" }
        men { "Mende" }
        mg  { "Malagasy" }
        mga { "Irish, Middle (900-1200)" }
        mh  { "Marshall"}
        mi  { "Maori" }
        mic { "Mic-Mac" }
        min { "Minangkabau" }
        mis { "Miscellaneous Languages" }
        mk  { "Macedonian" }
        mkh { "Mon-Khmer (Other)" }
        ml  { "Malayalam" }
        mn  { "Mongolian" }
        mnc { "Manchu" }
        mni { "Manipuri" }
        mno { "Manobo Languages" }
        mo  { "Moldavian" }
        moh { "Mohawk" }
        mos { "Mossi" }
        mr  { "Marathi" }
        ms  { "Malay" }
        mt  { "Maltese" }
        mul { "Multiple Languages" }
        mun { "Munda Languages" }
        mus { "Creek" }
        mwr { "Marwari" }
        my  { "Burmese" }
        myn { "Mayan" }
        na  { "Nauru" }
        nah { "Nahuatl" }
        nai { "North American Indian (Other)" }
        nb  { "Norwegian Bokm\u00e5l" }
        nd  { "Ndebele, North" }
        nds { "Low German; Low Saxon" }
        ne  { "Nepali" }
        new { "Newari" }
        ng  { "Ndonga" }
        nia { "Nias" }
        nic { "Niger-Kordofanian" }
        niu { "Niuean" }
        nl  { "Dutch" }
        nn  { "Norwegian Nynorsk" }
        no  { "Norwegian" }
        non { "Norse, Old" }
        nr  { "Ndebele, South" }
        nso { "Sotho, Northern" }
        nub { "Nubian Languages" }
        nv  {"Navajo"}
        ny  { "Chichewa; Nyanja" }
        nym { "Nyamwezi" }
        nyo { "Nyoro" }
        nzi { "Nzima" }
        oc  { "Proven\u00E7al; Occitan (post 1500)"}
        oji { "Ojibwa" }
        om  { "Oromo (Afan)" }
        or  { "Oriya" }
        os  { "Ossetic" }
        osa { "Osage" }
        ota { "Turkish (Ottoman Empire)" }
        oto { "Otomian Languages" }
        pa  { "Punjabi" }
        paa { "Papuan (Other)" }
        pag { "Pangasinan" }
        pal { "Pahlavi" }
        pam { "Pampanga" }
        pap { "Papiamento" }
        pau { "Palauan" }
        peo { "Persian, Old (ca.600-400 B.C.)" }
        phi { "Philippine (Other)" }
        phn { "Phoenician" }
        pi  { "Pali" }
        pl  { "Polish" }
        pon { "Pohnpeian" }
        pra { "Prakrit Languages" }
        pro { "Proven\u00E7al, Old (to 1500)" }
        ps  { "Pashto (Pushto)" }
        pt  { "Portuguese" }
        qu  { "Quechua" }
        raj { "Rajasthani" }
        rap { "Rapanui" }
        rar { "Rarotongan" }
        rm  { "Rhaeto-Romance" }
        rn  { "Rundi" }
        ro  { "Romanian" }
        roa { "Romance (Other)" }
        rom { "Romany" }
        ru  { "Russian" }
        rw  { "Kinyarwanda" }
        sa  { "Sanskrit" }
        sad { "Sandawe" }
        sah { "Yakut" }
        sai { "South American Indian (Other)" }
        sal { "Salishan" }
        sam { "Samaritan Aramaic" }
        sas { "Sasak" }
        sat { "Santali" }
        sc  { "Sardinian" }
        sco { "Scots" }
        sd  { "Sindhi" }
        se  { "Northern Sami" }
// http://lcweb.loc.gov/standards/iso639-2/englangn.html
// should provide the correct language code soon
//        se { "Sami Languages" }        // DUP FIXME
        sel { "Selkup" }
        sem { "Semitic" }
        sg  { "Sango" }
        sga { "Irish, Old (to 900)" }
        sgn { "Sign Languages" }
        sh  { "Serbo-Croatian" }
        shn { "Shan" }
        si  { "Sinhalese" }
        sid { "Sidamo" }
        sio { "Siouan Languages" }
        sit { "Sino-Tibetan (Other)" }
        sk  { "Slovak" }
        sl  { "Slovenian" }
        sm  { "Samoan" }
        sn  { "Shona" }
        snk { "Soninke" }
        so  { "Somali" }
        sog { "Sogdien" }
        son { "Songhai" }
        sq  { "Albanian" }
        sr  { "Serbian" }
        srr { "Serer" }
        ss  { "Swati" }
        ssa { "Nilo-Saharan" }
        st  { "Sotho, Southern" }
        su  { "Sundanese" }
        suk { "Sukuma" }
        sus { "Susu" }
        sux { "Sumerian"}
        sv  { "Swedish" }
        sw  { "Swahili" }
        syr { "Syriac" }
        ta  { "Tamil" }
        tai { "Tai (Other)" }
        te  { "Telugu" }
        tem { "Timne" }
        ter { "Tereno" }
        tet { "Tetum" }
        tg  { "Tajik" }
        th  { "Thai" }
        tig { "Tigre" }
        tir { "Tigrinya" }
        tiv { "Tiv" }
        tk  { "Turkmen" }
        tl  { "Tagalog" }
        tli { "Tlingit" }
        tmh { "Tamashek" }
        tn  { "Tswana" }
        tog { "Tonga (Nyasa)" }
        ton { "Tongan (Tonga Islands)" }
        tr  { "Turkish" }
        ts  { "Tsonga" }
        tsi { "Tsimshian" }
        tt  { "Tatar" }
        tum { "Tumbuka" }
        tur { "Turkish" }
        tut { "Altaic (Other)" }
        tvl { "Tuvalu" }
        tw  { "Twi" }
        ty  { "Tahitian" }
        tyv { "Tuvinian" }
        ug  { "Uighur" }
        uga { "Ugaritic" }
        uk  { "Ukrainian" }
        umb { "Umbundu" }
        und { "Undetermined" }
        ur  { "Urdu" }
        uz  { "Uzbek" }
        vai { "Vai" }
        ven { "Venda" }
        vi  { "Vietnamese" }
        vo  { "Volapuk" }
        vot  { "Votic" }
        wak { "Wakashan Languages" }
        wal { "Walamo" }
        war { "Waray" }
        was { "Washo" }
        wen { "Sorbian Languages" }
        wo  { "Wolof" }
        xh  { "Xhosa" }
        yao { "Yao" }
        yap { "Yapese" }
        yi  { "Yiddish" }
        yor  { "Yoruba" }
        ypk { "Yupik Languages" }
        za  { "Zhuang" }
        zap { "Zapotec" }
        zen { "Zenaga" }
        zh  { "Chinese" }
        znd { "Zande" }
        zu  { "Zulu" }
        zun { "Zuni" }
    }
    // Identification strings for this (root) bundle.
    // NOTE(review): "0000" looks like a hexadecimal numeric locale identifier
    // (e.g. a Windows LCID) meaning "no specific locale" -- confirm against
    // the code that reads LocaleID.
    LocaleID { "0000" }
    // The root bundle reports "en" here; the display strings in this file
    // are written in English.
    LocaleString { "en" }
    // Abbreviated month names in calendar order, January first.
    // The array holds thirteen entries: twelve names followed by one empty
    // string.
    // NOTE(review): the empty 13th slot is presumably reserved for calendars
    // with a thirteenth month -- confirm before removing it.
    MonthAbbreviations {
        "Jan", "Feb", "Mar", "Apr",
        "May", "Jun", "Jul", "Aug",
        "Sep", "Oct", "Nov", "Dec",
        "",
    }
    // Full month names in calendar order, January first.
    // Same shape as MonthAbbreviations: twelve names plus a trailing empty
    // string, thirteen entries in total.
    // NOTE(review): the empty 13th slot is presumably reserved for calendars
    // with a thirteenth month -- confirm before removing it.
    MonthNames {
        "January",   "February", "March",    "April",
        "May",       "June",     "July",     "August",
        "September", "October",  "November", "December",
        "",
    }
    // Symbols used when formatting and parsing numbers.  The position of
    // each entry is significant; do not reorder.
    // NOTE(review): the role given for each slot below is inferred from its
    // position and value, following the usual DecimalFormatSymbols ordering
    // -- confirm against the code that loads this array.
    NumberElements {
        ".",        //  0: decimal separator
        ",",        //  1: grouping separator
        ";",        //  2: separator between positive and negative sub-patterns
        "%",        //  3: percent sign
        "0",        //  4: zero digit
        "#",        //  5: optional-digit placeholder
        "-",        //  6: minus sign
        "E",        //  7: exponent marker
        "\u2030",   //  8: U+2030 PER MILLE SIGN
        "\u221E",   //  9: U+221E INFINITY
        "\uFFFD",   // 10: U+FFFD REPLACEMENT CHARACTER (presumably shown for NaN)
    }
    // Default number format patterns.  Each pattern that has a ';' gives the
    // positive form first and the negative form second.
    NumberPatterns {
        // plain decimal: grouping every three digits, up to three
        // fraction digits
        "#,##0.###;-#,##0.###",
        // currency: \u00A4 is the generic currency sign placeholder,
        // always two fraction digits
        "\u00A4 #,##0.00;-\u00A4 #,##0.00",
        // percent: integer digits followed by the percent sign
        "#,##0%",
        // scientific: mantissa digits followed by an exponent
        "#E0"
    }
    // Short codes for this bundle: there is no country for root (empty
    // string), and the language code is the three-letter form "eng".
    ShortCountry { "" }
    ShortLanguage { "eng" }
    // One character per date/time pattern field.
    // NOTE(review): presumably position i gives the localized letter for the
    // i-th standard date-format pattern character, so order and length must
    // match what the date formatter expects -- confirm before editing.
    localPatternChars { "GyMdkHmsSEDFwWahKzYe" }
    // Display names for a handful of North American time zones.
    // Every row has six strings, in this order:
    //     zone key,
    //     long standard name,  short standard name,
    //     long daylight name,  short daylight name,
    //     representative city.
    // Rows whose daylight pair repeats the standard pair (Phoenix,
    // Indianapolis) show the same names in both columns.
    // NOTE(review): the Alaska row uses the short names "AST"/"ADT", the
    // same abbreviations as the Atlantic row -- confirm this is intended.
    zoneStrings {
        // Pacific
        {
            "PST",
            "Pacific Standard Time", "PST",
            "Pacific Daylight Time", "PDT",
            "San Francisco",
        }
        // Mountain
        {
            "MST",
            "Mountain Standard Time", "MST",
            "Mountain Daylight Time", "MDT",
            "Denver",
        }
        // Mountain, standard names in both columns
        {
            "PNT",
            "Mountain Standard Time", "MST",
            "Mountain Standard Time", "MST",
            "Phoenix",
        }
        // Central
        {
            "CST",
            "Central Standard Time", "CST",
            "Central Daylight Time", "CDT",
            "Chicago",
        }
        // Eastern
        {
            "EST",
            "Eastern Standard Time", "EST",
            "Eastern Daylight Time", "EDT",
            "New York",
        }
        // Eastern, standard names in both columns
        {
            "IET",
            "Eastern Standard Time", "EST",
            "Eastern Standard Time", "EST",
            "Indianapolis",
        }
        // Atlantic
        {
            "PRT",
            "Atlantic Standard Time", "AST",
            "Atlantic Daylight Time", "ADT",
            "Halifax",
        }
        // Hawaii
        {
            "HST",
            "Hawaii Standard Time", "HST",
            "Hawaii Daylight Time", "HDT",
            "Honolulu",
        }
        // Alaska
        {
            "AST",
            "Alaska Standard Time", "AST",
            "Alaska Daylight Time", "ADT",
            "Anchorage",
        }
    }


    // Script code(s) associated with this locale; root lists only "Latn".
    // NOTE(review): the trailing comma appears to be what makes this a
    // one-element array rather than a plain string resource -- keep it
    // unless the reader of LocaleScript is confirmed to accept a string.
    LocaleScript{
        "Latn",
    }

    //------------------------------------------------------------
    // Rule Based Number Format Support
    //------------------------------------------------------------

//         * Spellout rules for U.S. English.  This rule set has two variants:
//         * %simplified is a set of rules showing the simple method of spelling
//         * out numbers in English: 289 is formatted as "two hundred eighty-nine".
//         * %default uses a more complicated algorithm to format
//         * numbers in a more natural way: 289 is formatted as "two hundred AND
//         * eighty-nine" and commas are inserted between the thousands groups for
//         * values above 100,000.

     // Rule text for spelling numbers out in English words.  The adjacent
     // quoted fragments below form one rule description; each fragment ends
     // with "\n" so that every rule sits on its own line of that text.
     // Public rule sets: %simplified and %default.  Rule sets whose names
     // start with %% (%%and, %%commas, %%lenient-parse) are internal helpers.
     SpelloutRules {
            // This rule set shows the normal simple formatting rules for English
            "%simplified:\n"
                   // negative number rule.  This rule is used to format negative
                   // numbers.  The result of formatting the number's absolute
                   // value is placed where the >> is.
            "    -x: minus >>;\n"
                   // fraction rule.  This rule is used for formatting numbers
                   // with fractional parts.  The result of formatting the
                   // number's integral part is substituted for the <<, and
                   // the result of formatting the number's fractional part
                   // (one digit at a time, e.g., 0.123 is "zero point one two
                   // three") replaces the >>.
            "    x.x: << point >>;\n"
                   // the rules for the values from 0 to 19 are simply the
                   // words for those numbers
            "    zero; one; two; three; four; five; six; seven; eight; nine;\n"
            "    ten; eleven; twelve; thirteen; fourteen; fifteen; sixteen;\n"
            "        seventeen; eighteen; nineteen;\n"
                   // beginning at 20, we use the >> to mark the position where
                   // the result of formatting the number's ones digit is to be
                   // inserted.  Thus, we only need a new rule at every multiple
                   // of 10.  Text in brackets is omitted if the value being
                   // formatted is an even multiple of 10.
            "    20: twenty[->>];\n"
            "    30: thirty[->>];\n"
            "    40: forty[->>];\n"
            "    50: fifty[->>];\n"
            "    60: sixty[->>];\n"
            "    70: seventy[->>];\n"
            "    80: eighty[->>];\n"
            "    90: ninety[->>];\n"
                   // beginning at 100, we can use << to mark the position where
                   // the result of formatting the multiple of 100 is to be
                   // inserted.  Notice also that the meaning of >> has shifted:
                   // here, it refers to both the ones place and the tens place.
                   // The meanings of the << and >> tokens depend on the base value
                   // of the rule.  A rule's divisor is (usually) the highest
                   // power of 10 that is less than or equal to the rule's base
                   // value.  The value being formatted is divided by the rule's
                   // divisor, and the integral quotient is used to get the text
                   // for <<, while the remainder is used to produce the text
                   // for >>.  Again, text in brackets is omitted if the value
                   // being formatted is an even multiple of the rule's divisor
                   // (in this case, an even multiple of 100)
            "    100: << hundred[ >>];\n"
                   // The rules for the higher numbers work the same way as the
                   // rule for 100: Again, the << and >> tokens depend on the
                   // rule's divisor, which for all these rules is also the rule's
                   // base value.  To group by thousand, we simply don't have any
                   // rules between 1,000 and 1,000,000.
            "    1000: << thousand[ >>];\n"
            "    1,000,000: << million[ >>];\n"
            "    1,000,000,000: << billion[ >>];\n"
            "    1,000,000,000,000: << trillion[ >>];\n"
                   // overflow rule.  This rule specifies that values of a
                   // quadrillion or more are shown in numerals rather than words.
                   // The == token means to format (with new rules) the value
                   // being formatted by this rule and place the result where
                   // the == is.  The #,##0 inside the == signs is a
                   // DecimalFormat pattern.  It specifies that the value should
                   // be formatted with a DecimalFormat object, and that it
                   // should be formatted with no decimal places, at least one
                   // digit, and a thousands separator.
            "    1,000,000,000,000,000: =#,##0=;\n"

            // %default is a more elaborate form of %simplified;  It is basically
            // the same, except that it introduces "and" before the ones digit
            // when appropriate (basically, between the tens and ones digits) and
            // separates the thousands groups with commas in values over 100,000.
            "%default:\n"
                   // negative-number and fraction rules.  These are the same
                   // as those for %simplified, but have to be stated here too
                   // because this is an entry point
            "    -x: minus >>;\n"
            "    x.x: << point >>;\n"
                   // just use %simplified for values below 100
            "    =%simplified=;\n"
                   // for values from 100 to 99,999 use %%and to decide whether or
                   // not to interpose the "and"
            "    100: << hundred[ >%%and>];\n"
            "    1000: << thousand[ >%%and>];\n"
                   // for values of 100,000 and up, use %%commas to interpose the
                   // commas in the right places (and also to interpose the "and")
            "    100,000>>: << thousand[>%%commas>];\n"
            "    1,000,000: << million[>%%commas>];\n"
            "    1,000,000,000: << billion[>%%commas>];\n"
            "    1,000,000,000,000: << trillion[>%%commas>];\n"
            "    1,000,000,000,000,000: =#,##0=;\n"
            // if the value passed to this rule set is 100 or more, don't
            // add the "and"; if it's less than 100, add "and" before the last
            // digits
            "%%and:\n"
            "    and =%default=;\n"
            "    100: =%default=;\n"
            // this rule set is used to place the commas
            "%%commas:\n"
                   // for values below 100, add "and" (the apostrophe at the
                   // beginning is ignored, but causes the space that follows it
                   // to be significant: this is necessary because the rules
                   // calling %%commas don't put a space before it)
            "    ' and =%default=;\n"
                   // put a comma after the thousands (or whatever preceded the
                   // hundreds)
            "    100: , =%default=;\n"
                   // put a comma after the millions (or whatever precedes the
                   // thousands)
            "    1000: , <%default< thousand, >%default>;\n"
                   // and so on...  (terminated with "\n" like every other
                   // rule line, so the next rule-set header starts on a
                   // line of its own)
            "    1,000,000: , =%default=;\n"
            // %%lenient-parse isn't really a set of number formatting rules;
            // it's a set of collation rules.  Lenient-parse mode uses a Collator
            // object to compare fragments of the text being parsed to the text
            // in the rules, allowing more leeway in the matching text.  This set
            // of rules tells the formatter to ignore commas when parsing (it
            // already ignores spaces, which is why we refer to the space; it also
            // ignores hyphens, making "twenty one" and "twenty-one" parse
            // identically)
            "%%lenient-parse:\n"
            //            "    & ' ' , ',' ;\n"
            "   &\u0000 << ' ' << ',' << '-'; \n"
    }


//         * This rule set adds an English ordinal abbreviation to the end of a
//         * number.  For example, 2 is formatted as "2nd".  Parsing doesn't work with
//         * this rule set.  To parse, use DecimalFormat on the numeral.
    // Rule text that appends an English ordinal suffix to a numeral.
    // %main is the entry point; %%abbrev is an internal helper rule set.
    OrdinalRules {
            // this rule set formats the numeral (with a thousands separator)
            // and calls %%abbrev to supply the abbreviation
            "%main:\n"
            "    =#,##0==%%abbrev=;\n"
            // this rule set supplies the abbreviation
            "%%abbrev:\n"
                   // the abbreviations for 0 through 4.  The last rule listed
                   // here stays in effect up to the next rule's base value, so
                   // everything from 4 to 19 ends in "th"
            "    th; st; nd; rd; th;\n"
                   // at 20, we begin repeating the cycle every 10 (13 is "13th",
                   // but 23 and 33 are "23rd" and "33rd")  We do this by
                   // ignoring all but the ones digit in selecting the abbreviation
            "    20: >>;\n"
                   // at 100, we repeat the whole cycle by considering only the
                   // tens and ones digits in picking an abbreviation
            "    100: >>;\n"
    }

//         * This rule set formats a number of seconds in sexagesimal notation
//         * (i.e., hours, minutes, and seconds).  %with-words formats it with
//         * words (3,740 is "1 hour, 2 minutes, 20 seconds") and %in-numerals
//         * formats it entirely in numerals (3,740 is "1:02:20").
    // Rule text for formatting a count of seconds as a duration.
    // Entry points: %with-words and %in-numerals.  Rule sets whose names
    // start with %% are internal helpers.
    DurationRules {
            // main rule set for formatting with words
            "%with-words:\n"
                   // take care of singular and plural forms of "second"
            "    0 seconds; 1 second; =0= seconds;\n"
                   // use %%min to format values of 60 seconds or more
            "    60/60: <%%min<[, >>];\n"
                   // use %%hr to format values of 3,600 seconds or more
                   // (the ">>>" below causes us to see the number of minutes
                   // even when there are zero minutes)
            "    3600/60: <%%hr<[, >>>];\n"
            // this rule set takes care of the singular and plural forms
            // of "minute"
            "%%min:\n"
            "    0 minutes; 1 minute; =0= minutes;\n"
            // this rule set takes care of the singular and plural forms
            // of "hour"
            "%%hr:\n"
            "    0 hours; 1 hour; =0= hours;\n"

            // main rule set for formatting in numerals
            "%in-numerals:\n"
                   // values below 60 seconds are shown with "sec."
            "    =0= sec.;\n"
                   // higher values are shown with colons: %%min-sec is used for
                   // values below 3,600 seconds...
            "    60: =%%min-sec=;\n"
                   // ...and %%hr-min-sec is used for values of 3,600 seconds
                   // and above
            "    3600: =%%hr-min-sec=;\n"
            // this rule set causes values of less than 10 minutes to show
            // without a leading zero; the seconds part is a colon followed
            // by two digits
            "%%min-sec:\n"
            "    0: :=00=;\n"
            "    60/60: <0<>>;\n"
            // this rule set is used for values of 3,600 or more.  Minutes are always
            // shown, and always shown with two digits
            "%%hr-min-sec:\n"
            "    0: :=00=;\n"
            "    60/60: <00<>>;\n"
            "    3600/60: <#,##0<:>>>;\n"
            // the lenient-parse rules allow several different characters to be used
            // as delimiters between hours, minutes, and seconds
            "%%lenient-parse:\n"
            "    & ':' = '.' = ' ' = '-';\n"
    }
    // English display names for scripts, keyed by four-letter script code
    // (written in upper case in this table).  Keep the entries sorted by key.
    Scripts {
        ARAB { "Arabic" }
        ARMN { "Armenian" }
        BENG { "Bengali" }
        BOPO { "Bopomofo" }
        // the script is "Syllabics", not "Symbols"
        CANS { "Unified Canadian Aboriginal Syllabics" }
        CHER { "Cherokee" }
        CYRL { "Cyrillic" }
        DEVA { "Devanagari" }
        DSRT { "Deseret" }
        ETHI { "Ethiopic" }
        GEOR { "Georgian" }
        GOTH { "Gothic" }
        GREK { "Greek" }
        GUJR { "Gujarati" }
        GURU { "Gurmukhi" }
        HANG { "Hangul" }
        HANI { "Han" }
        HEBR { "Hebrew" }
        HIRA { "Hiragana" }
        // display name, so written with a space rather than as an
        // underscore-joined identifier
        ITAL { "Old Italic" }
        KANA { "Katakana" }
        KHMR { "Khmer" }
        KNDA { "Kannada" }
        LAO  { "Lao" }
        LATN { "Latin" }
        MLYM { "Malayalam" }
        MONG { "Mongolian" }
        MYMR { "Myanmar" }
        OGAM { "Ogham" }
        ORYA { "Oriya" }
        QAAC { "Coptic" }
        QAAI { "Inherited" }
        RUNR { "Runic" }
        SINH { "Sinhala" }
        SYRC { "Syriac" }
        TAML { "Tamil" }
        TELU { "Telugu" }
        THAA { "Thaana" }
        THAI { "Thai" }
        TIBT { "Tibetan" }
        YIII { "Yi" }
        ZYYY { "Common" }
    }
}