scuffed-code/icu4c/source/data/coll/th.txt

// ***************************************************************************
// *
// *   Copyright (C) 1999-2004, International Business Machines
// *   Corporation and others.  All Rights Reserved.
// *
// ***************************************************************************

th {
    // Version string of the th bundle as a whole (distinct from the collation
    // Version further down).
    Version{ "2.0" }
    // -------------------- ibm.597 --------------------
    //
    // First put in all of the consonants, after Z
    //
    // NOTE(review): the legacy header above does not match the rules below,
    // which contain no consonant reordering -- it appears to be stale; confirm
    // before relying on it.
    collations {
      // The "standard" collation type for this locale.
      standard {
        // Version string of this collation tailoring.
        Version { "3.0" }
        // The tailoring rules. The adjacent quoted strings below together form
        // the single rule string.
        //
        // Rule operators used here (ICU collation rule syntax):
        //   &    reset: position the following relations relative to this item
        //   <    primary difference   (sorts as a different letter)
        //   <<   secondary difference (sorts like an accent variant)
        //   <<<  tertiary difference
        Sequence {
           // Tailoring of UCA for Thai Royal Institute Dictionary Sort, B.E. 2525
           "[normalization on]" // needed because Thai uses multiple accents

           // Put Ru with Lakkhangyao after Ru, and put Lu with Lakkhangyao after Lu.
           // See the comment below on Lakkhangyao.
           "& \u0e24"          // U+0E24 THAI CHARACTER RU
           "< \u0e24\u0e45"    // U+0E24 THAI CHARACTER RU  U+0E45 THAI CHARACTER LAKKHANGYAO
           "& \u0e26"          // U+0E26 THAI CHARACTER LU
           "< \u0e26\u0e45"    // U+0E26 THAI CHARACTER LU U+0E45 THAI CHARACTER LAKKHANGYAO

           // Put Lakkhangyao after Sara Ai Maimalai.
           // This rare symbol also comes after all characters. But when it is used in combination
           // with Ru or Lu, the combination is treated as a separate letter, a la CH sorting after
           // C in traditional Spanish.
           "& \u0e44"  // U+0E44 THAI CHARACTER SARA AI MAIMALAI
           "< \u0e45"  // U+0E45 THAI CHARACTER LAKKHANGYAO

           // Put Yamakkan just before Maitaikhu. It will behave like an accent (primary ignorable).
           "& [before 2] \u0E47" // U+0E47 THAI CHARACTER MAITAIKHU
           "<< \u0E4E"           // U+0E4E THAI CHARACTER YAMAKKAN

           // Put Thantakat and Nikhahit just after Mai Chattawa. They will behave like accents (primary ignorable).
            "& \u0E4B"  // U+0E4B  THAI CHARACTER MAI CHATTAWA
           "<< \u0E4C"  // U+0E4C  THAI CHARACTER THANTAKAT
           "<< \u0E4D"  // U+0E4D  THAI CHARACTER NIKHAHIT

           // Make punctuation and Paiyannoi...Khomut secondary ignorable, so that a string
           // containing them sorts after the otherwise identical string that does not contain them.

           "& [last secondary ignorable]"
           "<<< ' '"    // Space
           "<<< '-'"    // Hyphen
           "<<< '.'"    // Full stop
           "<<< '...'"  // Ellipsis (written as three consecutive full stops)
           "<<< \u0E2F" // U+0E2F  THAI CHARACTER PAIYANNOI (abbreviation mark)
           "<<< \u0E46" // U+0E46  THAI CHARACTER MAIYAMOK (repetition mark)
           "<<< \u0E4F" // U+0E4F  THAI CHARACTER FONGMAN (ancient symbol used as bullet mark)
           "<<< \u0E5A" // U+0E5A  THAI CHARACTER ANGKHANKHU (ancient symbol used to mark end of section or episode)
           "<<< \u0E5B" // U+0E5B  THAI CHARACTER KHOMUT (ancient symbol used to mark end of story)

        }
      }
    }
}