scuffed-code/icu4c/source/data/coll/th.txt
Steven R. Loomis fe11d5ce2d ICU-2487 move collation data into new tree
X-SVN-Rev: 14965
2004-04-14 19:39:08 +00:00

62 lines
2.9 KiB
Plaintext

// ***************************************************************************
// *
// * Copyright (C) 1999-2004, International Business Machines
// * Corporation and others. All Rights Reserved.
// *
// ***************************************************************************
// Resource table for the "th" locale. In this file it carries a bundle version
// string and a single collation entry ("standard") with its tailoring rules.
th {
Version{ "2.0" }
// -------------------- ibm.597 --------------------
//
// First put in all of the consonants, after Z
//
// NOTE(review): none of the rules below mention Z or reposition the consonants;
// the heading above may be a leftover from an earlier rule set -- confirm.
collations {
standard {
// Version string of this collation entry (distinct from the bundle Version above).
Version { "3.0" }
// The tailoring is written as a run of quoted rule fragments, one per line.
// The order of the fragments matters: each "&" line sets the position that the
// following "<", "<<" or "<<<" lines are placed relative to.
// Rule operators used below:
//   &    reset to the position of the given character
//   <    primary (letter-level) difference
//   <<   secondary (accent-level) difference
//   <<<  tertiary difference
Sequence {
// Tailoring of UCA for Thai Royal Institute Dictionary Sort, B.E. 2525
"[normalization on]" // needed because Thai uses multiple accents
// Put Ru with Lakkhangyao after Ru, and Lu with Lakkhangyao after Lu.
// See the comment below on Lakkhangyao.
"& \u0e24" // U+0E24 THAI CHARACTER RU
"< \u0e24\u0e45" // U+0E24 THAI CHARACTER RU U+0E45 THAI CHARACTER LAKKHANGYAO
"& \u0e26" // U+0E26 THAI CHARACTER LU
"< \u0e26\u0e45" // U+0E26 THAI CHARACTER LU U+0E45 THAI CHARACTER LAKKHANGYAO
// Put Lakkhangyao after Sara Ai Maimalai.
// On its own, this rare symbol comes after all characters. But when it is used in
// combination with Ru or Lu, the combination is treated as a separate letter, much
// like CH sorting after C in traditional Spanish.
"& \u0e44" // U+0E44 THAI CHARACTER SARA AI MAIMALAI
"< \u0e45" // U+0E45 THAI CHARACTER LAKKHANGYAO
// Put Yamakkan just before Maitaikhu. It will behave like an accent (primary ignorable).
"& [before 2] \u0E47" // U+0E47 THAI CHARACTER MAITAIKHU
"<< \u0E4E" // U+0E4E THAI CHARACTER YAMAKKAN
// Put Thantakat and Nikhahit just after Mai Chattawa. They will behave like accents (primary ignorable).
"& \u0E4B" // U+0E4B THAI CHARACTER MAI CHATTAWA
"<< \u0E4C" // U+0E4C THAI CHARACTER THANTAKAT
"<< \u0E4D" // U+0E4D THAI CHARACTER NIKHAHIT
// Make punctuation and the symbols Paiyannoi through Khomut (listed below) secondary
// ignorable. A string containing them will then sort after the otherwise identical
// string that does not contain them.
"& [last secondary ignorable]"
"<<< ' '" // Space
"<<< '-'" // Hyphen
"<<< '.'" // Full stop
"<<< '...'" // Ellipsis (written as three consecutive full stops)
"<<< \u0E2F" // U+0E2F THAI CHARACTER PAIYANNOI (abbreviation mark)
"<<< \u0E46" // U+0E46 THAI CHARACTER MAIYAMOK (repetition mark)
"<<< \u0E4F" // U+0E4F THAI CHARACTER FONGMAN (ancient symbol used as bullet mark)
"<<< \u0E5A" // U+0E5A THAI CHARACTER ANGKHANKHU (ancient symbol used to mark end of section or episode)
"<<< \u0E5B" // U+0E5B THAI CHARACTER KHOMUT (ancient symbol used to mark end of story)
}
}
}
}