diff --git a/localedata/Makefile b/localedata/Makefile index dd41db6d8f..3b9802c84e 100644 --- a/localedata/Makefile +++ b/localedata/Makefile @@ -112,6 +112,7 @@ test-input := \ syr.UTF-8 \ szl_PL.UTF-8 \ tg_TJ.UTF-8 \ + th_TH.UTF-8 \ tk_TM.UTF-8 \ tr_TR.UTF-8 \ tt_RU.UTF-8 \ @@ -305,6 +306,7 @@ LOCALES := \ syr.UTF-8 \ szl_PL.UTF-8 \ tg_TJ.UTF-8 \ + th_TH.UTF-8 \ tk_TM.UTF-8 \ tr_TR.ISO-8859-9 \ tr_TR.UTF-8 \ diff --git a/localedata/locales/th_TH b/localedata/locales/th_TH index 7a10376e80..f97b6bdcb4 100644 --- a/localedata/locales/th_TH +++ b/localedata/locales/th_TH @@ -62,750 +62,96 @@ END LC_CTYPE LC_COLLATE -collating-element from "" -collating-element from "" -collating-element from "" -collating-element from "" -collating-element from "" - -collating-element from "" -collating-element from "" -collating-element from "" -collating-element from "" -collating-element from "" - -collating-element from "" -collating-element from "" -collating-element from "" -collating-element from "" -collating-element from "" - -collating-element from "" -collating-element from "" -collating-element from "" -collating-element from "" -collating-element from "" - -collating-element from "" -collating-element from "" -collating-element from "" -collating-element from "" -collating-element from "" - -collating-element from "" -collating-element from "" -collating-element from "" -collating-element from "" -collating-element from "" - -collating-element from "" -collating-element from "" -collating-element from "" -collating-element from "" -collating-element from "" - -collating-element from "" -collating-element from "" -collating-element from "" -collating-element from "" -collating-element from "" - -collating-element from "" -collating-element from "" -collating-element from "" -collating-element from "" -collating-element from "" - -collating-element from "" -collating-element from "" -collating-element from "" -collating-element from "" -collating-element from "" - 
-collating-element from "" -collating-element from "" -collating-element from "" -collating-element from "" -collating-element from "" - -collating-element from "" -collating-element from "" -collating-element from "" -collating-element from "" -collating-element from "" - -collating-element from "" -collating-element from "" -collating-element from "" -collating-element from "" -collating-element from "" - -collating-element from "" -collating-element from "" -collating-element from "" -collating-element from "" -collating-element from "" - -collating-element from "" -collating-element from "" -collating-element from "" -collating-element from "" -collating-element from "" - -collating-element from "" -collating-element from "" -collating-element from "" -collating-element from "" -collating-element from "" - -collating-element from "" -collating-element from "" -collating-element from "" -collating-element from "" -collating-element from "" - -collating-element from "" -collating-element from "" -collating-element from "" -collating-element from "" -collating-element from "" - -collating-element from "" -collating-element from "" -collating-element from "" -collating-element from "" -collating-element from "" - -collating-element from "" -collating-element from "" -collating-element from "" -collating-element from "" -collating-element from "" - -collating-element from "" -collating-element from "" -collating-element from "" -collating-element from "" -collating-element from "" - -collating-element from "" -collating-element from "" -collating-element from "" -collating-element from "" -collating-element from "" - -collating-element from "" -collating-element from "" -collating-element from "" -collating-element from "" -collating-element from "" - -collating-element from "" -collating-element from "" -collating-element from "" -collating-element from "" -collating-element from "" - -collating-element from "" -collating-element from "" -collating-element from "" 
-collating-element from "" -collating-element from "" - -collating-element from "" -collating-element from "" -collating-element from "" -collating-element from "" -collating-element from "" - -collating-element from "" -collating-element from "" -collating-element from "" -collating-element from "" -collating-element from "" - -collating-element from "" -collating-element from "" -collating-element from "" -collating-element from "" -collating-element from "" - -collating-element from "" -collating-element from "" -collating-element from "" -collating-element from "" -collating-element from "" - -collating-element from "" -collating-element from "" -collating-element from "" -collating-element from "" -collating-element from "" - -collating-element from "" -collating-element from "" -collating-element from "" -collating-element from "" -collating-element from "" - -collating-element from "" -collating-element from "" -collating-element from "" -collating-element from "" -collating-element from "" - -collating-element from "" -collating-element from "" -collating-element from "" -collating-element from "" -collating-element from "" - -collating-element from "" -collating-element from "" -collating-element from "" -collating-element from "" -collating-element from "" - -collating-element from "" -collating-element from "" -collating-element from "" -collating-element from "" -collating-element from "" - -collating-element from "" -collating-element from "" -collating-element from "" -collating-element from "" -collating-element from "" - -collating-element from "" -collating-element from "" -collating-element from "" -collating-element from "" -collating-element from "" - -collating-element from "" -collating-element from "" -collating-element from "" -collating-element from "" -collating-element from "" - -collating-element from "" -collating-element from "" -collating-element from "" -collating-element from "" -collating-element from "" - -collating-element from 
"" -collating-element from "" -collating-element from "" -collating-element from "" -collating-element from "" - -collating-element from "" -collating-element from "" -collating-element from "" -collating-element from "" -collating-element from "" - -collating-element from "" -collating-element from "" -collating-element from "" -collating-element from "" -collating-element from "" - -collating-element from "" -collating-element from "" -collating-element from "" -collating-element from "" -collating-element from "" - -collating-element from "" -collating-element from "" -collating-element from "" -collating-element from "" -collating-element from "" - -collating-symbol -collating-symbol -collating-symbol -collating-symbol -collating-symbol - -order_start forward;forward;forward;forward - -% definitions of extra collating symbols - - - - - - -UNDEFINED IGNORE;IGNORE;IGNORE;IGNORE - -% punctuation marks, ordered after ISO/IEC 14651 - IGNORE;IGNORE;;IGNORE % SPACE - IGNORE;IGNORE;;IGNORE % LOW LINE - IGNORE;IGNORE;;IGNORE % HYPHEN-MINUS - IGNORE;IGNORE;;IGNORE % COMMA - IGNORE;IGNORE;;IGNORE % SEMICOLON - IGNORE;IGNORE;;IGNORE % COLON - IGNORE;IGNORE;;IGNORE % EXCLAMATION MARK - IGNORE;IGNORE;;IGNORE % QUESTION MARK - IGNORE;IGNORE;;IGNORE % SOLIDUS - IGNORE;IGNORE;;IGNORE % FULL STOP - IGNORE;IGNORE;;IGNORE % THAI CHARACTER PAIYANNOI - IGNORE;IGNORE;;IGNORE % THAI CHARACTER MAIYAMOK - IGNORE;IGNORE;;IGNORE % GRAVE ACCENT - IGNORE;IGNORE;;IGNORE % CIRCUMFLEX - IGNORE;IGNORE;;IGNORE % TILDE - IGNORE;IGNORE;;IGNORE % APOSTROPHE - IGNORE;IGNORE;;IGNORE % QUOTATION MARK - IGNORE;IGNORE;;IGNORE % LEFT PAREN. - IGNORE;IGNORE;;IGNORE % LT BRACKET - IGNORE;IGNORE;;IGNORE % LEFT CURLY BRACKET - IGNORE;IGNORE;;IGNORE % RIGHT CURLY BRACKET - IGNORE;IGNORE;;IGNORE % RT BRACKET - IGNORE;IGNORE;;IGNORE % RIGHT PAREN. 
- IGNORE;IGNORE;;IGNORE % COMMERCIAL AT - IGNORE;IGNORE;;IGNORE % THAI CHARACTER SYMBOL BAHT - IGNORE;IGNORE;;IGNORE % DOLLAR SIGN - IGNORE;IGNORE;;IGNORE % THAI CHARACTER FONGMAN - IGNORE;IGNORE;;IGNORE % THAI CHARACTER ANGKHANKHU - IGNORE;IGNORE;;IGNORE % THAI CHARACTER KHOMUT - IGNORE;IGNORE;;IGNORE % ASTERISK - IGNORE;IGNORE;;IGNORE % BACK SOLIDUS - IGNORE;IGNORE;;IGNORE % AMPERSAND - IGNORE;IGNORE;;IGNORE % NUMBER SIGN - IGNORE;IGNORE;;IGNORE % PERCENT - IGNORE;IGNORE;;IGNORE % PLUS - IGNORE;IGNORE;;IGNORE % LESS THAN - IGNORE;IGNORE;;IGNORE % EQUAL - IGNORE;IGNORE;;IGNORE % GREATER THAN - IGNORE;IGNORE;;IGNORE % VERTICAL LINE - -% Thai tone marks and diacritics - IGNORE;;; % THAI CHARACTER YAMAKKAN - IGNORE;;; % THAI CHARACTER PINTHU - IGNORE;;; % THAI CHARACTER THANTHAKHAT - IGNORE;;; % THAI CHARACTER MAITAIKHU - IGNORE;;; % THAI CHARACTER MAI EK - IGNORE;;; % THAI CHARACTER MAI THO - IGNORE;;; % THAI CHARACTER MAI TRI - IGNORE;;; % THAI CHARACTER MAI CHATTAWA - -% Arabic and Thai decimal digits - ;;; % DIGIT ZERO - ;;; % THAI DIGIT ZERO - ;;; % DIGIT ONE - ;;; % THAI DIGIT ONE - ;;; % DIGIT TWO - ;;; % THAI DIGIT TWO - ;;; % DIGIT THREE - ;;; % THAI DIGIT THREE - ;;; % DIGIT FOUR - ;;; % THAI DIGIT FOUR - ;;; % DIGIT FIVE - ;;; % THAI DIGIT FIVE - ;;; % DIGIT SIX - ;;; % THAI DIGIT SIX - ;;; % DIGIT SEVEN - ;;; % THAI DIGIT SEVEN - ;;; % DIGIT EIGHT - ;;; % THAI DIGIT EIGHT - ;;; % DIGIT NINE - ;;; % THAI DIGIT NINE - -% Latin alphabet - ;;; % A - ;;; % a - ;;; % B - ;;; % b - ;;; % C - ;;; % c - ;;; % D - ;;; % d - ;;; % E - ;;; % e - ;;; % F - ;;; % f - ;;; % G - ;;; % g - ;;; % H - ;;; % h - ;;; % I - ;;; % i - ;;; % J - ;;; % j - ;;; % K - ;;; % k - ;;; % L - ;;; % l - ;;; % M - ;;; % m - ;;; % N - ;;; % n - ;;; % O - ;;; % o - ;;; % P - ;;; % p - ;;; % Q - ;;; % q - ;;; % R - ;;; % r - ;;; % S - ;;; % s - ;;; % T - ;;; % t - ;;; % U - ;;; % u - ;;; % V - ;;; % v - ;;; % W - ;;; % w - ;;; % X - ;;; % x - ;;; % Y - ;;; % y - ;;; % Z - ;;; % z +% Copy the 
template from ISO/IEC 14651 +copy "iso14651_t1" +% CLDR collation rules for Thai: +% (see: https://github.com/unicode-org/cldr/blob/main/common/collation/th.xml) % -% Thai consonants, with leading vowels rearrangement +%[normalization on] +%[alternate shifted] +%[reorder Thai] +% # +% # The following tailoring is an adjustment of the +% # DUCET collation order for PAIYANNOI, MAIYAMOK, +% # NIKHAHIT, LAKKHANGYAO, and PHINTHU. This gives +% # a sort order as defined in the Royal Institute +% # Dictionary 2542 B.E. Edition (1999 A.D.). +% # +% &[before 1]๚<ฯ # should be "variable" % - ;;; % THAI CHARACTER KO KAI - "";;; - "";;; - "";;; - "";;; - "";;; +% &๛<ๆ # should be "variable" +% +% &๎<<์ +% &[before 1]ะ<ํ +% &า<<<ๅ +% &าํ<<<ํา<<<ำ +% &ๅํ<<<ํๅ +% &ไ<ฺ +% # consider: order pali virama as secondary different from yammacan (another old virama) +% # &๎ +% # <<ฺ +% # - ;;; % THAI CHARACTER KHO KHAI - "";;; - "";;; - "";;; - "";;; - "";;; +collating-element from "" +% This is already defined in iso14651_t1: +% collating-element from "" % decomposition of THAI CHARACTER SARA AM - ;;; % THAI CHARACTER KHO KHUAT - "";;; - "";;; - "";;; - "";;; - "";;; +collating-element from "" % LAKKHANGYAO + NIKHAHIT +collating-element from "" % NIKHAHIT + LAKKHANGYAO +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% Finished defining collating-elements and collating-symbols +% +% One dummy reorder-after statement here to avoid a syntax error +% because the first rule reordering stuff starts without a reorder-after: +collating-symbol +reorder-after % FULL STOP + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - ;;; % THAI CHARACTER KHO KHWAI - "";;; - "";;; - "";;; - "";;; - "";;; +% &[before 1]๚<ฯ # should be "variable" +% ๚ U+0E5A should keep "IGNORE" as the primary weight (as defined in iso14651_t1_common). +% Therefore, I cannot sort ฯ U+0E2F before ๚ U+0E5A as a primary difference. 
+% Sorting it before as a secondary difference works though. To sort the existing test data +% in the correct order, this seems good enough. The previous collation in +% this th_TH locale, which did not use 'copy "iso14651_t1"' had these characters +% as a tertiary difference: +% IGNORE;IGNORE;;IGNORE % THAI CHARACTER PAIYANNOI +% IGNORE;IGNORE;;IGNORE % THAI CHARACTER ANGKHANKHU + IGNORE;"";IGNORE; % ฯ THAI CHARACTER PAIYANNOI + IGNORE;"";IGNORE; % ๚ THAI CHARACTER ANGKHANKHU +% &๛<ๆ # should be "variable" +% ๛ U+0E5B should keep "IGNORE" as the primary weight (as defined in iso14651_t1_common). +% Therefore I cannot sort ๆ U+0E46 after ๛ U+0E5B as a primary difference. +% Sorting it after as a secondary difference works though and it seems good enough +% to sort the existing test data in the correct order. The previous collation in +% this th_TH locale, which did not use 'copy "iso14651_t1"' had these characters +% as a tertiary difference: +% IGNORE;IGNORE;;IGNORE % THAI CHARACTER MAIYAMOK +% IGNORE;IGNORE;;IGNORE % THAI CHARACTER KHOMUT + IGNORE;"";IGNORE; % ๛ THAI CHARACTER KHOMUT + IGNORE;"";IGNORE; % ๆ THAI CHARACTER MAIYAMOK +% &๎<<์ + IGNORE;;IGNORE; % ๎ THAI CHARACTER YAMAKKAN + IGNORE;;IGNORE; % ์ THAI CHARACTER THANTHAKHAT +% &[before 1]ะ<ํ + "";;; % ํ THAI CHARACTER NIKHAHIT + "";;; % ะ THAI CHARACTER SARA A +% &า<<<ๅ + ;;; % า THAI CHARACTER SARA AA + ;;; % ๅ THAI CHARACTER LAKKHANGYAO +% &าํ<<<ํา<<<ำ + ;;; % าํ decomposition of THAI CHARACTER SARA AM + ;;; % ํา decomposition of THAI CHARACTER SARA AM + ;;; % ำ THAI CHARACTER SARA AM +% &ๅํ<<<ํๅ + ;;; % LAKKHANGYAO + NIKHAHIT + ;;; % NIKHAHIT + LAKKHANGYAO +% &ไ<ฺ +reorder-after + - ;;; % THAI CHARACTER KHO KHON - "";;; - "";;; - "";;; - "";;; - "";;; - - ;;; % THAI CHARACTER KHO RAKHANG - "";;; - "";;; - "";;; - "";;; - "";;; - - ;;; % THAI CHARACTER NGO NGU - "";;; - "";;; - "";;; - "";;; - "";;; - - ;;; % THAI CHARACTER CHO CHAN - "";;; - "";;; - "";;; - "";;; - "";;; - - ;;; % THAI CHARACTER CHO 
CHING - "";;; - "";;; - "";;; - "";;; - "";;; - - ;;; % THAI CHARACTER CHO CHANG - "";;; - "";;; - "";;; - "";;; - "";;; - - ;;; % THAI CHARACTER SO SO - "";;; - "";;; - "";;; - "";;; - "";;; - - ;;; % THAI CHARACTER CHO CHOE - "";;; - "";;; - "";;; - "";;; - "";;; - - ;;; % THAI CHARACTER YO YING - "";;; - "";;; - "";;; - "";;; - "";;; - - ;;; % THAI CHARACTER DO CHADA - "";;; - "";;; - "";;; - "";;; - "";;; - - ;;; % THAI CHARACTER TO PATAK - "";;; - "";;; - "";;; - "";;; - "";;; - - ;;; % THAI CHARACTER THO THAN - "";;; - "";;; - "";;; - "";;; - "";;; - - ;;; % THAI CHARACTER THO NANGMONTHO - "";;; - "";;; - "";;; - "";;; - "";;; - - ;;; % THAI CHARACTER THO PHUTHAO - "";;; - "";;; - "";;; - "";;; - "";;; - - ;;; % THAI CHARACTER NO NEN - "";;; - "";;; - "";;; - "";;; - "";;; - - ;;; % THAI CHARACTER DO DEK - "";;; - "";;; - "";;; - "";;; - "";;; - - ;;; % THAI CHARACTER TO TAO - "";;; - "";;; - "";;; - "";;; - "";;; - - ;;; % THAI CHARACTER THO THUNG - "";;; - "";;; - "";;; - "";;; - "";;; - - ;;; % THAI CHARACTER THO THAHAN - "";;; - "";;; - "";;; - "";;; - "";;; - - ;;; % THAI CHARACTER THO THONG - "";;; - "";;; - "";;; - "";;; - "";;; - - ;;; % THAI CHARACTER NO NU - "";;; - "";;; - "";;; - "";;; - "";;; - - ;;; % THAI CHARACTER BO BAIMAI - "";;; - "";;; - "";;; - "";;; - "";;; - - ;;; % THAI CHARACTER PO PLA - "";;; - "";;; - "";;; - "";;; - "";;; - - ;;; % THAI CHARACTER PHO PHUNG - "";;; - "";;; - "";;; - "";;; - "";;; - - ;;; % THAI CHARACTER FO FA - "";;; - "";;; - "";;; - "";;; - "";;; - - ;;; % THAI CHARACTER PHO PHAN - "";;; - "";;; - "";;; - "";;; - "";;; - - ;;; % THAI CHARACTER FO FAN - "";;; - "";;; - "";;; - "";;; - "";;; - - ;;; % THAI CHARACTER PHO SAMPHAO - "";;; - "";;; - "";;; - "";;; - "";;; - - ;;; % THAI CHARACTER MO MA - "";;; - "";;; - "";;; - "";;; - "";;; - - ;;; % THAI CHARACTER YO YAK - "";;; - "";;; - "";;; - "";;; - "";;; - - ;;; % THAI CHARACTER RO RUA - "";;; - "";;; - "";;; - "";;; - "";;; - - ;;; % THAI CHARACTER RU - - ;;; % 
THAI CHARACTER LO LING - "";;; - "";;; - "";;; - "";;; - "";;; - - ;;; % THAI CHARACTER LU - - ;;; % THAI CHARACTER WO WAEN - "";;; - "";;; - "";;; - "";;; - "";;; - - ;;; % THAI CHARACTER SO SALA - "";;; - "";;; - "";;; - "";;; - "";;; - - ;;; % THAI CHARACTER SO RUSI - "";;; - "";;; - "";;; - "";;; - "";;; - - ;;; % THAI CHARACTER SO SUA - "";;; - "";;; - "";;; - "";;; - "";;; - - ;;; % THAI CHARACTER HO HIP - "";;; - "";;; - "";;; - "";;; - "";;; - - ;;; % THAI CHARACTER LO CHULA - "";;; - "";;; - "";;; - "";;; - "";;; - - ;;; % THAI CHARACTER O ANG - "";;; - "";;; - "";;; - "";;; - "";;; - - ;;; % THAI CHARACTER HO NOKHUK - "";;; - "";;; - "";;; - "";;; - "";;; - - ;;; % THAI CHARACTER NIKHAHIT - -% order of Thai vowels - ;;; % THAI CHARACTER SARA A - ;;; % THAI CHARACTER MAI HAN-AKAT - ;;; % THAI CHARACTER SARA AA - ;;; % THAI CHARACTER LAKKHANGYAO - ;;; % THAI CHARACTER SARA AM - ;;; % THAI CHARACTER SARA I - ;;; % THAI CHARACTER SARA II - ;;; % THAI CHARACTER SARA UE - ;;; % THAI CHARACTER SARA UEE - ;;; % THAI CHARACTER SARA U - ;;; % THAI CHARACTER SARA UU - ;;; % THAI CHARACTER SARA E - ;;; % THAI CHARACTER SARA AE - ;;; % THAI CHARACTER SARA O - ;;; % THAI CHARACTER SARA AI MAIMUAN - ;;; % THAI CHARACTER SARA AI MAIMALAI - -order_end +reorder-end END LC_COLLATE diff --git a/localedata/th_TH.UTF-8.in b/localedata/th_TH.UTF-8.in new file mode 100644 index 0000000000..06263dda34 --- /dev/null +++ b/localedata/th_TH.UTF-8.in @@ -0,0 +1,163 @@ +* +. 
+๎ +์ +ฯ +๚ +๛ +ๆ +0 +๐ +0000 +๐๐๐๐ +10 +๑๐ +9 +๙ +9999 +๙๙๙๙ +a +A +๎A +์a +ฯä +๚a +๛ä +ๆa +b +B +กก +กรรม +กรรม์ +กราบ +กะเกณฑ์ +กัก +ก้าว +กำ +กิน +กี่ +กึ๋น +กุน +กูด +เก้ง +เกล้า +เกลียว +เก้า +เกาะ +เกี่ยว +เกี๊ยะ +เกือก +แกง +แกะ +โกน +โกร๋น +ใกล้ +ไก่ +ไกล +ข้น +ขนาบ +ขาง +ข่าง +ข้าง +ข้างๆ +ข้างกระดาน +ข้างขึ้น +ข้างควาย +ข้างๆ คูๆ +ข้างเงิน +ข้างแรม +ข้างออก +เข็ด +เขน +เข็น +เข่น +แข็ง +แข่ง +แข้ง +แข้งขวา +แข็งขัน +แข่งขัน +แขน +แขวะ +ฃวด +ครรภ- +ครรภ์ +ฅอ +งาม +จุมพล +จุํพล +ฉาก +ชาย +ซาบ +ญาณ +ฎีกา +ฐาน +ฑาหะ +เฒ่า +เณร +ดนตรี +ตลาด +ถนน +ทูลเกล้า +ทูลเกล้าฯ +ทูลเกล้าทูลกระหม่อม +ธนาคาร +น้า +น้ำ +นี้ +บุญญา +บุญหลง +ปา +ป่า +ป้า +ป๊า +ป๋า +ปาน +ป่าน +ป้าน +แป้ง +ผัด +ฝา +ฯพณฯ +พณิชย์ +ฟาง +ภาษี +ม้า +ย่อง +รอง +ฤทธิ์ +ฤษี +ฤๅษี +ลลิตา +ฦๅชา +วก +ศาล +ษมา +สกุล +หริภุญชัย +หฤทัย +หลง +แหง่ +แห่ง +แหนม +แหนหวง +แหบ +แหม +อาน +ฮา +ไฮโล +ํ +ํä +ะ +ะa +า +ๅ +ๅํ +ํๅ +ๅa +าä +าํ +ํา +ำ +ไ +ฺ