mirror of
https://sourceware.org/git/glibc.git
synced 2024-11-22 04:50:07 +00:00
aceda10bd5
I made it agree as much as possible with the rules from CLDR (see: https://github.com/unicode-org/cldr/blob/main/common/collation/th.xml). It seems to be impossible to follow the CLDR rules &[before 1]๚<ฯ # should be "variable" and &๛<ๆ # should be "variable" exactly though. These ask for a primary difference in punctuation characters whose primary weight should be "IGNORE". But using a secondary difference instead still sorts the test data correctly and the previously used collation in th_TH used tertiary differences for these characters. There was old localedata/th_TH.in test data in TIS-620 encoding which was not used (it was not in the localedata/Makefile). I converted this to UTF-8 and moved it to localedata/th_TH.UTF-8.in and added it to localedata/Makefile. Using the existing collation rules in the th_TH locale did not sort that test file completely correctly; I think my new collation rules based on iso14651_t1 are better.
296 lines
9.7 KiB
Plaintext
296 lines
9.7 KiB
Plaintext
comment_char %
|
|
escape_char /
|
|
|
|
% This file is part of the GNU C Library and contains locale data.
|
|
% The Free Software Foundation does not claim any copyright interest
|
|
% in the locale data contained in this file. The foregoing does not
|
|
% affect the license of the GNU C Library as a whole. It does not
|
|
% exempt you from the conditions of the license if your use would
|
|
% otherwise be governed by that license.
|
|
|
|
% Thailand (Thai) locale
|
|
%
|
|
% Name: th_TH
|
|
% Author: Theppitak Karoonboonyanan
|
|
% Contact: Software and Language Engineering Laboratory (SLL)
|
|
% National Electronics and Computer Technology Center (NECTEC)
|
|
% National Science and Technology Development Agency (NSTDA)
|
|
% Ministry of Science Technology and Environment
|
|
% 22nd Floor, Gypsum Metropolitan Tower
|
|
% Sri Ayuthaya Rd, Ratchathewee, Bangkok 10400
|
|
% THAILAND.
|
|
% E-mail: thep@links.nectec.or.th
|
|
% Language: Thai
|
|
% Territory: Thailand
|
|
% Revision: 0.5.3
|
|
% Date: 1999-05-28
|
|
|
|
LC_IDENTIFICATION
|
|
title "Thai locale for Thailand"
|
|
source ""
|
|
address ""
|
|
contact ""
|
|
email "bug-glibc-locales@gnu.org"
|
|
tel ""
|
|
fax ""
|
|
language "Thai"
|
|
territory "Thailand"
|
|
revision "1.0"
|
|
date "2000-06-29"
|
|
|
|
category "i18n:2012";LC_IDENTIFICATION
|
|
category "i18n:2012";LC_CTYPE
|
|
category "i18n:2012";LC_COLLATE
|
|
category "i18n:2012";LC_TIME
|
|
category "i18n:2012";LC_NUMERIC
|
|
category "i18n:2012";LC_MONETARY
|
|
category "i18n:2012";LC_MESSAGES
|
|
category "i18n:2012";LC_PAPER
|
|
category "i18n:2012";LC_NAME
|
|
category "i18n:2012";LC_ADDRESS
|
|
category "i18n:2012";LC_TELEPHONE
|
|
category "i18n:2012";LC_MEASUREMENT
|
|
END LC_IDENTIFICATION
|
|
|
|
LC_CTYPE
|
|
copy "i18n"
|
|
|
|
translit_start
|
|
include "translit_combining";""
|
|
translit_end
|
|
END LC_CTYPE
|
|
|
|
LC_COLLATE
|
|
|
|
% Copy the template from ISO/IEC 14651
|
|
copy "iso14651_t1"
|
|
|
|
% CLDR collation rules for Thai:
|
|
% (see: https://github.com/unicode-org/cldr/blob/main/common/collation/th.xml)
|
|
%
|
|
%[normalization on]
|
|
%[alternate shifted]
|
|
%[reorder Thai]
|
|
% #
|
|
% # The following tailoring is an adjustment of the
|
|
% # DUCET collation order for PAIYANNOI, MAIYAMOK,
|
|
% # NIKHAHIT, LAKKHANGYAO, and PHINTHU. This gives
|
|
% # a sort order as defined in the Royal Institute
|
|
% # Dictionary 2542 B.E. Edition (1999 A.D.).
|
|
% #
|
|
% &[before 1]๚<ฯ # should be "variable"
|
|
%
|
|
% &๛<ๆ # should be "variable"
|
|
%
|
|
% &๎<<์
|
|
% &[before 1]ะ<ํ
|
|
% &า<<<ๅ
|
|
% &าํ<<<ํา<<<ำ
|
|
% &ๅํ<<<ํๅ
|
|
% &ไ<ฺ
|
|
% # consider: order pali virama as secondary different from yammacan (another old virama)
|
|
% # &๎
|
|
% # <<ฺ
|
|
% #
|
|
|
|
collating-element <U0E32_0E4D> from "<U0E32><U0E4D>"
|
|
% This is already defined in iso14651_t1:
|
|
% collating-element <U0E4D_0E32> from "<U0E4D><U0E32>" % decomposition of THAI CHARACTER SARA AM
|
|
|
|
collating-element <U0E45_0E4D> from "<U0E45><U0E4D>" % LAKKHANGYAO + NIKHAHIT
|
|
collating-element <U0E4D_0E45> from "<U0E4D><U0E45>" % NIKHAHIT + LAKKHANGYAO
|
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
% Finished defining collating-elements and collating-symbols
|
|
%
|
|
% One dummy reorder-after statement here to avoid a syntax error
|
|
% because the first rule reordering stuff starts without a reorder-after:
|
|
collating-symbol <dummy>
|
|
reorder-after <S002E> % FULL STOP
|
|
<dummy>
|
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
|
|
% &[before 1]๚<ฯ # should be "variable"
|
|
% ๚ U+0E5A should keep "IGNORE" as the primary weight (as defined in iso14651_t1_common).
|
|
% Therefore, I cannot sort ฯ U+0E2F before ๚ U+0E5A as a primary difference.
|
|
% Sorting it before as a secondary difference works though. To sort the existing test data
|
|
% in the correct order, this seems good enough. The previous collation in
|
|
% this th_TH locale, which did not use 'copy "iso14651_t1"' had these characters
|
|
% as a tertiary difference:
|
|
% <U0E2F> IGNORE;IGNORE;<U0E2F>;IGNORE % THAI CHARACTER PAIYANNOI
|
|
% <U0E5A> IGNORE;IGNORE;<U0E5A>;IGNORE % THAI CHARACTER ANGKHANKHU
|
|
<U0E2F> IGNORE;"<U0E5A><VRNT1>";IGNORE;<U0E2F> % ฯ THAI CHARACTER PAIYANNOI
|
|
<U0E5A> IGNORE;"<U0E5A><VRNT2>";IGNORE;<U0E5A> % ๚ THAI CHARACTER ANGKHANKHU
|
|
% &๛<ๆ # should be "variable"
|
|
% ๛ U+0E5B should keep "IGNORE" as the primary weight (as defined in iso14651_t1_common).
|
|
% Therefore I cannot sort ๆ U+0E46 after ๛ U+0E5B as a primary difference.
|
|
% Sorting it after as a secondary difference works though and it seems good enough
|
|
% to sort the existing test data in the correct order. The previous collation in
|
|
% this th_TH locale, which did not use 'copy "iso14651_t1"' had these characters
|
|
% as a tertiary difference:
|
|
% <U0E46> IGNORE;IGNORE;<U0E46>;IGNORE % THAI CHARACTER MAIYAMOK
|
|
% <U0E5B> IGNORE;IGNORE;<U0E5B>;IGNORE % THAI CHARACTER KHOMUT
|
|
<U0E5B> IGNORE;"<U0E5B><VRNT1>";IGNORE;<U0E5B> % ๛ THAI CHARACTER KHOMUT
|
|
<U0E46> IGNORE;"<U0E5B><VRNT2>";IGNORE;<U0E46> % ๆ THAI CHARACTER MAIYAMOK
|
|
% &๎<<์
|
|
<U0E4E> IGNORE;<D0E4E>;IGNORE;<U0E4E> % ๎ THAI CHARACTER YAMAKKAN
|
|
<U0E4C> IGNORE;<D0E4C>;IGNORE;<U0E4C> % ์ THAI CHARACTER THANTHAKHAT
|
|
% &[before 1]ะ<ํ
|
|
<U0E4D> "<S0E30><VRNT1>";<BASE>;<MIN>;<U0E4D> % ํ THAI CHARACTER NIKHAHIT
|
|
<U0E30> "<S0E30><VRNT2>";<BASE>;<MIN>;<U0E30> % ะ THAI CHARACTER SARA A
|
|
% &า<<<ๅ
|
|
<U0E32> <S0E32>;<BASE>;<MIN>;<U0E32> % า THAI CHARACTER SARA AA
|
|
<U0E45> <S0E32>;<BASE>;<COMPAT>;<U0E45> % ๅ THAI CHARACTER LAKKHANGYAO
|
|
% &าํ<<<ํา<<<ำ
|
|
<U0E32_0E4D> <S0E33>;<BASE>;<MIN>;<U0E33> % าํ decomposition of THAI CHARACTER SARA AM
|
|
<U0E4D_0E32> <S0E33>;<BASE>;<CAP>;<U0E33> % ํา decomposition of THAI CHARACTER SARA AM
|
|
<U0E33> <S0E33>;<BASE>;<MAX>;<U0E33> % ำ THAI CHARACTER SARA AM
|
|
% &ๅํ<<<ํๅ
|
|
<U0E45_0E4D> <S0E32>;<BASE>;<CAP>;<U0E45> % LAKKHANGYAO + NIKHAHIT
|
|
<U0E4D_0E45> <S0E32>;<BASE>;<MAX>;<U0E45> % NIKHAHIT + LAKKHANGYAO
|
|
% &ไ<ฺ
|
|
reorder-after <S0E44>
|
|
<S0E3A>
|
|
|
|
reorder-end
|
|
|
|
END LC_COLLATE
|
|
|
|
LC_MONETARY
|
|
|
|
int_curr_symbol "THB "
|
|
currency_symbol "<U0E3F>"
|
|
mon_decimal_point "."
|
|
mon_thousands_sep ","
|
|
mon_grouping 3
|
|
positive_sign ""
|
|
negative_sign "-"
|
|
int_frac_digits 2
|
|
frac_digits 2
|
|
p_cs_precedes 1
|
|
p_sep_by_space 2
|
|
n_cs_precedes 1
|
|
n_sep_by_space 2
|
|
p_sign_posn 4
|
|
n_sign_posn 4
|
|
|
|
END LC_MONETARY
|
|
|
|
LC_NUMERIC
|
|
|
|
decimal_point "."
|
|
thousands_sep ","
|
|
grouping 3
|
|
|
|
END LC_NUMERIC
|
|
|
|
LC_TIME
|
|
|
|
abday "<U0E2D><U0E32>.";"<U0E08>.";"<U0E2D>.";/
|
|
"<U0E1E>.";"<U0E1E><U0E24>.";"<U0E28>.";/
|
|
"<U0E2A>."
|
|
day "<U0E2D><U0E32><U0E17><U0E34><U0E15><U0E22><U0E4C>";/
|
|
"<U0E08><U0E31><U0E19><U0E17><U0E23><U0E4C>";/
|
|
"<U0E2D><U0E31><U0E07><U0E04><U0E32><U0E23>";/
|
|
"<U0E1E><U0E38><U0E18>";/
|
|
"<U0E1E><U0E24><U0E2B><U0E31><U0E2A><U0E1A><U0E14><U0E35>";/
|
|
"<U0E28><U0E38><U0E01><U0E23><U0E4C>";/
|
|
"<U0E40><U0E2A><U0E32><U0E23><U0E4C>"
|
|
abmon "<U0E21>.<U0E04>.";/
|
|
"<U0E01>.<U0E1E>.";/
|
|
"<U0E21><U0E35>.<U0E04>.";/
|
|
"<U0E40><U0E21>.<U0E22>.";/
|
|
"<U0E1E>.<U0E04>.";/
|
|
"<U0E21><U0E34>.<U0E22>.";/
|
|
"<U0E01>.<U0E04>.";/
|
|
"<U0E2A>.<U0E04>.";/
|
|
"<U0E01>.<U0E22>.";/
|
|
"<U0E15>.<U0E04>.";/
|
|
"<U0E1E>.<U0E22>.";/
|
|
"<U0E18>.<U0E04>."
|
|
mon "<U0E21><U0E01><U0E23><U0E32><U0E04><U0E21>";/
|
|
"<U0E01><U0E38><U0E21><U0E20><U0E32><U0E1E><U0E31><U0E19><U0E18><U0E4C>";/
|
|
"<U0E21><U0E35><U0E19><U0E32><U0E04><U0E21>";/
|
|
"<U0E40><U0E21><U0E29><U0E32><U0E22><U0E19>";/
|
|
"<U0E1E><U0E24><U0E29><U0E20><U0E32><U0E04><U0E21>";/
|
|
"<U0E21><U0E34><U0E16><U0E38><U0E19><U0E32><U0E22><U0E19>";/
|
|
"<U0E01><U0E23><U0E01><U0E0E><U0E32><U0E04><U0E21>";/
|
|
"<U0E2A><U0E34><U0E07><U0E2B><U0E32><U0E04><U0E21>";/
|
|
"<U0E01><U0E31><U0E19><U0E22><U0E32><U0E22><U0E19>";/
|
|
"<U0E15><U0E38><U0E25><U0E32><U0E04><U0E21>";/
|
|
"<U0E1E><U0E24><U0E28><U0E08><U0E34><U0E01><U0E32><U0E22><U0E19>";/
|
|
"<U0E18><U0E31><U0E19><U0E27><U0E32><U0E04><U0E21>"
|
|
% Appropriate date & time representation
|
|
d_t_fmt "%a %e %b %Ey, %H:%M:%S"
|
|
% Appropriate date representation
|
|
d_fmt "%d//%m//%Ey"
|
|
% Appropriate time representation
|
|
t_fmt "%H:%M:%S"
|
|
% AM/PM signs
|
|
am_pm "AM";"PM"
|
|
% Appropriate 12-hour clock representation
|
|
t_fmt_ampm "%I:%M:%S %p"
|
|
% Era : Buddhist Era
|
|
era "+:1:-543//01//01:+*:<U0E1E>.<U0E28>.:%EC %Ey"
|
|
era_d_fmt "%e %b %Ey"
|
|
era_t_fmt "%H.%M.%S <U0E19>."
|
|
era_d_t_fmt "<U0E27><U0E31><U0E19>%A<U0E17><U0E35><U0E48> %e %B %EC %Ey, %H.%M.%S <U0E19>."
|
|
% Appropriate date representation (date(1))
|
|
date_fmt "%a %e %b %Ey %H:%M:%S %Z"
|
|
week 7;19971130;1
|
|
END LC_TIME
|
|
|
|
LC_MESSAGES
|
|
|
|
% yesexpr : begins with "y", "Y", or CHO CHANG
|
|
% noexpr : begins with "n", "N", or MO MA
|
|
% yesstr = "Chai2" = MAIMUAN + CHO CHANG + MAI EK
|
|
% nostr = "Mai2Chai2" = MAIMALAI + MO MA + MAI EK + MAIMUAN + CHO CHANG + MAI EK
|
|
|
|
yesexpr "^[+1yY<U0E0A>]"
|
|
noexpr "^[-0nN<U0E21>]"
|
|
yesstr "<U0E43><U0E0A><U0E48>"
|
|
nostr "<U0E44><U0E21><U0E48><U0E43><U0E0A><U0E48>"
|
|
|
|
END LC_MESSAGES
|
|
|
|
LC_PAPER
|
|
copy "i18n"
|
|
END LC_PAPER
|
|
|
|
LC_TELEPHONE
|
|
tel_int_fmt "+%c %a %l"
|
|
tel_dom_fmt "0-%a%l"
|
|
int_select "001"
|
|
int_prefix "66"
|
|
END LC_TELEPHONE
|
|
|
|
LC_MEASUREMENT
|
|
copy "i18n"
|
|
END LC_MEASUREMENT
|
|
|
|
LC_NAME
|
|
name_fmt "%d%t%g%t%m%t%f"
|
|
name_gen "<U0E04><U0E38><U0E13>"
|
|
name_miss "<U0E19><U0E32><U0E07><U0E2A><U0E32><U0E27>"
|
|
name_mr "<U0E19><U0E32><U0E22>"
|
|
name_mrs "<U0E19><U0E32><U0E07>"
|
|
END LC_NAME
|
|
|
|
LC_ADDRESS
|
|
postal_fmt "%f%N%a%N%d%N%r%t%e%t%b%N%h%t%s%N%T%N%S%N%z%c%N"
|
|
country_name "<U0E44><U0E17><U0E22>"
|
|
%FIXME
|
|
%country_post ""
|
|
country_ab2 "TH"
|
|
country_ab3 "THA"
|
|
%country_num "764"
|
|
country_car "T"
|
|
%FIXME
|
|
%country_isbn ""
|
|
lang_name "<U0E44><U0E17><U0E22>"
|
|
lang_ab "th"
|
|
lang_term "tha"
|
|
lang_lib "tha"
|
|
country_num 764
|
|
END LC_ADDRESS
|