eccb780f34
X-SVN-Rev: 805
516 lines
21 KiB
HTML
516 lines
21 KiB
HTML
<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML//EN">
|
||
<html xmlns:v="urn:schemas-microsoft-com:vml"
|
||
xmlns:o="urn:schemas-microsoft-com:office:office"
|
||
xmlns:w="urn:schemas-microsoft-com:office:word"
|
||
xmlns="http://www.w3.org/TR/REC-html40">
|
||
|
||
<head>
|
||
<meta http-equiv=Content-Type content="text/html; charset=iso-8859-1">
|
||
<meta name=ProgId content=Word.Document>
|
||
<meta name=Generator content="Microsoft Word 9">
|
||
<meta name=Originator content="Microsoft Word 9">
|
||
<link rel=File-List href="./Collate_files/filelist.xml">
|
||
<link rel=Edit-Time-Data href="./Collate_files/editdata.mso">
|
||
<!--[if !mso]>
|
||
<style>
|
||
v\:* {behavior:url(#default#VML);}
|
||
o\:* {behavior:url(#default#VML);}
|
||
w\:* {behavior:url(#default#VML);}
|
||
.shape {behavior:url(#default#VML);}
|
||
</style>
|
||
<![endif]-->
|
||
<title>International Components for Unicode - Collation</title>
|
||
<!--[if gte mso 9]><xml>
|
||
<o:DocumentProperties>
|
||
<o:Author>Helena Shih</o:Author>
|
||
<o:Template>Normal</o:Template>
|
||
<o:LastAuthor>Helena Shih</o:LastAuthor>
|
||
<o:Revision>2</o:Revision>
|
||
<o:TotalTime>0</o:TotalTime>
|
||
<o:Created>2000-01-15T02:20:00Z</o:Created>
|
||
<o:LastSaved>2000-01-15T02:20:00Z</o:LastSaved>
|
||
<o:Pages>4</o:Pages>
|
||
<o:Words>982</o:Words>
|
||
<o:Characters>5599</o:Characters>
|
||
<o:Company>IBM</o:Company>
|
||
<o:Lines>46</o:Lines>
|
||
<o:Paragraphs>11</o:Paragraphs>
|
||
<o:CharactersWithSpaces>6875</o:CharactersWithSpaces>
|
||
<o:Version>9.2720</o:Version>
|
||
</o:DocumentProperties>
|
||
</xml><![endif]-->
|
||
<style>
|
||
<!--
|
||
/* Style Definitions */
|
||
p.MsoNormal, li.MsoNormal, div.MsoNormal
|
||
{mso-style-parent:"";
|
||
margin:0in;
|
||
margin-bottom:.0001pt;
|
||
mso-pagination:widow-orphan;
|
||
font-size:12.0pt;
|
||
font-family:"Times New Roman";
|
||
mso-fareast-font-family:"Times New Roman";}
|
||
p
|
||
{font-size:12.0pt;
|
||
font-family:"Times New Roman";
|
||
mso-fareast-font-family:"Times New Roman";}
|
||
@page Section1
|
||
{size:8.5in 11.0in;
|
||
margin:1.0in 1.25in 1.0in 1.25in;
|
||
mso-header-margin:.5in;
|
||
mso-footer-margin:.5in;
|
||
mso-paper-source:0;}
|
||
div.Section1
|
||
{page:Section1;}
|
||
/* List Definitions */
|
||
@list l0
|
||
{mso-list-id:56786128;
|
||
mso-list-type:hybrid;
|
||
mso-list-template-ids:316935058 799580840 -2129604566 1894698424 -1886861812 1076558752 -1316478726 -1694838522 -1962102214 -432647774;}
|
||
@list l0:level1
|
||
{mso-level-number-format:bullet;
|
||
mso-level-text:\F0B7;
|
||
mso-level-tab-stop:.5in;
|
||
mso-level-number-position:left;
|
||
text-indent:-.25in;
|
||
mso-ansi-font-size:10.0pt;
|
||
font-family:Symbol;}
|
||
@list l1
|
||
{mso-list-id:218128614;
|
||
mso-list-type:hybrid;
|
||
mso-list-template-ids:726427572 -220806470 -535028662 -1139008068 926857880 19293176 -1336270008 -629910652 1961381030 -112671298;}
|
||
@list l1:level1
|
||
{mso-level-number-format:bullet;
|
||
mso-level-text:\F0B7;
|
||
mso-level-tab-stop:.5in;
|
||
mso-level-number-position:left;
|
||
text-indent:-.25in;
|
||
mso-ansi-font-size:10.0pt;
|
||
font-family:Symbol;}
|
||
@list l2
|
||
{mso-list-id:398596625;
|
||
mso-list-type:hybrid;
|
||
mso-list-template-ids:1581174674 -1457777898 -2089911548 -358428948 -724424496 486305342 -803443362 102692998 -557393154 -905049134;}
|
||
@list l3
|
||
{mso-list-id:399836585;
|
||
mso-list-type:hybrid;
|
||
mso-list-template-ids:-1362098886 105021812 -1203221970 402953214 116268298 1957075642 -2103935390 135310026 2024683000 -1150503632;}
|
||
@list l4
|
||
{mso-list-id:512963647;
|
||
mso-list-type:hybrid;
|
||
mso-list-template-ids:870891966 718416036 1606166768 -734915736 -457166330 1502492706 -472354846 -1617501690 -1298739844 -1263368320;}
|
||
@list l4:level1
|
||
{mso-level-number-format:bullet;
|
||
mso-level-text:\F0B7;
|
||
mso-level-tab-stop:.5in;
|
||
mso-level-number-position:left;
|
||
text-indent:-.25in;
|
||
mso-ansi-font-size:10.0pt;
|
||
font-family:Symbol;}
|
||
@list l4:level2
|
||
{mso-level-number-format:bullet;
|
||
mso-level-text:o;
|
||
mso-level-tab-stop:1.0in;
|
||
mso-level-number-position:left;
|
||
text-indent:-.25in;
|
||
mso-ansi-font-size:10.0pt;
|
||
font-family:"Courier New";
|
||
mso-bidi-font-family:"Times New Roman";}
|
||
@list l5
|
||
{mso-list-id:1228296673;
|
||
mso-list-type:hybrid;
|
||
mso-list-template-ids:-1126373434 672300220 -1390479766 -247709302 -1005183176 586437888 1683491832 2053804606 -1192436542 -1064926734;}
|
||
@list l5:level1
|
||
{mso-level-number-format:bullet;
|
||
mso-level-text:\F0B7;
|
||
mso-level-tab-stop:.5in;
|
||
mso-level-number-position:left;
|
||
text-indent:-.25in;
|
||
mso-ansi-font-size:10.0pt;
|
||
font-family:Symbol;}
|
||
@list l6
|
||
{mso-list-id:1292789779;
|
||
mso-list-type:hybrid;
|
||
mso-list-template-ids:-517678870 758258566 1452305524 223270358 -53846700 -513747388 -2068313458 1590586680 -1770905550 -1947821216;}
|
||
@list l6:level1
|
||
{mso-level-number-format:bullet;
|
||
mso-level-text:\F0B7;
|
||
mso-level-tab-stop:.5in;
|
||
mso-level-number-position:left;
|
||
text-indent:-.25in;
|
||
mso-ansi-font-size:10.0pt;
|
||
font-family:Symbol;}
|
||
@list l7
|
||
{mso-list-id:1316833148;
|
||
mso-list-type:hybrid;
|
||
mso-list-template-ids:1332113200 -424870104 1051208176 892489996 -1233066968 -1810460500 -1022461362 2044245910 -1736145250 -1083136974;}
|
||
@list l7:level1
|
||
{mso-level-number-format:bullet;
|
||
mso-level-text:\F0B7;
|
||
mso-level-tab-stop:.5in;
|
||
mso-level-number-position:left;
|
||
text-indent:-.25in;
|
||
mso-ansi-font-size:10.0pt;
|
||
font-family:Symbol;}
|
||
@list l8
|
||
{mso-list-id:1327392266;
|
||
mso-list-type:hybrid;
|
||
mso-list-template-ids:-1403883808 -326201746 -1498098978 273696058 1220037092 978733230 -1686724236 -2129999476 1994688346 -162764280;}
|
||
@list l8:level1
|
||
{mso-level-number-format:bullet;
|
||
mso-level-text:\F0B7;
|
||
mso-level-tab-stop:.5in;
|
||
mso-level-number-position:left;
|
||
text-indent:-.25in;
|
||
mso-ansi-font-size:10.0pt;
|
||
font-family:Symbol;}
|
||
@list l9
|
||
{mso-list-id:1662541158;
|
||
mso-list-type:hybrid;
|
||
mso-list-template-ids:-1664995996 -1721883820 -1345845536 314467622 -225125196 2134916302 970646716 2097594674 1329790124 -20539764;}
|
||
@list l9:level1
|
||
{mso-level-number-format:bullet;
|
||
mso-level-text:\F0B7;
|
||
mso-level-tab-stop:.5in;
|
||
mso-level-number-position:left;
|
||
text-indent:-.25in;
|
||
mso-ansi-font-size:10.0pt;
|
||
font-family:Symbol;}
|
||
@list l10
|
||
{mso-list-id:1800419391;
|
||
mso-list-type:hybrid;
|
||
mso-list-template-ids:-160151480 1490450160 1649709476 -912377276 -843295976 955542454 -958090392 1090130474 897190372 -1064773102;}
|
||
@list l10:level1
|
||
{mso-level-number-format:bullet;
|
||
mso-level-text:\F0B7;
|
||
mso-level-tab-stop:.5in;
|
||
mso-level-number-position:left;
|
||
text-indent:-.25in;
|
||
mso-ansi-font-size:10.0pt;
|
||
font-family:Symbol;}
|
||
@list l11
|
||
{mso-list-id:1804076982;
|
||
mso-list-type:hybrid;
|
||
mso-list-template-ids:26769352 -1273701986 819625222 -1355785140 1125281256 -561239646 -1265353568 721578390 882827536 -1507570576;}
|
||
ol
|
||
{margin-bottom:0in;}
|
||
ul
|
||
{margin-bottom:0in;}
|
||
-->
|
||
</style>
|
||
<!--[if gte mso 9]><xml>
|
||
<o:shapedefaults v:ext="edit" spidmax="1027"/>
|
||
</xml><![endif]--><!--[if gte mso 9]><xml>
|
||
<o:shapelayout v:ext="edit">
|
||
<o:idmap v:ext="edit" data="1"/>
|
||
</o:shapelayout></xml><![endif]-->
|
||
</head>
|
||
|
||
<body bgcolor=white lang=EN-US link=blue vlink=blue style='tab-interval:.5in'>
|
||
|
||
<div class=Section1>
|
||
|
||
<h1>International Components for Unicode</h1>
|
||
|
||
<h2>Collation Framework</h2>
|
||
|
||
|
||
<div class=MsoNormal align=center style='text-align:center'>
|
||
|
||
<hr size=2 width="100%" align=center>
|
||
|
||
</div>
|
||
|
||
|
||
<h3><u>Contents</u></h3>
|
||
|
||
<ul type=disc>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
|
||
mso-list:l4 level1 lfo1;tab-stops:list .5in'>What is collation?</li>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
|
||
mso-list:l4 level1 lfo1;tab-stops:list .5in'>The rule symbols and their
|
||
usage</li>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
|
||
mso-list:l4 level1 lfo1;tab-stops:list .5in'>Interesting Examples</li>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
|
||
mso-list:l4 level1 lfo1;tab-stops:list .5in'>Implementation Details</li>
|
||
<ul type=circle>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:
|
||
auto;mso-list:l4 level2 lfo1;tab-stops:list 1.0in'>Building the Collation
|
||
Table</li>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:
|
||
auto;mso-list:l4 level2 lfo1;tab-stops:list 1.0in'>Incremental Comparison
|
||
Diagram</li>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:
|
||
auto;mso-list:l4 level2 lfo1;tab-stops:list 1.0in'>Generating a Collation
|
||
Key</li>
|
||
</ul>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
|
||
mso-list:l4 level1 lfo1;tab-stops:list .5in'>Q and A</li>
|
||
</ul>
|
||
|
||
<h3><u>What is collation?</u></h3>
|
||
|
||
<p>The collation framework performs locale-sensitive string comparison. The user of
|
||
this framework can use it to build searching and sorting routines for
|
||
natural language text, build table of contents for large documentation or
|
||
create efficient index look up for database entries.<br>
|
||
<br>
|
||
The ICU Collator classes provide services to allow: </p>
|
||
|
||
<ul type=disc>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
|
||
mso-list:l10 level1 lfo2;tab-stops:list .5in'>Simple, data-driven, table
|
||
based collation.</li>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
|
||
mso-list:l10 level1 lfo2;tab-stops:list .5in'>Easily customizable for your
|
||
needs.</li>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
|
||
mso-list:l10 level1 lfo2;tab-stops:list .5in'>Merging different resources
|
||
made possible.</li>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
|
||
mso-list:l10 level1 lfo2;tab-stops:list .5in'>Behind the scene
|
||
transforming the ASCII data file into a binary file for efficiency.</li>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
|
||
mso-list:l10 level1 lfo2;tab-stops:list .5in'>Offering both incremental
|
||
comparison for simple comparison and collation keys for batch processes.</li>
|
||
</ul>
|
||
|
||
<p>There are 4 comparison levels in the Collator classes to allow different
|
||
levels of difference to be considered significant: </p>
|
||
|
||
<ul type=disc>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
|
||
mso-list:l6 level1 lfo3;tab-stops:list .5in'>Primary: a letter difference.
|
||
For example, 'a' and 'b'.</li>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
|
||
mso-list:l6 level1 lfo3;tab-stops:list .5in'>Secondary: an accent
|
||
difference. For example, '&auml;' and '&aring;'.</li>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
|
||
mso-list:l6 level1 lfo3;tab-stops:list .5in'>Tertiary: a case difference.
|
||
For example, 'a' and 'A'.</li>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
|
||
mso-list:l6 level1 lfo3;tab-stops:list .5in'>Identical: no difference. For
|
||
example, 'a' and 'a'.</li>
|
||
</ul>
|
||
|
||
<h3><u>The rule symbols and their usage</u></h3>
|
||
|
||
<p>A string is decomposed into one or more collation elements when used with
|
||
the collation classes. The collation rules specify the order of these collation
|
||
elements. The collation table is composed of a list of collation rules, where
|
||
each rule takes one of three forms: </p>
|
||
|
||
<ol start=1 type=1>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
|
||
mso-list:l3 level1 lfo4;tab-stops:list .5in'>&lt;modifier&gt;</li>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
|
||
mso-list:l3 level1 lfo4;tab-stops:list .5in'>&lt;relation&gt;
|
||
&lt;text-argument&gt;</li>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
|
||
mso-list:l3 level1 lfo4;tab-stops:list .5in'>&lt;reset&gt;
|
||
&lt;text-argument1&gt; &lt;relation&gt; &lt;text-argument2&gt;</li>
|
||
</ol>
|
||
|
||
<h4>&lt;modifier&gt;</h4>
|
||
|
||
<ul type=disc>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
|
||
mso-list:l7 level1 lfo5;tab-stops:list .5in'>'@': French secondary, accent
|
||
weights sorted backwards.</li>
|
||
</ul>
|
||
|
||
<h4>&lt;text-argument&gt;</h4>
|
||
|
||
<p>A text-argument is any sequence of characters, excluding special characters
|
||
(that is, common whitespace characters [0009-000D, 0020] and rule syntax
|
||
characters [0021-002F, 003A-0040, 005B-0060, 007B-007E]). If those characters
|
||
are desired, you can put them in single quotes (e.g. ampersand =&gt; '&amp;').
|
||
Note that unquoted white space characters are ignored; e.g. "b c" is
|
||
treated as "bc".</p>
|
||
|
||
<h4>&lt;relation&gt;</h4>
|
||
|
||
<ul type=disc>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
|
||
mso-list:l9 level1 lfo6;tab-stops:list .5in'>'&lt;' : Greater, as a letter
|
||
difference (primary)</li>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
|
||
mso-list:l9 level1 lfo6;tab-stops:list .5in'>';' : Greater, as an accent
|
||
difference (secondary)</li>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
|
||
mso-list:l9 level1 lfo6;tab-stops:list .5in'>',' : Greater, as a case
|
||
difference (tertiary)</li>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
|
||
mso-list:l9 level1 lfo6;tab-stops:list .5in'>'=' : Equal</li>
|
||
</ul>
|
||
|
||
<h4>&lt;reset&gt;</h4>
|
||
|
||
<ul type=disc>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
|
||
mso-list:l0 level1 lfo7;tab-stops:list .5in'>'&amp;': Indicates that
|
||
text-argument2 follows the position to where the reset text-argument1
|
||
would be sorted.</li>
|
||
</ul>
|
||
|
||
<h3><u>Interesting Examples</u></h3>
|
||
|
||
<p>The following is a list of interesting examples of the rules and some string
|
||
comparison results using those rules. The comparison relation will be denoted
|
||
as "&lt;" of primary difference of less than, "&lt;&lt;" of
|
||
secondary difference of less than, "&lt;&lt;&lt;" of tertiary
|
||
difference of less than and "==" of equal to relationships: </p>
|
||
|
||
<ul type=disc>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
|
||
mso-list:l1 level1 lfo8;tab-stops:list .5in'>Rule " a, A &lt; b, B
|
||
&lt; c, C &lt; ch, cH, Ch, CH &lt; d, D &lt; e, E": this rule simply
|
||
says, sorts letters 'a', 'b', 'c', 'd' and 'e' in that order with primary
|
||
weights. 'ch' is sorted as a significant letter between 'c' and 'd' with
|
||
primary weights and upper cased letters sorts after lower cased letters
|
||
with tertiary weights. For example, "abc" &lt;&lt;&lt;
|
||
"ABC" and "achb" &lt; "adb".</li>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
|
||
mso-list:l1 level1 lfo8;tab-stops:list .5in'>Rule " a, A &lt; b, B
|
||
&lt; c, C &lt; d, D &lt; e, E &amp; AE; &AElig; ": this will sort letters
|
||
'a', 'b', 'c', 'd' and 'e' in that order with primary weights. '&AElig;' will
|
||
sort as with a secondary less than to the sequence of 'A' following 'E'.
|
||
For example, "aeb" &lt;&lt; "&aelig;b" and "acb"
|
||
&lt; "&aelig;b".</li>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
|
||
mso-list:l1 level1 lfo8;tab-stops:list .5in'>Rule ".... q, Q &amp;
|
||
Question'-'mark = '?' ....": the rule shows how to sort symbols to be
|
||
equivalent to the corresponding text. In this example, "?" ==
|
||
"Question-mark". Note that the special symbols need to be quoted
|
||
in the rule.</li>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
|
||
mso-list:l1 level1 lfo8;tab-stops:list .5in'>Rule ".... &amp; aa ; a-
|
||
&amp; ee ; e- &amp; ii ; i- &amp; oo ; o- &amp; uu ; u- ....": this
|
||
rule demonstrates how to specify prolonged vowels in Japanese. In this
|
||
case, "aa" is sorted as with a secondary less than to
|
||
"a-". For example, "baab" &lt;&lt; "ba-b".</li>
|
||
</ul>
|
||
|
||
<h3><u>Implementation Details</u></h3>
|
||
|
||
<p>Three parts of the code will be carefully examined here: </p>
|
||
|
||
<ul type=disc>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
|
||
mso-list:l5 level1 lfo9;tab-stops:list .5in'>Building the collation rule
|
||
table. (see mergecol.cpp, ptnentry.cpp and tblcoll.cpp)</li>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
|
||
mso-list:l5 level1 lfo9;tab-stops:list .5in'>Incremental comparison
|
||
algorithm for simple string comparison. (RuleBasedCollator.compare() in
|
||
tblcoll.cpp)</li>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
|
||
mso-list:l5 level1 lfo9;tab-stops:list .5in'>Collation key generation and
|
||
its format. (RuleBasedCollator.getCollationKey() in tblcoll.cpp)</li>
|
||
</ul>
|
||
|
||
<h3><u>Building the Collation Table</u></h3>
|
||
|
||
<p>The process of building a collation table is as follows: </p>
|
||
|
||
<ul type=disc>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
|
||
mso-list:l8 level1 lfo10;tab-stops:list .5in'>Parse the rule text into a
|
||
list of pattern entries. Each pattern has the content of current core
|
||
characters, extension character and the strength relation. (In
|
||
ptnentry.cpp)</li>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
|
||
mso-list:l8 level1 lfo10;tab-stops:list .5in'>Inserts each entry at the
|
||
correct position based on the &lt;reset&gt; arguments. (In mergecol.cpp)</li>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
|
||
mso-list:l8 level1 lfo10;tab-stops:list .5in'>Build the compacted, highly
|
||
efficient look-up table based on the list of pattern entries. (In
|
||
tblcoll.cpp)</li>
|
||
</ul>
|
||
|
||
<p> </p>
|
||
|
||
<h3><u>Incremental Comparison Diagram</u></h3>
|
||
|
||
<p> </p>
|
||
|
||
<p><img width=468 height=800 id="_x0000_i1026" src=collflow.gif alt="Incremental comparison flow diagram"></p>
|
||
|
||
<h3><u>Generating a Collation Key</u></h3>
|
||
|
||
<p>The control flow of generating a collation key is as follows: </p>
|
||
|
||
<ol start=1 type=1>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
|
||
mso-list:l2 level1 lfo11;tab-stops:list .5in'>Retrieve the next collation
|
||
element of the source string. Go to step 5 when the end of the string is reached.</li>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
|
||
mso-list:l2 level1 lfo11;tab-stops:list .5in'>Append the primary weight of
|
||
element to the primary weight buffer.</li>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
|
||
mso-list:l2 level1 lfo11;tab-stops:list .5in'>Checks if it's necessary to
|
||
process secondary weights. If so, append the secondary weights to the
|
||
secondary weight buffer. If the collator is marked to process French
|
||
secondary, reverse the order of all the secondary weights before encountering
|
||
the next primary weight.</li>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
|
||
mso-list:l2 level1 lfo11;tab-stops:list .5in'>Checks if it's necessary to
|
||
process tertiary weights. If so, append the tertiary weights to the
|
||
tertiary weight buffer. </li>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
|
||
mso-list:l2 level1 lfo11;tab-stops:list .5in'>Concatenate the primary
|
||
weight buffer, secondary weight buffer and tertiary weight buffer and add
|
||
a null delimiter among the weights. Return the concatenated buffer as the
|
||
collation key.</li>
|
||
</ol>
|
||
|
||
<h3><u>Q &amp; A</u></h3>
|
||
|
||
<ol start=1 type=1>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
|
||
mso-list:l11 level1 lfo12;tab-stops:list .5in'>How do I customize the
|
||
collation sequence?<br>
|
||
A: Using the RuleBasedCollator constructor, the user of the collation
|
||
framework can then create his/her own Collator with a customized rule.</li>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
|
||
mso-list:l11 level1 lfo12;tab-stops:list .5in'>Will the collation framework
|
||
support the surrogate and private use characters?<br>
|
||
A: It's part of our future work items. However, no firm schedule has
|
||
been set for this yet.</li>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
|
||
mso-list:l11 level1 lfo12;tab-stops:list .5in'>How does the French
|
||
secondary turn-on affect the generation of collation key?<br>
|
||
A: In French, the secondary differences are sorted backwards so this will
|
||
invoke the collation key to reverse the secondary weights in the keys.</li>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
|
||
mso-list:l11 level1 lfo12;tab-stops:list .5in'>Is there any support for
|
||
composing characters? If so, how does it work?<br>
|
||
A: Yes, it is based on the Normalizer interface. When an expanding
|
||
character is detected, the rule builder will construct collation entries
|
||
for the precomposed version internally to handle the composed characters
|
||
correctly.</li>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
|
||
mso-list:l11 level1 lfo12;tab-stops:list .5in'>Is there any plan for
|
||
performance improvement, for instance, contracting/expanding character
|
||
lookup?<br>
|
||
A: Yes, the performance enhancement is an ongoing work item.</li>
|
||
</ol>
|
||
|
||
<p> </p>
|
||
|
||
<p><a href="../readme.html">ReadMe for </a><a href="../readme.html#API">International
|
||
Components for Unicode</a></p>
|
||
|
||
|
||
<div class=MsoNormal align=center style='text-align:center'>
|
||
|
||
<hr size=2 width="100%" align=center>
|
||
|
||
</div>
|
||
|
||
</div>
|
||
|
||
</body>
|
||
|
||
</html>
|