5f94ba6df9
X-SVN-Rev: 804
1536 lines
81 KiB
HTML
1536 lines
81 KiB
HTML
<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
|
||
<html xmlns:v="urn:schemas-microsoft-com:vml"
|
||
xmlns:o="urn:schemas-microsoft-com:office:office"
|
||
xmlns:w="urn:schemas-microsoft-com:office:word"
|
||
xmlns="http://www.w3.org/TR/REC-html40">
|
||
|
||
<head>
|
||
<meta http-equiv=Content-Type content="text/html; charset=iso-8859-1">
|
||
<meta name=ProgId content=Word.Document>
|
||
<meta name=Generator content="Microsoft Word 9">
|
||
<meta name=Originator content="Microsoft Word 9">
|
||
<link rel=File-List href="./readme_files/filelist.xml">
|
||
<link rel=Edit-Time-Data href="./readme_files/editdata.mso">
|
||
<!--[if !mso]>
|
||
<style>
|
||
v\:* {behavior:url(#default#VML);}
|
||
o\:* {behavior:url(#default#VML);}
|
||
w\:* {behavior:url(#default#VML);}
|
||
.shape {behavior:url(#default#VML);}
|
||
</style>
|
||
<![endif]-->
|
||
<title>ReadMe for ICU</title>
|
||
<!--[if gte mso 9]><xml>
|
||
<o:DocumentProperties>
|
||
<o:Author>Helena Shih</o:Author>
|
||
<o:Template>Normal</o:Template>
|
||
<o:LastAuthor>Helena Shih</o:LastAuthor>
|
||
<o:Revision>4</o:Revision>
|
||
<o:TotalTime>28</o:TotalTime>
|
||
<o:Created>2000-02-01T18:26:00Z</o:Created>
|
||
<o:LastSaved>2000-02-09T20:02:00Z</o:LastSaved>
|
||
<o:Pages>11</o:Pages>
|
||
<o:Words>5177</o:Words>
|
||
<o:Characters>29510</o:Characters>
|
||
<o:Company>IBM</o:Company>
|
||
<o:Lines>245</o:Lines>
|
||
<o:Paragraphs>59</o:Paragraphs>
|
||
<o:CharactersWithSpaces>36240</o:CharactersWithSpaces>
|
||
<o:Version>9.2720</o:Version>
|
||
</o:DocumentProperties>
|
||
</xml><![endif]-->
|
||
<style>
|
||
<!--
|
||
/* Style Definitions */
|
||
p.MsoNormal, li.MsoNormal, div.MsoNormal
|
||
{mso-style-parent:"";
|
||
margin:0in;
|
||
margin-bottom:.0001pt;
|
||
mso-pagination:widow-orphan;
|
||
font-size:12.0pt;
|
||
font-family:"Times New Roman";
|
||
mso-fareast-font-family:"Times New Roman";}
|
||
a:link, span.MsoHyperlink
|
||
{color:blue;
|
||
text-decoration:underline;
|
||
text-underline:single;}
|
||
a:visited, span.MsoHyperlinkFollowed
|
||
{color:purple;
|
||
text-decoration:underline;
|
||
text-underline:single;}
|
||
p
|
||
{margin-right:0in;
|
||
mso-margin-top-alt:auto;
|
||
mso-margin-bottom-alt:auto;
|
||
margin-left:0in;
|
||
mso-pagination:widow-orphan;
|
||
font-size:12.0pt;
|
||
font-family:"Times New Roman";
|
||
mso-fareast-font-family:"Times New Roman";}
|
||
pre
|
||
{margin:0in;
|
||
margin-bottom:.0001pt;
|
||
mso-pagination:widow-orphan;
|
||
tab-stops:45.8pt 91.6pt 137.4pt 183.2pt 229.0pt 274.8pt 320.6pt 366.4pt 412.2pt 458.0pt 503.8pt 549.6pt 595.4pt 641.2pt 687.0pt 732.8pt;
|
||
font-size:10.0pt;
|
||
font-family:"Courier New";
|
||
mso-fareast-font-family:"Courier New";}
|
||
@page Section1
|
||
{size:8.5in 11.0in;
|
||
margin:1.0in 1.25in 1.0in 1.25in;
|
||
mso-header-margin:.5in;
|
||
mso-footer-margin:.5in;
|
||
mso-paper-source:0;}
|
||
div.Section1
|
||
{page:Section1;}
|
||
/* List Definitions */
|
||
@list l0
|
||
{mso-list-id:47188065;
|
||
mso-list-type:hybrid;
|
||
mso-list-template-ids:1466486202 116280834 552215838 906805068 1521137622 401357620 -1378838014 -511129794 450771094 1606707228;}
|
||
@list l1
|
||
{mso-list-id:117913232;
|
||
mso-list-type:hybrid;
|
||
mso-list-template-ids:-450314456 413984474 -860879584 458148388 1916146286 1538412862 863017768 -300143966 -55386208 -1462483600;}
|
||
@list l1:level1
|
||
{mso-level-number-format:bullet;
|
||
mso-level-text:\F0B7;
|
||
mso-level-tab-stop:.5in;
|
||
mso-level-number-position:left;
|
||
text-indent:-.25in;
|
||
mso-ansi-font-size:10.0pt;
|
||
font-family:Symbol;}
|
||
@list l2
|
||
{mso-list-id:283392397;
|
||
mso-list-type:hybrid;
|
||
mso-list-template-ids:221802328 -258441084 -934746342 1419537734 -1901567532 1981813236 287487774 2029678266 1193441118 -320863984;}
|
||
@list l3
|
||
{mso-list-id:568229750;
|
||
mso-list-type:hybrid;
|
||
mso-list-template-ids:1260177520 158890492 503090804 -1695902632 1465945538 -935272554 -2049028 941654354 -1871965178 -1305592270;}
|
||
@list l3:level1
|
||
{mso-level-number-format:bullet;
|
||
mso-level-text:\F0B7;
|
||
mso-level-tab-stop:.5in;
|
||
mso-level-number-position:left;
|
||
text-indent:-.25in;
|
||
mso-ansi-font-size:10.0pt;
|
||
font-family:Symbol;}
|
||
@list l4
|
||
{mso-list-id:576742058;
|
||
mso-list-type:hybrid;
|
||
mso-list-template-ids:-715868556 -132768664 498237874 598241330 -1517910766 -1750947490 714492842 1225816080 -2059080712 393093814;}
|
||
@list l4:level1
|
||
{mso-level-number-format:bullet;
|
||
mso-level-text:\F0B7;
|
||
mso-level-tab-stop:.5in;
|
||
mso-level-number-position:left;
|
||
text-indent:-.25in;
|
||
mso-ansi-font-size:10.0pt;
|
||
font-family:Symbol;}
|
||
@list l5
|
||
{mso-list-id:664672491;
|
||
mso-list-type:hybrid;
|
||
mso-list-template-ids:1772678752 2062991906 446980294 -161209628 -100250862 2128757756 2052650808 -538035208 -6270306 291808606;}
|
||
@list l5:level1
|
||
{mso-level-number-format:bullet;
|
||
mso-level-text:\F0B7;
|
||
mso-level-tab-stop:.5in;
|
||
mso-level-number-position:left;
|
||
text-indent:-.25in;
|
||
mso-ansi-font-size:10.0pt;
|
||
font-family:Symbol;}
|
||
@list l6
|
||
{mso-list-id:836961509;
|
||
mso-list-type:hybrid;
|
||
mso-list-template-ids:-224895454 -1014200752 826189550 -2031309554 1889069730 -609042030 1875430626 1998766280 1070782366 1076102286;}
|
||
@list l6:level1
|
||
{mso-level-number-format:bullet;
|
||
mso-level-text:\F0B7;
|
||
mso-level-tab-stop:.5in;
|
||
mso-level-number-position:left;
|
||
text-indent:-.25in;
|
||
mso-ansi-font-size:10.0pt;
|
||
font-family:Symbol;}
|
||
@list l7
|
||
{mso-list-id:913508786;
|
||
mso-list-type:hybrid;
|
||
mso-list-template-ids:1568069976 -1959095566 -768301796 -931790726 -2143009482 1948575304 -712632468 -697775276 -403279512 968792852;}
|
||
@list l7:level1
|
||
{mso-level-number-format:bullet;
|
||
mso-level-text:\F0B7;
|
||
mso-level-tab-stop:.5in;
|
||
mso-level-number-position:left;
|
||
text-indent:-.25in;
|
||
mso-ansi-font-size:10.0pt;
|
||
font-family:Symbol;}
|
||
@list l7:level2
|
||
{mso-level-number-format:bullet;
|
||
mso-level-text:o;
|
||
mso-level-tab-stop:1.0in;
|
||
mso-level-number-position:left;
|
||
text-indent:-.25in;
|
||
mso-ansi-font-size:10.0pt;
|
||
font-family:"Courier New";
|
||
mso-bidi-font-family:"Times New Roman";}
|
||
@list l8
|
||
{mso-list-id:991719643;
|
||
mso-list-type:hybrid;
|
||
mso-list-template-ids:2129591196 1460462400 -1172699750 1169610458 -437120214 1872653486 -842923700 841514964 2136911810 1364103300;}
|
||
@list l9
|
||
{mso-list-id:1115171434;
|
||
mso-list-type:hybrid;
|
||
mso-list-template-ids:-1407672820 -1016529878 1566456998 -99162944 1112470608 1354390290 1532235962 643100262 1447355994 -1094388302;}
|
||
@list l10
|
||
{mso-list-id:1133790422;
|
||
mso-list-type:hybrid;
|
||
mso-list-template-ids:602321612 -1181957080 -1020131346 -1755803318 646190636 122975578 1475804658 -455318736 -1891325090 -1214717408;}
|
||
@list l10:level1
|
||
{mso-level-number-format:bullet;
|
||
mso-level-text:\F0B7;
|
||
mso-level-tab-stop:.5in;
|
||
mso-level-number-position:left;
|
||
text-indent:-.25in;
|
||
mso-ansi-font-size:10.0pt;
|
||
font-family:Symbol;}
|
||
@list l10:level2
|
||
{mso-level-number-format:bullet;
|
||
mso-level-text:o;
|
||
mso-level-tab-stop:1.0in;
|
||
mso-level-number-position:left;
|
||
text-indent:-.25in;
|
||
mso-ansi-font-size:10.0pt;
|
||
font-family:"Courier New";
|
||
mso-bidi-font-family:"Times New Roman";}
|
||
@list l11
|
||
{mso-list-id:1170366750;
|
||
mso-list-type:hybrid;
|
||
mso-list-template-ids:139625938 1784856908 14434230 2135990762 205005018 -1679018944 -78496528 941265532 136326846 -2030939902;}
|
||
@list l12
|
||
{mso-list-id:1395081028;
|
||
mso-list-type:hybrid;
|
||
mso-list-template-ids:-1450152516 594060610 -1995156862 -742472460 1399253788 775304844 -332753902 -90000986 -656356064 -1128074476;}
|
||
@list l12:level1
|
||
{mso-level-number-format:bullet;
|
||
mso-level-text:\F0B7;
|
||
mso-level-tab-stop:.5in;
|
||
mso-level-number-position:left;
|
||
text-indent:-.25in;
|
||
mso-ansi-font-size:10.0pt;
|
||
font-family:Symbol;}
|
||
@list l13
|
||
{mso-list-id:1410885180;
|
||
mso-list-type:hybrid;
|
||
mso-list-template-ids:-1420239212 -2078642128 1381369862 486155864 -1973497194 1050204580 -46207730 -508368526 1735818270 669444066;}
|
||
@list l13:level1
|
||
{mso-level-number-format:bullet;
|
||
mso-level-text:\F0B7;
|
||
mso-level-tab-stop:.5in;
|
||
mso-level-number-position:left;
|
||
text-indent:-.25in;
|
||
mso-ansi-font-size:10.0pt;
|
||
font-family:Symbol;}
|
||
@list l14
|
||
{mso-list-id:1558080398;
|
||
mso-list-type:hybrid;
|
||
mso-list-template-ids:-720580774 1417685988 1440894936 -1423303424 -193586832 898255738 -212563752 725117368 1124212432 2069772174;}
|
||
@list l14:level1
|
||
{mso-level-number-format:bullet;
|
||
mso-level-text:\F0B7;
|
||
mso-level-tab-stop:.5in;
|
||
mso-level-number-position:left;
|
||
text-indent:-.25in;
|
||
mso-ansi-font-size:10.0pt;
|
||
font-family:Symbol;}
|
||
@list l15
|
||
{mso-list-id:1927837791;
|
||
mso-list-type:hybrid;
|
||
mso-list-template-ids:1363567226 -2132220750 875589078 1199445854 -582346952 944043046 -49528938 -394654148 1636605060 -183584940;}
|
||
@list l15:level1
|
||
{mso-level-number-format:bullet;
|
||
mso-level-text:\F0B7;
|
||
mso-level-tab-stop:.5in;
|
||
mso-level-number-position:left;
|
||
text-indent:-.25in;
|
||
mso-ansi-font-size:10.0pt;
|
||
font-family:Symbol;}
|
||
@list l15:level2
|
||
{mso-level-number-format:bullet;
|
||
mso-level-text:o;
|
||
mso-level-tab-stop:1.0in;
|
||
mso-level-number-position:left;
|
||
text-indent:-.25in;
|
||
mso-ansi-font-size:10.0pt;
|
||
font-family:"Courier New";
|
||
mso-bidi-font-family:"Times New Roman";}
|
||
@list l16
|
||
{mso-list-id:2067096185;
|
||
mso-list-type:hybrid;
|
||
mso-list-template-ids:582122176 -1729593526 -491090842 -1692663546 -1673229800 1254113432 1459242370 292878442 -961933704 -1788173682;}
|
||
@list l16:level1
|
||
{mso-level-number-format:bullet;
|
||
mso-level-text:\F0B7;
|
||
mso-level-tab-stop:.5in;
|
||
mso-level-number-position:left;
|
||
text-indent:-.25in;
|
||
mso-ansi-font-size:10.0pt;
|
||
font-family:Symbol;}
|
||
ol
|
||
{margin-bottom:0in;}
|
||
ul
|
||
{margin-bottom:0in;}
|
||
-->
|
||
</style>
|
||
<!--[if gte mso 9]><xml>
|
||
<o:shapedefaults v:ext="edit" spidmax="1027"/>
|
||
</xml><![endif]--><!--[if gte mso 9]><xml>
|
||
<o:shapelayout v:ext="edit">
|
||
<o:idmap v:ext="edit" data="1"/>
|
||
</o:shapelayout></xml><![endif]-->
|
||
<meta name=Template content="F:\Program Files\Microsoft Office\Office\html.dot">
|
||
</head>
|
||
|
||
<body bgcolor=white lang=EN-US link=blue vlink=purple style='tab-interval:.5in'>
|
||
|
||
<div class=Section1>
|
||
|
||
<h2>ReadMe: International Components for Unicode</h2>
|
||
|
||
<p>Version: February 1, 2000</p>
|
||
|
||
|
||
<div class=MsoNormal align=center style='text-align:center'>
|
||
|
||
<hr size=2 width="100%" align=center>
|
||
|
||
</div>
|
||
|
||
|
||
<p>COPYRIGHT: <br>
|
||
Copyright (c) 1997-2000 International Business Machines Corporation and others.
|
||
All Rights Reserved.</p>
|
||
|
||
|
||
<div class=MsoNormal align=center style='text-align:center'>
|
||
|
||
<hr size=2 width="100%" align=center>
|
||
|
||
</div>
|
||
|
||
|
||
<p><br>
|
||
</p>
|
||
|
||
<h3><u>Contents</u></h3>
|
||
|
||
<ul type=disc>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
|
||
mso-list:l3 level1 lfo3;tab-stops:list .5in'><a href="#introduction">Introduction</a></li>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
|
||
mso-list:l3 level1 lfo3;tab-stops:list .5in'><a href="#WhatContain">What
|
||
the International Components for Unicode Contain</a></li>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
|
||
mso-list:l3 level1 lfo3;tab-stops:list .5in'><a href="#API">API overview</a></li>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
|
||
mso-list:l3 level1 lfo3;tab-stops:list .5in'><a
|
||
href="#PlatformDependencies">Platform Dependencies</a></li>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
|
||
mso-list:l3 level1 lfo3;tab-stops:list .5in'><a href="#ImportantNotes">Important
|
||
Installation Notes</a></li>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
|
||
mso-list:l3 level1 lfo3;tab-stops:list .5in'><a href="#HowToInstall">How
|
||
to Install/Build</a></li>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
|
||
mso-list:l3 level1 lfo3;tab-stops:list .5in'><a href="#datahandling">How
|
||
ICU handles data</a></li>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
|
||
mso-list:l3 level1 lfo3;tab-stops:list .5in'><a href="#CharsetConvert">Character
|
||
Set Conversion Information</a></li>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
|
||
mso-list:l3 level1 lfo3;tab-stops:list .5in'><a href="#ProgrammingNotes">Programming
|
||
Notes</a></li>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
|
||
mso-list:l3 level1 lfo3;tab-stops:list .5in'><a href="#WhereToFindMore">Where
|
||
to Find More Information</a></li>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
|
||
mso-list:l3 level1 lfo3;tab-stops:list .5in'><a href="#SubmittingComments">Submitting
|
||
Comments, Requesting Features and Reporting Bugs</a></li>
|
||
</ul>
|
||
|
||
<h3><a name=introduction></a><u>Introduction</u></h3>
|
||
|
||
<p>Today's software market is a global one in which it is desirable to develop
|
||
and maintain one application that supports a wide variety of national
|
||
languages. International Components for Unicode provides the following tools to
|
||
help you write language independent applications: </p>
|
||
|
||
<ul type=disc>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
|
||
mso-list:l5 level1 lfo6;tab-stops:list .5in'>UnicodeString supporting the
|
||
Unicode 3.0 standard</li>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
|
||
mso-list:l5 level1 lfo6;tab-stops:list .5in'>Resource bundles for storing
|
||
and accessing localized information</li>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
|
||
mso-list:l5 level1 lfo6;tab-stops:list .5in'>Number formatters for
|
||
converting binary numbers into text strings for meaningful display</li>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
|
||
mso-list:l5 level1 lfo6;tab-stops:list .5in'>Date and time formatters for
|
||
converting internal time data into text strings for meaningful display</li>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
|
||
mso-list:l5 level1 lfo6;tab-stops:list .5in'>Message formatters for
|
||
putting together sequences of strings, numbers, dates and other formats to
|
||
create messages</li>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
|
||
mso-list:l5 level1 lfo6;tab-stops:list .5in'>Text collation supporting
|
||
language sensitive comparison of strings</li>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
|
||
mso-list:l5 level1 lfo6;tab-stops:list .5in'>Text boundary analysis for
|
||
finding character, word and sentence boundaries</li>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
|
||
mso-list:l5 level1 lfo6;tab-stops:list .5in'>Changing simple data files
|
||
rather than modifying program code easily localizes applications written
|
||
using these tools</li>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
|
||
mso-list:l5 level1 lfo6;tab-stops:list .5in'>Over 150 locales supported.
|
||
Visit <a
|
||
href="http://oss.software.ibm.com/developerworks/opensource/icu/localeexplorer">LocaleExplorer
|
||
(http://oss.software.ibm.com/developerworks/opensource/icu/localeexplorer)</a>
|
||
site for a demonstration and a full list of supported locales or <a
|
||
href="docs/supp_loc.html">click here for a table of supported locales</a>.</li>
|
||
</ul>
|
||
|
||
<p>It is possible to support additional locales by adding more locale data
|
||
files, with no code changes. </p>
|
||
|
||
<p>Please refer to the POSIX Programmer's Guide for details on what the ISO locale
|
||
ID means. </p>
|
||
|
||
<p>Your comments are important to making this release successful. We are
|
||
committed to fixing any bugs, and will also use your feedback to help plan
|
||
future releases. </p>
|
||
|
||
<p style='margin-right:.5in;margin-left:.5in'><b><u>IMPORTANT</u>: Please make
|
||
sure you understand the <a href="license.html">Copyright and License
|
||
information</a>.</b></p>
|
||
|
||
<p style='margin-right:.5in;margin-left:.5in'> </p>
|
||
|
||
<h3><a name=WhatContain></a><u>What the International Components for Unicode
|
||
Contain</u></h3>
|
||
|
||
<p>There are two ways to download the ICU releases, </p>
|
||
|
||
<ul type=disc>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
|
||
mso-list:l10 level1 lfo9;tab-stops:list .5in'><strong>Official Release
|
||
Snapshot:</strong><b><br>
|
||
</b>If you want to use ICU (as opposed to developing it), your best bet is
|
||
to download an official, packaged version of the ICU source
|
||
code. These versions are tested more thoroughly than day-to-day
|
||
development builds of the system, and they are packaged in zip and tar
|
||
files for convenient download. These packaged files can be found at <a
|
||
href="http://oss.software.ibm.com/developerworks/opensource/icu/project/download/index.html">http://oss.software.ibm.com/developerworks/opensource/icu/project/download/index.html</a>.<br>
|
||
If the packaged snapshot is named <b>ICUXXXXXX.zip</b>, XXXXXX is the release
|
||
version number.<br>
|
||
Please unzip this file. It will re-construct the source directory. </li>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
|
||
mso-list:l10 level1 lfo9;tab-stops:list .5in'><strong>CVS Source
|
||
Repository:</strong><b><br>
|
||
</b>If you are interested in developing features, patches, or bug fixes
|
||
for ICU, you should probably be working with the latest version of the ICU
|
||
source code. You will need to check the code out of our CVS repository to
|
||
ensure that you have the most recent version of all of the files. There
|
||
are several ways to do this:</li>
|
||
<ul type=circle>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:
|
||
auto;mso-list:l10 level2 lfo9;tab-stops:list 1.0in'>WebCVS:<br>
|
||
If you want to browse the code and only make occasional downloads, you
|
||
may want to use WebCVS. It provides a convenient, web-based interface for
|
||
browsing and downloading the latest version of the ICU source code and
|
||
documentation. You can also view each file's revision history, display
|
||
the differences between individual revisions, determine which revisions
|
||
were part of which official release, and so on. </li>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:
|
||
auto;mso-list:l10 level2 lfo9;tab-stops:list 1.0in'>WinCVS:<br>
|
||
If you will be doing serious work on ICU, you should probably install a
|
||
CVS client on your own machine so that you can do batch operations
|
||
without going through the WebCVS interface. On Windows, we suggest the
|
||
WinCVS client. The following are example instructions on how to download
|
||
ICU via WinCVS: <br>
|
||
1.Install the WinCVS client, which you can download from the WinCVS home
|
||
page. <br>
|
||
2.In the WinCVS preferences, specify your CVSRoot to be
|
||
":pserver:anoncvs@oss.software.ibm.com:/usr/cvs/icu"<br>
|
||
with the password "anoncvs". To enter the CVSRoot value, select
|
||
"Preferences" from the "Cvs Admin" pull-down menu.
|
||
Authentication should be set to "'passwd' file on the
|
||
cvs server". <br>
|
||
3.To "extract" the most recent version of ICU, select
|
||
"Checkout module" from the "Cvs Admin" menu. Specify
|
||
"icu" for the module name. </li>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:
|
||
auto;mso-list:l10 level2 lfo9;tab-stops:list 1.0in'>CVS command line:<br>
|
||
You can also check out the repository anonymously on UNIX using the
|
||
following commands, after first setting your CVSROOT to point to the ICU
|
||
repository: <br>
|
||
<br>
|
||
export CVSROOT=:pserver:anoncvs@oss.software.ibm.com:/usr/cvs/icu<br>
|
||
cvs login CVS password: anoncvs<br>
|
||
cvs checkout icu<br>
|
||
cvs logout</li>
|
||
</ul>
|
||
</ul>
|
||
|
||
<p>For more details on how to download ICU directly from the web site, please
|
||
also see <a
|
||
href="http://oss.software.ibm.com/developerworks/opensource/icu/project/download/index.html">http://oss.software.ibm.com/developerworks/opensource/icu/project/download/index.html</a></p>
|
||
|
||
<p>Below, <b>$Root</b> is the placement of the icu directory in your file
|
||
system, like "drive:\...\icu" in your environment.
|
||
"drive:\..." stands for any drive and any directory on that drive
|
||
that you chose to install icu into.</p>
|
||
|
||
<p><b>The following files describe the code drop:</b> <br>
|
||
<br>
|
||
</p>
|
||
|
||
<table border=1 cellpadding=0 style='mso-cellspacing:1.5pt;mso-padding-alt:
|
||
0in 0in 0in 0in'>
|
||
<tr>
|
||
<td style='padding:.75pt .75pt .75pt .75pt'>
|
||
<p class=MsoNormal>readme.html (this file)</p>
|
||
</td>
|
||
<td style='padding:.75pt .75pt .75pt .75pt'>
|
||
<p class=MsoNormal>describes the International Components for Unicode</p>
|
||
</td>
|
||
</tr>
|
||
<tr>
|
||
<td style='padding:.75pt .75pt .75pt .75pt'>
|
||
<p class=MsoNormal>license.html</p>
|
||
</td>
|
||
<td style='padding:.75pt .75pt .75pt .75pt'>
|
||
<p class=MsoNormal>contains IBM's public license</p>
|
||
</td>
|
||
</tr>
|
||
</table>
|
||
|
||
<p><b>The following directories contain source code and data files:</b> <br>
|
||
<br>
|
||
</p>
|
||
|
||
<table border=1 cellpadding=0 width=623 style='width:467.25pt;mso-cellspacing:
|
||
1.5pt;mso-padding-alt:0in 0in 0in 0in'>
|
||
<tr>
|
||
<td width="20%" style='width:20.0%;padding:.75pt .75pt .75pt .75pt'>
|
||
<p class=MsoNormal>$Root\source\common\</p>
|
||
</td>
|
||
<td width="80%" style='width:80.0%;padding:.75pt .75pt .75pt .75pt'>
|
||
<p class=MsoNormal>The utility classes, such as ResourceBundle, Unicode,
|
||
Locale, UnicodeString. The codepage conversion library API, UnicodeConverter.</p>
|
||
</td>
|
||
</tr>
|
||
<tr>
|
||
<td width="20%" style='width:20.0%;padding:.75pt .75pt .75pt .75pt'>
|
||
<p class=MsoNormal>$Root\source\i18n\</p>
|
||
</td>
|
||
<td width="80%" style='width:80.0%;padding:.75pt .75pt .75pt .75pt'>
|
||
<p class=MsoNormal>The collation source files, Collator, RuleBasedCollator
|
||
and CollationKey. <br>
|
||
The text boundary API, which locates character, word, sentence, and <br>
|
||
line breaks. <br>
|
||
The format API, which formats and parses data in numeric or date format to
|
||
and from text.</p>
|
||
</td>
|
||
</tr>
|
||
<tr>
|
||
<td width="20%" style='width:20.0%;padding:.75pt .75pt .75pt .75pt'>
|
||
<p class=MsoNormal>$Root\source\test\intltest\</p>
|
||
</td>
|
||
<td width="80%" style='width:80.0%;padding:.75pt .75pt .75pt .75pt'>
|
||
<p class=MsoNormal>A test suite including all C++ APIs. For information about
|
||
running the test suite, see <a href="docs/intltest.html">docs/intltest.html</a>.</p>
|
||
</td>
|
||
</tr>
|
||
<tr>
|
||
<td width="20%" style='width:20.0%;padding:.75pt .75pt .75pt .75pt'>
|
||
<p class=MsoNormal>$Root/source/test/cintltst/</p>
|
||
</td>
|
||
<td width="80%" style='width:80.0%;padding:.75pt .75pt .75pt .75pt'>
|
||
<p class=MsoNormal>A test suite including all C APIs. For information about
|
||
running the test suite, see <a href="docs/cintltst.html">docs/cintltst.html</a>.</p>
|
||
</td>
|
||
</tr>
|
||
<tr>
|
||
<td width="20%" style='width:20.0%;padding:.75pt .75pt .75pt .75pt'>
|
||
<p class=MsoNormal>$Root/data/</p>
|
||
</td>
|
||
<td width="80%" style='width:80.0%;padding:.75pt .75pt .75pt .75pt'>
|
||
<p class=MsoNormal>The Unicode 3.0 data file. Please see <a
|
||
href="http://www.unicode.org/">http://www.unicode.org/</a> for more
|
||
information. <br>
|
||
This directory also contains the resource files for all international
|
||
objects. These files are of the following types: </p>
|
||
<ul type=disc>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:
|
||
auto;mso-list:l12 level1 lfo12;tab-stops:list .5in'>TXT files contain
|
||
general locale data. </li>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:
|
||
auto;mso-list:l12 level1 lfo12;tab-stops:list .5in'>RES files contain
|
||
non-portable locale data files which are generated by the <strong>genrb</strong>
|
||
tool.</li>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:
|
||
auto;mso-list:l12 level1 lfo12;tab-stops:list .5in'>COL files are non-portable
|
||
packed binary collation data files which are created by the <strong>gencol</strong>
|
||
tool. </li>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:
|
||
auto;mso-list:l12 level1 lfo12;tab-stops:list .5in'>UCM files
|
||
contain mapping tables {from,to} Unicode in text format</li>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:
|
||
auto;mso-list:l12 level1 lfo12;tab-stops:list .5in'>CNV files are
|
||
non-portable packed binary conversion data generated by the <strong>makeconv</strong>
|
||
tool.</li>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:
|
||
auto;mso-list:l12 level1 lfo12;tab-stops:list .5in'>icudata.dll file
|
||
contains data files in a dynamic loadable library format. At this
|
||
moment, this file contains CNV files, converter aliases, timezone data
|
||
and Unicode character names. Please read <a href="docs/udata.html">udata.html</a>
|
||
for more information.</li>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:
|
||
auto;mso-list:l12 level1 lfo12;tab-stops:list .5in'>icudata.dat file
|
||
contains data files in a memory mapped file format. At this moment, this
|
||
file contains CNV files, converter aliases, timezone data and Unicode
|
||
character names. Please read <a href="docs/udata.html">udata.html</a>
|
||
for more information.</li>
|
||
</ul>
|
||
</td>
|
||
</tr>
|
||
<tr>
|
||
<td width="20%" style='width:20.0%;padding:.75pt .75pt .75pt .75pt'>
|
||
<p class=MsoNormal>$Root/source/tools</p>
|
||
</td>
|
||
<td width="80%" style='width:80.0%;padding:.75pt .75pt .75pt .75pt'>
|
||
<p class=MsoNormal>Tools for generating the data files. Data files are
|
||
generated by invoking $Root/source/tools/makedata.bat on Win32 or
|
||
$Root/source/make install on Unix.</p>
|
||
</td>
|
||
</tr>
|
||
<tr>
|
||
<td width="20%" style='width:20.0%;padding:.75pt .75pt .75pt .75pt'>
|
||
<p class=MsoNormal>$Root/source/samples</p>
|
||
</td>
|
||
<td width="80%" style='width:80.0%;padding:.75pt .75pt .75pt .75pt'>
|
||
<p class=MsoNormal>Various sample programs that use ICU</p>
|
||
</td>
|
||
</tr>
|
||
</table>
|
||
|
||
<p> <b>The following directories are populated when you've built the
|
||
framework:</b> <br>
|
||
(on Unix, replace $Root with the value given to the file
|
||
"configure") <br>
|
||
</p>
|
||
|
||
<table border=1 cellpadding=0 style='mso-cellspacing:1.5pt;mso-padding-alt:
|
||
0in 0in 0in 0in'>
|
||
<tr>
|
||
<td style='padding:.75pt .75pt .75pt .75pt'>
|
||
<p class=MsoNormal>$Root/include/</p>
|
||
</td>
|
||
<td style='padding:.75pt .75pt .75pt .75pt'>
|
||
<p class=MsoNormal>contains all the public header files.</p>
|
||
</td>
|
||
</tr>
|
||
<tr>
|
||
<td style='padding:.75pt .75pt .75pt .75pt'>
|
||
<p class=MsoNormal>$output</p>
|
||
</td>
|
||
<td style='padding:.75pt .75pt .75pt .75pt'>
|
||
<p class=MsoNormal>contains the libraries for static/dynamic linking or
|
||
executable programs.</p>
|
||
</td>
|
||
</tr>
|
||
</table>
|
||
|
||
<p><b>The following diagram shows the main directory structure of the
|
||
International Components for Unicode:</b> </p>
|
||
|
||
<pre> icu-NNNN</pre><pre> |</pre><pre> output <span style="mso-spacerun: yes"> </span> <span style="mso-spacerun: yes"> </span>icu</pre><pre> _____|_____<span style="mso-spacerun: yes"> </span>______________|______________________________</pre><pre> | <span style="mso-spacerun: yes"> </span>| <span style="mso-spacerun: yes"> </span>| <span style="mso-spacerun: yes"> </span> | | <span style="mso-spacerun: yes"> </span> |<span style="mso-spacerun: yes"> </span>|</pre><pre> libraries<span style="mso-spacerun: yes"> </span>programs include data source |<span style="mso-spacerun: yes"> </span>|</pre><pre> (built)<span style="mso-spacerun: yes"> </span>(built)<span style="mso-spacerun: yes"> </span>(built) <span style="mso-spacerun: yes"> </span> | readme.html license.html</pre><pre> <span style="mso-spacerun: yes"> </span> |<span style="mso-spacerun: yes"> </span></pre><pre> <span style="mso-spacerun: yes"> </span>_________________|__________________________</pre><pre> <span style="mso-spacerun: yes"> </span>|<span style="mso-spacerun: yes"> </span>|<span style="mso-spacerun: yes"> </span><span style="mso-spacerun: yes"> </span> | |<span style="mso-spacerun: yes"> </span>|<span style="mso-spacerun: yes"> </span>|</pre><pre> <span style="mso-spacerun: yes"> </span>common<span style="mso-spacerun: yes"> </span>i18n <span style="mso-spacerun: yes"> </span>test<span style="mso-spacerun: yes"> </span>extra<span style="mso-spacerun: yes"> </span>tools<span style="mso-spacerun: yes"> </span>samples</pre><pre> <span style="mso-spacerun: yes"> </span> <span style="mso-spacerun: yes"> </span>|<span style="mso-spacerun: yes"> </span>|<span style="mso-spacerun: yes"> </span></pre><pre> <span style="mso-spacerun: yes"> </span> ___|___ <span style="mso-spacerun: yes"> </span>___|_________________</pre><pre><span style="mso-spacerun: yes"> </span>|<span style="mso-spacerun: yes"> </span>|<span style="mso-spacerun: yes"> </span>|<span style="mso-spacerun: yes"> </span>|<span 
style="mso-spacerun: yes"> </span>|<span style="mso-spacerun: yes"> </span>| </pre><pre><span style="mso-spacerun: yes"> </span>intltest cintltst makeconv ctestfw genrb<span style="mso-spacerun: yes"> </span>....</pre>
|
||
|
||
<h3><a name=API></a><u>API Overview</u></h3>
|
||
|
||
<p>In the International Components for Unicode, there are two categories of APIs: </p>
|
||
|
||
<ul type=disc>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
|
||
mso-list:l15 level1 lfo15;tab-stops:list .5in'>Low-level Unicode/Resource
|
||
Attributes: (<strong>icuuc</strong> library)</li>
|
||
<ul type=circle>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:
|
||
auto;mso-list:l15 level2 lfo15;tab-stops:list 1.0in'><a
|
||
href="docs/utilCL.html">Utility Classes</a></li>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:
|
||
auto;mso-list:l15 level2 lfo15;tab-stops:list 1.0in'><a
|
||
href="docs/conversion_interface.htm">Conversion Interface</a></li>
|
||
</ul>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
|
||
mso-list:l15 level1 lfo15;tab-stops:list .5in'>High-level Unicode
|
||
Internationalization: (<strong>icui18n</strong> library)</li>
|
||
<ul type=circle>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:
|
||
auto;mso-list:l15 level2 lfo15;tab-stops:list 1.0in'><a
|
||
href="docs/boundCL.html">Text Boundary Classes</a></li>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:
|
||
auto;mso-list:l15 level2 lfo15;tab-stops:list 1.0in'><a
|
||
href="docs/collateCL.html">Collation Classes</a></li>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:
|
||
auto;mso-list:l15 level2 lfo15;tab-stops:list 1.0in'><a
|
||
href="docs/formatCL.html">Formatting Classes</a></li>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:
|
||
auto;mso-list:l15 level2 lfo15;tab-stops:list 1.0in'>Transliterator
|
||
Classes</li>
|
||
</ul>
|
||
</ul>
|
||
|
||
<p>See <A HREF="http://oss.software.ibm.com/icu/develop/codestds.html">International Components for Unicode Coding Guidelines</A>
|
||
for a discussion of code conventions common to all library classes. </p>
|
||
|
||
<p>See also <a href="../html/aindex.html">html/aindex.html</a> for an alphabetical
|
||
index, and <a href="../html/HIERjava.html">html/HIERjava.html</a> for a
|
||
hierarchical index to detailed API documentation. <br>
|
||
<br>
|
||
</p>
|
||
|
||
<h3><a name=PlatformDependencies></a><u>Platform Dependencies</u></h3>
|
||
|
||
<p>The platform dependencies have been isolated into the following 4 files: </p>
|
||
|
||
<ul type=disc>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
|
||
mso-list:l14 level1 lfo18;tab-stops:list .5in'><u>platform.h.in:</u>
|
||
Platform-dependent typedefs and defines:</li>
|
||
</ul>
|
||
|
||
<p class=MsoNormal style='margin-right:.5in;mso-margin-top-alt:auto;mso-margin-bottom-alt:
|
||
auto;margin-left:1.0in;text-indent:-.25in;mso-list:l13 level1 lfo20;tab-stops:
|
||
list .5in'><![if !supportLists]><span style='font-size:10.0pt;mso-bidi-font-size:
|
||
12.0pt;font-family:Symbol'>&middot;<span style='font:7.0pt "Times New Roman"'>
|
||
</span></span><![endif]>XP_CPLUSPLUS is defined for C++</p>
|
||
|
||
<p class=MsoNormal style='margin-right:.5in;mso-margin-top-alt:auto;mso-margin-bottom-alt:
|
||
auto;margin-left:1.0in;text-indent:-.25in;mso-list:l13 level1 lfo20;tab-stops:
|
||
list .5in'><![if !supportLists]><span style='font-size:10.0pt;mso-bidi-font-size:
|
||
12.0pt;font-family:Symbol'>&middot;<span style='font:7.0pt "Times New Roman"'>
|
||
</span></span><![endif]>bool_t, TRUE and FALSE, int8_t, int16_t etc.</p>
|
||
|
||
<p class=MsoNormal style='margin-right:.5in;mso-margin-top-alt:auto;mso-margin-bottom-alt:
|
||
auto;margin-left:1.0in;text-indent:-.25in;mso-list:l13 level1 lfo20;tab-stops:
|
||
list .5in'><![if !supportLists]><span style='font-size:10.0pt;mso-bidi-font-size:
|
||
12.0pt;font-family:Symbol'>&middot;<span style='font:7.0pt "Times New Roman"'>
|
||
</span></span><![endif]>U_EXPORT and U_IMPORT for specifying dynamic library
|
||
import and export</p>
|
||
|
||
<ul type=disc>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
|
||
mso-list:l6 level1 lfo23;tab-stops:list .5in'><u>putil.c:</u>
|
||
Implementations of various functions that are platform
|
||
dependent: (declared in putil.h)</li>
|
||
</ul>
|
||
|
||
<p class=MsoNormal style='margin-right:.5in;mso-margin-top-alt:auto;mso-margin-bottom-alt:
|
||
auto;margin-left:1.0in;text-indent:-.25in;mso-list:l1 level1 lfo25;tab-stops:
|
||
list .5in'><![if !supportLists]><span style='font-size:10.0pt;mso-bidi-font-size:
|
||
12.0pt;font-family:Symbol'>&middot;<span style='font:7.0pt "Times New Roman"'>
|
||
</span></span><![endif]>icu_isNaN, icu_isInfinite(double), icu_getNaN() and
|
||
icu_getInfinity for handling special floating point values</p>
|
||
|
||
<p class=MsoNormal style='margin-right:.5in;mso-margin-top-alt:auto;mso-margin-bottom-alt:
|
||
auto;margin-left:1.0in;text-indent:-.25in;mso-list:l1 level1 lfo25;tab-stops:
|
||
list .5in'><![if !supportLists]><span style='font-size:10.0pt;mso-bidi-font-size:
|
||
12.0pt;font-family:Symbol'>&middot;<span style='font:7.0pt "Times New Roman"'>
|
||
</span></span><![endif]>icu_tzset, icu_timezone, icu_tzname and time for
|
||
reading platform specific time and timezone information</p>
|
||
|
||
<p class=MsoNormal style='margin-right:.5in;mso-margin-top-alt:auto;mso-margin-bottom-alt:
|
||
auto;margin-left:1.0in;text-indent:-.25in;mso-list:l1 level1 lfo25;tab-stops:
|
||
list .5in'><![if !supportLists]><span style='font-size:10.0pt;mso-bidi-font-size:
|
||
12.0pt;font-family:Symbol'>&middot;<span style='font:7.0pt "Times New Roman"'>
|
||
</span></span><![endif]>icu_getDefaultDataDirectory, icu_getDefaultLocaleID for
|
||
reading the locale setting and data directory</p>
|
||
|
||
<p class=MsoNormal style='margin-right:.5in;mso-margin-top-alt:auto;mso-margin-bottom-alt:
|
||
auto;margin-left:1.0in;text-indent:-.25in;mso-list:l1 level1 lfo25;tab-stops:
|
||
list .5in'><![if !supportLists]><span style='font-size:10.0pt;mso-bidi-font-size:
|
||
12.0pt;font-family:Symbol'>&middot;<span style='font:7.0pt "Times New Roman"'>
|
||
</span></span><![endif]>icu_isBigEndian for finding the endianness of the
|
||
platform</p>
|
||
|
||
<p class=MsoNormal style='margin-right:.5in;mso-margin-top-alt:auto;mso-margin-bottom-alt:
|
||
auto;margin-left:1.0in;text-indent:-.25in;mso-list:l1 level1 lfo25;tab-stops:
|
||
list .5in'><![if !supportLists]><span style='font-size:10.0pt;mso-bidi-font-size:
|
||
12.0pt;font-family:Symbol'>&middot;<span style='font:7.0pt "Times New Roman"'>
|
||
</span></span><![endif]>icu_nextDouble is used specifically by the ChoiceFormat
|
||
API.</p>
|
||
|
||
<ul type=disc>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
|
||
mso-list:l7 level1 lfo28;tab-stops:list .5in'><u>mutex.h and mutex.cpp</u>:
|
||
Code for doing synchronization in multithreaded applications. If you wish
|
||
to use International Components for Unicode in a multithreaded
|
||
application, you must provide a synchronization primitive that the classes
|
||
can use to protect their global data against simultaneous modifications.
|
||
See <a href="docs/mutex.html">docs/mutex.html</a> for more information.</li>
|
||
<ul type=circle>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:
|
||
auto;mso-list:l7 level2 lfo28;tab-stops:list 1.0in'>We supply sample
|
||
implementations for WinNT, Win95, Win98, Sun/Solaris, RedHat/Linux, HP-UX
|
||
and for AIX on an RS/6000.</li>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:
|
||
auto;mso-list:l7 level2 lfo28;tab-stops:list 1.0in'>If you are changing
|
||
the platform-dependent files, ptypes.h and putil.h may also be interesting,
|
||
but shouldn't have to be changed. If you think any other files than the
|
||
ones mentioned above have platform dependencies, please contact us.</li>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:
|
||
auto;mso-list:l7 level2 lfo28;tab-stops:list 1.0in'>For the Intltest test
|
||
suite, intltest.cpp in "icu\source\test\intltest\" contains the
|
||
method pathnameInContext, which must also be adapted to any new platform.</li>
|
||
</ul>
|
||
</ul>
|
||
|
||
<ul type=disc>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
|
||
mso-list:l14 level1 lfo18;tab-stops:list .5in'>udata.h: The data-accessing
|
||
interface in ICU is implemented such that there is a lot of flexibility
|
||
for reading a data file.<span style="mso-spacerun: yes"> </span>Each
|
||
platform can tune the performance of file accessing for its environment by
|
||
choosing to implement one of the following options:</li>
|
||
</ul>
|
||
|
||
<ul type=disc>
|
||
<ul type=circle>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:
|
||
auto;mso-list:l7 level2 lfo28;tab-stops:list 1.0in'>DLL</li>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:
|
||
auto;mso-list:l7 level2 lfo28;tab-stops:list 1.0in'>Memory map</li>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:
|
||
auto;mso-list:l7 level2 lfo28;tab-stops:list 1.0in'>Plain text </li>
|
||
</ul>
|
||
</ul>
|
||
|
||
<h3><a name=ImportantNotes></a><u>Important Installation Notes </u></h3>
|
||
|
||
<p><strong>Win32 Platform</strong></p>
|
||
|
||
<p>If you are building on the Win32 platform, it is important that you
|
||
understand a few build details: </p>
|
||
|
||
<p><u>DLL directories and the PATH setting:</u> As delivered, the International
|
||
Components for Unicode build as several DLLs. These DLLs are placed in the
|
||
directories "icu\bin\Debug" and "icu\bin\Release".
|
||
You must add either of these directories to the PATH environment variable in
|
||
your system, or any executables you build will not be able to access
|
||
International Components for Unicode libraries. Alternatively, you can copy the
|
||
DLL files into a directory already in your PATH, but we do not recommend this
|
||
-- you can wind up with multiple copies of the DLL, and wind up using the wrong
|
||
one. </p>
|
||
|
||
<p><u>To change your PATH:</u> Do this under NT by using the System
|
||
control panel. Pick the "Environment" tab, select the variable PATH
|
||
in the lower box. In the "value" box, append the string
|
||
";drive:\...\icu\bin\Debug" at the end of the path string. If
|
||
there is nothing there, just type in "drive:\...\icu\bin\Debug".
|
||
Click the Set button, then the Ok button. </p>
|
||
|
||
<p><u>Link with Runtime libraries:</u> All the DLLs link with the C runtime
|
||
library "Debug Multithreaded DLL" or "Multithreaded DLL."
|
||
(This is changed through the Project Settings dialog, on the C/C++ tab, under
|
||
Code Generation.) It is important that any executable or other DLL you build
|
||
which uses the International Components for Unicode DLLs links with these
|
||
runtime libraries as well. If you do not do this, you will seemingly get memory
|
||
errors when you run the executable. <br>
|
||
</p>
|
||
|
||
<p><strong>OS/390 Platform</strong></p>
|
||
|
||
<p>If you are building on the OS/390 UNIX System Services platform, it is
|
||
important that you understand a<br>
|
||
few details. <br>
|
||
<br>
|
||
The gnu utilities gmake and gzip/gunzip are needed and can be obtained for
|
||
OS/390 from<br>
|
||
www.mks.com. Search for os/390, register, and follow download directions. <br>
|
||
<br>
|
||
DLL directories and the LIBPATH setting: The ICU DLLs libicu-i18n.dll and
|
||
libicu-uc.dll should be added<br>
|
||
to the LIBPATH environment variable concatenation.<br>
|
||
<br>
|
||
OS/390 supports both native S/390 hexadecimal floating point and, with Version
|
||
2.6 and later, IEEE binary<br>
|
||
floating point. This is a compile time option. Applications built with IEEE
|
||
should use ICU dlls that are<br>
|
||
built with IEEE (and vice versa). The environment variable IEEE390=1 will cause
|
||
the OS/390 version<br>
|
||
of ICU to be built with IEEE floating point. The default is native hexadecimal
|
||
floating point. <br>
|
||
<br>
|
||
The makedep executable is shipped with ICU for use with the OS/390 ICU build
|
||
process. The PATH<br>
|
||
environment variable should be updated to contain the location of this
|
||
executable prior to build.<br>
|
||
Alternatively, makedep may be moved into an existing PATH directory.<br>
|
||
<br>
|
||
When running the test suite, the TZ environment variable should be set to
|
||
export TZ="PST8PDT" so<br>
|
||
that time zone comparisons are correct.</p>
|
||
|
||
<p><a name=OS400><strong>OS/400 Platform</strong></a></p>
|
||
|
||
ICU Reference Release 1.4.0 contains partial support for the OS/400
|
||
platform, but additional work by the user is currently needed to get
|
||
it to build completely.
|
||
A future release of the ICU should work out-of-the-box
|
||
under OS/400.
|
||
<UL>
|
||
<LI> Requirements:
|
||
<UL>
|
||
<LI> QSHELL interpreter installed (install base option 30, operating
|
||
system)
|
||
<LI> QShell Utilities, PRPQ 5799-XEH
|
||
<LI> ILE C++ for AS/400, PRPQ 5799-GDW
|
||
<LI> GNU facilities (the gnu facilities are currently available by request only. Send e-mail to <A HREF="mailto:rchasgo400@us.ibm.com">rchasgo400@us.ibm.com</A> )
|
||
|
||
<P></UL> <!-- end requirements -->
|
||
<LI> Build environment setup:
|
||
<OL>
|
||
<LI> Create AS400 target library. This library will be the target for the
|
||
resulting modules, programs and service programs. You will specify this
|
||
library on the OUTPUTDIR environment variable in step 2.
|
||
<LI> Set up the following environment variables in your build process (use
|
||
ADDENVVAR or WRKENVVAR CL commands)
|
||
<UL>
|
||
CC - '/usr/bin/icc'<BR>
|
||
CXX - '/usr/bin/icc'<BR>
|
||
MAKE - '/usr/bin/gmake'<BR>
|
||
OUTPUTDIR - <I>identifies target as400 library for *module, *pgm and
|
||
*srvpgm objects</I>
|
||
<P></UL>
|
||
<LI> Add QCXXN to your build process library list. This results in the
|
||
resolution of CRTCPPMOD used by the icc compiler
|
||
<LI> Configure the Makefiles (see configure below) Note: Verify that the
|
||
mh-os400 configure file is used.
|
||
<UL>
|
||
<LI> Run 'configure --host=as400-os400'
|
||
<LI> Change the TARGET entries in
|
||
<TT>common/Makefile.in, i18n/Makefile.in, tools/toolutil/Makefile.in </TT> so that instead
|
||
of <TT>TARGET=<U>libicu-</U>uc.$(SO)</TT> they read
|
||
<TT>TARGET=<B>$(LIBICU)</B>uc.$(SO)</TT> [remove hyphens]
|
||
<LI> Change Makefile.in's (under source/tools) that have
|
||
LIBICU-TOOLUTIL so that it follows the other LIBICU variables on
|
||
the link line:
|
||
<TT>$(LIBICU-UC) $(LIBICU-TOOLUTIL) @LIBS@ @LIB_M@</TT>
|
||
<LI> The 'clean' and 'install' targets will not work without changes
|
||
because of symbolic links. To delete the target module, program, or service programs replace <TT>rm -rf</TT> with <B>$(RMV)</B>, and in the
|
||
library installation targets (install-library) change
|
||
<TT>$(INSTALL)</TT> to <B><TT>$(INSTALL-S)</TT></B>.
|
||
<P></UL>
|
||
|
||
<LI> gmake -e (-e to pick up the compilers)
|
||
<P></OL> <!-- end build environment -->
|
||
</UL>
|
||
|
||
Note on NULL pointer checks:
|
||
<UL>
|
||
In common/ucnv.c and common/unistr.c (search for U_MAX_PTR), there are
|
||
additional checks for NULL pointers. This is because pointer
|
||
comparison works differently on the AS/400 architecture.
|
||
</UL>
|
||
|
||
<h3><a name=HowToInstall></a><u>How to Install/Build on Win NT</u></h3>
|
||
|
||
<p>Building International Components for Unicode requires: </p>
|
||
|
||
<ul type=disc>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
|
||
mso-list:l16 level1 lfo31;tab-stops:list .5in'>Microsoft NT 3.51 or above</li>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
|
||
mso-list:l16 level1 lfo31;tab-stops:list .5in'>Microsoft Visual C++ 6.0
|
||
(Service Pack 2 is required to work with the release build of max speed
|
||
optimization).</li>
|
||
</ul>
|
||
|
||
<p>The steps are: </p>
|
||
|
||
<ol start=1 type=1>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
|
||
mso-list:l11 level1 lfo34;tab-stops:list .5in'>Unzip the icu-XXXX.zip
|
||
file, type "unzip -a icu-XXXX.zip -d drive:\directory" under
|
||
command prompt or use WinZip. drive:\directory\icu is the root
|
||
($Root) directory (you may but don't need to place "icu" into another
|
||
directory). If you change the root, you will need to change the project settings
|
||
accordingly in EACH makefile in the project, updating the "include"
|
||
and "library" paths.</li>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
|
||
mso-list:l11 level1 lfo34;tab-stops:list .5in'>Set the environment
|
||
variable <strong>ICU_DATA</strong>, the full pathname of the data
|
||
directory, to indicate where the locale data files and conversion mapping
|
||
tables are.</li>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
|
||
mso-list:l11 level1 lfo34;tab-stops:list .5in'>Start Microsoft Visual C++
|
||
6.0.</li>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
|
||
mso-list:l11 level1 lfo34;tab-stops:list .5in'>Choose "File"
|
||
menu and select "Open WorkSpace".</li>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
|
||
mso-list:l11 level1 lfo34;tab-stops:list .5in'>In the file chooser, choose
|
||
icu\source\allinone\allinone.dsw. Open this workspace.</li>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
|
||
mso-list:l11 level1 lfo34;tab-stops:list .5in'>This workspace includes all
|
||
the International Components for Unicode libraries, necessary tools as
|
||
well as intltest and cintltest test suite projects.</li>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
|
||
mso-list:l11 level1 lfo34;tab-stops:list .5in'>Set the active Project.
|
||
Choose "Project" menu and select "Set active project".
|
||
In the submenu, select "all" workspace.</li>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
|
||
mso-list:l11 level1 lfo34;tab-stops:list .5in'>Set the active
|
||
configuration ("Win32 Debug" or "Win32 Release") and
|
||
make sure this matches your PATH setting as described in the previous
|
||
chapter. (See note below.)</li>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
|
||
mso-list:l11 level1 lfo34;tab-stops:list .5in'>Choose "Build"
|
||
menu and select "Rebuild All". If you want to build the Debug
|
||
and Release configurations at the same time, choose "Build" menu
|
||
and select "Batch Build..." instead (and mark all configurations
|
||
as checked), then click the button named "Rebuild All".</li>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
|
||
mso-list:l11 level1 lfo34;tab-stops:list .5in'>The "all"
|
||
workspace will build all the test programs as well as the tools for
|
||
generating binary locale data files. The "makedata"
|
||
project will be run automatically to convert the locale data files from
|
||
text format into icudata.dll.</li>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
|
||
mso-list:l11 level1 lfo34;tab-stops:list .5in'>Save the value of the <strong>TZ</strong>
|
||
environment variable and then set it to <strong>PST8PDT</strong>. </li>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
|
||
mso-list:l11 level1 lfo34;tab-stops:list .5in'>Reopen the
|
||
"allinone" project file and run the "intltest" test.
|
||
Reset the <strong>TZ</strong> value.</li>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
|
||
mso-list:l11 level1 lfo34;tab-stops:list .5in'>To run the C test suite,
|
||
set "cintltst" as the active project, repeat step 11 and then
|
||
run the "cintltst" test.</li>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
|
||
mso-list:l11 level1 lfo34;tab-stops:list .5in'>Build and run as outlined
|
||
above.</li>
|
||
</ol>
|
||
|
||
<p><b>Note: </b>To set the active configuration, two different possibilities are:
|
||
</p>
|
||
|
||
<ul type=disc>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
|
||
mso-list:l4 level1 lfo37;tab-stops:list .5in'>Choose "Build"
|
||
menu, select "Set Active Configuration", and select "Win32
|
||
Release" or "Win32 Debug".</li>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
|
||
mso-list:l4 level1 lfo37;tab-stops:list .5in'>Another way is to select
|
||
"Customize" in the "Tools" menu, select the
|
||
"Toolbars" tab, enable "Build" instead of "Build
|
||
Minibar", and click on "Close". This will bring up a
|
||
toolbar which you can move alongside the other permanent toolbars at the top
|
||
of the MSVC window. The advantage is that you now have an easy-to-reach
|
||
pop-up menu that will always show the currently selected active
|
||
configuration. Or, you can drag the project and configuration selections
|
||
and drop them on the menu bar for later selection.</li>
|
||
</ul>
|
||
|
||
<p>It is also possible to build each library individually, using the workspaces
|
||
in each respective directory. They have to be built in the following order: <br>
|
||
1. common <br>
|
||
2. i18n <br>
|
||
3. makedata (which invokes makeconv,
|
||
genrb, gencol, genccode etc.)<br>
|
||
4. ctestfw <br>
|
||
5. intltest and cintltst, if you
|
||
want to run the test suite. <br>
|
||
Regarding the test suite, please read the directions in <a
|
||
href="docs/intltest.html">docs/intltest.html</a> and <a
|
||
href="docs/cintltst.html">docs/cintltst.html</a> </p>
|
||
|
||
<h3>How to Install/Build on Unix</h3>
|
||
|
||
<p>There is a set of Makefiles for Unix that supports Linux w/gcc, Solaris
|
||
w/gcc and Workshop CC, AIX w/xlc and OS/390 with C++.</p>
|
||
|
||
<p>Building International Components for Unicode on Unix requires: </p>
|
||
|
||
<p>A UNIX C++ compiler (gcc, cc, xlc_r, etc.) installed on the target
|
||
machine. A recent version of GNU make (3.7+). OS/390 gnu utilities
|
||
for both make (gmake) and zip (gzip/gunzip) can be found at the MKS web site at
|
||
<a href="http://www.mks.com">http://www.mks.com</a>. Please do a search
|
||
on "os/390".</p>
|
||
|
||
<p>The steps are: </p>
|
||
|
||
<ol start=1 type=1>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
|
||
mso-list:l2 level1 lfo40;tab-stops:list .5in'>Unzip the icuXXXX.tar (or
|
||
icuXXXX.tgz) file.</li>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
|
||
mso-list:l2 level1 lfo40;tab-stops:list .5in'>Before running the test programs
|
||
or samples, please set the environment variable <strong>ICU_DATA</strong>,
|
||
the full pathname of the data directory, to indicate where the locale data
|
||
files and conversion mapping tables are. If this variable is not
|
||
set, the default user data directory will be used.</li>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
|
||
mso-list:l2 level1 lfo40;tab-stops:list .5in'>Change directory to the
|
||
"icu/source".</li>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
|
||
mso-list:l2 level1 lfo40;tab-stops:list .5in'>If it is not already set,
|
||
please set the executable flag for the following files (by executing
|
||
'chmod +x' command): configure, install.sh and config.*</li>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
|
||
mso-list:l2 level1 lfo40;tab-stops:list .5in'>You also need to set other
|
||
environment variables for different build systems. Use this <a
|
||
href="docs/build_env.htm">table</a> or provided <a
|
||
href="source/runConfigureICU">script</a>.</li>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
|
||
mso-list:l2 level1 lfo40;tab-stops:list .5in'>Type "./configure"
|
||
or type "./configure --help" to print the available options.</li>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
|
||
mso-list:l2 level1 lfo40;tab-stops:list .5in'>Type "make" to compile
|
||
the libraries and all the data files. On OS/390, both IEEE binary
|
||
floating point and native S/390 hexadecimal floating point calculations
|
||
are supported. The default is to build with native floating-point
|
||
support. Please set the environment variable IEEE390=1 if you would
|
||
like to make the ICU DLLs with IEEE floating point support.</li>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
|
||
mso-list:l2 level1 lfo40;tab-stops:list .5in'>Optionally, type "make
|
||
check" to verify the test suite.</li>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
|
||
mso-list:l2 level1 lfo40;tab-stops:list .5in'>Type "make
|
||
install" to install.</li>
|
||
</ol>
|
||
|
||
<p>It is also possible to build each library individually, using the Makefiles
|
||
in each respective directory. They have to be built in the following order: <br>
|
||
1. common <br>
|
||
2. i18n <br>
|
||
3. makeconv <br>
|
||
4. genrb<br>
|
||
5. gencol<br>
|
||
6. gentz<br>
|
||
7. genccode<br>
|
||
8. ctestfw <br>
|
||
9. intltest and cintltst, if you
|
||
want to run the test suite. <br>
|
||
Regarding the test suite, please read the directions in <a
|
||
href="docs/intltest.html">docs/intltest.html</a> and <a
|
||
href="docs/cintltst.html">docs/cintltst.html</a> </p>
|
||
|
||
<h1><a name=datahandling>How ICU handles data</a></h1>
|
||
|
||
<span style='mso-bookmark:datahandling'></span>
|
||
|
||
<h3><u>How to add a locale data file</u></h3>
|
||
|
||
<p>To add locale data files to International Components for Unicode do the
|
||
following: </p>
|
||
|
||
<p style='margin-right:.5in;margin-left:.5in'>1. Create a file containing the
|
||
key-value pairs whose values you are overriding from the parent locale data
|
||
file. <br>
|
||
Make sure the filename is the locale ID with the extension
|
||
".txt". We recommend you copy the parent file and change the values <br>
|
||
that need to be changed, and remove all other key-value pairs. Be sure
|
||
to update the locale ID key (the outmost brace) with <br>
|
||
the name of the locale ID you are creating.</p>
|
||
|
||
<p style='margin-right:.5in;margin-left:.5in'>2. Name the file with locale ID
|
||
you are creating with a ".txt" at the end.</p>
|
||
|
||
<p style='margin-right:1.0in;margin-left:1.0in'>e.g.
|
||
fr_BF.txt <br>
|
||
Would create a locale that inherits all the key-value pairs from fr.txt.</p>
|
||
|
||
<p style='margin-right:.5in;margin-left:.5in'>3. Add the name of that file
|
||
(without the ".txt" extension) as a single line in
|
||
"index.txt" file in the default locale directory (icu/data/).</p>
|
||
|
||
<p style='margin-right:.5in;margin-left:.5in'>4. Regenerate the data DLL
|
||
file. Please see "<a href="#HowToInstall">How to Install</a>"
|
||
section for more details on how to verify the ICU release.</p>
|
||
|
||
<p><a name=addrbdatatoapp></a><b><u><span style='font-size:13.5pt'>How to add
|
||
resource bundle data to your application</span></u></b> </p>
|
||
|
||
<p>Adding resource bundle data to your application is quite simple: </p>
|
||
|
||
<p>Create resource bundle files with the right format and names in a directory
|
||
for resource bundles you create in your application directory tree (for more
|
||
information on the format of these files, see <a
|
||
href="../icuhtml/ResourceBundle.html#DOC.DOCU">resource bundle documentation</a>
|
||
or <a
|
||
href="http://www.ibm.com/java/education/international-unicode/unicodec.html">resource
|
||
bundle format</a>). <br>
|
||
Please note that resource bundle tag names should contain only invariant 7-bit
|
||
ASCII characters (e.g. ones from the following set: A-Z, a-z, 0-9, &lt;SP&gt;,
|
||
", %, &amp;, `, (, ), *, +, ,, -, ., /, :, ;, &lt;, =, &gt;, ?, _).<br>
|
||
Use that same directory name (absolute path) when instantiating a resource
|
||
bundle at run time.</p>
|
||
|
||
<h3><a name=WhereCollation></a><u>Where Collation Data is stored</u></h3>
|
||
|
||
<p>Collation data is stored in a single directory on a local disk. Each
|
||
locale's data is stored in a corresponding ASCII text file indicated by a
|
||
"CollationElements" tag. For instance, the data for de_CH is stored
|
||
with a tag "CollationElements" in a file named "de_CH.txt".
|
||
Reading the collation data from these files can be time-consuming, especially
|
||
for large pieces of data that occur in languages such as Japanese. For this
|
||
reason, the Collation Framework implements a second file format, a
|
||
performance-optimized, non-portable, binary format. These binary files are
|
||
generated automatically by the framework the first time a collation table is
|
||
parsed. They have names of the form "de_CH.col". Once the files are
|
||
generated by the framework, future loading of those collations occurs from the
|
||
binary file, rather than the text file, at much higher speed. </p>
|
||
|
||
<p>In general, you don't have to do anything special with these files. They can
|
||
be generated directly by using the "gencol" tool. In addition,
|
||
they can also be generated and used automatically by the framework, without
|
||
intervention on your part. However, there are situations in which you will have
|
||
to regenerate them. To do so, you must manually delete the ".col"
|
||
files from your collation data directory and re-run the gencol tool.</p>
|
||
|
||
<p>You will need to regenerate your ".col" files in the following
|
||
circumstances: </p>
|
||
|
||
<ol start=1 type=1>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
|
||
mso-list:l0 level1 lfo43;tab-stops:list .5in'>You are moving your data to
|
||
another platform. Since the ".col" files are non-portable,
|
||
you must make sure they are regenerated.</li>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
|
||
mso-list:l0 level1 lfo43;tab-stops:list .5in'><b>DO NOT </b>copy them from
|
||
one platform to another.</li>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
|
||
mso-list:l0 level1 lfo43;tab-stops:list .5in'>You have changed the
|
||
"CollationElements" data in the locale's ".txt"
|
||
file. Note that if you change the default rules for some reason,
|
||
which underlie all collations, then you will have to rebuild ALL your
|
||
".col" files, since they all are merged with the default rule
|
||
set.</li>
|
||
</ol>
|
||
|
||
<h3><a name=CharsetConvert></a><u>Character Set Conversion Information</u></h3>
|
||
|
||
<p>The charset conversion library provides ways to convert simple text strings
|
||
(e.g., char*) such as ISO 8859-1 to and from Unicode. The objective is to
|
||
provide clean, simple, reliable, portable and adaptable data structures and
|
||
algorithms to support the International Components for Unicode's character
|
||
codeset Conversion APIs. The conversion data in the library originated from the
|
||
NLTC lab in IBM. The IBM character set conversion tables are publicly available
|
||
in the published IBM document called "CHARACTER DATA REPRESENTATION
|
||
ARCHITECTURE - REFERENCE AND REGISTRY". The character set conversion
|
||
library includes single-byte, double-byte and some UCS encodings to and from Unicode.
|
||
This document can be ordered through Mechanicsburg and it comes with 2 CD ROMs
|
||
which have machine-readable conversion tables on them. The license agreement is
|
||
included in International Components for Unicode agreement. </p>
|
||
|
||
<p>Click <a href="data/convrtrs.txt">here</a> to view converters implemented in
|
||
ICU. To see converters in action, please visit <a
|
||
href="http://oss.software.ibm.com/developerworks/opensource/icu/localeexplorer/?converter&"><span
|
||
style='color:black'>http://oss.software.ibm.com/developerworks/opensource/icu/localeexplorer/?converter&</span></a></p>
|
||
|
||
<p>To order the document in the US you can call 1-800-879-2755 and request
|
||
document number SC09-2190-00. The cost of this publication is $75.00 US not
|
||
including tax. </p>
|
||
|
||
<h3><a name=ProgrammingNotes></a><u>Programming Notes</u></h3>
|
||
|
||
<h4><u>Reporting Errors</u></h4>
|
||
|
||
<p>In order for the code to be portable, only a subset of the C++ language that
|
||
will compile correctly on even the oldest of C++ compilers (and also to provide
|
||
a usable C interface) can be used in the implementation, which means that
|
||
there is no use of the C++ exception mechanism in the code. </p>
|
||
|
||
<p>After considering many alternatives, the decision was that every function
|
||
that can fail takes an error-code parameter by reference. This is always the
|
||
last parameter in the function’s parameter list. The ErrorCode type is defined
|
||
as an enumerated type. Zero represents no error, positive values represent
|
||
errors, and negative values represent non-error status codes. Macros were
|
||
provided, SUCCESS and FAILURE, to check the error code. </p>
|
||
|
||
<p>The ErrorCode parameter is an input-output parameter. Every function tests
|
||
the error code before doing anything else, and immediately exits if it’s a
|
||
FAILURE error code. If the function fails later on, it sets the error code
|
||
appropriately and exits without doing any other work (except, of course, any
|
||
cleanup it has to do). If the function encounters a non-error condition it
|
||
wants to signal (such as "encountered an unmapped character" in
|
||
transcoding), it sets the error code appropriately and continues. Otherwise,
|
||
the function leaves the error code unchanged. </p>
|
||
|
||
<p>Generally, only functions that don’t take an ErrorCode parameter, but call
|
||
functions that do, have to declare one. Almost all functions that take an
|
||
ErrorCode parameter and also call other functions that do merely have to
|
||
propagate the error code they were passed down to the functions they call.
|
||
Functions that declare a new ErrorCode parameter must initialize it to
|
||
ZERO_ERROR before calling any other functions. </p>
|
||
|
||
<p>The rationale here is to allow a function to call several functions (that
|
||
take error codes) in a row without having to check the error code after each
|
||
one. [A function usually will have to check the error code before doing any
|
||
other processing, however, since it is supposed to stop immediately after
|
||
receiving an error code.] Propagating the error-code parameter down the call
|
||
chain saves the programmer from having to declare one everywhere, and also
|
||
allows us to more closely mimic the C++ exception protocol. </p>
|
||
|
||
<h4><u>C Function and Data Type Naming</u></h4>
|
||
|
||
<p><b>Function names.</b> If a function is identical (or almost identical) to
|
||
an ANSI or POSIX function, we give it the same name and (as much as possible)
|
||
the same parameter list. A "u" is prepended onto the beginning of the
|
||
name. </p>
|
||
|
||
<p>For functions that exist prior to version 1.2.1, the function name
|
||
should begin with a lower-case "u". After the "u" is a
|
||
short code identifying the subsystem it belongs to (e.g., "loc",
|
||
"rb", "cnv", "coll", etc.). This code is
|
||
separated from the actual function name by an underscore, and the actual
|
||
function name can be anything. For example, </p>
|
||
|
||
<pre style='margin-top:0in;margin-right:.5in;margin-bottom:0in;margin-left:
|
||
.5in;margin-bottom:.0001pt'><span style='font-size:7.5pt'>UChar* uloc_getLanguage(...);<o:p></o:p></span></pre><pre
|
||
style='margin-top:0in;margin-right:.5in;margin-bottom:0in;margin-left:.5in;
|
||
margin-bottom:.0001pt'><span style='font-size:7.5pt'>void uloc_setDefaultLocale(...);<o:p></o:p></span></pre><pre
|
||
style='margin-top:0in;margin-right:.5in;margin-bottom:0in;margin-left:.5in;
|
||
margin-bottom:.0001pt'><span style='font-size:7.5pt'>UChar* ures_getString(...);</span></pre>
|
||
|
||
<p><b>Struct and enum type names.</b> For structs and enum types, the rule is
|
||
that their names begin with a capital "U." There is no underscore for
|
||
struct names.</p>
|
||
|
||
<pre><span style='font-size:7.5pt'> UResourceBundle;<o:p></o:p></span></pre><pre><span
|
||
style='font-size:7.5pt'> UCollator;<o:p></o:p></span></pre><pre><span
|
||
style='font-size:7.5pt'><span style="mso-spacerun: yes"> </span>UCollationResult;</span></pre>
|
||
|
||
<p><b>Enum value names.</b> Enumeration values have names that begin with
|
||
"UXXX" where XXX stands for the name of the functional category.</p>
|
||
|
||
<pre style='margin-top:0in;margin-right:.5in;margin-bottom:0in;margin-left:
|
||
.5in;margin-bottom:.0001pt'><span style='font-size:7.5pt'>UNUM_DECIMAL;<o:p></o:p></span></pre><pre
|
||
style='margin-top:0in;margin-right:.5in;margin-bottom:0in;margin-left:.5in;
|
||
margin-bottom:.0001pt'><span style='font-size:7.5pt'>UCOL_GREATER;</span></pre>
|
||
|
||
<p><b>Macro names.</b> Macro names are in all caps, but there are currently no
|
||
other requirements. </p>
|
||
|
||
<p><b>Constant names.</b> Many constant names (constants defined with
|
||
"const", not macros defined with "#define" that are used as
|
||
constants) begin with a lowercase k, but this isn’t universally enforced. </p>
|
||
|
||
<h4><u>Preflighting and Overflow Handling</u></h4>
|
||
|
||
<p>In ICU's C APIs, the user needs to adhere to the following principles for
|
||
consistency across all functional categories: </p>
|
||
|
||
<ol start=1 type=1>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
|
||
mso-list:l8 level1 lfo46;tab-stops:list .5in'>All the Unicode string
|
||
processing should be expressed in terms of a UChar* buffer that is always
|
||
null terminated.</li>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
|
||
mso-list:l8 level1 lfo46;tab-stops:list .5in'>The APIs assume that the
|
||
input string parameters are statically allocated fixed-size character
|
||
buffers.</li>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
|
||
mso-list:l8 level1 lfo46;tab-stops:list .5in'>When the value a function is
|
||
going to return is already stored as a constant value in static space
|
||
(e.g., it’s coming from a fixed table, or is stored in a cache), the
|
||
function will just return the const UChar* pointer.</li>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
|
||
mso-list:l8 level1 lfo46;tab-stops:list .5in'>When the function can’t
|
||
return a UChar* to storage the user doesn’t have to delete, the caller
|
||
needs to pass in a pointer to a character buffer that the function can
|
||
fill with the result. This pointer needs to be accompanied by an int32_t
|
||
parameter that gives the size of the buffer.</li>
|
||
</ol>
|
||
|
||
<p>To find out how large the result buffer should be, ICU provides a <strong>preflighting</strong>
|
||
C interface. The interface works like this: </p>
|
||
|
||
<ol start=1 type=1>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
|
||
mso-list:l9 level1 lfo49;tab-stops:list .5in'>When using the "<b>preflighting</b>"
|
||
option: you need to pass the function a NULL pointer for the buffer
|
||
pointer, and the function returns the actual size of the result. You can
|
||
then choose to allocate a buffer of the correct size and re-run the
|
||
operation if you would like to.</li>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
|
||
mso-list:l9 level1 lfo49;tab-stops:list .5in'>After allocating a buffer of
|
||
some reasonable size on the stack and passing that to the function, if the
|
||
result can fit in that buffer, everything works fine. If the result
|
||
doesn’t fit, the function will return the actual size needed. You
|
||
can then allocate a buffer of the correct size on the heap and try calling
|
||
the same function again.</li>
|
||
<li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;
|
||
mso-list:l9 level1 lfo49;tab-stops:list .5in'>Now you have created a
|
||
buffer of some reasonable size on the stack and passed it to the
|
||
function. If you don't care about the completeness of the result and
|
||
the allocated buffer is too small, you can continue on using the truncated
|
||
result.</li>
|
||
</ol>
|
||
|
||
<p>The following three options demonstrate how to use the preflighting
|
||
interface, </p>
|
||
|
||
<pre style='margin-top:0in;margin-right:.5in;margin-bottom:0in;margin-left:
|
||
.5in;margin-bottom:.0001pt'><span style='font-size:7.5pt'>/** <o:p></o:p></span></pre><pre
|
||
style='margin-top:0in;margin-right:.5in;margin-bottom:0in;margin-left:.5in;
|
||
margin-bottom:.0001pt'><span style='font-size:7.5pt'> * @param result is a pointer to where the actual result will be.<o:p></o:p></span></pre><pre
|
||
style='margin-top:0in;margin-right:.5in;margin-bottom:0in;margin-left:.5in;
|
||
margin-bottom:.0001pt'><span style='font-size:7.5pt'> * @param maxResultSize is the number of characters the buffer pointed to by result has room for. <o:p></o:p></span></pre><pre
|
||
style='margin-top:0in;margin-right:.5in;margin-bottom:0in;margin-left:.5in;
|
||
margin-bottom:.0001pt'><span style='font-size:7.5pt'> * @return The actual length of the result (counting the terminating null)<o:p></o:p></span></pre><pre
|
||
style='margin-top:0in;margin-right:.5in;margin-bottom:0in;margin-left:.5in;
|
||
margin-bottom:.0001pt'><span style='font-size:7.5pt'> */<o:p></o:p></span></pre><pre
|
||
style='margin-top:0in;margin-right:.5in;margin-bottom:0in;margin-left:.5in;
|
||
margin-bottom:.0001pt'><span style='font-size:7.5pt'>int32_t doSomething( /* input params */, UChar* result,<o:p></o:p></span></pre><pre
|
||
style='margin-top:0in;margin-right:.5in;margin-bottom:0in;margin-left:.5in;
|
||
margin-bottom:.0001pt'><span style='font-size:7.5pt'> int32_t maxResultSize, UErrorCode* err);</span></pre>
|
||
|
||
<p>In this sample, if the actual result doesn’t fit in the space available in <span
|
||
style='font-size:10.0pt;font-family:"Courier New"'>maxResultSize</span>, this
|
||
function returns the amount of space necessary to hold the result, and result
|
||
holds as many characters of the actual result as possible. If you don’t care
|
||
about this, no further action is necessary. If you <i>do </i>care about the
|
||
truncated characters, you can then allocate a buffer on the heap of the size
|
||
specified by the return value and call the function again, passing <i>that </i>buffer’s
|
||
address for result. </p>
|
||
|
||
<p>All preflighting functions have a fill-in <span style='font-size:10.0pt;
|
||
font-family:"Courier New"'>ErrorCode</span> parameter (and follow the normal <span
|
||
style='font-size:10.0pt;font-family:"Courier New"'>ErrorCode</span> rules),
|
||
even if they are not currently doing so. Buffer overflow would be treated as a
|
||
FAILURE error condition, but would <i>not</i> be reported when the caller
|
||
passes in NULL for <span style='font-size:10.0pt;font-family:"Courier New"'>actualResultSize</span>
|
||
(presumably, a NULL for this parameter means the client doesn’t care if he got
|
||
a buffer overflow). All other failing error conditions will overwrite the
|
||
"buffer overflow" error, e.g. <span style='font-family:"Courier New"'>MISSING_RESOURCE_ERROR</span>
|
||
etc.</p>
|
||
|
||
<h4><u>Arrays as return types</u></h4>
|
||
|
||
<p>Returning an array of strings is fairly easy in C++, but very hard in C.
|
||
Instead of returning the array pointer directly, we opted for an iterative
|
||
interface instead: split the function into two functions. One returns the
|
||
number of elements in the array, and the other one returns a single specified
|
||
element from the array.</p>
|
||
|
||
<pre style='margin-top:0in;margin-right:.5in;margin-bottom:0in;margin-left:
|
||
.5in;margin-bottom:.0001pt'><span style='font-size:7.5pt'>int32_t countArrayItems(/* params */);<o:p></o:p></span></pre><pre
|
||
style='margin-top:0in;margin-right:.5in;margin-bottom:0in;margin-left:.5in;
|
||
margin-bottom:.0001pt'><span style='font-size:7.5pt'>int32_t getArrayElement(int32_t elementIndex, /* other params */,<o:p></o:p></span></pre><pre
|
||
style='margin-top:0in;margin-right:.5in;margin-bottom:0in;margin-left:.5in;
|
||
margin-bottom:.0001pt'><span style='font-size:7.5pt'> UChar* result, int32_t maxResultSize, UErrorCode* err);</span></pre>
|
||
|
||
<p>In this case, iterating across all the elements in the array would amount to
|
||
a call to the count() function followed by multiple calls to the getElement()
|
||
function. </p>
|
||
|
||
<pre style='margin-top:0in;margin-right:.5in;margin-bottom:0in;margin-left:
|
||
.5in;margin-bottom:.0001pt'><span style='font-size:7.5pt'>for (i = 0; i &lt; countArrayItems(...); i++) {<o:p></o:p></span></pre><pre
|
||
style='margin-top:0in;margin-right:.5in;margin-bottom:0in;margin-left:.5in;
|
||
margin-bottom:.0001pt'><span style='font-size:7.5pt'> UChar element[50];<o:p></o:p></span></pre><pre
|
||
style='margin-top:0in;margin-right:.5in;margin-bottom:0in;margin-left:.5in;
|
||
margin-bottom:.0001pt'><span style='font-size:7.5pt'> getArrayElement(i, ..., element, 50, &amp;err);<o:p></o:p></span></pre><pre
|
||
style='margin-top:0in;margin-right:.5in;margin-bottom:0in;margin-left:.5in;
|
||
margin-bottom:.0001pt'><span style='font-size:7.5pt'> /* do something with element */<o:p></o:p></span></pre><pre
|
||
style='margin-top:0in;margin-right:.5in;margin-bottom:0in;margin-left:.5in;
|
||
margin-bottom:.0001pt'><span style='font-size:7.5pt'>}</span></pre>
|
||
|
||
<p>In the case of the resource bundle <span style='font-family:"Courier New"'>ures_XXXX</span>
|
||
functions returning 2-dimensional arrays, the getElement() function takes both
|
||
x and y coordinates for the desired element, and the count() function returns
|
||
the number of arrays (x axis). Since the size of each array element in
|
||
the resource 2-D arrays should always be the same, this provides an easy-to-use
|
||
C interface. </p>
|
||
|
||
<pre style='margin-top:0in;margin-right:.5in;margin-bottom:0in;margin-left:
|
||
.5in;margin-bottom:.0001pt'><span style='font-size:7.5pt'>void countArrayItems(int32_t* rows, int32_t* columns,<o:p></o:p></span></pre><pre
|
||
style='margin-top:0in;margin-right:.5in;margin-bottom:0in;margin-left:.5in;
|
||
margin-bottom:.0001pt'><span style='font-size:7.5pt'> /* other params */);<o:p></o:p></span></pre><pre
|
||
style='margin-top:0in;margin-right:.5in;margin-bottom:0in;margin-left:.5in;
|
||
margin-bottom:.0001pt'><span style='font-size:7.5pt'><![if !supportEmptyParas]> <![endif]><o:p></o:p></span></pre><pre
|
||
style='margin-top:0in;margin-right:.5in;margin-bottom:0in;margin-left:.5in;
|
||
margin-bottom:.0001pt'><span style='font-size:7.5pt'>int32_t get2dArrayElement(int32_t rowIndex, <o:p></o:p></span></pre><pre
|
||
style='margin-top:0in;margin-right:.5in;margin-bottom:0in;margin-left:.5in;
|
||
margin-bottom:.0001pt'><span style='font-size:7.5pt'> int32_t colIndex,<o:p></o:p></span></pre><pre
|
||
style='margin-top:0in;margin-right:.5in;margin-bottom:0in;margin-left:.5in;
|
||
margin-bottom:.0001pt'><span style='font-size:7.5pt'> /* other params */, <o:p></o:p></span></pre><pre
|
||
style='margin-top:0in;margin-right:.5in;margin-bottom:0in;margin-left:.5in;
|
||
margin-bottom:.0001pt'><span style='font-size:7.5pt'> UChar* result, <o:p></o:p></span></pre><pre
|
||
style='margin-top:0in;margin-right:.5in;margin-bottom:0in;margin-left:.5in;
|
||
margin-bottom:.0001pt'><span style='font-size:7.5pt'> int32_t maxResultSize,<o:p></o:p></span></pre><pre
|
||
style='margin-top:0in;margin-right:.5in;margin-bottom:0in;margin-left:.5in;
|
||
margin-bottom:.0001pt'><span style='font-size:7.5pt'> UErrorCode* err);</span></pre>
|
||
|
||
<h3><a name=WhereToFindMore></a><u>Where to Find More Information</u></h3>
|
||
|
||
<p><a href="http://oss.software.ibm.com/icu">http://oss.software.ibm.com/icu</a>
|
||
is a pointer to general information about the International Components for
|
||
Unicode. </p>
|
||
|
||
<p><a href="docs/udata.html">docs/udata.html</a> is a raw draft of ICU data
|
||
handling.</p>
|
||
|
||
<p><a href="../icuhtml/aindex.html">html/aindex.html</a> is an alphabetical
|
||
index to detailed API documentation. <br>
|
||
<a href="../icuhtml/HIERjava.html">html/HIERjava.html</a> is a hierarchical
|
||
index to detailed API documentation. </p>
|
||
|
||
<p><a href="docs/collate.html">docs/collate.html</a> is an overview of
|
||
Collation. </p>
|
||
|
||
<p><a href="docs/BreakIterator.html">docs/BreakIterator.html</a> is a diagram
|
||
showing how BreakIterator processes text elements. </p>
|
||
|
||
<p><a href="http://www.ibm.com/unicode">http://www.ibm.com/unicode</a> is a
|
||
pointer to information on how to make applications global. <br>
|
||
</p>
|
||
|
||
<h3><a name=SubmittingComments></a><u>Submitting Comments, Requesting Features
|
||
and Reporting Bugs</u></h3>
|
||
|
||
<p>To submit comments, request features and report bugs, please contact
|
||
us. While we are not able to respond individually to each comment, we do
|
||
review all comments. Send Internet email to <a
|
||
href="mailto:icu@oss.software.ibm.com">icu@oss.software.ibm.com</a>.</p>
|
||
|
||
|
||
<div class=MsoNormal align=center style='text-align:center'>
|
||
|
||
<hr size=2 width="100%" align=center>
|
||
|
||
</div>
|
||
|
||
|
||
<p>Copyright &copy; 1997-2000 International Business Machines Corporation and
|
||
others. All Rights Reserved.<br>
|
||
IBM Center for Java Technology Silicon Valley, <br>
|
||
10275 N De Anza Blvd., Cupertino, CA 95014 <br>
|
||
All rights reserved. </p>
|
||
|
||
|
||
<div class=MsoNormal align=center style='text-align:center'>
|
||
|
||
<hr size=2 width="100%" align=center>
|
||
|
||
</div>
|
||
|
||
</div>
|
||
|
||
</body>
|
||
|
||
</html>
|