ICU-434 update for library/path changes, added more examples

X-SVN-Rev: 2890
This commit is contained in:
Steven R. Loomis 2000-11-09 16:11:21 +00:00
parent d70dac9d40
commit 93137aa35d
3 changed files with 250 additions and 21 deletions

View File

@ -1,5 +1,5 @@
# Copyright (c) 2000 IBM, Inc. and others # Copyright (c) 2000 IBM, Inc. and others
# conversion sample code $Revision: 1.2 $ # conversion sample code $Revision: 1.3 $
# Usage: # Usage:
@ -9,7 +9,7 @@
# - do 'make install' of icu # - do 'make install' of icu
# #
# - change the following line to point to the $(prefix) that # - change the following line to point to the $(prefix) that
# was used (will look for $(prefix)/share/icu/Makefile.inc ) # was used (will look for $(prefix)/lib/icu/Makefile.inc )
# OR # OR
# set the variable ICU_PREFIX to point at $(prefix) # set the variable ICU_PREFIX to point at $(prefix)
# #
@ -18,9 +18,9 @@
ICU_DEFAULT_PREFIX=/home/srl/II ICU_DEFAULT_PREFIX=/home/srl/II
ifeq ($(strip $(ICU_PREFIX)),) ifeq ($(strip $(ICU_PREFIX)),)
ICU_INC=$(ICU_DEFAULT_PREFIX)/share/icu/Makefile.inc ICU_INC=$(ICU_DEFAULT_PREFIX)/lib/icu/Makefile.inc
else else
ICU_INC=$(ICU_PREFIX)/share/icu/Makefile.inc ICU_INC=$(ICU_PREFIX)/lib/icu/Makefile.inc
endif endif
ICUPATH= ICUPATH=

View File

@ -175,8 +175,8 @@ void printUChar(UChar32 ch32)
} }
/******************************************************************* /*******************************************************************
Very simple C++ sample to convert the word 'Moscow' in Russian, followed Very simple C++ sample to convert the word 'Moscow' in Russian in Unicode,
by an exclamation mark (!) into the KOI8-R Russian code page. followed by an exclamation mark (!) into the KOI8-R Russian code page.
This example first creates a UnicodeString out of the Unicode chars. This example first creates a UnicodeString out of the Unicode chars.
@ -233,7 +233,8 @@ UErrorCode convsample_01()
/****************************************************** /******************************************************
Similar sample to the preceding one. Similar sample to the preceding one. Converting FROM unicode
to koi8-r.
You must call ucnv_close to clean up the memory used by the You must call ucnv_close to clean up the memory used by the
converter. converter.
@ -423,6 +424,137 @@ UErrorCode convsample_05()
} }
#undef BUFFERSIZE #undef BUFFERSIZE
#define BUFFERSIZE 1024
/* One slot of the per-code-point frequency table built by convsample_06(). */
typedef struct
{
UChar32 codepoint;  /* code point this slot describes; convsample_06 sets it to the slot's own index */
uint32_t frequency; /* number of times the code point has been seen; reset to 0 before counting */
} CharFreqInfo;
/**
 * Sample 06 (C API): frequency distribution of the characters in a UTF-8 file.
 *
 * Reads 'data06.ut8' in BUFFERSIZE-byte chunks, decodes it one code point at
 * a time with ucnv_getNextUChar(), and counts
 *   - the total number of code points and how many of them are letters,
 *   - the digraphs "ie" and "g" + U+0127 (case-insensitively),
 *   - the number of occurrences of every BMP code point.
 * The totals and one line per code point that occurred are printed to stdout.
 *
 * @return U_ZERO_ERROR on success;
 *         U_FILE_ACCESS_ERROR if the data file cannot be opened;
 *         U_MEMORY_ALLOCATION_ERROR if the frequency table cannot be allocated;
 *         the ucnv_open() error code if the converter cannot be opened;
 *         U_UNSUPPORTED_ERROR if a code point outside the table is found.
 *
 * NOTE(review): a multi-byte sequence that straddles two fread() chunks is
 * handed to the converter in two pieces and is reported as a conversion
 * error rather than being reassembled -- confirm whether that matters for
 * the sample data.
 */
UErrorCode convsample_06()
{
  printf("\n\n==============================================\n"
         "Sample 06: C: frequency distribution of letters in a UTF-8 document\n");

  /* Everything is declared (and the owned resources are NULL-initialised)
     before the first 'goto cleanup' so that the cleanup code is always safe
     to run and no initialisation is jumped over. */
  FILE *f = NULL;
  int32_t count;
  char inBuf[BUFFERSIZE];
  const char *source;
  const char *sourceLimit;
  int32_t uBufSize = 0;
  UConverter *conv = NULL;
  UErrorCode status = U_ZERO_ERROR;   /* per-call ICU status, reset after each error */
  UErrorCode result = U_ZERO_ERROR;   /* value returned to the caller */
  uint32_t letters=0, total=0;
  CharFreqInfo *info = NULL;
  UChar32 charCount = 0x10000;  /* increase this if you want to handle non bmp.. todo: automatically bump it.. */
  UChar32 p;
  uint32_t ie = 0;
  uint32_t gh = 0;
  UChar32 l = 0;                /* previous code point, for digraph detection */

  /* The converter consumes raw bytes, so open the file in binary mode. */
  f = fopen("data06.ut8", "rb");
  if(!f)
  {
    fprintf(stderr, "Couldn't open file 'data06.ut8' (UTF-8 data file).\n");
    return U_FILE_ACCESS_ERROR;
  }

  info = (CharFreqInfo*)malloc(sizeof(CharFreqInfo) * charCount);
  if(!info)
  {
    fprintf(stderr, " Couldn't allocate %lu bytes for freq counter\n",
            (unsigned long)(sizeof(CharFreqInfo)*charCount));
    result = U_MEMORY_ALLOCATION_ERROR;
    goto cleanup;
  }

  /* reset frequencies */
  for(p=0;p<charCount;p++)
  {
    info[p].codepoint = p;
    info[p].frequency = 0;
  }

  // **************************** START SAMPLE *******************
  conv = ucnv_open("utf-8", &status);
  if(U_FAILURE(status))
  {
    fprintf(stderr, "Couldn't open utf-8 converter: %s\n", u_errorName(status));
    result = status;
    goto cleanup;
  }

  /* Informational only: this sample decodes one code point at a time and
     therefore needs no UChar output buffer. */
  uBufSize = (BUFFERSIZE/ucnv_getMinCharSize(conv));
  printf("input bytes %d / min chars %d = %d UChars\n",
         BUFFERSIZE, ucnv_getMinCharSize(conv), uBufSize);

  // grab another buffer's worth
  while((count = (int32_t)fread(inBuf, 1, BUFFERSIZE, f)) > 0)
  {
    // Convert bytes to unicode
    source = inBuf;
    sourceLimit = inBuf + count;

    while(source < sourceLimit)
    {
      p = ucnv_getNextUChar(conv, &source, sourceLimit, &status);
      if(U_FAILURE(status))
      {
        /* Report the problem, then carry on with the rest of the input. */
        fprintf(stderr, "%s @ %u\n", u_errorName(status), (unsigned)total);
        status = U_ZERO_ERROR;
        continue;
      }
      U_ASSERT(status);   /* U_ASSERT is defined elsewhere in this file */
      total++;

      if(u_isalpha(p))
        letters++;

      if((u_tolower(l) == 'i') && (u_tolower(p) == 'e'))
        ie++;

      /* U+0127 is LATIN SMALL LETTER H WITH STROKE */
      if((u_tolower(l) == 'g') && (u_tolower(p) == 0x0127))
        gh++;

      /* Valid table indexes are 0 .. charCount-1. */
      if(p < 0 || p >= charCount)
      {
        fprintf(stderr, "U+%06X: oh.., we only handle BMP characters so far.. redesign!\n", (unsigned)p);
        result = U_UNSUPPORTED_ERROR;
        goto cleanup;
      }
      info[p].frequency++;
      l = p;
    }
  }

  fclose(f);
  f = NULL;
  ucnv_close(conv);
  conv = NULL;

  printf("%u letters out of %u total UChars.\n", (unsigned)letters, (unsigned)total);
  printf("%u ie digraphs, %u gh digraphs.\n", (unsigned)ie, (unsigned)gh);

  /* The table is printed in code point order; it could be sorted by
     frequency first. */
  for(p=0;p<charCount;p++)
  {
    if(info[p].frequency)
    {
      printf("% 5d U+%06X ", (int)info[p].frequency, (unsigned)p);
      if(p <= 0xFFFF)
      {
        prettyPrintUChar((UChar)p);
      }
      printf("\n");
    }
  }
  // ***************************** END SAMPLE ********************

  printf("\n");

cleanup:
  /* Release whatever is still owned; pointers already released are NULL. */
  if(conv)
  {
    ucnv_close(conv);
  }
  free(info);
  if(f)
  {
    fclose(f);
  }

  return result;
}
#undef BUFFERSIZE
/******************************************************************* /*******************************************************************
Very simple C++ sample to convert a string into Unicode from SJIS Very simple C++ sample to convert a string into Unicode from SJIS
@ -799,6 +931,7 @@ UErrorCode convsample_41()
assert(U_SUCCESS(status)); assert(U_SUCCESS(status));
uBufSize = (BUFFERSIZE/conv->getMinBytesPerChar()); uBufSize = (BUFFERSIZE/conv->getMinBytesPerChar());
//uBufSize = 4;
printf("input bytes %d / min chars %d = %d UChars\n", printf("input bytes %d / min chars %d = %d UChars\n",
BUFFERSIZE, conv->getMinBytesPerChar(), uBufSize); BUFFERSIZE, conv->getMinBytesPerChar(), uBufSize);
uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)); uBuf = (UChar*)malloc(uBufSize * sizeof(UChar));
@ -843,6 +976,10 @@ UErrorCode convsample_41()
assert(fwrite(uBuf, sizeof(uBuf[0]), (target-uBuf), out) == assert(fwrite(uBuf, sizeof(uBuf[0]), (target-uBuf), out) ==
(size_t)(target-uBuf)); (size_t)(target-uBuf));
total += (target-uBuf); total += (target-uBuf);
fprintf(stderr, "srcLeft=%d, wrote %d, err %s\n",
sourceLimit - source, target-uBuf, u_errorName(status));
} while (source < sourceLimit); // while simply out of space } while (source < sourceLimit); // while simply out of space
} }
@ -863,7 +1000,7 @@ UErrorCode convsample_41()
// 46- C, UTF16 -> latin2 [data41.utf16 -> data46.out] // 46- C, UTF16 -> latin2 [data41.utf16 -> data46.out]
#define BUFFERSIZE 23 /* make it interesting :) */ #define BUFFERSIZE 24 /* make it interesting :) */
UErrorCode convsample_46() UErrorCode convsample_46()
{ {
@ -1075,8 +1212,9 @@ int main()
convsample_01(); // C++, u->koi8r, conv convsample_01(); // C++, u->koi8r, conv
convsample_02(); // C , u->koi8r, conv convsample_02(); // C , u->koi8r, conv
convsample_03(); // C, iterate convsample_03(); // C, iterate
// // convsample_04(); /* not written yet */ // convsample_04(); /* not written yet */
convsample_05(); // C, utf8->u, getNextUChar convsample_05(); // C, utf8->u, getNextUChar
convsample_06(); // C freq counter thingy
convsample_11(); // C++, sjis->u, conv convsample_11(); // C++, sjis->u, conv
convsample_12(); // C, sjis->u, conv convsample_12(); // C, sjis->u, conv
convsample_13(); // C, big5->u, getNextU convsample_13(); // C, big5->u, getNextU

View File

@ -0,0 +1,91 @@

// *******************************************************************************
// *
// * Copyright (C) 1997-2000, International Business Machines
// * Corporation and others. All Rights Reserved.
// *
// *******************************************************************************
fa {
Version { "x0.0" }
DayAbbreviations {
"ی∔",
"د∔",
"س∔",
"چ∔",
"پ∔",
"ج∔",
"ش∔",
}
DayNames {
"یی‌شنبه",
"دوشنبه",
"سه‌شنبه",
"چهارشنبه",
"پنج‌شنبه",
"جمعه",
"شنبه",
}
MonthAbbreviations {
"ژان",
"فور",
"مار",
"آور",
"مـه",
"ژون",
"ژوی",
"اوت",
"سپت",
"اكت",
"نوا",
"دسا",
"",
}
MonthNames {
"ژانویه",
"فورویه",
"مارس",
"آوریل",
"مه",
"ژوئن",
"ژوئیه",
"اوت",
"سپتامبر",
"اكتبر",
"نوامبر",
"دسامبر",
"",
}
NumberElements {
"٫",
"٬",
";",
"%",
"0",
"#",
"-",
"E",
"‰",
"∞",
"<22>",
}
NumberPatterns {
"#,##0.###;-#,##0.###",
"#,##0.#¤;#,##0.#- ¤",
"#,##0%",
}
LocaleID{ "29"}
// /**************** Info Below is needed ****************/
// CollationElements{}
// Countries{}
// Eras{}
// Languages{}
// ShortLanguage { }
// localPatternChars{}
//
}