ICU-434 update for library/path changes, added more examples
X-SVN-Rev: 2890
This commit is contained in:
parent
d70dac9d40
commit
93137aa35d
@ -1,5 +1,5 @@
|
||||
# Copyright (c) 2000 IBM, Inc. and others
|
||||
# conversion sample code $Revision: 1.2 $
|
||||
# conversion sample code $Revision: 1.3 $
|
||||
|
||||
|
||||
# Usage:
|
||||
@ -9,7 +9,7 @@
|
||||
# - do 'make install' of icu
|
||||
#
|
||||
# - change the following line to point to the $(prefix) that
|
||||
# was used (will look for $(prefix)/share/icu/Makefile.inc )
|
||||
# was used (will look for $(prefix)/lib/icu/Makefile.inc )
|
||||
# OR
|
||||
# set the variable ICU_PREFIX to point at $(prefix)
|
||||
#
|
||||
@ -18,9 +18,9 @@
|
||||
ICU_DEFAULT_PREFIX=/home/srl/II
|
||||
|
||||
ifeq ($(strip $(ICU_PREFIX)),)
|
||||
ICU_INC=$(ICU_DEFAULT_PREFIX)/share/icu/Makefile.inc
|
||||
ICU_INC=$(ICU_DEFAULT_PREFIX)/lib/icu/Makefile.inc
|
||||
else
|
||||
ICU_INC=$(ICU_PREFIX)/share/icu/Makefile.inc
|
||||
ICU_INC=$(ICU_PREFIX)/lib/icu/Makefile.inc
|
||||
endif
|
||||
ICUPATH=
|
||||
|
||||
|
@ -175,8 +175,8 @@ void printUChar(UChar32 ch32)
|
||||
}
|
||||
|
||||
/*******************************************************************
|
||||
Very simple C++ sample to convert the word 'Moscow' in Russian, followed
|
||||
by an exclamation mark (!) into the KOI8-R Russian code page.
|
||||
Very simple C++ sample to convert the word 'Moscow' in Russian in Unicode,
|
||||
followed by an exclamation mark (!) into the KOI8-R Russian code page.
|
||||
|
||||
This example first creates a UnicodeString out of the Unicode chars.
|
||||
|
||||
@ -233,7 +233,8 @@ UErrorCode convsample_01()
|
||||
|
||||
|
||||
/******************************************************
|
||||
Similar sample to the preceding one.
|
||||
Similar sample to the preceding one. Converting FROM unicode
|
||||
to koi8-r.
|
||||
You must call ucnv_close to clean up the memory used by the
|
||||
converter.
|
||||
|
||||
@ -423,6 +424,137 @@ UErrorCode convsample_05()
|
||||
}
|
||||
#undef BUFFERSIZE
|
||||
|
||||
#define BUFFERSIZE 1024
|
||||
typedef struct
|
||||
{
|
||||
UChar32 codepoint;
|
||||
uint32_t frequency;
|
||||
} CharFreqInfo;
|
||||
|
||||
UErrorCode convsample_06()
|
||||
{
|
||||
printf("\n\n==============================================\n"
|
||||
"Sample 06: C: frequency distribution of letters in a UTF-8 document\n");
|
||||
|
||||
FILE *f;
|
||||
int32_t count;
|
||||
char inBuf[BUFFERSIZE];
|
||||
const char *source;
|
||||
const char *sourceLimit;
|
||||
UChar *uBuf;
|
||||
int32_t uBufSize = 0;
|
||||
UConverter *conv;
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
uint32_t letters=0, total=0;
|
||||
|
||||
CharFreqInfo *info;
|
||||
UChar32 charCount = 0x10000; /* increase this if you want to handle non bmp.. todo: automatically bump it.. */
|
||||
UChar32 p;
|
||||
|
||||
uint32_t ie = 0;
|
||||
uint32_t gh = 0;
|
||||
UChar32 l = 0;
|
||||
|
||||
f = fopen("data06.ut8", "r");
|
||||
if(!f)
|
||||
{
|
||||
fprintf(stderr, "Couldn't open file 'data06.ut8' (UTF-8 data file).\n");
|
||||
return U_FILE_ACCESS_ERROR;
|
||||
}
|
||||
|
||||
info = (CharFreqInfo*)malloc(sizeof(CharFreqInfo) * charCount);
|
||||
if(!info)
|
||||
{
|
||||
fprintf(stderr, " Couldn't allocate %d bytes for freq counter\n", sizeof(CharFreqInfo)*charCount);
|
||||
}
|
||||
|
||||
/* reset frequencies */
|
||||
for(p=0;p<charCount;p++)
|
||||
{
|
||||
info[p].codepoint = p;
|
||||
info[p].frequency = 0;
|
||||
}
|
||||
|
||||
// **************************** START SAMPLE *******************
|
||||
conv = ucnv_open("utf-8", &status);
|
||||
assert(U_SUCCESS(status));
|
||||
|
||||
uBufSize = (BUFFERSIZE/ucnv_getMinCharSize(conv));
|
||||
printf("input bytes %d / min chars %d = %d UChars\n",
|
||||
BUFFERSIZE, ucnv_getMinCharSize(conv), uBufSize);
|
||||
uBuf = (UChar*)malloc(uBufSize * sizeof(UChar));
|
||||
assert(uBuf!=NULL);
|
||||
|
||||
// grab another buffer's worth
|
||||
while((!feof(f)) &&
|
||||
((count=fread(inBuf, 1, BUFFERSIZE , f)) > 0) )
|
||||
{
|
||||
// Convert bytes to unicode
|
||||
source = inBuf;
|
||||
sourceLimit = inBuf + count;
|
||||
|
||||
while(source < sourceLimit)
|
||||
{
|
||||
p = ucnv_getNextUChar(conv, &source, sourceLimit, &status);
|
||||
if(U_FAILURE(status))
|
||||
{
|
||||
fprintf(stderr, "%s @ %d\n", u_errorName(status), total);
|
||||
status = U_ZERO_ERROR;
|
||||
continue;
|
||||
}
|
||||
U_ASSERT(status);
|
||||
total++;
|
||||
|
||||
if(u_isalpha(p))
|
||||
letters++;
|
||||
|
||||
if((u_tolower(l) == 'i') && (u_tolower(p) == 'e'))
|
||||
ie++;
|
||||
|
||||
if((u_tolower(l) == 'g') && (u_tolower(p) == 0x0127))
|
||||
gh++;
|
||||
|
||||
if(p>charCount)
|
||||
{
|
||||
fprintf(stderr, "U+%06X: oh.., we only handle BMP characters so far.. redesign!\n", p);
|
||||
return U_UNSUPPORTED_ERROR;
|
||||
}
|
||||
info[p].frequency++;
|
||||
l = p;
|
||||
}
|
||||
}
|
||||
|
||||
fclose(f);
|
||||
ucnv_close(conv);
|
||||
|
||||
printf("%d letters out of %d total UChars.\n", letters, total);
|
||||
printf("%d ie digraphs, %d gh digraphs.\n", ie, gh);
|
||||
|
||||
// now, we could sort it..
|
||||
|
||||
// qsort(info, charCount, sizeof(info[0]), charfreq_compare);
|
||||
|
||||
for(p=0;p<charCount;p++)
|
||||
{
|
||||
if(info[p].frequency)
|
||||
{
|
||||
printf("% 5d U+%06X ", info[p].frequency, p);
|
||||
if(p <= 0xFFFF)
|
||||
{
|
||||
prettyPrintUChar((UChar)p);
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
}
|
||||
free(info);
|
||||
// ***************************** END SAMPLE ********************
|
||||
|
||||
printf("\n");
|
||||
|
||||
return U_ZERO_ERROR;
|
||||
}
|
||||
#undef BUFFERSIZE
|
||||
|
||||
|
||||
/*******************************************************************
|
||||
Very simple C++ sample to convert a string into Unicode from SJIS
|
||||
@ -799,6 +931,7 @@ UErrorCode convsample_41()
|
||||
assert(U_SUCCESS(status));
|
||||
|
||||
uBufSize = (BUFFERSIZE/conv->getMinBytesPerChar());
|
||||
//uBufSize = 4;
|
||||
printf("input bytes %d / min chars %d = %d UChars\n",
|
||||
BUFFERSIZE, conv->getMinBytesPerChar(), uBufSize);
|
||||
uBuf = (UChar*)malloc(uBufSize * sizeof(UChar));
|
||||
@ -843,6 +976,10 @@ UErrorCode convsample_41()
|
||||
assert(fwrite(uBuf, sizeof(uBuf[0]), (target-uBuf), out) ==
|
||||
(size_t)(target-uBuf));
|
||||
total += (target-uBuf);
|
||||
|
||||
fprintf(stderr, "srcLeft=%d, wrote %d, err %s\n",
|
||||
sourceLimit - source, target-uBuf, u_errorName(status));
|
||||
|
||||
} while (source < sourceLimit); // while simply out of space
|
||||
}
|
||||
|
||||
@ -863,7 +1000,7 @@ UErrorCode convsample_41()
|
||||
|
||||
// 46- C, UTF16 -> latin2 [data41.utf16 -> data46.out]
|
||||
|
||||
#define BUFFERSIZE 23 /* make it interesting :) */
|
||||
#define BUFFERSIZE 24 /* make it interesting :) */
|
||||
|
||||
UErrorCode convsample_46()
|
||||
{
|
||||
@ -1072,22 +1209,23 @@ int main()
|
||||
|
||||
printf("Default Converter=%s\n", ucnv_getDefaultName() );
|
||||
|
||||
convsample_01(); // C++, u->koi8r, conv
|
||||
convsample_02(); // C , u->koi8r, conv
|
||||
convsample_03(); // C, iterate
|
||||
// // convsample_04(); /* not written yet */
|
||||
convsample_05(); // C, utf8->u, getNextUChar
|
||||
convsample_11(); // C++, sjis->u, conv
|
||||
convsample_12(); // C, sjis->u, conv
|
||||
convsample_13(); // C, big5->u, getNextU
|
||||
convsample_01(); // C++, u->koi8r, conv
|
||||
convsample_02(); // C , u->koi8r, conv
|
||||
convsample_03(); // C, iterate
|
||||
// convsample_04(); /* not written yet */
|
||||
convsample_05(); // C, utf8->u, getNextUChar
|
||||
convsample_06(); // C freq counter thingy
|
||||
convsample_11(); // C++, sjis->u, conv
|
||||
convsample_12(); // C, sjis->u, conv
|
||||
convsample_13(); // C, big5->u, getNextU
|
||||
|
||||
convsample_20(); // C, callback
|
||||
convsample_20(); // C, callback
|
||||
|
||||
convsample_40(); // C, cp37 -> UTF16 [data02.bin -> data40.utf16]
|
||||
convsample_41(); // C++, cp37 -> UTF16 [data02.bin -> data41.utf16]
|
||||
convsample_40(); // C, cp37 -> UTF16 [data02.bin -> data40.utf16]
|
||||
convsample_41(); // C++, cp37 -> UTF16 [data02.bin -> data41.utf16]
|
||||
|
||||
convsample_46(); // C, UTF16 -> latin3 [data41.utf16 -> data46.out]
|
||||
convsample_47(); // C++,UTF16 -> latin3 [data40.utf16 -> data47.out]
|
||||
convsample_46(); // C, UTF16 -> latin3 [data41.utf16 -> data46.out]
|
||||
convsample_47(); // C++,UTF16 -> latin3 [data40.utf16 -> data47.out]
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
91
icu4c/source/samples/ucnv/data06.ut8
Normal file
91
icu4c/source/samples/ucnv/data06.ut8
Normal file
@ -0,0 +1,91 @@
|
||||
|
||||
// *******************************************************************************
|
||||
// *
|
||||
// * Copyright (C) 1997-2000, International Business Machines
|
||||
// * Corporation and others. All Rights Reserved.
|
||||
// *
|
||||
// *******************************************************************************
|
||||
|
||||
|
||||
|
||||
fa {
|
||||
|
||||
Version { "x0.0" }
|
||||
|
||||
|
||||
DayAbbreviations {
|
||||
"ی∔",
|
||||
"د∔",
|
||||
"س∔",
|
||||
"چ∔",
|
||||
"پ∔",
|
||||
"ج∔",
|
||||
"ش∔",
|
||||
}
|
||||
DayNames {
|
||||
"ییشنبه",
|
||||
"دوشنبه",
|
||||
"سهشنبه",
|
||||
"چهارشنبه",
|
||||
"پنجشنبه",
|
||||
"جمعه",
|
||||
"شنبه",
|
||||
}
|
||||
MonthAbbreviations {
|
||||
"ژان",
|
||||
"فور",
|
||||
"مار",
|
||||
"آور",
|
||||
"مـه",
|
||||
"ژون",
|
||||
"ژوی",
|
||||
"اوت",
|
||||
"سپت",
|
||||
"اكت",
|
||||
"نوا",
|
||||
"دسا",
|
||||
"",
|
||||
}
|
||||
MonthNames {
|
||||
"ژانویه",
|
||||
"فورویه",
|
||||
"مارس",
|
||||
"آوریل",
|
||||
"مه",
|
||||
"ژوئن",
|
||||
"ژوئیه",
|
||||
"اوت",
|
||||
"سپتامبر",
|
||||
"اكتبر",
|
||||
"نوامبر",
|
||||
"دسامبر",
|
||||
"",
|
||||
}
|
||||
NumberElements {
|
||||
"٫",
|
||||
"٬",
|
||||
";",
|
||||
"%",
|
||||
"0",
|
||||
"#",
|
||||
"-",
|
||||
"E",
|
||||
"‰",
|
||||
"∞",
|
||||
"<22>",
|
||||
}
|
||||
NumberPatterns {
|
||||
"#,##0.###;-#,##0.###",
|
||||
"#,##0.#¤;#,##0.#- ¤",
|
||||
"#,##0%",
|
||||
}
|
||||
LocaleID{ "29"}
|
||||
// /**************** Info Below is needed ****************/
|
||||
// CollationElements{}
|
||||
// Countries{}
|
||||
// Eras{}
|
||||
// Languages{}
|
||||
// ShortLanguage { }
|
||||
// localPatternChars{}
|
||||
//
|
||||
}
|
Loading…
Reference in New Issue
Block a user