ICU-434 update for library/path changes, added more examples
X-SVN-Rev: 2890
This commit is contained in:
parent
d70dac9d40
commit
93137aa35d
@ -1,5 +1,5 @@
|
|||||||
# Copyright (c) 2000 IBM, Inc. and others
|
# Copyright (c) 2000 IBM, Inc. and others
|
||||||
# conversion sample code $Revision: 1.2 $
|
# conversion sample code $Revision: 1.3 $
|
||||||
|
|
||||||
|
|
||||||
# Usage:
|
# Usage:
|
||||||
@ -9,7 +9,7 @@
|
|||||||
# - do 'make install' of icu
|
# - do 'make install' of icu
|
||||||
#
|
#
|
||||||
# - change the following line to point to the $(prefix) that
|
# - change the following line to point to the $(prefix) that
|
||||||
# was used (will look for $(prefix)/share/icu/Makefile.inc )
|
# was used (will look for $(prefix)/lib/icu/Makefile.inc )
|
||||||
# OR
|
# OR
|
||||||
# set the variable ICU_PREFIX to point at $(prefix)
|
# set the variable ICU_PREFIX to point at $(prefix)
|
||||||
#
|
#
|
||||||
@ -18,9 +18,9 @@
|
|||||||
ICU_DEFAULT_PREFIX=/home/srl/II
|
ICU_DEFAULT_PREFIX=/home/srl/II
|
||||||
|
|
||||||
ifeq ($(strip $(ICU_PREFIX)),)
|
ifeq ($(strip $(ICU_PREFIX)),)
|
||||||
ICU_INC=$(ICU_DEFAULT_PREFIX)/share/icu/Makefile.inc
|
ICU_INC=$(ICU_DEFAULT_PREFIX)/lib/icu/Makefile.inc
|
||||||
else
|
else
|
||||||
ICU_INC=$(ICU_PREFIX)/share/icu/Makefile.inc
|
ICU_INC=$(ICU_PREFIX)/lib/icu/Makefile.inc
|
||||||
endif
|
endif
|
||||||
ICUPATH=
|
ICUPATH=
|
||||||
|
|
||||||
|
@ -175,8 +175,8 @@ void printUChar(UChar32 ch32)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*******************************************************************
|
/*******************************************************************
|
||||||
Very simple C++ sample to convert the word 'Moscow' in Russian, followed
|
Very simple C++ sample to convert the word 'Moscow' in Russian in Unicode,
|
||||||
by an exclamation mark (!) into the KOI8-R Russian code page.
|
followed by an exclamation mark (!) into the KOI8-R Russian code page.
|
||||||
|
|
||||||
This example first creates a UnicodeString out of the Unicode chars.
|
This example first creates a UnicodeString out of the Unicode chars.
|
||||||
|
|
||||||
@ -233,7 +233,8 @@ UErrorCode convsample_01()
|
|||||||
|
|
||||||
|
|
||||||
/******************************************************
|
/******************************************************
|
||||||
Similar sample to the preceding one.
|
Similar sample to the preceding one. Converting FROM unicode
|
||||||
|
to koi8-r.
|
||||||
You must call ucnv_close to clean up the memory used by the
|
You must call ucnv_close to clean up the memory used by the
|
||||||
converter.
|
converter.
|
||||||
|
|
||||||
@ -423,6 +424,137 @@ UErrorCode convsample_05()
|
|||||||
}
|
}
|
||||||
#undef BUFFERSIZE
|
#undef BUFFERSIZE
|
||||||
|
|
||||||
|
#define BUFFERSIZE 1024
|
||||||
|
/* One slot of the per-code-point frequency table used by convsample_06(). */
typedef struct
|
||||||
|
{
|
||||||
|
UChar32 codepoint; /* code point this slot counts; convsample_06() sets it to the slot's own index */
|
||||||
|
uint32_t frequency; /* occurrences seen so far; zeroed, then incremented once per decoded character */
|
||||||
|
} CharFreqInfo;
|
||||||
|
|
||||||
|
UErrorCode convsample_06()
{
  /*
   * Sample 06: reads the UTF-8 file "data06.ut8", decodes it one code point
   * at a time with ucnv_getNextUChar(), and prints
   *   - how many of the decoded code points are letters (u_isalpha),
   *   - how many "ie" and "g" + U+0127 digraphs were seen,
   *   - a frequency table of every code point that occurred.
   *
   * Returns U_ZERO_ERROR on success, U_FILE_ACCESS_ERROR if the data file
   * cannot be opened, U_MEMORY_ALLOCATION_ERROR if the frequency table
   * cannot be allocated, the ucnv_open() status if the converter cannot be
   * opened, or U_UNSUPPORTED_ERROR if a code point outside the table
   * (i.e. outside the BMP) is encountered.  Every resource acquired here is
   * released on every exit path.
   */
  printf("\n\n==============================================\n"
         "Sample 06: C: frequency distribution of letters in a UTF-8 document\n");

  FILE *f;
  int32_t count;
  char inBuf[BUFFERSIZE];
  const char *source;
  const char *sourceLimit;
  int32_t uBufSize = 0;
  UConverter *conv;
  UErrorCode status = U_ZERO_ERROR;   /* per-call converter status, reset after each bad sequence */
  UErrorCode result = U_ZERO_ERROR;   /* what this function finally returns */
  uint32_t letters=0, total=0;

  CharFreqInfo *info;
  UChar32 charCount = 0x10000;  /* table size: one slot per BMP code point; raise it to handle supplementary characters */
  UChar32 p;

  uint32_t ie = 0;   /* count of 'i' followed by 'e' (case-insensitive) */
  uint32_t gh = 0;   /* count of 'g' followed by U+0127 (h with stroke) */
  UChar32 l = 0;     /* previously decoded code point */

  f = fopen("data06.ut8", "r");
  if(!f)
  {
    fprintf(stderr, "Couldn't open file 'data06.ut8' (UTF-8 data file).\n");
    return U_FILE_ACCESS_ERROR;
  }

  info = (CharFreqInfo*)malloc(sizeof(CharFreqInfo) * charCount);
  if(!info)
  {
    /* size_t has no portable conversion in older printf implementations; widen explicitly */
    fprintf(stderr, " Couldn't allocate %lu bytes for freq counter\n",
            (unsigned long)(sizeof(CharFreqInfo)*charCount));
    fclose(f);
    return U_MEMORY_ALLOCATION_ERROR;
  }

  /* reset frequencies */
  for(p=0;p<charCount;p++)
  {
    info[p].codepoint = p;
    info[p].frequency = 0;
  }

  // **************************** START SAMPLE *******************
  conv = ucnv_open("utf-8", &status);
  if(U_FAILURE(status))
  {
    /* checked explicitly so that the failure is not ignored when assert() is compiled out */
    fprintf(stderr, "Couldn't open utf-8 converter: %s\n", u_errorName(status));
    free(info);
    fclose(f);
    return status;
  }

  /* Informational only: this sample decodes code point by code point and
     needs no UChar buffer, so none is allocated. */
  uBufSize = (BUFFERSIZE/ucnv_getMinCharSize(conv));
  printf("input bytes %d / min chars %d = %d UChars\n",
         BUFFERSIZE, ucnv_getMinCharSize(conv), uBufSize);

  // grab another buffer's worth
  // (fread() returning 0 covers both end-of-file and read errors)
  while((result == U_ZERO_ERROR) &&
        ((count=(int32_t)fread(inBuf, 1, BUFFERSIZE , f)) > 0) )
  {
    // Convert bytes to unicode
    source = inBuf;
    sourceLimit = inBuf + count;

    while(source < sourceLimit)
    {
      p = ucnv_getNextUChar(conv, &source, sourceLimit, &status);
      if(U_FAILURE(status))
      {
        /* report the bad sequence, clear the error and carry on */
        fprintf(stderr, "%s @ %d\n", u_errorName(status), total);
        status = U_ZERO_ERROR;
        continue;
      }
      U_ASSERT(status);
      total++;

      if(u_isalpha(p))
        letters++;

      if((u_tolower(l) == 'i') && (u_tolower(p) == 'e'))
        ie++;

      if((u_tolower(l) == 'g') && (u_tolower(p) == 0x0127))
        gh++;

      /* valid table indices are 0 .. charCount-1 */
      if((p < 0) || (p >= charCount))
      {
        fprintf(stderr, "U+%06X: oh.., we only handle BMP characters so far.. redesign!\n", p);
        result = U_UNSUPPORTED_ERROR;
        break;
      }
      info[p].frequency++;
      l = p;
    }
  }

  fclose(f);
  ucnv_close(conv);

  if(U_FAILURE(result))
  {
    free(info);
    return result;
  }

  printf("%d letters out of %d total UChars.\n", letters, total);
  printf("%d ie digraphs, %d gh digraphs.\n", ie, gh);

  // now, we could sort it..

  //  qsort(info, charCount, sizeof(info[0]), charfreq_compare);

  for(p=0;p<charCount;p++)
  {
    if(info[p].frequency)
    {
      printf("% 5d U+%06X ", info[p].frequency, p);
      if(p <= 0xFFFF)
      {
        prettyPrintUChar((UChar)p);
      }
      printf("\n");
    }
  }
  free(info);
  // ***************************** END SAMPLE ********************

  printf("\n");

  return U_ZERO_ERROR;
}
|
||||||
|
#undef BUFFERSIZE
|
||||||
|
|
||||||
|
|
||||||
/*******************************************************************
|
/*******************************************************************
|
||||||
Very simple C++ sample to convert a string into Unicode from SJIS
|
Very simple C++ sample to convert a string into Unicode from SJIS
|
||||||
@ -799,6 +931,7 @@ UErrorCode convsample_41()
|
|||||||
assert(U_SUCCESS(status));
|
assert(U_SUCCESS(status));
|
||||||
|
|
||||||
uBufSize = (BUFFERSIZE/conv->getMinBytesPerChar());
|
uBufSize = (BUFFERSIZE/conv->getMinBytesPerChar());
|
||||||
|
//uBufSize = 4;
|
||||||
printf("input bytes %d / min chars %d = %d UChars\n",
|
printf("input bytes %d / min chars %d = %d UChars\n",
|
||||||
BUFFERSIZE, conv->getMinBytesPerChar(), uBufSize);
|
BUFFERSIZE, conv->getMinBytesPerChar(), uBufSize);
|
||||||
uBuf = (UChar*)malloc(uBufSize * sizeof(UChar));
|
uBuf = (UChar*)malloc(uBufSize * sizeof(UChar));
|
||||||
@ -843,6 +976,10 @@ UErrorCode convsample_41()
|
|||||||
assert(fwrite(uBuf, sizeof(uBuf[0]), (target-uBuf), out) ==
|
assert(fwrite(uBuf, sizeof(uBuf[0]), (target-uBuf), out) ==
|
||||||
(size_t)(target-uBuf));
|
(size_t)(target-uBuf));
|
||||||
total += (target-uBuf);
|
total += (target-uBuf);
|
||||||
|
|
||||||
|
fprintf(stderr, "srcLeft=%d, wrote %d, err %s\n",
|
||||||
|
sourceLimit - source, target-uBuf, u_errorName(status));
|
||||||
|
|
||||||
} while (source < sourceLimit); // while simply out of space
|
} while (source < sourceLimit); // while simply out of space
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -863,7 +1000,7 @@ UErrorCode convsample_41()
|
|||||||
|
|
||||||
// 46- C, UTF16 -> latin2 [data41.utf16 -> data46.out]
|
// 46- C, UTF16 -> latin2 [data41.utf16 -> data46.out]
|
||||||
|
|
||||||
#define BUFFERSIZE 23 /* make it interesting :) */
|
#define BUFFERSIZE 24 /* make it interesting :) */
|
||||||
|
|
||||||
UErrorCode convsample_46()
|
UErrorCode convsample_46()
|
||||||
{
|
{
|
||||||
@ -1075,8 +1212,9 @@ int main()
|
|||||||
convsample_01(); // C++, u->koi8r, conv
|
convsample_01(); // C++, u->koi8r, conv
|
||||||
convsample_02(); // C , u->koi8r, conv
|
convsample_02(); // C , u->koi8r, conv
|
||||||
convsample_03(); // C, iterate
|
convsample_03(); // C, iterate
|
||||||
// // convsample_04(); /* not written yet */
|
// convsample_04(); /* not written yet */
|
||||||
convsample_05(); // C, utf8->u, getNextUChar
|
convsample_05(); // C, utf8->u, getNextUChar
|
||||||
|
convsample_06(); // C freq counter thingy
|
||||||
convsample_11(); // C++, sjis->u, conv
|
convsample_11(); // C++, sjis->u, conv
|
||||||
convsample_12(); // C, sjis->u, conv
|
convsample_12(); // C, sjis->u, conv
|
||||||
convsample_13(); // C, big5->u, getNextU
|
convsample_13(); // C, big5->u, getNextU
|
||||||
|
91
icu4c/source/samples/ucnv/data06.ut8
Normal file
91
icu4c/source/samples/ucnv/data06.ut8
Normal file
@ -0,0 +1,91 @@
|
|||||||
|
|
||||||
|
// *******************************************************************************
|
||||||
|
// *
|
||||||
|
// * Copyright (C) 1997-2000, International Business Machines
|
||||||
|
// * Corporation and others. All Rights Reserved.
|
||||||
|
// *
|
||||||
|
// *******************************************************************************
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
fa {
|
||||||
|
|
||||||
|
Version { "x0.0" }
|
||||||
|
|
||||||
|
|
||||||
|
DayAbbreviations {
|
||||||
|
"ی∔",
|
||||||
|
"د∔",
|
||||||
|
"س∔",
|
||||||
|
"چ∔",
|
||||||
|
"پ∔",
|
||||||
|
"ج∔",
|
||||||
|
"ش∔",
|
||||||
|
}
|
||||||
|
DayNames {
|
||||||
|
"ییشنبه",
|
||||||
|
"دوشنبه",
|
||||||
|
"سهشنبه",
|
||||||
|
"چهارشنبه",
|
||||||
|
"پنجشنبه",
|
||||||
|
"جمعه",
|
||||||
|
"شنبه",
|
||||||
|
}
|
||||||
|
MonthAbbreviations {
|
||||||
|
"ژان",
|
||||||
|
"فور",
|
||||||
|
"مار",
|
||||||
|
"آور",
|
||||||
|
"مـه",
|
||||||
|
"ژون",
|
||||||
|
"ژوی",
|
||||||
|
"اوت",
|
||||||
|
"سپت",
|
||||||
|
"اكت",
|
||||||
|
"نوا",
|
||||||
|
"دسا",
|
||||||
|
"",
|
||||||
|
}
|
||||||
|
MonthNames {
|
||||||
|
"ژانویه",
|
||||||
|
"فورویه",
|
||||||
|
"مارس",
|
||||||
|
"آوریل",
|
||||||
|
"مه",
|
||||||
|
"ژوئن",
|
||||||
|
"ژوئیه",
|
||||||
|
"اوت",
|
||||||
|
"سپتامبر",
|
||||||
|
"اكتبر",
|
||||||
|
"نوامبر",
|
||||||
|
"دسامبر",
|
||||||
|
"",
|
||||||
|
}
|
||||||
|
NumberElements {
|
||||||
|
"٫",
|
||||||
|
"٬",
|
||||||
|
";",
|
||||||
|
"%",
|
||||||
|
"0",
|
||||||
|
"#",
|
||||||
|
"-",
|
||||||
|
"E",
|
||||||
|
"‰",
|
||||||
|
"∞",
|
||||||
|
"<22>",
|
||||||
|
}
|
||||||
|
NumberPatterns {
|
||||||
|
"#,##0.###;-#,##0.###",
|
||||||
|
"#,##0.#¤;#,##0.#- ¤",
|
||||||
|
"#,##0%",
|
||||||
|
}
|
||||||
|
LocaleID{ "29"}
|
||||||
|
// /**************** Info Below is needed ****************/
|
||||||
|
// CollationElements{}
|
||||||
|
// Countries{}
|
||||||
|
// Eras{}
|
||||||
|
// Languages{}
|
||||||
|
// ShortLanguage { }
|
||||||
|
// localPatternChars{}
|
||||||
|
//
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user