ICU-434 update for library/path changes, added more examples

X-SVN-Rev: 2890
2000-11-09 16:11:21 +00:00 · 2000-11-09 16:11:21 +00:00 · 93137aa35d
commit 93137aa35d
parent d70dac9d40
3 changed files with 250 additions and 21 deletions
--- a/icu4c/source/samples/ucnv/Makefile
+++ b/icu4c/source/samples/ucnv/Makefile
@ -1,5 +1,5 @@
 # Copyright (c) 2000 IBM, Inc. and others
-# conversion sample code $Revision: 1.2 $
+# conversion sample code $Revision: 1.3 $


 # Usage:
@ -9,7 +9,7 @@
 #  - do 'make install' of icu 
 #
 #  - change the following line to point to the $(prefix) that 
-#    was used (will look for $(prefix)/share/icu/Makefile.inc )
+#    was used (will look for $(prefix)/lib/icu/Makefile.inc )
 #      OR 
 #   set the variable ICU_PREFIX to point at $(prefix)
 #  
@ -18,9 +18,9 @@
 ICU_DEFAULT_PREFIX=/home/srl/II

 ifeq ($(strip $(ICU_PREFIX)),)
-  ICU_INC=$(ICU_DEFAULT_PREFIX)/share/icu/Makefile.inc
+  ICU_INC=$(ICU_DEFAULT_PREFIX)/lib/icu/Makefile.inc
 else
-  ICU_INC=$(ICU_PREFIX)/share/icu/Makefile.inc
+  ICU_INC=$(ICU_PREFIX)/lib/icu/Makefile.inc
 endif
 ICUPATH=

--- a/icu4c/source/samples/ucnv/convsamp.cpp
+++ b/icu4c/source/samples/ucnv/convsamp.cpp
@ -175,8 +175,8 @@ void printUChar(UChar32 ch32)
 }

 /*******************************************************************
-  Very simple C++ sample to convert the word 'Moscow' in Russian, followed
-  by an exclamation mark (!) into the KOI8-R Russian code page.
+  Very simple C++ sample to convert the Russian word for 'Moscow' (as Unicode
+  text), followed by an exclamation mark (!), into the KOI8-R Russian code page.

  This example first creates a UnicodeString out of the Unicode chars.

@ -233,7 +233,8 @@ UErrorCode convsample_01()


 /******************************************************
-  Similar sample to the preceding one. 
+  Similar sample to the preceding one: it converts FROM Unicode
+  to KOI8-R.
  You must call ucnv_close to clean up the memory used by the
  converter.

@ -423,6 +424,137 @@ UErrorCode convsample_05()
 }
 #undef BUFFERSIZE

+#define BUFFERSIZE 1024
+/* One entry of the per-code-point tally table used by convsample_06(). */
+typedef struct
+{
+  UChar32  codepoint;  /* the code point this entry counts */
+  uint32_t frequency;  /* number of times that code point has been seen */
+} CharFreqInfo;
+
+/**
+ * Sample 06: tallies how often each code point occurs in the UTF-8 file
+ * "data06.ut8", decoding it one code point at a time with
+ * ucnv_getNextUChar(), then prints the totals and every non-zero tally.
+ *
+ * @return U_ZERO_ERROR on success; U_FILE_ACCESS_ERROR if the file cannot be
+ *         opened; U_MEMORY_ALLOCATION_ERROR if the tally table cannot be
+ *         allocated; U_UNSUPPORTED_ERROR if a decoded code point does not
+ *         fit in the table (charCount entries).
+ */
+UErrorCode convsample_06()
+{
+  printf("\n\n==============================================\n"
+         "Sample 06: C: frequency distribution of letters in a UTF-8 document\n");
+
+  /* Every local is declared before the first 'goto cleanup' so that no
+     jump crosses an initialization (an error in C++). */
+  FILE *f;
+  int32_t count;
+  char inBuf[BUFFERSIZE];
+  const char *source;
+  const char *sourceLimit;
+  int32_t uBufSize = 0;
+  UConverter *conv = NULL;
+  UErrorCode status = U_ZERO_ERROR;
+  UErrorCode result = U_ZERO_ERROR;  /* value handed back to the caller */
+  uint32_t letters=0, total=0;
+
+  CharFreqInfo   *info = NULL;
+  UChar32   charCount = 0x10000;  /* increase this if you want to handle non bmp.. todo: automatically bump it.. */
+  UChar32   p;
+
+  uint32_t ie = 0;   /* times 'i' was immediately followed by 'e' (after u_tolower) */
+  uint32_t gh = 0;   /* times 'g' was immediately followed by U+0127 (after u_tolower) */
+  UChar32 l = 0;     /* previously decoded code point */
+
+  f = fopen("data06.ut8", "r");
+  if(!f)
+  {
+    fprintf(stderr, "Couldn't open file 'data06.ut8' (UTF-8 data file).\n");
+    return U_FILE_ACCESS_ERROR;
+  }
+
+  info = (CharFreqInfo*)malloc(sizeof(CharFreqInfo) * charCount);
+  if(!info)
+  {
+    /* the size is a size_t, so it is printed through unsigned long, not %d */
+    fprintf(stderr, " Couldn't allocate %lu bytes for freq counter\n",
+            (unsigned long)(sizeof(CharFreqInfo)*charCount));
+    result = U_MEMORY_ALLOCATION_ERROR;
+    goto cleanup;   /* do not fall through and write through a NULL table */
+  }
+
+  /* reset frequencies */
+  for(p=0;p<charCount;p++)
+  {
+    info[p].codepoint = p;
+    info[p].frequency = 0;
+  }
+
+  // **************************** START SAMPLE *******************
+  conv = ucnv_open("utf-8", &status);
+  assert(U_SUCCESS(status));
+
+  uBufSize = (BUFFERSIZE/ucnv_getMinCharSize(conv));
+  printf("input bytes %d / min chars %d = %d UChars\n",
+         BUFFERSIZE, ucnv_getMinCharSize(conv), uBufSize);
+
+  // grab another buffer's worth
+  // NOTE(review): a multi-byte sequence split across two fread() chunks is
+  // presumably reported as an error by ucnv_getNextUChar - confirm.
+  while((!feof(f)) &&
+        ((count=fread(inBuf, 1, BUFFERSIZE , f)) > 0) )
+  {
+    // Convert bytes to unicode
+    source = inBuf;
+    sourceLimit = inBuf + count;
+
+    while(source < sourceLimit)
+    {
+      p = ucnv_getNextUChar(conv, &source, sourceLimit, &status);
+      if(U_FAILURE(status))
+      {
+        /* report, clear the error and keep decoding the rest of the chunk */
+        fprintf(stderr, "%s @ %d\n", u_errorName(status), total);
+        status = U_ZERO_ERROR;
+        continue;
+      }
+      U_ASSERT(status);
+      total++;
+
+      if(u_isalpha(p))
+        letters++;
+
+      if((u_tolower(l) == 'i') && (u_tolower(p) == 'e'))
+        ie++;
+
+      if((u_tolower(l) == 'g') && (u_tolower(p) == 0x0127))
+        gh++;
+
+      /* valid indices are 0..charCount-1, so charCount itself is rejected too */
+      if(p>=charCount)
+      {
+        fprintf(stderr, "U+%06X: oh.., we only handle BMP characters so far.. redesign!\n", p);
+        result = U_UNSUPPORTED_ERROR;
+        goto cleanup;
+      }
+      info[p].frequency++;
+      l = p;
+    }
+  }
+
+  printf("%d letters out of %d total UChars.\n", letters, total);
+  printf("%d ie digraphs, %d gh digraphs.\n", ie, gh);
+
+  // now, we could sort it..
+
+  //  qsort(info, charCount, sizeof(info[0]), charfreq_compare);
+
+  for(p=0;p<charCount;p++)
+  {
+    if(info[p].frequency)
+    {
+      printf("% 5d U+%06X ", info[p].frequency, p);
+      if(p <= 0xFFFF)
+      {
+        prettyPrintUChar((UChar)p);
+      }
+      printf("\n");
+    }
+  }
+  // ***************************** END SAMPLE ********************
+
+  printf("\n");
+
+cleanup:
+  /* single exit path: the converter, table and file are released on
+     success and on every error after the file was opened */
+  if(conv)
+  {
+    ucnv_close(conv);
+  }
+  free(info);
+  fclose(f);
+
+  return result;
+}
+#undef BUFFERSIZE
+

 /*******************************************************************
  Very simple C++ sample to convert a string into Unicode from SJIS
@ -799,6 +931,7 @@ UErrorCode convsample_41()
  assert(U_SUCCESS(status));

  uBufSize = (BUFFERSIZE/conv->getMinBytesPerChar());
+  //uBufSize = 4;
  printf("input bytes %d / min chars %d = %d UChars\n",
         BUFFERSIZE, conv->getMinBytesPerChar(), uBufSize);
  uBuf = (UChar*)malloc(uBufSize * sizeof(UChar));
@ -843,6 +976,10 @@ UErrorCode convsample_41()
        assert(fwrite(uBuf, sizeof(uBuf[0]), (target-uBuf), out) ==
               (size_t)(target-uBuf));
        total += (target-uBuf);
+
+        fprintf(stderr, "srcLeft=%d, wrote %d, err %s\n",
+                sourceLimit - source, target-uBuf, u_errorName(status));
+
    } while (source < sourceLimit); // while simply out of space
  }

@ -863,7 +1000,7 @@ UErrorCode convsample_41()

 //  46-  C, UTF16 -> latin2 [data41.utf16 -> data46.out]

-#define BUFFERSIZE 23 /* make it interesting :) */
+#define BUFFERSIZE 24 /* make it interesting :) */

 UErrorCode convsample_46()
 {
@ -1072,22 +1209,23 @@ int main()

  printf("Default Converter=%s\n", ucnv_getDefaultName() );
  
-  convsample_01();  // C++, u->koi8r, conv
-  convsample_02();  // C  , u->koi8r, conv
-  convsample_03();  // C,   iterate
-  //    //  convsample_04();  /* not written yet */
-  convsample_05();  // C,  utf8->u, getNextUChar
-  convsample_11();  // C++, sjis->u, conv
-  convsample_12();  // C,  sjis->u, conv
-  convsample_13();  // C,  big5->u, getNextU
+    convsample_01();  // C++, u->koi8r, conv
+    convsample_02();  // C  , u->koi8r, conv
+    convsample_03();  // C,   iterate
+ //  convsample_04();  /* not written yet */
+    convsample_05();  // C,  utf8->u, getNextUChar
+    convsample_06(); // C freq counter thingy
+    convsample_11();  // C++, sjis->u, conv
+    convsample_12();  // C,  sjis->u, conv
+    convsample_13();  // C,  big5->u, getNextU
  
-  convsample_20();  // C, callback
+    convsample_20();  // C, callback
  
-  convsample_40();  // C,   cp37 -> UTF16 [data02.bin -> data40.utf16]
-  convsample_41();  // C++, cp37 -> UTF16 [data02.bin -> data41.utf16]
+    convsample_40();  // C,   cp37 -> UTF16 [data02.bin -> data40.utf16]
+    convsample_41();  // C++, cp37 -> UTF16 [data02.bin -> data41.utf16]
  
-  convsample_46();  // C,  UTF16 -> latin3 [data41.utf16 -> data46.out]
-  convsample_47();  // C++,UTF16 -> latin3 [data40.utf16 -> data47.out]
+    convsample_46();  // C,  UTF16 -> latin3 [data41.utf16 -> data46.out]
+    convsample_47();  // C++,UTF16 -> latin3 [data40.utf16 -> data47.out]
        
   return 0;
 }
--- a/icu4c/source/samples/ucnv/data06.ut8
+++ b/icu4c/source/samples/ucnv/data06.ut8
@ -0,0 +1,91 @@
+
+// *******************************************************************************
+// *
+// *   Copyright (C) 1997-2000, International Business Machines
+// *   Corporation and others.  All Rights Reserved.
+// *
+// *******************************************************************************
+
+
+
+fa {
+
+   Version { "x0.0" }
+
+
+    DayAbbreviations { 
+            "ی∔",
+            "د∔",
+            "س∔",
+            "چ∔",
+            "پ∔",
+            "ج∔",
+            "ش∔",
+    }
+    DayNames { 
+            "یی‌شنبه",
+            "دوشنبه",
+            "سه‌شنبه",
+            "چهارشنبه",
+            "پنج‌شنبه",
+            "جمعه",
+            "شنبه",
+    }
+    MonthAbbreviations { 
+            "ژان",
+            "فور",
+            "مار",
+            "آور",
+            "مـه",
+            "ژون",
+            "ژوی",
+            "اوت",
+            "سپت",
+            "اكت",
+            "نوا",
+            "دسا",
+            "",
+    }
+    MonthNames { 
+            "ژانویه",
+            "فورویه",
+            "مارس",
+            "آوریل",
+            "مه",
+            "ژوئن",
+            "ژوئیه",
+            "اوت",
+            "سپتامبر",
+            "اكتبر",
+            "نوامبر",
+            "دسامبر",
+            "",
+    }
+    NumberElements { 
+            "٫",
+            "٬",
+            ";",
+            "%",
+            "0",
+            "#",
+            "-",
+            "E",
+            "‰",
+            "∞",
+            "<22>",
+    }
+    NumberPatterns { 
+            "#,##0.###;-#,##0.###",
+            "#,##0.#¤;#,##0.#- ¤",
+            "#,##0%",
+    }
+    LocaleID{ "29"}
+            // /**************** Info Below is needed ****************/
+            // CollationElements{}
+            // Countries{}
+            // Eras{}
+            // Languages{}
+            // ShortLanguage { }
+            // localPatternChars{}
+            //
+}