ICU-434 update for library/path changes, added more examples

X-SVN-Rev: 2890
2000-11-09 16:11:21 +00:00 · 2000-11-09 16:11:21 +00:00 · 93137aa35d
commit 93137aa35d
parent d70dac9d40
3 changed files with 250 additions and 21 deletions
--- a/icu4c/source/samples/ucnv/Makefile
+++ b/icu4c/source/samples/ucnv/Makefile
@ -1,5 +1,5 @@
 # Copyright (c) 2000 IBM, Inc. and others
-# conversion sample code $Revision: 1.2 $
+# conversion sample code $Revision: 1.3 $
 # Usage:
@ -9,7 +9,7 @@
 #  - do 'make install' of icu 
 #
 #  - change the following line to point to the $(prefix) that 
-#    was used (will look for $(prefix)/share/icu/Makefile.inc )
+#    was used (will look for $(prefix)/lib/icu/Makefile.inc )
 #      OR 
 #   set the variable ICU_PREFIX to point at $(prefix)
 #  
@ -18,9 +18,9 @@
 ICU_DEFAULT_PREFIX=/home/srl/II
 ifeq ($(strip $(ICU_PREFIX)),)
-  ICU_INC=$(ICU_DEFAULT_PREFIX)/share/icu/Makefile.inc
+  ICU_INC=$(ICU_DEFAULT_PREFIX)/lib/icu/Makefile.inc
 else
-  ICU_INC=$(ICU_PREFIX)/share/icu/Makefile.inc
+  ICU_INC=$(ICU_PREFIX)/lib/icu/Makefile.inc
 endif
 ICUPATH=
--- a/icu4c/source/samples/ucnv/convsamp.cpp
+++ b/icu4c/source/samples/ucnv/convsamp.cpp
@ -175,8 +175,8 @@ void printUChar(UChar32 ch32)
 }
 /*******************************************************************
-  Very simple C++ sample to convert the word 'Moscow' in Russian, followed
+  Very simple C++ sample to convert the word 'Moscow' in Russian in Unicode,
-  by an exclamation mark (!) into the KOI8-R Russian code page.
+  followed by an exclamation mark (!) into the KOI8-R Russian code page.
  This example first creates a UnicodeString out of the Unicode chars.
@ -233,7 +233,8 @@ UErrorCode convsample_01()
 /******************************************************
-  Similar sample to the preceding one. 
+  Similar sample to the preceding one.  Converting FROM unicode 
  to koi8-r.
  You must call ucnv_close to clean up the memory used by the
  converter.
@ -423,6 +424,137 @@ UErrorCode convsample_05()
 }
 #undef BUFFERSIZE
 #define BUFFERSIZE 1024
 /* One slot of the per-code-point frequency table used by convsample_06(). */
 typedef struct
 {
  UChar32  codepoint;  /* code point this slot counts; convsample_06() sets it to the slot's index */
  uint32_t frequency;  /* number of occurrences seen; convsample_06() resets it to 0 before counting */
 } CharFreqInfo;
 /*
  * Sample 06 (C API): reads the file 'data06.ut8' in BUFFERSIZE-byte chunks,
  * decodes it one code point at a time with a "utf-8" converter, and prints
  *   - the number of letters and the total number of code points,
  *   - the number of "ie" and "g + U+0127" digraphs (case-insensitive),
  *   - a frequency table of every code point that occurred.
  *
  * Returns U_ZERO_ERROR on success, U_FILE_ACCESS_ERROR if the data file
  * cannot be opened, U_MEMORY_ALLOCATION_ERROR if the frequency table cannot
  * be allocated, the ucnv_open() status if the converter cannot be opened,
  * and U_UNSUPPORTED_ERROR if a code point outside the table is found.
  * Every exit path releases the file, the converter and the table.
  */
 UErrorCode convsample_06()
 {
  FILE *f = NULL;
  int32_t count;
  char inBuf[BUFFERSIZE];
  const char *source;
  const char *sourceLimit;
  int32_t uBufSize = 0;
  UConverter *conv = NULL;
  UErrorCode status = U_ZERO_ERROR;
  UErrorCode result = U_ZERO_ERROR;   /* value handed back to the caller */
  uint32_t letters=0, total=0;
  CharFreqInfo   *info = NULL;
  UChar32   charCount = 0x10000;  /* increase this if you want to handle non bmp.. todo: automatically bump it.. */
  UChar32   p;
  uint32_t ie = 0;
  uint32_t gh = 0;
  UChar32 l = 0;                      /* previously decoded code point, for digraph detection */

  printf("\n\n==============================================\n"
         "Sample 06: C: frequency distribution of letters in a UTF-8 document\n");

  f = fopen("data06.ut8", "r");
  if(!f)
  {
    fprintf(stderr, "Couldn't open file 'data06.ut8' (UTF-8 data file).\n");
    result = U_FILE_ACCESS_ERROR;
    goto cleanup;
  }

  info = (CharFreqInfo*)malloc(sizeof(CharFreqInfo) * charCount);
  if(!info)
  {
    /* size_t is not an int: cast explicitly so the format matches the argument */
    fprintf(stderr, " Couldn't allocate %lu bytes for freq counter\n",
            (unsigned long)(sizeof(CharFreqInfo)*charCount));
    /* bail out instead of falling through and dereferencing NULL */
    result = U_MEMORY_ALLOCATION_ERROR;
    goto cleanup;
  }

  /* reset frequencies */
  for(p=0;p<charCount;p++)
  {
    info[p].codepoint = p;
    info[p].frequency = 0;
  }

  // **************************** START SAMPLE *******************
  conv = ucnv_open("utf-8", &status);
  if(U_FAILURE(status))
  {
    /* explicit check: an assert() would disappear in NDEBUG builds */
    fprintf(stderr, "Couldn't open utf-8 converter: %s\n", u_errorName(status));
    result = status;
    goto cleanup;
  }

  /* Informational only: no UChar buffer is needed because the input is
     consumed one code point at a time with ucnv_getNextUChar(). */
  uBufSize = (BUFFERSIZE/ucnv_getMinCharSize(conv));
  printf("input bytes %d / min chars %d = %d UChars\n",
         BUFFERSIZE, ucnv_getMinCharSize(conv), uBufSize);

  // grab another buffer's worth
  // NOTE(review): a multi-byte sequence split across two fread() chunks is
  // presumably reported as a conversion error below -- confirm.
  while((!feof(f)) && 
        ((count=fread(inBuf, 1, BUFFERSIZE , f)) > 0) )
  {
    // Convert bytes to unicode
    source = inBuf;
    sourceLimit = inBuf + count;

    while(source < sourceLimit)
    {
      p = ucnv_getNextUChar(conv, &source, sourceLimit, &status);
      if(U_FAILURE(status))
      {
        /* report the problem, then keep going with the rest of the chunk */
        fprintf(stderr, "%s @ %d\n", u_errorName(status), total);
        status = U_ZERO_ERROR;
        continue;
      }
      U_ASSERT(status);
      total++;

      if(u_isalpha(p))
        letters++;

      if((u_tolower(l) == 'i') && (u_tolower(p) == 'e'))
        ie++;

      if((u_tolower(l) == 'g') && (u_tolower(p) == 0x0127))
        gh++;

      /* valid indices are 0 .. charCount-1, so charCount itself is already
         out of range (the original '>' test let it through) */
      if(p < 0 || p >= charCount)
      {
        fprintf(stderr, "U+%06X: oh.., we only handle BMP characters so far.. redesign!\n", p);
        result = U_UNSUPPORTED_ERROR;
        goto cleanup;
      }
      info[p].frequency++;
      l = p;
    }
  }

  printf("%d letters out of %d total UChars.\n", letters, total);
  printf("%d ie digraphs, %d gh digraphs.\n", ie, gh);

  // now, we could sort it..

  //  qsort(info, charCount, sizeof(info[0]), charfreq_compare);

  for(p=0;p<charCount;p++)
  {
    if(info[p].frequency)
    {
      printf("% 5d U+%06X ", info[p].frequency, p);
      if(p <= 0xFFFF)
      {
        prettyPrintUChar((UChar)p);
      }
      printf("\n");
    }
  }
  // ***************************** END SAMPLE ********************

  printf("\n");

 cleanup:
  /* single exit point: release whatever was acquired, on success or failure */
  if(f)
  {
    fclose(f);
  }
  if(conv)
  {
    ucnv_close(conv);
  }
  free(info);

  return result;
 }
 #undef BUFFERSIZE
 /*******************************************************************
  Very simple C++ sample to convert a string into Unicode from SJIS
@ -799,6 +931,7 @@ UErrorCode convsample_41()
  assert(U_SUCCESS(status));
  uBufSize = (BUFFERSIZE/conv->getMinBytesPerChar());
  //uBufSize = 4;
  printf("input bytes %d / min chars %d = %d UChars\n",
         BUFFERSIZE, conv->getMinBytesPerChar(), uBufSize);
  uBuf = (UChar*)malloc(uBufSize * sizeof(UChar));
@ -843,6 +976,10 @@ UErrorCode convsample_41()
        assert(fwrite(uBuf, sizeof(uBuf[0]), (target-uBuf), out) ==
               (size_t)(target-uBuf));
        total += (target-uBuf);
        fprintf(stderr, "srcLeft=%d, wrote %d, err %s\n",
                sourceLimit - source, target-uBuf, u_errorName(status));
    } while (source < sourceLimit); // while simply out of space
  }
@ -863,7 +1000,7 @@ UErrorCode convsample_41()
 //  46-  C, UTF16 -> latin2 [data41.utf16 -> data46.out]
-#define BUFFERSIZE 23 /* make it interesting :) */
+#define BUFFERSIZE 24 /* make it interesting :) */
 UErrorCode convsample_46()
 {
@ -1072,22 +1209,23 @@ int main()
  printf("Default Converter=%s\n", ucnv_getDefaultName() );
-  convsample_01();  // C++, u->koi8r, conv
+    convsample_01();  // C++, u->koi8r, conv
-  convsample_02();  // C  , u->koi8r, conv
+    convsample_02();  // C  , u->koi8r, conv
-  convsample_03();  // C,   iterate
+    convsample_03();  // C,   iterate
-  //    //  convsample_04();  /* not written yet */
+ //  convsample_04();  /* not written yet */
-  convsample_05();  // C,  utf8->u, getNextUChar
+    convsample_05();  // C,  utf8->u, getNextUChar
-  convsample_11();  // C++, sjis->u, conv
+    convsample_06(); // C freq counter thingy
-  convsample_12();  // C,  sjis->u, conv
+    convsample_11();  // C++, sjis->u, conv
-  convsample_13();  // C,  big5->u, getNextU
+    convsample_12();  // C,  sjis->u, conv
    convsample_13();  // C,  big5->u, getNextU
-  convsample_20();  // C, callback
+    convsample_20();  // C, callback
-  convsample_40();  // C,   cp37 -> UTF16 [data02.bin -> data40.utf16]
+    convsample_40();  // C,   cp37 -> UTF16 [data02.bin -> data40.utf16]
-  convsample_41();  // C++, cp37 -> UTF16 [data02.bin -> data41.utf16]
+    convsample_41();  // C++, cp37 -> UTF16 [data02.bin -> data41.utf16]
-  convsample_46();  // C,  UTF16 -> latin3 [data41.utf16 -> data46.out]
+    convsample_46();  // C,  UTF16 -> latin3 [data41.utf16 -> data46.out]
-  convsample_47();  // C++,UTF16 -> latin3 [data40.utf16 -> data47.out]
+    convsample_47();  // C++,UTF16 -> latin3 [data40.utf16 -> data47.out]
   return 0;
 }
--- a/icu4c/source/samples/ucnv/data06.ut8
+++ b/icu4c/source/samples/ucnv/data06.ut8
@ -0,0 +1,91 @@
 // *******************************************************************************
 // *
 // *   Copyright (C) 1997-2000, International Business Machines
 // *   Corporation and others.  All Rights Reserved.
 // *
 // *******************************************************************************
 fa {
   Version { "x0.0" }
    DayAbbreviations { 
            "ی∔",
            "د∔",
            "س∔",
            "چ∔",
            "پ∔",
            "ج∔",
            "ش∔",
    }
    DayNames { 
            "یی‌شنبه",
            "دوشنبه",
            "سه‌شنبه",
            "چهارشنبه",
            "پنج‌شنبه",
            "جمعه",
            "شنبه",
    }
    MonthAbbreviations { 
            "ژان",
            "فور",
            "مار",
            "آور",
            "مـه",
            "ژون",
            "ژوی",
            "اوت",
            "سپت",
            "اكت",
            "نوا",
            "دسا",
            "",
    }
    MonthNames { 
            "ژانویه",
            "فورویه",
            "مارس",
            "آوریل",
            "مه",
            "ژوئن",
            "ژوئیه",
            "اوت",
            "سپتامبر",
            "اكتبر",
            "نوامبر",
            "دسامبر",
            "",
    }
    NumberElements { 
            "٫",
            "٬",
            ";",
            "%",
            "0",
            "#",
            "-",
            "E",
            "‰",
            "∞",
            "<22>",
    }
    NumberPatterns { 
            "#,##0.###;-#,##0.###",
            "#,##0.#¤;#,##0.#- ¤",
            "#,##0%",
    }
    LocaleID{ "29"}
            // /**************** Info Below is needed ****************/
            // CollationElements{}
            // Countries{}
            // Eras{}
            // Languages{}
            // ShortLanguage { }
            // localPatternChars{}
            //
 }