scuffed-code/icu4c/source/samples/scsu/compsamp.cpp

/**************************************************************************
*
*   Copyright (C) 2000, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
***************************************************************************
*   file name:  compsamp.cpp
*   encoding:   ASCII (7-bit)
*
*   created on: 2000may30
*   created by: Steven R. Loomis
*
*   Sample code for the ICU compression routines.
*
* Note: Nothing special is needed to build this sample. Link with
*       the icu UC and icu I18N libraries.
*
*       I use 'assert' for error checking, you probably will want
*       something more flexible.  '***BEGIN SAMPLE***' and
*       '***END SAMPLE***' mark pieces suitable for stand alone
*       code snippets.
*
*/

#include <assert.h>
#include <ctype.h>            /* for isspace, etc.    */
#include <stdio.h>
#include <string.h>           /* for strlen           */

#include "unicode/utypes.h"   /* Basic ICU data types */
#include "unicode/scsu.h"
#include "unicode/uchar.h"
#include "unicode/ustring.h"  /* some more string fcns*/
#include "unicode/uloc.h"

/* Some utility functions */

/* A UChar array holding only the terminating 0x0000, i.e. an empty string.
   Used as the default 'uch' argument of printUChars(). */
static const UChar kNone[] = { 0x0000 };

/*
 * Print one UChar to stdout in a fixed cell of exactly seven characters,
 * so that it lines up with the seven-character columns written by
 * printUChars().
 *
 *   - graphic ASCII characters are shown quoted, e.g.  "  'A'  ";
 *   - code units above U+007F show the first six characters of the name
 *     returned by u_charName(), left-justified, or "???????" if the
 *     lookup fails;
 *   - space is shown quoted, tab and newline are shown escaped as \t and
 *     \n (printing the real control characters would break the table);
 *   - every other ASCII control character gives a blank cell.
 */
static void prettyPrintUChar(UChar c)
{
  if(  (c <= 0x007F) &&
       (isgraph(c))  ) {
    printf("  '%c'  ", (char)(0x00FF&c));
  } else if ( c > 0x007F ) {
    char buf[100];
    UErrorCode status = U_ZERO_ERROR;

    u_charName(c, U_UNICODE_CHAR_NAME, buf, (int32_t)sizeof(buf), &status);
    if(U_SUCCESS(status)) {
      buf[6] = 0;               /* keep at most six characters of the name */
      printf("%-7s", buf);      /* pad to the seven-character cell */
    } else {
      printf("???????");
    }
  } else {
    switch((char)(c & 0x007F)) {
    case ' ':
      printf("  ' '  ");
      break;
    case '\t':
      printf("  \\t   ");
      break;
    case '\n':
      printf("  \\n   ");
      break;
    default:
      printf("       ");
      break;
    }
  }
}


/*
 * Dump a UChar string to stdout as a three-row table with one
 * seven-character column per code unit:
 *
 *   <name>:  the index of each code unit
 *     uni :  each code unit as \uXXXX
 *      ch :  each code unit as rendered by prettyPrintUChar()
 *
 * name  label for the first row; a NULL label is printed as "?".
 * uch   the code units to print; NULL prints an empty table.
 * len   number of code units, or -1 to measure 'uch' with u_strlen().
 */
static void printUChars(const char  *name = "?",
                 const UChar *uch  = kNone,
                 int32_t     len   = -1 )
{
  int32_t i;

  if( name == NULL ) {
    name = "?";
  }

  if( uch == NULL ) {
    len = 0;                    /* nothing to read, whatever 'len' says */
  } else if( len == -1 ) {
    len = u_strlen(uch);
  }

  /* The ' ' flag is only defined for signed conversions, so it is used
     on %d below but not on %s. */
  printf("%5s:", name);
  for( i = 0; i <len; i++) {
    printf("%- 6d ", i);
  }
  printf("\n");

  printf("%5s: ", "uni");
  for( i = 0; i <len; i++) {
    printf("\\u%04X ", (unsigned int)uch[i]);
  }
  printf("\n");

  printf("%5s: ", "ch");
  for( i = 0; i <len; i++) {
    prettyPrintUChar(uch[i]);
  }
  printf("\n");
}

/*
 * Dump a byte buffer to stdout as a three-row table with one
 * five-character column per byte:
 *
 *   <name>:  the index of each byte
 *     uni :  each byte as \xXX
 *      ch :  each byte quoted if isgraph() accepts it, otherwise blank
 *
 * name  label for the first row; a NULL label is printed as "?".
 * uch   the bytes to print; NULL prints an empty table.
 * len   number of bytes, or -1 to measure 'uch' with strlen().
 */
static void printBytes(const char  *name = "?",
                 const uint8_t *uch  = (const uint8_t*)"",
                 int32_t     len   = -1 )
{
  int32_t i;

  if( name == NULL ) {
    name = "?";
  }

  if( uch == NULL ) {
    len = 0;                    /* nothing to read, whatever 'len' says */
  } else if( len == -1 ) {
    len = (int32_t)strlen((const char*)uch);
  }

  /* The ' ' flag is only defined for signed conversions, so it is used
     on %d below but not on %s. */
  printf("%5s:", name);
  for( i = 0; i <len; i++) {
    printf(" %- 4d", i);
  }
  printf("\n");

  printf("%5s: ", "uni");
  for( i = 0; i <len; i++) {
    printf("\\x%02X ", (unsigned int)uch[i]);
  }
  printf("\n");

  printf("%5s: ", "ch");
  for( i = 0; i <len; i++) {
    if(isgraph(uch[i])) {
      printf(" '%c' ", (char)uch[i]);
    } else {
      printf("     ");
    }
  }
  printf("\n");
}


/*******************************************************************
  Very simple sample: compress the word 'Moscow' in Russian, followed
  by an exclamation mark (!), with a single scsu_compress() call, then
  print the input code units and the compressed bytes.

  Returns the UErrorCode left behind by scsu_compress().
 */
UErrorCode compsample_01()
{
  printf("\n\n==============================================\n"
         "Sample 01: C: simple Unicode compression\n");


  // "Moskva!" in Cyrillic letters: the text to be compressed.
  UChar input[] = { 0x041C, 0x043E, 0x0441, 0x043A, 0x0432,
                     0x0430, 0x0021, 0x0000 };
  const UChar *source;
  uint8_t buffer[100];
  uint8_t *target;
  int32_t targetLen = (int32_t)sizeof(buffer);
  int32_t sourceLen = u_strlen(input);
  source = input;

  UErrorCode status = U_ZERO_ERROR;

  // **************************** START SAMPLE *******************
  // set up the compressor
  UnicodeCompressor comp;
  scsu_init(&comp);

  // 'target' and 'source' are advanced by scsu_compress()
  target = buffer;
  scsu_compress(&comp, &target, target+targetLen, &source,
                source+sourceLen, &status);
  assert(U_SUCCESS(status));

  // a pointer difference is not an int: convert before handing it to %d
  fprintf(stderr, "Bytes converted: %d\n", (int)(target-buffer));

  // ***************************** END SAMPLE ********************

  // Print it out
  printUChars("src", input);
  printf("\n");
  printBytes("targ", buffer, (int32_t)(target - buffer));

  // Report what really happened, in case assert() is compiled out.
  return status;
}

/* Number of sample strings available to getOurDataString(): one per
   locale reported by uloc_countAvailable(). */
int32_t countOurDataStrings()
{
  const int32_t localeCount = uloc_countAvailable();

  return localeCount;
}

int32_t getOurDataString(int32_t i, UChar *s, int32_t size)
{
  UErrorCode status = U_ZERO_ERROR;
  int32_t len;
  const char *loc = uloc_getAvailable(i);
  len =  uloc_getDisplayName(loc,loc,s,size,&status);
  if(U_FAILURE(status))
    {
      return 0;
    }
  return len;
}

/*
 * Sample 02: bulk compression.  Every string supplied by
 * getOurDataString() is compressed with one shared compressor and the
 * resulting bytes are appended to the file "outdata2.scsu".
 *
 * Returns U_ZERO_ERROR on success, U_FILE_ACCESS_ERROR if the output
 * file cannot be opened, written or closed, or the error code reported
 * by scsu_compress().
 */
UErrorCode compsample_02()
{
  printf("\n\n==============================================\n"
         "Sample 02: C: bulk Unicode compression\n");

#define SAMPLE2BUFFERSIZE 1024  /* larger than our largest data */
  UChar input[SAMPLE2BUFFERSIZE];
  const UChar *source;
  uint8_t output[SAMPLE2BUFFERSIZE];
  uint8_t *target;
  int32_t sourceLen;
  int32_t count;
  int32_t i;
  int32_t charsIn = 0,bytesOut = 0;
  FILE *f;

  UErrorCode status = U_ZERO_ERROR;

  /* The compressed stream is binary data, so open the file in binary mode. */
  f = fopen("outdata2.scsu", "wb");
  if(f == NULL) {
    fprintf(stderr, "compsample_02: cannot open outdata2.scsu for writing\n");
    return U_FILE_ACCESS_ERROR;
  }

  // **************************** START SAMPLE *******************
  // set up the compressor
  UnicodeCompressor comp;
  scsu_init(&comp);

  count = countOurDataStrings();
  /* Stop at the first string that leaves an error behind. */
  for(i=0; (i<count) && (status == U_ZERO_ERROR); i++)
    {
      sourceLen = getOurDataString(i, input, SAMPLE2BUFFERSIZE);
      charsIn += sourceLen;
      //      printUChars("src", input);

      /* Now, loop and write out all of the data */
      source = input;
      target = output;

      while(source < (input+sourceLen))
        {
          size_t chunk;

          scsu_compress(&comp, &target, output+SAMPLE2BUFFERSIZE,
                        &source, input+sourceLen, &status);

          if( (status != U_ZERO_ERROR) && (status != U_INDEX_OUTOFBOUNDS_ERROR)) {
            /* Neither "done" nor "output buffer full": retrying with the
               same status would never make progress, so give up. */
            break;
          }

          /* got all of it, or a full output buffer: flush what we have */
          chunk = (size_t)(target-output);
          if(fwrite(output, 1, chunk, f) != chunk) {
            status = U_FILE_ACCESS_ERROR;
            break;
          }
          bytesOut += (int32_t)chunk;

          target = output; /* reset target to beginning */
          if(status == U_ZERO_ERROR) {
            break; /* Got everything! */
          }

          status = U_ZERO_ERROR; /* reset, go get another chunk. */
        }
    }

  /* at this point, call scsu_reset(&comp) if you want to write out
     a different data stream with the same compressor. */

  /************************* END SAMPLE ************************/

  /* fclose() flushes buffered data, so a failure here means lost output. */
  if( (fclose(f) != 0) && (status == U_ZERO_ERROR) ) {
    status = U_FILE_ACCESS_ERROR;
  }

  if(status != U_ZERO_ERROR) {
    fprintf(stderr, "compsample_02: stopped with error code %d\n", (int)status);
  }
  printf("done[02] - %d uchars in, %d bytes written. \n", (int)charsIn, (int)bytesOut);

  return status;
}

/* Run both samples in order.  Their return codes are not inspected;
   the program always exits with status 0. */
int main()
{
  (void)compsample_01();
  (void)compsample_02();

  return 0;
}