2000-06-09 23:33:40 +00:00
|
|
|
/**************************************************************************
|
|
|
|
*
|
|
|
|
* Copyright (C) 2000, International Business Machines
|
|
|
|
* Corporation and others. All Rights Reserved.
|
|
|
|
*
|
|
|
|
***************************************************************************
|
|
|
|
* file name: compsamp.c
|
|
|
|
* encoding: ASCII (7-bit)
|
|
|
|
*
|
|
|
|
* created on: 2000may30
|
|
|
|
* created by: Steven R. Loomis
|
|
|
|
*
|
|
|
|
* Sample code for the ICU compression routines.
|
|
|
|
*
|
|
|
|
* Note: Nothing special is needed to build this sample. Link with
|
|
|
|
* the icu UC and icu I18N libraries.
|
|
|
|
*
|
|
|
|
* I use 'assert' for error checking, you probably will want
|
|
|
|
* something more flexible. '***START SAMPLE***' and
|
|
|
|
* '***END SAMPLE***' mark pieces suitable for stand alone
|
|
|
|
* code snippets.
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <stdio.h>
|
|
|
|
#include <ctype.h> /* for isspace, etc. */
|
|
|
|
#include <assert.h>
|
2000-09-06 23:34:38 +00:00
|
|
|
#include <string.h>
|
2000-06-09 23:33:40 +00:00
|
|
|
|
|
|
|
#include "unicode/utypes.h" /* Basic ICU data types */
|
|
|
|
#include "unicode/scsu.h"
|
|
|
|
#include "unicode/uchar.h"
|
|
|
|
#include "unicode/ustring.h" /* some more string fcns*/
|
2000-06-10 00:07:01 +00:00
|
|
|
#include "unicode/uloc.h"
|
2000-06-09 23:33:40 +00:00
|
|
|
|
|
|
|
/* Some utility functions */
|
|
|
|
|
|
|
|
/* A UChar array holding only the 0x0000 terminator, i.e. an empty string. */
static const UChar kNone[] = { 0x0000 };
|
|
|
|
|
|
|
|
/* Print a UChar if possible, in seven characters. */
|
|
|
|
static void prettyPrintUChar(UChar c)
|
|
|
|
{
|
|
|
|
if( (c <= 0x007F) &&
|
|
|
|
(isgraph(c)) ) {
|
|
|
|
printf(" '%c' ", (char)(0x00FF&c));
|
|
|
|
} else if ( c > 0x007F ) {
|
|
|
|
char buf[100];
|
|
|
|
UErrorCode status = U_ZERO_ERROR;
|
|
|
|
UTextOffset o;
|
|
|
|
|
|
|
|
o = u_charName(c, U_UNICODE_CHAR_NAME, buf, 100, &status);
|
|
|
|
if(U_SUCCESS(status)) {
|
|
|
|
buf[6] = 0;
|
|
|
|
printf("%- 7s", buf);
|
|
|
|
} else {
|
|
|
|
printf("??????? ");
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
switch((char)(c & 0x007F)) {
|
|
|
|
case ' ':
|
|
|
|
printf(" ' ' ");
|
|
|
|
break;
|
|
|
|
case '\t':
|
|
|
|
printf(" \t ");
|
|
|
|
break;
|
|
|
|
case '\n':
|
|
|
|
printf(" \n ");
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
printf(" ");
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
static void printUChars(const char *name = "?",
|
|
|
|
const UChar *uch = kNone,
|
|
|
|
int32_t len = -1 )
|
|
|
|
{
|
|
|
|
int32_t i;
|
|
|
|
|
|
|
|
if( (len == -1) && (uch) ) {
|
|
|
|
len = u_strlen(uch);
|
|
|
|
}
|
|
|
|
|
|
|
|
printf("% 5s:", name);
|
|
|
|
for( i = 0; i <len; i++) {
|
|
|
|
printf("%- 6d ", i);
|
|
|
|
}
|
|
|
|
printf("\n");
|
|
|
|
|
|
|
|
printf("% 5s: ", "uni");
|
|
|
|
for( i = 0; i <len; i++) {
|
|
|
|
printf("\\u%04X ", (int)uch[i]);
|
|
|
|
}
|
|
|
|
printf("\n");
|
|
|
|
|
|
|
|
printf("% 5s: ", "ch");
|
|
|
|
for( i = 0; i <len; i++) {
|
|
|
|
prettyPrintUChar(uch[i]);
|
|
|
|
}
|
|
|
|
printf("\n");
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Dump a byte buffer to stdout as three rows: the element indices
 * (labelled with `name`), the \xXX value of each byte ("uni"), and each
 * byte as a quoted character when isgraph() accepts it ("ch").
 *
 * @param name  label printed in front of the index row
 * @param uch   the bytes to dump
 * @param len   number of bytes; when -1 and uch is non-NULL the buffer is
 *              treated as a NUL-terminated string and measured with strlen()
 */
static void printBytes(const char *name = "?",
                       const uint8_t *uch = (const uint8_t*)"",
                       int32_t len = -1 )
{
    int32_t pos;

    /* Caller did not supply a length: measure up to the first NUL byte. */
    if (uch && len == -1) {
        len = strlen((const char*)uch);
    }

    /* Row 1: indices. */
    printf("% 5s:", name);
    pos = 0;
    while (pos < len) {
        printf(" %- 4d", pos);
        ++pos;
    }
    printf("\n");

    /* Row 2: byte values in hex. */
    printf("% 5s: ", "uni");
    pos = 0;
    while (pos < len) {
        const int byteValue = 0x00FF & (int)uch[pos];
        printf("\\x%02X ", byteValue);
        ++pos;
    }
    printf("\n");

    /* Row 3: graphic bytes as characters, everything else as filler. */
    printf("% 5s: ", "ch");
    pos = 0;
    while (pos < len) {
        if (!isgraph(uch[pos])) {
            printf(" ");
        } else {
            printf(" '%c' ", (char)uch[pos]);
        }
        ++pos;
    }
    printf("\n");
}
|
|
|
|
|
|
|
|
|
|
|
|
/*******************************************************************
|
|
|
|
Very simple C sample to compress the word 'Moscow' in Russian, followed
|
|
|
|
by an exclamation mark (!)
|
|
|
|
|
|
|
|
*/
|
|
|
|
UErrorCode compsample_01()
|
|
|
|
{
|
|
|
|
printf("\n\n==============================================\n"
|
|
|
|
"Sample 01: C: simple Unicode compression\n");
|
|
|
|
|
|
|
|
|
|
|
|
// "Moscva!" in cyrillic letters, to be converted to the KOI8-R
|
|
|
|
// Russian code page.
|
|
|
|
UChar input[] = { 0x041C, 0x043E, 0x0441, 0x043A, 0x0432,
|
|
|
|
0x0430, 0x0021, 0x0000 };
|
|
|
|
const UChar *source;
|
|
|
|
uint8_t buffer[100];
|
|
|
|
uint8_t *target;
|
|
|
|
int32_t targetLen = sizeof(buffer);
|
|
|
|
int32_t sourceLen = u_strlen(input);
|
|
|
|
target = buffer;
|
|
|
|
source = input;
|
|
|
|
|
|
|
|
UErrorCode status = U_ZERO_ERROR;
|
|
|
|
|
|
|
|
// **************************** START SAMPLE *******************
|
|
|
|
// set up the compressor
|
|
|
|
UnicodeCompressor comp;
|
|
|
|
scsu_init(&comp);
|
|
|
|
|
|
|
|
target = buffer;
|
|
|
|
scsu_compress(&comp, &target, target+targetLen, &source,
|
|
|
|
source+sourceLen, &status);
|
|
|
|
assert(U_SUCCESS(status));
|
|
|
|
|
|
|
|
fprintf(stderr, "Bytes converted: %d\n", target-buffer);
|
|
|
|
|
|
|
|
// ***************************** END SAMPLE ********************
|
|
|
|
|
|
|
|
// Print it out
|
|
|
|
printUChars("src", input);
|
|
|
|
printf("\n");
|
|
|
|
printBytes("targ", buffer, target - buffer);
|
|
|
|
|
|
|
|
return U_ZERO_ERROR;
|
|
|
|
}
|
|
|
|
|
|
|
|
int32_t countOurDataStrings()
|
|
|
|
{
|
|
|
|
return uloc_countAvailable();
|
|
|
|
}
|
|
|
|
|
|
|
|
int32_t getOurDataString(int32_t i, UChar *s, int32_t size)
|
|
|
|
{
|
|
|
|
UErrorCode status = U_ZERO_ERROR;
|
|
|
|
int32_t len;
|
|
|
|
const char *loc = uloc_getAvailable(i);
|
|
|
|
len = uloc_getDisplayName(loc,loc,s,size,&status);
|
|
|
|
if(U_FAILURE(status))
|
|
|
|
{
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
return len;
|
|
|
|
}
|
|
|
|
|
2000-09-06 23:34:38 +00:00
|
|
|
void compsample_02()
|
2000-06-09 23:33:40 +00:00
|
|
|
{
|
|
|
|
printf("\n\n==============================================\n"
|
|
|
|
"Sample 02: C: bulk Unicode compression\n");
|
|
|
|
|
|
|
|
#define SAMPLE2BUFFERSIZE 1024 /* larger than our largest data */
|
2000-06-10 00:07:01 +00:00
|
|
|
UChar input[SAMPLE2BUFFERSIZE];
|
2000-06-09 23:33:40 +00:00
|
|
|
const UChar *source;
|
|
|
|
uint8_t output[SAMPLE2BUFFERSIZE];
|
|
|
|
uint8_t *target;
|
|
|
|
int32_t sourceLen;
|
|
|
|
int32_t count;
|
|
|
|
int32_t i;
|
|
|
|
int32_t charsIn = 0,bytesOut = 0;
|
|
|
|
FILE *f;
|
|
|
|
|
|
|
|
UErrorCode status = U_ZERO_ERROR;
|
|
|
|
|
|
|
|
f = fopen("outdata2.scsu", "w");
|
|
|
|
|
|
|
|
// **************************** START SAMPLE *******************
|
|
|
|
// set up the compressor
|
|
|
|
UnicodeCompressor comp;
|
|
|
|
scsu_init(&comp);
|
|
|
|
|
|
|
|
count = countOurDataStrings();
|
|
|
|
for(i=0;i<count;i++)
|
|
|
|
{
|
|
|
|
sourceLen = getOurDataString(i, input, SAMPLE2BUFFERSIZE);
|
|
|
|
charsIn += sourceLen;
|
|
|
|
// printUChars("src", input);
|
|
|
|
|
|
|
|
/* Now, loop and write out all of the data */
|
|
|
|
source = input;
|
|
|
|
target = output;
|
|
|
|
|
|
|
|
while(source < (input+sourceLen))
|
|
|
|
{
|
|
|
|
scsu_compress(&comp, &target, output+SAMPLE2BUFFERSIZE,
|
|
|
|
&source, input+sourceLen, &status);
|
|
|
|
|
2000-08-11 20:11:06 +00:00
|
|
|
if( (status == U_ZERO_ERROR) || (status == U_BUFFER_OVERFLOW_ERROR)) {
|
2000-06-09 23:33:40 +00:00
|
|
|
/* got all of it */
|
2000-06-10 00:07:01 +00:00
|
|
|
// printBytes("out", output, target-output); // Uncomment for very verbose output..
|
2000-06-09 23:33:40 +00:00
|
|
|
|
|
|
|
fwrite(output, 1, target-output, f);
|
|
|
|
bytesOut += (target-output);
|
|
|
|
|
|
|
|
target = output; /* reset target to beginning */
|
|
|
|
if(status == U_ZERO_ERROR) {
|
|
|
|
break; /* Got everything! */
|
|
|
|
}
|
|
|
|
|
|
|
|
status = U_ZERO_ERROR; /* reset, go get another chunk. */
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
fclose(f);
|
|
|
|
printf("done[02] - %d uchars in, %d bytes written. \n", charsIn, bytesOut);
|
|
|
|
/* at this point, call scsu_reset(&comp) if you want ot write out
|
|
|
|
a different data stream with the same compressor. */
|
|
|
|
|
|
|
|
/************************* END SAMPLE ************************/
|
|
|
|
}
|
|
|
|
|
|
|
|
int main()
|
|
|
|
{
|
|
|
|
compsample_01();
|
|
|
|
compsample_02();
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|