ICU-740 Remove old/unused code according to Helena.

X-SVN-Rev: 5180
This commit is contained in:
George Rhoten 2001-07-05 22:32:23 +00:00
parent 6c37bcf77d
commit ce3bed4f40
16 changed files with 0 additions and 2621 deletions

View File

@ -1,173 +0,0 @@
/*
*
* Copyright (C) 1998-2001, International Business Machines
* Corporation and others. All Rights Reserved.
*
* @version 1.0 06/19/98
* @author Helena Shih
* Based on Taligent international support for C++
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include "cmemory.h"
#include "ucmp8.h"
#if U_IOSTREAM_SOURCE >= 199711
#include <iostream>
using namespace std;
#elif U_IOSTREAM_SOURCE >= 198506
#include <iostream.h>
#endif
// Table of direction codes; created on the first call to getArray() and read by writeArrays().
CompactByteArray* charDirArray = 0;
// NOTE(review): not referenced by any function in the visible part of this file.
static const UChar LAST_CHAR_CODE_IN_FILE = 0xFFFD;
// Direction-type abbreviations in tag order. Every entry is padded with blanks
// to exactly three characters: MakeProp() divides the match offset by 3 to get
// the tag index, and getArray() pads the field it looks up to three characters.
const char charDirStrings[] =
    "L  " "R  " "EN " "ES " "ET " "AN " "CS " "B  " "S  " "WS "
    "ON " "LRE" "LRO" "AL " "RLE" "RLO" "PDF" "NSM" "BN ";

// Value stored in the compact array for each entry of charDirStrings (same order).
int tagValues[] = {
    0,  // kLeftToRight = 0,
    1,  // kRightToLeft = 1,
    2,  // kEuropeanNumber = 2,
    3,  // kEuropeanNumberSeparator = 3,
    4,  // kEuropeanNumberTerminator = 4,
    5,  // kArabicNumber = 5,
    6,  // kCommonNumberSeparator = 6,
    7,  // kParagraphSeparator = 7,
    8,  // kSegmentSeparator = 8,
    9,  // kWhiteSpaceNeutral = 9,
    10, // kOtherNeutral = 10,
    11, // kLeftToRightEmbedding = 11,
    12, // kLeftToRightOverride = 12,
    13, // kRightToLeftArabic = 13,
    14, // kRightToLeftEmbedding = 14,
    15, // kRightToLeftOverride = 15,
    16, // kPopDirectionalFormat = 16,
    17, // kNonSpacingMark = 17,
    18  // kBoundaryNeutral = 18,
};
/**
 * Maps a direction-type abbreviation to its index in charDirStrings.
 *
 * @param str  NUL-terminated abbreviation, blank-padded to three characters.
 * @return     the index of the matching three-character entry, or 0 after
 *             printing a diagnostic to stderr when nothing matches.
 */
int MakeProp(char* str)
{
    // The table is const, so the match position must be a pointer to const.
    const char* matchPosition = strstr(charDirStrings, str);

    // Only a hit that starts on an entry boundary identifies a tag; a hit that
    // straddles two neighbouring entries would divide down to the wrong index.
    while (matchPosition != 0 && (matchPosition - charDirStrings) % 3 != 0)
        matchPosition = strstr(matchPosition + 1, str);

    if (matchPosition == 0) {
        fprintf(stderr, "unrecognized type letter %s\n", str);
        return 0;
    }
    return (int)((matchPosition - charDirStrings) / 3);
}
/**
 * Builds the direction table from a semicolon-separated data file, once.
 *
 * Each data line starts with a hexadecimal code point; the text between the
 * fourth and fifth ';' is the direction abbreviation. Lines starting with '#'
 * and empty lines are ignored. On the first call the stream is read to the end
 * and closed, and the table is compacted; later calls return the same table
 * without touching `input`.
 *
 * @param input  open text stream to read from.
 * @return       the shared table, or 0 if it could not be allocated.
 */
CompactByteArray*
getArray(FILE *input)
{
    if (charDirArray != 0)
        return charDirArray;

    char buffer[1000];
    try {
        charDirArray = ucmp8_open(0);
        if (charDirArray == 0) {
            fprintf(stderr, "Cannot allocate the character direction table.\n");
            if (input)
                fclose(input);
            return 0;
        }
        for (;;) {
            char* bufferPtr = fgets(buffer, 999, input);
            if (bufferPtr == NULL)
                break;
            if (bufferPtr[0] == '#' || bufferPtr[0] == '\n' || bufferPtr[0] == 0)
                continue;

            // %X stores through an unsigned int*, so parse into one.
            unsigned int unicode = 0;
            const int parsed = sscanf(bufferPtr, "%X", &unicode);
            assert(parsed == 1 && unicode < 65536);
            // The asserts vanish in NDEBUG builds; keep an explicit guard so a
            // bad line is skipped there instead of corrupting the table.
            if (parsed != 1 || unicode >= 65536) {
                fprintf(stderr, "skipping malformed line: %s", buffer);
                continue;
            }

            // Advance to the start of the fifth field.
            for (int i = 0; i < 4 && bufferPtr != NULL; i++) {
                bufferPtr = strchr(bufferPtr, ';');
                if (bufferPtr != NULL)
                    bufferPtr++;
            }
            assert(bufferPtr != NULL);
            char* next = (bufferPtr != NULL) ? strchr(bufferPtr, ';') : NULL;
            assert(next != NULL);
            // dir holds three characters plus the terminator; a longer field
            // cannot be a known abbreviation and must not be copied into it.
            if (next == NULL || next - bufferPtr > 3) {
                fprintf(stderr, "skipping malformed line: %s", buffer);
                continue;
            }

            // Copy the field and pad it with blanks to the table's cell width.
            char dir[4];
            int j = 0;
            for (; bufferPtr + j != next; j++)
                dir[j] = bufferPtr[j];
            for (; j < 3; j++)
                dir[j] = ' ';
            dir[3] = 0;

            ucmp8_set(charDirArray, (UChar)unicode, (int8_t)tagValues[MakeProp(dir)]);
        }
        if (input)
            fclose(input);
        ucmp8_compact(charDirArray, 1);
    }
    catch (...) {
        fprintf(stderr, "Error Occured while parsing unicode data file.\n");
    }
    return charDirArray;
}
/**
 * Prints the index and value arrays of charDirArray, plus the value count, to
 * standard output as C++ definitions. Does nothing but report an error when
 * the table has not been built.
 */
void
writeArrays()
{
    if (charDirArray == 0) {
        fprintf(stderr, "No character direction table has been built.\n");
        return;
    }

    const int8_t* values = ucmp8_getArray(charDirArray);
    const uint16_t* indexes = ucmp8_getIndex(charDirArray);
    const int32_t cnt = ucmp8_getCount(charDirArray);
    const int32_t indexCount = ucmp8_getkIndexCount();
    int32_t i;

    // The indexes are unsigned 16-bit values, so they are printed as they are;
    // the former "negative index" correction could never trigger.
    cout << "\nconst t_uint32 Unicode::fCharDirIndices[] = {\n ";
    for (i = 0; i < indexCount-1; i++)
    {
        cout << "(uint16_t)" << (int)indexes[i] << ", ";
        if (i != 0 && i % 3 == 0)
            cout << "\n ";
    }
    cout << " (uint16_t)" << (int)indexes[indexCount-1] << " };\n";

    cout << "\nconst int8_t Unicode::fCharDirValues[] = {\n ";
    for (i = 0; i < cnt-1; i++)
    {
        cout << "(int8_t)" << (int)values[i] << ", ";
        if (i != 0 && i % 5 == 0)
            cout << "\n ";
    }
    // Close the initializer with "};" so the emitted definition compiles.
    cout << " (int8_t)" << (int)values[cnt-1] << " };\n";
    cout << "const int32_t Unicode::fCharDirCount = " << cnt << ";\n";
}
/**
 * Entry point: builds the character direction table from the data file named
 * on the command line and prints it to standard output.
 *
 * Exits with status 1 on a usage error, when the file cannot be opened, or
 * when the table cannot be built.
 */
int main(int argc, char** argv)
{
    if (argc != 2) {
        printf("Usage : chardir filename\n\n");
        exit(1);
    }

    FILE *input = fopen(argv[1], "r");
    if (input == 0) {
        printf("Cannot open the input file: %s\n\n", argv[1]);
        exit(1);
    }

    // getArray() takes over the stream and closes it when it is done.
    if (getArray(input) == 0) {
        fprintf(stderr, "Cannot build the character direction table.\n");
        exit(1);
    }
    writeArrays();
    return 0;
}

View File

@ -1,98 +0,0 @@
# Microsoft Developer Studio Project File - Name="chardir" - Package Owner=<4>
# Microsoft Developer Studio Generated Build File, Format Version 6.00
# ** DO NOT EDIT **
# TARGTYPE "Win32 (x86) Console Application" 0x0103
CFG=chardir - Win32 Debug
!MESSAGE This is not a valid makefile. To build this project using NMAKE,
!MESSAGE use the Export Makefile command and run
!MESSAGE
!MESSAGE NMAKE /f "chardir.mak".
!MESSAGE
!MESSAGE You can specify a configuration when running NMAKE
!MESSAGE by defining the macro CFG on the command line. For example:
!MESSAGE
!MESSAGE NMAKE /f "chardir.mak" CFG="chardir - Win32 Debug"
!MESSAGE
!MESSAGE Possible choices for configuration are:
!MESSAGE
!MESSAGE "chardir - Win32 Release" (based on "Win32 (x86) Console Application")
!MESSAGE "chardir - Win32 Debug" (based on "Win32 (x86) Console Application")
!MESSAGE
# Begin Project
# PROP AllowPerConfigDependencies 0
# PROP Scc_ProjName ""
# PROP Scc_LocalPath ""
CPP=cl.exe
RSC=rc.exe
!IF "$(CFG)" == "chardir - Win32 Release"
# PROP BASE Use_MFC 0
# PROP BASE Use_Debug_Libraries 0
# PROP BASE Output_Dir "Release"
# PROP BASE Intermediate_Dir "Release"
# PROP BASE Target_Dir ""
# PROP Use_MFC 0
# PROP Use_Debug_Libraries 0
# PROP Output_Dir "Release"
# PROP Intermediate_Dir "Release"
# PROP Ignore_Export_Lib 0
# PROP Target_Dir ""
# ADD BASE CPP /nologo /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /c
# ADD CPP /nologo /W3 /GX /O2 /I "..\..\..\include" /I "..\..\..\source\common" /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /c
# ADD BASE RSC /l 0x409 /d "NDEBUG"
# ADD RSC /l 0x409 /d "NDEBUG"
BSC32=bscmake.exe
# ADD BASE BSC32 /nologo
# ADD BSC32 /nologo
LINK32=link.exe
# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:I386
# ADD LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:I386
!ELSEIF "$(CFG)" == "chardir - Win32 Debug"
# PROP BASE Use_MFC 0
# PROP BASE Use_Debug_Libraries 1
# PROP BASE Output_Dir "Debug"
# PROP BASE Intermediate_Dir "Debug"
# PROP BASE Target_Dir ""
# PROP Use_MFC 0
# PROP Use_Debug_Libraries 1
# PROP Output_Dir "Debug"
# PROP Intermediate_Dir "Debug"
# PROP Ignore_Export_Lib 0
# PROP Target_Dir ""
# ADD BASE CPP /nologo /W3 /Gm /GX /Zi /Od /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /c
# ADD CPP /nologo /W3 /Gm /GX /ZI /Od /I "..\..\..\include" /I "..\..\..\source\common" /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /c
# ADD BASE RSC /l 0x409 /d "_DEBUG"
# ADD RSC /l 0x409 /d "_DEBUG"
BSC32=bscmake.exe
# ADD BASE BSC32 /nologo
# ADD BSC32 /nologo
LINK32=link.exe
# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept
# ADD LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept
!ENDIF
# Begin Target
# Name "chardir - Win32 Release"
# Name "chardir - Win32 Debug"
# Begin Source File
SOURCE=.\chardir.cpp
# End Source File
# Begin Source File
SOURCE=.\ucmp8.c
# End Source File
# Begin Source File
SOURCE=.\ucmp8.h
# End Source File
# End Target
# End Project

View File

@ -1,29 +0,0 @@
Microsoft Developer Studio Workspace File, Format Version 6.00
# WARNING: DO NOT EDIT OR DELETE THIS WORKSPACE FILE!
###############################################################################
Project: "chardir"=.\chardir.dsp - Package Owner=<4>
Package=<5>
{{{
}}}
Package=<4>
{{{
}}}
###############################################################################
Global:
Package=<5>
{{{
}}}
Package=<3>
{{{
}}}
###############################################################################

View File

@ -1,386 +0,0 @@
/*
********************************************************************
*
* Copyright (C) 1997-2000, International Business Machines
* Corporation and others. All Rights Reserved.
*
********************************************************************
*/
#ifndef _STDLIB_H
#include <stdlib.h>
#endif
#ifndef _STDIO_H
#include <stdio.h>
#endif
#include "ucmp8.h"
#include "cmemory.h"
static int32_t findOverlappingPosition(CompactByteArray* this,
uint32_t start,
const UChar *tempIndex,
int32_t tempIndexCount,
uint32_t cycle);
/* internal constants
 * The 65536 code units are split into blocks of 1<<7 = 128 entries, which
 * gives 1<<(16-7) = 512 index slots; the mask extracts the offset in a block.
 */
#define kUnicodeCount_int 65536
#define kBlockShift_int 7
#define kBlockCount_int (1<<kBlockShift_int)
#define kIndexShift_int (16-kBlockShift_int)
#define kIndexCount_int (1<<kIndexShift_int)
#define kBlockMask_int (kBlockCount_int-1)
/* exported copies of the constants above */
const int32_t UCMP8_kUnicodeCount = kUnicodeCount_int;
const int32_t UCMP8_kBlockShift = kBlockShift_int;
const int32_t UCMP8_kBlockCount = kBlockCount_int;
const int32_t UCMP8_kIndexShift = kIndexShift_int;
const int32_t UCMP8_kIndexCount = kIndexCount_int;
const uint32_t UCMP8_kBlockMask = kBlockMask_int;
/* Function-call access to the table dimensions. */

/* Number of code units an expanded array covers. */
int32_t ucmp8_getkUnicodeCount(void)
{
    return UCMP8_kUnicodeCount;
}

/* Number of entries in one block. */
int32_t ucmp8_getkBlockCount(void)
{
    return UCMP8_kBlockCount;
}

/* Number of entries in the index array. */
int32_t ucmp8_getkIndexCount(void)
{
    return UCMP8_kIndexCount;
}
/* debug flags*/
/*=======================================================*/
/* Returns the value stored for `index`: the high bits of `index` select an
 * entry of fIndex (the start of a block inside fArray) and the low bits are
 * the offset within that block.
 */
U_CAPI int8_t ucmp8_get(CompactByteArray* array, uint16_t index)
{
    const uint32_t blockStart = array->fIndex[index >> UCMP8_kBlockShift] & 0xFFFF;
    const uint32_t offsetInBlock = index & UCMP8_kBlockMask;

    return array->fArray[blockStart + offsetInBlock];
}
/* Same lookup as ucmp8_get(), with the stored byte returned as unsigned. */
U_CAPI uint8_t ucmp8_getu(CompactByteArray* array, uint16_t index)
{
    const int8_t stored = ucmp8_get(array, index);

    return (uint8_t)stored;
}
/* Creates an expanded array in which every code unit maps to defaultValue.
 *
 * Returns NULL when any allocation fails; nothing is leaked in that case.
 *
 * Layout: the index array always points into particular parts of the data
 * array. It is initially set up to point at regular block boundaries.
 * The following example uses blocks of 4 for simplicity.
 *
 * Example: Expanded
 *   INDEX#  0  1  2  3  4
 *   INDEX   0  4  8  12 16 ...
 *   ARRAY   abcdeababcedzyabcdea...
 *
 * Whenever you set an element in the array, it unpacks to this state.
 * After compression, the index will point to various places in the data array
 * wherever there is a run of the same elements as in the original.
 *
 * Example: Compressed
 *   INDEX#  0  1  2  3  4
 *   INDEX   0  4  1  8  2 ...
 *   ARRAY   abcdeabazyabc...
 *
 * If you look at the example, index# 2 in the expanded version points
 * to data position number 8, which has elements "bced". In the compressed
 * version, index# 2 points to data position 1, which also has "bced".
 */
CompactByteArray* ucmp8_open(int8_t defaultValue)
{
    CompactByteArray* this = (CompactByteArray*) uprv_malloc(sizeof(CompactByteArray));
    int32_t i;

    if (this == NULL) return NULL;

    this->fArray = NULL;
    this->fIndex = NULL;
    this->fCount = UCMP8_kUnicodeCount;
    this->fCompact = FALSE;
    this->fBogus = FALSE;

    this->fArray = (int8_t*) uprv_malloc(sizeof(int8_t) * UCMP8_kUnicodeCount);
    if (!this->fArray)
    {
        /* the caller only ever sees NULL, so the struct must be released here */
        uprv_free(this);
        return NULL;
    }

    this->fIndex = (uint16_t*) uprv_malloc(sizeof(uint16_t) * UCMP8_kIndexCount);
    if (!this->fIndex)
    {
        uprv_free(this->fArray);
        uprv_free(this);
        return NULL;
    }

    for (i = 0; i < UCMP8_kUnicodeCount; ++i)
    {
        this->fArray[i] = defaultValue;
    }
    /* every index entry starts out pointing at its own, unshared block */
    for (i = 0; i < UCMP8_kIndexCount; ++i)
    {
        this->fIndex[i] = (uint16_t)(i << UCMP8_kBlockShift);
    }

    return this;
}
/* Wraps caller-supplied index and value arrays in a new CompactByteArray.
 * The pointers are stored as given (no copy is made). The array is flagged as
 * compact when `count` is below the full Unicode count.
 * Returns NULL if the struct itself cannot be allocated.
 */
CompactByteArray* ucmp8_openAdopt(uint16_t *indexArray,
                                  int8_t *newValues,
                                  int32_t count)
{
    CompactByteArray* adopted = (CompactByteArray*) uprv_malloc(sizeof(CompactByteArray));

    if (adopted != NULL)
    {
        adopted->fArray = newValues;
        adopted->fIndex = indexArray;
        adopted->fCount = count;
        adopted->fBogus = FALSE;
        if (count < UCMP8_kUnicodeCount)
        {
            adopted->fCompact = TRUE;
        }
        else
        {
            adopted->fCompact = FALSE;
        }
    }
    return adopted;
}
/*=======================================================*/
/* Releases the value array, the index array and the struct itself.
 * Passing NULL is allowed and does nothing.
 */
void ucmp8_close(CompactByteArray* this)
{
    if (this == NULL)
    {
        return;
    }
    uprv_free(this->fArray);
    this->fArray = NULL;
    uprv_free(this->fIndex);
    this->fIndex = NULL;
    this->fCount = 0;
    this->fCompact = FALSE;
    uprv_free(this);
}
/*=======================================================*/
/* Brings a compacted array back to the one-slot-per-code-unit layout.
 *
 * Every value is read through ucmp8_get() into a fresh full-size buffer, the
 * index is reset to regular block boundaries, and the buffer replaces the old
 * value array. An array that is not compact is left untouched. If the buffer
 * cannot be allocated, fBogus is set and nothing else changes.
 *
 * Example: Compressed
 *   INDEX#  0  1  2  3  4
 *   INDEX   0  4  1  8  2 ...
 *   ARRAY   abcdeabazyabc...
 * turns into
 * Example: Expanded
 *   INDEX#  0  1  2  3  4
 *   INDEX   0  4  8  12 16 ...
 *   ARRAY   abcdeababcedzyabcdea...
 */
void ucmp8_expand(CompactByteArray* this)
{
    int8_t* expanded;
    int32_t pos;

    if (!this->fCompact)
    {
        return;
    }

    expanded = (int8_t*) uprv_malloc(sizeof(int8_t) * UCMP8_kUnicodeCount);
    if (expanded == NULL)
    {
        this->fBogus = TRUE;
        return;
    }

    /* read everything through the old index before the index is rewritten */
    pos = 0;
    while (pos < UCMP8_kUnicodeCount)
    {
        expanded[pos] = ucmp8_get(this, (UChar)pos);
        ++pos;
    }

    pos = 0;
    while (pos < UCMP8_kIndexCount)
    {
        this->fIndex[pos] = (uint16_t)(pos << UCMP8_kBlockShift);
        ++pos;
    }

    uprv_free(this->fArray);
    this->fArray = expanded;
    this->fCompact = FALSE;
}
/*=======================================================*/
/* Utility routine for finding blocks that overlap.
 *
 * this->fArray: the expanded values
 * start: offset in this->fArray of the block being placed
 * tempIndex, tempIndexCount: the compacted sequence built so far, stored as
 *     offsets back into this->fArray
 * cycle: distance between the candidate positions that are tried
 *
 * Returns the first candidate position at which the block agrees with the
 * sequence built so far; near the end only the part that still lies inside
 * the sequence is compared. If no candidate agrees, the returned position is
 * not below tempIndexCount.
 *
 * IMPORTANT: the cycle number is very important. Small cycles take a lot
 * longer to work. In some cases, they may be able to get better compaction.
 */
int32_t
findOverlappingPosition(CompactByteArray* this,
                        uint32_t start,
                        const UChar* tempIndex,
                        int32_t tempIndexCount,
                        uint32_t cycle)
{
    int32_t candidate = 0;

    while (candidate < tempIndexCount)
    {
        int32_t span = tempIndexCount - candidate;
        int32_t k = 0;

        if (span > UCMP8_kBlockCount)
        {
            span = UCMP8_kBlockCount;
        }
        while (k < span
               && this->fArray[start + k] == this->fArray[tempIndex[candidate + k]])
        {
            ++k;
        }
        if (k == span)
        {
            return candidate;
        }
        candidate += cycle;
    }
    return candidate;
}
/* Returns the fBogus flag, which the functions in this file set to TRUE when
 * an allocation fails. */
UBool
ucmp8_isBogus(const CompactByteArray* this)
{
return this->fBogus;
}
/* Returns the value array itself (not a copy). */
const int8_t*
ucmp8_getArray(const CompactByteArray* this)
{
return this->fArray;
}
/* Returns the index array itself (not a copy). */
const uint16_t*
ucmp8_getIndex(const CompactByteArray* this)
{
return this->fIndex;
}
/* Returns the number of entries in the value array. */
int32_t
ucmp8_getCount(const CompactByteArray* this)
{
return this->fCount;
}
/* Stores `value` for code unit `c`. A compacted array is expanded first; if
 * that expansion leaves the array bogus, nothing is stored.
 */
void
ucmp8_set(CompactByteArray* this,
          UChar c,
          int8_t value)
{
    const int32_t slot = (int32_t)c;

    if (this->fCompact == TRUE)
    {
        ucmp8_expand(this);
        if (this->fBogus)
        {
            return;
        }
    }
    this->fArray[slot] = value;
}
/* Stores `value` for every code unit from `start` through `end`, inclusive.
 * A compacted array is expanded first; if that expansion leaves the array
 * bogus, nothing is stored.
 */
void
ucmp8_setRange(CompactByteArray* this,
               UChar start,
               UChar end,
               int8_t value)
{
    /* a 32-bit counter, so the loop also ends when `end` is 0xFFFF */
    int32_t pos = start;

    if (this->fCompact == TRUE)
    {
        ucmp8_expand(this);
        if (this->fBogus)
        {
            return;
        }
    }
    while (pos <= end)
    {
        this->fArray[pos] = value;
        ++pos;
    }
}
/*=======================================================*/
/* Compacts an expanded array; an array that is already compact is untouched.
 *
 * It walks through the blocks of the expanded array, finding for each one the
 * first position in the data collected so far that matches its contents.
 * As it works, it keeps an updated count of the collected data, so that it
 * knows how big to make the final array.
 * If the matching succeeds, the index will point into the data at some
 * earlier position. If the matching fails, the block is appended and the
 * index points at the appended copy.
 *
 * cycle: step between the positions tried for each block; it is forced into
 *        the range 1..block count.
 *
 * On allocation failure fBogus is set and the array stays expanded.
 */
void
ucmp8_compact(CompactByteArray* this,
              uint32_t cycle)
{
    UChar* tempIndex;
    int32_t tempIndexCount;
    int8_t* tempArray;
    int32_t iBlock, iIndex;

    if (this->fCompact)
    {
        return;
    }

    /* fix cycle, must be 0 < cycle <= blockcount. cycle is unsigned, so the
     * only value below the range is 0, and a step of 0 would make the search
     * for an overlapping position loop forever. */
    if (cycle == 0) cycle = 1;
    else if (cycle > (uint32_t)UCMP8_kBlockCount) cycle = UCMP8_kBlockCount;

    /* make temp storage, larger than we need*/
    tempIndex = (UChar*) uprv_malloc(sizeof(UChar) * UCMP8_kUnicodeCount);
    if (!tempIndex)
    {
        this->fBogus = TRUE;
        return;
    }

    /* set up first block.*/
    tempIndexCount = UCMP8_kBlockCount;
    for (iIndex = 0; iIndex < UCMP8_kBlockCount; ++iIndex)
    {
        tempIndex[iIndex] = (uint16_t)iIndex;
    }
    this->fIndex[0] = 0;

    /* for each successive block, find out its first position in the compacted array*/
    for (iBlock = 1; iBlock < UCMP8_kIndexCount; ++iBlock)
    {
        int32_t newCount, firstPosition, block;

        block = iBlock << UCMP8_kBlockShift;
        firstPosition = findOverlappingPosition(this,
                                                block,
                                                tempIndex,
                                                tempIndexCount,
                                                cycle);
        /* When nothing matched, the search stops at the first multiple of
         * cycle that is not below tempIndexCount, which can lie past the end
         * of the collected data. Append right at the end instead, so that no
         * gap is created and tempIndex can never grow past its allocation. */
        if (firstPosition > tempIndexCount)
        {
            firstPosition = tempIndexCount;
        }

        /* if not (fully) contained in the current list, copy the remainder */
        newCount = firstPosition + UCMP8_kBlockCount;
        if (newCount > tempIndexCount)
        {
            for (iIndex = tempIndexCount; iIndex < newCount; ++iIndex)
            {
                tempIndex[iIndex] = (uint16_t)(iIndex - firstPosition + block);
            }
            tempIndexCount = newCount;
        }
        this->fIndex[iBlock] = (uint16_t)firstPosition;
    }

    /* now allocate and copy the items into the array*/
    tempArray = (int8_t*) uprv_malloc(tempIndexCount * sizeof(int8_t));
    if (!tempArray)
    {
        this->fBogus = TRUE;
        uprv_free(tempIndex);
        return;
    }
    for (iIndex = 0; iIndex < tempIndexCount; ++iIndex)
    {
        tempArray[iIndex] = this->fArray[tempIndex[iIndex]];
    }
    uprv_free(this->fArray);
    this->fArray = tempArray;
    this->fCount = tempIndexCount;

    /* free up temp storage*/
    uprv_free(tempIndex);
    this->fCompact = TRUE;
}

View File

@ -1,86 +0,0 @@
/*
********************************************************************
*
* Copyright (C) 1996-2000, International Business Machines
* Corporation and others. All Rights Reserved.
*
********************************************************************
*/
#ifndef UCMP8_H
#define UCMP8_H
#include "unicode/utypes.h"
/*====================================*/
/* class CompactByteArray
* Provides a compact way to store information that is indexed by Unicode values,
* such as character properties, types, keyboard values, etc.
* The ATypes are used by value, so should be small, integers or pointers.
*====================================
*/
U_CAPI const int32_t UCMP8_kUnicodeCount;
U_CAPI const int32_t UCMP8_kBlockShift;
U_CAPI const int32_t UCMP8_kBlockCount;
U_CAPI const int32_t UCMP8_kIndexShift;
U_CAPI const int32_t UCMP8_kIndexCount;
U_CAPI const uint32_t UCMP8_kBlockMask;
U_CAPI int32_t ucmp8_getkUnicodeCount(void);
U_CAPI int32_t ucmp8_getkBlockCount(void);
U_CAPI int32_t ucmp8_getkIndexCount(void);
/* State of one compact byte array; created by ucmp8_open() or ucmp8_openAdopt(). */
typedef struct{
int8_t* fArray; /* the stored values */
uint16_t* fIndex; /* per block, the offset in fArray at which the block starts */
int32_t fCount; /* number of entries in fArray */
UBool fCompact; /* TRUE while the values are stored in compacted form */
UBool fBogus; /* set to TRUE when an allocation has failed */
} CompactByteArray;
U_CAPI CompactByteArray* ucmp8_open(int8_t defaultValue);
U_CAPI CompactByteArray* ucmp8_openAdopt(uint16_t* indexArray,
int8_t* newValues,
int32_t count);
U_CAPI void ucmp8_close(CompactByteArray* array);
U_CAPI UBool isBogus(const CompactByteArray* array);
U_CAPI int8_t ucmp8_get(CompactByteArray* array, uint16_t index);
U_CAPI uint8_t ucmp8_getu(CompactByteArray* array, uint16_t index);
U_CAPI void ucmp8_set(CompactByteArray* array,
UChar index,
int8_t value);
U_CAPI void ucmp8_setRange(CompactByteArray* array,
UChar start,
UChar end,
int8_t value);
U_CAPI int32_t ucmp8_getCount(const CompactByteArray* array);
U_CAPI const int8_t* ucmp8_getArray(const CompactByteArray* array);
U_CAPI const uint16_t* ucmp8_getIndex(const CompactByteArray* array);
/* Compact the array.
The value of cycle determines how large the overlap can be.
A cycle of 1 is the most compacted, but takes the most time to do.
If values stored in the array tend to repeat in cycles of, say, 16,
then using that will be faster than cycle = 1, and get almost the
same compression.
*/
U_CAPI void ucmp8_compact(CompactByteArray* array,
uint32_t cycle);
/* Expanded takes the array back to a 65536 element array*/
U_CAPI void ucmp8_expand(CompactByteArray* array);
#endif

View File

@ -1,169 +0,0 @@
/*
*
* Copyright (C) 1996-2001, International Business Machines
* Corporation and others. All Rights Reserved.
*
* @version 1.0 12/12/96
* @author Helena Shih
* Based on Taligent international support for C++
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#ifndef _CHARTBLD
#include "chartbld.h"
#endif
#if U_IOSTREAM_SOURCE >= 199711
#include <iostream>
using namespace std;
#elif U_IOSTREAM_SOURCE >= 198506
#include <iostream.h>
#endif
// Two-letter general category abbreviations, concatenated. MakeProp() divides
// the offset of a match by 2 to obtain the position of the abbreviation.
const char CharTypeBuilder::tagStrings[] = "MnMcMeNdNlNoZsZlZpCcCfCsCoCnLuLlLtLmLoPcPdPsPePoSmScSkSoPiPf";
// Type code for each abbreviation in tagStrings, in the same order.
const int16_t CharTypeBuilder::tagValues[] =
{
/* Mn */ (int16_t)NON_SPACING_MARK,
/* Mc */ (int16_t)COMBINING_SPACING_MARK,
/* Me */ (int16_t)ENCLOSING_MARK,
/* Nd */ (int16_t)DECIMAL_DIGIT_NUMBER,
/* Nl */ (int16_t)LETTER_NUMBER,
/* No */ (int16_t)OTHER_NUMBER,
/* Zs */ (int16_t)SPACE_SEPARATOR,
/* Zl */ (int16_t)LINE_SEPARATOR,
/* Zp */ (int16_t)PARAGRAPH_SEPARATOR,
/* Cc */ (int16_t)CONTROL,
/* Cf */ (int16_t)FORMAT,
/* Cs */ (int16_t)SURROGATE,
/* Co */ (int16_t)PRIVATE_USE,
/* Cn */ (int16_t)UNASSIGNED,
/* Lu */ (int16_t)UPPERCASE_LETTER,
/* Ll */ (int16_t)LOWERCASE_LETTER,
/* Lt */ (int16_t)TITLECASE_LETTER,
/* Lm */ (int16_t)MODIFIER_LETTER,
/* Lo */ (int16_t)OTHER_LETTER,
/* Pc */ (int16_t)CONNECTOR_PUNCTUATION,
/* Pd */ (int16_t)DASH_PUNCTUATION,
/* Ps */ (int16_t)START_PUNCTUATION,
/* Pe */ (int16_t)END_PUNCTUATION,
/* Po */ (int16_t)OTHER_PUNCTUATION,
/* Sm */ (int16_t)MATH_SYMBOL,
/* Sc */ (int16_t)CURRENCY_SYMBOL,
/* Sk */ (int16_t)MODIFIER_SYMBOL,
/* So */ (int16_t)OTHER_SYMBOL,
/* Pi */ (int16_t)INITIAL_PUNCTUATION,
/* Pf */ (int16_t)FINAL_PUNCTUATION
};
// getByteArray() stops reading once it has stored the entry for this code.
const UChar CharTypeBuilder:: LAST_CHAR_CODE_IN_FILE = 0xFFFD;
// Table of type codes; created on the first call to getByteArray().
CompactByteArray* CharTypeBuilder::charTypeArray = 0;
/**
 * Maps a two-letter general category abbreviation to its position in
 * tagStrings (and therefore in tagValues).
 *
 * @param str  NUL-terminated abbreviation.
 * @return     the position of the matching entry, or 0 after printing a
 *             diagnostic to stderr when nothing matches.
 */
int
CharTypeBuilder::MakeProp(char* str)
{
    // The table is const, so the match position must be a pointer to const.
    const char* matchPosition = strstr(tagStrings, str);

    // Only a hit that starts on an entry boundary identifies a tag; a hit that
    // straddles two neighbouring entries would divide down to the wrong index.
    while (matchPosition != 0 && (matchPosition - tagStrings) % 2 != 0)
        matchPosition = strstr(matchPosition + 1, str);

    if (matchPosition == 0) {
        fprintf(stderr, "unrecognized type letter %s\n", str);
        return 0;
    }
    return (int)((matchPosition - tagStrings) / 2);
}
/**
 * Builds the character type table from a semicolon-separated data file, once.
 *
 * Each data line starts with a hexadecimal code point; the first two
 * characters of the third field are the category abbreviation. Lines starting
 * with '#' and empty lines are ignored, and reading stops after the entry for
 * LAST_CHAR_CODE_IN_FILE. Several large ranges are then filled from the value
 * of their first code point, the stream is closed and the table is compacted.
 * Later calls return the same table without touching `input`.
 *
 * @param input  open text stream to read from.
 * @return       the shared table, or 0 if it could not be allocated.
 */
CompactByteArray*
CharTypeBuilder::getByteArray(FILE* input)
{
    if (charTypeArray != 0)
        return charTypeArray;

    char buffer[1000];
    try {
        charTypeArray = ucmp8_open((int8_t)CharTypeBuilder::UNASSIGNED);
        if (charTypeArray == 0) {
            fprintf(stderr, "Cannot allocate the character type table.\n");
            if (input) fclose(input);
            return 0;
        }
        for (;;) {
            char* bufferPtr = fgets(buffer, 999, input);
            if (bufferPtr == NULL) break;
            if (bufferPtr[0] == '#' || bufferPtr[0] == '\n' || bufferPtr[0] == 0) continue;

            // %X stores through an unsigned int*, so parse into one.
            unsigned int unicode = 0;
            const int parsed = sscanf(bufferPtr, "%X", &unicode);
            assert(parsed == 1 && unicode < 65536);
            // The asserts vanish in NDEBUG builds; keep an explicit guard so a
            // bad line is skipped there instead of corrupting the table.
            if (parsed != 1 || unicode >= 65536) {
                fprintf(stderr, "skipping malformed line: %s", buffer);
                continue;
            }

            // Find the ';' in front of the third field.
            char* field = strchr(bufferPtr, ';');
            if (field != NULL)
                field = strchr(field + 1, ';');
            assert(field != NULL);
            // The abbreviation needs two characters after that ';'.
            if (field == NULL || strlen(field + 1) < 2) {
                fprintf(stderr, "skipping malformed line: %s", buffer);
                continue;
            }
            field++;
            field[2] = 0;

            ucmp8_set(charTypeArray, (UChar)unicode, (int8_t)tagValues[MakeProp(field)]);
            if (unicode == LAST_CHAR_CODE_IN_FILE)
                break;
        }
        /* Check the database to see if this needs to be updated!!! */
        ucmp8_setRange(charTypeArray, 0x3401, 0x4db4, ucmp8_get(charTypeArray, 0x3400));
        ucmp8_setRange(charTypeArray, 0x4e01, 0x9fa4, ucmp8_get(charTypeArray, 0x4e00));
        ucmp8_setRange(charTypeArray, 0xac01, 0xd7a2, ucmp8_get(charTypeArray, 0xac00));
        ucmp8_setRange(charTypeArray, 0xd801, 0xdb7e, ucmp8_get(charTypeArray, 0xd800));
        ucmp8_setRange(charTypeArray, 0xdb81, 0xdbfe, ucmp8_get(charTypeArray, 0xdb80));
        ucmp8_setRange(charTypeArray, 0xdc01, 0xdffe, ucmp8_get(charTypeArray, 0xdc00));
        ucmp8_setRange(charTypeArray, 0xe001, 0xf8fe, ucmp8_get(charTypeArray, 0xe000));
        if (input) fclose(input);
        ucmp8_compact(charTypeArray, 1);
    }
    catch (...) {
        fprintf(stderr, "Error Occured while parsing unicode data file.\n");
    }
    return charTypeArray;
}
/**
 * Prints the index and value arrays of charTypeArray, plus the value count, to
 * standard output as C++ definitions. Does nothing but report an error when
 * the table has not been built.
 */
void
CharTypeBuilder::writeByteArrays()
{
    if (charTypeArray == 0) {
        fprintf(stderr, "No character type table has been built.\n");
        return;
    }

    const int8_t* values = ucmp8_getArray(charTypeArray);
    const uint16_t* indexes = ucmp8_getIndex(charTypeArray);
    const int32_t cnt = ucmp8_getCount(charTypeArray);
    const int32_t indexCount = ucmp8_getkIndexCount();
    int32_t i;

    // The indexes are unsigned 16-bit values, so they are printed as they are;
    // the former "negative index" correction could never trigger.
    cout << "\nconst unsigned short Unicode::indicies[] = {\n ";
    for (i = 0; i < indexCount-1; i++)
    {
        cout << "(uint16_t)" << (int)indexes[i] << ", ";
        if (i != 0 && i % 3 == 0)
            cout << "\n ";
    }
    cout << " (uint16_t)" << (int)indexes[indexCount-1] << " };\n";

    cout << "\nconst char Unicode::values[] = {\n ";
    for (i = 0; i < cnt-1; i++)
    {
        cout << "(int8_t)" << (int)values[i] << ", ";
        if (i != 0 && i % 5 == 0)
            cout << "\n ";
    }
    // Close the initializer with "};" so the emitted definition compiles.
    cout << " (int8_t)" << (int)values[cnt-1] << " };\n";
    cout << "const short Unicode::offsetCount = " << cnt << ";\n";
}
/**
 * Entry point: builds the character type table from the data file named on
 * the command line and prints it to standard output.
 *
 * Exits with status 1 on a usage error, when the file cannot be opened, or
 * when the table cannot be built.
 */
int main(int argc, char** argv)
{
    if (argc != 2) {
        printf("Usage : chartype filename\n\n");
        exit(1);
    }

    FILE *input = fopen(argv[1], "r");
    if (input == 0) {
        printf("Cannot open the input file: %s\n\n", argv[1]);
        exit(1);
    }

    // getByteArray() takes over the stream and closes it when it is done.
    if (CharTypeBuilder::getByteArray(input) == 0) {
        fprintf(stderr, "Cannot build the character type table.\n");
        exit(1);
    }
    CharTypeBuilder::writeByteArrays();
    return 0;
}

View File

@ -1,91 +0,0 @@
/*
*****************************************************************************************
*
* Copyright (C) 1994-2001, International Business Machines
* Corporation and others. All Rights Reserved.
*
*****************************************************************************************
*/
// FILE NAME : chartbld.h
//
// CREATED
// Wednesday, December 11, 1996
//
// CREATED BY
// Helena Shih
//
//
//********************************************************************************************
#ifndef _CHARTBLD
#define _CHARTBLD
#include "unicode/utypes.h"
#include "ucmp8.h"
/**
 * CharTypeBuilder is an internal tool class that creates a CompactByteArray of
 * character type codes. The array is constructed from a data file passed in as
 * an open stream, and writeByteArrays() prints it to standard output as C++
 * array definitions. All members are static.
 */
class CharTypeBuilder
{
public :
// Type codes stored in the table; chartbld.cpp maps each two-letter category
// abbreviation of the data file to one of them.
enum ECharTypeMapping {
UNASSIGNED = 0,
UPPERCASE_LETTER = 1,
LOWERCASE_LETTER = 2,
TITLECASE_LETTER = 3,
MODIFIER_LETTER = 4,
OTHER_LETTER = 5,
NON_SPACING_MARK = 6,
ENCLOSING_MARK = 7,
COMBINING_SPACING_MARK = 8,
DECIMAL_DIGIT_NUMBER = 9,
LETTER_NUMBER = 10,
OTHER_NUMBER = 11,
SPACE_SEPARATOR = 12,
LINE_SEPARATOR = 13,
PARAGRAPH_SEPARATOR = 14,
CONTROL = 15,
FORMAT = 16,
PRIVATE_USE = 17,
SURROGATE = 18,
DASH_PUNCTUATION = 19,
START_PUNCTUATION = 20,
END_PUNCTUATION = 21,
CONNECTOR_PUNCTUATION = 22,
OTHER_PUNCTUATION = 23,
MATH_SYMBOL = 24,
CURRENCY_SYMBOL = 25,
MODIFIER_SYMBOL = 26,
OTHER_SYMBOL = 27,
INITIAL_PUNCTUATION = 28,
FINAL_PUNCTUATION = 29
};
// Builds the table from the given stream on the first call and returns it.
// NOTE(review): FILE is used here but this header does not include <stdio.h>.
static CompactByteArray* getByteArray(FILE*);
// Prints the table to standard output.
static void writeByteArrays(void);
private :
// Returns the position of a category abbreviation within tagStrings.
static int MakeProp(char* str);
// Concatenated category abbreviations and their type codes, in the same order.
static const char tagStrings[];
static const short tagValues[];
//LAST_CHAR_CODE_IN_FILE is taken from the data file itself. If the
// data file changes, this value may need to be changed also.
// After this value is read, the program stops reading the file.
static const UChar LAST_CHAR_CODE_IN_FILE;
// The table itself; null until getByteArray() has built it.
static CompactByteArray *charTypeArray;
};
#endif

View File

@ -1,114 +0,0 @@
# Microsoft Developer Studio Project File - Name="chartype" - Package Owner=<4>
# Microsoft Developer Studio Generated Build File, Format Version 6.00
# ** DO NOT EDIT **
# TARGTYPE "Win32 (x86) Console Application" 0x0103
CFG=chartype - Win32 Release
!MESSAGE This is not a valid makefile. To build this project using NMAKE,
!MESSAGE use the Export Makefile command and run
!MESSAGE
!MESSAGE NMAKE /f "chartype.mak".
!MESSAGE
!MESSAGE You can specify a configuration when running NMAKE
!MESSAGE by defining the macro CFG on the command line. For example:
!MESSAGE
!MESSAGE NMAKE /f "chartype.mak" CFG="chartype - Win32 Release"
!MESSAGE
!MESSAGE Possible choices for configuration are:
!MESSAGE
!MESSAGE "chartype - Win32 Release" (based on "Win32 (x86) Console Application")
!MESSAGE "chartype - Win32 Debug" (based on "Win32 (x86) Console Application")
!MESSAGE
# Begin Project
# PROP AllowPerConfigDependencies 0
# PROP Scc_ProjName ""
# PROP Scc_LocalPath ""
CPP=cl.exe
RSC=rc.exe
!IF "$(CFG)" == "chartype - Win32 Release"
# PROP BASE Use_MFC 0
# PROP BASE Use_Debug_Libraries 0
# PROP BASE Output_Dir ".\Release"
# PROP BASE Intermediate_Dir ".\Release"
# PROP BASE Target_Dir ""
# PROP Use_MFC 0
# PROP Use_Debug_Libraries 0
# PROP Output_Dir ".\Release"
# PROP Intermediate_Dir ".\Release"
# PROP Ignore_Export_Lib 0
# PROP Target_Dir ""
# ADD BASE CPP /nologo /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /YX /c
# ADD CPP /nologo /W3 /GX /O2 /I "..\..\..\include" /I "..\..\..\source\common" /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /YX /FD /c
# ADD BASE RSC /l 0x409 /d "NDEBUG"
# ADD RSC /l 0x409 /d "NDEBUG"
BSC32=bscmake.exe
# ADD BASE BSC32 /nologo
# ADD BSC32 /nologo
LINK32=link.exe
# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:I386
# ADD LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:I386
!ELSEIF "$(CFG)" == "chartype - Win32 Debug"
# PROP BASE Use_MFC 0
# PROP BASE Use_Debug_Libraries 1
# PROP BASE Output_Dir ".\Debug"
# PROP BASE Intermediate_Dir ".\Debug"
# PROP BASE Target_Dir ""
# PROP Use_MFC 0
# PROP Use_Debug_Libraries 1
# PROP Output_Dir ".\Debug"
# PROP Intermediate_Dir ".\Debug"
# PROP Ignore_Export_Lib 0
# PROP Target_Dir ""
# ADD BASE CPP /nologo /W3 /Gm /GX /Zi /Od /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /YX /c
# ADD CPP /nologo /W3 /Gm /GX /ZI /Od /I "..\..\..\include" /I "..\..\..\source\common" /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /YX /FD /c
# ADD BASE RSC /l 0x409 /d "_DEBUG"
# ADD RSC /l 0x409 /d "_DEBUG"
BSC32=bscmake.exe
# ADD BASE BSC32 /nologo
# ADD BSC32 /nologo
LINK32=link.exe
# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:I386
# ADD LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:I386
!ENDIF
# Begin Target
# Name "chartype - Win32 Release"
# Name "chartype - Win32 Debug"
# Begin Group "Source Files"
# PROP Default_Filter "cpp;c;cxx;rc;def;r;odl;hpj;bat;for;f90"
# Begin Source File
SOURCE=.\chartbld.cpp
# End Source File
# Begin Source File
SOURCE=.\ucmp8.c
# End Source File
# End Group
# Begin Group "Header Files"
# PROP Default_Filter "h;hpp;hxx;hm;inl;fi;fd"
# Begin Source File
SOURCE=.\chartbld.h
# End Source File
# Begin Source File
SOURCE=.\ucmp8.h
# End Source File
# End Group
# Begin Group "Resource Files"
# PROP Default_Filter "ico;cur;bmp;dlg;rc2;rct;bin;cnt;rtf;gif;jpg;jpeg;jpe"
# End Group
# End Target
# End Project

View File

@ -1,29 +0,0 @@
Microsoft Developer Studio Workspace File, Format Version 6.00
# WARNING: DO NOT EDIT OR DELETE THIS WORKSPACE FILE!
###############################################################################
Project: "chartype"=.\chartype.dsp - Package Owner=<4>
Package=<5>
{{{
}}}
Package=<4>
{{{
}}}
###############################################################################
Global:
Package=<5>
{{{
}}}
Package=<3>
{{{
}}}
###############################################################################

View File

@ -1,387 +0,0 @@
/*
********************************************************************
*
* Copyright (C) 1997-2000, International Business Machines
* Corporation and others. All Rights Reserved.
*
********************************************************************
*/
#ifndef _STDLIB_H
#include <stdlib.h>
#endif
#ifndef _STDIO_H
#include <stdio.h>
#endif
#include "ucmp8.h"
#include "cmemory.h"
static int32_t findOverlappingPosition(CompactByteArray* this,
uint32_t start,
const UChar *tempIndex,
int32_t tempIndexCount,
uint32_t cycle);
/* Internal layout constants for the compact byte array.
 * The *_int macros are compile-time values; the UCMP8_k* objects below
 * publish the same numbers as linkable constants.
 */
/* number of 16-bit code units covered by an expanded array */
#define kUnicodeCount_int 65536
/* log2 of the number of entries in one block */
#define kBlockShift_int 7
/* entries per block (1 << 7 = 128) */
#define kBlockCount_int (1<<kBlockShift_int)
/* bits of a 16-bit code unit that select the block (16 - 7 = 9) */
#define kIndexShift_int (16-kBlockShift_int)
/* number of blocks, i.e. entries in the index array (1 << 9 = 512) */
#define kIndexCount_int (1<<kIndexShift_int)
/* mask extracting the offset inside a block (127) */
#define kBlockMask_int (kBlockCount_int-1)
const int32_t UCMP8_kUnicodeCount = kUnicodeCount_int;
const int32_t UCMP8_kBlockShift = kBlockShift_int;
const int32_t UCMP8_kBlockCount = kBlockCount_int;
const int32_t UCMP8_kIndexShift = kIndexShift_int;
const int32_t UCMP8_kIndexCount = kIndexCount_int;
const uint32_t UCMP8_kBlockMask = kBlockMask_int;
/* Function-call accessors for the layout constants defined above. */

/* Returns the number of entries in an expanded array. */
int32_t ucmp8_getkUnicodeCount()
{
    return UCMP8_kUnicodeCount;
}

/* Returns the number of entries in one block. */
int32_t ucmp8_getkBlockCount()
{
    return UCMP8_kBlockCount;
}

/* Returns the number of entries in the index array. */
int32_t ucmp8_getkIndexCount()
{
    return UCMP8_kIndexCount;
}
/* debug flags*/
/*=======================================================*/
/* Looks up the value stored for a 16-bit index.
 * The high bits of `index` pick an entry of fIndex, which holds the start
 * of that block inside fArray; the low bits are the offset in the block.
 */
U_CAPI int8_t ucmp8_get(CompactByteArray* array, uint16_t index)
{
    const uint32_t blockStart = array->fIndex[index >> UCMP8_kBlockShift] & 0xFFFF;
    const uint32_t offsetInBlock = index & UCMP8_kBlockMask;

    return array->fArray[blockStart + offsetInBlock];
}
/* Same lookup as ucmp8_get, with the stored byte reinterpreted as unsigned. */
U_CAPI uint8_t ucmp8_getu(CompactByteArray* array, uint16_t index)
{
    int8_t signedValue = ucmp8_get(array, index);

    return (uint8_t)signedValue;
}
/* Creates an expanded (non-compact) array in which every entry holds
 * defaultValue.
 * Returns NULL when any allocation fails; nothing is leaked in that case.
 */
CompactByteArray* ucmp8_open(int8_t defaultValue)
{
    /* set up the index array and the data array.
     * the index array always points into particular parts of the data array
     * it is initially set up to point at regular block boundaries
     * The following example uses blocks of 4 for simplicity
     * Example: Expanded
     * INDEX# 0 1 2 3 4
     * INDEX 0 4 8 12 16 ...
     * ARRAY abcdeababcedzyabcdea...
     * | | | | | |...
     * whenever you set an element in the array, it unpacks to this state
     * After compression, the index will point to various places in the data array
     * wherever there is a runs of the same elements as in the original
     * Example: Compressed
     * INDEX# 0 1 2 3 4
     * INDEX 0 4 1 8 2 ...
     * ARRAY abcdeabazyabc...
     * If you look at the example, index# 2 in the expanded version points
     * to data position number 8, which has elements "bced". In the compressed
     * version, index# 2 points to data position 1, which also has "bced"
     */
    CompactByteArray* this = (CompactByteArray*) uprv_malloc(sizeof(CompactByteArray));
    int32_t i;

    if (this == NULL) return NULL;

    this->fArray = NULL;
    this->fIndex = NULL;
    this->fCount = UCMP8_kUnicodeCount;
    this->fCompact = FALSE;
    this->fBogus = FALSE;

    this->fArray = (int8_t*) uprv_malloc(sizeof(int8_t) * UCMP8_kUnicodeCount);
    if (!this->fArray)
    {
        /* The caller only ever sees NULL, so the struct must be released here. */
        uprv_free(this);
        return NULL;
    }

    this->fIndex = (uint16_t*) uprv_malloc(sizeof(uint16_t) * UCMP8_kIndexCount);
    if (!this->fIndex)
    {
        uprv_free(this->fArray);
        uprv_free(this);
        return NULL;
    }

    for (i = 0; i < UCMP8_kUnicodeCount; ++i)
    {
        this->fArray[i] = defaultValue;
    }
    /* Expanded layout: block i starts at i * block size. */
    for (i = 0; i < UCMP8_kIndexCount; ++i)
    {
        this->fIndex[i] = (uint16_t)(i << UCMP8_kBlockShift);
    }
    return this;
}
/* Wraps caller-supplied index and value arrays in a new CompactByteArray.
 * The arrays are stored as-is (not copied). The result is flagged compact
 * when count is smaller than a fully expanded array.
 * Returns NULL if the struct itself cannot be allocated.
 */
CompactByteArray* ucmp8_openAdopt(uint16_t *indexArray,
                                  int8_t *newValues,
                                  int32_t count)
{
    CompactByteArray* adopted = (CompactByteArray*) uprv_malloc(sizeof(CompactByteArray));

    if (adopted == NULL)
    {
        return NULL;
    }
    adopted->fArray = newValues;
    adopted->fIndex = indexArray;
    adopted->fCount = count;
    adopted->fBogus = FALSE;
    if (count < UCMP8_kUnicodeCount)
    {
        adopted->fCompact = TRUE;
    }
    else
    {
        adopted->fCompact = FALSE;
    }
    return adopted;
}
/*=======================================================*/
/* Releases the value array, the index array and the struct itself.
 * Passing NULL is a no-op, so the result of a failed ucmp8_open can be
 * handed straight to this function.
 */
void ucmp8_close(CompactByteArray* this)
{
    if (this == NULL)
    {
        return;
    }
    uprv_free(this->fArray);
    this->fArray = NULL;
    uprv_free(this->fIndex);
    this->fIndex = NULL;
    this->fCount = 0;
    this->fCompact = FALSE;
    uprv_free(this);
}
/*=======================================================*/
/* Converts a compact array back to the expanded layout; does nothing if the
 * array is already expanded. On allocation failure fBogus is set and the
 * array is left untouched.
 *
 * Example: Compressed
 * INDEX# 0 1 2 3 4
 * INDEX 0 4 1 8 2 ...
 * ARRAY abcdeabazyabc...
 * turns into
 * Example: Expanded
 * INDEX# 0 1 2 3 4
 * INDEX 0 4 8 12 16 ...
 * ARRAY abcdeababcedzyabcdea...
 */
void ucmp8_expand(CompactByteArray* this)
{
    int8_t* expanded;
    int32_t pos;

    if (!this->fCompact)
    {
        return;
    }

    expanded = (int8_t*) uprv_malloc(sizeof(int8_t) * UCMP8_kUnicodeCount);
    if (expanded == NULL)
    {
        this->fBogus = TRUE;
        return;
    }

    /* Read every value through the still-compact index before resetting it. */
    for (pos = 0; pos < UCMP8_kUnicodeCount; pos++)
    {
        expanded[pos] = ucmp8_get(this, (UChar)pos);
    }
    for (pos = 0; pos < UCMP8_kIndexCount; pos++)
    {
        this->fIndex[pos] = (uint16_t)(pos << UCMP8_kBlockShift);
    }

    uprv_free(this->fArray);
    this->fArray = expanded;
    this->fCompact = FALSE;
}
/*=======================================================*/
/* Utility routine for finding blocks that overlap.
 *
 * this->fArray:   the expanded values
 * start:          first fArray position of the block being placed
 * tempIndex:      the compacted sequence built so far; each entry is a
 *                 position in this->fArray
 * tempIndexCount: number of valid entries in tempIndex
 * cycle:          step between candidate positions; must be > 0
 *
 * Returns the first candidate position i (a multiple of cycle) at which the
 * block matches the compacted sequence. A match may run off the end of the
 * sequence, in which case only the overlapping prefix is compared. When no
 * candidate matches, tempIndexCount is returned so the caller appends the
 * block directly after the existing data.
 *
 * IMPORTANT: the cycle number is very important. Small cycles take a lot
 * longer to work. In some cases, they may be able to get better compaction.
 */
static int32_t
findOverlappingPosition(CompactByteArray* this,
                        uint32_t start,
                        const UChar* tempIndex,
                        int32_t tempIndexCount,
                        uint32_t cycle)
{
    int32_t i;
    int32_t j;
    int32_t currentCount;

    for (i = 0; i < tempIndexCount; i += cycle)
    {
        currentCount = UCMP8_kBlockCount;
        if (i + UCMP8_kBlockCount > tempIndexCount)
        {
            currentCount = tempIndexCount - i;
        }
        for (j = 0; j < currentCount; ++j)
        {
            if (this->fArray[start + j] != this->fArray[tempIndex[i + j]]) break;
        }
        if (j == currentCount) return i;
    }
    /* No match. The loop counter may have stepped past tempIndexCount when
     * cycle does not divide it; returning that value would make the caller
     * leave a gap and, over many blocks, grow the compacted sequence beyond
     * its allocation. Always append exactly at the end instead.
     */
    return tempIndexCount;
}
/* Read-only accessors for the fields of a CompactByteArray. */

/* Returns the fBogus flag (set when an allocation inside the array failed). */
UBool ucmp8_isBogus(const CompactByteArray* this)
{
    return this->fBogus;
}

/* Returns the value array. */
const int8_t* ucmp8_getArray(const CompactByteArray* this)
{
    return this->fArray;
}

/* Returns the index array. */
const uint16_t* ucmp8_getIndex(const CompactByteArray* this)
{
    return this->fIndex;
}

/* Returns the recorded element count of the value array. */
int32_t ucmp8_getCount(const CompactByteArray* this)
{
    return this->fCount;
}
/* Stores `value` for the single code unit `c`.
 * A compact array is expanded first; if that expansion leaves the array
 * bogus, nothing is written.
 */
void ucmp8_set(CompactByteArray* this, UChar c, int8_t value)
{
    if (this->fCompact == TRUE)
    {
        ucmp8_expand(this);
        if (this->fBogus)
        {
            return;
        }
    }
    this->fArray[(int32_t)c] = value;
}
/* Stores `value` for every code unit in the inclusive range [start, end].
 * A compact array is expanded first; if that expansion leaves the array
 * bogus, nothing is written.
 */
void ucmp8_setRange(CompactByteArray* this, UChar start, UChar end, int8_t value)
{
    int32_t pos;

    if (this->fCompact == TRUE)
    {
        ucmp8_expand(this);
        if (this->fBogus)
        {
            return;
        }
    }

    /* A 32-bit counter lets the loop terminate even when end is 0xFFFF. */
    pos = start;
    while (pos <= end)
    {
        this->fArray[pos] = value;
        ++pos;
    }
}
/*=======================================================*/
/* Compacts an expanded array; does nothing if it is already compact.
 *
 * It walks through the blocks of the expanded array, finding the first
 * place in the data compacted so far that matches the contents of the
 * current block. If the matching succeeds, the index entry points at that
 * earlier position. If it fails, the block is appended and the index entry
 * points at the appended copy.
 *
 * cycle is the step between candidate positions; values outside
 * 1..UCMP8_kBlockCount are clamped into that range.
 * On allocation failure fBogus is set.
 */
void
ucmp8_compact(CompactByteArray* this,
              uint32_t cycle)
{
    UChar* tempIndex;
    int32_t tempIndexCount;
    int8_t* tempArray;
    int32_t iBlock, iIndex;

    if (this->fCompact)
    {
        return;
    }

    /* fix cycle, must be 0 < cycle <= blockcount.
     * cycle is unsigned, so the only value below the range is 0; a step of 0
     * would keep the position search from ever advancing.
     */
    if (cycle == 0) cycle = 1;
    else if (cycle > (uint32_t)UCMP8_kBlockCount) cycle = UCMP8_kBlockCount;

    /* temp storage: one entry per position of the compacted sequence, each
     * holding the fArray position the value is taken from
     */
    tempIndex = (UChar*) uprv_malloc(sizeof(UChar)* UCMP8_kUnicodeCount);
    if (!tempIndex)
    {
        this->fBogus = TRUE;
        return;
    }

    /* set up first block.*/
    tempIndexCount = UCMP8_kBlockCount;
    for (iIndex = 0; iIndex < UCMP8_kBlockCount; ++iIndex)
    {
        tempIndex[iIndex] = (uint16_t)iIndex;
    }
    this->fIndex[0] = 0;

    /* for each successive block, find out its first position in the compacted array*/
    for (iBlock = 1; iBlock < UCMP8_kIndexCount; ++iBlock)
    {
        int32_t newCount, firstPosition, block;

        block = iBlock << UCMP8_kBlockShift;
        firstPosition = findOverlappingPosition(this,
                                                block,
                                                tempIndex,
                                                tempIndexCount,
                                                cycle);
        /* if the block is not fully contained in the current sequence,
         * append the part that sticks out past the end
         */
        newCount = firstPosition + UCMP8_kBlockCount;
        if (newCount > tempIndexCount)
        {
            for (iIndex = tempIndexCount; iIndex < newCount; ++iIndex)
            {
                tempIndex[iIndex] = (uint16_t)(iIndex - firstPosition + block);
            }
            tempIndexCount = newCount;
        }
        this->fIndex[iBlock] = (uint16_t)firstPosition;
    }

    /* now allocate and copy the items into the array*/
    tempArray = (int8_t*) uprv_malloc(tempIndexCount * sizeof(int8_t));
    if (!tempArray)
    {
        this->fBogus = TRUE;
        uprv_free(tempIndex);
        return;
    }
    for (iIndex = 0; iIndex < tempIndexCount; ++iIndex)
    {
        tempArray[iIndex] = this->fArray[tempIndex[iIndex]];
    }
    uprv_free(this->fArray);
    this->fArray = tempArray;
    this->fCount = tempIndexCount;

    /* free up temp storage*/
    uprv_free(tempIndex);
    this->fCompact = TRUE;
}

View File

@ -1,87 +0,0 @@
/*
********************************************************************
*
* Copyright (C) 1996-2000, International Business Machines
* Corporation and others. All Rights Reserved.
*
********************************************************************
*/
#ifndef UCMP8_H
#define UCMP8_H
#include "unicode/utypes.h"
/*====================================*/
/* class CompactByteArray
* Provides a compact way to store information that is indexed by Unicode values,
* such as character properties, types, keyboard values, etc.
* The ATypes are used by value, so should be small, integers or pointers.
*====================================
*/
/* Layout constants of the compact byte array. */
/* number of entries in an expanded array */
U_CAPI const int32_t UCMP8_kUnicodeCount;
/* log2 of the number of entries per block */
U_CAPI const int32_t UCMP8_kBlockShift;
/* number of entries per block */
U_CAPI const int32_t UCMP8_kBlockCount;
/* number of index bits, i.e. 16 - UCMP8_kBlockShift */
U_CAPI const int32_t UCMP8_kIndexShift;
/* number of entries in the index array */
U_CAPI const int32_t UCMP8_kIndexCount;
/* mask extracting the offset inside a block */
U_CAPI const uint32_t UCMP8_kBlockMask;
/* Function-call accessors for three of the constants above. */
U_CAPI int32_t ucmp8_getkUnicodeCount(void);
U_CAPI int32_t ucmp8_getkBlockCount(void);
U_CAPI int32_t ucmp8_getkIndexCount(void);
/* State of one compact byte array. */
typedef struct{
/* the stored values; a block's entries are contiguous */
int8_t* fArray;
/* for each block, the position in fArray where the block starts */
uint16_t* fIndex;
/* number of entries recorded for fArray */
int32_t fCount;
/* TRUE while the array is in compacted form */
UBool fCompact;
/* set to TRUE when an allocation made on behalf of the array fails */
UBool fBogus;
} CompactByteArray;
/* Creates an expanded array with every entry set to defaultValue.
   Returns NULL if memory cannot be allocated. */
U_CAPI CompactByteArray* ucmp8_open(int8_t defaultValue);
/* Creates an array around caller-supplied index and value arrays.
   The arrays are adopted, not copied: ucmp8_close frees them.
   count is the number of entries in newValues.
   Returns NULL if memory cannot be allocated. */
U_CAPI CompactByteArray* ucmp8_openAdopt(uint16_t* indexArray,
int8_t* newValues,
int32_t count);
/* Frees the value array, the index array and the CompactByteArray itself. */
U_CAPI void ucmp8_close(CompactByteArray* array);
U_CAPI UBool isBogus(const CompactByteArray* array);
/* Returns the value stored for index. */
U_CAPI int8_t ucmp8_get(CompactByteArray* array, uint16_t index);
/* Returns the value stored for index, converted to unsigned. */
U_CAPI uint8_t ucmp8_getu(CompactByteArray* array, uint16_t index);
/* Sets the value for a single index.
   A compact array is expanded first. */
U_CAPI void ucmp8_set(CompactByteArray* array,
UChar index,
int8_t value);
/* Sets the value for every index from start to end, both inclusive.
   A compact array is expanded first. */
U_CAPI void ucmp8_setRange(CompactByteArray* array,
UChar start,
UChar end,
int8_t value);
/* Returns the number of entries recorded for the value array. */
U_CAPI int32_t ucmp8_getCount(const CompactByteArray* array);
/* Returns the value array. */
U_CAPI const int8_t* ucmp8_getArray(const CompactByteArray* array);
/* Returns the index array. */
U_CAPI const uint16_t* ucmp8_getIndex(const CompactByteArray* array);
/* Compact the array.
The value of cycle determines how large the overlap can be.
A cycle of 1 is the most compacted, but takes the most time to do.
If values stored in the array tend to repeat in cycles of, say, 16,
then using that will be faster than cycle = 1, and get almost the
same compression.
*/
U_CAPI void ucmp8_compact(CompactByteArray* array,
uint32_t cycle);
/* Expanded takes the array back to a 65536 element array*/
U_CAPI void ucmp8_expand(CompactByteArray* array);
#endif

View File

@ -1,398 +0,0 @@
/*
*****************************************************************************************
*
* Copyright (C) 1997-200, International Business Machines
* Corporation and others. All Rights Reserved.
*
*****************************************************************************************
*/
/*===============================================================================
*
* File cmpshrta.cpp
*
* Modification History:
*
* Date Name Description
* 2/5/97 aliu Added CompactIntArray streamIn and streamOut methods.
* 3/4/97 aliu Tuned performance of CompactIntArray constructor,
* 05/07/97 helena Added isBogus()
* based on performance data indicating that this was slow.
* 07/15/98 erm Synched with Java 1.2 CompactShortArray.java.
* 07/30/98 erm Added changes from 07/29/98 code review.
*===============================================================================
*/
#include "ucmp16.h"
#include "cmemory.h"
/* Nonzero when the `len` int16_t elements starting at source[sourceStart]
 * are equal to those starting at target[targetStart].
 * (memcmp returns 0 for equal regions, so equality is `== 0`.)
 */
#define arrayRegionMatches(source, sourceStart, target, targetStart, len) (uprv_memcmp(&(source)[(sourceStart)], &(target)[(targetStart)], (len) * sizeof(int16_t)) == 0)
/* Internal layout constants for the compact short array.
 * The *_int macros are compile-time values; the UCMP16_k* objects below
 * publish the same numbers as linkable constants.
 */
/* largest 16-bit code unit */
#define UCMP16_kMaxUnicode_int 65535
/* number of entries in an expanded array */
#define UCMP16_kUnicodeCount_int (UCMP16_kMaxUnicode_int + 1)
/* log2 of the default number of entries per block */
#define UCMP16_kBlockShift_int 7
/* default entries per block (1 << 7 = 128) */
#define UCMP16_kBlockCount_int (1 << UCMP16_kBlockShift_int)
/* size of one default block in bytes */
#define UCMP16_kBlockBytes_int (UCMP16_kBlockCount_int * sizeof(int16_t))
/* bits of a 16-bit code unit that select the block (16 - 7 = 9) */
#define UCMP16_kIndexShift_int (16 - UCMP16_kBlockShift_int)
/* default number of entries in the index array (1 << 9 = 512) */
#define UCMP16_kIndexCount_int (1 << UCMP16_kIndexShift_int)
/* mask extracting the offset inside a default block (127) */
#define UCMP16_kBlockMask_int (UCMP16_kBlockCount_int - 1)
const int32_t UCMP16_kMaxUnicode = UCMP16_kMaxUnicode_int;
const int32_t UCMP16_kUnicodeCount = UCMP16_kUnicodeCount_int;
const int32_t UCMP16_kBlockShift = UCMP16_kBlockShift_int;
const int32_t UCMP16_kBlockCount = UCMP16_kBlockCount_int;
const int32_t UCMP16_kBlockBytes = UCMP16_kBlockBytes_int;
const int32_t UCMP16_kIndexShift = UCMP16_kIndexShift_int;
const int32_t UCMP16_kIndexCount = UCMP16_kIndexCount_int;
const uint32_t UCMP16_kBlockMask = UCMP16_kBlockMask_int;
/**
* Sets the array to the invalid memory state.
*/
static CompactShortArray* setToBogus(CompactShortArray* array);
static void touchBlock(CompactShortArray* this,
int32_t i,
int16_t value);
static UBool blockTouched(const CompactShortArray* this,
int32_t i);
/* debug flags*/
/*=======================================================*/
/* Function-call accessors for the layout constants defined above. */

/* Returns the number of entries in an expanded array. */
int32_t ucmp16_getkUnicodeCount()
{
    return UCMP16_kUnicodeCount;
}

/* Returns the default number of entries in one block. */
int32_t ucmp16_getkBlockCount()
{
    return UCMP16_kBlockCount;
}

/* Returns the default number of entries in the index array. */
int32_t ucmp16_getkIndexCount()
{
    return UCMP16_kIndexCount;
}
/* Creates an expanded (non-compact) array in which every entry holds
 * defaultValue, using the default block size. All block hashes start at 0,
 * which marks every block as untouched.
 * Returns NULL when any allocation fails; nothing is leaked in that case.
 */
CompactShortArray* ucmp16_open(int16_t defaultValue)
{
    int32_t i;
    CompactShortArray* this = (CompactShortArray*) uprv_malloc(sizeof(CompactShortArray));

    if (this == NULL) return NULL;

    this->fCount = UCMP16_kUnicodeCount;
    this->fCompact = FALSE;
    this->fBogus = FALSE;
    this->fDefaultValue = defaultValue;
    this->kBlockShift = UCMP16_kBlockShift;
    this->kBlockMask = UCMP16_kBlockMask;

    this->fArray = (int16_t*)uprv_malloc(UCMP16_kUnicodeCount * sizeof(int16_t));
    this->fIndex = (uint16_t*)uprv_malloc(UCMP16_kIndexCount * sizeof(uint16_t));
    this->fHashes = (int32_t*)uprv_malloc(UCMP16_kIndexCount * sizeof(int32_t));
    if (this->fArray == NULL || this->fIndex == NULL || this->fHashes == NULL)
    {
        /* The caller only ever sees NULL, so everything obtained so far,
         * including the struct, is released here.
         */
        if (this->fArray != NULL) uprv_free(this->fArray);
        if (this->fIndex != NULL) uprv_free(this->fIndex);
        if (this->fHashes != NULL) uprv_free(this->fHashes);
        uprv_free(this);
        return NULL;
    }

    for (i = 0; i < UCMP16_kUnicodeCount; i += 1)
    {
        this->fArray[i] = defaultValue;
    }
    /* Expanded layout: block i starts at i * block size. */
    for (i = 0; i < UCMP16_kIndexCount; i += 1)
    {
        this->fIndex[i] = (uint16_t)(i << UCMP16_kBlockShift);
        this->fHashes[i] = 0;
    }
    return this;
}
/* Wraps caller-supplied index and value arrays in a new CompactShortArray
 * that uses the default block size. The arrays are stored as-is (not
 * copied) and no hash table is created. The result is flagged compact when
 * count is smaller than a fully expanded array.
 * Returns NULL if the struct itself cannot be allocated.
 */
CompactShortArray* ucmp16_openAdopt(uint16_t *indexArray,
                                    int16_t *newValues,
                                    int32_t count,
                                    int16_t defaultValue)
{
    CompactShortArray* adopted = (CompactShortArray*) uprv_malloc(sizeof(CompactShortArray));

    if (adopted == NULL)
    {
        return NULL;
    }
    adopted->fArray = newValues;
    adopted->fIndex = indexArray;
    adopted->fHashes = NULL;
    adopted->fCount = count;
    adopted->fDefaultValue = defaultValue;
    adopted->fCompact = count < UCMP16_kUnicodeCount;
    adopted->fBogus = FALSE;
    adopted->kBlockShift = UCMP16_kBlockShift;
    adopted->kBlockMask = UCMP16_kBlockMask;
    return adopted;
}
/* Like ucmp16_openAdopt, but the block size is 1 << blockShift instead of
 * the default; the block mask is derived from the same shift.
 * Returns NULL if ucmp16_openAdopt fails.
 */
CompactShortArray* ucmp16_openAdoptWithBlockShift(uint16_t *indexArray,
                                                  int16_t *newValues,
                                                  int32_t count,
                                                  int16_t defaultValue,
                                                  int32_t blockShift)
{
    CompactShortArray* adopted = ucmp16_openAdopt(indexArray, newValues, count, defaultValue);

    if (adopted != NULL)
    {
        adopted->kBlockShift = blockShift;
        adopted->kBlockMask = (uint32_t) (((uint32_t)1 << (uint32_t)blockShift) - (uint32_t)1);
    }
    return adopted;
}
/*=======================================================*/
/* Releases the value array, the index array, the hash array and the struct
 * itself. Passing NULL is a no-op, so the result of a failed ucmp16_open
 * can be handed straight to this function.
 */
void ucmp16_close(CompactShortArray* this)
{
    if (this == NULL)
    {
        return;
    }
    uprv_free(this->fArray);
    uprv_free(this->fIndex);
    uprv_free(this->fHashes);
    uprv_free(this);
}
/* Puts the array into the invalid-memory state: all three owned arrays are
 * freed and cleared, the count is reset, and fBogus is raised.
 * Returns the same pointer it was given.
 */
CompactShortArray* setToBogus(CompactShortArray* this)
{
    /* release owned storage */
    uprv_free(this->fArray);
    uprv_free(this->fIndex);
    uprv_free(this->fHashes);

    /* reset every field that referred to it */
    this->fArray = NULL;
    this->fIndex = NULL;
    this->fHashes = NULL;
    this->fCount = 0;
    this->fCompact = FALSE;
    this->fBogus = TRUE;

    return this;
}
/* Converts a compact array back to the expanded layout; does nothing if the
 * array is already expanded.
 *
 * Compaction frees the per-block hash array and adopted arrays never have
 * one, yet ucmp16_set, ucmp16_setRange and ucmp16_compact all use it on an
 * expanded array. When it is missing it is therefore rebuilt here from the
 * expanded values.
 *
 * On allocation failure fBogus is set and the array is left untouched.
 */
void ucmp16_expand(CompactShortArray* this)
{
    int32_t i;
    int32_t indexCount;
    int16_t *tempArray;
    int32_t *tempHashes = NULL;

    if (!this->fCompact)
    {
        return;
    }

    indexCount = 1 << (16 - this->kBlockShift);

    tempArray = (int16_t*)uprv_malloc(UCMP16_kUnicodeCount * sizeof(int16_t));
    if (tempArray == NULL)
    {
        this->fBogus = TRUE;
        return;
    }
    if (this->fHashes == NULL)
    {
        tempHashes = (int32_t*)uprv_malloc(indexCount * sizeof(int32_t));
        if (tempHashes == NULL)
        {
            uprv_free(tempArray);
            this->fBogus = TRUE;
            return;
        }
        for (i = 0; i < indexCount; i += 1)
        {
            tempHashes[i] = 0;
        }
    }

    /* Read every value through the still-compact index before resetting it. */
    for (i = 0; i < UCMP16_kUnicodeCount; i += 1)
    {
        tempArray[i] = ucmp16_get(this, (UChar)i); /* HSYS : How expand?*/
    }
    for (i = 0; i < indexCount; i += 1)
    {
        this->fIndex[i] = (uint16_t)(i<<this->kBlockShift);
    }
    uprv_free(this->fArray);
    this->fArray = tempArray;
    this->fCompact = FALSE;

    if (tempHashes != NULL)
    {
        /* Mark every block that holds a non-default value as touched, the
         * same way ucmp16_set would have.
         */
        this->fHashes = tempHashes;
        for (i = 0; i < UCMP16_kUnicodeCount; i += 1)
        {
            if (tempArray[i] != this->fDefaultValue)
            {
                touchBlock(this, i >> this->kBlockShift, tempArray[i]);
            }
        }
    }
}
/* Stores `value` for the single code unit `c`.
 * A compact array is expanded first; if that leaves the array bogus,
 * nothing is written. Storing a non-default value also updates the hash of
 * the block containing `c`.
 */
void ucmp16_set(CompactShortArray* this, UChar c, int16_t value)
{
    if (this->fCompact)
    {
        ucmp16_expand(this);
        if (this->fBogus)
        {
            return;
        }
    }

    this->fArray[(int32_t)c] = value;

    if (value == this->fDefaultValue)
    {
        return;
    }
    touchBlock(this, c >> this->kBlockShift, value);
}
/* Stores `value` for every code unit in the inclusive range [start, end].
 * A compact array is expanded first; if that leaves the array bogus,
 * nothing is written. For a non-default value the hash of the containing
 * block is updated once per element written.
 */
void ucmp16_setRange(CompactShortArray* this, UChar start, UChar end, int16_t value)
{
    int32_t pos;
    UBool touches;

    if (this->fCompact)
    {
        ucmp16_expand(this);
        if (this->fBogus)
        {
            return;
        }
    }

    touches = (UBool)(value != this->fDefaultValue);
    /* A 32-bit counter lets the loop terminate even when end is 0xFFFF. */
    for (pos = start; pos <= end; ++pos)
    {
        this->fArray[pos] = value;
        if (touches)
        {
            touchBlock(this, pos >> this->kBlockShift, value);
        }
    }
}
/*=======================================================*/
/* Compacts an expanded array in place; does nothing if it is already
 * compact.
 *
 * Blocks are visited in order. Each block is either pointed at an earlier
 * block (the first untouched block for untouched blocks, or a block with the
 * same hash that arrayRegionMatches accepts) or copied down to the end of
 * the compacted region. Only block-aligned positions are considered.
 * Afterwards the value array is shrunk to the compacted size and the hash
 * array is released.
 */
void ucmp16_compact(CompactShortArray* this)
{
    if (!this->fCompact)
    {
        int32_t limitCompacted = 0;
        int32_t i, iBlockStart;
        int16_t iUntouched = -1;

        for (i = 0, iBlockStart = 0; i < (1 << (16 - this->kBlockShift)); i += 1, iBlockStart += (1 << this->kBlockShift))
        {
            UBool touched = blockTouched(this, i);

            /* 0xFFFF marks "no position chosen yet" for this block */
            this->fIndex[i] = 0xFFFF;
            if (!touched && iUntouched != -1)
            {
                /* If no values in this block were set, we can just set its
                 * index to be the same as some other block with no values
                 * set, assuming we've seen one yet.
                 */
                this->fIndex[i] = iUntouched;
            }
            else
            {
                int32_t j, jBlockStart;

                for (j = 0, jBlockStart = 0;
                     j < limitCompacted;
                     j += 1, jBlockStart += (1 << this->kBlockShift))
                {
                    if (this->fHashes[i] == this->fHashes[j] &&
                        arrayRegionMatches(this->fArray,
                                           iBlockStart,
                                           this->fArray,
                                           jBlockStart,
                                           (1 << this->kBlockShift)))
                    {
                        this->fIndex[i] = (int16_t)jBlockStart;
                    }
                }
                /* TODO: verify this is correct*/
                if (this->fIndex[i] == 0xFFFF)
                {
                    /* we didn't match, so copy & update; after the loop j and
                     * jBlockStart designate the first free compacted block
                     */
                    uprv_memcpy(&(this->fArray[jBlockStart]),
                                &(this->fArray[iBlockStart]),
                                (1 << this->kBlockShift)*sizeof(int16_t));
                    this->fIndex[i] = (int16_t)jBlockStart;
                    this->fHashes[j] = this->fHashes[i];
                    limitCompacted += 1;
                    if (!touched)
                    {
                        /* If this is the first untouched block we've seen,*/
                        /* remember its index.*/
                        iUntouched = (int16_t)jBlockStart;
                    }
                }
            }
        }

        /* we are done compacting, so now make the array shorter*/
        {
            int32_t newSize = limitCompacted * (1 << this->kBlockShift);
            int16_t *result = (int16_t*) uprv_malloc(sizeof(int16_t) * newSize);

            if (result != NULL)
            {
                uprv_memcpy(result, this->fArray, newSize * sizeof(int16_t));
                uprv_free(this->fArray);
                this->fArray = result;
            }
            /* If the smaller buffer could not be allocated, the compacted
             * data already sits at the front of the existing buffer, so that
             * buffer is simply kept instead of copying through NULL.
             */
            this->fCount = newSize;
            uprv_free(this->fHashes);
            this->fHashes = NULL;
            this->fCompact = TRUE;
        }
    }
}
/**
 * Returns the default value recorded for the array when it was opened.
 */
int16_t ucmp16_getDefaultValue(const CompactShortArray* this)
{
    int16_t defaultValue = this->fDefaultValue;

    return defaultValue;
}
/* Folds `value` into the hash of block i. The low bit is forced on, so a
 * block that has been touched never has a hash of 0.
 */
void touchBlock(CompactShortArray* this, int32_t i, int16_t value)
{
    int32_t mixed = this->fHashes[i] + (value << 1);

    this->fHashes[i] = mixed | 1;
}
/* Query whether block i was "touched", i.e. had a value recorded in its
 * hash. Untouched blocks can be shared when compacting the array.
 */
UBool blockTouched(const CompactShortArray* this, int32_t i)
{
    int32_t hash = this->fHashes[i];

    return (hash != 0);
}
/* Read-only accessors for the fields of a CompactShortArray. */

/* Returns the value array. */
const int16_t* ucmp16_getArray(const CompactShortArray* this)
{
    return this->fArray;
}

/* Returns the index array. */
const uint16_t* ucmp16_getIndex(const CompactShortArray* this)
{
    return this->fIndex;
}

/* Returns the recorded element count of the value array. */
uint32_t ucmp16_getCount(const CompactShortArray* this)
{
    return this->fCount;
}

View File

@ -1,218 +0,0 @@
/*
********************************************************************
*
* Copyright (C) 1996-2000, International Business Machines
* Corporation and others. All Rights Reserved.
*
********************************************************************
*/
/*
* ==========================================================================
* @version 1.0 23/10/96
* @author Helena Shih
* Based on Taligent international support for java
* Modification History :
*
* 05/07/97 helena Added isBogus()
* 07/15/98 erm Synched with Java 1.2 CompactShortArray.java.
* 07/30/98 erm Added 07/29/98 code review changes.
* 04/21/99 Damiba Port to C/New API faster ucmp16_get
*/
#ifndef UCMP16_H
#define UCMP16_H
#include "unicode/utypes.h"
/**
* class CompactATypeArray : use only on primitive data types
* Provides a compact way to store information that is indexed by Unicode
* values, such as character properties, types, keyboard values, etc.This
* is very useful when you have a block of Unicode data that contains
* significant values while the rest of the Unicode data is unused in the
* application or when you have a lot of redundance, such as where all 21,000
* Han ideographs have the same value. However, lookup is much faster than a
* hash table.
* <P>
* A compact array of any primitive data type serves two purposes:
* <UL type = round>
* <LI>Fast access of the indexed values.
* <LI>Smaller memory footprint.
* </UL>
* <P>
* The index array always points into particular parts of the data array
* it is initially set up to point at regular block boundaries
* The following example uses blocks of 4 for simplicity
* <PRE>
* Example: Expanded
* BLOCK 0 1 2 3 4
* INDEX 0 4 8 12 16 ...
* ARRAY abcdeababcdezyabcdea...
* | | | | | |...
* </PRE>
* <P>
* After compression, the index will point to various places in the data array
* wherever there is a runs of the same elements as in the original
* <PRE>
* Example: Compressed
* BLOCK 0 1 2 3 4
* INDEX 0 4 1 8 2 ...
* ARRAY abcdeabazyabc...
* </PRE>
* <P>
* If you look at the example, index number 2 in the expanded version points
* to data position number 8, which has elements "bcde". In the compressed
* version, index number 2 points to data position 1, which also has "bcde"
* @see CompactByteArray
* @see CompactIntArray
* @see CompactCharArray
* @see CompactStringArray
* @version $Revision: 1.7 $ 8/25/98
* @author Helena Shih
*/
/** State of one compact short array. */
typedef struct
{
/* the stored values; a block's entries are contiguous */
int16_t* fArray;
/* for each block, the position in fArray where the block starts */
uint16_t* fIndex;
/* one hash per block; 0 means no non-default value was set in the block.
   NULL for adopted arrays and after compaction */
int32_t* fHashes;
/* number of entries recorded for fArray */
int32_t fCount;
/* the value every entry starts out with */
int16_t fDefaultValue;
/* TRUE while the array is in compacted form */
UBool fCompact;
/* set to TRUE when an allocation made on behalf of the array fails */
UBool fBogus;
/* log2 of the number of entries per block for this array */
int32_t kBlockShift;
/* mask extracting the offset inside a block for this array */
int32_t kBlockMask;
} CompactShortArray;
/* Layout constants of the compact short array (default block size). */
/* largest 16-bit code unit */
U_CAPI const int32_t UCMP16_kMaxUnicode;
/* number of entries in an expanded array */
U_CAPI const int32_t UCMP16_kUnicodeCount;
/* log2 of the number of entries per block */
U_CAPI const int32_t UCMP16_kBlockShift;
/* number of entries per block */
U_CAPI const int32_t UCMP16_kBlockCount;
/* size of one block in bytes */
U_CAPI const int32_t UCMP16_kBlockBytes;
/* number of index bits, i.e. 16 - UCMP16_kBlockShift */
U_CAPI const int32_t UCMP16_kIndexShift;
/* number of entries in the index array */
U_CAPI const int32_t UCMP16_kIndexCount;
/* mask extracting the offset inside a block */
U_CAPI const uint32_t UCMP16_kBlockMask;
/* Function-call accessors for three of the constants above. */
U_CAPI int32_t ucmp16_getkUnicodeCount(void);
U_CAPI int32_t ucmp16_getkBlockCount(void);
U_CAPI int32_t ucmp16_getkIndexCount(void);
/**
* Construct an empty CompactShortArray.
* @param defaultValue the default value for all characters not explicitly in the array
*/
U_CAPI CompactShortArray* ucmp16_open(int16_t defaultValue);
/**
* Construct a CompactShortArray from a pre-computed index and values array. The values
* will be adopted by the CompactShortArray. Note: for speed, the compact method will
* only re-use blocks in the values array that are on a block boundary. The pre-computed
* arrays passed in to this constructor may re-use blocks at any position in the values
* array.
* @param indexArray the index array to be adopted
* @param newValues the value array to be adopted
* @param count the number of entries in the value array
* @param defaultValue the default value for all characters not explicitly in the array
* @see compact
*/
U_CAPI CompactShortArray* ucmp16_openAdopt(uint16_t *indexArray,
int16_t *newValues,
int32_t count,
int16_t defaultValue );
/**
 * Construct a CompactShortArray from a pre-computed index and values array,
 * like ucmp16_openAdopt, but with blocks of (1 << blockShift) entries
 * instead of the default block size.
 * @param indexArray the index array to be adopted
 * @param newValues the value array to be adopted
 * @param count the number of entries in the value array
 * @param defaultValue the default value for all characters not explicitly in the array
 * @param blockShift log2 of the number of entries per block
 */
U_CAPI CompactShortArray* ucmp16_openAdoptWithBlockShift(uint16_t *indexArray,
int16_t *newValues,
int32_t count,
int16_t defaultValue,
int32_t blockShift);
/**
 * Free the array, including its value and index arrays (adopted ones too).
 */
U_CAPI void ucmp16_close(CompactShortArray* array);
/**
* Returns TRUE if the creation of the compact array fails.
*/
U_CAPI UBool ucmp16_isBogus(const CompactShortArray* array);
/**
*
* Get the mapped value of a Unicode character.
* @param index the character to get the mapped value with
* @return the mapped value of the given character
*/
#define ucmp16_get(array, index) (array->fArray[(array->fIndex[(index >> array->kBlockShift)] )+ \
(index & array->kBlockMask)])
#define ucmp16_getu(array, index) (uint16_t)ucmp16_get(array, index)
/**
* Set a new value for a Unicode character.
* Set automatically expands the array if it is compacted.
* @param index the character to set the mapped value with
* @param value the new mapped value
*/
U_CAPI void ucmp16_set(CompactShortArray *array,
UChar index,
int16_t value);
/**
*
* Set new values for a range of Unicode character.
* @param start the starting offset of the range
* @param end the ending offset of the range
* @param value the new mapped value
*/
U_CAPI void ucmp16_setRange(CompactShortArray* array,
UChar start,
UChar end,
int16_t value);
/**
* Compact the array. For efficiency, this method will only re-use
* blocks in the values array that are on a block boundary. If you
* want better compaction, you can do your own compaction and use
* the constructor that lets you pass in the pre-computed arrays.
*/
U_CAPI void ucmp16_compact(CompactShortArray* array);
/**
* Get the default value.
*/
U_CAPI int16_t ucmp16_getDefaultValue(const CompactShortArray* array);
/**
*
* Get the number of elements in the value array.
* @return the number of elements in the value array.
*/
U_CAPI uint32_t ucmp16_getCount(const CompactShortArray* array);
/**
*
* Get the address of the value array.
* @return the address of the value array
*/
U_CAPI const int16_t* ucmp16_getArray(const CompactShortArray* array);
/**
*
* Get the address of the index array.
* @return the address of the index array
*/
U_CAPI const uint16_t* ucmp16_getIndex(const CompactShortArray* array);
#endif

View File

@ -1,229 +0,0 @@
/*
*
* Copyright (C) 1998-2000, International Business Machines
* Corporation and others. All Rights Reserved.
*
* @version 1.0 06/19/98
* @author Helena Shih
* Based on Taligent international support for C++
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include "ucmp16.h"
#if U_IOSTREAM_SOURCE >= 199711
#include <iostream>
using namespace std;
#elif U_IOSTREAM_SOURCE >= 198506
#include <iostream.h>
#endif
// The case-mapping array; created on the first call to getArray().
CompactShortArray* ulxfrmArray = 0;
// Numeric codes for the character categories used by this tool.
// tagValues (below) maps each two-letter category abbreviation to one of
// these.
enum ECharTypeMapping {
UNASSIGNED = 0,
UPPERCASE_LETTER = 1,
LOWERCASE_LETTER = 2,
TITLECASE_LETTER = 3,
MODIFIER_LETTER = 4,
OTHER_LETTER = 5,
NON_SPACING_MARK = 6,
ENCLOSING_MARK = 7,
COMBINING_SPACING_MARK = 8,
DECIMAL_DIGIT_NUMBER = 9,
LETTER_NUMBER = 10,
OTHER_NUMBER = 11,
SPACE_SEPARATOR = 12,
LINE_SEPARATOR = 13,
PARAGRAPH_SEPARATOR = 14,
CONTROL = 15,
FORMAT = 16,
PRIVATE_USE = 17,
SURROGATE = 18,
DASH_PUNCTUATION = 19,
START_PUNCTUATION = 20,
END_PUNCTUATION = 21,
CONNECTOR_PUNCTUATION = 22,
OTHER_PUNCTUATION = 23,
MATH_SYMBOL = 24,
CURRENCY_SYMBOL = 25,
MODIFIER_SYMBOL = 26,
OTHER_SYMBOL = 27,
INITIAL_PUNCTUATION = 28,
FINAL_PUNCTUATION = 29
};
// NOTE(review): not referenced anywhere in the code visible here.
static const UChar LAST_CHAR_CODE_IN_FILE = 0xFFFD;
// All recognized two-letter category abbreviations, concatenated.
// MakeProp() turns the position of an abbreviation in this string into an
// index (position / 2), so the order must stay in step with tagValues.
const char tagStrings[] = "MnMcMeNdNlNoZsZlZpCcCfCsCoCnLuLlLtLmLoPcPdPsPePoSmScSkSoPiPf";
// Category code for each abbreviation in tagStrings, in the same order.
const int16_t tagValues[] =
{
/* Mn */ (int16_t)NON_SPACING_MARK,
/* Mc */ (int16_t)COMBINING_SPACING_MARK,
/* Me */ (int16_t)ENCLOSING_MARK,
/* Nd */ (int16_t)DECIMAL_DIGIT_NUMBER,
/* Nl */ (int16_t)LETTER_NUMBER,
/* No */ (int16_t)OTHER_NUMBER,
/* Zs */ (int16_t)SPACE_SEPARATOR,
/* Zl */ (int16_t)LINE_SEPARATOR,
/* Zp */ (int16_t)PARAGRAPH_SEPARATOR,
/* Cc */ (int16_t)CONTROL,
/* Cf */ (int16_t)FORMAT,
/* Cs */ (int16_t)SURROGATE,
/* Co */ (int16_t)PRIVATE_USE,
/* Cn */ (int16_t)UNASSIGNED,
/* Lu */ (int16_t)UPPERCASE_LETTER,
/* Ll */ (int16_t)LOWERCASE_LETTER,
/* Lt */ (int16_t)TITLECASE_LETTER,
/* Lm */ (int16_t)MODIFIER_LETTER,
/* Lo */ (int16_t)OTHER_LETTER,
/* Pc */ (int16_t)CONNECTOR_PUNCTUATION,
/* Pd */ (int16_t)DASH_PUNCTUATION,
/* Ps */ (int16_t)START_PUNCTUATION,
/* Pe */ (int16_t)END_PUNCTUATION,
/* Po */ (int16_t)OTHER_PUNCTUATION,
/* Sm */ (int16_t)MATH_SYMBOL,
/* Sc */ (int16_t)CURRENCY_SYMBOL,
/* Sk */ (int16_t)MODIFIER_SYMBOL,
/* So */ (int16_t)OTHER_SYMBOL,
/* Pi */ (int16_t)INITIAL_PUNCTUATION,
/* Pf */ (int16_t)FINAL_PUNCTUATION
};
// Maps a two-letter category abbreviation to its index in tagValues.
//
// The index is the abbreviation's position in tagStrings divided by two.
// Only matches that start on an entry boundary (even offset) count; a string
// that merely straddles two neighbouring entries is reported as
// unrecognized. Unrecognized input is reported on stderr and yields 0.
int
MakeProp(char* str)
{
    int result = 0;
    // tagStrings is const, so strstr yields a pointer to const.
    const char* matchPosition = strstr(tagStrings, str);

    if (matchPosition == 0 || ((matchPosition - tagStrings) % 2) != 0) {
        fprintf(stderr, "unrecognized type letter %s", str);
    }
    else {
        result = (int)((matchPosition - tagStrings) / 2);
    }
    return result;
}
// Builds the global case-mapping array (ulxfrmArray) from the text file
// `input` on the first call and returns it; later calls return the existing
// array without reading anything.
//
// Each input line is a list of ';'-separated fields, presumably in the
// UnicodeData.txt layout (TODO confirm). Lines starting with '#' and empty
// lines are skipped. As a side effect, a "{ 0xXXXX, digit }," line is
// written to cout for digit characters. `input` is closed once the whole
// file has been read.
CompactShortArray*
getArray(FILE *input)
{
if (ulxfrmArray == 0) {
char buffer[1000];
char* bufferPtr;
int set = FALSE;
char type[3];
try {
// 0xffff is the "no mapping" default for every character.
ulxfrmArray = ucmp16_open((int16_t)0xffff);
int32_t unicode, otherunicode, digit, i;
while (TRUE) {
otherunicode = 0xffff;
digit = -1;
bufferPtr = fgets(buffer, 999, input);
if (bufferPtr == NULL) break;
if (bufferPtr[0] == '#' || bufferPtr[0] == '\n' || bufferPtr[0] == 0) continue;
// First field: the character code in hex.
sscanf(bufferPtr, "%X", &unicode);
assert(0 <= unicode && unicode < 65536);
bufferPtr = strchr(bufferPtr, ';');
assert(bufferPtr != NULL);
// NOTE(review): the result of this strchr is not checked before it is
// incremented and read on the next line.
bufferPtr = strchr(bufferPtr + 1, ';');
strncpy(type, ++bufferPtr, 2); // go to start of third field
// NOTE(review): `type` is an array, so this assertion can never fail.
assert(type != NULL);
type[2] = 0;
int typeResult = tagValues[MakeProp(type)];
// check for the decimal values: step past five more ';' separators
bufferPtr++;
for (i = 3; i < 8; i++) {
bufferPtr = strchr(bufferPtr, ';');
assert(bufferPtr != NULL);
bufferPtr++;
}
// digit keeps its initial -1 when nothing can be parsed here
sscanf(bufferPtr, "%X", &digit);
if (((typeResult == DECIMAL_DIGIT_NUMBER) || (typeResult == OTHER_NUMBER)) &&
(digit >= 0 && digit <= 9)){
// NOTE(review): the next line is an expression statement with no effect.
// The sprintf after it reuses the start of the line buffer as scratch
// space while bufferPtr still points further into the same buffer.
buffer[10];
sprintf(buffer, "0x%04X", unicode);
cout << " { " << buffer << ", " << digit << "}, \n";
}
// Step past four more ';' separators to reach the case-mapping field.
// NOTE(review): the unconditional bufferPtr++ skips one character of the
// current field; when that field is empty the skipped character is the ';'
// itself, so the number of fields skipped appears to depend on whether the
// field is empty -- verify against the data format.
bufferPtr++;
for (i = 8; i < 12; i++) {
bufferPtr = strchr(bufferPtr, ';');
assert(bufferPtr != NULL);
bufferPtr++;
}
sscanf(bufferPtr, "%X", &otherunicode);
// the Unicode char has a equivalent uppercase
// (otherunicode keeps its initial 0xffff when nothing was parsed, and that
// value also passes this range check)
if ((typeResult == LOWERCASE_LETTER) && (0 <= otherunicode && otherunicode < 65536)) {
set = TRUE;
}
// For an uppercase letter, move one character on and parse again.
if ((typeResult == UPPERCASE_LETTER) && !set) {
bufferPtr++;
sscanf(bufferPtr, "%X", &otherunicode);
if (0 <= otherunicode && otherunicode < 65536) {
set = TRUE;
}
}
// Record the mapping only if this character has none yet.
if ((set == TRUE) && (ucmp16_get(ulxfrmArray, (UChar)unicode) == (int16_t)0xffff))
ucmp16_set(ulxfrmArray, (UChar)unicode, (int16_t)otherunicode);
set = FALSE;
}
if (input) fclose(input);
ucmp16_compact(ulxfrmArray);
}
catch (...) {
fprintf(stderr, "Error Occured while parsing unicode data file.\n");
}
}
return ulxfrmArray;
}
// Writes the compacted case-mapping array to cout as C++ source: the index
// array (caseIndex), the value array (caseValues) and the value count
// (caseCount). Expects ulxfrmArray to have been built by getArray().
//
// NOTE(review): caseIndex is declared as uint32_t while its elements are
// emitted with (uint16_t) casts; confirm against the declaration in the
// class that consumes this output before changing either.
void
writeArrays()
{
    const int16_t* values = ucmp16_getArray(ulxfrmArray);
    const uint16_t* indexes = ucmp16_getIndex(ulxfrmArray);
    const int32_t indexCount = ucmp16_getkIndexCount();
    const int32_t cnt = ucmp16_getCount(ulxfrmArray);
    int32_t i;

    // Index entries are unsigned 16-bit, so they print as-is; three per line
    // after the first.
    cout << "\nconst uint32_t Unicode::caseIndex[] = {\n ";
    for (i = 0; i < indexCount - 1; i++)
    {
        cout << "(uint16_t)" << (int)indexes[i] << ", ";
        if (i != 0 && i % 3 == 0)
            cout << "\n ";
    }
    cout << " (uint16_t)" << (int)indexes[indexCount - 1] << " };\n";

    // Values: five per line after the first.
    cout << "\nconst int16_t Unicode::caseValues[] = {\n ";
    for (i = 0; i < cnt - 1; i++)
    {
        cout << "(int16_t)" << (int16_t)values[i] << ", ";
        if (i != 0 && i % 5 == 0)
            cout << "\n ";
    }
    // The last element is an int16_t like the others, and the initializer
    // must be closed with "};" for the generated source to compile.
    cout << " (int16_t)" << (int16_t)values[cnt - 1] << " };\n";
    cout << "const int32_t Unicode::caseCount = " << cnt << ";\n";
}
/**
 * Reads the data file named on the command line, builds the case-mapping
 * array from it and prints the generated tables to standard output.
 * Exits with status 1 when the argument is missing or the file cannot be
 * opened, 0 otherwise.
 */
int main(int argc, char** argv)
{
    FILE *input = 0;

    if (argc != 2) {
        printf("Usage : chartype filename\n\n");
        exit(1);
    }
    input = fopen(argv[1], "r");
    if (input == 0) {
        printf("Cannot open the input file: %s\n\n", argv[1]);
        exit(1);
    }
    // getArray() closes the file itself once it has been read.
    getArray(input);
    writeArrays();
    return 0;
}

View File

@ -1,98 +0,0 @@
# Microsoft Developer Studio Project File - Name="ulxfrm" - Package Owner=<4>
# Microsoft Developer Studio Generated Build File, Format Version 6.00
# ** DO NOT EDIT **
# TARGTYPE "Win32 (x86) Console Application" 0x0103
CFG=ulxfrm - Win32 Debug
!MESSAGE This is not a valid makefile. To build this project using NMAKE,
!MESSAGE use the Export Makefile command and run
!MESSAGE
!MESSAGE NMAKE /f "ulxfrm.mak".
!MESSAGE
!MESSAGE You can specify a configuration when running NMAKE
!MESSAGE by defining the macro CFG on the command line. For example:
!MESSAGE
!MESSAGE NMAKE /f "ulxfrm.mak" CFG="ulxfrm - Win32 Debug"
!MESSAGE
!MESSAGE Possible choices for configuration are:
!MESSAGE
!MESSAGE "ulxfrm - Win32 Release" (based on "Win32 (x86) Console Application")
!MESSAGE "ulxfrm - Win32 Debug" (based on "Win32 (x86) Console Application")
!MESSAGE
# Begin Project
# PROP AllowPerConfigDependencies 0
# PROP Scc_ProjName ""
# PROP Scc_LocalPath ""
CPP=cl.exe
RSC=rc.exe
!IF "$(CFG)" == "ulxfrm - Win32 Release"
# PROP BASE Use_MFC 0
# PROP BASE Use_Debug_Libraries 0
# PROP BASE Output_Dir "Release"
# PROP BASE Intermediate_Dir "Release"
# PROP BASE Target_Dir ""
# PROP Use_MFC 0
# PROP Use_Debug_Libraries 0
# PROP Output_Dir "Release"
# PROP Intermediate_Dir "Release"
# PROP Ignore_Export_Lib 0
# PROP Target_Dir ""
# ADD BASE CPP /nologo /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /c
# ADD CPP /nologo /W3 /GX /O2 /I "..\..\..\include" /I "..\..\..\source\common" /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /c
# ADD BASE RSC /l 0x409 /d "NDEBUG"
# ADD RSC /l 0x409 /d "NDEBUG"
BSC32=bscmake.exe
# ADD BASE BSC32 /nologo
# ADD BSC32 /nologo
LINK32=link.exe
# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:I386
# ADD LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:I386
!ELSEIF "$(CFG)" == "ulxfrm - Win32 Debug"
# PROP BASE Use_MFC 0
# PROP BASE Use_Debug_Libraries 1
# PROP BASE Output_Dir "Debug"
# PROP BASE Intermediate_Dir "Debug"
# PROP BASE Target_Dir ""
# PROP Use_MFC 0
# PROP Use_Debug_Libraries 1
# PROP Output_Dir "Debug"
# PROP Intermediate_Dir "Debug"
# PROP Ignore_Export_Lib 0
# PROP Target_Dir ""
# ADD BASE CPP /nologo /W3 /Gm /GX /Zi /Od /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /c
# ADD CPP /nologo /W3 /Gm /GX /ZI /Od /I "..\..\..\include" /I "..\..\..\source\common" /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /c
# ADD BASE RSC /l 0x409 /d "_DEBUG"
# ADD RSC /l 0x409 /d "_DEBUG"
BSC32=bscmake.exe
# ADD BASE BSC32 /nologo
# ADD BSC32 /nologo
LINK32=link.exe
# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept
# ADD LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept
!ENDIF
# Begin Target
# Name "ulxfrm - Win32 Release"
# Name "ulxfrm - Win32 Debug"
# Begin Source File
SOURCE=.\ucmp16.c
# End Source File
# Begin Source File
SOURCE=.\ucmp16.h
# End Source File
# Begin Source File
SOURCE=.\ulxfrm.cpp
# End Source File
# End Target
# End Project

View File

@ -1,29 +0,0 @@
Microsoft Developer Studio Workspace File, Format Version 6.00
# WARNING: DO NOT EDIT OR DELETE THIS WORKSPACE FILE!
###############################################################################
Project: "ulxfrm"=.\ulxfrm.dsp - Package Owner=<4>
Package=<5>
{{{
}}}
Package=<4>
{{{
}}}
###############################################################################
Global:
Package=<5>
{{{
}}}
Package=<3>
{{{
}}}
###############################################################################