ICU-740 Remove old/unused code according to Helena.
X-SVN-Rev: 5180
This commit is contained in:
parent
6c37bcf77d
commit
ce3bed4f40
@ -1,173 +0,0 @@
|
||||
/*
|
||||
*
|
||||
* Copyright (C) 1998-2001, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
* @version 1.0 06/19/98
|
||||
* @author Helena Shih
|
||||
* Based on Taligent international support for C++
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
#include "cmemory.h"
|
||||
#include "ucmp8.h"
|
||||
|
||||
#if U_IOSTREAM_SOURCE >= 199711
|
||||
#include <iostream>
|
||||
using namespace std;
|
||||
#elif U_IOSTREAM_SOURCE >= 198506
|
||||
#include <iostream.h>
|
||||
#endif
|
||||
|
||||
CompactByteArray* charDirArray = 0;
|
||||
|
||||
static const UChar LAST_CHAR_CODE_IN_FILE = 0xFFFD;
|
||||
const char charDirStrings[] = "L R EN ES ET AN CS B S WS ON LRELROAL RLERLOPDFNSMBN ";
|
||||
|
||||
/*
 * Value stored in the compact array for each direction category.  The table
 * is indexed by the entry number that MakeProp() returns.
 */
int tagValues[] = {
    /* kLeftToRight              */  0,
    /* kRightToLeft              */  1,
    /* kEuropeanNumber           */  2,
    /* kEuropeanNumberSeparator  */  3,
    /* kEuropeanNumberTerminator */  4,
    /* kArabicNumber             */  5,
    /* kCommonNumberSeparator    */  6,
    /* kParagraphSeparator       */  7,
    /* kSegmentSeparator         */  8,
    /* kWhiteSpaceNeutral        */  9,
    /* kOtherNeutral             */ 10,
    /* kLeftToRightEmbedding     */ 11,
    /* kLeftToRightOverride      */ 12,
    /* kRightToLeftArabic        */ 13,
    /* kRightToLeftEmbedding     */ 14,
    /* kRightToLeftOverride      */ 15,
    /* kPopDirectionalFormat     */ 16,
    /* kNonSpacingMark           */ 17,
    /* kBoundaryNeutral          */ 18
};
|
||||
|
||||
int MakeProp(char* str)
|
||||
{
|
||||
int result = 0;
|
||||
char* matchPosition;
|
||||
matchPosition = strstr(charDirStrings, str);
|
||||
if (matchPosition == 0)
|
||||
fprintf(stderr, "unrecognized type letter %s\n", str);
|
||||
else
|
||||
result = ((matchPosition - charDirStrings) / 3);
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
 * Returns the character direction array, building it on the first call.
 *
 * When charDirArray is still 0 the function reads `input` line by line.
 * Lines that start with '#', or that are empty, are ignored.  For every other
 * line the leading hexadecimal number is taken as the code point and the
 * fifth semicolon-separated field as the direction abbreviation; the
 * abbreviation is padded with blanks to three characters, translated with
 * MakeProp()/tagValues[] and stored with ucmp8_set().  Afterwards the stream
 * is closed and the array is compacted with cycle 1.
 *
 * Malformed lines (no code point, code point outside 0..0xFFFF, missing
 * fields, abbreviation longer than three characters) still trip the asserts
 * in debug builds; in builds without asserts they are reported on stderr and
 * skipped instead of causing out-of-bounds accesses.
 *
 * @param input  open text stream; only read (and closed) when the array has
 *               not been built yet.
 * @return       the array stored in charDirArray, or 0 when it could not be
 *               allocated.
 */
CompactByteArray*
getArray(FILE *input)
{
    if (charDirArray == 0) {
        char buffer[1000];

        try {
            charDirArray = ucmp8_open(0);
            if (charDirArray == 0) {
                fprintf(stderr, "Cannot allocate the character direction array.\n");
                if (input)
                    fclose(input);
                return 0;
            }

            for (;;) {
                char* field = fgets(buffer, 999, input);
                if (field == NULL)
                    break;
                if (field[0] == '#' || field[0] == '\n' || field[0] == 0)
                    continue;

                /* %X stores through an unsigned int pointer. */
                unsigned int unicode = 0;
                if (sscanf(field, "%X", &unicode) != 1) {
                    fprintf(stderr, "skipping line without a code point: %s", buffer);
                    continue;
                }
                assert(unicode < 65536);
                if (unicode >= 65536) {
                    fprintf(stderr, "skipping code point outside 0..FFFF: %s", buffer);
                    continue;
                }

                /* Step over four separators to reach the fifth field. */
                for (int i = 0; i < 4 && field != NULL; i++) {
                    field = strchr(field, ';');
                    assert(field != NULL);
                    if (field != NULL)
                        field++;
                }
                char* next = (field != NULL) ? strchr(field, ';') : NULL;
                if (next == NULL) {
                    fprintf(stderr, "skipping line with too few fields: %s", buffer);
                    continue;
                }

                /* dir holds three characters plus the terminator. */
                const int length = (int)(next - field);
                if (length > 3) {
                    fprintf(stderr, "skipping line with an over-long type field: %s", buffer);
                    continue;
                }

                char dir[4];
                int j = 0;
                for (; j < length; j++)
                    dir[j] = field[j];
                for (; j < 3; j++)
                    dir[j] = ' ';
                dir[3] = 0;

                ucmp8_set(charDirArray, (UChar)unicode, (int8_t)tagValues[MakeProp(dir)]);
            }

            if (input)
                fclose(input);
            ucmp8_compact(charDirArray, 1);
        }
        catch (...) {
            fprintf(stderr, "Error Occured while parsing unicode data file.\n");
        }
    }
    return charDirArray;
}
|
||||
|
||||
void
|
||||
writeArrays()
|
||||
{
|
||||
const int8_t* values = ucmp8_getArray(charDirArray);
|
||||
const uint16_t* indexes = ucmp8_getIndex(charDirArray);
|
||||
int32_t i;
|
||||
int32_t cnt = ucmp8_getCount(charDirArray);
|
||||
cout << "\nconst t_uint32 Unicode::fCharDirIndices[] = {\n ";
|
||||
for (i = 0; i < ucmp8_getkIndexCount()-1; i++)
|
||||
{
|
||||
cout << "(uint16_t)" << ((indexes[i] >= 0) ? (int)indexes[i] : (int)(indexes[i]+ucmp8_getkUnicodeCount()))
|
||||
<< ", ";
|
||||
if (i != 0)
|
||||
if (i % 3 == 0)
|
||||
cout << "\n ";
|
||||
}
|
||||
cout << " (uint16_t)" << ((indexes[ucmp8_getkIndexCount()-1] >= 0) ? (int)indexes[i] : (int)(indexes[i]+ucmp8_getkUnicodeCount()))
|
||||
<< " };\n";
|
||||
cout << "\nconst int8_t Unicode::fCharDirValues[] = {\n ";
|
||||
for (i = 0; i < cnt-1; i++)
|
||||
{
|
||||
cout << "(int8_t)" << (int)values[i] << ", ";
|
||||
if (i != 0)
|
||||
if (i % 5 == 0)
|
||||
cout << "\n ";
|
||||
}
|
||||
cout << " (int8_t)" << (int)values[cnt-1] << " }\n";
|
||||
cout << "const int32_t Unicode::fCharDirCount = " << cnt << ";\n";
|
||||
}
|
||||
/**
|
||||
* The main function builds the CharType data array and prints it to System.out
|
||||
*/
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
CompactByteArray* arrays = 0;
|
||||
FILE *input = 0;
|
||||
if (argc != 2) {
|
||||
printf("Usage : chartype filename\n\n");
|
||||
exit(1);
|
||||
}
|
||||
input = fopen(argv[1], "r");
|
||||
if (input == 0) {
|
||||
printf("Cannot open the input file: %s\n\n", argv[1]);
|
||||
exit(1);
|
||||
}
|
||||
arrays = getArray(input);
|
||||
writeArrays();
|
||||
return 0;
|
||||
}
|
||||
|
@ -1,98 +0,0 @@
|
||||
# Microsoft Developer Studio Project File - Name="chardir" - Package Owner=<4>
|
||||
# Microsoft Developer Studio Generated Build File, Format Version 6.00
|
||||
# ** DO NOT EDIT **
|
||||
|
||||
# TARGTYPE "Win32 (x86) Console Application" 0x0103
|
||||
|
||||
CFG=chardir - Win32 Debug
|
||||
!MESSAGE This is not a valid makefile. To build this project using NMAKE,
|
||||
!MESSAGE use the Export Makefile command and run
|
||||
!MESSAGE
|
||||
!MESSAGE NMAKE /f "chardir.mak".
|
||||
!MESSAGE
|
||||
!MESSAGE You can specify a configuration when running NMAKE
|
||||
!MESSAGE by defining the macro CFG on the command line. For example:
|
||||
!MESSAGE
|
||||
!MESSAGE NMAKE /f "chardir.mak" CFG="chardir - Win32 Debug"
|
||||
!MESSAGE
|
||||
!MESSAGE Possible choices for configuration are:
|
||||
!MESSAGE
|
||||
!MESSAGE "chardir - Win32 Release" (based on "Win32 (x86) Console Application")
|
||||
!MESSAGE "chardir - Win32 Debug" (based on "Win32 (x86) Console Application")
|
||||
!MESSAGE
|
||||
|
||||
# Begin Project
|
||||
# PROP AllowPerConfigDependencies 0
|
||||
# PROP Scc_ProjName ""
|
||||
# PROP Scc_LocalPath ""
|
||||
CPP=cl.exe
|
||||
RSC=rc.exe
|
||||
|
||||
!IF "$(CFG)" == "chardir - Win32 Release"
|
||||
|
||||
# PROP BASE Use_MFC 0
|
||||
# PROP BASE Use_Debug_Libraries 0
|
||||
# PROP BASE Output_Dir "Release"
|
||||
# PROP BASE Intermediate_Dir "Release"
|
||||
# PROP BASE Target_Dir ""
|
||||
# PROP Use_MFC 0
|
||||
# PROP Use_Debug_Libraries 0
|
||||
# PROP Output_Dir "Release"
|
||||
# PROP Intermediate_Dir "Release"
|
||||
# PROP Ignore_Export_Lib 0
|
||||
# PROP Target_Dir ""
|
||||
# ADD BASE CPP /nologo /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /c
|
||||
# ADD CPP /nologo /W3 /GX /O2 /I "..\..\..\include" /I "..\..\..\source\common" /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /c
|
||||
# ADD BASE RSC /l 0x409 /d "NDEBUG"
|
||||
# ADD RSC /l 0x409 /d "NDEBUG"
|
||||
BSC32=bscmake.exe
|
||||
# ADD BASE BSC32 /nologo
|
||||
# ADD BSC32 /nologo
|
||||
LINK32=link.exe
|
||||
# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:I386
|
||||
# ADD LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:I386
|
||||
|
||||
!ELSEIF "$(CFG)" == "chardir - Win32 Debug"
|
||||
|
||||
# PROP BASE Use_MFC 0
|
||||
# PROP BASE Use_Debug_Libraries 1
|
||||
# PROP BASE Output_Dir "Debug"
|
||||
# PROP BASE Intermediate_Dir "Debug"
|
||||
# PROP BASE Target_Dir ""
|
||||
# PROP Use_MFC 0
|
||||
# PROP Use_Debug_Libraries 1
|
||||
# PROP Output_Dir "Debug"
|
||||
# PROP Intermediate_Dir "Debug"
|
||||
# PROP Ignore_Export_Lib 0
|
||||
# PROP Target_Dir ""
|
||||
# ADD BASE CPP /nologo /W3 /Gm /GX /Zi /Od /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /c
|
||||
# ADD CPP /nologo /W3 /Gm /GX /ZI /Od /I "..\..\..\include" /I "..\..\..\source\common" /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /c
|
||||
# ADD BASE RSC /l 0x409 /d "_DEBUG"
|
||||
# ADD RSC /l 0x409 /d "_DEBUG"
|
||||
BSC32=bscmake.exe
|
||||
# ADD BASE BSC32 /nologo
|
||||
# ADD BSC32 /nologo
|
||||
LINK32=link.exe
|
||||
# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept
|
||||
# ADD LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept
|
||||
|
||||
!ENDIF
|
||||
|
||||
# Begin Target
|
||||
|
||||
# Name "chardir - Win32 Release"
|
||||
# Name "chardir - Win32 Debug"
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\chardir.cpp
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\ucmp8.c
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\ucmp8.h
|
||||
# End Source File
|
||||
# End Target
|
||||
# End Project
|
@ -1,29 +0,0 @@
|
||||
Microsoft Developer Studio Workspace File, Format Version 6.00
|
||||
# WARNING: DO NOT EDIT OR DELETE THIS WORKSPACE FILE!
|
||||
|
||||
###############################################################################
|
||||
|
||||
Project: "chardir"=.\chardir.dsp - Package Owner=<4>
|
||||
|
||||
Package=<5>
|
||||
{{{
|
||||
}}}
|
||||
|
||||
Package=<4>
|
||||
{{{
|
||||
}}}
|
||||
|
||||
###############################################################################
|
||||
|
||||
Global:
|
||||
|
||||
Package=<5>
|
||||
{{{
|
||||
}}}
|
||||
|
||||
Package=<3>
|
||||
{{{
|
||||
}}}
|
||||
|
||||
###############################################################################
|
||||
|
@ -1,386 +0,0 @@
|
||||
/*
|
||||
********************************************************************
|
||||
*
|
||||
* Copyright (C) 1997-2000, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
********************************************************************
|
||||
*/
|
||||
|
||||
#ifndef _STDLIB_H
|
||||
#include <stdlib.h>
|
||||
#endif
|
||||
|
||||
#ifndef _STDIO_H
|
||||
#include <stdio.h>
|
||||
#endif
|
||||
|
||||
|
||||
#include "ucmp8.h"
|
||||
#include "cmemory.h"
|
||||
|
||||
static int32_t findOverlappingPosition(CompactByteArray* this,
|
||||
uint32_t start,
|
||||
const UChar *tempIndex,
|
||||
int32_t tempIndexCount,
|
||||
uint32_t cycle);
|
||||
|
||||
/* Internal compile-time constants; the exported objects below mirror them. */

#define kUnicodeCount_int 65536                     /* number of code units covered */
#define kBlockShift_int   7                         /* log2 of the block size */
#define kBlockCount_int   (1<<kBlockShift_int)      /* entries per block */
#define kIndexShift_int   (16-kBlockShift_int)      /* log2 of the number of blocks */
#define kIndexCount_int   (1<<kIndexShift_int)      /* number of blocks / index entries */
#define kBlockMask_int    (kBlockCount_int-1)       /* extracts the offset inside a block */

const int32_t UCMP8_kUnicodeCount = kUnicodeCount_int;
const int32_t UCMP8_kBlockShift = kBlockShift_int;
const int32_t UCMP8_kBlockCount = kBlockCount_int;
const int32_t UCMP8_kIndexShift = kIndexShift_int;
const int32_t UCMP8_kIndexCount = kIndexCount_int;
const uint32_t UCMP8_kBlockMask = kBlockMask_int;

/*
 * Function access to the constants above.  The parameter lists are spelled
 * (void) so that the definitions are prototypes, like the declarations in
 * ucmp8.h.
 */
int32_t ucmp8_getkUnicodeCount(void) { return UCMP8_kUnicodeCount; }
int32_t ucmp8_getkBlockCount(void) { return UCMP8_kBlockCount; }
int32_t ucmp8_getkIndexCount(void) { return UCMP8_kIndexCount; }
|
||||
/* debug flags*/
|
||||
/*=======================================================*/
|
||||
/*
 * Returns the value stored for `index`: the upper bits of `index` select an
 * entry of fIndex, which gives the start of a block in fArray, and the low
 * bits select the element inside that block.
 */
U_CAPI int8_t ucmp8_get(CompactByteArray* array, uint16_t index)
{
    const uint32_t blockStart = array->fIndex[index >> UCMP8_kBlockShift] & 0xFFFF;
    const uint32_t offsetInBlock = index & UCMP8_kBlockMask;

    return array->fArray[blockStart + offsetInBlock];
}
|
||||
/* Same lookup as ucmp8_get(), with the result converted to an unsigned byte. */
U_CAPI uint8_t ucmp8_getu(CompactByteArray* array, uint16_t index)
{
    const int8_t signedValue = ucmp8_get(array, index);

    return (uint8_t)signedValue;
}
|
||||
|
||||
CompactByteArray* ucmp8_open(int8_t defaultValue)
|
||||
{
|
||||
/* set up the index array and the data array.
|
||||
* the index array always points into particular parts of the data array
|
||||
* it is initially set up to point at regular block boundaries
|
||||
* The following example uses blocks of 4 for simplicity
|
||||
* Example: Expanded
|
||||
* INDEX# 0 1 2 3 4
|
||||
* INDEX 0 4 8 12 16 ...
|
||||
* ARRAY abcdeababcedzyabcdea...
|
||||
* | | | | | |...
|
||||
* whenever you set an element in the array, it unpacks to this state
|
||||
* After compression, the index will point to various places in the data array
|
||||
* wherever there is a runs of the same elements as in the original
|
||||
* Example: Compressed
|
||||
* INDEX# 0 1 2 3 4
|
||||
* INDEX 0 4 1 8 2 ...
|
||||
* ARRAY abcdeabazyabc...
|
||||
* If you look at the example, index# 2 in the expanded version points
|
||||
* to data position number 8, which has elements "bced". In the compressed
|
||||
* version, index# 2 points to data position 1, which also has "bced"
|
||||
*/
|
||||
CompactByteArray* this = (CompactByteArray*) uprv_malloc(sizeof(CompactByteArray));
|
||||
int32_t i;
|
||||
|
||||
if (this == NULL) return NULL;
|
||||
|
||||
this->fArray = NULL;
|
||||
this->fIndex = NULL;
|
||||
this->fCount = UCMP8_kUnicodeCount;
|
||||
this->fCompact = FALSE;
|
||||
this->fBogus = FALSE;
|
||||
|
||||
|
||||
this->fArray = (int8_t*) uprv_malloc(sizeof(int8_t) * UCMP8_kUnicodeCount);
|
||||
if (!this->fArray)
|
||||
{
|
||||
this->fBogus = TRUE;
|
||||
return NULL;
|
||||
}
|
||||
this->fIndex = (uint16_t*) uprv_malloc(sizeof(uint16_t) * UCMP8_kIndexCount);
|
||||
if (!this->fIndex)
|
||||
{
|
||||
uprv_free(this->fArray);
|
||||
this->fArray = NULL;
|
||||
this->fBogus = TRUE;
|
||||
return NULL;
|
||||
}
|
||||
for (i = 0; i < UCMP8_kUnicodeCount; ++i)
|
||||
{
|
||||
this->fArray[i] = defaultValue;
|
||||
}
|
||||
for (i = 0; i < UCMP8_kIndexCount; ++i)
|
||||
{
|
||||
this->fIndex[i] = (uint16_t)(i << UCMP8_kBlockShift);
|
||||
}
|
||||
|
||||
return this;
|
||||
}
|
||||
|
||||
CompactByteArray* ucmp8_openAdopt(uint16_t *indexArray,
|
||||
int8_t *newValues,
|
||||
int32_t count)
|
||||
{
|
||||
CompactByteArray* this = (CompactByteArray*) uprv_malloc(sizeof(CompactByteArray));
|
||||
if (!this) return NULL;
|
||||
|
||||
this->fArray = NULL;
|
||||
this->fIndex = NULL;
|
||||
this->fCount = count;
|
||||
this->fBogus = FALSE;
|
||||
|
||||
this->fArray = newValues;
|
||||
this->fIndex = indexArray;
|
||||
this->fCompact = (count < UCMP8_kUnicodeCount) ? TRUE : FALSE;
|
||||
|
||||
return this;
|
||||
}
|
||||
|
||||
/*=======================================================*/
|
||||
|
||||
/*
 * Releases the value array, the index array and the struct itself with
 * uprv_free().  Passing NULL is allowed and does nothing.
 */
void ucmp8_close(CompactByteArray* this)
{
    if (this == NULL) return;

    uprv_free(this->fArray);
    this->fArray = NULL;
    uprv_free(this->fIndex);
    this->fIndex = NULL;
    this->fCount = 0;
    this->fCompact = FALSE;
    uprv_free(this);
}
|
||||
|
||||
|
||||
/*=======================================================*/
|
||||
|
||||
/*
 * Turns a compacted array back into the expanded layout.  Every value is read
 * through ucmp8_get() into a fresh UCMP8_kUnicodeCount-element array, the
 * index is reset to regular block boundaries and the old value array is
 * freed.  Arrays that are not compact are left untouched.  If the new array
 * cannot be allocated, fBogus is set and nothing else changes.
 *
 * Example: Compressed
 * INDEX# 0 1 2 3 4
 * INDEX 0 4 1 8 2 ...
 * ARRAY abcdeabazyabc...
 * turns into
 * Example: Expanded
 * INDEX# 0 1 2 3 4
 * INDEX 0 4 8 12 16 ...
 * ARRAY abcdeababcedzyabcdea...
 */
void ucmp8_expand(CompactByteArray* this)
{
    int8_t* expanded;
    int32_t pos;

    if (!this->fCompact) {
        return;
    }

    expanded = (int8_t*) uprv_malloc(sizeof(int8_t) * UCMP8_kUnicodeCount);
    if (expanded == NULL) {
        this->fBogus = TRUE;
        return;
    }

    /* Read through the old index before it is overwritten below. */
    pos = 0;
    while (pos < UCMP8_kUnicodeCount) {
        expanded[pos] = ucmp8_get(this, (UChar)pos);
        ++pos;
    }

    pos = 0;
    while (pos < UCMP8_kIndexCount) {
        this->fIndex[pos] = (uint16_t)(pos << UCMP8_kBlockShift);
        ++pos;
    }

    uprv_free(this->fArray);
    this->fArray = expanded;
    this->fCompact = FALSE;
}
|
||||
|
||||
|
||||
/*=======================================================*/
|
||||
/* this->fArray: an array to be overlapped
|
||||
* start and count: specify the block to be overlapped
|
||||
* tempIndex: the overlapped array (actually indices back into inputContents)
|
||||
* inputHash: an index of hashes for tempIndex, where
|
||||
* inputHash[i] = XOR of values from i-count+1 to i
|
||||
*/
|
||||
int32_t
|
||||
findOverlappingPosition(CompactByteArray* this,
|
||||
uint32_t start,
|
||||
const UChar* tempIndex,
|
||||
int32_t tempIndexCount,
|
||||
uint32_t cycle)
|
||||
{
|
||||
/* this is a utility routine for finding blocks that overlap.
|
||||
* IMPORTANT: the cycle number is very important. Small cycles take a lot
|
||||
* longer to work. In some cases, they may be able to get better compaction.
|
||||
*/
|
||||
|
||||
int32_t i;
|
||||
int32_t j;
|
||||
int32_t currentCount;
|
||||
|
||||
for (i = 0; i < tempIndexCount; i += cycle)
|
||||
{
|
||||
currentCount = UCMP8_kBlockCount;
|
||||
if (i + UCMP8_kBlockCount > tempIndexCount)
|
||||
{
|
||||
currentCount = tempIndexCount - i;
|
||||
}
|
||||
for (j = 0; j < currentCount; ++j)
|
||||
{
|
||||
if (this->fArray[start + j] != this->fArray[tempIndex[i + j]]) break;
|
||||
}
|
||||
if (j == currentCount) break;
|
||||
}
|
||||
|
||||
return i;
|
||||
}
|
||||
|
||||
UBool
|
||||
ucmp8_isBogus(const CompactByteArray* this)
|
||||
{
|
||||
return this->fBogus;
|
||||
}
|
||||
|
||||
const int8_t*
|
||||
ucmp8_getArray(const CompactByteArray* this)
|
||||
{
|
||||
return this->fArray;
|
||||
}
|
||||
|
||||
const uint16_t*
|
||||
ucmp8_getIndex(const CompactByteArray* this)
|
||||
{
|
||||
return this->fIndex;
|
||||
}
|
||||
|
||||
int32_t
|
||||
ucmp8_getCount(const CompactByteArray* this)
|
||||
{
|
||||
return this->fCount;
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
ucmp8_set(CompactByteArray* this,
|
||||
UChar c,
|
||||
int8_t value)
|
||||
{
|
||||
if (this->fCompact == TRUE)
|
||||
{
|
||||
ucmp8_expand(this);
|
||||
if (this->fBogus) return;
|
||||
}
|
||||
this->fArray[(int32_t)c] = value;
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
ucmp8_setRange(CompactByteArray* this,
|
||||
UChar start,
|
||||
UChar end,
|
||||
int8_t value)
|
||||
{
|
||||
int32_t i;
|
||||
if (this->fCompact == TRUE)
|
||||
{
|
||||
ucmp8_expand(this);
|
||||
if (this->fBogus) return;
|
||||
}
|
||||
for (i = start; i <= end; ++i)
|
||||
{
|
||||
this->fArray[i] = value;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*=======================================================*/
|
||||
|
||||
void
|
||||
ucmp8_compact(CompactByteArray* this,
|
||||
uint32_t cycle)
|
||||
{
|
||||
if (!this->fCompact)
|
||||
{
|
||||
/* this actually does the compaction.
|
||||
* it walks throught the contents of the expanded array, finding the
|
||||
* first block in the data that matches the contents of the current index.
|
||||
* As it works, it keeps an updated pointer to the last position,
|
||||
* so that it knows how big to make the final array
|
||||
* If the matching succeeds, then the index will point into the data
|
||||
* at some earlier position.
|
||||
* If the matching fails, then last position pointer will be bumped,
|
||||
* and the index will point to that last block of data.
|
||||
*/
|
||||
UChar* tempIndex;
|
||||
int32_t tempIndexCount;
|
||||
int8_t* tempArray;
|
||||
int32_t iBlock, iIndex;
|
||||
|
||||
/* fix cycle, must be 0 < cycle <= blockcount*/
|
||||
if (cycle < 0) cycle = 1;
|
||||
else if (cycle > (uint32_t)UCMP8_kBlockCount) cycle = UCMP8_kBlockCount;
|
||||
|
||||
/* make temp storage, larger than we need*/
|
||||
tempIndex = (UChar*) uprv_malloc(sizeof(UChar)* UCMP8_kUnicodeCount);
|
||||
if (!tempIndex)
|
||||
{
|
||||
this->fBogus = TRUE;
|
||||
return;
|
||||
}
|
||||
/* set up first block.*/
|
||||
tempIndexCount = UCMP8_kBlockCount;
|
||||
for (iIndex = 0; iIndex < UCMP8_kBlockCount; ++iIndex)
|
||||
{
|
||||
tempIndex[iIndex] = (uint16_t)iIndex;
|
||||
}; /* endfor (iIndex = 0; .....)*/
|
||||
this->fIndex[0] = 0;
|
||||
|
||||
/* for each successive block, find out its first position in the compacted array*/
|
||||
for (iBlock = 1; iBlock < UCMP8_kIndexCount; ++iBlock)
|
||||
{
|
||||
int32_t newCount, firstPosition, block;
|
||||
block = iBlock << UCMP8_kBlockShift;
|
||||
/* if (debugSmall) if (block > debugSmallLimit) break;*/
|
||||
firstPosition = findOverlappingPosition(this,
|
||||
block,
|
||||
tempIndex,
|
||||
tempIndexCount,
|
||||
cycle);
|
||||
|
||||
/* if not contained in the current list, copy the remainder
|
||||
* invariant; cumulativeHash[iBlock] = XOR of values from iBlock-kBlockCount+1 to iBlock
|
||||
* we do this by XORing out cumulativeHash[iBlock-kBlockCount]
|
||||
*/
|
||||
newCount = firstPosition + UCMP8_kBlockCount;
|
||||
if (newCount > tempIndexCount)
|
||||
{
|
||||
for (iIndex = tempIndexCount; iIndex < newCount; ++iIndex)
|
||||
{
|
||||
tempIndex[iIndex] = (uint16_t)(iIndex - firstPosition + block);
|
||||
} /* endfor (iIndex = tempIndexCount....)*/
|
||||
tempIndexCount = newCount;
|
||||
} /* endif (newCount > tempIndexCount)*/
|
||||
this->fIndex[iBlock] = (uint16_t)firstPosition;
|
||||
} /* endfor (iBlock = 1.....)*/
|
||||
|
||||
/* now allocate and copy the items into the array*/
|
||||
tempArray = (int8_t*) uprv_malloc(tempIndexCount * sizeof(int8_t));
|
||||
if (!tempArray)
|
||||
{
|
||||
this->fBogus = TRUE;
|
||||
uprv_free(tempIndex);
|
||||
return;
|
||||
}
|
||||
for (iIndex = 0; iIndex < tempIndexCount; ++iIndex)
|
||||
{
|
||||
tempArray[iIndex] = this->fArray[tempIndex[iIndex]];
|
||||
}
|
||||
uprv_free(this->fArray);
|
||||
this->fArray = tempArray;
|
||||
this->fCount = tempIndexCount;
|
||||
|
||||
|
||||
/* free up temp storage*/
|
||||
uprv_free(tempIndex);
|
||||
this->fCompact = TRUE;
|
||||
} /* endif (!this->fCompact)*/
|
||||
}
|
||||
|
||||
|
||||
|
@ -1,86 +0,0 @@
|
||||
/*
|
||||
********************************************************************
|
||||
*
|
||||
* Copyright (C) 1996-2000, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
********************************************************************
|
||||
*/
|
||||
|
||||
|
||||
|
||||
#ifndef UCMP8_H
|
||||
#define UCMP8_H
|
||||
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
/*====================================*/
|
||||
/* class CompactByteArray
|
||||
* Provides a compact way to store information that is indexed by Unicode values,
|
||||
* such as character properties, types, keyboard values, etc.
|
||||
* The ATypes are used by value, so should be small, integers or pointers.
|
||||
*====================================
|
||||
*/
|
||||
|
||||
U_CAPI const int32_t UCMP8_kUnicodeCount;
|
||||
U_CAPI const int32_t UCMP8_kBlockShift;
|
||||
U_CAPI const int32_t UCMP8_kBlockCount;
|
||||
U_CAPI const int32_t UCMP8_kIndexShift;
|
||||
U_CAPI const int32_t UCMP8_kIndexCount;
|
||||
U_CAPI const uint32_t UCMP8_kBlockMask;
|
||||
|
||||
U_CAPI int32_t ucmp8_getkUnicodeCount(void);
|
||||
U_CAPI int32_t ucmp8_getkBlockCount(void);
|
||||
U_CAPI int32_t ucmp8_getkIndexCount(void);
|
||||
typedef struct{
|
||||
int8_t* fArray;
|
||||
uint16_t* fIndex;
|
||||
int32_t fCount;
|
||||
UBool fCompact;
|
||||
UBool fBogus;
|
||||
} CompactByteArray;
|
||||
|
||||
U_CAPI CompactByteArray* ucmp8_open(int8_t defaultValue);
|
||||
U_CAPI CompactByteArray* ucmp8_openAdopt(uint16_t* indexArray,
|
||||
int8_t* newValues,
|
||||
int32_t count);
|
||||
U_CAPI void ucmp8_close(CompactByteArray* array);
|
||||
U_CAPI UBool isBogus(const CompactByteArray* array);
|
||||
|
||||
|
||||
U_CAPI int8_t ucmp8_get(CompactByteArray* array, uint16_t index);
|
||||
U_CAPI uint8_t ucmp8_getu(CompactByteArray* array, uint16_t index);
|
||||
|
||||
U_CAPI void ucmp8_set(CompactByteArray* array,
|
||||
UChar index,
|
||||
int8_t value);
|
||||
|
||||
U_CAPI void ucmp8_setRange(CompactByteArray* array,
|
||||
UChar start,
|
||||
UChar end,
|
||||
int8_t value);
|
||||
|
||||
U_CAPI int32_t ucmp8_getCount(const CompactByteArray* array);
|
||||
U_CAPI const int8_t* ucmp8_getArray(const CompactByteArray* array);
|
||||
U_CAPI const uint16_t* ucmp8_getIndex(const CompactByteArray* array);
|
||||
|
||||
/* Compact the array.
|
||||
The value of cycle determines how large the overlap can be.
|
||||
A cycle of 1 is the most compacted, but takes the most time to do.
|
||||
If values stored in the array tend to repeat in cycles of, say, 16,
|
||||
then using that will be faster than cycle = 1, and get almost the
|
||||
same compression.
|
||||
*/
|
||||
U_CAPI void ucmp8_compact(CompactByteArray* array,
|
||||
uint32_t cycle);
|
||||
|
||||
/* Expanded takes the array back to a 65536 element array*/
|
||||
U_CAPI void ucmp8_expand(CompactByteArray* array);
|
||||
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
|
@ -1,169 +0,0 @@
|
||||
/*
|
||||
*
|
||||
* Copyright (C) 1996-2001, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
* @version 1.0 12/12/96
|
||||
* @author Helena Shih
|
||||
* Based on Taligent international support for C++
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
|
||||
#ifndef _CHARTBLD
|
||||
#include "chartbld.h"
|
||||
#endif
|
||||
|
||||
#if U_IOSTREAM_SOURCE >= 199711
|
||||
#include <iostream>
|
||||
using namespace std;
|
||||
#elif U_IOSTREAM_SOURCE >= 198506
|
||||
#include <iostream.h>
|
||||
#endif
|
||||
|
||||
const char CharTypeBuilder::tagStrings[] = "MnMcMeNdNlNoZsZlZpCcCfCsCoCnLuLlLtLmLoPcPdPsPePoSmScSkSoPiPf";
|
||||
const int16_t CharTypeBuilder::tagValues[] =
|
||||
{
|
||||
/* Mn */ (int16_t)NON_SPACING_MARK,
|
||||
/* Mc */ (int16_t)COMBINING_SPACING_MARK,
|
||||
/* Me */ (int16_t)ENCLOSING_MARK,
|
||||
/* Nd */ (int16_t)DECIMAL_DIGIT_NUMBER,
|
||||
/* Nl */ (int16_t)LETTER_NUMBER,
|
||||
/* No */ (int16_t)OTHER_NUMBER,
|
||||
/* Zs */ (int16_t)SPACE_SEPARATOR,
|
||||
/* Zl */ (int16_t)LINE_SEPARATOR,
|
||||
/* Zp */ (int16_t)PARAGRAPH_SEPARATOR,
|
||||
/* Cc */ (int16_t)CONTROL,
|
||||
/* Cf */ (int16_t)FORMAT,
|
||||
/* Cs */ (int16_t)SURROGATE,
|
||||
/* Co */ (int16_t)PRIVATE_USE,
|
||||
/* Cn */ (int16_t)UNASSIGNED,
|
||||
/* Lu */ (int16_t)UPPERCASE_LETTER,
|
||||
/* Ll */ (int16_t)LOWERCASE_LETTER,
|
||||
/* Lt */ (int16_t)TITLECASE_LETTER,
|
||||
/* Lm */ (int16_t)MODIFIER_LETTER,
|
||||
/* Lo */ (int16_t)OTHER_LETTER,
|
||||
/* Pc */ (int16_t)CONNECTOR_PUNCTUATION,
|
||||
/* Pd */ (int16_t)DASH_PUNCTUATION,
|
||||
/* Ps */ (int16_t)START_PUNCTUATION,
|
||||
/* Pe */ (int16_t)END_PUNCTUATION,
|
||||
/* Po */ (int16_t)OTHER_PUNCTUATION,
|
||||
/* Sm */ (int16_t)MATH_SYMBOL,
|
||||
/* Sc */ (int16_t)CURRENCY_SYMBOL,
|
||||
/* Sk */ (int16_t)MODIFIER_SYMBOL,
|
||||
/* So */ (int16_t)OTHER_SYMBOL,
|
||||
/* Pi */ (int16_t)INITIAL_PUNCTUATION,
|
||||
/* Pf */ (int16_t)FINAL_PUNCTUATION
|
||||
};
|
||||
|
||||
const UChar CharTypeBuilder:: LAST_CHAR_CODE_IN_FILE = 0xFFFD;
|
||||
|
||||
CompactByteArray* CharTypeBuilder::charTypeArray = 0;
|
||||
int
|
||||
CharTypeBuilder::MakeProp(char* str)
|
||||
{
|
||||
int result = 0;
|
||||
char* matchPosition;
|
||||
|
||||
matchPosition = strstr(tagStrings, str);
|
||||
if (matchPosition == 0) fprintf(stderr, "unrecognized type letter %s\n", str);
|
||||
else result = ((matchPosition - tagStrings) / 2);
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
 * Returns the character type array, building it on the first call.
 *
 * When charTypeArray is still 0 the function reads `input` line by line.
 * Lines that start with '#', or that are empty, are ignored.  For every other
 * line the leading hexadecimal number is taken as the code point and the
 * first two characters of the third semicolon-separated field as the
 * category abbreviation, which is translated with MakeProp()/tagValues[] and
 * stored with ucmp8_set().  Reading stops at end of file or after
 * LAST_CHAR_CODE_IN_FILE has been stored.  Seven fixed ranges are then filled
 * with the value of their first code point, the stream is closed and the
 * array is compacted with cycle 1.
 *
 * Malformed lines (no code point, code point outside 0..0xFFFF, missing
 * fields, abbreviation shorter than two characters) still trip the asserts
 * in debug builds; in builds without asserts they are reported on stderr and
 * skipped.
 *
 * @param input  open text stream; only read (and closed) when the array has
 *               not been built yet.
 * @return       the array stored in charTypeArray, or 0 when it could not be
 *               allocated.
 */
CompactByteArray*
CharTypeBuilder::getByteArray(FILE* input)
{
    if (charTypeArray == 0) {
        char buffer[1000];

        try {
            charTypeArray = ucmp8_open((int8_t)CharTypeBuilder::UNASSIGNED);
            if (charTypeArray == 0) {
                fprintf(stderr, "Cannot allocate the character type array.\n");
                if (input) fclose(input);
                return 0;
            }

            for (;;) {
                char* field = fgets(buffer, 999, input);
                if (field == NULL) break;
                if (field[0] == '#' || field[0] == '\n' || field[0] == 0) continue;

                /* %X stores through an unsigned int pointer. */
                unsigned int unicode = 0;
                if (sscanf(field, "%X", &unicode) != 1) {
                    fprintf(stderr, "skipping line without a code point: %s", buffer);
                    continue;
                }
                assert(unicode < 65536);
                if (unicode >= 65536) {
                    fprintf(stderr, "skipping code point outside 0..FFFF: %s", buffer);
                    continue;
                }

                field = strchr(field, ';');
                assert(field != NULL);
                if (field != NULL) {
                    field = strchr(field + 1, ';'); // go to start of third field
                    assert(field != NULL);
                }
                if (field == NULL) {
                    fprintf(stderr, "skipping line with too few fields: %s", buffer);
                    continue;
                }
                field++;

                /* Cutting the field after two characters is only safe when
                 * those two characters exist. */
                if (strlen(field) < 2) {
                    fprintf(stderr, "skipping line with a truncated type field: %s", buffer);
                    continue;
                }
                field[2] = 0;
                ucmp8_set(charTypeArray, (UChar)unicode, (int8_t)tagValues[MakeProp(field)]);
                if (unicode == LAST_CHAR_CODE_IN_FILE)
                    break;
            }

            /* Check the database to see if this needs to be updated!!! */
            ucmp8_setRange(charTypeArray, 0x3401, 0x4db4, ucmp8_get(charTypeArray, 0x3400));
            ucmp8_setRange(charTypeArray, 0x4e01, 0x9fa4, ucmp8_get(charTypeArray, 0x4e00));
            ucmp8_setRange(charTypeArray, 0xac01, 0xd7a2, ucmp8_get(charTypeArray, 0xac00));
            ucmp8_setRange(charTypeArray, 0xd801, 0xdb7e, ucmp8_get(charTypeArray, 0xd800));
            ucmp8_setRange(charTypeArray, 0xdb81, 0xdbfe, ucmp8_get(charTypeArray, 0xdb80));
            ucmp8_setRange(charTypeArray, 0xdc01, 0xdffe, ucmp8_get(charTypeArray, 0xdc00));
            ucmp8_setRange(charTypeArray, 0xe001, 0xf8fe, ucmp8_get(charTypeArray, 0xe000));

            if (input) fclose(input);
            ucmp8_compact(charTypeArray, 1);
        }
        catch (...) {
            fprintf(stderr, "Error Occured while parsing unicode data file.\n");
        }
    }
    return charTypeArray;
}
|
||||
|
||||
void
|
||||
CharTypeBuilder::writeByteArrays()
|
||||
{
|
||||
const int8_t* values = ucmp8_getArray(charTypeArray);
|
||||
const uint16_t* indexes = ucmp8_getIndex(charTypeArray);
|
||||
int32_t i;
|
||||
int32_t cnt = ucmp8_getCount(charTypeArray);
|
||||
cout << "\nconst unsigned short Unicode::indicies[] = {\n ";
|
||||
for (i = 0; i < ucmp8_getkIndexCount()-1; i++)
|
||||
{
|
||||
cout << "(uint16_t)" << ((indexes[i] >= 0) ? (int)indexes[i] : (int)(indexes[i]+ucmp8_getkUnicodeCount()))
|
||||
<< ", ";
|
||||
if (i != 0)
|
||||
if (i % 3 == 0)
|
||||
cout << "\n ";
|
||||
}
|
||||
cout << " (uint16_t)" << ((indexes[ucmp8_getkIndexCount()-1] >= 0) ? (int)indexes[i] : (int)(indexes[i]+ucmp8_getkUnicodeCount()))
|
||||
<< " };\n";
|
||||
cout << "\nconst char Unicode::values[] = {\n ";
|
||||
for (i = 0; i < cnt-1; i++)
|
||||
{
|
||||
cout << "(int8_t)" << (int)values[i] << ", ";
|
||||
if (i != 0)
|
||||
if (i % 5 == 0)
|
||||
cout << "\n ";
|
||||
}
|
||||
cout << " (int8_t)" << (int)values[cnt-1] << " }\n";
|
||||
cout << "const short Unicode::offsetCount = " << cnt << ";\n";
|
||||
}
|
||||
/**
|
||||
* The main function builds the CharType data array and prints it to System.out
|
||||
*/
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
if (argc != 2) {
|
||||
printf("Usage : chartype filename\n\n");
|
||||
exit(1);
|
||||
}
|
||||
FILE *input = fopen(argv[1], "r");
|
||||
if (input == 0) {
|
||||
printf("Cannot open the input file: %s\n\n", argv[1]);
|
||||
exit(1);
|
||||
}
|
||||
CompactByteArray* arrays = CharTypeBuilder::getByteArray(input);
|
||||
CharTypeBuilder::writeByteArrays();
|
||||
return 0;
|
||||
}
|
||||
|
@ -1,91 +0,0 @@
|
||||
/*
|
||||
*****************************************************************************************
|
||||
*
|
||||
* Copyright (C) 1994-2001, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*****************************************************************************************
|
||||
*/
|
||||
// FILE NAME : chartbld.h
|
||||
//
|
||||
// CREATED
|
||||
// Wednesday, December 11, 1996
|
||||
//
|
||||
// CREATED BY
|
||||
// Helena Shih
|
||||
//
|
||||
//
|
||||
//********************************************************************************************
|
||||
|
||||
|
||||
#ifndef _CHARTBLD
|
||||
#define _CHARTBLD
|
||||
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
|
||||
|
||||
#include "ucmp8.h"
|
||||
|
||||
|
||||
/**
|
||||
* CharTypeBuilder is an internal class that creates a CompactByteArray for use by
|
||||
* CharType. The array is constructed from a data file. The name is specified in
|
||||
* the hard coded constant INPUT_FILE_NAME. CharTypeBuilder is run as an application
|
||||
* and the output sent to System.out is then copied into the CharType.java source file.
|
||||
*/
|
||||
class CharTypeBuilder
|
||||
{
|
||||
public :
|
||||
|
||||
enum ECharTypeMapping {
|
||||
UNASSIGNED = 0,
|
||||
UPPERCASE_LETTER = 1,
|
||||
LOWERCASE_LETTER = 2,
|
||||
TITLECASE_LETTER = 3,
|
||||
MODIFIER_LETTER = 4,
|
||||
OTHER_LETTER = 5,
|
||||
NON_SPACING_MARK = 6,
|
||||
ENCLOSING_MARK = 7,
|
||||
COMBINING_SPACING_MARK = 8,
|
||||
DECIMAL_DIGIT_NUMBER = 9,
|
||||
LETTER_NUMBER = 10,
|
||||
OTHER_NUMBER = 11,
|
||||
SPACE_SEPARATOR = 12,
|
||||
LINE_SEPARATOR = 13,
|
||||
PARAGRAPH_SEPARATOR = 14,
|
||||
CONTROL = 15,
|
||||
FORMAT = 16,
|
||||
PRIVATE_USE = 17,
|
||||
SURROGATE = 18,
|
||||
DASH_PUNCTUATION = 19,
|
||||
START_PUNCTUATION = 20,
|
||||
END_PUNCTUATION = 21,
|
||||
CONNECTOR_PUNCTUATION = 22,
|
||||
OTHER_PUNCTUATION = 23,
|
||||
MATH_SYMBOL = 24,
|
||||
CURRENCY_SYMBOL = 25,
|
||||
MODIFIER_SYMBOL = 26,
|
||||
OTHER_SYMBOL = 27,
|
||||
INITIAL_PUNCTUATION = 28,
|
||||
FINAL_PUNCTUATION = 29
|
||||
};
|
||||
|
||||
static CompactByteArray* getByteArray(FILE*);
|
||||
static void writeByteArrays(void);
|
||||
|
||||
private :
|
||||
static int MakeProp(char* str);
|
||||
|
||||
static const char tagStrings[];
|
||||
static const short tagValues[];
|
||||
|
||||
//LAST_CHAR_CODE_IN_FILE is taken from the data file itself. If the
|
||||
// data file changes, this value may need to be changed also.
|
||||
// After this value is read, the program exits.
|
||||
static const UChar LAST_CHAR_CODE_IN_FILE;
|
||||
|
||||
static CompactByteArray *charTypeArray;
|
||||
};
|
||||
#endif
|
@ -1,114 +0,0 @@
|
||||
# Microsoft Developer Studio Project File - Name="chartype" - Package Owner=<4>
|
||||
# Microsoft Developer Studio Generated Build File, Format Version 6.00
|
||||
# ** DO NOT EDIT **
|
||||
|
||||
# TARGTYPE "Win32 (x86) Console Application" 0x0103
|
||||
|
||||
CFG=chartype - Win32 Release
|
||||
!MESSAGE This is not a valid makefile. To build this project using NMAKE,
|
||||
!MESSAGE use the Export Makefile command and run
|
||||
!MESSAGE
|
||||
!MESSAGE NMAKE /f "chartype.mak".
|
||||
!MESSAGE
|
||||
!MESSAGE You can specify a configuration when running NMAKE
|
||||
!MESSAGE by defining the macro CFG on the command line. For example:
|
||||
!MESSAGE
|
||||
!MESSAGE NMAKE /f "chartype.mak" CFG="chartype - Win32 Release"
|
||||
!MESSAGE
|
||||
!MESSAGE Possible choices for configuration are:
|
||||
!MESSAGE
|
||||
!MESSAGE "chartype - Win32 Release" (based on "Win32 (x86) Console Application")
|
||||
!MESSAGE "chartype - Win32 Debug" (based on "Win32 (x86) Console Application")
|
||||
!MESSAGE
|
||||
|
||||
# Begin Project
|
||||
# PROP AllowPerConfigDependencies 0
|
||||
# PROP Scc_ProjName ""
|
||||
# PROP Scc_LocalPath ""
|
||||
CPP=cl.exe
|
||||
RSC=rc.exe
|
||||
|
||||
!IF "$(CFG)" == "chartype - Win32 Release"
|
||||
|
||||
# PROP BASE Use_MFC 0
|
||||
# PROP BASE Use_Debug_Libraries 0
|
||||
# PROP BASE Output_Dir ".\Release"
|
||||
# PROP BASE Intermediate_Dir ".\Release"
|
||||
# PROP BASE Target_Dir ""
|
||||
# PROP Use_MFC 0
|
||||
# PROP Use_Debug_Libraries 0
|
||||
# PROP Output_Dir ".\Release"
|
||||
# PROP Intermediate_Dir ".\Release"
|
||||
# PROP Ignore_Export_Lib 0
|
||||
# PROP Target_Dir ""
|
||||
# ADD BASE CPP /nologo /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /YX /c
|
||||
# ADD CPP /nologo /W3 /GX /O2 /I "..\..\..\include" /I "..\..\..\source\common" /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /YX /FD /c
|
||||
# ADD BASE RSC /l 0x409 /d "NDEBUG"
|
||||
# ADD RSC /l 0x409 /d "NDEBUG"
|
||||
BSC32=bscmake.exe
|
||||
# ADD BASE BSC32 /nologo
|
||||
# ADD BSC32 /nologo
|
||||
LINK32=link.exe
|
||||
# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:I386
|
||||
# ADD LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:I386
|
||||
|
||||
!ELSEIF "$(CFG)" == "chartype - Win32 Debug"
|
||||
|
||||
# PROP BASE Use_MFC 0
|
||||
# PROP BASE Use_Debug_Libraries 1
|
||||
# PROP BASE Output_Dir ".\Debug"
|
||||
# PROP BASE Intermediate_Dir ".\Debug"
|
||||
# PROP BASE Target_Dir ""
|
||||
# PROP Use_MFC 0
|
||||
# PROP Use_Debug_Libraries 1
|
||||
# PROP Output_Dir ".\Debug"
|
||||
# PROP Intermediate_Dir ".\Debug"
|
||||
# PROP Ignore_Export_Lib 0
|
||||
# PROP Target_Dir ""
|
||||
# ADD BASE CPP /nologo /W3 /Gm /GX /Zi /Od /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /YX /c
|
||||
# ADD CPP /nologo /W3 /Gm /GX /ZI /Od /I "..\..\..\include" /I "..\..\..\source\common" /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /YX /FD /c
|
||||
# ADD BASE RSC /l 0x409 /d "_DEBUG"
|
||||
# ADD RSC /l 0x409 /d "_DEBUG"
|
||||
BSC32=bscmake.exe
|
||||
# ADD BASE BSC32 /nologo
|
||||
# ADD BSC32 /nologo
|
||||
LINK32=link.exe
|
||||
# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:I386
|
||||
# ADD LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:I386
|
||||
|
||||
!ENDIF
|
||||
|
||||
# Begin Target
|
||||
|
||||
# Name "chartype - Win32 Release"
|
||||
# Name "chartype - Win32 Debug"
|
||||
# Begin Group "Source Files"
|
||||
|
||||
# PROP Default_Filter "cpp;c;cxx;rc;def;r;odl;hpj;bat;for;f90"
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\chartbld.cpp
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\ucmp8.c
|
||||
# End Source File
|
||||
# End Group
|
||||
# Begin Group "Header Files"
|
||||
|
||||
# PROP Default_Filter "h;hpp;hxx;hm;inl;fi;fd"
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\chartbld.h
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\ucmp8.h
|
||||
# End Source File
|
||||
# End Group
|
||||
# Begin Group "Resource Files"
|
||||
|
||||
# PROP Default_Filter "ico;cur;bmp;dlg;rc2;rct;bin;cnt;rtf;gif;jpg;jpeg;jpe"
|
||||
# End Group
|
||||
# End Target
|
||||
# End Project
|
@ -1,29 +0,0 @@
|
||||
Microsoft Developer Studio Workspace File, Format Version 6.00
|
||||
# WARNING: DO NOT EDIT OR DELETE THIS WORKSPACE FILE!
|
||||
|
||||
###############################################################################
|
||||
|
||||
Project: "chartype"=.\chartype.dsp - Package Owner=<4>
|
||||
|
||||
Package=<5>
|
||||
{{{
|
||||
}}}
|
||||
|
||||
Package=<4>
|
||||
{{{
|
||||
}}}
|
||||
|
||||
###############################################################################
|
||||
|
||||
Global:
|
||||
|
||||
Package=<5>
|
||||
{{{
|
||||
}}}
|
||||
|
||||
Package=<3>
|
||||
{{{
|
||||
}}}
|
||||
|
||||
###############################################################################
|
||||
|
@ -1,387 +0,0 @@
|
||||
|
||||
/*
|
||||
********************************************************************
|
||||
*
|
||||
* Copyright (C) 1997-2000, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
********************************************************************
|
||||
*/
|
||||
|
||||
#ifndef _STDLIB_H
|
||||
#include <stdlib.h>
|
||||
#endif
|
||||
|
||||
#ifndef _STDIO_H
|
||||
#include <stdio.h>
|
||||
#endif
|
||||
|
||||
|
||||
#include "ucmp8.h"
|
||||
#include "cmemory.h"
|
||||
|
||||
static int32_t findOverlappingPosition(CompactByteArray* this,
|
||||
uint32_t start,
|
||||
const UChar *tempIndex,
|
||||
int32_t tempIndexCount,
|
||||
uint32_t cycle);
|
||||
|
||||
/* internal constants*/
|
||||
|
||||
/* Compile-time forms of the layout constants: a block holds
 * 2^kBlockShift_int values and the index holds one entry per block. */
#define kUnicodeCount_int  65536
#define kBlockShift_int    7
#define kBlockCount_int    (1 << kBlockShift_int)
#define kIndexShift_int    (16 - kBlockShift_int)
#define kIndexCount_int    (1 << kIndexShift_int)
#define kBlockMask_int     (kBlockCount_int - 1)

/* The same constants as linkable objects. */
const int32_t  UCMP8_kUnicodeCount = kUnicodeCount_int;
const int32_t  UCMP8_kBlockShift   = kBlockShift_int;
const int32_t  UCMP8_kBlockCount   = kBlockCount_int;
const int32_t  UCMP8_kIndexShift   = kIndexShift_int;
const int32_t  UCMP8_kIndexCount   = kIndexCount_int;
const uint32_t UCMP8_kBlockMask    = kBlockMask_int;
|
||||
|
||||
|
||||
/* Function accessor for UCMP8_kUnicodeCount. */
int32_t ucmp8_getkUnicodeCount()
{
    return UCMP8_kUnicodeCount;
}
|
||||
/* Function accessor for UCMP8_kBlockCount. */
int32_t ucmp8_getkBlockCount()
{
    return UCMP8_kBlockCount;
}
|
||||
/* Function accessor for UCMP8_kIndexCount. */
int32_t ucmp8_getkIndexCount()
{
    return UCMP8_kIndexCount;
}
|
||||
/* debug flags*/
|
||||
/*=======================================================*/
|
||||
/*
 * Looks up the value stored for index: the high bits of index select an
 * entry of fIndex, which gives the start of the block inside fArray, and
 * the low bits select the element within that block.
 */
U_CAPI int8_t ucmp8_get(CompactByteArray* array, uint16_t index)
{
    const int32_t  blockStart    = array->fIndex[index >> UCMP8_kBlockShift] & 0xFFFF;
    const uint32_t offsetInBlock = index & UCMP8_kBlockMask;

    return array->fArray[blockStart + offsetInBlock];
}
|
||||
/* Same lookup as ucmp8_get(), with the result converted to uint8_t. */
U_CAPI uint8_t ucmp8_getu(CompactByteArray* array, uint16_t index)
{
    const int8_t signedValue = ucmp8_get(array, index);

    return (uint8_t)signedValue;
}
|
||||
|
||||
CompactByteArray* ucmp8_open(int8_t defaultValue)
|
||||
{
|
||||
/* set up the index array and the data array.
|
||||
* the index array always points into particular parts of the data array
|
||||
* it is initially set up to point at regular block boundaries
|
||||
* The following example uses blocks of 4 for simplicity
|
||||
* Example: Expanded
|
||||
* INDEX# 0 1 2 3 4
|
||||
* INDEX 0 4 8 12 16 ...
|
||||
* ARRAY abcdeababcedzyabcdea...
|
||||
* | | | | | |...
|
||||
* whenever you set an element in the array, it unpacks to this state
|
||||
* After compression, the index will point to various places in the data array
|
||||
* wherever there is a runs of the same elements as in the original
|
||||
* Example: Compressed
|
||||
* INDEX# 0 1 2 3 4
|
||||
* INDEX 0 4 1 8 2 ...
|
||||
* ARRAY abcdeabazyabc...
|
||||
* If you look at the example, index# 2 in the expanded version points
|
||||
* to data position number 8, which has elements "bced". In the compressed
|
||||
* version, index# 2 points to data position 1, which also has "bced"
|
||||
*/
|
||||
CompactByteArray* this = (CompactByteArray*) uprv_malloc(sizeof(CompactByteArray));
|
||||
int32_t i;
|
||||
|
||||
if (this == NULL) return NULL;
|
||||
|
||||
this->fArray = NULL;
|
||||
this->fIndex = NULL;
|
||||
this->fCount = UCMP8_kUnicodeCount;
|
||||
this->fCompact = FALSE;
|
||||
this->fBogus = FALSE;
|
||||
|
||||
|
||||
this->fArray = (int8_t*) uprv_malloc(sizeof(int8_t) * UCMP8_kUnicodeCount);
|
||||
if (!this->fArray)
|
||||
{
|
||||
this->fBogus = TRUE;
|
||||
return NULL;
|
||||
}
|
||||
this->fIndex = (uint16_t*) uprv_malloc(sizeof(uint16_t) * UCMP8_kIndexCount);
|
||||
if (!this->fIndex)
|
||||
{
|
||||
uprv_free(this->fArray);
|
||||
this->fArray = NULL;
|
||||
this->fBogus = TRUE;
|
||||
return NULL;
|
||||
}
|
||||
for (i = 0; i < UCMP8_kUnicodeCount; ++i)
|
||||
{
|
||||
this->fArray[i] = defaultValue;
|
||||
}
|
||||
for (i = 0; i < UCMP8_kIndexCount; ++i)
|
||||
{
|
||||
this->fIndex[i] = (uint16_t)(i << UCMP8_kBlockShift);
|
||||
}
|
||||
|
||||
return this;
|
||||
}
|
||||
|
||||
CompactByteArray* ucmp8_openAdopt(uint16_t *indexArray,
|
||||
int8_t *newValues,
|
||||
int32_t count)
|
||||
{
|
||||
CompactByteArray* this = (CompactByteArray*) uprv_malloc(sizeof(CompactByteArray));
|
||||
if (!this) return NULL;
|
||||
|
||||
this->fArray = NULL;
|
||||
this->fIndex = NULL;
|
||||
this->fCount = count;
|
||||
this->fBogus = FALSE;
|
||||
|
||||
this->fArray = newValues;
|
||||
this->fIndex = indexArray;
|
||||
this->fCompact = (count < UCMP8_kUnicodeCount) ? TRUE : FALSE;
|
||||
|
||||
return this;
|
||||
}
|
||||
|
||||
/*=======================================================*/
|
||||
|
||||
/*
 * Releases the value array, the index array and the struct itself.
 * Passing NULL is a no-op (previously it dereferenced the NULL pointer).
 */
void ucmp8_close(CompactByteArray* this)
{
    if (this == NULL)
    {
        return;
    }

    uprv_free(this->fArray);
    this->fArray = NULL;
    uprv_free(this->fIndex);
    this->fIndex = NULL;
    this->fCount = 0;
    this->fCompact = FALSE;
    uprv_free(this);
}
|
||||
|
||||
|
||||
/*=======================================================*/
|
||||
|
||||
/*
 * Turns a compact array back into the expanded form: a value array of
 * UCMP8_kUnicodeCount entries with the index pointing at regular block
 * boundaries. Does nothing when the array is not compact. On allocation
 * failure fBogus is set and the array is left unchanged.
 *
 * Example: Compressed
 * INDEX# 0 1 2 3 4
 * INDEX 0 4 1 8 2 ...
 * ARRAY abcdeabazyabc...
 * turns into
 * Example: Expanded
 * INDEX# 0 1 2 3 4
 * INDEX 0 4 8 12 16 ...
 * ARRAY abcdeababcedzyabcdea...
 */
void ucmp8_expand(CompactByteArray* this)
{
    int32_t i;
    int8_t* tempArray;

    if (!this->fCompact)
    {
        return;
    }

    tempArray = (int8_t*) uprv_malloc(sizeof(int8_t) * UCMP8_kUnicodeCount);
    if (!tempArray)
    {
        this->fBogus = TRUE;
        return;
    }

    /* can optimize later: walk the blocks instead of calling ucmp8_get */
    for (i = 0; i < UCMP8_kUnicodeCount; ++i)
    {
        tempArray[i] = ucmp8_get(this, (UChar)i);
    }
    for (i = 0; i < UCMP8_kIndexCount; ++i)
    {
        this->fIndex[i] = (uint16_t)(i << UCMP8_kBlockShift);
    }

    uprv_free(this->fArray);
    this->fArray = tempArray;
    /* the value array now has the full size again; keep fCount in step
     * (it used to keep the stale compacted length) */
    this->fCount = UCMP8_kUnicodeCount;
    this->fCompact = FALSE;
}
|
||||
|
||||
|
||||
/*=======================================================*/
|
||||
/* this->fArray: an array to be overlapped
|
||||
* start and count: specify the block to be overlapped
|
||||
* tempIndex: the overlapped array (actually indices back into inputContents)
|
||||
* inputHash: an index of hashes for tempIndex, where
|
||||
* inputHash[i] = XOR of values from i-count+1 to i
|
||||
*/
|
||||
int32_t
|
||||
findOverlappingPosition(CompactByteArray* this,
|
||||
uint32_t start,
|
||||
const UChar* tempIndex,
|
||||
int32_t tempIndexCount,
|
||||
uint32_t cycle)
|
||||
{
|
||||
/* this is a utility routine for finding blocks that overlap.
|
||||
* IMPORTANT: the cycle number is very important. Small cycles take a lot
|
||||
* longer to work. In some cases, they may be able to get better compaction.
|
||||
*/
|
||||
|
||||
int32_t i;
|
||||
int32_t j;
|
||||
int32_t currentCount;
|
||||
|
||||
for (i = 0; i < tempIndexCount; i += cycle)
|
||||
{
|
||||
currentCount = UCMP8_kBlockCount;
|
||||
if (i + UCMP8_kBlockCount > tempIndexCount)
|
||||
{
|
||||
currentCount = tempIndexCount - i;
|
||||
}
|
||||
for (j = 0; j < currentCount; ++j)
|
||||
{
|
||||
if (this->fArray[start + j] != this->fArray[tempIndex[i + j]]) break;
|
||||
}
|
||||
if (j == currentCount) break;
|
||||
}
|
||||
|
||||
return i;
|
||||
}
|
||||
|
||||
UBool
|
||||
ucmp8_isBogus(const CompactByteArray* this)
|
||||
{
|
||||
return this->fBogus;
|
||||
}
|
||||
|
||||
const int8_t*
|
||||
ucmp8_getArray(const CompactByteArray* this)
|
||||
{
|
||||
return this->fArray;
|
||||
}
|
||||
|
||||
const uint16_t*
|
||||
ucmp8_getIndex(const CompactByteArray* this)
|
||||
{
|
||||
return this->fIndex;
|
||||
}
|
||||
|
||||
int32_t
|
||||
ucmp8_getCount(const CompactByteArray* this)
|
||||
{
|
||||
return this->fCount;
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
ucmp8_set(CompactByteArray* this,
|
||||
UChar c,
|
||||
int8_t value)
|
||||
{
|
||||
if (this->fCompact == TRUE)
|
||||
{
|
||||
ucmp8_expand(this);
|
||||
if (this->fBogus) return;
|
||||
}
|
||||
this->fArray[(int32_t)c] = value;
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
ucmp8_setRange(CompactByteArray* this,
|
||||
UChar start,
|
||||
UChar end,
|
||||
int8_t value)
|
||||
{
|
||||
int32_t i;
|
||||
if (this->fCompact == TRUE)
|
||||
{
|
||||
ucmp8_expand(this);
|
||||
if (this->fBogus) return;
|
||||
}
|
||||
for (i = start; i <= end; ++i)
|
||||
{
|
||||
this->fArray[i] = value;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*=======================================================*/
|
||||
|
||||
void
|
||||
ucmp8_compact(CompactByteArray* this,
|
||||
uint32_t cycle)
|
||||
{
|
||||
if (!this->fCompact)
|
||||
{
|
||||
/* this actually does the compaction.
|
||||
* it walks throught the contents of the expanded array, finding the
|
||||
* first block in the data that matches the contents of the current index.
|
||||
* As it works, it keeps an updated pointer to the last position,
|
||||
* so that it knows how big to make the final array
|
||||
* If the matching succeeds, then the index will point into the data
|
||||
* at some earlier position.
|
||||
* If the matching fails, then last position pointer will be bumped,
|
||||
* and the index will point to that last block of data.
|
||||
*/
|
||||
UChar* tempIndex;
|
||||
int32_t tempIndexCount;
|
||||
int8_t* tempArray;
|
||||
int32_t iBlock, iIndex;
|
||||
|
||||
/* fix cycle, must be 0 < cycle <= blockcount*/
|
||||
if (cycle < 0) cycle = 1;
|
||||
else if (cycle > (uint32_t)UCMP8_kBlockCount) cycle = UCMP8_kBlockCount;
|
||||
|
||||
/* make temp storage, larger than we need*/
|
||||
tempIndex = (UChar*) uprv_malloc(sizeof(UChar)* UCMP8_kUnicodeCount);
|
||||
if (!tempIndex)
|
||||
{
|
||||
this->fBogus = TRUE;
|
||||
return;
|
||||
}
|
||||
/* set up first block.*/
|
||||
tempIndexCount = UCMP8_kBlockCount;
|
||||
for (iIndex = 0; iIndex < UCMP8_kBlockCount; ++iIndex)
|
||||
{
|
||||
tempIndex[iIndex] = (uint16_t)iIndex;
|
||||
}; /* endfor (iIndex = 0; .....)*/
|
||||
this->fIndex[0] = 0;
|
||||
|
||||
/* for each successive block, find out its first position in the compacted array*/
|
||||
for (iBlock = 1; iBlock < UCMP8_kIndexCount; ++iBlock)
|
||||
{
|
||||
int32_t newCount, firstPosition, block;
|
||||
block = iBlock << UCMP8_kBlockShift;
|
||||
/* if (debugSmall) if (block > debugSmallLimit) break;*/
|
||||
firstPosition = findOverlappingPosition(this,
|
||||
block,
|
||||
tempIndex,
|
||||
tempIndexCount,
|
||||
cycle);
|
||||
|
||||
/* if not contained in the current list, copy the remainder
|
||||
* invariant; cumulativeHash[iBlock] = XOR of values from iBlock-kBlockCount+1 to iBlock
|
||||
* we do this by XORing out cumulativeHash[iBlock-kBlockCount]
|
||||
*/
|
||||
newCount = firstPosition + UCMP8_kBlockCount;
|
||||
if (newCount > tempIndexCount)
|
||||
{
|
||||
for (iIndex = tempIndexCount; iIndex < newCount; ++iIndex)
|
||||
{
|
||||
tempIndex[iIndex] = (uint16_t)(iIndex - firstPosition + block);
|
||||
} /* endfor (iIndex = tempIndexCount....)*/
|
||||
tempIndexCount = newCount;
|
||||
} /* endif (newCount > tempIndexCount)*/
|
||||
this->fIndex[iBlock] = (uint16_t)firstPosition;
|
||||
} /* endfor (iBlock = 1.....)*/
|
||||
|
||||
/* now allocate and copy the items into the array*/
|
||||
tempArray = (int8_t*) uprv_malloc(tempIndexCount * sizeof(int8_t));
|
||||
if (!tempArray)
|
||||
{
|
||||
this->fBogus = TRUE;
|
||||
uprv_free(tempIndex);
|
||||
return;
|
||||
}
|
||||
for (iIndex = 0; iIndex < tempIndexCount; ++iIndex)
|
||||
{
|
||||
tempArray[iIndex] = this->fArray[tempIndex[iIndex]];
|
||||
}
|
||||
uprv_free(this->fArray);
|
||||
this->fArray = tempArray;
|
||||
this->fCount = tempIndexCount;
|
||||
|
||||
|
||||
/* free up temp storage*/
|
||||
uprv_free(tempIndex);
|
||||
this->fCompact = TRUE;
|
||||
} /* endif (!this->fCompact)*/
|
||||
}
|
||||
|
||||
|
||||
|
@ -1,87 +0,0 @@
|
||||
|
||||
/*
|
||||
********************************************************************
|
||||
*
|
||||
* Copyright (C) 1996-2000, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
********************************************************************
|
||||
*/
|
||||
|
||||
|
||||
|
||||
#ifndef UCMP8_H
|
||||
#define UCMP8_H
|
||||
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
/*====================================*/
|
||||
/* class CompactByteArray
|
||||
* Provides a compact way to store information that is indexed by Unicode values,
|
||||
* such as character properties, types, keyboard values, etc.
|
||||
* The ATypes are used by value, so should be small, integers or pointers.
|
||||
*====================================
|
||||
*/
|
||||
|
||||
U_CAPI const int32_t UCMP8_kUnicodeCount;
|
||||
U_CAPI const int32_t UCMP8_kBlockShift;
|
||||
U_CAPI const int32_t UCMP8_kBlockCount;
|
||||
U_CAPI const int32_t UCMP8_kIndexShift;
|
||||
U_CAPI const int32_t UCMP8_kIndexCount;
|
||||
U_CAPI const uint32_t UCMP8_kBlockMask;
|
||||
|
||||
U_CAPI int32_t ucmp8_getkUnicodeCount(void);
|
||||
U_CAPI int32_t ucmp8_getkBlockCount(void);
|
||||
U_CAPI int32_t ucmp8_getkIndexCount(void);
|
||||
typedef struct{
|
||||
int8_t* fArray;
|
||||
uint16_t* fIndex;
|
||||
int32_t fCount;
|
||||
UBool fCompact;
|
||||
UBool fBogus;
|
||||
} CompactByteArray;
|
||||
|
||||
U_CAPI CompactByteArray* ucmp8_open(int8_t defaultValue);
|
||||
U_CAPI CompactByteArray* ucmp8_openAdopt(uint16_t* indexArray,
|
||||
int8_t* newValues,
|
||||
int32_t count);
|
||||
U_CAPI void ucmp8_close(CompactByteArray* array);
|
||||
U_CAPI UBool isBogus(const CompactByteArray* array);
|
||||
|
||||
|
||||
U_CAPI int8_t ucmp8_get(CompactByteArray* array, uint16_t index);
|
||||
U_CAPI uint8_t ucmp8_getu(CompactByteArray* array, uint16_t index);
|
||||
|
||||
U_CAPI void ucmp8_set(CompactByteArray* array,
|
||||
UChar index,
|
||||
int8_t value);
|
||||
|
||||
U_CAPI void ucmp8_setRange(CompactByteArray* array,
|
||||
UChar start,
|
||||
UChar end,
|
||||
int8_t value);
|
||||
|
||||
U_CAPI int32_t ucmp8_getCount(const CompactByteArray* array);
|
||||
U_CAPI const int8_t* ucmp8_getArray(const CompactByteArray* array);
|
||||
U_CAPI const uint16_t* ucmp8_getIndex(const CompactByteArray* array);
|
||||
|
||||
/* Compact the array.
|
||||
The value of cycle determines how large the overlap can be.
|
||||
A cycle of 1 is the most compacted, but takes the most time to do.
|
||||
If values stored in the array tend to repeat in cycles of, say, 16,
|
||||
then using that will be faster than cycle = 1, and get almost the
|
||||
same compression.
|
||||
*/
|
||||
U_CAPI void ucmp8_compact(CompactByteArray* array,
|
||||
uint32_t cycle);
|
||||
|
||||
/* Expanded takes the array back to a 65536 element array*/
|
||||
U_CAPI void ucmp8_expand(CompactByteArray* array);
|
||||
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
|
@ -1,398 +0,0 @@
|
||||
/*
|
||||
*****************************************************************************************
|
||||
*
|
||||
* Copyright (C) 1997-200, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*****************************************************************************************
|
||||
*/
|
||||
/*===============================================================================
|
||||
*
|
||||
* File cmpshrta.cpp
|
||||
*
|
||||
* Modification History:
|
||||
*
|
||||
* Date Name Description
|
||||
* 2/5/97 aliu Added CompactIntArray streamIn and streamOut methods.
|
||||
* 3/4/97 aliu Tuned performance of CompactIntArray constructor,
|
||||
* 05/07/97 helena Added isBogus()
|
||||
* based on performance data indicating that this was slow.
|
||||
* 07/15/98 erm Synched with Java 1.2 CompactShortArray.java.
|
||||
* 07/30/98 erm Added changes from 07/29/98 code review.
|
||||
*===============================================================================
|
||||
*/
|
||||
#include "ucmp16.h"
|
||||
#include "cmemory.h"
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/* Nonzero when the len int16_t elements starting at source[sourceStart]
 * and at target[targetStart] are identical. The comparison used to be
 * "!= 0", which reported a match exactly when the regions differed. */
#define arrayRegionMatches(source, sourceStart, target, targetStart, len) \
    (uprv_memcmp(&(source)[(sourceStart)], &(target)[(targetStart)], (len) * sizeof(int16_t)) == 0)
|
||||
|
||||
/* internal constants*/
|
||||
/* Compile-time forms of the layout constants: a block holds
 * 2^UCMP16_kBlockShift_int values and the index holds one entry per block. */
#define UCMP16_kMaxUnicode_int    65535
#define UCMP16_kUnicodeCount_int  (UCMP16_kMaxUnicode_int + 1)
#define UCMP16_kBlockShift_int    7
#define UCMP16_kBlockCount_int    (1 << UCMP16_kBlockShift_int)
#define UCMP16_kBlockBytes_int    (UCMP16_kBlockCount_int * sizeof(int16_t))
#define UCMP16_kIndexShift_int    (16 - UCMP16_kBlockShift_int)
#define UCMP16_kIndexCount_int    (1 << UCMP16_kIndexShift_int)
#define UCMP16_kBlockMask_int     (UCMP16_kBlockCount_int - 1)

/* The same constants as linkable objects. */
const int32_t  UCMP16_kMaxUnicode   = UCMP16_kMaxUnicode_int;
const int32_t  UCMP16_kUnicodeCount = UCMP16_kUnicodeCount_int;
const int32_t  UCMP16_kBlockShift   = UCMP16_kBlockShift_int;
const int32_t  UCMP16_kBlockCount   = UCMP16_kBlockCount_int;
const int32_t  UCMP16_kBlockBytes   = UCMP16_kBlockBytes_int;
const int32_t  UCMP16_kIndexShift   = UCMP16_kIndexShift_int;
const int32_t  UCMP16_kIndexCount   = UCMP16_kIndexCount_int;
const uint32_t UCMP16_kBlockMask    = UCMP16_kBlockMask_int;
|
||||
|
||||
/**
|
||||
* Sets the array to the invalid memory state.
|
||||
*/
|
||||
static CompactShortArray* setToBogus(CompactShortArray* array);
|
||||
static void touchBlock(CompactShortArray* this,
|
||||
int32_t i,
|
||||
int16_t value);
|
||||
static UBool blockTouched(const CompactShortArray* this,
|
||||
int32_t i);
|
||||
|
||||
|
||||
/* debug flags*/
|
||||
/*=======================================================*/
|
||||
|
||||
int32_t ucmp16_getkUnicodeCount()
|
||||
{return UCMP16_kUnicodeCount;}
|
||||
|
||||
int32_t ucmp16_getkBlockCount()
|
||||
{return UCMP16_kBlockCount;}
|
||||
|
||||
int32_t ucmp16_getkIndexCount()
|
||||
{ return UCMP16_kIndexCount;}
|
||||
|
||||
CompactShortArray* ucmp16_open(int16_t defaultValue)
|
||||
{
|
||||
int32_t i;
|
||||
CompactShortArray* this = (CompactShortArray*) uprv_malloc(sizeof(CompactShortArray));
|
||||
if (this == NULL) return NULL;
|
||||
|
||||
this->fCount = UCMP16_kUnicodeCount;
|
||||
this->fCompact = FALSE;
|
||||
this->fBogus = FALSE;
|
||||
this->fArray = NULL;
|
||||
this->fIndex = NULL;
|
||||
this->fHashes = NULL;
|
||||
this->fDefaultValue = defaultValue;
|
||||
|
||||
this->fArray = (int16_t*)uprv_malloc(UCMP16_kUnicodeCount * sizeof(int16_t));
|
||||
if (this->fArray == NULL)
|
||||
{
|
||||
this->fBogus = TRUE;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
this->fIndex = (uint16_t*)uprv_malloc(UCMP16_kIndexCount * sizeof(uint16_t));
|
||||
if (this->fIndex == NULL)
|
||||
{
|
||||
uprv_free(this->fArray);
|
||||
this->fArray = NULL;
|
||||
|
||||
this->fBogus = TRUE;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
this->kBlockShift = UCMP16_kBlockShift;
|
||||
this->kBlockMask = UCMP16_kBlockMask;
|
||||
for (i = 0; i < UCMP16_kUnicodeCount; i += 1)
|
||||
{
|
||||
this->fArray[i] = defaultValue;
|
||||
}
|
||||
|
||||
this->fHashes =(int32_t*)uprv_malloc(UCMP16_kIndexCount * sizeof(int32_t));
|
||||
if (this->fHashes == NULL)
|
||||
{
|
||||
uprv_free(this->fArray);
|
||||
uprv_free(this->fIndex);
|
||||
this->fBogus = TRUE;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
for (i = 0; i < UCMP16_kIndexCount; i += 1)
|
||||
{
|
||||
this->fIndex[i] = (uint16_t)(i << UCMP16_kBlockShift);
|
||||
this->fHashes[i] = 0;
|
||||
}
|
||||
|
||||
return this;
|
||||
}
|
||||
|
||||
CompactShortArray* ucmp16_openAdopt(uint16_t *indexArray,
|
||||
int16_t *newValues,
|
||||
int32_t count,
|
||||
int16_t defaultValue)
|
||||
{
|
||||
CompactShortArray* this = (CompactShortArray*) uprv_malloc(sizeof(CompactShortArray));
|
||||
if (this == NULL) return NULL;
|
||||
this->fHashes = NULL;
|
||||
this->fCount = count;
|
||||
this->fDefaultValue = defaultValue;
|
||||
this->fBogus = FALSE;
|
||||
this->fArray = newValues;
|
||||
this->fIndex = indexArray;
|
||||
this->fCompact = count < UCMP16_kUnicodeCount;
|
||||
this->kBlockShift = UCMP16_kBlockShift;
|
||||
this->kBlockMask = UCMP16_kBlockMask;
|
||||
|
||||
return this;
|
||||
}
|
||||
|
||||
CompactShortArray* ucmp16_openAdoptWithBlockShift(uint16_t *indexArray,
|
||||
int16_t *newValues,
|
||||
int32_t count,
|
||||
int16_t defaultValue,
|
||||
int32_t blockShift)
|
||||
{
|
||||
CompactShortArray* this = ucmp16_openAdopt(indexArray,
|
||||
newValues,
|
||||
count,
|
||||
defaultValue);
|
||||
if (this == NULL) return NULL;
|
||||
|
||||
this->kBlockShift = blockShift;
|
||||
this->kBlockMask = (uint32_t) (((uint32_t)1 << (uint32_t)blockShift) - (uint32_t)1);
|
||||
|
||||
return this;
|
||||
}
|
||||
|
||||
/*=======================================================*/
|
||||
|
||||
/*
 * Releases the value array, the index array, the hash array and the
 * struct itself. Passing NULL is a no-op (previously it dereferenced the
 * NULL pointer).
 */
void ucmp16_close(CompactShortArray* this)
{
    if (this == NULL)
    {
        return;
    }

    uprv_free(this->fArray);
    uprv_free(this->fIndex);
    uprv_free(this->fHashes);
    uprv_free(this);
}
|
||||
|
||||
/*
 * Puts the array into the invalid-memory state: all three buffers are
 * freed and cleared, the count is zeroed, and fBogus is raised.
 * Returns the same pointer it was given.
 */
CompactShortArray* setToBogus(CompactShortArray* this)
{
    /* release the buffers */
    uprv_free(this->fArray);
    uprv_free(this->fIndex);
    uprv_free(this->fHashes);

    /* forget the released pointers */
    this->fArray = NULL;
    this->fIndex = NULL;
    this->fHashes = NULL;

    /* mark as empty and invalid */
    this->fCount = 0;
    this->fCompact = FALSE;
    this->fBogus = TRUE;

    return this;
}
|
||||
|
||||
|
||||
/*
 * Turns a compact array back into the expanded form: a value array of
 * UCMP16_kUnicodeCount entries with the index pointing at regular block
 * boundaries for this array's block shift. Does nothing when the array is
 * not compact. On allocation failure fBogus is set and the array is left
 * unchanged.
 */
void ucmp16_expand(CompactShortArray* this)
{
    int32_t i;
    int16_t *tempArray;

    if (!this->fCompact)
    {
        return;
    }

    tempArray = (int16_t*)uprv_malloc(UCMP16_kUnicodeCount * sizeof(int16_t));
    if (tempArray == NULL)
    {
        this->fBogus = TRUE;
        return;
    }

    for (i = 0; i < UCMP16_kUnicodeCount; i += 1)
    {
        tempArray[i] = ucmp16_get(this, (UChar)i);
    }

    for (i = 0; i < (1 << (16 - this->kBlockShift)); i += 1)
    {
        this->fIndex[i] = (uint16_t)(i << this->kBlockShift);
    }

    uprv_free(this->fArray);
    this->fArray = tempArray;
    /* the value array now has the full size again; keep fCount in step
     * (it used to keep the stale compacted length) */
    this->fCount = UCMP16_kUnicodeCount;
    this->fCompact = FALSE;
}
|
||||
|
||||
void ucmp16_set(CompactShortArray* this,
|
||||
UChar c,
|
||||
int16_t value)
|
||||
{
|
||||
if (this->fCompact)
|
||||
{
|
||||
ucmp16_expand(this);
|
||||
if (this->fBogus) return;
|
||||
}
|
||||
|
||||
this->fArray[(int32_t)c] = value;
|
||||
|
||||
if (value != this->fDefaultValue)
|
||||
{
|
||||
touchBlock(this, c >> this->kBlockShift, value);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void ucmp16_setRange(CompactShortArray* this,
|
||||
UChar start,
|
||||
UChar end,
|
||||
int16_t value)
|
||||
{
|
||||
int32_t i;
|
||||
if (this->fCompact)
|
||||
{
|
||||
ucmp16_expand(this);
|
||||
if (this->fBogus) return;
|
||||
}
|
||||
if (value != this->fDefaultValue)
|
||||
{
|
||||
for (i = start; i <= end; i += 1)
|
||||
{
|
||||
this->fArray[i] = value;
|
||||
touchBlock(this, i >> this->kBlockShift, value);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (i = start; i <= end; i += 1) this->fArray[i] = value;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*=======================================================*/
|
||||
/**
 * Compact the array in place by letting identical blocks share storage.
 *
 * Blocks are only compared on block boundaries. Each block is matched
 * against the blocks already kept (hash first, then a full comparison);
 * a block without a match is moved down to the end of the kept region.
 * Afterwards the value array is shrunk to the kept region, fCount is
 * updated, the hash table is released and the array is marked compact.
 *
 * Does nothing if the array is already compact, is bogus, or has no hash
 * table to consult.
 *
 * @param this the array to compact
 */
void ucmp16_compact(CompactShortArray* this)
{
    if (!this->fCompact)
    {
        const int32_t blockSize  = (int32_t)1 << this->kBlockShift;
        const int32_t blockCount = (int32_t)1 << (16 - this->kBlockShift);
        int32_t limitCompacted = 0;
        int32_t i, iBlockStart;
        /* Offset of the first kept untouched block, or -1 if none yet.
         * Kept as int32_t so offsets >= 0x8000 are not squeezed through
         * a signed 16-bit value. */
        int32_t iUntouched = -1;

        /* blockTouched() and the matching loop below index fHashes. */
        if (this->fBogus || this->fHashes == NULL)
        {
            return;
        }

        for (i = 0, iBlockStart = 0; i < blockCount; i += 1, iBlockStart += blockSize)
        {
            UBool touched = blockTouched(this, i);

            /* 0xFFFF marks "no home found yet"; block starts are aligned
             * to the block size and so never equal it. */
            this->fIndex[i] = 0xFFFF;

            if (!touched && iUntouched != -1)
            {
                /* If no values in this block were set, we can just set its
                 * index to be the same as some other block with no values
                 * set, assuming we've seen one yet.
                 */
                this->fIndex[i] = (uint16_t)iUntouched;
            }
            else
            {
                int32_t j, jBlockStart;

                for (j = 0, jBlockStart = 0;
                     j < limitCompacted;
                     j += 1, jBlockStart += blockSize)
                {
                    if (this->fHashes[i] == this->fHashes[j] &&
                        arrayRegionMatches(this->fArray,
                                           iBlockStart,
                                           this->fArray,
                                           jBlockStart,
                                           blockSize))
                    {
                        this->fIndex[i] = (uint16_t)jBlockStart;
                        /* Any identical block will do; stop at the first. */
                        break;
                    }
                }

                if (this->fIndex[i] == 0xFFFF)
                {
                    /* No match: the loop ran to completion, so j equals
                     * limitCompacted and jBlockStart is the first free
                     * slot. Skip the copy when the block is already
                     * there (memcpy must not be given overlapping
                     * regions). */
                    if (jBlockStart != iBlockStart)
                    {
                        uprv_memcpy(&(this->fArray[jBlockStart]),
                                    &(this->fArray[iBlockStart]),
                                    blockSize * sizeof(int16_t));
                    }

                    this->fIndex[i] = (uint16_t)jBlockStart;
                    this->fHashes[j] = this->fHashes[i];
                    limitCompacted += 1;

                    if (!touched)
                    {
                        /* If this is the first untouched block we've seen,*/
                        /* remember its index.*/
                        iUntouched = jBlockStart;
                    }
                }
            }
        }

        /* we are done compacting, so now make the array shorter*/
        {
            int32_t newSize = limitCompacted * blockSize;
            int16_t *result = (int16_t*) uprv_malloc(sizeof(int16_t) * newSize);

            if (result != NULL)
            {
                uprv_memcpy(result, this->fArray, newSize * sizeof(int16_t));
                uprv_free(this->fArray);
                this->fArray = result;
            }
            /* If the smaller buffer could not be allocated, the existing
             * buffer is kept: its first newSize entries already hold the
             * compacted data, so the array remains valid. */

            this->fCount = newSize;
            uprv_free(this->fHashes);
            this->fHashes = NULL;

            this->fCompact = TRUE;
        }
    }
}
|
||||
|
||||
/**
|
||||
* Query whether a specified block was "touched", i.e. had a value set.
|
||||
* Untouched blocks can be skipped when compacting the array
|
||||
*/
|
||||
|
||||
int16_t ucmp16_getDefaultValue(const CompactShortArray* this)
|
||||
{
|
||||
return this->fDefaultValue;
|
||||
}
|
||||
|
||||
|
||||
/**
 * Record that a value was stored in block i by folding the value into the
 * block's hash. The low bit is always set, so a touched block never has a
 * zero hash.
 *
 * The arithmetic is done on unsigned values: shifting a negative int16_t
 * left, or overflowing the signed running sum, would be undefined
 * behavior. For values where the old expression was defined the resulting
 * hash is the same.
 *
 * @param this  the array owning the hash table
 * @param i     the block number
 * @param value the value that was stored in the block
 */
void touchBlock(CompactShortArray* this,
                int32_t i,
                int16_t value)
{
    uint32_t hash = (uint32_t)this->fHashes[i];

    hash += (uint32_t)((int32_t)value * 2);
    this->fHashes[i] = (int32_t)(hash | 1u);
}
|
||||
|
||||
UBool blockTouched(const CompactShortArray* this, int32_t i)
|
||||
{
|
||||
return (this->fHashes[i] != 0);
|
||||
}
|
||||
|
||||
|
||||
const int16_t*
|
||||
ucmp16_getArray(const CompactShortArray* this)
|
||||
{
|
||||
return this->fArray;
|
||||
}
|
||||
|
||||
const uint16_t*
|
||||
ucmp16_getIndex(const CompactShortArray* this)
|
||||
{
|
||||
return this->fIndex;
|
||||
}
|
||||
|
||||
uint32_t
|
||||
ucmp16_getCount(const CompactShortArray* this)
|
||||
{
|
||||
return this->fCount;
|
||||
}
|
||||
|
@ -1,218 +0,0 @@
|
||||
|
||||
/*
|
||||
********************************************************************
|
||||
*
|
||||
* Copyright (C) 1996-2000, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
********************************************************************
|
||||
*/
|
||||
|
||||
/*
|
||||
* ==========================================================================
|
||||
* @version 1.0 23/10/96
|
||||
* @author Helena Shih
|
||||
* Based on Taligent international support for java
|
||||
* Modification History :
|
||||
*
|
||||
* 05/07/97 helena Added isBogus()
|
||||
* 07/15/98 erm Synched with Java 1.2 CompactShortArray.java.
|
||||
* 07/30/98 erm Added 07/29/98 code review changes.
|
||||
* 04/21/99 Damiba Port to C/New API faster ucmp16_get
|
||||
*/
|
||||
|
||||
#ifndef UCMP16_H
|
||||
#define UCMP16_H
|
||||
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* class CompactATypeArray : use only on primitive data types
|
||||
* Provides a compact way to store information that is indexed by Unicode
|
||||
* values, such as character properties, types, keyboard values, etc. This
|
||||
* is very useful when you have a block of Unicode data that contains
|
||||
* significant values while the rest of the Unicode data is unused in the
|
||||
* application or when you have a lot of redundancy, such as where all 21,000
|
||||
* Han ideographs have the same value. However, lookup is much faster than a
|
||||
* hash table.
|
||||
* <P>
|
||||
* A compact array of any primitive data type serves two purposes:
|
||||
* <UL type = round>
|
||||
* <LI>Fast access of the indexed values.
|
||||
* <LI>Smaller memory footprint.
|
||||
* </UL>
|
||||
* <P>
|
||||
* The index array always points into particular parts of the data array
|
||||
* it is initially set up to point at regular block boundaries
|
||||
* The following example uses blocks of 4 for simplicity
|
||||
* <PRE>
|
||||
* Example: Expanded
|
||||
* BLOCK 0 1 2 3 4
|
||||
* INDEX 0 4 8 12 16 ...
|
||||
* ARRAY abcdeababcdezyabcdea...
|
||||
* | | | | | |...
|
||||
* </PRE>
|
||||
* <P>
|
||||
* After compression, the index will point to various places in the data array
|
||||
* wherever there is a run of the same elements as in the original
|
||||
* <PRE>
|
||||
* Example: Compressed
|
||||
* BLOCK 0 1 2 3 4
|
||||
* INDEX 0 4 1 8 2 ...
|
||||
* ARRAY abcdeabazyabc...
|
||||
* </PRE>
|
||||
* <P>
|
||||
* If you look at the example, index number 2 in the expanded version points
|
||||
* to data position number 8, which has elements "bcde". In the compressed
|
||||
* version, index number 2 points to data position 1, which also has "bcde"
|
||||
* @see CompactByteArray
|
||||
* @see CompactIntArray
|
||||
* @see CompactCharArray
|
||||
* @see CompactStringArray
|
||||
* @version $Revision: 1.7 $ 8/25/98
|
||||
* @author Helena Shih
|
||||
*/
|
||||
|
||||
typedef struct
|
||||
{
|
||||
int16_t* fArray;
|
||||
uint16_t* fIndex;
|
||||
int32_t* fHashes;
|
||||
int32_t fCount;
|
||||
int16_t fDefaultValue;
|
||||
UBool fCompact;
|
||||
UBool fBogus;
|
||||
int32_t kBlockShift;
|
||||
int32_t kBlockMask;
|
||||
} CompactShortArray;
|
||||
|
||||
|
||||
U_CAPI const int32_t UCMP16_kMaxUnicode;
|
||||
U_CAPI const int32_t UCMP16_kUnicodeCount;
|
||||
U_CAPI const int32_t UCMP16_kBlockShift;
|
||||
U_CAPI const int32_t UCMP16_kBlockCount;
|
||||
U_CAPI const int32_t UCMP16_kBlockBytes;
|
||||
U_CAPI const int32_t UCMP16_kIndexShift;
|
||||
U_CAPI const int32_t UCMP16_kIndexCount;
|
||||
U_CAPI const uint32_t UCMP16_kBlockMask;
|
||||
|
||||
U_CAPI int32_t ucmp16_getkUnicodeCount(void);
|
||||
U_CAPI int32_t ucmp16_getkBlockCount(void);
|
||||
U_CAPI int32_t ucmp16_getkIndexCount(void);
|
||||
/**
|
||||
* Construct an empty CompactShortArray.
|
||||
* @param defaultValue the default value for all characters not explicitly in the array
|
||||
*/
|
||||
U_CAPI CompactShortArray* ucmp16_open(int16_t defaultValue);
|
||||
|
||||
/**
|
||||
* Construct a CompactShortArray from a pre-computed index and values array. The values
|
||||
* will be adopted by the CompactShortArray. Note: for speed, the compact method will
|
||||
* only re-use blocks in the values array that are on a block boundary. The pre-computed
|
||||
* arrays passed in to this constructor may re-use blocks at any position in the values
|
||||
* array.
|
||||
* @param indexArray the index array to be adopted
|
||||
* @param newValues the value array to be adopted
|
||||
* @param count the number of entries in the value array
|
||||
* @param defaultValue the default value for all characters not explicitly in the array
|
||||
* @see compact
|
||||
*/
|
||||
U_CAPI CompactShortArray* ucmp16_openAdopt(uint16_t *indexArray,
|
||||
int16_t *newValues,
|
||||
int32_t count,
|
||||
int16_t defaultValue );
|
||||
|
||||
U_CAPI CompactShortArray* ucmp16_openAdoptWithBlockShift(uint16_t *indexArray,
|
||||
int16_t *newValues,
|
||||
int32_t count,
|
||||
int16_t defaultValue,
|
||||
int32_t blockShift);
|
||||
|
||||
|
||||
U_CAPI void ucmp16_close(CompactShortArray* array);
|
||||
/**
|
||||
* Returns TRUE if the creation of the compact array fails.
|
||||
*/
|
||||
|
||||
U_CAPI UBool ucmp16_isBogus(const CompactShortArray* array);
|
||||
|
||||
/**
|
||||
*
|
||||
* Get the mapped value of a Unicode character.
|
||||
* @param index the character to get the mapped value with
|
||||
* @return the mapped value of the given character
|
||||
*/
|
||||
|
||||
/* Both arguments are fully parenthesized so that expressions such as
 * "&arr" or "cond ? a : b" can be passed safely. Note that each argument
 * is still evaluated more than once, so avoid arguments with side effects.
 */
#define ucmp16_get(array, index) \
    ((array)->fArray[((array)->fIndex[(index) >> (array)->kBlockShift]) + \
                     ((index) & (array)->kBlockMask)])

/* Same lookup as ucmp16_get, with the result converted to uint16_t. */
#define ucmp16_getu(array, index) ((uint16_t)ucmp16_get(array, index))
|
||||
|
||||
|
||||
/**
|
||||
* Set a new value for a Unicode character.
|
||||
* Set automatically expands the array if it is compacted.
|
||||
* @param index the character to set the mapped value with
|
||||
* @param value the new mapped value
|
||||
*/
|
||||
U_CAPI void ucmp16_set(CompactShortArray *array,
|
||||
UChar index,
|
||||
int16_t value);
|
||||
|
||||
|
||||
/**
|
||||
*
|
||||
* Set new values for a range of Unicode character.
|
||||
* @param start the starting offset of the range
|
||||
* @param end the ending offset of the range
|
||||
* @param value the new mapped value
|
||||
*/
|
||||
U_CAPI void ucmp16_setRange(CompactShortArray* array,
|
||||
UChar start,
|
||||
UChar end,
|
||||
int16_t value);
|
||||
|
||||
|
||||
/**
|
||||
* Compact the array. For efficiency, this method will only re-use
|
||||
* blocks in the values array that are on a block boundary. If you
|
||||
* want better compaction, you can do your own compaction and use
|
||||
* the constructor that lets you pass in the pre-computed arrays.
|
||||
*/
|
||||
U_CAPI void ucmp16_compact(CompactShortArray* array);
|
||||
|
||||
/**
|
||||
* Get the default value.
|
||||
*/
|
||||
U_CAPI int16_t ucmp16_getDefaultValue(const CompactShortArray* array);
|
||||
|
||||
/**
|
||||
*
|
||||
* Get the number of elements in the value array.
|
||||
* @return the number of elements in the value array.
|
||||
*/
|
||||
U_CAPI uint32_t ucmp16_getCount(const CompactShortArray* array);
|
||||
|
||||
/**
|
||||
*
|
||||
* Get the address of the value array.
|
||||
* @return the address of the value array
|
||||
*/
|
||||
U_CAPI const int16_t* ucmp16_getArray(const CompactShortArray* array);
|
||||
|
||||
/**
|
||||
*
|
||||
* Get the address of the index array.
|
||||
* @return the address of the index array
|
||||
*/
|
||||
U_CAPI const uint16_t* ucmp16_getIndex(const CompactShortArray* array);
|
||||
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
|
@ -1,229 +0,0 @@
|
||||
/*
|
||||
*
|
||||
* Copyright (C) 1998-2000, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
* @version 1.0 06/19/98
|
||||
* @author Helena Shih
|
||||
* Based on Taligent international support for C++
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
|
||||
#include "ucmp16.h"
|
||||
|
||||
#if U_IOSTREAM_SOURCE >= 199711
|
||||
#include <iostream>
|
||||
using namespace std;
|
||||
#elif U_IOSTREAM_SOURCE >= 198506
|
||||
#include <iostream.h>
|
||||
#endif
|
||||
|
||||
/* Table built by getArray() and printed by writeArrays(); NULL until built. */
CompactShortArray* ulxfrmArray = NULL;
|
||||
|
||||
/*
 * Numeric codes for character types. The two-letter abbreviations in the
 * comments are the ones that tagValues[] pairs with each constant.
 */
enum ECharTypeMapping {
    UNASSIGNED             =  0,  /* Cn */
    UPPERCASE_LETTER       =  1,  /* Lu */
    LOWERCASE_LETTER       =  2,  /* Ll */
    TITLECASE_LETTER       =  3,  /* Lt */
    MODIFIER_LETTER        =  4,  /* Lm */
    OTHER_LETTER           =  5,  /* Lo */
    NON_SPACING_MARK       =  6,  /* Mn */
    ENCLOSING_MARK         =  7,  /* Me */
    COMBINING_SPACING_MARK =  8,  /* Mc */
    DECIMAL_DIGIT_NUMBER   =  9,  /* Nd */
    LETTER_NUMBER          = 10,  /* Nl */
    OTHER_NUMBER           = 11,  /* No */
    SPACE_SEPARATOR        = 12,  /* Zs */
    LINE_SEPARATOR         = 13,  /* Zl */
    PARAGRAPH_SEPARATOR    = 14,  /* Zp */
    CONTROL                = 15,  /* Cc */
    FORMAT                 = 16,  /* Cf */
    PRIVATE_USE            = 17,  /* Co */
    SURROGATE              = 18,  /* Cs */
    DASH_PUNCTUATION       = 19,  /* Pd */
    START_PUNCTUATION      = 20,  /* Ps */
    END_PUNCTUATION        = 21,  /* Pe */
    CONNECTOR_PUNCTUATION  = 22,  /* Pc */
    OTHER_PUNCTUATION      = 23,  /* Po */
    MATH_SYMBOL            = 24,  /* Sm */
    CURRENCY_SYMBOL        = 25,  /* Sc */
    MODIFIER_SYMBOL        = 26,  /* Sk */
    OTHER_SYMBOL           = 27,  /* So */
    INITIAL_PUNCTUATION    = 28,  /* Pi */
    FINAL_PUNCTUATION      = 29   /* Pf */
};
|
||||
|
||||
static const UChar LAST_CHAR_CODE_IN_FILE = 0xFFFD;
|
||||
const char tagStrings[] = "MnMcMeNdNlNoZsZlZpCcCfCsCoCnLuLlLtLmLoPcPdPsPePoSmScSkSoPiPf";
|
||||
const int16_t tagValues[] =
|
||||
{
|
||||
/* Mn */ (int16_t)NON_SPACING_MARK,
|
||||
/* Mc */ (int16_t)COMBINING_SPACING_MARK,
|
||||
/* Me */ (int16_t)ENCLOSING_MARK,
|
||||
/* Nd */ (int16_t)DECIMAL_DIGIT_NUMBER,
|
||||
/* Nl */ (int16_t)LETTER_NUMBER,
|
||||
/* No */ (int16_t)OTHER_NUMBER,
|
||||
/* Zs */ (int16_t)SPACE_SEPARATOR,
|
||||
/* Zl */ (int16_t)LINE_SEPARATOR,
|
||||
/* Zp */ (int16_t)PARAGRAPH_SEPARATOR,
|
||||
/* Cc */ (int16_t)CONTROL,
|
||||
/* Cf */ (int16_t)FORMAT,
|
||||
/* Cs */ (int16_t)SURROGATE,
|
||||
/* Co */ (int16_t)PRIVATE_USE,
|
||||
/* Cn */ (int16_t)UNASSIGNED,
|
||||
/* Lu */ (int16_t)UPPERCASE_LETTER,
|
||||
/* Ll */ (int16_t)LOWERCASE_LETTER,
|
||||
/* Lt */ (int16_t)TITLECASE_LETTER,
|
||||
/* Lm */ (int16_t)MODIFIER_LETTER,
|
||||
/* Lo */ (int16_t)OTHER_LETTER,
|
||||
/* Pc */ (int16_t)CONNECTOR_PUNCTUATION,
|
||||
/* Pd */ (int16_t)DASH_PUNCTUATION,
|
||||
/* Ps */ (int16_t)START_PUNCTUATION,
|
||||
/* Pe */ (int16_t)END_PUNCTUATION,
|
||||
/* Po */ (int16_t)OTHER_PUNCTUATION,
|
||||
/* Sm */ (int16_t)MATH_SYMBOL,
|
||||
/* Sc */ (int16_t)CURRENCY_SYMBOL,
|
||||
/* Sk */ (int16_t)MODIFIER_SYMBOL,
|
||||
/* So */ (int16_t)OTHER_SYMBOL,
|
||||
/* Pi */ (int16_t)INITIAL_PUNCTUATION,
|
||||
/* Pf */ (int16_t)FINAL_PUNCTUATION
|
||||
};
|
||||
int
|
||||
MakeProp(char* str)
|
||||
{
|
||||
int result = 0;
|
||||
char* matchPosition;
|
||||
|
||||
matchPosition = strstr(tagStrings, str);
|
||||
if (matchPosition == 0) fprintf(stderr, "unrecognized type letter %s", str);
|
||||
else result = ((matchPosition - tagStrings) / 2);
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
 * Parse a hexadecimal number at the start of text into *value.
 * *value is left unchanged when no number can be parsed, which lets
 * callers pre-load a "nothing found" value. Scanning goes through an
 * unsigned int because that is the type "%X" requires.
 */
static void
scanHexField(const char* text, int32_t* value)
{
    unsigned int parsed = 0;

    if (sscanf(text, "%X", &parsed) == 1) {
        *value = (int32_t)parsed;
    }
}

/**
 * Build the global ulxfrmArray from a semicolon-separated character data
 * file, once; later calls return the array built by the first call.
 *
 * For every data line the character code and its two-letter type are
 * read. Characters of type DECIMAL_DIGIT_NUMBER or OTHER_NUMBER with a
 * digit value of 0..9 are printed to cout as "{ code, digit }" pairs.
 * For lowercase and uppercase letters a related code point is read from a
 * later field and stored in the array, unless the character already has
 * an entry.
 *
 * The input file is closed and the array compacted when parsing ends.
 *
 * NOTE(review): the field positions depend on the extra bufferPtr++
 * steps between the skip loops, so the number of fields skipped differs
 * between an empty and a non-empty current field. Statement order is
 * preserved exactly; confirm the intended fields against the data format.
 *
 * @param input an open file positioned at the start of the data
 * @return the global array, or NULL if it could not be created
 */
CompactShortArray*
getArray(FILE *input)
{
    if (ulxfrmArray == 0) {
        char buffer[1000];
        char codeText[16];   /* formatted code for the digit table output */
        char* bufferPtr;
        int set = FALSE;
        char type[3];

        try {
            ulxfrmArray = ucmp16_open((int16_t)0xffff);
            if (ulxfrmArray == 0) {
                fprintf(stderr, "Error Occured while parsing unicode data file.\n");
                return ulxfrmArray;
            }

            int32_t unicode, otherunicode, digit, i;
            while (TRUE) {
                otherunicode = 0xffff;
                digit = -1;
                bufferPtr = fgets(buffer, 999, input);
                if (bufferPtr == NULL) break;
                if (bufferPtr[0] == '#' || bufferPtr[0] == '\n' || bufferPtr[0] == 0) continue;

                /* -1 makes an unparsable code fail the range check instead
                 * of silently reusing the previous line's value. */
                unicode = -1;
                scanHexField(bufferPtr, &unicode);
                assert(0 <= unicode && unicode < 65536);
                if (unicode < 0 || unicode >= 65536) continue;

                bufferPtr = strchr(bufferPtr, ';');
                assert(bufferPtr != NULL);
                bufferPtr = strchr(bufferPtr + 1, ';');
                assert(bufferPtr != NULL);
                strncpy(type, ++bufferPtr, 2); // go to start of third field
                type[2] = 0;
                int typeResult = tagValues[MakeProp(type)];

                // check for the decimal values
                bufferPtr++;
                for (i = 3; i < 8; i++) {
                    bufferPtr = strchr(bufferPtr, ';');
                    assert(bufferPtr != NULL);
                    bufferPtr++;
                }
                scanHexField(bufferPtr, &digit);
                if (((typeResult == DECIMAL_DIGIT_NUMBER) || (typeResult == OTHER_NUMBER)) &&
                    (digit >= 0 && digit <= 9)) {
                    /* Format into a separate buffer: bufferPtr still points
                     * into the line buffer, which is parsed further below. */
                    sprintf(codeText, "0x%04X", (unsigned int)unicode);
                    cout << " { " << codeText << ", " << digit << "}, \n";
                }

                bufferPtr++;
                for (i = 8; i < 12; i++) {
                    bufferPtr = strchr(bufferPtr, ';');
                    assert(bufferPtr != NULL);
                    bufferPtr++;
                }
                scanHexField(bufferPtr, &otherunicode);

                // the Unicode char has a equivalent uppercase
                if ((typeResult == LOWERCASE_LETTER) && (0 <= otherunicode && otherunicode < 65536)) {
                    set = TRUE;
                }
                if ((typeResult == UPPERCASE_LETTER) && !set) {
                    bufferPtr++;
                    scanHexField(bufferPtr, &otherunicode);
                    if (0 <= otherunicode && otherunicode < 65536) {
                        set = TRUE;
                    }
                }

                /* Only the first mapping seen for a character is kept. */
                if ((set == TRUE) && (ucmp16_get(ulxfrmArray, (UChar)unicode) == (int16_t)0xffff))
                    ucmp16_set(ulxfrmArray, (UChar)unicode, (int16_t)otherunicode);
                set = FALSE;
            }

            if (input) fclose(input);
            ucmp16_compact(ulxfrmArray);
        }
        catch (...) {
            fprintf(stderr, "Error Occured while parsing unicode data file.\n");
        }
    }
    return ulxfrmArray;
}
|
||||
|
||||
void
|
||||
writeArrays()
|
||||
{
|
||||
const int16_t* values = ucmp16_getArray(ulxfrmArray);
|
||||
const uint16_t* indexes = ucmp16_getIndex(ulxfrmArray);
|
||||
int32_t i;
|
||||
int32_t cnt = ucmp16_getCount(ulxfrmArray);
|
||||
cout << "\nconst uint32_t Unicode::caseIndex[] = {\n ";
|
||||
for (i = 0; i < ucmp16_getkIndexCount()-1; i++)
|
||||
{
|
||||
cout << "(uint16_t)" << ((indexes[i] >= 0) ? (int)indexes[i] : (int)(indexes[i]+ucmp16_getkUnicodeCount()))
|
||||
<< ", ";
|
||||
if (i != 0)
|
||||
if (i % 3 == 0)
|
||||
cout << "\n ";
|
||||
}
|
||||
cout << " (uint16_t)" << ((indexes[ucmp16_getkIndexCount()-1] >= 0) ? (int)indexes[i] : (int)(indexes[i]+ucmp16_getkUnicodeCount()))
|
||||
<< " };\n";
|
||||
cout << "\nconst int16_t Unicode::caseValues[] = {\n ";
|
||||
for (i = 0; i < cnt-1; i++)
|
||||
{
|
||||
cout << "(int16_t)" << (int16_t)values[i] << ", ";
|
||||
if (i != 0)
|
||||
if (i % 5 == 0)
|
||||
cout << "\n ";
|
||||
}
|
||||
cout << " (char)" << (int16_t)values[cnt-1] << " }\n";
|
||||
cout << "const int32_t Unicode::caseCount = " << cnt << ";\n";
|
||||
}
|
||||
/**
|
||||
* The main function builds the CharType data array and prints it to System.out
|
||||
*/
|
||||
void main(int argc, char** argv)
|
||||
{
|
||||
CompactShortArray* arrays = 0;
|
||||
FILE *input = 0;
|
||||
if (argc != 2) {
|
||||
printf("Usage : chartype filename\n\n");
|
||||
exit(1);
|
||||
}
|
||||
input = fopen(argv[1], "r");
|
||||
if (input == 0) {
|
||||
printf("Cannot open the input file: %s\n\n", argv[1]);
|
||||
exit(1);
|
||||
}
|
||||
arrays = getArray(input);
|
||||
writeArrays();
|
||||
}
|
||||
|
@ -1,98 +0,0 @@
|
||||
# Microsoft Developer Studio Project File - Name="ulxfrm" - Package Owner=<4>
|
||||
# Microsoft Developer Studio Generated Build File, Format Version 6.00
|
||||
# ** DO NOT EDIT **
|
||||
|
||||
# TARGTYPE "Win32 (x86) Console Application" 0x0103
|
||||
|
||||
CFG=ulxfrm - Win32 Debug
|
||||
!MESSAGE This is not a valid makefile. To build this project using NMAKE,
|
||||
!MESSAGE use the Export Makefile command and run
|
||||
!MESSAGE
|
||||
!MESSAGE NMAKE /f "ulxfrm.mak".
|
||||
!MESSAGE
|
||||
!MESSAGE You can specify a configuration when running NMAKE
|
||||
!MESSAGE by defining the macro CFG on the command line. For example:
|
||||
!MESSAGE
|
||||
!MESSAGE NMAKE /f "ulxfrm.mak" CFG="ulxfrm - Win32 Debug"
|
||||
!MESSAGE
|
||||
!MESSAGE Possible choices for configuration are:
|
||||
!MESSAGE
|
||||
!MESSAGE "ulxfrm - Win32 Release" (based on "Win32 (x86) Console Application")
|
||||
!MESSAGE "ulxfrm - Win32 Debug" (based on "Win32 (x86) Console Application")
|
||||
!MESSAGE
|
||||
|
||||
# Begin Project
|
||||
# PROP AllowPerConfigDependencies 0
|
||||
# PROP Scc_ProjName ""
|
||||
# PROP Scc_LocalPath ""
|
||||
CPP=cl.exe
|
||||
RSC=rc.exe
|
||||
|
||||
!IF "$(CFG)" == "ulxfrm - Win32 Release"
|
||||
|
||||
# PROP BASE Use_MFC 0
|
||||
# PROP BASE Use_Debug_Libraries 0
|
||||
# PROP BASE Output_Dir "Release"
|
||||
# PROP BASE Intermediate_Dir "Release"
|
||||
# PROP BASE Target_Dir ""
|
||||
# PROP Use_MFC 0
|
||||
# PROP Use_Debug_Libraries 0
|
||||
# PROP Output_Dir "Release"
|
||||
# PROP Intermediate_Dir "Release"
|
||||
# PROP Ignore_Export_Lib 0
|
||||
# PROP Target_Dir ""
|
||||
# ADD BASE CPP /nologo /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /c
|
||||
# ADD CPP /nologo /W3 /GX /O2 /I "..\..\..\include" /I "..\..\..\source\common" /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /c
|
||||
# ADD BASE RSC /l 0x409 /d "NDEBUG"
|
||||
# ADD RSC /l 0x409 /d "NDEBUG"
|
||||
BSC32=bscmake.exe
|
||||
# ADD BASE BSC32 /nologo
|
||||
# ADD BSC32 /nologo
|
||||
LINK32=link.exe
|
||||
# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:I386
|
||||
# ADD LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:I386
|
||||
|
||||
!ELSEIF "$(CFG)" == "ulxfrm - Win32 Debug"
|
||||
|
||||
# PROP BASE Use_MFC 0
|
||||
# PROP BASE Use_Debug_Libraries 1
|
||||
# PROP BASE Output_Dir "Debug"
|
||||
# PROP BASE Intermediate_Dir "Debug"
|
||||
# PROP BASE Target_Dir ""
|
||||
# PROP Use_MFC 0
|
||||
# PROP Use_Debug_Libraries 1
|
||||
# PROP Output_Dir "Debug"
|
||||
# PROP Intermediate_Dir "Debug"
|
||||
# PROP Ignore_Export_Lib 0
|
||||
# PROP Target_Dir ""
|
||||
# ADD BASE CPP /nologo /W3 /Gm /GX /Zi /Od /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /c
|
||||
# ADD CPP /nologo /W3 /Gm /GX /ZI /Od /I "..\..\..\include" /I "..\..\..\source\common" /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /c
|
||||
# ADD BASE RSC /l 0x409 /d "_DEBUG"
|
||||
# ADD RSC /l 0x409 /d "_DEBUG"
|
||||
BSC32=bscmake.exe
|
||||
# ADD BASE BSC32 /nologo
|
||||
# ADD BSC32 /nologo
|
||||
LINK32=link.exe
|
||||
# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept
|
||||
# ADD LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept
|
||||
|
||||
!ENDIF
|
||||
|
||||
# Begin Target
|
||||
|
||||
# Name "ulxfrm - Win32 Release"
|
||||
# Name "ulxfrm - Win32 Debug"
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\ucmp16.c
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\ucmp16.h
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\ulxfrm.cpp
|
||||
# End Source File
|
||||
# End Target
|
||||
# End Project
|
@ -1,29 +0,0 @@
|
||||
Microsoft Developer Studio Workspace File, Format Version 6.00
|
||||
# WARNING: DO NOT EDIT OR DELETE THIS WORKSPACE FILE!
|
||||
|
||||
###############################################################################
|
||||
|
||||
Project: "ulxfrm"=.\ulxfrm.dsp - Package Owner=<4>
|
||||
|
||||
Package=<5>
|
||||
{{{
|
||||
}}}
|
||||
|
||||
Package=<4>
|
||||
{{{
|
||||
}}}
|
||||
|
||||
###############################################################################
|
||||
|
||||
Global:
|
||||
|
||||
Package=<5>
|
||||
{{{
|
||||
}}}
|
||||
|
||||
Package=<3>
|
||||
{{{
|
||||
}}}
|
||||
|
||||
###############################################################################
|
||||
|
Loading…
Reference in New Issue
Block a user