ICU-867 initial version.
X-SVN-Rev: 3730
This commit is contained in:
parent
131e27bb87
commit
c3231963c4
121
icu4c/source/tools/genuca/Makefile.in
Normal file
121
icu4c/source/tools/genuca/Makefile.in
Normal file
@ -0,0 +1,121 @@
|
||||
## Makefile.in for ICU - tools/genuca
|
||||
## Copyright (c) 1999, 2000, International Business Machines Corporation and
|
||||
## others. All Rights Reserved.
|
||||
|
||||
## Source directory information
|
||||
srcdir = @srcdir@
|
||||
top_srcdir = @top_srcdir@
|
||||
|
||||
top_builddir = ../..
|
||||
|
||||
include $(top_builddir)/icudefs.mk
|
||||
|
||||
## Platform-specific setup
|
||||
|
||||
include @platform_make_fragment@
|
||||
|
||||
##
|
||||
|
||||
SECTION = 8
|
||||
|
||||
MAN_FILES = $(TARGET).$(SECTION)
|
||||
|
||||
## Build directory information
|
||||
subdir = tools/genuca
|
||||
|
||||
## Extra files to remove for 'make clean'
|
||||
CLEANFILES = *~ $(TARGET).$(SECTION) $(DEPS)
|
||||
|
||||
## Target information
|
||||
TARGET = genuca
|
||||
|
||||
ENABLE_STATIC = @ENABLE_STATIC@
|
||||
|
||||
ifneq ($(ENABLE_STATIC),)
|
||||
LINK = $(LINK.cc)
|
||||
else
|
||||
LINK = $(LINK.c)
|
||||
endif
|
||||
|
||||
DEFS = @DEFS@
|
||||
CPPFLAGS = @CPPFLAGS@ -I$(top_builddir)/common -I$(top_srcdir)/common -I$(top_srcdir)/i18n -I$(top_srcdir)/extra/ustdio -I$(srcdir)/../toolutil
|
||||
CFLAGS = @CFLAGS@
|
||||
CXXFLAGS = @CXXFLAGS@
|
||||
ENABLE_RPATH = @ENABLE_RPATH@
|
||||
ifeq ($(ENABLE_RPATH),YES)
|
||||
RPATHLDFLAGS = $(LD_RPATH)$(LD_RPATH_PRE)$(libdir)
|
||||
endif
|
||||
LDFLAGS = @LDFLAGS@ $(RPATHLDFLAGS)
|
||||
LIBS = $(LIBUSTDIO) $(LIBICUI18N) $(LIBICUTOOLUTIL) $(LIBICUUC) @LIBS@ @LIB_M@
|
||||
|
||||
OBJECTS = UCAData.o cnttable.o
|
||||
|
||||
DEPS = $(OBJECTS:.o=.d)
|
||||
|
||||
|
||||
## List of phony targets
|
||||
.PHONY : all all-local install install-local clean clean-local \
|
||||
distclean distclean-local dist dist-local check \
|
||||
check-local install-man
|
||||
|
||||
## Clear suffix list
|
||||
.SUFFIXES :
|
||||
|
||||
## List of standard targets
|
||||
all: all-local
|
||||
install: install-local
|
||||
clean: clean-local
|
||||
distclean : distclean-local
|
||||
dist: dist-local
|
||||
check: all check-local
|
||||
|
||||
all-local: $(TARGET) $(MAN_FILES)
|
||||
|
||||
install-local: all-local install-man
|
||||
$(MKINSTALLDIRS) $(DESTDIR)$(sbindir)
|
||||
$(INSTALL) $(TARGET) $(DESTDIR)$(sbindir)/$(TARGET)
|
||||
|
||||
dist-local:
|
||||
|
||||
clean-local:
|
||||
test -z "$(CLEANFILES)" || $(RMV) $(CLEANFILES)
|
||||
$(RMV) $(TARGET) $(OBJECTS)
|
||||
|
||||
distclean-local: clean-local
|
||||
$(RMV) Makefile
|
||||
|
||||
check-local: all-local
|
||||
|
||||
Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
|
||||
cd $(top_builddir) \
|
||||
&& CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status
|
||||
|
||||
$(TARGET) : $(OBJECTS)
|
||||
$(LINK) -o $@ $^ $(LIBS)
|
||||
|
||||
|
||||
# the 'mv' will always fail if you are building in the source dir
|
||||
|
||||
# man page
|
||||
install-man: $(MAN_FILES)
|
||||
$(MKINSTALLDIRS) $(DESTDIR)$(mandir)/man$(SECTION)
|
||||
$(INSTALL_DATA) $< $(DESTDIR)$(mandir)/man$(SECTION)
|
||||
|
||||
$(TARGET).$(SECTION): $(srcdir)/$(TARGET).$(SECTION).in
|
||||
cd $(top_builddir) \
|
||||
&& CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status
|
||||
|
||||
# build postscript and pdf formats
|
||||
$(TARGET).ps: $(TARGET).$(SECTION)
|
||||
groff -man < $< > $@
|
||||
|
||||
$(TARGET).pdf: $(TARGET).ps
|
||||
ps2pdf $< $@
|
||||
|
||||
ifeq (,$(MAKECMDGOALS))
|
||||
-include $(DEPS)
|
||||
else
|
||||
ifneq ($(patsubst %clean,,$(MAKECMDGOALS)),)
|
||||
-include $(DEPS)
|
||||
endif
|
||||
endif
|
345
icu4c/source/tools/genuca/cnttable.cpp
Normal file
345
icu4c/source/tools/genuca/cnttable.cpp
Normal file
@ -0,0 +1,345 @@
|
||||
#include "cnttable.h"
|
||||
#include "cmemory.h"
|
||||
|
||||
/*
 * Doubles the capacity of a contraction (sub)table once it is full, i.e. when
 * tbl->position has reached tbl->size. Does nothing while there is still room.
 *
 * tbl     table whose CEs and codePoints buffers are grown together
 * status  set to U_MEMORY_ALLOCATION_ERROR if either buffer cannot be grown;
 *         in that case tbl->size is left unchanged
 */
void uprv_growTable(ContractionTable *tbl, UErrorCode *status) {
    if(tbl->position != tbl->size) {
        return; /* still room for at least one more entry */
    }

    /* Store each pointer as soon as its realloc succeeds: realloc may have moved
       and freed the old buffer, so keeping the old pointer after a partial
       failure would leave the table with a dangling pointer. */
    uint32_t *newData = (uint32_t *)realloc(tbl->CEs, 2*tbl->size*sizeof(uint32_t));
    if(newData != NULL) {
        tbl->CEs = newData;
    }
    UChar *newCPs = (UChar *)realloc(tbl->codePoints, 2*tbl->size*sizeof(UChar));
    if(newCPs != NULL) {
        tbl->codePoints = newCPs;
    }

    if(newData == NULL || newCPs == NULL) {
        fprintf(stderr, "out of memory for contractions\n");
        *status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }
    tbl->size *= 2;
}
|
||||
|
||||
/*
 * Allocates an empty contraction table container.
 *
 * mapping  code point -> CE map; stored in the table, not copied
 * status   in/out error code; set to U_MEMORY_ALLOCATION_ERROR when an
 *          allocation fails
 *
 * Returns the new table, or 0 if status already indicated a failure or an
 * allocation failed. Release the table with uprv_cnttab_close().
 */
CntTable *uprv_cnttab_open(CompactIntArray *mapping, UErrorCode *status) {
    if(U_FAILURE(*status)) {
        return 0;
    }

    CntTable *tbl = (CntTable *)malloc(sizeof(CntTable));
    if(tbl == NULL) {
        *status = U_MEMORY_ALLOCATION_ERROR;
        return 0;
    }

    tbl->elements = (ContractionTable **)malloc(INIT_EXP_TABLE_SIZE*sizeof(ContractionTable *));
    if(tbl->elements == NULL) {
        free(tbl);
        *status = U_MEMORY_ALLOCATION_ERROR;
        return 0;
    }
    /* unused slots must read as NULL: lookups test elements[i] == NULL */
    memset(tbl->elements, 0, INIT_EXP_TABLE_SIZE*sizeof(ContractionTable *));

    tbl->mapping = mapping;
    tbl->capacity = INIT_EXP_TABLE_SIZE;
    tbl->size = 0;
    tbl->position = 0;
    tbl->CEs = NULL;
    tbl->codePoints = NULL;
    tbl->offsets = NULL;
    return tbl;
}
|
||||
|
||||
ContractionTable *addATableElement(CntTable *table, uint32_t *key, UErrorCode *status) {
|
||||
ContractionTable *el = (ContractionTable *)malloc(sizeof(ContractionTable));
|
||||
el->CEs = (uint32_t *)malloc(INIT_EXP_TABLE_SIZE*sizeof(uint32_t));
|
||||
el->codePoints = (UChar *)malloc(INIT_EXP_TABLE_SIZE*sizeof(UChar));
|
||||
el->position = 0;
|
||||
el->size = INIT_EXP_TABLE_SIZE;
|
||||
el->forward = TRUE;
|
||||
memset(el->CEs, 'F', INIT_EXP_TABLE_SIZE*sizeof(uint32_t));
|
||||
memset(el->codePoints, 'F', INIT_EXP_TABLE_SIZE*sizeof(UChar));
|
||||
|
||||
el->reversed = (ContractionTable *)malloc(sizeof(ContractionTable));
|
||||
el->reversed->CEs = (uint32_t *)malloc(INIT_EXP_TABLE_SIZE*sizeof(uint32_t));
|
||||
el->reversed->codePoints = (UChar *)malloc(INIT_EXP_TABLE_SIZE*sizeof(UChar));
|
||||
el->reversed->position = 0;
|
||||
el->reversed->size = INIT_EXP_TABLE_SIZE;
|
||||
el->reversed->forward = FALSE;
|
||||
memset(el->reversed->CEs, 'R', INIT_EXP_TABLE_SIZE*sizeof(uint32_t));
|
||||
memset(el->reversed->codePoints, 'R', INIT_EXP_TABLE_SIZE*sizeof(UChar));
|
||||
|
||||
table->elements[table->size] = el;
|
||||
|
||||
//uhash_put(table->elements, (void *)table->size, el, status);
|
||||
|
||||
*key = table->size++;
|
||||
|
||||
if(table->size > table->capacity) {
|
||||
// do realloc
|
||||
*status = U_MEMORY_ALLOCATION_ERROR;
|
||||
}
|
||||
|
||||
return el;
|
||||
}
|
||||
|
||||
|
||||
int32_t uprv_cnttab_constructTable(CntTable *table, uint32_t mainOffset, UErrorCode *status) {
|
||||
if(U_FAILURE(*status)) {
|
||||
return 0;
|
||||
}
|
||||
int32_t i = 0, j = 0;
|
||||
|
||||
table->position = 0;
|
||||
|
||||
if(table->offsets != NULL) {
|
||||
free(table->offsets);
|
||||
}
|
||||
table->offsets = (int32_t *)malloc(table->size*sizeof(int32_t));
|
||||
|
||||
|
||||
/* See how much memory we need */
|
||||
for(i = 0; i<table->size; i++) {
|
||||
table->offsets[i] = table->position+mainOffset;
|
||||
table->position += table->elements[i]->position;
|
||||
if(table->elements[i]->reversed->position > 0) {
|
||||
table->elements[i]->codePoints[0] = table->elements[i]->position; /* set offset for backwards table */
|
||||
table->position += table->elements[i]->reversed->position-1;
|
||||
}
|
||||
}
|
||||
|
||||
/* Allocate it */
|
||||
if(table->CEs != NULL) {
|
||||
free(table->CEs);
|
||||
}
|
||||
table->CEs = (uint32_t *)malloc(table->position*sizeof(uint32_t));
|
||||
memset(table->CEs, '?', table->position*sizeof(uint32_t));
|
||||
if(table->codePoints != NULL) {
|
||||
free(table->codePoints);
|
||||
}
|
||||
table->codePoints = (UChar *)malloc(table->position*sizeof(UChar));
|
||||
memset(table->codePoints, '?', table->position*sizeof(UChar));
|
||||
|
||||
/* Now stuff the things in*/
|
||||
|
||||
UChar *cpPointer = table->codePoints;
|
||||
uint32_t *CEPointer = table->CEs;
|
||||
for(i = 0; i<table->size; i++) {
|
||||
int32_t size = table->elements[i]->position;
|
||||
memcpy(cpPointer, table->elements[i]->codePoints, size*sizeof(UChar));
|
||||
memcpy(CEPointer, table->elements[i]->CEs, size*sizeof(uint32_t));
|
||||
for(j = 0; j<size; j++) {
|
||||
if(isContraction(*(CEPointer+j))) {
|
||||
*(CEPointer+j) = constructContractCE(table->offsets[getContractOffset(*(CEPointer+j))]);
|
||||
}
|
||||
}
|
||||
cpPointer += size;
|
||||
CEPointer += size;
|
||||
if(table->elements[i]->reversed->position-1 > 0) {
|
||||
int32_t size2 = table->elements[i]->reversed->position-1;
|
||||
memcpy(cpPointer, (table->elements[i]->reversed->codePoints)+1, size2*sizeof(UChar));
|
||||
memcpy(CEPointer, (table->elements[i]->reversed->CEs)+1, size2*sizeof(uint32_t));
|
||||
for(j = 0; j<size2; j++) {
|
||||
if(isContraction(*(CEPointer+j))) {
|
||||
*(CEPointer+j) = constructContractCE(table->offsets[getContractOffset(*(CEPointer+j))]);
|
||||
}
|
||||
}
|
||||
cpPointer += size2;
|
||||
CEPointer += size2;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
uint32_t CE;
|
||||
for(i = 0; i<=0xFFFF; i++) {
|
||||
CE = ucmp32_get(table->mapping, i);
|
||||
if(isContraction(CE)) {
|
||||
CE = constructContractCE(table->offsets[getContractOffset(CE)]);
|
||||
ucmp32_set(table->mapping, i, CE);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
return table->position;
|
||||
}
|
||||
|
||||
/*
 * Releases a container created by uprv_cnttab_open() together with every
 * element it owns. The mapping passed to uprv_cnttab_open() is not freed.
 * Passing NULL is a no-op.
 */
void uprv_cnttab_close(CntTable *table) {
    int32_t i = 0;
    if(table == NULL) {
        return;
    }
    for(i = 0; i<table->size; i++) {
        ContractionTable *el = table->elements[i];
        if(el == NULL) {
            continue;
        }
        if(el->reversed != NULL) {
            free(el->reversed->CEs);
            free(el->reversed->codePoints);
            free(el->reversed);
        }
        free(el->CEs);
        free(el->codePoints);
        free(el);
    }
    free(table->elements); /* the pointer array itself was previously leaked */
    free(table->CEs);
    free(table->offsets);
    free(table->codePoints);
    free(table);
}
|
||||
|
||||
/* this is for adding non contractions */
|
||||
uint32_t uprv_cnttab_changeLastCE(CntTable *table, uint32_t element, uint32_t value, UBool forward, UErrorCode *status) {
|
||||
element &= 0xFFFFFF;
|
||||
|
||||
ContractionTable *tbl = NULL;
|
||||
if(U_FAILURE(*status)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
if((element == 0xFFFFFF) || (tbl = table->elements[element]) == NULL) {
|
||||
tbl = addATableElement(table, &element, status);
|
||||
}
|
||||
|
||||
if(forward == TRUE) {
|
||||
tbl->CEs[tbl->position-1] = value;
|
||||
} else {
|
||||
tbl->reversed->CEs[tbl->reversed->position-1] = value;
|
||||
}
|
||||
|
||||
return(constructContractCE(element));
|
||||
}
|
||||
|
||||
|
||||
/* inserts a part of contraction sequence in table. Sequences behind the offset are moved back. If element is non existent, it creates on. Returns element handle */
|
||||
uint32_t uprv_cnttab_insertContraction(CntTable *table, uint32_t element, UChar codePoint, uint32_t value, UBool forward, UErrorCode *status) {
|
||||
|
||||
element &= 0xFFFFFF;
|
||||
ContractionTable *tbl = NULL;
|
||||
|
||||
if(U_FAILURE(*status)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
if((element == 0xFFFFFF) || (tbl = table->elements[element]) == NULL) {
|
||||
tbl = addATableElement(table, &element, status);
|
||||
}
|
||||
|
||||
if(forward == FALSE) {
|
||||
tbl = tbl->reversed;
|
||||
}
|
||||
|
||||
uprv_growTable(tbl, status);
|
||||
|
||||
int32_t offset = 0;
|
||||
|
||||
|
||||
while(tbl->codePoints[offset] < codePoint && offset<tbl->position) {
|
||||
offset++;
|
||||
}
|
||||
|
||||
int32_t i = tbl->position;
|
||||
for(i = tbl->position; i > offset; i--) {
|
||||
tbl->CEs[i] = tbl->CEs[i-1];
|
||||
tbl->codePoints[i] = tbl->codePoints[i-1];
|
||||
}
|
||||
|
||||
tbl->CEs[offset] = value;
|
||||
tbl->codePoints[offset] = codePoint;
|
||||
|
||||
tbl->position++;
|
||||
|
||||
return(constructContractCE(element));
|
||||
}
|
||||
|
||||
|
||||
/* adds more contractions in table. If element is non existant, it creates on. Returns element handle */
|
||||
uint32_t uprv_cnttab_addContraction(CntTable *table, uint32_t element, UChar codePoint, uint32_t value, UBool forward, UErrorCode *status) {
|
||||
|
||||
element &= 0xFFFFFF;
|
||||
|
||||
ContractionTable *tbl = NULL;
|
||||
|
||||
if(U_FAILURE(*status)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
if((element == 0xFFFFFF) || (tbl = table->elements[element]) == NULL) {
|
||||
tbl = addATableElement(table, &element, status);
|
||||
}
|
||||
|
||||
if(forward == FALSE) {
|
||||
tbl = tbl->reversed;
|
||||
}
|
||||
|
||||
uprv_growTable(tbl, status);
|
||||
|
||||
tbl->CEs[tbl->position] = value;
|
||||
tbl->codePoints[tbl->position] = codePoint;
|
||||
|
||||
tbl->position++;
|
||||
|
||||
return(constructContractCE(element));
|
||||
}
|
||||
|
||||
/* sets a part of contraction sequence in table. If element is non existant, it creates on. Returns element handle */
|
||||
uint32_t uprv_cnttab_setContraction(CntTable *table, uint32_t element, int32_t offset, UChar codePoint, uint32_t value, UBool forward, UErrorCode *status) {
|
||||
|
||||
element &= 0xFFFFFF;
|
||||
ContractionTable *tbl = NULL;
|
||||
|
||||
if(U_FAILURE(*status)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
if((element == 0xFFFFFF) || (tbl = table->elements[element]) == NULL) {
|
||||
tbl = addATableElement(table, &element, status);
|
||||
}
|
||||
|
||||
if(forward == FALSE) {
|
||||
tbl = tbl->reversed;
|
||||
}
|
||||
|
||||
if(offset >= tbl->size) {
|
||||
*status = U_INDEX_OUTOFBOUNDS_ERROR;
|
||||
return 0;
|
||||
}
|
||||
tbl->CEs[offset] = value;
|
||||
tbl->codePoints[offset] = codePoint;
|
||||
|
||||
//return(offset);
|
||||
return(constructContractCE(element));
|
||||
}
|
||||
|
||||
/*
 * Looks up a code point in an element's table. The scan stops at the first
 * entry whose code point is not smaller than codePoint, so it relies on the
 * entries being in ascending code point order.
 *
 * table      container holding the element
 * element    element handle (only the low 24 bits are used)
 * codePoint  code point to search for
 * forward    FALSE selects the element's reversed table
 * status     in/out error code
 *
 * Returns the index of the matching entry, or 0 when the element does not
 * exist or the code point is not found. A match at index 0 is therefore
 * indistinguishable from "not found".
 */
uint32_t uprv_cnttab_findCP(CntTable *table, uint32_t element, UChar codePoint, UBool forward, UErrorCode *status) {

    element &= 0xFFFFFF;
    ContractionTable *tbl = NULL;

    if(U_FAILURE(*status)) {
        return 0;
    }

    /* indices at or beyond table->size have no element; do not read those slots */
    if((element == 0xFFFFFF) || element >= (uint32_t)table->size
        || (tbl = table->elements[element]) == NULL) {
        return 0;
    }

    if(forward == FALSE) {
        tbl = tbl->reversed;
    }

    /* Only the first tbl->position entries are valid; the slot after them is
       unset (and outside the buffer when the table is full), so it must be
       neither read nor reported as a match. */
    int32_t position = 0;
    while(position < tbl->position && codePoint > tbl->codePoints[position]) {
        position++;
    }
    if(position < tbl->position && codePoint == tbl->codePoints[position]) {
        return (uint32_t)position;
    }
    return 0;
}
|
||||
|
||||
/*
 * Returns the CE stored at a given index of an element's table.
 *
 * table     container holding the element
 * element   element handle (only the low 24 bits are used)
 * position  index of the entry to read
 * forward   FALSE selects the element's reversed table
 * status    in/out error code
 *
 * Returns UCOL_NOT_FOUND when status already indicates a failure, the element
 * does not exist, or position is not the index of a stored entry.
 */
uint32_t uprv_cnttab_getCE(CntTable *table, uint32_t element, uint32_t position, UBool forward, UErrorCode *status) {

    element &= 0xFFFFFF;
    ContractionTable *tbl = NULL;

    if(U_FAILURE(*status)) {
        return UCOL_NOT_FOUND;
    }

    /* indices at or beyond table->size have no element; do not read those slots */
    if((element == 0xFFFFFF) || element >= (uint32_t)table->size
        || (tbl = table->elements[element]) == NULL) {
        return UCOL_NOT_FOUND;
    }

    if(forward == FALSE) {
        tbl = tbl->reversed;
    }

    /* valid entries are [0, tbl->position); index == position is one past the end */
    if(position >= (uint32_t)tbl->position) {
        return UCOL_NOT_FOUND;
    }
    return tbl->CEs[position];
}
|
35
icu4c/source/tools/genuca/cnttable.h
Normal file
35
icu4c/source/tools/genuca/cnttable.h
Normal file
@ -0,0 +1,35 @@
|
||||
#ifndef UCOL_CNTTABLE_H
|
||||
#define UCOL_CNTTABLE_H
|
||||
|
||||
#include "uhash.h"
|
||||
#include "UCAData.h"
|
||||
|
||||
typedef struct {
|
||||
ContractionTable **elements;
|
||||
CompactIntArray *mapping;
|
||||
UChar *codePoints;
|
||||
uint32_t *CEs;
|
||||
int32_t *offsets;
|
||||
int32_t position;
|
||||
int32_t size;
|
||||
int32_t capacity;
|
||||
} CntTable;
|
||||
|
||||
CntTable *uprv_cnttab_open(CompactIntArray *mapping, UErrorCode *status);
|
||||
/* construct the table for output */
|
||||
int32_t uprv_cnttab_constructTable(CntTable *table, uint32_t mainOffset, UErrorCode *status);
|
||||
void uprv_cnttab_close(CntTable *table);
|
||||
/* Adds more contractions to the table. If the element does not exist, it creates one. Returns the element handle. */
|
||||
uint32_t uprv_cnttab_addContraction(CntTable *table, uint32_t element, UChar codePoint, uint32_t value, UBool forward, UErrorCode *status);
|
||||
/* Sets a part of a contraction sequence in the table. If the element does not exist, it creates one. Returns the element handle. */
|
||||
uint32_t uprv_cnttab_setContraction(CntTable *table, uint32_t element, int32_t offset, UChar codePoint, uint32_t value, UBool forward, UErrorCode *status);
|
||||
/* Inserts a part of a contraction sequence in the table. Sequences behind the offset are moved back. If the element does not exist, it creates one. Returns the element handle. */
|
||||
uint32_t uprv_cnttab_insertContraction(CntTable *table, uint32_t element, UChar codePoint, uint32_t value, UBool forward, UErrorCode *status);
|
||||
/* this is for adding non contractions */
|
||||
uint32_t uprv_cnttab_changeLastCE(CntTable *table, uint32_t element, uint32_t value, UBool forward, UErrorCode *status);
|
||||
|
||||
uint32_t uprv_cnttab_findCP(CntTable *table, uint32_t element, UChar codePoint, UBool forward, UErrorCode *status);
|
||||
|
||||
uint32_t uprv_cnttab_getCE(CntTable *table, uint32_t element, uint32_t position, UBool forward, UErrorCode *status);
|
||||
|
||||
#endif
|
95
icu4c/source/tools/genuca/genuca.8.in
Normal file
95
icu4c/source/tools/genuca/genuca.8.in
Normal file
@ -0,0 +1,95 @@
|
||||
.\" Hey, Emacs! This is -*-nroff-*- you know...
|
||||
.\"
|
||||
.\" genuca.8: manual page for the genuca utility
|
||||
.\"
|
||||
.\" Copyright (C) 2000 IBM, Inc. and others.
|
||||
.\"
|
||||
.TH GENUCA 8 "22 February 2001" "ICU MANPAGE" "ICU @VERSION@ Manual"
|
||||
.SH NAME
|
||||
.B genuca
|
||||
\- create the UCA data table
|
||||
.SH SYNOPSIS
|
||||
.B genuca
|
||||
[
|
||||
.BR "\-V\fP, \fB\-\-version"
|
||||
]
|
||||
[
|
||||
.BR "\-h\fP, \fB\-?\fP, \fB\-\-help"
|
||||
]
|
||||
[
|
||||
.BR "\-v\fP, \fB\-\-verbose"
|
||||
]
|
||||
[
|
||||
.BI "\-s\fP, \fB\-\-sourcedir" " source"
|
||||
]
|
||||
[
|
||||
.BI "\-d\fP, \fB\-\-destdir" " destination"
|
||||
]
|
||||
.IR bundle " \.\.\."
|
||||
.SH DESCRIPTION
|
||||
.B genuca
|
||||
creates the UCA data table by converting the
|
||||
.I bundle
|
||||
source files passed on the command line to their binary form.
|
||||
The resulting files have a
|
||||
.B .res
|
||||
extension while resource bundle source files typically have a
|
||||
.B .txt
|
||||
extension.
|
||||
The
|
||||
.I bundle
|
||||
file name should be a locale identifier, e.g.
|
||||
.B ja_JP.txt
|
||||
for Japanese (Japan) data, or
|
||||
.B root.txt
|
||||
for the root bundle.
|
||||
.PP
|
||||
These binary files can then be read directly by ICU, or used by
|
||||
.BR pkgdata (8)
|
||||
for incorporation into a larger archive or library.
|
||||
.SH OPTIONS
|
||||
.TP
|
||||
.BR \-V\fP, \fB\-\-version
|
||||
Print the version of
|
||||
.B genuca
|
||||
and exit.
|
||||
.TP
|
||||
.BR \-h\fP, \fB\-?\fP, \fB\-\-help
|
||||
Print help about usage and exit.
|
||||
.TP
|
||||
.BR \-v\fP, \fB\-\-verbose
|
||||
Display extra informative messages during execution.
|
||||
.TP
|
||||
.BI "\-s\fP, \fB\-\-sourcedir" " source"
|
||||
Set the source directory to
|
||||
.IR source .
|
||||
The default source directory is specified by the environment variable
|
||||
.BR ICU_DATA .
|
||||
.TP
|
||||
.BI "\-d\fP, \fB\-\-destdir" " destination"
|
||||
Set the destination directory to
|
||||
.IR destination .
|
||||
The default destination directory is specified by the environment variable
|
||||
.BR ICU_DATA .
|
||||
.SH INVARIANT CHARACTERS
|
||||
The
|
||||
.B invariant character set
|
||||
consists of the following set of characters, expressed as a standard POSIX
|
||||
regular expression:
|
||||
.BR "[a-z]|[A-Z]|[0-9]|_| |+|-|*|/" .
|
||||
This is the set which is guaranteed to be available regardless of code page.
|
||||
.SH ENVIRONMENT
|
||||
.TP 10
|
||||
.B ICU_DATA
|
||||
Specifies the directory containing ICU data. Defaults to
|
||||
.BR @thedatadir@/icu/@VERSION@/ .
|
||||
Some tools in ICU depend on the presence of the trailing slash. It is thus
|
||||
important to make sure that it is present if
|
||||
.B ICU_DATA
|
||||
is set.
|
||||
.SH VERSION
|
||||
@VERSION@
|
||||
.SH COPYRIGHT
|
||||
Copyright (C) 2001 IBM, Inc. and others.
|
||||
.SH SEE ALSO
|
||||
.BR pkgdata (8)
|
956
icu4c/source/tools/genuca/genuca.cpp
Normal file
956
icu4c/source/tools/genuca/genuca.cpp
Normal file
@ -0,0 +1,956 @@
|
||||
#include "UCAData.h"
|
||||
#include "cnttable.h"
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
ExpansionTable expansions;
|
||||
CntTable *contractions;
|
||||
CompactIntArray *mapping = NULL;
|
||||
/*UHashtable *elements = NULL;*/
|
||||
UCAElements le;
|
||||
|
||||
/*
 * Deleter callback for a UCAElements value. It is currently a deliberate
 * no-op: nothing reachable from the element is released here.
 */
void deleteElement(void *element) {
    (void)element;
}
|
||||
|
||||
int32_t readElement(char **from, char *to, char separator, UErrorCode *status) {
|
||||
if(U_FAILURE(*status)) {
|
||||
return 0;
|
||||
}
|
||||
char buffer[1024];
|
||||
int32_t i = 0;
|
||||
while(**from != separator) {
|
||||
if(**from != ' ') {
|
||||
*(buffer+i++) = **from;
|
||||
}
|
||||
(*from)++;
|
||||
}
|
||||
(*from)++;
|
||||
*(buffer + i) = 0;
|
||||
//*to = (char *)malloc(strlen(buffer)+1);
|
||||
strcpy(to, buffer);
|
||||
return i/2;
|
||||
}
|
||||
|
||||
|
||||
uint32_t getSingleCEValue(char *primary, char *secondary, char *tertiary, UBool caseBit, UErrorCode *status) {
|
||||
if(U_FAILURE(*status)) {
|
||||
return 0;
|
||||
}
|
||||
uint32_t value = 0;
|
||||
char primsave = '\0';
|
||||
char secsave = '\0';
|
||||
char tersave = '\0';
|
||||
char *primend = primary+4;
|
||||
if(strlen(primary) > 4) {
|
||||
primsave = *primend;
|
||||
*primend = '\0';
|
||||
}
|
||||
char *secend = secondary+2;
|
||||
if(strlen(secondary) > 2) {
|
||||
secsave = *secend;
|
||||
*secend = '\0';
|
||||
}
|
||||
char *terend = tertiary+2;
|
||||
if(strlen(tertiary) > 2) {
|
||||
tersave = *terend;
|
||||
*terend = '\0';
|
||||
}
|
||||
uint32_t primvalue = (*primary!='\0')?strtoul(primary, &primend, 16):0;
|
||||
uint32_t secvalue = (*secondary!='\0')?strtoul(secondary, &secend, 16):0;
|
||||
uint32_t tervalue = (*tertiary!='\0')?strtoul(tertiary, &terend, 16):0;
|
||||
if(primvalue <= 0xFF) {
|
||||
primvalue <<= 8;
|
||||
}
|
||||
|
||||
value = ((primvalue<<UCOL_PRIMARYORDERSHIFT)&UCOL_PRIMARYORDERMASK)|
|
||||
((secvalue<<UCOL_SECONDARYORDERSHIFT)&UCOL_SECONDARYORDERMASK)|
|
||||
(tervalue&UCOL_TERTIARYORDERMASK);
|
||||
|
||||
// This CE is not special at all... a very uninteresting one...
|
||||
value &= 0xFFFFFF7F;
|
||||
|
||||
// Here's case handling!
|
||||
if(caseBit == TRUE) {
|
||||
value |= 0x40; // 0100 0000 set case bit
|
||||
} else {
|
||||
value &= 0xFFFFFFBF; // ... 1011 1111 (reset case bit)
|
||||
}
|
||||
if(primsave!='\0') {
|
||||
*primend = primsave;
|
||||
}
|
||||
if(secsave!='\0') {
|
||||
*secend = secsave;
|
||||
}
|
||||
if(tersave!='\0') {
|
||||
*terend = tersave;
|
||||
}
|
||||
return value;
|
||||
}
|
||||
|
||||
|
||||
UCAElements *copyUCAElement(UCAElements *that) {
|
||||
UCAElements *r = (UCAElements *)malloc(sizeof(*that));
|
||||
memcpy(r, that, sizeof(*that));
|
||||
return r;
|
||||
}
|
||||
|
||||
void releaseUCACopy(UCAElements *r) {
|
||||
free(r);
|
||||
}
|
||||
|
||||
/*
 * Recursively enters the code point sequence of 'element' into the global
 * 'contractions' table, one code point per recursion level.
 *
 * element     sequence being added; cPoints / cSize are advanced temporarily
 *             for the recursive call and restored before returning
 * existingCE  CE currently stored for the sequence processed so far, or
 *             UCOL_NOT_FOUND
 * forward     direction flag handed through to the table functions
 * status      in/out error code
 *
 * Returns the CE to store for the current prefix: element->mapCE once a single
 * code point is left, a new contraction CE when a new sequence was started,
 * or existingCE when an existing contraction was extended. Returns
 * UCOL_NOT_FOUND if status already indicates a failure.
 */
uint32_t processContraction(UCAElements *element, uint32_t existingCE, UBool forward, UErrorCode *status) {
    if(U_FAILURE(*status)) {
        return UCOL_NOT_FOUND;
    }

    /* end of recursion */
    if(element->cSize == 1) {
        return element->mapCE;
    }

    if(existingCE != UCOL_NOT_FOUND && isContraction(existingCE)) {
        /* We are adding to an existing contraction. Either the next code point
           is already in its table, or it is not. */
        uint32_t found = uprv_cnttab_findCP(contractions, existingCE, element->cPoints[1], forward, status);

        element->cPoints++;
        element->cSize--;
        if(found == 0) {
            /* not there yet: build the rest as a new sequence and insert it */
            uint32_t restCE = processContraction(element, UCOL_NOT_FOUND, forward, status);
            uprv_cnttab_insertContraction(contractions, existingCE, element->cPoints[0], restCE, forward, status);
        } else {
            /* already there: continue down the chain and store the result back */
            uint32_t storedCE = uprv_cnttab_getCE(contractions, existingCE, found, forward, status);
            uint32_t restCE = processContraction(element, storedCE, forward, status);
            uprv_cnttab_setContraction(contractions, existingCE, found, element->cPoints[0], restCE, forward, status);
        }
        element->cPoints--;
        element->cSize++;

        return existingCE;
    }

    /* We encountered either an empty space or a non-contraction element, so a
       new contraction sequence is constructed. Its first entry is code point 0
       carrying existingCE; -1 asks for a new element. */
    int32_t handle = uprv_cnttab_addContraction(contractions, -1, 0, existingCE, forward, status);
    if(forward == FALSE) {
        /* the new element's forward table also gets a 0 entry and a 0xFFFF entry */
        uprv_cnttab_addContraction(contractions, handle, 0, existingCE, TRUE, status);
        uprv_cnttab_addContraction(contractions, handle, 0xFFFF, existingCE, TRUE, status);
    }

    UChar nextCP = element->cPoints[1];
    element->cPoints++;
    element->cSize--;
    uint32_t restCE = processContraction(element, UCOL_NOT_FOUND, forward, status);
    element->cPoints--;
    element->cSize++;

    /* the continuation entry, then a closing 0xFFFF entry carrying existingCE */
    uprv_cnttab_addContraction(contractions, handle, nextCP, restCE, forward, status);
    uprv_cnttab_addContraction(contractions, handle, 0xFFFF, existingCE, forward, status);

    return constructContractCE(handle);
}
|
||||
|
||||
int32_t addExpansion(uint32_t value, UErrorCode *status) {
|
||||
if(U_FAILURE(*status)) {
|
||||
return 0;
|
||||
}
|
||||
if(expansions.CEs == NULL) {
|
||||
expansions.CEs = (uint32_t *)malloc(INIT_EXP_TABLE_SIZE*sizeof(uint32_t));
|
||||
expansions.size = INIT_EXP_TABLE_SIZE;
|
||||
expansions.position = 0;
|
||||
}
|
||||
|
||||
if(expansions.position == expansions.size) {
|
||||
uint32_t *newData = (uint32_t *)realloc(expansions.CEs, 2*expansions.size*sizeof(uint32_t));
|
||||
if(newData == NULL) {
|
||||
fprintf(stderr, "out of memory for expansions\n");
|
||||
*status = U_MEMORY_ALLOCATION_ERROR;
|
||||
return -1;
|
||||
}
|
||||
expansions.CEs = newData;
|
||||
expansions.size *= 2;
|
||||
}
|
||||
|
||||
expansions.CEs[expansions.position] = value;
|
||||
return(expansions.position++);
|
||||
}
|
||||
|
||||
uint32_t inverseTable[0xFFFF][3];
|
||||
uint32_t inversePos = 0;
|
||||
/*UChar *stringContinue[0xFFFF];*/
|
||||
UChar stringContinue[0xFFFF];
|
||||
uint32_t stringContSize[0xFFFF];
|
||||
uint32_t sContPos = 0;
|
||||
uint32_t contSize = 0;
|
||||
|
||||
#define UCOL_INV_SIZEMASK 0xFFF00000
|
||||
#define UCOL_INV_OFFSETMASK 0x000FFFFF
|
||||
#define UCOL_INV_SHIFTVALUE 20
|
||||
|
||||
/*
 * Appends a new row for 'element' to the global inverseTable.
 *
 * Row layout: [0] first CE, [1] second CE when it is a continuation,
 * [2] either the single code point, or for longer sequences
 * ((cSize+1) << UCOL_INV_SHIFTVALUE) | offset, where offset is the position in
 * stringContinue at which the code points are stored.
 *
 * status is set to U_INDEX_OUTOFBOUNDS_ERROR, and nothing is added, when
 * inverseTable or stringContinue has no room left.
 */
void addNewInverse(UCAElements *element, UErrorCode *status) {
    const uint32_t tableSlots = (uint32_t)(sizeof(inverseTable)/sizeof(inverseTable[0]));
    const uint32_t contSlots = (uint32_t)(sizeof(stringContinue)/sizeof(stringContinue[0]));

    /* CEs[1] is only meaningful when the element has more than one CE */
    const UBool hasContinuation =
        (element->noOfCEs > 1 && isContinuation(element->CEs[1])) ? TRUE : FALSE;

    /* One row beyond the new one stays reserved for the terminator row that
       assembleInverseTable() appends. */
    if(inversePos + 2 >= tableSlots
        || (element->cSize >= 2 && sContPos + element->cSize + 1 > contSlots)) {
        *status = U_INDEX_OUTOFBOUNDS_ERROR;
        return;
    }

    if(hasContinuation) {
        fprintf(stderr, "+");
    }
    inversePos++;
    inverseTable[inversePos][0] = element->CEs[0];
    if(hasContinuation) {
        inverseTable[inversePos][1] = element->CEs[1];
    }
    if(element->cSize < 2) {
        inverseTable[inversePos][2] = element->cPoints[0];
    } else { /* store the code points in stringContinue; one extra slot is left after them */
        inverseTable[inversePos][2] = ((element->cSize+1) << UCOL_INV_SHIFTVALUE) | sContPos;
        memcpy(stringContinue+sContPos, element->cPoints, element->cSize*sizeof(UChar));
        sContPos += element->cSize+1;
    }
}
|
||||
|
||||
/*
 * Adds the code points of 'element' to the inverse table row at 'position',
 * which already describes another code point sequence for the same CE.
 *
 * If the row so far holds a single code point, a new run is started at the end
 * of stringContinue: old code point, 0xFFFF, the new code points, 0xFFFE.
 * Otherwise the new code points are appended to the row's existing run: the
 * run's last unit becomes 0xFFFF and 0xFFFE is written after the new code
 * points. In both cases row[2] is updated to the packed size/offset of the run.
 *
 * status is not used by this function.
 *
 * NOTE(review): no bounds checks are made against the size of stringContinue,
 * and when other data follows the run only cSize+1 units of it are moved out
 * of the way — confirm that this is sufficient for all inputs.
 */
void addToExistingInverse(UCAElements *element, uint32_t position, UErrorCode *status) {

    const uint32_t packed = inverseTable[position][2];

    if((packed & UCOL_INV_SIZEMASK) == 0) {
        /* single element, have to make new extension place and put both guys there */
        const uint32_t start = sContPos;

        stringContinue[start] = packed;
        stringContinue[start+1] = 0xFFFF;
        memcpy(stringContinue+start+2, element->cPoints, element->cSize*sizeof(UChar));
        stringContinue[start+2+element->cSize] = 0xFFFE;

        inverseTable[position][2] = ((element->cSize+3) << UCOL_INV_SHIFTVALUE) | start;
        sContPos = start+element->cSize+3;
    } else {
        /* adding to the already existing continuing table */
        const uint32_t runStart = packed & UCOL_INV_OFFSETMASK;
        const uint32_t runLength = (packed & UCOL_INV_SIZEMASK) >> UCOL_INV_SHIFTVALUE;
        const uint32_t runEnd = runStart+runLength;

        if(runEnd < sContPos) {
            /* something is stored behind this run: move cSize+1 units of it back */
            memcpy(stringContinue+runEnd+element->cSize+1, stringContinue+runEnd, (element->cSize+1)*sizeof(UChar));
        }

        stringContinue[runEnd-1] = 0xFFFF;
        memcpy(stringContinue+runEnd, element->cPoints, element->cSize*sizeof(UChar));
        stringContinue[runEnd+element->cSize] = 0xFFFE;
        sContPos += element->cSize+1;

        inverseTable[position][2] = ((runLength+element->cSize+1) << UCOL_INV_SHIFTVALUE) | runStart;
    }
}
|
||||
|
||||
/*
 * Records 'element' in the global inverse table.
 *
 * - If its first CE is smaller than that of the last row, the rows are walked
 *   backwards to the nearest row whose first CE is not greater, and the
 *   element is merged into that row.
 * - If its first CE equals that of the last row, the element is merged into
 *   the last row, unless it carries a continuation CE different from the one
 *   stored there, in which case a new row is appended.
 * - Otherwise a new row is appended.
 *
 * Returns the index of the last row of the table (inversePos).
 */
uint32_t addToInverse(UCAElements *element, UErrorCode *status) {

    if(inverseTable[inversePos][0] > element->CEs[0]) {
        /* The search loop needs an empty body. Previously the merge call was
           the loop body, so it ran for every skipped row and never for the row
           the search stopped at. Row 0 is never filled and holds 0, which
           ends the walk at the latest. */
        uint32_t position = inversePos;
        do {
            --position;
        } while(position > 0 && inverseTable[position][0] > element->CEs[0]);
        /* NOTE(review): when the row found has a smaller (not equal) CE the
           element is still merged into it; a real insertion may be needed. */
        addToExistingInverse(element, position, status);
    } else if(inverseTable[inversePos][0] == element->CEs[0]) {
        if(element->noOfCEs > 1 && isContinuation(element->CEs[1])
            && inverseTable[inversePos][1] != element->CEs[1]) {
            /* also, we should do long primaries here */
            addNewInverse(element, status);
        } else {
            addToExistingInverse(element, inversePos, status);
        }
    } else {
        addNewInverse(element, status);
    }
    return inversePos;
}
|
||||
|
||||
/*
 * Packs the global inverse table and its continuation strings into one memory
 * block (header, table rows, continuation units) and writes that block out
 * through the udata_* API.
 *
 * A terminator row {0xFFFFFFFF, 0xFFFFFFFF, 0x0000FFFF} is appended first, so
 * the global inversePos grows by 2 as a side effect.
 *
 * status  in/out error code; U_INDEX_OUTOFBOUNDS_ERROR when the table has no
 *         room for the terminator row, U_MEMORY_ALLOCATION_ERROR when out of
 *         memory, or the error reported by the udata_* functions
 *
 * Returns the malloc'ed block, which the caller must free, or NULL on failure.
 */
InverseTableHeader *assembleInverseTable(UErrorCode *status) {
    const uint32_t tableSlots = (uint32_t)(sizeof(inverseTable)/sizeof(inverseTable[0]));

    /* rows 0..inversePos are in use and the terminator goes to inversePos+1 */
    if(inversePos + 2 > tableSlots) {
        *status = U_INDEX_OUTOFBOUNDS_ERROR;
        return NULL;
    }

    uint32_t headerByteSize = paddedsize(sizeof(InverseTableHeader));
    uint32_t inverseTableByteSize = (inversePos+2)*sizeof(uint32_t)*3;
    uint32_t contsByteSize = sContPos * sizeof(UChar);

    InverseTableHeader *result =
        (InverseTableHeader *)malloc(headerByteSize + inverseTableByteSize + contsByteSize);
    if(result == NULL) {
        *status = U_MEMORY_ALLOCATION_ERROR;
        return NULL;
    }
    result->byteSize = headerByteSize + inverseTableByteSize + contsByteSize;

    /* append the terminator row */
    inversePos++;
    inverseTable[inversePos][0] = 0xFFFFFFFF;
    inverseTable[inversePos][1] = 0xFFFFFFFF;
    inverseTable[inversePos][2] = 0x0000FFFF;
    inversePos++;

    result->tableSize = inversePos;
    result->contsSize = sContPos;

    /* byte offsets of the two data areas from the start of the block */
    result->table = headerByteSize;
    result->conts = headerByteSize + inverseTableByteSize;

    memcpy((uint8_t *)result + result->table, inverseTable, inverseTableByteSize);
    memcpy((uint8_t *)result + result->conts, stringContinue, contsByteSize);

    UNewDataMemory *pData = udata_create(NULL, INVC_DATA_TYPE, INVC_DATA_NAME, &invDataInfo,
                                         U_COPYRIGHT_STRING, status);
    if(U_FAILURE(*status)) {
        fprintf(stderr, "Error: unable to create data memory, error %d\n", *status);
        free(result);
        return NULL;
    }

    /* write the data to the file */
    fprintf(stdout, "Writing out inverse table\n");
    udata_writeBlock(pData, result, result->byteSize);

    /* finish up */
    udata_finish(pData, status);
    if(U_FAILURE(*status)) {
        fprintf(stderr, "Error: error %d writing the output file\n", *status);
        free(result);
        return NULL;
    }

    return result;
}
|
||||
|
||||
|
||||
/* This adds a read element, while testing for existence */
|
||||
uint32_t addAnElement(UCAElements *element, UErrorCode *status) {
|
||||
|
||||
uint32_t i = 1, expansion = 0;
|
||||
|
||||
if(U_FAILURE(*status)) {
|
||||
return 0xFFFF;
|
||||
}
|
||||
if(element->noOfCEs == 1) {
|
||||
if(element->isThai == FALSE) {
|
||||
element->mapCE = element->CEs[0];
|
||||
} else { /* add thai - totally bad here */
|
||||
expansion = UCOL_SPECIAL_FLAG | (THAI_TAG<<UCOL_TAG_SHIFT)
|
||||
| ((addExpansion(element->CEs[0], status)+(paddedsize(sizeof(UCATableHeader))>>2))<<4)
|
||||
| 0x1;
|
||||
element->mapCE = expansion;
|
||||
}
|
||||
} else {
|
||||
expansion = UCOL_SPECIAL_FLAG | (EXPANSION_TAG<<UCOL_TAG_SHIFT)
|
||||
| ((addExpansion(element->CEs[0], status)+(paddedsize(sizeof(UCATableHeader))>>2))<<4)
|
||||
& 0xFFFFF0;
|
||||
|
||||
for(i = 1; i<element->noOfCEs; i++) {
|
||||
addExpansion(element->CEs[i], status);
|
||||
}
|
||||
if(element->noOfCEs <= 0xF) {
|
||||
expansion |= element->noOfCEs;
|
||||
} else {
|
||||
addExpansion(0, status);
|
||||
}
|
||||
element->mapCE = expansion;
|
||||
}
|
||||
|
||||
uint32_t CE = ucmp32_get(mapping, element->cPoints[0]);
|
||||
|
||||
if(element->cSize > 1) { /* we're adding a contraction */
|
||||
/* and we need to deal with it */
|
||||
/* we could aready have something in table - or we might not */
|
||||
/* The fact is that we want to add or modify an existing contraction */
|
||||
/* and add it backwards then */
|
||||
uint32_t result = processContraction(element, CE, TRUE, status);
|
||||
if(CE == UCOL_NOT_FOUND || !isContraction(CE)) {
|
||||
ucmp32_set(mapping, element->cPoints[0], result);
|
||||
}
|
||||
/* add the reverse order */
|
||||
reverseElement(element);
|
||||
CE = ucmp32_get(mapping, element->cPoints[0]);
|
||||
result = processContraction(element, CE, FALSE, status);
|
||||
if(CE == UCOL_NOT_FOUND || !isContraction(CE)) {
|
||||
ucmp32_set(mapping, element->cPoints[0], result);
|
||||
}
|
||||
} else { /* easy case, */
|
||||
if( CE != UCOL_NOT_FOUND) {
|
||||
if(isContraction(CE)) { /* adding a non contraction element (thai, expansion, single) to already existing contraction */
|
||||
uprv_cnttab_setContraction(contractions, CE, 0, 0, element->mapCE, TRUE, status);
|
||||
/* This loop has to change the CE at the end of contraction REDO!*/
|
||||
uprv_cnttab_changeLastCE(contractions, CE, element->mapCE, TRUE, status);
|
||||
} else {
|
||||
fprintf(stderr, "Fatal error - trying to overwrite already existing data for codepoint %04X\n", element->cPoints[0]);
|
||||
*status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
}
|
||||
} else {
|
||||
ucmp32_set(mapping, element->cPoints[0], element->mapCE);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
return CE;
|
||||
}
|
||||
|
||||
/**
 * Converts one ASCII hexadecimal digit to its numeric value.
 *
 * @param hex character to convert ('0'-'9', 'a'-'f' or 'A'-'F')
 * @return 0..15 for a hex digit; 0 for any other character
 */
int32_t hex2num(char hex) {
    int32_t value = 0;   /* also the answer for non-hex input */

    if('0' <= hex && hex <= '9') {
        value = hex - '0';
    } else if('a' <= hex && hex <= 'f') {
        value = 10 + (hex - 'a');
    } else if('A' <= hex && hex <= 'F') {
        value = 10 + (hex - 'A');
    }
    return value;
}
|
||||
|
||||
/* Here's the fun part: */
|
||||
/* Normal CE produced by getSingleCEValue | 16P | 8S |0|C| 6T | */
|
||||
/* Continuation CE produced by processContinuation | 16P | 8S |1|0| 6T | */
|
||||
/* Long primary, produced by ???? | 24P |1|1| 6S | */
|
||||
|
||||
/**
 * Assembles the flat UCA table image from the global expansion table,
 * contraction table and code point mapping.
 *
 * Image layout, every section starting on a 4-byte boundary:
 *   header | expansion CEs | contraction code points | contraction CEs |
 *   flattened mapping | 256-entry Latin-1 fast table
 * The header stores the byte offset of each section.
 *
 * @param variableTopValue value stored in the header's variableTopValue field
 * @param status           ICU error code; nothing is done if it already
 *                         signals failure. Set to U_MEMORY_ALLOCATION_ERROR if
 *                         the image cannot be allocated and to
 *                         U_INTERNAL_PROGRAM_ERROR if the number of bytes
 *                         written disagrees with the precomputed size.
 * @return the malloc'ed image (caller frees it, and closes its `mapping`
 *         member), or NULL on failure
 */
UCATableHeader *assembleTable(UChar variableTopValue, UErrorCode *status) {
    if(U_FAILURE(*status)) {
        return NULL;
    }

    /* offset of the contraction block, counted in UChars rather than bytes */
    uint32_t beforeContractions = (paddedsize(sizeof(UCATableHeader))+paddedsize(expansions.position*sizeof(uint32_t)))/sizeof(UChar);

    int32_t contractionsSize = uprv_cnttab_constructTable(contractions, beforeContractions, status);

    ucmp32_compact(mapping, 1);
    /* NOTE(review): `ms` is never released in this function; `flattened`
     * presumably points into its buffer, so any release has to come after the
     * memcpy of the mapping below - confirm against umemstrm.h. */
    UMemoryStream *ms = uprv_mstrm_openNew(8192);
    int32_t mappingSize = ucmp32_flattenMem(mapping, ms);
    const uint8_t *flattened = uprv_mstrm_getBuffer(ms, &mappingSize);

    uint32_t tableOffset = 0;
    uint8_t *dataStart;

    /* One term per section, in the order the sections are written below, so
     * that the consistency check at the end compares like with like. */
    int32_t toAllocate = (int32_t)(paddedsize(sizeof(UCATableHeader))
        + paddedsize(expansions.position*sizeof(uint32_t))
        + paddedsize(contractionsSize*sizeof(UChar))
        + paddedsize(contractionsSize*sizeof(uint32_t))
        + paddedsize(mappingSize)
        + 0x100*sizeof(uint32_t));

    dataStart = (uint8_t *)malloc(toAllocate);
    if(dataStart == NULL) {
        *status = U_MEMORY_ALLOCATION_ERROR;
        return NULL;
    }
    UCATableHeader *myData = (UCATableHeader *)dataStart;

    /* Stuff everything with @ so that padding is recognizable, then clear the header */
    memset(dataStart, '@', toAllocate);
    memset(dataStart+tableOffset, 0, sizeof(UCATableHeader));
    tableOffset += paddedsize(sizeof(UCATableHeader));

    /* copy expansions */
    myData->expansion = tableOffset;
    memcpy(dataStart+tableOffset, expansions.CEs, expansions.position*sizeof(uint32_t));
    tableOffset += paddedsize(expansions.position*sizeof(uint32_t));

    /* contractions block: first the contraction index (code points) ... */
    myData->contractionIndex = tableOffset;
    memcpy(dataStart+tableOffset, contractions->codePoints, contractionsSize*sizeof(UChar));
    tableOffset += paddedsize(contractionsSize*sizeof(UChar));

    /* ... then the contraction collation elements */
    myData->contractionCEs = tableOffset;
    memcpy(dataStart+tableOffset, contractions->CEs, contractionsSize*sizeof(uint32_t));
    tableOffset += paddedsize(contractionsSize*sizeof(uint32_t));

    /* copy mapping table */
    myData->mappingPosition = tableOffset;
    memcpy(dataStart+tableOffset, flattened, mappingSize);
    tableOffset += paddedsize(mappingSize);

    /* construct the fast tracker for latin one */
    myData->latinOneMapping = tableOffset;
    uint32_t *store = (uint32_t*)(dataStart+tableOffset);
    int32_t i = 0;
    for(i = 0; i<=0xFF; i++) {
        *(store++) = ucmp32_get(mapping,i);
        tableOffset+=sizeof(uint32_t);
    }

    if(tableOffset != (uint32_t)toAllocate) {
        fprintf(stderr, "calculation screwup!!! Expected to write %i but wrote %i instead!!!\n", toAllocate, (int32_t)tableOffset);
        *status = U_INTERNAL_PROGRAM_ERROR;
        free(dataStart);
        return NULL;
    }

    myData->size = tableOffset;
    myData->variableTopValue = variableTopValue;
    myData->strength = UCOL_TERTIARY;
    myData->frenchCollation = UCOL_OFF;
    myData->alternateHandling = UCOL_SHIFTED; /* attribute for handling variable elements*/
    myData->caseFirst = UCOL_LOWER_FIRST;     /* who goes first, lower case or uppercase */
    myData->caseLevel = UCOL_OFF;             /* do we have an extra case level */
    myData->normalizationMode = UCOL_ON;      /* attribute for normalization */

    /* This should happen upon resurrection: reopen the mapping from the image itself */
    const uint8_t *mapPosition = (uint8_t*)myData+myData->mappingPosition;
    myData->mapping = ucmp32_openFromData(&mapPosition, status);
    return myData;
}
|
||||
|
||||
void processFile(FILE *data, UErrorCode *status) {
|
||||
if(U_FAILURE(*status)) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
UCAElements *readAnElement(FILE *data, UErrorCode *status) {
|
||||
char buffer[2048], primary[100], secondary[100], tertiary[100];
|
||||
UBool detectedContraction;
|
||||
int32_t i = 0;
|
||||
char *pointer = NULL;
|
||||
char *commentStart = NULL;
|
||||
char *startCodePoint = NULL;
|
||||
char *endCodePoint = NULL;
|
||||
char *spacePointer = NULL;
|
||||
char *result = fgets(buffer, 2048, data);
|
||||
if(U_FAILURE(*status)) {
|
||||
return 0;
|
||||
}
|
||||
*primary = *secondary = *tertiary = '\0';
|
||||
if(result == NULL) {
|
||||
if(feof(data)) {
|
||||
return NULL;
|
||||
} else {
|
||||
fprintf(stderr, "empty line but no EOF!\n");
|
||||
*status = U_INVALID_FORMAT_ERROR;
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
if(buffer[0] == '#' || buffer[0] == '\n') {
|
||||
return NULL; // just a comment, skip whole line
|
||||
}
|
||||
|
||||
UCAElements *element = ≤ //(UCAElements *)malloc(sizeof(UCAElements));
|
||||
|
||||
if(buffer[0] == '[') {
|
||||
element->variableTop = TRUE;
|
||||
return element; // just a comment, skip whole line
|
||||
}
|
||||
element->variableTop = FALSE;
|
||||
|
||||
startCodePoint = buffer;
|
||||
endCodePoint = strchr(startCodePoint, ';');
|
||||
|
||||
if(endCodePoint == 0) {
|
||||
fprintf(stderr, "error - line with no code point!\n");
|
||||
*status = U_INVALID_FORMAT_ERROR; /* No code point - could be an error, but probably only an empty line */
|
||||
return NULL;
|
||||
} else {
|
||||
*(endCodePoint) = 0;
|
||||
}
|
||||
|
||||
if(element != NULL) {
|
||||
memset(element, 0, sizeof(*element));
|
||||
} else {
|
||||
*status = U_MEMORY_ALLOCATION_ERROR;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
element->cPoints = element->uchars;
|
||||
|
||||
spacePointer = strchr(buffer, ' ');
|
||||
sscanf(buffer, "%04X", element->cPoints); /* read first code point */
|
||||
element->codepoint = element->cPoints[0];
|
||||
if(spacePointer == 0) {
|
||||
detectedContraction = FALSE;
|
||||
element->cSize = 1;
|
||||
} else {
|
||||
i = 1;
|
||||
detectedContraction = TRUE;
|
||||
while(spacePointer != NULL) {
|
||||
sscanf(spacePointer+1, "%04X", (element->cPoints+i));
|
||||
i++;
|
||||
spacePointer = strchr(spacePointer+1, ' ');
|
||||
}
|
||||
|
||||
element->cSize = i;
|
||||
|
||||
//fprintf(stderr, "Number of codepoints in contraction: %i\n", i);
|
||||
}
|
||||
|
||||
startCodePoint = endCodePoint+1;
|
||||
endCodePoint = strchr(startCodePoint, ';');
|
||||
|
||||
while(*startCodePoint != 'L' && *startCodePoint != 'S') {
|
||||
startCodePoint++;
|
||||
if(startCodePoint == endCodePoint) {
|
||||
*status = U_INVALID_FORMAT_ERROR;
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
if(*startCodePoint == 'S') {
|
||||
element->caseBit = FALSE;
|
||||
} else {
|
||||
element->caseBit = TRUE;
|
||||
}
|
||||
|
||||
startCodePoint = endCodePoint+1;
|
||||
|
||||
commentStart = strchr(startCodePoint, '#');
|
||||
if(commentStart == NULL) {
|
||||
commentStart = strlen(startCodePoint) + startCodePoint;
|
||||
}
|
||||
|
||||
i = 0;
|
||||
uint32_t CEindex = 0;
|
||||
element->noOfCEs = 0;
|
||||
for(;;) {
|
||||
endCodePoint = strchr(startCodePoint, ']');
|
||||
if(endCodePoint == NULL || endCodePoint >= commentStart) {
|
||||
break;
|
||||
}
|
||||
pointer = strchr(startCodePoint, '[');
|
||||
pointer++;
|
||||
|
||||
element->sizePrim[i]=readElement(&pointer, primary, ',', status);
|
||||
element->sizeSec[i]=readElement(&pointer, secondary, ',', status);
|
||||
element->sizeTer[i]=readElement(&pointer, tertiary, ']', status);
|
||||
|
||||
|
||||
/* I want to get the CEs entered right here, including continuation */
|
||||
#if 0
|
||||
if(element->sizePrim[i]==3 &&
|
||||
strtoul(secondary, 0, 16)== UCOL_UNMARKED &&
|
||||
strtoul(tertiary, 0, 16) < 0x40) {
|
||||
/* This is a test for a long primary - secondary has 6 bits and tertiary must be unmarked */
|
||||
/* fprintf(stderr, "Long primary in expansion for 0x%04X\n", element->codepoint);*/
|
||||
element->CEs[CEindex++] = (uint32_t)strtoul(primary, 0, 16) << 8 | 0xC0 | (strtoul(tertiary, 0, 16) & 0x3F);
|
||||
/* Long primary, | 24P |1|1| 6T | */
|
||||
} else {
|
||||
#endif /* we will try to go without long primaries */
|
||||
element->CEs[CEindex++] = getSingleCEValue(primary, secondary, tertiary, element->caseBit, status);
|
||||
|
||||
uint32_t CEi = 1;
|
||||
while(2*CEi<element->sizePrim[i] || CEi<element->sizeSec[i] || CEi<element->sizeTer[i]) {
|
||||
uint32_t value = 0x80; /* Continuation marker */
|
||||
if(2*CEi<element->sizePrim[i]) {
|
||||
value |= ((hex2num(*(primary+4*CEi))&0xF)<<28);
|
||||
value |= ((hex2num(*(primary+4*CEi+1))&0xF)<<24);
|
||||
}
|
||||
|
||||
if(2*CEi+1<element->sizePrim[i]) {
|
||||
value |= ((hex2num(*(primary+4*CEi+2))&0xF)<<20);
|
||||
value |= ((hex2num(*(primary+4*CEi+3))&0xF)<<16);
|
||||
}
|
||||
|
||||
if(CEi<element->sizeSec[i]) {
|
||||
value |= ((hex2num(*(secondary+2*CEi))&0xF)<<12);
|
||||
value |= ((hex2num(*(secondary+2*CEi+1))&0xF)<<8);
|
||||
}
|
||||
|
||||
if(CEi<element->sizeTer[i]) {
|
||||
value |= ((hex2num(*(tertiary+2*CEi))&0x3)<<4);
|
||||
value |= (hex2num(*(tertiary+2*CEi+1))&0xF);
|
||||
}
|
||||
|
||||
CEi++;
|
||||
|
||||
element->CEs[CEindex++] = value;
|
||||
}
|
||||
#if 0
|
||||
}
|
||||
#endif /* part for long primaries */
|
||||
|
||||
uint32_t terValue = strtoul(tertiary+strlen(tertiary)-2, NULL, 16);
|
||||
if(terValue > 0x3F) {
|
||||
fprintf(stderr, "Tertiary value %02X too big for %04X\n", terValue, element->codepoint);
|
||||
}
|
||||
startCodePoint = endCodePoint+1;
|
||||
i++;
|
||||
}
|
||||
element->noOfCEs = CEindex;
|
||||
|
||||
element->isThai = UCOL_ISTHAIPREVOWEL(element->codepoint);
|
||||
|
||||
// we don't want any strange stuff after useful data!
|
||||
while(pointer < commentStart) {
|
||||
if(*pointer != ' ') {
|
||||
*status=U_INVALID_FORMAT_ERROR;
|
||||
break;
|
||||
}
|
||||
*pointer++;
|
||||
}
|
||||
|
||||
/*
|
||||
strcpy(element->comment, commentStart);
|
||||
uhash_put(elements, (void *)element->codepoint, element, status);
|
||||
*/
|
||||
|
||||
if(U_FAILURE(*status)) {
|
||||
fprintf(stderr, "problem putting stuff in hash table\n");
|
||||
*status = U_INTERNAL_PROGRAM_ERROR;
|
||||
free(element);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return element;
|
||||
|
||||
}
|
||||
|
||||
/**
 * Reverses an element in place: first the order of its code points, then, if
 * it has more than one collation element, the order of its CEs.
 *
 * In the multi-CE case the reversed CEs are appended to the expansion table
 * and el->mapCE is replaced by an EXPANSION_TAG special CE pointing at them
 * (CE count in the low 4 bits when it fits, otherwise a 0 terminator is
 * appended). Errors reported by addExpansion() go into a local status and are
 * not propagated.
 *
 * @param el element to reverse; codepoint, cPoints[], CEs[] and mapCE may change
 */
void reverseElement(UCAElements *el) {
    int32_t head, tail, i;
    uint32_t expansion = 0;
    UErrorCode status = U_ZERO_ERROR;

    /* swap code points from both ends towards the middle */
    for(head = 0, tail = el->cSize-1; head < tail; head++, tail--) {
        UChar swapped = el->cPoints[head];
        el->cPoints[head] = el->cPoints[tail];
        el->cPoints[tail] = swapped;
    }
    el->codepoint = el->cPoints[0];

    if(el->noOfCEs <= 1) {
        return;
    }

    /* this is an expansion that needs to be reversed and added - also, we need to change the mapValue */
    for(head = 0, tail = el->noOfCEs-1; head < tail; head++, tail--) {
        uint32_t swappedCE = el->CEs[head];
        el->CEs[head] = el->CEs[tail];
        el->CEs[tail] = swappedCE;
    }

    expansion = UCOL_SPECIAL_FLAG | (EXPANSION_TAG<<UCOL_TAG_SHIFT)
        | (((addExpansion(el->CEs[0], &status)+(paddedsize(sizeof(UCATableHeader))>>2))<<4)
           & 0xFFFFF0);

    for(i = 1; i<el->noOfCEs; i++) {
        addExpansion(el->CEs[i], &status);
    }

    if(el->noOfCEs <= 0xF) {
        expansion |= el->noOfCEs;
    } else {
        addExpansion(0, &status);
    }
    el->mapCE = expansion;
}
|
||||
|
||||
/**
 * Writes an assembled UCA table image out through the udata_* writer.
 *
 * @param data   image to write; data->size bytes starting at `data` are written
 * @param status ICU error code; nothing is done if it already signals
 *               failure. Set to U_ILLEGAL_ARGUMENT_ERROR if data is NULL;
 *               failures of the writer are reported on stderr and left in
 *               *status.
 */
void writeOutData(UCATableHeader *data, UErrorCode *status) {
    UNewDataMemory *pData = NULL;

    if(U_FAILURE(*status)) {
        return;
    }
    if(data == NULL) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }

    /* NULL: no explicit output directory is passed */
    pData=udata_create(NULL, UCA_DATA_TYPE, UCA_DATA_NAME, &dataInfo,
                       U_COPYRIGHT_STRING, status);
    if(U_FAILURE(*status)) {
        fprintf(stderr, "Error: unable to create data memory, error %d\n", *status);
        return;
    }

    /* write the data to the file */
    fprintf(stdout, "Writing out table\n");
    udata_writeBlock(pData, data, data->size);

    /* finish up */
    udata_finish(pData, status);
    if(U_FAILURE(*status)) {
        fprintf(stderr, "Error: error %d writing the output file\n", *status);
        return;
    }
}
|
||||
|
||||
int main(int argc, char* argv[]) {
|
||||
FILE *data = fopen("FractionalUCA.txt", "r");
|
||||
//FILE *data = fopen("uca30codepointsort.txt", "r");
|
||||
int32_t i = 0, j = 0, k = 0, line = 0, thai = 0;
|
||||
int32_t sizesPrim[35], sizesSec[35], sizesTer[35];
|
||||
int32_t terValue[0xffff], secValue[0xffff];
|
||||
int32_t sizeBreakDown[35][35][35];
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
UCAElements *element = NULL;
|
||||
UChar variableTopValue = 0;
|
||||
UBool foundVariableTop = FALSE;
|
||||
|
||||
if(data == NULL) {
|
||||
fprintf(stderr, "Couldn't open file\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
memset(secValue, 0, 0xffff*sizeof(int32_t));
|
||||
memset(terValue, 0, 0xffff*sizeof(int32_t));
|
||||
memset(sizesPrim, 0, 35*sizeof(int32_t));
|
||||
memset(sizesSec, 0, 35*sizeof(int32_t));
|
||||
memset(sizesTer, 0, 35*sizeof(int32_t));
|
||||
memset(sizeBreakDown, 0, 35*35*35*sizeof(int32_t));
|
||||
memset(&expansions, 0, sizeof(expansions));
|
||||
memset(&contractions, 0, sizeof(contractions));
|
||||
memset(inverseTable, 0, sizeof(int32_t)*3*0xFFFF);
|
||||
|
||||
|
||||
mapping = ucmp32_open(UCOL_UNMAPPED);
|
||||
contractions = uprv_cnttab_open(mapping, &status);
|
||||
ucmp32_setRange(mapping, 0, 0xFFFF, UCOL_NOT_FOUND);
|
||||
|
||||
/*
|
||||
elements = uhash_open(uhash_hashLong, uhash_compareLong, &status);
|
||||
|
||||
uhash_setValueDeleter(elements, deleteElement);
|
||||
*/
|
||||
|
||||
if(mapping == NULL) {
|
||||
return(-1);
|
||||
}
|
||||
|
||||
while(!feof(data)) {
|
||||
if(U_FAILURE(status)) {
|
||||
fprintf(stderr, "Something returned an error %i while processing line: %i\nExiting...", status, line);
|
||||
exit(status);
|
||||
}
|
||||
|
||||
element = readAnElement(data, &status);
|
||||
line++;
|
||||
if(element != NULL) {
|
||||
/* this does statistics on CE lengths, but is currently broken */
|
||||
/*
|
||||
for( i = 0; i<element->noOfCEs; i++) {
|
||||
sizesPrim[element->sizePrim[i]]++;
|
||||
sizesSec[element->sizeSec[i]]++;
|
||||
sizesTer[element->sizeTer[i]]++;
|
||||
|
||||
sizeBreakDown[element->sizePrim[i]][element->sizeSec[i]][element->sizeTer[i]]++;
|
||||
|
||||
if(element->sizePrim[i] == 2 && element->sizeSec[i]==2) {
|
||||
terValue[strtoul(element->tertiary[i], 0, 16)]++;
|
||||
secValue[strtoul(element->secondary[i], 0, 16)]++;
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
|
||||
// we have read the line, now do something sensible with the read data!
|
||||
if(element->variableTop == TRUE) {
|
||||
foundVariableTop = TRUE;
|
||||
continue;
|
||||
}
|
||||
|
||||
if(variableTopValue == 0 && foundVariableTop == TRUE) {
|
||||
variableTopValue = element->cPoints[0];
|
||||
foundVariableTop = FALSE;
|
||||
}
|
||||
|
||||
/* we're first adding to inverse, because addAnElement will reverse the order */
|
||||
/* of code points and stuff... we don't want that to happen */
|
||||
uint32_t invResult = addToInverse(element, &status);
|
||||
uint32_t result = addAnElement(element, &status);
|
||||
//deleteElement(element);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
fprintf(stderr, "Lines read: %i\n", line);
|
||||
|
||||
|
||||
|
||||
/*
|
||||
for(i = 0; i<35; i++) {
|
||||
fprintf(stderr, "size %i: P:%i S:%i T:%i\n", i, sizesPrim[i], sizesSec[i], sizesTer[i]);
|
||||
}
|
||||
|
||||
for(i = 0; i<35; i++) {
|
||||
UBool printedPrimary = FALSE;
|
||||
for(j = 0; j<35; j++) {
|
||||
for(k = 0; k<35; k++) {
|
||||
if(sizeBreakDown[i][j][k] != 0) {
|
||||
if(!printedPrimary) {
|
||||
fprintf(stderr, "Primary: %i\n", i);
|
||||
printedPrimary = TRUE;
|
||||
}
|
||||
fprintf(stderr, "Sec: %i, Ter: %i = %i\n", j, k, sizeBreakDown[i][j][k]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for(i = 0; i<(uint32_t)0xffff; i++) {
|
||||
if(terValue[i] != 0) {
|
||||
fprintf(stderr, "Tertiaries with value %04X : %i\n", i, terValue[i]);
|
||||
}
|
||||
if(secValue[i] != 0) {
|
||||
fprintf(stderr, "Secondaries with value %04X : %i\n", i, secValue[i]);
|
||||
}
|
||||
}
|
||||
*/
|
||||
/* test */
|
||||
UCATableHeader *myData = assembleTable(variableTopValue, &status);
|
||||
writeOutData(myData, &status);
|
||||
|
||||
InverseTableHeader *inverse = assembleInverseTable(&status);
|
||||
/*
|
||||
uint32_t *itab = (uint32_t *)((uint8_t *)inverse + inverse->table);
|
||||
UChar *conts = (UChar *)((uint8_t *)inverse + inverse->conts);
|
||||
for(i = 0; i<inverse->tableSize; i++) {
|
||||
fprintf(stderr, "[%04X] 0x%08X 0x%08X 0x%08X\n", i, *(itab+3*i), *(itab+3*i+1), *(itab+3*i+2));
|
||||
if((*(itab+3*i+2) & UCOL_INV_SIZEMASK) != 0) {
|
||||
uint32_t contIndex = *(itab+3*i+2) & UCOL_INV_OFFSETMASK;
|
||||
uint32_t contSize = (*(itab+3*i+2) & UCOL_INV_SIZEMASK) >> UCOL_INV_SHIFTVALUE;
|
||||
fprintf(stderr, "\t");
|
||||
for(j = 0; j<contSize; j++) {
|
||||
if(*(conts+contIndex+j) < 0xFFFE) {
|
||||
fprintf(stderr, "%04X ", *(conts+contIndex+j));
|
||||
} else {
|
||||
fprintf(stderr, "\n\t");
|
||||
}
|
||||
}
|
||||
fprintf(stderr, "\n");
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
uprv_cnttab_close(contractions);
|
||||
ucmp32_close(mapping);
|
||||
|
||||
//printOutTable(myData, &status);
|
||||
//uhash_close(elements);
|
||||
ucmp32_close(myData->mapping);
|
||||
|
||||
free(myData);
|
||||
|
||||
|
||||
return 0;
|
||||
}
|
100
icu4c/source/tools/genuca/genuca.h
Normal file
100
icu4c/source/tools/genuca/genuca.h
Normal file
@ -0,0 +1,100 @@
|
||||
#ifndef UCADATA_H
|
||||
#define UCADATA_H
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/unicode.h"
|
||||
#include "ucolimp.h"
|
||||
#include "ucmp32.h"
|
||||
#include "compitr.h"
|
||||
#include "uhash.h"
|
||||
#include "umemstrm.h"
|
||||
#include "unewdata.h"
|
||||
#ifdef WIN32
|
||||
#include <direct.h>
|
||||
#else
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
/* Rounds a byte count up to the next multiple of 4 (values that are already a
 * multiple of 4 are returned unchanged). The argument is evaluated more than
 * once, so it must not have side effects. */
#define paddedsize(something) ((((something)%4)==0)?(something):((something)+(4-(something)%4)))
|
||||
|
||||
/* UDataInfo for UCA mapping table */
|
||||
static const UDataInfo dataInfo = {
    sizeof(UDataInfo),
    0,

    U_IS_BIG_ENDIAN,
    U_CHARSET_FAMILY,
    sizeof(UChar),
    0,

    { 0x55, 0x43, 0x6f, 0x6c },     /* dataFormat = "UCol" (ASCII bytes) */
    { 1, 0, 0, 0 },                 /* formatVersion */
    { 3, 0, 0, 0 }                  /* dataVersion = Unicode version */
};
|
||||
|
||||
/* UDataInfo for inverse UCA table */
|
||||
static const UDataInfo invDataInfo = {
    sizeof(UDataInfo),
    0,

    U_IS_BIG_ENDIAN,
    U_CHARSET_FAMILY,
    sizeof(UChar),
    0,

    { 0x49, 0x6E, 0x76, 0x43 },     /* dataFormat = "InvC" (ASCII bytes) */
    { 1, 0, 0, 0 },                 /* formatVersion */
    { 3, 0, 0, 0 }                  /* dataVersion = Unicode version */
};
|
||||
|
||||
typedef struct {
|
||||
UChar codepoint;
|
||||
UChar uchars[128];
|
||||
UChar *cPoints;
|
||||
int32_t cSize; /* Number of characters in sequence - for contraction */
|
||||
int32_t noOfCEs; /* Number of collation elements */
|
||||
uint32_t CEs[128]; /* These are collation elements - there could be more than one - in case of expansion */
|
||||
uint32_t mapCE; /* This is the value element maps in original table */
|
||||
int32_t sizePrim[128];
|
||||
int32_t sizeSec[128];
|
||||
int32_t sizeTer[128];
|
||||
UBool variableTop;
|
||||
UBool caseBit;
|
||||
UBool isThai;
|
||||
} UCAElements;
|
||||
|
||||
/* Growing list of collation elements referenced by expansion CEs. */
typedef struct {
    uint32_t *CEs;      /* the stored collation elements */
    int32_t position;   /* number of entries in use; that many are copied into the output table */
    int32_t size;       /* presumably the allocated capacity - confirm in addExpansion() */
} ExpansionTable;
|
||||
|
||||
struct ContractionTable;
|
||||
|
||||
struct ContractionTable {
|
||||
UChar *codePoints;
|
||||
uint32_t *CEs;
|
||||
int32_t position;
|
||||
int32_t size;
|
||||
int32_t backSize;
|
||||
UBool forward;
|
||||
ContractionTable *reversed;
|
||||
};
|
||||
|
||||
void deleteElement(void *element);
|
||||
int32_t readElement(char **from, char *to, char separator, UErrorCode *status);
|
||||
int32_t addExpansion(uint32_t value, UErrorCode *status);
|
||||
uint32_t getSingleCEValue(char *primary, char *secondary, char *tertiary, UBool caseBit, UErrorCode *status);
|
||||
uint32_t processContraction(UCAElements *element, uint32_t existingCE, UBool forward, UErrorCode *status);
|
||||
void printOutTable(UCATableHeader *myData, UErrorCode *status);
|
||||
UCATableHeader *assembleTable(UChar variableTopValue, UErrorCode *status);
|
||||
void processFile(FILE *data, UErrorCode *status);
|
||||
/* This adds a read element, while testing for existence */
|
||||
uint32_t addAnElement(UCAElements *element, UErrorCode *status);
|
||||
UCAElements *readAnElement(FILE *data, UErrorCode *status);
|
||||
void reverseElement(UCAElements *el);
|
||||
|
||||
|
||||
#endif
|
186
icu4c/source/tools/genuca/tblprint.cpp
Normal file
186
icu4c/source/tools/genuca/tblprint.cpp
Normal file
@ -0,0 +1,186 @@
|
||||
#include "tblprint.h"
|
||||
|
||||
char *formatElementString(uint32_t CE, char *buffer) {
|
||||
char temp[1024];
|
||||
UBool firstPrim = FALSE;
|
||||
sprintf(buffer, "[");
|
||||
if(UCOL_PRIMARYORDER(CE)>>8 != 0x02) {
|
||||
sprintf(temp, "%02X ", UCOL_PRIMARYORDER(CE)>>8);
|
||||
strcat(buffer, temp);
|
||||
firstPrim = TRUE;
|
||||
}
|
||||
|
||||
if((UCOL_PRIMARYORDER(CE)&0xFF) != 0x02 || firstPrim == TRUE) {
|
||||
sprintf(temp, "%02X", UCOL_PRIMARYORDER(CE)&0xFF);
|
||||
strcat(buffer, temp);
|
||||
}
|
||||
firstPrim = FALSE;
|
||||
|
||||
strcat(buffer, ",");
|
||||
|
||||
if(UCOL_SECONDARYORDER(CE) != 0x02) {
|
||||
sprintf(temp, " %02X", UCOL_SECONDARYORDER(CE));
|
||||
strcat(buffer, temp);
|
||||
}
|
||||
|
||||
strcat(buffer, ",");
|
||||
|
||||
if((UCOL_TERTIARYORDER(CE)&0x7F) != 0x02) {
|
||||
sprintf(temp, " %02X", UCOL_TERTIARYORDER(CE)&0x7F);
|
||||
strcat(buffer, temp);
|
||||
}
|
||||
|
||||
strcat(buffer, "]");
|
||||
|
||||
return buffer;
|
||||
}
|
||||
|
||||
void printExp(uint32_t CE, uint32_t oldCE, char* primb, char* secb, char *terb, UBool *printedCont) {
|
||||
char temp[1024];
|
||||
if(CE<UCOL_NOT_FOUND) {
|
||||
if(*printedCont == FALSE) {
|
||||
fprintf(stdout, "%s ", formatElementString(oldCE, temp));
|
||||
} else {
|
||||
oldCE &= 0x0FFFFFFF;
|
||||
if(UCOL_PRIMARYORDER(oldCE) > 0xFF) {
|
||||
sprintf(temp, "%02X ", UCOL_PRIMARYORDER(oldCE)>>8);
|
||||
strcat(primb, temp);
|
||||
}
|
||||
|
||||
if(UCOL_PRIMARYORDER(oldCE) != 0) {
|
||||
sprintf(temp, "%02X ", UCOL_PRIMARYORDER(oldCE)&0xFF);
|
||||
strcat(primb, temp);
|
||||
}
|
||||
if(UCOL_SECONDARYORDER(oldCE) != 0) {
|
||||
sprintf(temp, "%02X ", UCOL_SECONDARYORDER(oldCE));
|
||||
strcat(secb, temp);
|
||||
}
|
||||
if(UCOL_TERTIARYORDER(oldCE) != 0) {
|
||||
sprintf(temp, "%02X ", UCOL_TERTIARYORDER(oldCE));
|
||||
strcat(terb, temp);
|
||||
}
|
||||
fprintf(stdout, "[%s, %s, %s] ", primb, secb, terb);
|
||||
*primb = *secb = *terb = *temp = 0;
|
||||
}
|
||||
*printedCont = FALSE;
|
||||
} else { /* this is a contiunation, process accordingly */
|
||||
if(*printedCont == TRUE) {
|
||||
oldCE &= 0x0FFFFFFF;
|
||||
}
|
||||
if(UCOL_PRIMARYORDER(oldCE) > 0xFF) {
|
||||
sprintf(temp, "%02X ", UCOL_PRIMARYORDER(oldCE)>>8);
|
||||
strcat(primb, temp);
|
||||
}
|
||||
|
||||
if(UCOL_PRIMARYORDER(oldCE) != 0) {
|
||||
sprintf(temp, "%02X ", UCOL_PRIMARYORDER(oldCE)&0xFF);
|
||||
strcat(primb, temp);
|
||||
}
|
||||
if(UCOL_SECONDARYORDER(oldCE) != 0) {
|
||||
sprintf(temp, "%02X ", UCOL_SECONDARYORDER(oldCE));
|
||||
strcat(secb, temp);
|
||||
}
|
||||
if(UCOL_TERTIARYORDER(oldCE)&0x7F != 0) {
|
||||
sprintf(temp, "%02X ", UCOL_TERTIARYORDER(oldCE)&0x7F);
|
||||
strcat(terb, temp);
|
||||
}
|
||||
*printedCont = TRUE;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Dumps every mapped code point (0..0xFFFF) of an assembled table to stdout.
 *
 * Regular CEs are printed directly. Special CEs are resolved by tag: THAI and
 * EXPANSION entries are read at a uint32_t offset from the start of the
 * image, CONTRACTION entries are walked until the 0xFFFF terminator.
 * Surrogate and charset tags are not handled yet.
 *
 * @param myData assembled table, with its `mapping` member opened
 * @param status ICU error code; nothing is printed if it already signals failure
 */
void printOutTable(UCATableHeader *myData, UErrorCode *status) {
    if(U_FAILURE(*status)) {
        return;
    }
    int32_t i = 0, j = 0;
    int32_t CE = 0;
    uint32_t *address = NULL;
    uint8_t size = 0;
    char buffer[1024];

    for(i = 0; i<=0xFFFF; i++) {
        CE = ucmp32_get(myData->mapping, i);
        if(CE == UCOL_NOT_FOUND) {
            continue;
        }

        fprintf(stdout, "%04X; ", i);
        if(CE < UCOL_NOT_FOUND) {
            fprintf(stdout, "%c; %s ", (UCOL_TERTIARYORDER(CE)&0x80)>>7?'L':'S', formatElementString(CE, buffer));
            continue;
        }

        int32_t tag = (CE&UCOL_TAG_MASK)>>UCOL_TAG_SHIFT;

        if(tag == SURROGATE_TAG) {
            /* do surrogates */
        } else if(tag == THAI_TAG) {
            /* remember the offset before CE is replaced by the CE it points at */
            uint32_t offset = (CE&0x00FFFFF0)>>4;
            address = (uint32_t*)myData+offset;
            CE = *(address);
            fprintf(stdout, "%c; %s ", (UCOL_TERTIARYORDER(CE)&0x80)>>7?'L':'S', formatElementString(CE, buffer));
            /* the address is a pointer and has to be printed with %p */
            fprintf(stdout, "THAI - from %08X to %p (offset %05X) ", CE, (void *)address, offset);
        } else if(tag == CONTRACTION_TAG) {
            int16_t hasBackward = 0;
            char conChars[1024];
            char temp[1024];
            UBool printSeq = FALSE;
            UChar *contractionCP = (UChar *)myData+getContractOffset(CE);

            sprintf(conChars, "%04X", i);
            hasBackward = *(contractionCP); /* first slot holds the backward marker */
            /* the CE belonging to a code point sits at the same index in the parallel CE array */
            address = (uint32_t *)((uint8_t*)myData+myData->contractionCEs)
                + (contractionCP - (UChar *)((uint8_t*)myData+myData->contractionIndex));

            while(*contractionCP != 0xFFFF) {
                if(printSeq == TRUE) {
                    fprintf(stdout, "\n%s;",conChars);
                }
                CE = *(address);
                fprintf(stdout, "%c; %s ", (UCOL_TERTIARYORDER(CE)&0x80)>>7?'L':'S', formatElementString(CE, buffer));
                fprintf(stdout, "Contraction ");
                if(hasBackward != 0) {
                    fprintf(stdout, "Back = %i ", hasBackward);
                }

                contractionCP++;
                address++;
                sprintf(temp, " %04X", *contractionCP);
                strcat(conChars, temp);
                printSeq = TRUE;
            }
        } else if(tag == EXPANSION_TAG) {
            char primb[1024], secb[1024], terb[1024];
            UBool printedCont = FALSE;
            uint32_t oldCE;

            *primb = *secb = *terb = 0;
            size = CE&0xF;      /* CE count, or 0 for a zero-terminated expansion */
            address = ((uint32_t*)myData+((CE&0x00FFFFF0)>>4));
            CE = *(address++);
            fprintf(stdout, "%c; ", (UCOL_TERTIARYORDER(CE)&0x80)>>7?'L':'S');

            /* printExp() outputs oldCE and looks at CE to decide how */
            if(size != 0) {
                for(j = 1; j<size; j++) {
                    oldCE = CE;
                    CE = *(address++);
                    printExp(CE, oldCE, primb, secb, terb, &printedCont);
                }
            } else {
                while(*address != 0) {
                    oldCE = CE;
                    CE = *(address++);
                    printExp(CE, oldCE, primb, secb, terb, &printedCont);
                }
            }
            /* flush the last element */
            printExp(CE, CE, primb, secb, terb, &printedCont);
            if(*primb != '\0' || *secb != '\0' || *terb != '\0') {
                fprintf(stdout, "[%s, %s, %s] ", primb, secb, terb);
            }
        } else if(tag == CHARSET_TAG) {
            /* not handled yet */
        }
    }
}
|
||||
|
11
icu4c/source/tools/genuca/tblprint.h
Normal file
11
icu4c/source/tools/genuca/tblprint.h
Normal file
@ -0,0 +1,11 @@
|
||||
#ifndef TBLPRINT_H
|
||||
#define TBLPRINT_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "ucadata.h"
|
||||
|
||||
char *formatElementString(uint32_t CE, char *buffer);
|
||||
void printExp(uint32_t CE, uint32_t oldCE, char* primb, char* secb, char *terb, UBool *printedCont);
|
||||
void printOutTable(UCATableHeader *myData, UErrorCode *status);
|
||||
|
||||
#endif
|
Loading…
Reference in New Issue
Block a user