78f36c9a5a
X-SVN-Rev: 11193
333 lines
9.9 KiB
C
333 lines
9.9 KiB
C
/*
|
|
*******************************************************************************
|
|
*
|
|
* Copyright (C) 1999-2002, International Business Machines
|
|
* Corporation and others. All Rights Reserved.
|
|
*
|
|
*******************************************************************************
|
|
* file name: store.c
|
|
* encoding: US-ASCII
|
|
* tab size: 8 (not used)
|
|
* indentation:4
|
|
*
|
|
* created on: 2003-02-06
|
|
* created by: Ram Viswanadha
|
|
*
|
|
*/
|
|
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include "unicode/utypes.h"
|
|
#include "unicode/uchar.h"
|
|
#include "cmemory.h"
|
|
#include "cstring.h"
|
|
#include "filestrm.h"
|
|
#include "unicode/udata.h"
|
|
#include "utrie.h"
|
|
#include "unicode/uset.h"
|
|
#include "unewdata.h"
|
|
#include "genidna.h"
|
|
|
|
#ifdef WIN32
|
|
# pragma warning(disable: 4100)
|
|
#endif
|
|
|
|
#define DO_DEBUG_OUT 0
|
|
|
|
|
|
/**
|
|
This is a simple Trie with the following structure
|
|
|
|
16-bit IDN sets:
|
|
|
|
Each 16-bit IDN word contains:
|
|
|
|
0..2 Category flags
|
|
Contains the enum values IDNStates
|
|
|
|
3..4 Contains the length of the mapping
|
|
If length of the mapping is < 2 the length is stored
|
|
If length of the mapping is > 2 then _IDNA_LENGTH_IN_MAPPING_TABLE
|
|
enum is stored and the length of mapping is stored in the first index
|
|
in the data array
|
|
|
|
5..16 Contains the index into the data array that contains the mapping
|
|
If it contains _IDNA_MAP_TO_NOTHING, then the codepoint is stripped from
|
|
the input
|
|
|
|
*/
|
|
|
|
/* file data ---------------------------------------------------------------- */
|
|
/* indexes[] value names */
|
|
|
|
static int32_t indexes[_IDNA_INDEX_TOP]={ 0 };
|
|
|
|
static uint16_t mappingData[_IDNA_MAPPING_DATA_SIZE]={0};
|
|
|
|
/* UDataInfo cf. udata.h */
|
|
static UDataInfo dataInfo={
|
|
sizeof(UDataInfo),
|
|
0,
|
|
|
|
U_IS_BIG_ENDIAN,
|
|
U_CHARSET_FAMILY,
|
|
U_SIZEOF_UCHAR,
|
|
0,
|
|
|
|
{ 0x49, 0x44, 0x4e, 0x41 }, /* dataFormat="IDNA" */
|
|
{ 2, 2, UTRIE_SHIFT, UTRIE_INDEX_SHIFT }, /* formatVersion */
|
|
{ 3, 2, 0, 0 } /* dataVersion (Unicode version) */
|
|
};
|
|
void
|
|
setUnicodeVersion(const char *v) {
|
|
UVersionInfo version;
|
|
u_versionFromString(version, v);
|
|
uprv_memcpy(dataInfo.dataVersion, version, 4);
|
|
}
|
|
|
|
|
|
static UNewTrie idnTrie={ {0},0,0,0,0,0,0,0,0,{0} };
|
|
|
|
static int32_t currentIndex = 1; /* the current index into the data trie */
|
|
static int32_t maxLength = 0; /* maximum length of mapping string */
|
|
|
|
#define MAX_DATA_LENGTH 11500
|
|
|
|
extern void
|
|
init() {
|
|
|
|
/* initialize the two tries */
|
|
if(NULL==utrie_open(&idnTrie, NULL, MAX_DATA_LENGTH, 0, FALSE)) {
|
|
fprintf(stderr, "error: failed to initialize tries\n");
|
|
exit(U_MEMORY_ALLOCATION_ERROR);
|
|
}
|
|
}
|
|
|
|
static void
|
|
store(uint32_t codepoint, uint32_t* mapping, int32_t length, uint32_t flags, UErrorCode* status){
|
|
|
|
uint32_t trieWord = 0;
|
|
int32_t i =0;
|
|
if(flags == _IDNA_MAP_TO_NOTHING){
|
|
trieWord = flags << 5;
|
|
}else{
|
|
if(length==0){
|
|
trieWord = flags;
|
|
}else{
|
|
/*
|
|
int32_t delta;
|
|
|
|
if(length==1 && (delta=(int32_t)codepoint-(int32_t)mapping[0])>=-4096 && delta<=4095) {
|
|
printf("mapping of U+%04lx to U+%04lx could fit into a 13-bit delta (0x%lx)\n", codepoint, mapping[0], delta);
|
|
}
|
|
*/
|
|
/* set the 0..2 bits the flags */
|
|
trieWord = flags;
|
|
/* set the 3..4 bits the length */
|
|
|
|
if(length > 2){
|
|
trieWord += _IDNA_LENGTH_IN_MAPPING_TABLE << 3;
|
|
}else{
|
|
trieWord += (uint32_t)((length)<<3);
|
|
}
|
|
if(length > maxLength)
|
|
maxLength = length;
|
|
|
|
/* get the current index in the data array
|
|
* and store in 5..15 bits
|
|
*/
|
|
trieWord += currentIndex << 5;
|
|
|
|
|
|
/* load mapping into the data array */
|
|
i = 0;
|
|
|
|
if(trieWord > 0xFFFF){
|
|
fprintf(stderr,"size of trie word is greater than 0xFFFF.\n");
|
|
}
|
|
/* set the length in mapping table */
|
|
if(length > 2){
|
|
mappingData[currentIndex++] = (uint16_t)length;
|
|
}
|
|
while(i<length){
|
|
if(currentIndex < _IDNA_MAPPING_DATA_SIZE){
|
|
if(mappingData[currentIndex]==0){
|
|
if(mapping[i] <= 0xFFFF){
|
|
mappingData[currentIndex++] = (uint16_t)mapping[i++];
|
|
}else{
|
|
mappingData[currentIndex++] = UTF16_LEAD(mapping[i]);
|
|
if(currentIndex < _IDNA_MAPPING_DATA_SIZE){
|
|
mappingData[currentIndex++] = UTF16_TRAIL(mapping[i++]);
|
|
}else{
|
|
fprintf(stderr, "Data Array index out of bounds.currentIndex = %i size of mapping arry = %i \n",currentIndex, _IDNA_MAPPING_DATA_SIZE);
|
|
*status = U_INDEX_OUTOFBOUNDS_ERROR;
|
|
return;
|
|
}
|
|
}
|
|
}
|
|
}else{
|
|
fprintf(stderr, "Data Array index out of bounds.currentIndex = %i size of mapping arry = %i \n",currentIndex, _IDNA_MAPPING_DATA_SIZE);
|
|
*status = U_INDEX_OUTOFBOUNDS_ERROR;
|
|
return;
|
|
}
|
|
}
|
|
|
|
}
|
|
}
|
|
|
|
|
|
i = utrie_get32(&idnTrie,codepoint,NULL);
|
|
|
|
if(i==0){
|
|
/* now set the value in the trie */
|
|
if(!utrie_set32(&idnTrie,codepoint,trieWord)){
|
|
fprintf(stderr, "error: too many mapping entries\n");
|
|
exit(U_BUFFER_OVERFLOW_ERROR);
|
|
}
|
|
|
|
}else{
|
|
if(i== UIDNA_PROHIBITED){
|
|
i += _IDNA_MAP_TO_NOTHING << 5;
|
|
/* now set the value in the trie */
|
|
if(!utrie_set32(&idnTrie,codepoint,i)){
|
|
fprintf(stderr, "error: too many mapping entries\n");
|
|
exit(U_BUFFER_OVERFLOW_ERROR);
|
|
}
|
|
}else{
|
|
fprintf(stderr, "Index array has been set for codepoint 0x%06X. \n",codepoint);
|
|
exit(U_INTERNAL_PROGRAM_ERROR);
|
|
}
|
|
}
|
|
|
|
}
|
|
extern void
|
|
storeMapping(uint32_t codepoint, uint32_t* mapping,int32_t length, UBool withNorm, UErrorCode* status){
|
|
|
|
if(withNorm){
|
|
store(codepoint,mapping,length,UIDNA_MAP_NFKC,status);
|
|
}else{
|
|
store(codepoint,mapping,length,_IDNA_MAP_TO_NOTHING,status);
|
|
}
|
|
}
|
|
|
|
|
|
extern void
|
|
storeRange(uint32_t start, uint32_t end, int8_t flag,UErrorCode* status){
|
|
uint32_t trieWord = 0, i=0;
|
|
|
|
trieWord += flag;
|
|
|
|
if(start == end){
|
|
uint32_t i = utrie_get32(&idnTrie,start,NULL);
|
|
if(i == 0 || i==(uint8_t)flag){
|
|
if(!utrie_set32(&idnTrie,start,trieWord)){
|
|
fprintf(stderr, "error: too many entries\n");
|
|
exit(U_BUFFER_OVERFLOW_ERROR);
|
|
}
|
|
}else{
|
|
fprintf(stderr, "Index array has been set for codepoint 0x%06X. \n",start);
|
|
exit(U_INTERNAL_PROGRAM_ERROR);
|
|
}
|
|
}else{
|
|
if(!utrie_setRange32(&idnTrie,start,end+1,trieWord,FALSE)){
|
|
fprintf(stderr, "error: too many entries\n");
|
|
exit(U_BUFFER_OVERFLOW_ERROR);
|
|
}
|
|
}
|
|
|
|
}
|
|
|
|
/* folding value: just store the offset (16 bits) if there is any non-0 entry */
|
|
static uint32_t U_CALLCONV
|
|
getFoldedValue(UNewTrie *trie, UChar32 start, int32_t offset) {
|
|
uint32_t foldedValue, value;
|
|
UChar32 limit;
|
|
UBool inBlockZero;
|
|
|
|
foldedValue=0;
|
|
|
|
limit=start+0x400;
|
|
while(start<limit) {
|
|
value=utrie_get32(trie, start, &inBlockZero);
|
|
if(inBlockZero) {
|
|
start+=UTRIE_DATA_BLOCK_LENGTH;
|
|
} else {
|
|
foldedValue|=value;
|
|
++start;
|
|
}
|
|
}
|
|
|
|
if(foldedValue!=0) {
|
|
return (uint32_t)(offset|0x8000);
|
|
} else {
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
extern void
|
|
generateData(const char *dataDir) {
|
|
static uint8_t idnTrieBlock[100000];
|
|
|
|
UNewDataMemory *pData;
|
|
UErrorCode errorCode=U_ZERO_ERROR;
|
|
int32_t size, idnTrieSize, dataLength;
|
|
|
|
idnTrieSize=utrie_serialize(&idnTrie, idnTrieBlock, sizeof(idnTrieBlock), getFoldedValue, TRUE, &errorCode);
|
|
if(U_FAILURE(errorCode)) {
|
|
fprintf(stderr, "error: utrie_serialize(idn trie) failed, %s\n", u_errorName(errorCode));
|
|
exit(errorCode);
|
|
}
|
|
size = idnTrieSize + sizeof(mappingData) + sizeof(indexes);
|
|
if(beVerbose) {
|
|
printf("size of idn trie %5u bytes\n", idnTrieSize);
|
|
printf("size of " U_ICUDATA_NAME "_" DATA_NAME "." DATA_TYPE " contents: %ld bytes\n", (long)size);
|
|
printf("size of mapping data array %5u bytes\n", sizeof(mappingData));
|
|
printf("Number of code units in mappingData (currentIndex) are: %i \n", currentIndex);
|
|
printf("Maximum length of the mapping string is : %i \n", maxLength);
|
|
}
|
|
|
|
|
|
/* write the data */
|
|
pData=udata_create(dataDir, DATA_TYPE, U_ICUDATA_NAME "_" DATA_NAME, &dataInfo,
|
|
haveCopyright ? U_COPYRIGHT_STRING : NULL, &errorCode);
|
|
if(U_FAILURE(errorCode)) {
|
|
fprintf(stderr, "gennorm: unable to create the output file, error %d\n", errorCode);
|
|
exit(errorCode);
|
|
}
|
|
indexes[_IDNA_INDEX_TRIE_SIZE]=idnTrieSize;
|
|
indexes[_IDNA_INDEX_MAPPING_DATA_SIZE]=sizeof(mappingData);
|
|
|
|
udata_writeBlock(pData, indexes, sizeof(indexes));
|
|
udata_writeBlock(pData, idnTrieBlock, idnTrieSize);
|
|
udata_writeBlock(pData, mappingData, sizeof(mappingData));
|
|
|
|
/* finish up */
|
|
dataLength=udata_finish(pData, &errorCode);
|
|
if(U_FAILURE(errorCode)) {
|
|
fprintf(stderr, "genidn: error %d writing the output file\n", errorCode);
|
|
exit(errorCode);
|
|
}
|
|
|
|
if(dataLength!=size) {
|
|
fprintf(stderr, "genidn error: data length %ld != calculated size %ld\n",
|
|
(long)dataLength, (long)size);
|
|
exit(U_INTERNAL_PROGRAM_ERROR);
|
|
}
|
|
}
|
|
extern void
|
|
cleanUpData(void) {
|
|
|
|
utrie_close(&idnTrie);
|
|
|
|
}
|
|
|
|
|
|
/*
|
|
* Hey, Emacs, please set the following:
|
|
*
|
|
* Local Variables:
|
|
* indent-tabs-mode: nil
|
|
* End:
|
|
*
|
|
*/
|