c8b1f9396f
X-SVN-Rev: 1428
562 lines
18 KiB
C
562 lines
18 KiB
C
/*
|
|
*******************************************************************************
|
|
*
|
|
* Copyright (C) 1998-2000, International Business Machines
|
|
* Corporation and others. All Rights Reserved.
|
|
*
|
|
*******************************************************************************
|
|
*
|
|
* File parse.c
|
|
*
|
|
* Modification History:
|
|
*
|
|
* Date Name Description
|
|
* 05/26/99 stephen Creation.
|
|
* 02/25/00 weiv Overhaul to write udata
|
|
*******************************************************************************
|
|
*/
|
|
|
|
#include "parse.h"
|
|
#include "error.h"
|
|
#include "uhash.h"
|
|
#include "cmemory.h"
|
|
#include "read.h"
|
|
#include "unicode/ustdio.h"
|
|
#include "ustr.h"
|
|
#include "reslist.h"
|
|
#include "unicode/ustring.h"
|
|
#include "unicode/ucol.h"
|
|
|
|
U_CAPI const UChar * U_EXPORT2 ucol_getDefaultRulesArray(uint32_t *size);
|
|
|
|
U_STRING_DECL(k_start_string, "string", 6);
|
|
U_STRING_DECL(k_start_binary, "binary", 6);
|
|
U_STRING_DECL(k_start_table, "table", 5);
|
|
U_STRING_DECL(k_start_int, "int", 3);
|
|
U_STRING_DECL(k_start_array, "array", 5);
|
|
U_STRING_DECL(k_start_intvector, "intvector", 9);
|
|
U_STRING_DECL(k_start_reserved, "reserved", 8);
|
|
U_STRING_DECL(rootName, "root", 4);
|
|
|
|
static UBool didInit=FALSE;
|
|
static UBool didInitRoot=FALSE;
|
|
|
|
/* States of the parser.  A state's numeric value selects its row in the
   transition table, so the order of the enumerators must match the order
   of the table rows.  The trailing comment on each state lists the input
   that is expected next. */
enum ENode {
    eError = 0,     /* dead state: the input was rejected */
    eInitial,       /* expects: locale name */
    eGotLoc,        /* expects: { */
    eIdle,          /* expects: tag name, or } */
    eGotTag,        /* expects: {, or : */
    eNode5,         /* expects: data, or subtag */
    eNode6,         /* expects: }, or {, or , */
    eList,          /* expects: list data */
    eNode8,         /* expects: , */
    eTagList,       /* expects: subtag data */
    eNode10,        /* expects: } */
    eNode11,        /* expects: subtag */
    eNode12,        /* expects: { */
    e2dArray,       /* expects: data, or } */
    eNode14,        /* expects: ,  or } */
    eNode15,        /* expects: ,  or } */
    eNode16,        /* expects: {, or } */
    eTypeStart,     /* expects: type name */
    eGotType        /* expects: { */
};
|
|
|
|
/* Action codes for the state transition table.  The high byte groups the
   actions by the kind of construct they belong to. */
enum EAction {
    /* ---- generic ---- */
    eNOP       = 0x0100,    /* nothing to do */
    eOpen      = 0x0200,    /* open a locale data block; the data string
                               is the locale name */
    eClose     = 0x0300,    /* close a locale data block */
    eSetTag    = 0x0400,    /* the last string is the tag name */

    /* ---- comma-delimited lists ---- */
    eBegList   = 0x1100,    /* start a string list whose first element is
                               the last string */
    eEndList   = 0x1200,    /* finish the string list being built */
    eListStr   = 0x1300,    /* the last string is a data string; advance
                               the index */
    eStr       = 0x1400,    /* the last string is a singleton string */

    /* ---- 2-d lists ---- */
    eBeg2dList = 0x2100,    /* start a 2d string list, still empty */
    eEnd2dList = 0x2200,    /* finish the 2d string list being built */
    e2dStr     = 0x2300,    /* the last string is a 2d string */
    eNewRow    = 0x2400,    /* begin a new row */

    /* ---- tagged lists ---- */
    eBegTagged = 0x3100,    /* start a tagged list whose first subtag is
                               the last string */
    eEndTagged = 0x3200,    /* finish the tagged list being built */
    eSubtag    = 0x3300,    /* the last string is the subtag */
    eTaggedStr = 0x3400,    /* the last string is a tagged string */

    /* ---- type support ---- */
    eBegType   = 0x4100,    /* begin reading a type */
    eSetType   = 0x4200     /* record and initialize the type */
};
|
|
|
|
/* A struct which encapsulates a node ID and an action. */
|
|
struct STransition {
|
|
enum ENode fNext;
|
|
enum EAction fAction;
|
|
};
|
|
|
|
/* This table describes an ATM (state machine) which parses resource
|
|
bundle text files rather strictly. Each row represents a node. The
|
|
columns of that row represent transitions into other nodes. Most
|
|
transitions are "eError" because most transitions are
|
|
disallowed. For example, if the parser has just seen a tag name, it
|
|
enters node 4 ("eGotTag"). The state table then marks only one
|
|
valid transition, which is into node 5, upon seeing an eOpenBrace
|
|
token. We allow an extra comma after the last element in a
|
|
comma-delimited list (transition from eList to eIdle on
|
|
kCloseBrace). */
|
|
static struct STransition gTransitionTable [] = {
|
|
/* kString kOpenBrace kCloseBrace kComma
|
|
/*eError*/ {eError,eNOP}, {eError,eNOP}, {eError,eNOP}, {eError,eNOP},
|
|
|
|
/*eInitial*/ {eGotLoc,eOpen}, {eError,eNOP}, {eError,eNOP}, {eError,eNOP},
|
|
/*eGotLoc*/ {eError,eNOP}, {eIdle,eNOP}, {eError,eNOP}, {eError,eNOP},
|
|
|
|
/*eIdle*/ {eGotTag,eSetTag}, {eError,eNOP}, {eInitial,eClose}, {eError,eNOP},
|
|
/*eGotTag*/ {eError,eNOP}, {eNode5,eNOP}, {eError,eNOP}, {eError,eNOP},
|
|
/*eNode5*/ {eNode6,eNOP}, {e2dArray,eBeg2dList},{eError,eNOP}, {eError,eNOP},
|
|
/*eNode6*/ {eError,eNOP}, {eTagList,eBegTagged},{eIdle,eStr}, {eList,eBegList},
|
|
|
|
/*eList*/ {eNode8,eListStr}, {eError,eNOP}, {eIdle,eEndList}, {eError,eNOP},
|
|
/*eNode8*/ {eError,eNOP}, {eError,eNOP}, {eIdle,eEndList}, {eList,eNOP},
|
|
|
|
/*eTagList*/ {eNode10,eTaggedStr},{eError,eNOP}, {eError,eNOP}, {eError,eNOP},
|
|
/*eNode10*/ {eError,eNOP}, {eError,eNOP}, {eNode11,eNOP}, {eError,eNOP},
|
|
/*eNode11*/ {eNode12,eNOP}, {eError,eNOP}, {eIdle,eEndTagged},{eError,eNOP},
|
|
/*eNode12*/ {eError,eNOP}, {eTagList,eSubtag}, {eError,eNOP}, {eError,eNOP},
|
|
|
|
/*e2dArray*/ {eNode14,e2dStr}, {eError,eNOP}, {eNode15,eNOP}, {eError,eNOP},
|
|
/*eNode14*/ {eError,eNOP}, {eError,eNOP}, {eNode15,eNOP}, {e2dArray,eNOP},
|
|
/*eNode15*/ {eError,eNOP}, {e2dArray,eNewRow}, {eIdle,eEnd2dList},{eNode16,eNOP},
|
|
/*eNode16*/ {eError,eNOP}, {e2dArray,eNewRow}, {eIdle,eEnd2dList},{eError,eNOP},
|
|
/*eTypeStart*/{eGotType,eSetType}, {eError,eNOP}, {eError,eNOP}, {eError,eNOP},
|
|
/*eGotType*/ {eError,eNOP}, {eError,eNOP}, {eError,eNOP}, {eError,eNOP}
|
|
};
|
|
|
|
/* Fetch the transition for state `row` on token column `col` from the flat
   transition table.  Row length is 4, hence the shift by two.  Both
   arguments are parenthesized so that arbitrary expressions can be passed
   without operator-precedence surprises. */
#define GETTRANSITION(row,col) (gTransitionTable[(col) + ((row)<<2)])
|
|
/* Not anymore, it is 5 now */
|
|
/*#define GETTRANSITION(row,col) (gTransitionTable[col + (row*5)])*/
|
|
|
|
/*********************************************************************
|
|
* Hashtable glue
|
|
********************************************************************/
|
|
|
|
/* Report whether `tag` has an entry in `hash`: TRUE when the lookup
   yields a non-NULL value, FALSE otherwise. */
static UBool get(UHashtable *hash, const struct UString* tag) {
    const void *entry = uhash_get(hash, tag);

    if(entry == NULL) {
        return FALSE;
    }
    return TRUE;
}
|
|
|
|
static void put(UHashtable *hash, const struct UString *tag,
|
|
UErrorCode* status) {
|
|
struct UString* key = uprv_malloc(sizeof(struct UString));
|
|
ustr_init(key);
|
|
ustr_cpy(key, tag, status);
|
|
uhash_put(hash, key, (void*)1, status);
|
|
}
|
|
|
|
/* Key deleter for the tag hashtable: tears down the UString that `ustr`
   points at and then releases the UString object itself. */
static void freeUString(void* ustr) {
    struct UString *doomed = ustr;

    ustr_deinit(doomed);
    uprv_free(doomed);
}
|
|
|
|
static int32_t hashUString(const void* ustr) {
|
|
return uhash_hashUChars(((struct UString*)ustr)->fChars);
|
|
}
|
|
|
|
static UBool compareUString(const void* ustr1, const void* ustr2) {
|
|
return uhash_compareUChars(((struct UString*)ustr1)->fChars,
|
|
((struct UString*)ustr2)->fChars);
|
|
}
|
|
|
|
/*********************************************************************
|
|
* parse
|
|
********************************************************************/
|
|
|
|
struct SRBRoot*
|
|
parse(FileStream *f, const char *cp,
|
|
UErrorCode *status)
|
|
{
|
|
struct UFILE *file;
|
|
enum ETokenType type;
|
|
enum ENode node;
|
|
struct STransition t;
|
|
|
|
struct UString token;
|
|
struct UString tag;
|
|
|
|
char cTag[1024];
|
|
char cSubTag[1024];
|
|
struct SRBRoot *bundle = NULL;
|
|
struct SResource *rootTable = NULL;
|
|
struct SResource *temp = NULL;
|
|
struct SResource *temp1 = NULL;
|
|
struct SResource *temp2 = NULL;
|
|
UBool colEl = FALSE;
|
|
|
|
/* Hashtable for keeping track of seen tag names */
|
|
struct UHashtable *data;
|
|
|
|
|
|
if(U_FAILURE(*status)) return NULL;
|
|
|
|
/* setup */
|
|
|
|
ustr_init(&token);
|
|
ustr_init(&tag);
|
|
/*
|
|
cTag = uprv_malloc(1024);
|
|
if(cTag == NULL) {
|
|
*status = U_MEMORY_ALLOCATION_ERROR;
|
|
return NULL;
|
|
}
|
|
cSubTag = uprv_malloc(1024);
|
|
if(cSubTag == NULL) {
|
|
*status = U_MEMORY_ALLOCATION_ERROR;
|
|
return NULL;
|
|
}
|
|
*/
|
|
|
|
node = eInitial;
|
|
data = 0;
|
|
|
|
file = u_finit((FILE *)f, 0, cp);
|
|
/* file = u_finit(f, cp, status); */
|
|
|
|
bundle = bundle_open(status);
|
|
rootTable = bundle -> fRoot;
|
|
|
|
if(U_FAILURE(*status) || file == NULL) goto finish;
|
|
|
|
/* iterate through the stream */
|
|
for(;;) {
|
|
|
|
/* get next token from stream */
|
|
type = getNextToken(file, &token, status);
|
|
if(U_FAILURE(*status)) goto finish;
|
|
|
|
switch(type) {
|
|
case tok_EOF:
|
|
*status = (node == eInitial) ? U_ZERO_ERROR : U_INVALID_FORMAT_ERROR;
|
|
if(U_FAILURE(*status)) {
|
|
setErrorText("Unexpected EOF encountered");
|
|
}
|
|
goto finish;
|
|
/*break;*/
|
|
|
|
case tok_error:
|
|
*status = U_INVALID_FORMAT_ERROR;
|
|
goto finish;
|
|
/*break;*/
|
|
|
|
default:
|
|
break;
|
|
}
|
|
|
|
t = GETTRANSITION(node, type);
|
|
node = t.fNext;
|
|
|
|
if(node == eError) {
|
|
*status = U_INVALID_FORMAT_ERROR;
|
|
goto finish;
|
|
}
|
|
|
|
switch(t.fAction) {
|
|
case eNOP:
|
|
break;
|
|
|
|
/* Record the last string as the tag name */
|
|
case eSetTag:
|
|
ustr_cpy(&tag, &token, status);
|
|
u_UCharsToChars(tag.fChars, cTag, u_strlen(tag.fChars)+1);
|
|
if(U_FAILURE(*status)) goto finish;
|
|
if(uprv_strchr(cTag, ':')) {
|
|
/* type modificator - do the type modification*/
|
|
} else if(uprv_strcmp(cTag, "CollationElements") == 0) {
|
|
colEl = TRUE;
|
|
}
|
|
/*if(uhash_get(data, uhash_hashUString(tag.fChars)) != 0) {*/
|
|
if(get(data, &tag)) {
|
|
char *s;
|
|
*status = U_INVALID_FORMAT_ERROR;
|
|
s = uprv_malloc(1024);
|
|
strcpy(s, "Duplicate tag name detected: ");
|
|
u_austrcpy(s+strlen(s), tag.fChars);
|
|
setErrorText(s);
|
|
goto finish;
|
|
}
|
|
|
|
break;
|
|
|
|
/* Record a singleton string */
|
|
case eStr:
|
|
if(temp != NULL) {
|
|
*status = U_INTERNAL_PROGRAM_ERROR;
|
|
goto finish;
|
|
}
|
|
temp = string_open(bundle, cTag, token.fChars, token.fLength, status);
|
|
table_add(rootTable, temp, status);
|
|
if(colEl == TRUE) {
|
|
const UChar * defaultRulesArray;
|
|
uint32_t defaultRulesArrayLength = 0;
|
|
/* do the collation elements */
|
|
int32_t len = 0;
|
|
uint8_t *data = NULL;
|
|
UCollator *coll = NULL;
|
|
const UChar *rules1 = NULL;
|
|
int32_t len1 = 0;
|
|
UCollator *tstColl = NULL;
|
|
const UChar *rules2 = NULL;
|
|
int32_t len2 = 0;
|
|
UChar *rules = NULL;
|
|
defaultRulesArray = ucol_getDefaultRulesArray(&defaultRulesArrayLength);
|
|
rules = uprv_malloc(sizeof(defaultRulesArray[0])*(defaultRulesArrayLength + token.fLength));
|
|
uprv_memcpy(rules, defaultRulesArray, defaultRulesArrayLength*sizeof(defaultRulesArray[0]));
|
|
uprv_memcpy(rules + defaultRulesArrayLength, token.fChars, token.fLength*sizeof(token.fChars[0]));
|
|
|
|
coll = ucol_openRules(rules, defaultRulesArrayLength + token.fLength, 0, 0, status);
|
|
tstColl = ucol_open("da", status);
|
|
rules1 = ucol_getRules(coll, &len1);
|
|
rules2 = ucol_getRules(tstColl, &len2);
|
|
|
|
if(U_SUCCESS(*status) && coll !=NULL) {
|
|
/* This is just for testing & should be removed
|
|
temp1 = bin_open(bundle, "%%Collation", sizeof(defaultRulesArray[0])*(defaultRulesArrayLength + token.fLength), (uint8_t *) rules, status);
|
|
table_add(rootTable, temp1, status);
|
|
*/
|
|
data = ucol_cloneRuleData(coll, &len, status);
|
|
if(U_SUCCESS(*status) && data != NULL) {
|
|
temp1 = bin_open(bundle, "%%Collation", len, data, status);
|
|
table_add(rootTable, temp1, status);
|
|
uprv_free(data);
|
|
}
|
|
ucol_close(coll);
|
|
}
|
|
uprv_free(rules);
|
|
colEl = FALSE;
|
|
}
|
|
/*uhash_put(data, tag.fChars, status);*/
|
|
put(data, &tag, status);
|
|
if(U_FAILURE(*status)) goto finish;
|
|
temp = NULL;
|
|
break;
|
|
|
|
/* Begin a string list */
|
|
case eBegList:
|
|
if(temp != NULL) {
|
|
*status = U_INTERNAL_PROGRAM_ERROR;
|
|
goto finish;
|
|
}
|
|
temp = array_open(bundle, cTag, status);
|
|
temp1 = string_open(bundle, NULL, token.fChars, token.fLength, status);
|
|
array_add(temp, temp1, status);
|
|
temp1 = NULL;
|
|
if(U_FAILURE(*status)) goto finish;
|
|
break;
|
|
|
|
/* Record a comma-delimited list string */
|
|
case eListStr:
|
|
temp1 = string_open(bundle, NULL, token.fChars, token.fLength, status);
|
|
array_add(temp, temp1, status);
|
|
temp1 = NULL;
|
|
if(U_FAILURE(*status)) goto finish;
|
|
break;
|
|
|
|
/* End a string list */
|
|
case eEndList:
|
|
/*uhash_put(data, tag.fChars, status);*/
|
|
put(data, &tag, status);
|
|
table_add(rootTable, temp, status);
|
|
temp = NULL;
|
|
if(U_FAILURE(*status)) goto finish;
|
|
break;
|
|
|
|
case eBeg2dList:
|
|
if(temp != NULL) {
|
|
*status = U_INTERNAL_PROGRAM_ERROR;
|
|
goto finish;
|
|
}
|
|
temp = array_open(bundle, cTag, status);
|
|
temp1 = array_open(bundle, NULL, status);
|
|
if(U_FAILURE(*status)) goto finish;
|
|
break;
|
|
|
|
case eEnd2dList:
|
|
/*uhash_put(data, tag.fChars, status);*/
|
|
put(data, &tag, status);
|
|
array_add(temp, temp1, status);
|
|
table_add(rootTable, temp, status);
|
|
temp1 = NULL;
|
|
temp = NULL;
|
|
if(U_FAILURE(*status)) goto finish;
|
|
break;
|
|
|
|
case e2dStr:
|
|
temp2 = string_open(bundle, NULL, token.fChars, token.fLength, status);
|
|
array_add(temp1, temp2, status);
|
|
temp2 = NULL;
|
|
if(U_FAILURE(*status)) goto finish;
|
|
break;
|
|
|
|
case eNewRow:
|
|
array_add(temp, temp1, status);
|
|
temp1 = array_open(bundle, NULL, status);
|
|
if(U_FAILURE(*status)) goto finish;
|
|
break;
|
|
|
|
case eBegTagged:
|
|
if(temp != NULL) {
|
|
*status = U_INTERNAL_PROGRAM_ERROR;
|
|
goto finish;
|
|
}
|
|
temp = table_open(bundle, cTag, status);
|
|
u_UCharsToChars(token.fChars, cSubTag, u_strlen(token.fChars)+1);
|
|
if(U_FAILURE(*status)) goto finish;
|
|
break;
|
|
|
|
case eEndTagged:
|
|
/*uhash_put(data, tag.fChars, status);*/
|
|
put(data, &tag, status);
|
|
table_add(rootTable, temp, status);
|
|
temp = NULL;
|
|
if(U_FAILURE(*status)) goto finish;
|
|
break;
|
|
|
|
case eTaggedStr:
|
|
temp1 = string_open(bundle, cSubTag, token.fChars, token.fLength, status);
|
|
table_add(temp, temp1, status);
|
|
temp1 = NULL;
|
|
if(U_FAILURE(*status)) goto finish;
|
|
break;
|
|
|
|
/* Record the last string as the subtag */
|
|
case eSubtag:
|
|
u_UCharsToChars(token.fChars, cSubTag, u_strlen(token.fChars)+1);
|
|
if(U_FAILURE(*status)) goto finish;
|
|
if(table_get(temp, cSubTag, status) != 0) {
|
|
*status = U_INVALID_FORMAT_ERROR;
|
|
setErrorText("Duplicate subtag found in tagged list");
|
|
goto finish;
|
|
}
|
|
break;
|
|
|
|
case eOpen:
|
|
if(data != 0) {
|
|
*status = U_INTERNAL_PROGRAM_ERROR;
|
|
goto finish;
|
|
}
|
|
bundle_setlocale(bundle, token.fChars, status);
|
|
if(didInitRoot == FALSE) {
|
|
U_STRING_INIT(rootName, "root", 4);
|
|
didInitRoot = TRUE;
|
|
}
|
|
|
|
if(u_strcmp(token.fChars, rootName) == 0) {
|
|
const UChar * defaultRulesArray;
|
|
uint32_t defaultRulesArrayLength = 0;
|
|
/* do the collation elements */
|
|
int32_t len = 0;
|
|
uint8_t *data = NULL;
|
|
uint8_t *data2 = NULL;
|
|
UCollator *coll = NULL;
|
|
UCollator *tstColl = NULL;
|
|
const UChar *rules1 = NULL;
|
|
const UChar *rules2 = NULL;
|
|
int32_t len1 = 0;
|
|
int32_t len2 = 0;
|
|
|
|
UChar *rules = NULL;
|
|
defaultRulesArray = ucol_getDefaultRulesArray(&defaultRulesArrayLength);
|
|
rules = uprv_malloc(sizeof(defaultRulesArray[0])*(defaultRulesArrayLength));
|
|
uprv_memcpy(rules, defaultRulesArray, defaultRulesArrayLength*sizeof(defaultRulesArray[0]));
|
|
|
|
coll = ucol_openRules(rules, defaultRulesArrayLength, 0, 0, status);
|
|
tstColl = ucol_open("root", status);
|
|
rules1 = ucol_getRules(coll, &len1);
|
|
rules2 = ucol_getRules(tstColl, &len2);
|
|
|
|
if(U_SUCCESS(*status) && coll !=NULL) {
|
|
data = ucol_cloneRuleData(coll, &len, status);
|
|
data2 = ucol_cloneRuleData(tstColl, &len2, status);
|
|
if(U_SUCCESS(*status) && data != NULL) {
|
|
temp1 = bin_open(bundle, "%%Collation", len, data, status);
|
|
table_add(rootTable, temp1, status);
|
|
uprv_free(data);
|
|
}
|
|
ucol_close(coll);
|
|
}
|
|
uprv_free(rules);
|
|
}
|
|
if(U_FAILURE(*status)) goto finish;
|
|
data = uhash_open(hashUString, compareUString, status);
|
|
uhash_setKeyDeleter(data, freeUString);
|
|
break;
|
|
|
|
case eClose:
|
|
if(data == 0) {
|
|
*status = U_INTERNAL_PROGRAM_ERROR;
|
|
goto finish;
|
|
}
|
|
break;
|
|
case eSetType:
|
|
/* type recognition */
|
|
if(!didInit) {
|
|
U_STRING_INIT(k_start_string, "string", 6);
|
|
U_STRING_INIT(k_start_binary, "binary", 6);
|
|
U_STRING_INIT(k_start_table, "table", 5);
|
|
U_STRING_INIT(k_start_int, "int", 3);
|
|
U_STRING_INIT(k_start_array, "array", 5);
|
|
U_STRING_INIT(k_start_intvector, "intvector", 9);
|
|
U_STRING_INIT(k_start_reserved, "reserved", 8);
|
|
didInit=TRUE;
|
|
}
|
|
if(u_strcmp(token.fChars, k_start_string) == 0) {
|
|
node = eGotTag;
|
|
} else if(u_strcmp(token.fChars, k_start_array) == 0) {
|
|
node = eGotTag;
|
|
} else if(u_strcmp(token.fChars, k_start_table) == 0) {
|
|
node = eGotTag;
|
|
} else if(u_strcmp(token.fChars, k_start_binary) == 0) {
|
|
/* start of binary */
|
|
} else if(u_strcmp(token.fChars, k_start_int) == 0) {
|
|
/* start of integer */
|
|
} else if(u_strcmp(token.fChars, k_start_intvector) == 0) {
|
|
/* start of intvector */
|
|
} else if(u_strcmp(token.fChars, k_start_reserved) == 0) {
|
|
/* start of reserved */
|
|
} else {
|
|
*status = U_INTERNAL_PROGRAM_ERROR;
|
|
goto finish;
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
|
|
finish:
|
|
|
|
/* clean up */
|
|
|
|
if(data != 0)
|
|
uhash_close(data);
|
|
|
|
ustr_deinit(&token);
|
|
ustr_deinit(&tag);
|
|
|
|
/*uprv_free(cTag);*/
|
|
/*uprv_free(cSubTag);*/
|
|
|
|
if(file != 0)
|
|
u_fclose(file);
|
|
|
|
return bundle;
|
|
}
|