8b20f17f49
X-SVN-Rev: 473
366 lines
10 KiB
C
366 lines
10 KiB
C
/*
*******************************************************************************
*
*   Copyright (C) 1998-1999, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*
* File parse.c
*
* Modification History:
*
*   Date        Name        Description
*   05/26/99    stephen     Creation.
*******************************************************************************
*/
|
|
|
|
#include "parse.h"
|
|
#include "error.h"
|
|
#include "uhash.h"
|
|
#include "cmemory.h"
|
|
#include "read.h"
|
|
#include "ufile.h"
|
|
#include "ustdio.h"
|
|
#include "ustr.h"
|
|
#include "list.h"
|
|
#include "rblist.h"
|
|
#include "unicode/ustring.h"
|
|
|
|
/* Node IDs for the state transition table.
   A node's numeric value is used as its row index in gTransitionTable
   (see GETTRANSITION), so the values are spelled out and must stay
   contiguous and in this order.  The "Expecting" notes list the tokens
   for which the table holds a non-error transition out of that node. */
enum ENode {
    eError   = 0,   /* Invalid input; no way out of this node */
    eInitial = 1,   /* Expecting: locale name */
    eGotLoc  = 2,   /* Expecting: { */
    eIdle    = 3,   /* Expecting: tag name | } */
    eGotTag  = 4,   /* Expecting: { */
    eNode5   = 5,   /* Expecting: string | { */
    eNode6   = 6,   /* Expecting: } | { | , */
    eList    = 7,   /* Expecting: list string | } */
    eNode8   = 8,   /* Expecting: , | } */
    eTagList = 9,   /* Expecting: subtag data string */
    eNode10  = 10,  /* Expecting: } */
    eNode11  = 11,  /* Expecting: subtag | } */
    eNode12  = 12,  /* Expecting: { */
    e2dArray = 13,  /* Expecting: string | } */
    eNode14  = 14,  /* Expecting: , | } */
    eNode15  = 15,  /* Expecting: , | { | } */
    eNode16  = 16   /* Expecting: { | } */
};
|
|
|
|
/* Action codes for the state transition table.
   The codes are grouped by the kind of resource they help build; the
   high byte of each value identifies the group. */
enum EAction {
    /* ---- Generic actions ---- */

    /* Do nothing */
    eNOP       = 0x0100,
    /* Open a new locale data block, using the data string as the
       locale name */
    eOpen      = 0x0200,
    /* Close a locale data block */
    eClose     = 0x0300,
    /* Record the last string as the tag name */
    eSetTag    = 0x0400,

    /* ---- Comma-delimited lists ---- */

    /* Start a new string list whose first element is the last string */
    eBegList   = 0x1100,
    /* Close the string list being built */
    eEndList   = 0x1200,
    /* Record the last string as a data string and increment the index */
    eListStr   = 0x1300,
    /* Record the last string as a singleton string */
    eStr       = 0x1400,

    /* ---- 2-d lists ---- */

    /* Start a new 2-d string list that has no elements yet */
    eBeg2dList = 0x2100,
    /* Close the 2-d string list being built */
    eEnd2dList = 0x2200,
    /* Record the last string as a 2-d string */
    e2dStr     = 0x2300,
    /* Start a new row */
    eNewRow    = 0x2400,

    /* ---- Tagged lists ---- */

    /* Start a new tagged list whose first subtag is the last string */
    eBegTagged = 0x3100,
    /* Close the tagged list being built */
    eEndTagged = 0x3200,
    /* Record the last string as the subtag */
    eSubtag    = 0x3300,
    /* Record the last string as a tagged string */
    eTaggedStr = 0x3400
};
|
|
|
|
/* A struct which encapsulates a node ID and an action. */
|
|
struct STransition {
|
|
enum ENode fNext;
|
|
enum EAction fAction;
|
|
};
|
|
|
|
/* This table describes an ATM (state machine) which parses resource
|
|
bundle text files rather strictly. Each row represents a node. The
|
|
columns of that row represent transitions into other nodes. Most
|
|
transitions are "eError" because most transitions are
|
|
disallowed. For example, if the parser has just seen a tag name, it
|
|
enters node 4 ("eGotTag"). The state table then marks only one
|
|
valid transition, which is into node 5, upon seeing an eOpenBrace
|
|
token. We allow an extra comma after the last element in a
|
|
comma-delimited list (transition from eList to eIdle on
|
|
kCloseBrace). */
|
|
static struct STransition gTransitionTable [] = {
|
|
/* kString kOpenBrace kCloseBrace kComma*/
|
|
{eError,eNOP}, {eError,eNOP}, {eError,eNOP}, {eError,eNOP},
|
|
|
|
{eGotLoc,eOpen}, {eError,eNOP}, {eError,eNOP}, {eError,eNOP},
|
|
{eError,eNOP}, {eIdle,eNOP}, {eError,eNOP}, {eError,eNOP},
|
|
|
|
{eGotTag,eSetTag}, {eError,eNOP}, {eInitial,eClose}, {eError,eNOP},
|
|
{eError,eNOP}, {eNode5,eNOP}, {eError,eNOP}, {eError,eNOP},
|
|
{eNode6,eNOP}, {e2dArray,eBeg2dList},{eError,eNOP}, {eError,eNOP},
|
|
{eError,eNOP}, {eTagList,eBegTagged},{eIdle,eStr}, {eList,eBegList},
|
|
|
|
{eNode8,eListStr}, {eError,eNOP}, {eIdle,eEndList}, {eError,eNOP},
|
|
{eError,eNOP}, {eError,eNOP}, {eIdle,eEndList}, {eList,eNOP},
|
|
|
|
{eNode10,eTaggedStr},{eError,eNOP}, {eError,eNOP}, {eError,eNOP},
|
|
{eError,eNOP}, {eError,eNOP}, {eNode11,eNOP}, {eError,eNOP},
|
|
{eNode12,eNOP}, {eError,eNOP}, {eIdle,eEndTagged},{eError,eNOP},
|
|
{eError,eNOP}, {eTagList,eSubtag}, {eError,eNOP}, {eError,eNOP},
|
|
|
|
{eNode14,e2dStr}, {eError,eNOP}, {eNode15,eNOP}, {eError,eNOP},
|
|
{eError,eNOP}, {eError,eNOP}, {eNode15,eNOP}, {e2dArray,eNOP},
|
|
{eError,eNOP}, {e2dArray,eNewRow}, {eIdle,eEnd2dList},{eNode16,eNOP},
|
|
{eError,eNOP}, {e2dArray,eNewRow}, {eIdle,eEnd2dList},{eError,eNOP}
|
|
};
|
|
|
|
/* Fetch the transition for node `row` on token column `col`.
   gTransitionTable is a flat array with a row length of 4, so the
   entry lives at index col + row * 4 (written as row << 2).  Both
   arguments are parenthesized so that any expression may be passed,
   including ones built from low-precedence operators such as ?: */
#define GETTRANSITION(row,col) (gTransitionTable[(col) + ((row) << 2)])
|
|
|
|
struct SRBItemList*
|
|
parse(FileStream *f, const char *cp,
|
|
UErrorCode *status)
|
|
{
|
|
struct UFILE *file;
|
|
enum ETokenType type;
|
|
enum ENode node;
|
|
struct STransition t;
|
|
|
|
struct UString token;
|
|
struct UString tag;
|
|
struct UString subtag;
|
|
struct UString localeName;
|
|
struct UString keyname;
|
|
|
|
struct SRBItem *item;
|
|
struct SRBItemList *list;
|
|
struct SList *current;
|
|
|
|
/* Hashtable for keeping track of seen tag names */
|
|
struct UHashtable *data;
|
|
|
|
|
|
if(U_FAILURE(*status)) return 0;
|
|
|
|
/* setup */
|
|
|
|
ustr_init(&token);
|
|
ustr_init(&tag);
|
|
ustr_init(&subtag);
|
|
ustr_init(&localeName);
|
|
ustr_init(&keyname);
|
|
|
|
node = eInitial;
|
|
data = 0;
|
|
current = 0;
|
|
item = 0;
|
|
|
|
file = u_finit(f, cp, status);
|
|
list = rblist_open(status);
|
|
if(U_FAILURE(*status)) goto finish;
|
|
|
|
/* iterate through the stream */
|
|
for(;;) {
|
|
|
|
/* get next token from stream */
|
|
type = getNextToken(file, &token, status);
|
|
if(U_FAILURE(*status)) goto finish;
|
|
|
|
switch(type) {
|
|
case tok_EOF:
|
|
*status = (node == eInitial) ? U_ZERO_ERROR : U_INVALID_FORMAT_ERROR;
|
|
setErrorText("Unexpected EOF encountered");
|
|
goto finish;
|
|
/*break;*/
|
|
|
|
case tok_error:
|
|
*status = U_INVALID_FORMAT_ERROR;
|
|
goto finish;
|
|
/*break;*/
|
|
|
|
default:
|
|
break;
|
|
}
|
|
|
|
t = GETTRANSITION(node, type);
|
|
node = t.fNext;
|
|
|
|
if(node == eError) {
|
|
*status = U_INVALID_FORMAT_ERROR;
|
|
goto finish;
|
|
}
|
|
|
|
switch(t.fAction) {
|
|
case eNOP:
|
|
break;
|
|
|
|
/* Record the last string as the tag name */
|
|
case eSetTag:
|
|
ustr_cpy(&tag, &token, status);
|
|
if(U_FAILURE(*status)) goto finish;
|
|
if(uhash_get(data, uhash_hashUString(tag.fChars)) != 0) {
|
|
char *s;
|
|
*status = U_INVALID_FORMAT_ERROR;
|
|
s = uprv_malloc(1024);
|
|
strcpy(s, "Duplicate tag name detected: ");
|
|
u_austrcpy(s+strlen(s), tag.fChars);
|
|
setErrorText(s);
|
|
goto finish;
|
|
}
|
|
break;
|
|
|
|
/* Record a singleton string */
|
|
case eStr:
|
|
if(current != 0) {
|
|
*status = U_INTERNAL_PROGRAM_ERROR;
|
|
goto finish;
|
|
}
|
|
current = strlist_open(status);
|
|
strlist_add(current, token.fChars, status);
|
|
item = make_rbitem(tag.fChars, current, status);
|
|
rblist_add(list, item, status);
|
|
uhash_put(data, tag.fChars, status);
|
|
if(U_FAILURE(*status)) goto finish;
|
|
current = 0;
|
|
item = 0;
|
|
break;
|
|
|
|
/* Begin a string list */
|
|
case eBegList:
|
|
if(current != 0) {
|
|
*status = U_INTERNAL_PROGRAM_ERROR;
|
|
goto finish;
|
|
}
|
|
current = strlist_open(status);
|
|
strlist_add(current, token.fChars, status);
|
|
if(U_FAILURE(*status)) goto finish;
|
|
break;
|
|
|
|
/* Record a comma-delimited list string */
|
|
case eListStr:
|
|
strlist_add(current, token.fChars, status);
|
|
if(U_FAILURE(*status)) goto finish;
|
|
break;
|
|
|
|
/* End a string list */
|
|
case eEndList:
|
|
uhash_put(data, tag.fChars, status);
|
|
item = make_rbitem(tag.fChars, current, status);
|
|
rblist_add(list, item, status);
|
|
if(U_FAILURE(*status)) goto finish;
|
|
current = 0;
|
|
item = 0;
|
|
break;
|
|
|
|
case eBeg2dList:
|
|
if(current != 0) {
|
|
*status = U_INTERNAL_PROGRAM_ERROR;
|
|
goto finish;
|
|
}
|
|
current = strlist2d_open(status);
|
|
if(U_FAILURE(*status)) goto finish;
|
|
break;
|
|
|
|
case eEnd2dList:
|
|
uhash_put(data, tag.fChars, status);
|
|
item = make_rbitem(tag.fChars, current, status);
|
|
rblist_add(list, item, status);
|
|
if(U_FAILURE(*status)) goto finish;
|
|
current = 0;
|
|
item = 0;
|
|
break;
|
|
|
|
case e2dStr:
|
|
strlist2d_add(current, token.fChars, status);
|
|
if(U_FAILURE(*status)) goto finish;
|
|
break;
|
|
|
|
case eNewRow:
|
|
strlist2d_newRow(current, status);
|
|
if(U_FAILURE(*status)) goto finish;
|
|
break;
|
|
|
|
case eBegTagged:
|
|
if(current != 0) {
|
|
*status = U_INTERNAL_PROGRAM_ERROR;
|
|
goto finish;
|
|
}
|
|
current = taglist_open(status);
|
|
ustr_cpy(&subtag, &token, status);
|
|
if(U_FAILURE(*status)) goto finish;
|
|
break;
|
|
|
|
case eEndTagged:
|
|
uhash_put(data, tag.fChars, status);
|
|
item = make_rbitem(tag.fChars, current, status);
|
|
rblist_add(list, item, status);
|
|
if(U_FAILURE(*status)) goto finish;
|
|
current = 0;
|
|
item = 0;
|
|
break;
|
|
|
|
case eTaggedStr:
|
|
taglist_add(current, subtag.fChars, token.fChars, status);
|
|
if(U_FAILURE(*status)) goto finish;
|
|
break;
|
|
|
|
/* Record the last string as the subtag */
|
|
case eSubtag:
|
|
ustr_cpy(&subtag, &token, status);
|
|
if(U_FAILURE(*status)) goto finish;
|
|
if(taglist_get(current, subtag.fChars, status) != 0) {
|
|
*status = U_INVALID_FORMAT_ERROR;
|
|
setErrorText("Duplicate subtag found in tagged list");
|
|
goto finish;
|
|
}
|
|
break;
|
|
|
|
case eOpen:
|
|
if(data != 0) {
|
|
*status = U_INTERNAL_PROGRAM_ERROR;
|
|
goto finish;
|
|
}
|
|
ustr_cpy(&localeName, &token, status);
|
|
rblist_setlocale(list, localeName.fChars, status);
|
|
if(U_FAILURE(*status)) goto finish;
|
|
data = uhash_open(uhash_hashUString, status);
|
|
break;
|
|
|
|
case eClose:
|
|
if(data == 0) {
|
|
*status = U_INTERNAL_PROGRAM_ERROR;
|
|
goto finish;
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
|
|
finish:
|
|
|
|
/* clean up */
|
|
|
|
if(data != 0)
|
|
uhash_close(data);
|
|
|
|
if(item != 0)
|
|
uprv_free(item);
|
|
|
|
ustr_deinit(&token);
|
|
ustr_deinit(&tag);
|
|
ustr_deinit(&subtag);
|
|
ustr_deinit(&localeName);
|
|
ustr_deinit(&keyname);
|
|
|
|
if(file != 0)
|
|
u_fclose(file);
|
|
|
|
return list;
|
|
}
|