From bd60fe525d2c4d17c07e5231a695cb8de6832ad3 Mon Sep 17 00:00:00 2001 From: George Rhoten Date: Tue, 2 Aug 2005 17:55:57 +0000 Subject: [PATCH] ICU-4235 Fix various porting, configuration and style issues. Don't use namespaces. Use UBool instead of bool. Use FALSE instead of false. Use TRUE instead of true. Use the correct copyright date. Use UCONFIG_NO_COLLATION consistently. ... and a few other issues. X-SVN-Rev: 18378 --- icu4c/source/test/intltest/rndmcoll.cpp | 2776 +++++++++++------------ icu4c/source/test/intltest/rndmcoll.h | 3 +- 2 files changed, 1383 insertions(+), 1396 deletions(-) diff --git a/icu4c/source/test/intltest/rndmcoll.cpp b/icu4c/source/test/intltest/rndmcoll.cpp index de07648484..f117a9dd19 100644 --- a/icu4c/source/test/intltest/rndmcoll.cpp +++ b/icu4c/source/test/intltest/rndmcoll.cpp @@ -1,284 +1,282 @@ /* - ******************************************************************************* - * Copyright (C) 2002-2005, International Business Machines Corporation and * - * others. All Rights Reserved. * - ******************************************************************************* + ****************************************************************************** + * Copyright (C) 2005-2005, International Business Machines Corporation and * + * others. All Rights Reserved. * + ****************************************************************************** */ #include #include #include #include #include +#include "rndmcoll.h" -namespace {//anonymous.design - //Raymond: Following comments are copied from Java implementation - // - // each rule can be: - // "[" command "]" - // "& [" position "]" - // "&" before chars - // relation "[variable top]" - // relation (chars "|")? chars ("/" chars)? - // plus, a reset must come before a relation +#if !UCONFIG_NO_COLLATION - //Raymond: The grammar of "collation rule" can be defined use a modified-BNF. - // We need a tool to - // 1. Parse the defination and - // 2. Build an active object which can generate concrete collation rules - // - //Rammond: - // The difference between standarad BNF and our modified-BNF is - // 1. Alternation item can has a "weight" now - // 2. Accept "? weight" as a new operation -- short form altheration - // 3. Accept "range" as a new operation -- repeat - // We do not accept any EBNF grammar in our modified-BNF. - // - // Furthermore, the grammar of our modified-BNF itself can be defined using standard BNF - // NOTE: Following characters are treated as literal in the definition - // { } ? $ % , - ; - // - // string = - // alphabeta = - // digit = - // integer = integer digit | digit - // var = var alphabet | var digit | $ alphabet - // - // var-defs = var-defs var-def | var-def - // var-def = var '=' defination; - // - // defination = simple | repeat | short-alt | sequence | alternation1 | alternation2 - // defination = alternation1 | alternation2 - // - // simple = var | string | '(' defination ')' - // repeat = simple range - // short-alt = simple ? | simple ? weight - // - // item = simple | repeat | shor-alt - // sequence = sequence item | item item - // - // item1 = sequence - // alternation1 = alternation1 '|' item1 | item1 '|' item1 - // - // item2 = simple weight - // alternation2 = alternation2 '|' item2 | item2 - // - // range = { integer , integer } - // weight = integer % - // - // Special-characters: - // (sapce) contact operation, or separators to increase readability - // = definition - // | selection operation - // ( ) precedence select - // ' ' override special-character to plain character - // - ///////////////////////////////////////// - // Completeness vs. Magic: - // The modified-BNF definition of "collation rule" need not be complete. - // It means following assertion is do acceptable: - // o Some variables are undefined. or - // o We cannot get a "collation rule" according the modified-BNF definition. - // Let's explain: - // - // Our target is to build an active object which can generate concrete collation rules. - // - // In order to formalize the generating process, we used modified-BNF to describe it. - // Then, the parser will help us to build an complex active object from basic active objects. - // - // It's acceptable that some basice active object is defined outside the definition and magically injected into. - // - // The magic power is got via empty variable defination. After parser pasing the definition, - // we get a part-defined active object, then we inject some magic active objects to - // change the prat-defined active object to a complete active object. - // - // Following are copied from Java implementation with less modification. - const char * collationBNF = - "$s = ' '? 50%;" - "$crlf = '\r\n';" +//Raymond: Following comments are copied from Java implementation +// +// each rule can be: +// "[" command "]" +// "& [" position "]" +// "&" before chars +// relation "[variable top]" +// relation (chars "|")? chars ("/" chars)? +// plus, a reset must come before a relation - "$alternateOptions = non'-'ignorable | shifted;" - "$onoff = on | off;" - "$caseFirstOptions = off | upper | lower;" - "$strengthOptions = '1' | '2' | '3' | '4' | 'I';" - "$commandList = '['" - " ( alternate ' ' $alternateOptions" - " | backwards' 2'" - " | normalization ' ' $onoff " - " | caseLevel ' ' $onoff " - " | hiraganaQ ' ' $onoff" - " | caseFirst ' ' $caseFirstOptions" - " | strength ' ' $strengthOptions" - " ) ']';" - "$command = $commandList $crlf;" +//Raymond: The grammar of "collation rule" can be defined use a modified-BNF. +// We need a tool to +// 1. Parse the defination and +// 2. Build an active object which can generate concrete collation rules +// +//Rammond: +// The difference between standarad BNF and our modified-BNF is +// 1. Alternation item can has a "weight" now +// 2. Accept "? weight" as a new operation -- short form altheration +// 3. Accept "range" as a new operation -- repeat +// We do not accept any EBNF grammar in our modified-BNF. +// +// Furthermore, the grammar of our modified-BNF itself can be defined using standard BNF +// NOTE: Following characters are treated as literal in the definition +// { } ? $ % , - ; +// +// string = +// alphabeta = +// digit = +// integer = integer digit | digit +// var = var alphabet | var digit | $ alphabet +// +// var-defs = var-defs var-def | var-def +// var-def = var '=' defination; +// +// defination = simple | repeat | short-alt | sequence | alternation1 | alternation2 +// defination = alternation1 | alternation2 +// +// simple = var | string | '(' defination ')' +// repeat = simple range +// short-alt = simple ? | simple ? weight +// +// item = simple | repeat | shor-alt +// sequence = sequence item | item item +// +// item1 = sequence +// alternation1 = alternation1 '|' item1 | item1 '|' item1 +// +// item2 = simple weight +// alternation2 = alternation2 '|' item2 | item2 +// +// range = { integer , integer } +// weight = integer % +// +// Special-characters: +// (sapce) contact operation, or separators to increase readability +// = definition +// | selection operation +// ( ) precedence select +// ' ' override special-character to plain character +// +///////////////////////////////////////// +// Completeness vs. Magic: +// The modified-BNF definition of "collation rule" need not be complete. +// It means following assertion is do acceptable: +// o Some variables are undefined. or +// o We cannot get a "collation rule" according the modified-BNF definition. +// Let's explain: +// +// Our target is to build an active object which can generate concrete collation rules. +// +// In order to formalize the generating process, we used modified-BNF to describe it. +// Then, the parser will help us to build an complex active object from basic active objects. +// +// It's acceptable that some basice active object is defined outside the definition and magically injected into. +// +// The magic power is got via empty variable defination. After parser pasing the definition, +// we get a part-defined active object, then we inject some magic active objects to +// change the prat-defined active object to a complete active object. +// +// Following are copied from Java implementation with less modification. +static const char collationBNF[] = + "$s = ' '? 50%;" + "$crlf = '\r\n';" - "$ignorableTypes = (tertiary | secondary | primary) ' ' ignorable;" - "$allTypes = variable | regular | implicit | trailing | $ignorableTypes;" - "$positionList = '[' (first | last) ' ' $allTypes ']';" + "$alternateOptions = non'-'ignorable | shifted;" + "$onoff = on | off;" + "$caseFirstOptions = off | upper | lower;" + "$strengthOptions = '1' | '2' | '3' | '4' | 'I';" + "$commandList = '['" + " ( alternate ' ' $alternateOptions" + " | backwards' 2'" + " | normalization ' ' $onoff " + " | caseLevel ' ' $onoff " + " | hiraganaQ ' ' $onoff" + " | caseFirst ' ' $caseFirstOptions" + " | strength ' ' $strengthOptions" + " ) ']';" + "$command = $commandList $crlf;" - "$beforeList = '[before ' ('1' | '2' | '3') ']';" + "$ignorableTypes = (tertiary | secondary | primary) ' ' ignorable;" + "$allTypes = variable | regular | implicit | trailing | $ignorableTypes;" + "$positionList = '[' (first | last) ' ' $allTypes ']';" - "$relationList = (" - " '<'" - " | '<<'" - " | ';'" - " | '<<<'" - " | ','" - " | '='" - ");" - "$string = $magic;" - "$rel1 = '[variable top]' $s;" - "$p1 = ($string $s '|' $s)? 25%;" - "$p2 = ('/' $s $string $s)? 25%;" - "$rel2 = $p1 $string $s $p2;" - "$relation = $relationList $s ($rel1 | $rel2) $crlf;" + "$beforeList = '[before ' ('1' | '2' | '3') ']';" - "$reset = '&' $s ($beforeList $s)? 10% ($positionList 1% | $string 10%) $crlf;" - "$mostRules = $command 1% | $reset 5% | $relation 25%;" - "$root = $command{0,5} $reset $mostRules{1,20};" + "$relationList = (" + " '<'" + " | '<<'" + " | ';'" + " | '<<<'" + " | ','" + " | '='" + ");" + "$string = $magic;" + "$rel1 = '[variable top]' $s;" + "$p1 = ($string $s '|' $s)? 25%;" + "$p2 = ('/' $s $string $s)? 25%;" + "$rel2 = $p1 $string $s $p2;" + "$relation = $relationList $s ($rel1 | $rel2) $crlf;" - ; // string end + "$reset = '&' $s ($beforeList $s)? 10% ($positionList 1% | $string 10%) $crlf;" + "$mostRules = $command 1% | $reset 5% | $relation 25%;" + "$root = $command{0,5} $reset $mostRules{1,20};"; - // Document of class LiteralToEscape - // - // ATTENTION: - // From http://icu.sourceforge.net/userguide/Collate_Customization.html. - // We get the precedence of escape/quote operations - // - // (highest) 1. backslash \ - // 2. two single quotes '' - // 3. quoting ' ' - // - // ICU Collation should accept following as the same string. - // - // 1) 'ab'c _ - // 2) a\bc \ - // 3) a'b'\c |- They are equal. - // 4) abc _/ - // - // From "two single quotes", we have following deductions - // D1. empty quoting is illgal. (obviously) - // D2. no contact operation between two quotings - // '.''.' is not .. it is .'. - // D3. "two single quotes" cannot contact two quoting simultaneously - // '..''''.' is not ..'. it is ..''. - // NOTICE: - // "two single quotes" can contact before one quoting - // '''.' is '. - // "two single quotes" can literally contact after one quoting - // But, from syntax, it's one quoting including a "two single quotes" - // '.''' is .' - // D4. "two single quotes" cannot solely be included in quoting - // '''' is not ' it is '' - // NOTICE: These are legal - // '.''.' is .'. - // '.''' is .' - // - // dicision - // /\ - // /__\ - // output buffer input buffer - // - // To make our dicision (within an atom operation) without caring input and output buffer, - // following calling pattern (within an atom operation) shall be avoided - // - // P1 open_quoting() then close_quoting() (direct violation) D1 - // P2 close_quoting() then open_quoting() (direct violation) D2 - // P3 empty open_quoting() (indirect violation) D1, D4 - // P4 empty close_quoting() (indirect violation) D2, D3 - // P5 open_quoting() then two single quotes (indirect violation) D4 - // P6 close_quoting() then two single quotes (indirect violation) D3 - // - // two single quotes escaping will not open_ or close_ quoting() - // The choice will not lose some quoing forms. - // - // For open_quoting(), - // we may get this form quoting ''' P5 - // It may raise a bug ''''x - // If we expect - // '''.' let the next char open the quoting - // '.''.' the quoting is already opened by preceding char - // - // For close_quoting() - // we will get this form quoting '.''' P6 - // It may raise a bug '.''''.' - // If we expect - // '.'''\. let the next char close the quoting - // '.''''.' the expectation is wrong! using '.'\''.' instead - // - // It's a hard work to readjust generation opportunity for various escaping form. - // We just simply ignore it. +// Document of class LiteralToEscape +// +// ATTENTION: +// From http://icu.sourceforge.net/userguide/Collate_Customization.html. +// We get the precedence of escape/quote operations +// +// (highest) 1. backslash \ +// 2. two single quotes '' +// 3. quoting ' ' +// +// ICU Collation should accept following as the same string. +// +// 1) 'ab'c _ +// 2) a\bc \ +// 3) a'b'\c |- They are equal. +// 4) abc _/ +// +// From "two single quotes", we have following deductions +// D1. empty quoting is illgal. (obviously) +// D2. no contact operation between two quotings +// '.''.' is not .. it is .'. +// D3. "two single quotes" cannot contact two quoting simultaneously +// '..''''.' is not ..'. it is ..''. +// NOTICE: +// "two single quotes" can contact before one quoting +// '''.' is '. +// "two single quotes" can literally contact after one quoting +// But, from syntax, it's one quoting including a "two single quotes" +// '.''' is .' +// D4. "two single quotes" cannot solely be included in quoting +// '''' is not ' it is '' +// NOTICE: These are legal +// '.''.' is .'. +// '.''' is .' +// +// dicision +// /\ +// /__\ +// output buffer input buffer +// +// To make our dicision (within an atom operation) without caring input and output buffer, +// following calling pattern (within an atom operation) shall be avoided +// +// P1 open_quoting() then close_quoting() (direct violation) D1 +// P2 close_quoting() then open_quoting() (direct violation) D2 +// P3 empty open_quoting() (indirect violation) D1, D4 +// P4 empty close_quoting() (indirect violation) D2, D3 +// P5 open_quoting() then two single quotes (indirect violation) D4 +// P6 close_quoting() then two single quotes (indirect violation) D3 +// +// two single quotes escaping will not open_ or close_ quoting() +// The choice will not lose some quoing forms. +// +// For open_quoting(), +// we may get this form quoting ''' P5 +// It may raise a bug ''''x +// If we expect +// '''.' let the next char open the quoting +// '.''.' the quoting is already opened by preceding char +// +// For close_quoting() +// we will get this form quoting '.''' P6 +// It may raise a bug '.''''.' +// If we expect +// '.'''\. let the next char close the quoting +// '.''''.' the expectation is wrong! using '.'\''.' instead +// +// It's a hard work to readjust generation opportunity for various escaping form. +// We just simply ignore it. -}// namespace anonymous.design - -namespace {//anonymous.parser.code - static const char DIGIT_CHAR[] = "0123456789"; - static const char WHITE_SPACE[] = {'\t', ' ', '\r', '\n', 0}; - static const char ALPHABET[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"; - inline bool isInList(const char c /*in*/, const char list[] /*in*/){ - const char * p = list; - for (;*p != 0 && *p != c; p++); - return *p?true:false; +static const char DIGIT_CHAR[] = "0123456789"; +static const char WHITE_SPACE[] = {'\t', ' ', '\r', '\n', 0}; +static const char ALPHABET[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"; + +static inline UBool isInList(const char c /*in*/, const char list[] /*in*/){ + const char * p = list; + for (;*p != 0 && *p != c; p++); + return *p?TRUE:FALSE; +} +static inline UBool isDigit(char c) {return isInList(c, DIGIT_CHAR);} +static inline UBool isWhiteSpace(char c) {return isInList(c, WHITE_SPACE);} +static inline UBool isAlphabet(char c) {return isInList(c, ALPHABET);} +static inline UBool isSpecialAsciiChar(char c) { + return (c >= 0x0021 && c <= 0x007E && + !((c >= 0x0030/*'0'*/ && c <= 0x0039/*'9'*/) || + (c >= 0x0041/*'A'*/ && c <= 0x005A/*'Z'*/) || + (c >= 0x0061/*'a'*/ && c <= 0x007A/*'z'*/))); +} + +// Utility class, can be treated as an auto expanded array. no boundary check. +class Buffer_byte{ + typedef char byte; + byte * start; + byte * current; + int buffer_size; // size unit is byte + + inline void expand(int add_size = 100){ // size unit is byte + int new_size = buffer_size + add_size; + + int cs_snap = content_size(); + start = (byte *) realloc(start, new_size); // may change the value of start + current = start + cs_snap; + + memset(current, 0, add_size); + buffer_size = new_size; } - inline bool isDigit(char c) {return isInList(c, DIGIT_CHAR);} - inline bool isWhiteSpace(char c) {return isInList(c, WHITE_SPACE);} - inline bool isAlphabet(char c) {return isInList(c, ALPHABET);} - inline bool isSpecialAsciiChar(char c) { - return (c >= 0x0021 && c <= 0x007E && - !((c >= 0x0030/*'0'*/ && c <= 0x0039/*'9'*/) || - (c >= 0x0041/*'A'*/ && c <= 0x005A/*'Z'*/) || - (c >= 0x0061/*'a'*/ && c <= 0x007A/*'z'*/))); + + inline void expand_to(int size){ + int r = size - buffer_size; + if (r > 0) { + expand(r); // simply expand, no block alignment + } + } +public: + Buffer_byte():start(NULL),current(start),buffer_size(0){ + expand(); + } + ~Buffer_byte(){ + free(start); } - - // Utility class, can be treated as an auto expanded array. no boundary check. - class Buffer_byte{ - typedef char byte; - byte * start; - byte * current; - int buffer_size; // size unit is byte - inline void expand(int add_size = 100){ // size unit is byte - int new_size = buffer_size + add_size; + int content_size(){return current - start;} // size unit is byte - int cs_snap = content_size(); - start = (byte *) realloc(start, new_size); // may change the value of start - current = start + cs_snap; + inline void reset(){ + start != NULL ? memset(start, 0, buffer_size) : 0; + current = start; + } - memset(current, 0, add_size); - buffer_size = new_size; - } - - inline void expand_to(int size){ - int r = size - buffer_size; - if (r > 0) { - expand(r); // simply expand, no block alignment - } - } - public: - Buffer_byte():start(NULL),current(start),buffer_size(0){ - expand(); - } - ~Buffer_byte(){ - free(start); - } - - int content_size(){return current - start;} // size unit is byte - - inline void reset(){ - start != NULL ? memset(start, 0, buffer_size) : 0; - current = start; - } - - // Using memory copy method to append a C array to buffer, - inline void append(const void * c, int size){ // size unit is byte - expand_to(content_size() + size) ; - memcpy(current, c, size); - current = current + size; - } - void * operator &(){ - return start; - } - }; + // Using memory copy method to append a C array to buffer, + inline void append(const void * c, int size){ // size unit is byte + expand_to(content_size() + size) ; + memcpy(current, c, size); + current = current + size; + } + void * operator &(){ + return start; + } +}; //template // class BUFFER{ @@ -300,1284 +298,1273 @@ class Node; //typedef BUFFER Buffer_char; //typedef BUFFER Buffer_int; //typedef BUFFER Buffer_pNode; - BUFFER(char, Buffer_char); - BUFFER(int, Buffer_int); - BUFFER(Node *, Buffer_pNode); +BUFFER(char, Buffer_char); +BUFFER(int, Buffer_int); +BUFFER(Node *, Buffer_pNode); - /* Helper class - * Encoding a string literal to a valid collation escaping string. - * See documents in anonymous.design - */ - class LiteralToEscape{ +/* Helper class + * Encoding a string literal to a valid collation escaping string. + * See documents in anonymous.design + */ +class LiteralToEscape{ +public: + // Return a null-terminate c-string. The buffer is owned by callee. + char * operator()(const char * literal /*c-string*/){ + str.reset(); + for(;*literal != 0; literal++){ + append(*literal); + } + close_quoting(); // P4 exception, to close whole quoting + return str; + } + + enum CHOICE {YES, NO, RAND}; + enum ESCAPE_FORM {BSLASH_ONLY, QUOTE_ONLY, QUOTE_AND_BSLAH, RAND_ESC}; + LiteralToEscape(CHOICE escape_literal = RAND, + CHOICE two_quotes_escape = RAND, + ESCAPE_FORM escape_form = RAND_ESC): + escape_literal(escape_literal), + two_quotes_escape(two_quotes_escape), + escape_form(escape_form), + is_quoting(FALSE){} +private: + Buffer_char str; + class Bool{ // assigned or random value public: - // Return a null-terminate c-string. The buffer is owned by callee. - char * operator()(const char * literal /*c-string*/){ - str.reset(); - for(;*literal != 0; literal++){ - append(*literal); + operator UBool() { // conversion operator + if (tag == RAND){ + return rand()%2 == 1; + } else { + return tag == YES ? TRUE : FALSE; } - close_quoting(); // P4 exception, to close whole quoting - return str; } - - enum CHOICE {YES, NO, RAND}; - enum ESCAPE_FORM {BSLASH_ONLY, QUOTE_ONLY, QUOTE_AND_BSLAH, RAND_ESC}; - LiteralToEscape(CHOICE escape_literal = RAND, - CHOICE two_quotes_escape = RAND, - ESCAPE_FORM escape_form = RAND_ESC): - escape_literal(escape_literal), - two_quotes_escape(two_quotes_escape), - escape_form(escape_form), - is_quoting(false){} + Bool(CHOICE flag=RAND):tag(flag){} private: - Buffer_char str; - class Bool{ // assigned or random value - public: - operator bool() { // conversion operator - if (tag == RAND){ - return rand()%2 == 1; - } else { - return tag == YES ? true : false; - } - } - Bool(CHOICE flag=RAND):tag(flag){} - private: - CHOICE tag; - }; - ESCAPE_FORM escape_form; - bool quote_escape; - bool bslash_escape; - Bool escape_literal; - Bool two_quotes_escape; + CHOICE tag; + }; + ESCAPE_FORM escape_form; + UBool quote_escape; + UBool bslash_escape; + Bool escape_literal; + Bool two_quotes_escape; - void set_options(){ - ESCAPE_FORM t = escape_form == RAND_ESC ? (ESCAPE_FORM) (rand()%3) : escape_form; - switch (t){ - case BSLASH_ONLY : - bslash_escape = true; quote_escape = false; break; - case QUOTE_ONLY: - bslash_escape = false;quote_escape = true; break; - case QUOTE_AND_BSLAH: - bslash_escape = true; quote_escape = true; break; - default: - ;// error - } + void set_options(){ + ESCAPE_FORM t = escape_form == RAND_ESC ? (ESCAPE_FORM) (rand()%3) : escape_form; + switch (t){ + case BSLASH_ONLY : + bslash_escape = TRUE; quote_escape = FALSE; break; + case QUOTE_ONLY: + bslash_escape = FALSE;quote_escape = TRUE; break; + case QUOTE_AND_BSLAH: + bslash_escape = TRUE; quote_escape = TRUE; break; + default: + ;// error } + } - // str [in] null-terminated c-string - void append(const char * str){ - for(;*str != 0; str++){ - append(*str); - } + // str [in] null-terminated c-string + void append(const char * str){ + for(;*str != 0; str++){ + append(*str); } + } - inline void append(const char c){ - set_options(); + inline void append(const char c){ + set_options(); - if (c == '\\'){ + if (c == '\\'){ + quote_escape ? open_quoting() : close_quoting(); + //bslash_escape always true here + str.append('\\'); + str.append('\\'); + } else if (c == '\''){ + if (two_quotes_escape){ // quoted using two single quotes + // See documents in anonymous.design + str.append('\''); + str.append('\''); + } else{ quote_escape ? open_quoting() : close_quoting(); //bslash_escape always true here str.append('\\'); - str.append('\\'); - } else if (c == '\''){ - if (two_quotes_escape){ // quoted using two single quotes - // See documents in anonymous.design - str.append('\''); - str.append('\''); - } else{ - quote_escape ? open_quoting() : close_quoting(); - //bslash_escape always true here - str.append('\\'); - str.append('\''); - } - } else if (isSpecialAsciiChar(c) || isWhiteSpace(c)){ + str.append('\''); + } + } else if (isSpecialAsciiChar(c) || isWhiteSpace(c)){ + quote_escape ? open_quoting() : close_quoting(); + if (bslash_escape) str.append('\\'); + str.append(c); + } else { //if (isAlphabet(c) || isDigit(c) || TRUE){ // treat others as literal + if (escape_literal){ quote_escape ? open_quoting() : close_quoting(); - if (bslash_escape) str.append('\\'); + if (bslash_escape) str.append('\\'); str.append(c); - } else if (isAlphabet(c) || isDigit(c) || true){ // treat others as literal - if (escape_literal){ - quote_escape ? open_quoting() : close_quoting(); - if (bslash_escape) str.append('\\'); - str.append(c); - } else { - close_quoting(); - str.append(c); - } - } - } - - void reset(){ - str.reset(); - is_quoting = false; - } - - bool is_quoting; - inline void open_quoting(){ - if(is_quoting){ - // do nothing } else { - str.append('\''); - is_quoting = true; + close_quoting(); + str.append(c); } } - inline void close_quoting(){ - if(is_quoting){ - str.append('\''); - is_quoting = false; - } else { - // do nothing - } - } - }; - + } - enum TokenType {STRING, VAR, NUMBER, WEIGHT, STREAM_END, ERROR, QUESTION_MARK,RANG_START,RANG_END, LPAR, RPAR, SEMI, EQ, COMMA, BAR}; - - /* A simple complier scanner to get token from source string. - * - * The result is put in this->tokenBuffer - * The buffer is owned by Scanner, and will be destoried in next call for getNextToken() - */ - class Scanner{ - public: - // source [in] null-terminated c-string - Scanner(const char *const source/*c-string*/):source(source), working(source), history(source){ - } + void reset(){ + str.reset(); + is_quoting = FALSE; + } - char tokenBuffer[50]; //null terminated c-string. LIMITATION & ASSUMPTION here - TokenType tokenType; - - /* this->working [in] - * this->tokenBuffer [out] - * this->tokenType [out] - */ - TokenType getNextToken(){ - history = working; - p_b = tokenBuffer; // for simplicity, no buffer overflow will be checked - tokenType = ERROR; - StateType state = START; - while (state != DONE){ - char c = *working++; - switch(state){ - case START: - if (isWhiteSpace(c)){ - // do nothing, skip - } else if (isDigit(c)){ - *p_b++ = c; // no overflow check - state = IN_NUM; - } else if (isAlphabet(c)){ - *p_b++ = c; // no overflow check - state = IN_STRING; - } else if (c == '$'){ - *p_b++ = c; // no overflow check - state = IN_VAR; - } else if (c == '\''){ - state = IN_QUOTE; - } else if (c == '\\'){ - state = IN_BSLASH; - } else if (c == 0){ - tokenType = STREAM_END; - state = DONE; - working--; - } else{ - switch(c){ - case '?': tokenType = QUESTION_MARK; break; - case '{': tokenType = RANG_START; break; - case '}': tokenType = RANG_END; break; - case '(': tokenType = LPAR; break; - case ')': tokenType = RPAR; break; - case ';': tokenType = SEMI; break; - case '=': tokenType = EQ; break; - case ',': tokenType = COMMA; break; - case '|': tokenType = BAR; break; - default: tokenType = ERROR; - } - //Raymond: Can we gracefully remove the unnecessary test? - // == Can we write a more beautiful 'switch' statement? - if (tokenType == ERROR){ - working--; - *p_b = 0; - } else { - *p_b++ = c; // tokenBuffer[0], no overflow check - *p_b++ = 0; // tokenBuffer[1], no overflow check - } - state = DONE; + UBool is_quoting; + inline void open_quoting(){ + if(is_quoting){ + // do nothing + } else { + str.append('\''); + is_quoting = TRUE; + } + } + inline void close_quoting(){ + if(is_quoting){ + str.append('\''); + is_quoting = FALSE; + } else { + // do nothing + } + } +}; + + +enum TokenType {STRING, VAR, NUMBER, WEIGHT, STREAM_END, ERROR, QUESTION_MARK,RANG_START,RANG_END, LPAR, RPAR, SEMI, EQ, COMMA, BAR}; + +/* A simple complier scanner to get token from source string. + * + * The result is put in this->tokenBuffer + * The buffer is owned by Scanner, and will be destoried in next call for getNextToken() + */ +class Scanner{ +public: + // source [in] null-terminated c-string + Scanner(const char *const source/*c-string*/):source(source), working(source), history(source){ + } + + char tokenBuffer[50]; //null terminated c-string. LIMITATION & ASSUMPTION here + TokenType tokenType; + + /* this->working [in] + * this->tokenBuffer [out] + * this->tokenType [out] + */ + TokenType getNextToken(){ + history = working; + p_b = tokenBuffer; // for simplicity, no buffer overflow will be checked + tokenType = ERROR; + StateType state = START; + while (state != DONE){ + char c = *working++; + switch(state){ + case START: + if (isWhiteSpace(c)){ + // do nothing, skip + } else if (isDigit(c)){ + *p_b++ = c; // no overflow check + state = IN_NUM; + } else if (isAlphabet(c)){ + *p_b++ = c; // no overflow check + state = IN_STRING; + } else if (c == '$'){ + *p_b++ = c; // no overflow check + state = IN_VAR; + } else if (c == '\''){ + state = IN_QUOTE; + } else if (c == '\\'){ + state = IN_BSLASH; + } else if (c == 0){ + tokenType = STREAM_END; + state = DONE; + working--; + } else{ + switch(c){ + case '?': tokenType = QUESTION_MARK; break; + case '{': tokenType = RANG_START; break; + case '}': tokenType = RANG_END; break; + case '(': tokenType = LPAR; break; + case ')': tokenType = RPAR; break; + case ';': tokenType = SEMI; break; + case '=': tokenType = EQ; break; + case ',': tokenType = COMMA; break; + case '|': tokenType = BAR; break; + default: tokenType = ERROR; } - break;//START - case IN_NUM: - if (isDigit(c)){ - *p_b++ = c; // no overflow check - } else if (c == '%'){ // no blank space between NUMBER and % symbol - *p_b++ = c; - *p_b = 0; - tokenType = WEIGHT; - state = DONE; - } else { - working--; // reset working point to current character - tokenType = NUMBER; - *p_b = 0; - state = DONE; - } - break;//IN_NUM - case IN_VAR: - if (isAlphabet(c) || isDigit(c)){ // For simplicity, digit can be the leading char - *p_b++ = c; // no overflow check - } else { + //Raymond: Can we gracefully remove the unnecessary test? + // == Can we write a more beautiful 'switch' statement? + if (tokenType == ERROR){ working--; *p_b = 0; - tokenType = VAR; - state = DONE; - } - break;//IN_VAR - case IN_STRING: - if (c == '\''){ - state = IN_QUOTE; - } else if (c =='\\'){ // NOTE: escaping for C language syntax here - state = IN_BSLASH; - } else if (isAlphabet(c) || isDigit(c)){ - *p_b++ = c; // no overflow check - } else{ - working--; - *p_b = 0; - tokenType = STRING; - state = DONE; - } - break;//IN_STRING - case IN_QUOTE: - if (c == '\''){ - state = IN_STRING; // Yes, IN_STRING } else { - *p_b++ = c; // no tokenBuffer overflow check !!! + *p_b++ = c; // tokenBuffer[0], no overflow check + *p_b++ = 0; // tokenBuffer[1], no overflow check } - break;//IN_QUOTE - case IN_BSLASH: - if (c == 'n') { - *p_b++ = '\n'; // no tokenBuffer overflow check - } else if (c == 'r'){ - *p_b++ = '\r'; // no tokenBuffer overflow check - } else if (c == 't'){ - *p_b++ = '\t'; // no tokenBuffer overflow check - } else if (c == '\''){ // NOTE: escaping for C language syntax here - *p_b++ = '\''; // no tokenBuffer overflow check - } else { - working--; - } - state = IN_STRING; // Yes, IN_STRING - break;//IN_BSLASH - case DONE: /* should never happen */ - default: + state = DONE; + } + break;//START + case IN_NUM: + if (isDigit(c)){ + *p_b++ = c; // no overflow check + } else if (c == '%'){ // no blank space between NUMBER and % symbol + *p_b++ = c; + *p_b = 0; + tokenType = WEIGHT; + state = DONE; + } else { + working--; // reset working point to current character + tokenType = NUMBER; + *p_b = 0; + state = DONE; + } + break;//IN_NUM + case IN_VAR: + if (isAlphabet(c) || isDigit(c)){ // For simplicity, digit can be the leading char + *p_b++ = c; // no overflow check + } else { working--; *p_b = 0; - tokenType = ERROR; + tokenType = VAR; state = DONE; - break; - }//switch(state) - }//while (state != DONE) + } + break;//IN_VAR + case IN_STRING: + if (c == '\''){ + state = IN_QUOTE; + } else if (c =='\\'){ // NOTE: escaping for C language syntax here + state = IN_BSLASH; + } else if (isAlphabet(c) || isDigit(c)){ + *p_b++ = c; // no overflow check + } else{ + working--; + *p_b = 0; + tokenType = STRING; + state = DONE; + } + break;//IN_STRING + case IN_QUOTE: + if (c == '\''){ + state = IN_STRING; // Yes, IN_STRING + } else { + *p_b++ = c; // no tokenBuffer overflow check !!! + } + break;//IN_QUOTE + case IN_BSLASH: + if (c == 'n') { + *p_b++ = '\n'; // no tokenBuffer overflow check + } else if (c == 'r'){ + *p_b++ = '\r'; // no tokenBuffer overflow check + } else if (c == 't'){ + *p_b++ = '\t'; // no tokenBuffer overflow check + } else if (c == '\''){ // NOTE: escaping for C language syntax here + *p_b++ = '\''; // no tokenBuffer overflow check + } else { + working--; + } + state = IN_STRING; // Yes, IN_STRING + break;//IN_BSLASH + case DONE: /* should never happen */ + default: + working--; + *p_b = 0; + tokenType = ERROR; + state = DONE; + break; + }//switch(state) + }//while (state != DONE) - return tokenType; - } + return tokenType; + } - inline bool ungetToken(){ - working = history; - } - inline void dumpCurrentPoint(){ - printf("\n______________________________________________________________________________\n"); - fwrite(source, history - source, 1, stdout); - printf("\n=====current token=====\n"); - fwrite(history, working - history, 1,stdout); - printf("\n>>>>>current point>>>>>\n"); - //printf(working); // This function will consume some characters, for example % - int len = strlen(working); - fwrite(working, len, 1, stdout); - printf("\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n"); - } - private: - const char *const source; - const char * history; - const char * working; - char * p_b; - enum StateType {START, IN_NUM, IN_VAR, IN_QUOTE, IN_BSLASH, IN_STRING, DONE}; - };//class Scanner - + inline UBool ungetToken(){ + working = history; + } + inline void dumpCurrentPoint(){ + printf("\n______________________________________________________________________________\n"); + fwrite(source, history - source, 1, stdout); + printf("\n=====current token=====\n"); + fwrite(history, working - history, 1,stdout); + printf("\n>>>>>current point>>>>>\n"); + //printf(working); // This function will consume some characters, for example % + int len = strlen(working); + fwrite(working, len, 1, stdout); + printf("\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n"); + } +private: + const char *const source; + const char * history; + const char * working; + char * p_b; + enum StateType {START, IN_NUM, IN_VAR, IN_QUOTE, IN_BSLASH, IN_STRING, DONE}; +};//class Scanner - class Node{ - public: - // Return a null-terminated c-string. The buffer is owned by callee. - virtual const char* getTargetString() = 0; - virtual ~Node(){}; - }; - /* Helper class. - * It's a mapping table between 'variable name' and its 'active Node object' - */ - class SymbolTable{ - public: - bool is_var_exist(const char *const var_name /*c-string*/){ - return get_var_name_index(var_name) == -1? false : true; +class Node{ +public: + // Return a null-terminated c-string. The buffer is owned by callee. + virtual const char* getTargetString() = 0; + virtual ~Node(){}; +}; + +/* Helper class. + * It's a mapping table between 'variable name' and its 'active Node object' + */ +class NodeSymbolTable{ +public: + UBool is_var_exist(const char *const var_name /*c-string*/){ + return get_var_name_index(var_name) == -1? FALSE : TRUE; + } + UBool does_var_has_ref(const char *const var_name /*c-string*/){ + int i = get_var_name_index(var_name); + if (i == -1){ + return FALSE; + } else { + return refs[i] == NULL ? FALSE : TRUE; } - bool does_var_has_ref(const char *const var_name /*c-string*/){ - int i = get_var_name_index(var_name); - if (i == -1){ - return false; + } + Node * get_var_ref(const char *const var_name /*c-string*/){ + int i = get_var_name_index(var_name); + if (i == -1){ + printf("name NOT exist: %s\n", var_name); + return NULL; + } else { + if (refs[i]){ + //printf("name and ref exist: %s\n", var_name); } else { - return refs[i] == NULL ? false : true; - } - } - Node * get_var_ref(const char *const var_name /*c-string*/){ - int i = get_var_name_index(var_name); - if (i == -1){ - printf("name NOT exist: %s\n", var_name); - return NULL; - } else { - if (refs[i]){ - //printf("name and ref exist: %s\n", var_name); - } else { - printf("name exist, ref NOT exist: %s\n", var_name); - } - return refs[i]; + printf("name exist, ref NOT exist: %s\n", var_name); } + return refs[i]; } + } - void put_var(const char *const var_name, Node *const var_ref = NULL){ - int i = get_var_name_index(var_name); - if (i == -1 && var_name !=NULL){ // new variable - int offset = name_buffer.content_size(); - name_buffer.append_array(var_name, strlen(var_name) + 1); - names.append(offset); - refs.append(var_ref); - } else { - if(refs[i] == NULL && var_ref != NULL){ // exist variable, no ref - refs[i] = var_ref; // link definition with variable - }; - } + void put_var(const char *const var_name, Node *const var_ref = NULL){ + int i = get_var_name_index(var_name); + if (i == -1 && var_name !=NULL){ // new variable + int offset = name_buffer.content_size(); + name_buffer.append_array(var_name, strlen(var_name) + 1); + names.append(offset); + refs.append(var_ref); + } else { + if(refs[i] == NULL && var_ref != NULL){ // exist variable, no ref + refs[i] = var_ref; // link definition with variable + }; } - void reset(){ - names.reset(); - name_buffer.reset(); + } + void reset(){ + names.reset(); + name_buffer.reset(); - // release memory here - int s = refs.content_size(); - for (int i=0; i < s; i++){ - delete refs[i]; + // release memory here + int s = refs.content_size(); + for (int i=0; i < s; i++){ + delete refs[i]; + } + refs.reset(); + } +private: + Buffer_int names; // indexes in name_buffer + Buffer_pNode refs; + Buffer_char name_buffer; // var names storage space + int get_var_name_index(const char *const var_name){ + int len = names.content_size(); + for (int i=0; i< len; i++){ + if (strcmp(var_name, &name_buffer + names[i]) == 0){ + return i; } - refs.reset(); } - private: - Buffer_int names; // indexes in name_buffer - Buffer_pNode refs; - Buffer_char name_buffer; // var names storage space - int get_var_name_index(const char *const var_name){ - int len = names.content_size(); - for (int i=0; i< len; i++){ - if (strcmp(var_name, &name_buffer + names[i]) == 0){ - return i; - } - } - return -1; - } - }; - - - class LiteralNode : public Node { - public: - virtual const char* getTargetString(){ - return str; - } - LiteralNode(const char * s /*c-string*/){ - str.append_array(s, strlen(s) + 1); - } - private: - Buffer_char str; //null-terminated c-string - }; + return -1; + } +}; - class VariableNode : public Node { - public: - virtual const char* getTargetString(){ - link(); - if (var_ref == NULL) { - return ""; // constant string has global life-cycle - } - return var_ref->getTargetString(); - } - VariableNode(const char * var_name, SymbolTable * symbols):symbols(*symbols){ - this->var_name.append_array(var_name, strlen(var_name) + 1); - this->var_ref = NULL; - } - bool link(){ - if (var_ref == NULL) { - var_ref = &symbols == NULL ? NULL : symbols.get_var_ref(var_name); - return var_ref != NULL; - } - return true; - } - private: - Buffer_char var_name; - Node * var_ref; - SymbolTable & symbols; - }; - - class Magic_SelectOneChar : public Node{ - public: - virtual const char* getTargetString(){ - return &set + rand() % len; - } - Magic_SelectOneChar( const char * set /*char set*/): len(strlen(set)){ - this->set.append_array(set, len); - } - private: - Buffer_char set; // Buffer ??? - const int len; - }; - - class MagicNode : public Node { - public: - virtual const char* getTargetString(){ - return "aaa"; - return l(select_an_string()); - } - private: - LiteralToEscape l; - Buffer_char str; - // compose a string with lenght {1, 5} - const char * select_an_string(){ - int r = rand(); - r %= 5; - r += 1; // shift 0..4 to 1..5 +class LiteralNode : public Node { +public: + virtual const char* getTargetString(){ + return str; + } + LiteralNode(const char * s /*c-string*/){ + str.append_array(s, strlen(s) + 1); + } +private: + Buffer_char str; //null-terminated c-string +}; - str.reset(); - for (int i=0; i < r; i++){ - str.append(select_an_char()); - } - str.append(0); - return &str; +class VariableNode : public Node { +public: + virtual const char* getTargetString(){ + link(); + if (var_ref == NULL) { + return ""; // constant string has global life-cycle } - // randomly select a char from a set - char select_an_char(){ - static const char *const set = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ[]&<"; - static const int len = strlen(set); - int i = rand()%len; - return set[i]; - } - }; - - class SequenceNode : public Node { - public: - virtual const char* getTargetString(){ - str.reset(); - int l = items.content_size(); - for(int i=0; i < l; i++){ - const char * temp = items[i]->getTargetString(); - str.append_array(temp, strlen(temp)); - } - str.append(0); // terminal null - return str; + return var_ref->getTargetString(); + } + VariableNode(const char * var_name, NodeSymbolTable * symbols):symbols(*symbols){ + this->var_name.append_array(var_name, strlen(var_name) + 1); + this->var_ref = NULL; + } + UBool link(){ + if (var_ref == NULL) { + var_ref = &symbols == NULL ? NULL : symbols.get_var_ref(var_name); + return var_ref != NULL; } + return TRUE; + } +private: + Buffer_char var_name; + Node * var_ref; + NodeSymbolTable & symbols; +}; - void append (Node * node){ - items.append(node); - } +class Magic_SelectOneChar : public Node{ +public: + virtual const char* getTargetString(){ + return &set + rand() % len; + } - virtual ~SequenceNode(){ - int l = items.content_size(); - for(int i=0; i < l; i++){ - //How can assure the item is got from heap? - //Let's assume it. - delete items[i]; - } - } - private: - Buffer_pNode items; - Buffer_char str; //null-terminated c-string - }; + Magic_SelectOneChar( const char * set /*char set*/): len(strlen(set)){ + this->set.append_array(set, len); + } +private: + Buffer_char set; + const int len; +}; - class RepeatNode : public Node { - public: - virtual const char* getTargetString(){ - str.reset(); - for(int i=0; i< select_a_count(); i++){ - const char * temp = item->getTargetString(); - str.append_array(temp, strlen(temp)); - } - str.append(0); - return str; - } +class MagicNode : public Node { +public: + virtual const char* getTargetString(){ + return "aaa"; + return l(select_an_string()); + } +private: + LiteralToEscape l; + Buffer_char str; + // compose a string with lenght {1, 5} + const char * select_an_string(){ + int r = rand(); + r %= 5; + r += 1; // shift 0..4 to 1..5 - RepeatNode(Node * item, int min_count =0, int max_count = 1){ - this->item = item; - this->min_count = min_count; - this->max_count = max_count; + str.reset(); + for (int i=0; i < r; i++){ + str.append(select_an_char()); } - virtual ~RepeatNode(){ - delete item; // We assume its space is got from heap - } - private: - Node * item; - Buffer_char str; - int min_count; - int max_count; - int select_a_count(){ - int t = max_count - min_count + 1; - return min_count + rand()%(t); - } - }; - class AlternationNode : public Node { - public: - virtual const char* getTargetString(){ - str.reset(); - int i = select_an_item(); + str.append(0); + return &str; + } + // randomly select a char from a set + char select_an_char(){ + static const char *const set = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ[]&<"; + static const int len = strlen(set); + int i = rand()%len; + return set[i]; + } +}; + +class SequenceNode : public Node { +public: + virtual const char* getTargetString(){ + str.reset(); + int l = items.content_size(); + for(int i=0; i < l; i++){ const char * temp = items[i]->getTargetString(); str.append_array(temp, strlen(temp)); - str.append(0); - return str; } - virtual ~AlternationNode(){ - int l = items.content_size(); - for(int i=0; i < l; i++){ - delete items[i]; // We assume its space is got from heap - } - } - protected: - Buffer_pNode items; - private: - Buffer_char str; // null-terminated c-string - // Select an item randomly and add it to target string - virtual int select_an_item() = 0; - }; + str.append(0); // terminal null + return str; + } - class Alternation1Node : public AlternationNode{ - public: - void append (Node * node){ - items.append(node); - } - private: - int select_an_item(){ - int entries = items.content_size(); - int i = rand()%entries; - return i; - } - }; + void append (Node * node){ + items.append(node); + } - class Alternation2Node : public AlternationNode{ - public: - void append (Node * node, int weight){ - items.append(node); - weights.append(weight); - total += weight; + virtual ~SequenceNode(){ + int l = items.content_size(); + for(int i=0; i < l; i++){ + //How can assure the item is got from heap? + //Let's assume it. + delete items[i]; } - Alternation2Node():total(0){} - private: - Buffer_int weights; - double total; - - // Select an item randomly. Hight weight item has more chance to be selected. - // - // +____+_+___+______+ <- total weight - // ^mark \__ one item - // - // We use following method to select an item. - // 1.locate a point in total weight randomly --> mark - // every weight has equal chance to be select - // 2.mark can identify an item --> item - // hight weight has more chance to be selected. - // - int select_an_item(){ - double reference_mark = (double)rand()/ (double)RAND_MAX; - double mark = total * reference_mark; - int i=0; - for (; true; i++){ - mark -= weights[i]; - if (mark <= 0) break; - } - return i; + } +private: + Buffer_pNode items; + Buffer_char str; //null-terminated c-string +}; + +class RepeatNode : public Node { +public: + virtual const char* getTargetString(){ + str.reset(); + for(int i=0; i< select_a_count(); i++){ + const char * temp = item->getTargetString(); + str.append_array(temp, strlen(temp)); } - }; + str.append(0); + return str; + } + + RepeatNode(Node * item, int min_count =0, int max_count = 1){ + this->item = item; + this->min_count = min_count; + this->max_count = max_count; + } + virtual ~RepeatNode(){ + delete item; // We assume its space is got from heap + } +private: + Node * item; + Buffer_char str; + int min_count; + int max_count; + int select_a_count(){ + int t = max_count - min_count + 1; + return min_count + rand()%(t); + } +}; +class AlternationNode : public Node { +public: + virtual const char* getTargetString(){ + str.reset(); + int i = select_an_item(); + const char * temp = items[i]->getTargetString(); + str.append_array(temp, strlen(temp)); + str.append(0); + return str; + } + virtual ~AlternationNode(){ + int l = items.content_size(); + for(int i=0; i < l; i++){ + delete items[i]; // We assume its space is got from heap + } + } +protected: + Buffer_pNode items; +private: + Buffer_char str; // null-terminated c-string + // Select an item randomly and add it to target string + virtual int select_an_item() = 0; +}; + +class Alternation1Node : public AlternationNode{ +public: + void append (Node * node){ + items.append(node); + } +private: + int select_an_item(){ + int entries = items.content_size(); + int i = rand()%entries; + return i; + } +}; + +class Alternation2Node : public AlternationNode{ +public: + void append (Node * node, int weight){ + items.append(node); + weights.append(weight); + total += weight; + } + Alternation2Node():total(0){} +private: + Buffer_int weights; + double total; + // Select an item randomly. Hight weight item has more chance to be selected. + // + // +____+_+___+______+ <- total weight + // ^mark \__ one item + // + // We use following method to select an item. + // 1.locate a point in total weight randomly --> mark + // every weight has equal chance to be select + // 2.mark can identify an item --> item + // hight weight has more chance to be selected. + // + int select_an_item(){ + double reference_mark = (double)rand()/ (double)RAND_MAX; + double mark = total * reference_mark; + int i=0; + for (;;){ + mark -= weights[i]; + if (mark <= 0) + break; + i++; + } + return i; + } +}; - class Parser{ - public: - Parser(const char * source, SymbolTable * symbols):s(source), symbols(*symbols){ - } - bool parse(){ - return rules(); - } - private: - Scanner s; - TokenType token; - SymbolTable & symbols; - - bool match(TokenType expected){ - if (token == expected) { - token = s.getNextToken(); - return true; - } else { - //s.dumpCurrentPoint(); - return false; - } - } - bool rules(){ - symbols.reset(); +class Parser{ +public: + Parser(const char * source, NodeSymbolTable * symbols):s(source), symbols(*symbols){ + } + UBool parse(){ + return rules(); + } +private: + Scanner s; + TokenType token; + NodeSymbolTable & symbols; + + UBool match(TokenType expected){ + if (token == expected) { token = s.getNextToken(); - while (rule()){ - } - if (token == STREAM_END){ - return true; - } else { - s.dumpCurrentPoint(); - return false; - } + return TRUE; + } else { + //s.dumpCurrentPoint(); + return FALSE; } + } - bool rule(){ - if (token == VAR){ - Buffer_char name; - name.append_array(s.tokenBuffer, strlen(s.tokenBuffer)); - name.append(0); - match(VAR); + UBool rules(){ + symbols.reset(); + token = s.getNextToken(); + while (rule()){ + } + if (token == STREAM_END){ + return TRUE; + } else { + s.dumpCurrentPoint(); + return FALSE; + } + } - if (match(EQ)){ - Node * t = NULL; - if(defination(t)){ - symbols.put_var(name, t); - return match(SEMI); - } + UBool rule(){ + if (token == VAR){ + Buffer_char name; + name.append_array(s.tokenBuffer, strlen(s.tokenBuffer)); + name.append(0); + match(VAR); + + if (match(EQ)){ + Node * t = NULL; + if(defination(t)){ + symbols.put_var(name, t); + return match(SEMI); } } - return false; } + return FALSE; + } - bool defination(Node* &node /*in,out*/){ - if (node != NULL) return false; - //assert node == NULL - if (simple(node)){ - if (token == WEIGHT){ - return alternation2(node); - } else { - return alternation1(node); - } - } - return false; - } - - bool alternation2(Node * &node /*in,out*/){ - if (node == NULL) return false; - //assert node != NULL, and is simple node - - int w; - if (!weight(w)){ - delete node; - node = NULL; - return false; - } - - // Raymond: (For interest and study purpose) - // We accept alternation2 with only one item, although I do think it is meanfull. - // - // Single item alternation2 should equal to "a simple without weight" rather than a short-alt - // - // Another reasone is, we think 'weight' should be owned by alternation2 rather than item2 itself. - - Alternation2Node * t = new Alternation2Node(); - t->append(node, w); - - node = NULL; // Logically, it has nothing - Node * temp = NULL; // We can use 'node' as temp variable, but its name is uncomfortable - - while (token == BAR){ - match(BAR); - if (simple(temp)){ - if (weight(w)){ - t->append(temp, w); - } else { - delete temp; - goto FAIL; - } - temp = NULL; // Logically, it has nothing now - } else { - goto FAIL; - } - } - - if (token == SEMI || token == RPAR){ - node = t; // A whole new node - return true; - } - // for example, this is illegal: a 4% | b 5% c - -FAIL: - delete t; // fall down... - return false; - } - - bool weight(int & w){ + UBool defination(Node* &node /*in,out*/){ + if (node != NULL) return FALSE; + //assert node == NULL + if (simple(node)){ if (token == WEIGHT){ - w = atoi(s.tokenBuffer); - match(WEIGHT); - return true; - } - return false; - } - - bool alternation1(Node * &node){ - if (!sequence(node)){ - return false; - } - - if (token == BAR){ // detected a real alternation1, create it. - return alternation1_open(node); - } else { // just something with higher precedence, not a alternation1 - return true; + return alternation2(node); + } else { + return alternation1(node); } } + return FALSE; + } - bool alternation1_open(Node * &node){ - if (node == NULL) return false; - // assert node != NULL, and node is sequence or simpler thing + UBool alternation2(Node * &node /*in,out*/){ + if (node == NULL) return FALSE; + //assert node != NULL, and is simple node - Alternation1Node * t = new Alternation1Node(); - t->append(node); + int w; + if (!weight(w)){ + delete node; + node = NULL; + return FALSE; + } + + // Raymond: (For interest and study purpose) + // We accept alternation2 with only one item, although I do think it is meanfull. + // + // Single item alternation2 should equal to "a simple without weight" rather than a short-alt + // + // Another reasone is, we think 'weight' should be owned by alternation2 rather than item2 itself. - node = NULL; // Logically, it has nothing - Node * temp = NULL; // We can use 'node' as temp variable, but its name is uncomfortable + Alternation2Node * t = new Alternation2Node(); + t->append(node, w); - // We can use either recursion (linking node) or loop (plain array) to create the list - // Here, we chosse loop (plain array). - while (token == BAR){ - match(BAR); - if(sequence(temp)){ - t->append(temp); - temp = NULL; + node = NULL; // Logically, it has nothing + Node * temp = NULL; // We can use 'node' as temp variable, but its name is uncomfortable + + while (token == BAR){ + match(BAR); + if (simple(temp)){ + if (weight(w)){ + t->append(temp, w); } else { + delete temp; goto FAIL; } + temp = NULL; // Logically, it has nothing now + } else { + goto FAIL; } + } + + if (token == SEMI || token == RPAR){ + node = t; // A whole new node + return TRUE; + } + // for example, this is illegal: a 4% | b 5% c - if (token == SEMI || token == RPAR){ - node = t; - return true; - } FAIL: - delete t; - return false; + delete t; // fall down... + return FALSE; + } + + UBool weight(int & w){ + if (token == WEIGHT){ + w = atoi(s.tokenBuffer); + match(WEIGHT); + return TRUE; + } + return FALSE; + } + + UBool alternation1(Node * &node){ + if (!sequence(node)){ + return FALSE; } + if (token == BAR){ // detected a real alternation1, create it. + return alternation1_open(node); + } else { // just something with higher precedence, not a alternation1 + return TRUE; + } + } - bool sequence(Node* &node){ - if (!item(node)) { - return false; - } + UBool alternation1_open(Node * &node){ + if (node == NULL) return FALSE; + // assert node != NULL, and node is sequence or simpler thing - if (token == VAR || token == STRING || token == LPAR){ // maybe an item - return sequence_open(node); - } else { // just something with higher precedence. - return true; + Alternation1Node * t = new Alternation1Node(); + t->append(node); + + node = NULL; // Logically, it has nothing + Node * temp = NULL; // We can use 'node' as temp variable, but its name is uncomfortable + + // We can use either recursion (linking node) or loop (plain array) to create the list + // Here, we chosse loop (plain array). + while (token == BAR){ + match(BAR); + if(sequence(temp)){ + t->append(temp); + temp = NULL; + } else { + goto FAIL; } } - bool sequence_open(Node* &node){ - if (node == NULL) return false; - // assert node != NULL, and node is item (simple, repeat, or short-alt) - - SequenceNode* t = new SequenceNode(); - t->append(node); - - node = NULL; // Logically, it has nothing - Node * temp = NULL; // We can use 'node' as temp variable, but its name is uncomfortable - - while (token == VAR || token == STRING || token == LPAR){ // maybe a simple - if (item(temp)){ - t->append(temp); - temp = NULL; - } else { - goto FAIL; - } - } - // ILLEGAL: a c 5% - if (token == SEMI || token == RPAR || token == BAR){ - node = t; - return true; - } + if (token == SEMI || token == RPAR){ + node = t; + return TRUE; + } FAIL: - delete t; - return false; + delete t; + return FALSE; + } + + UBool sequence(Node* &node){ + if (!item(node)) { + return FALSE; } - bool item(Node *& node /*out*/){ - if (node != NULL){ - // assert node is simple + if (token == VAR || token == STRING || token == LPAR){ // maybe an item + return sequence_open(node); + } else { // just something with higher precedence. + return TRUE; + } + } + + UBool sequence_open(Node* &node){ + if (node == NULL) return FALSE; + // assert node != NULL, and node is item (simple, repeat, or short-alt) + + SequenceNode* t = new SequenceNode(); + t->append(node); + + node = NULL; // Logically, it has nothing + Node * temp = NULL; // We can use 'node' as temp variable, but its name is uncomfortable + + while (token == VAR || token == STRING || token == LPAR){ // maybe a simple + if (item(temp)){ + t->append(temp); + temp = NULL; + } else { + goto FAIL; + } + } + // ILLEGAL: a c 5% + if (token == SEMI || token == RPAR || token == BAR){ + node = t; + return TRUE; + } +FAIL: + delete t; + return FALSE; + + } + + UBool item(Node *& node /*out*/){ + if (node != NULL){ + // assert node is simple + // go on + } else { + if (simple(node)){ // go on } else { - if (simple(node)){ - // go on - } else { - return false; + return FALSE; + } + } + + // assert node != NULL, node is simple + switch (token){ + case RANG_START: + return repeat(node); + case QUESTION_MARK: + return short_alt(node); + default: + return TRUE; // bare simple + } + } + + + // get a 'simple node' + UBool simple(Node* &node /*out*/){ + if (node != NULL) return FALSE; + //assert node == NULL + switch(token){ + case LPAR: + match(LPAR); + if(defination(node) && match(RPAR)){ + return TRUE; } - } - - // assert node != NULL, node is simple - switch (token){ - case RANG_START: - return repeat(node); - case QUESTION_MARK: - return short_alt(node); - default: - return true; // bare simple - } + return FALSE; + case VAR: + node = new VariableNode(s.tokenBuffer, &symbols); + match(VAR); + return TRUE; + case STRING: + node = new LiteralNode(s.tokenBuffer); + match(STRING); + return TRUE; + default: + return FALSE; } + } + //upgrade a 'simple node' to 'repeat node' + UBool repeat (Node* &node /*in,out*/){ + if (node == NULL) return FALSE; + //assert node != NULL, node is simple - // get a 'simple node' - bool simple(Node* &node /*out*/){ - if (node != NULL) return false; - //assert node == NULL - switch(token){ - case LPAR: - match(LPAR); - if(defination(node) && match(RPAR)){ - return true; - } - return false; - case VAR: - node = new VariableNode(s.tokenBuffer, &symbols); - match(VAR); - return true; - case STRING: - node = new LiteralNode(s.tokenBuffer); - match(STRING); - return true; - default: - return false; - } - } - - //upgrade a 'simple node' to 'repeat node' - bool repeat (Node* &node /*in,out*/){ - if (node == NULL) return false; - //assert node != NULL, node is simple - - if (match(RANG_START) && token == NUMBER){ - int min = atoi(s.tokenBuffer); + if (match(RANG_START) && token == NUMBER){ + int min = atoi(s.tokenBuffer); + match(NUMBER); + if(match(COMMA) && token == NUMBER){ + int max = atoi(s.tokenBuffer); match(NUMBER); - if(match(COMMA) && token == NUMBER){ - int max = atoi(s.tokenBuffer); - match(NUMBER); - if(match(RANG_END)){ - Node * t = node; - node = new RepeatNode(t, min, max); - return true; - } + if(match(RANG_END)){ + Node * t = node; + node = new RepeatNode(t, min, max); + return TRUE; } } - delete node; - node = NULL; - return false; } + delete node; + node = NULL; + return FALSE; + } - //upgrade a 'simple node' to 'short-alt node' - bool short_alt (Node* &node /*in,out*/){ - if (node == NULL) return false; - //assert node != NULL, node is simple + //upgrade a 'simple node' to 'short-alt node' + UBool short_alt (Node* &node /*in,out*/){ + if (node == NULL) return FALSE; + //assert node != NULL, node is simple - if (match(QUESTION_MARK)){ - int exist_weight = 50; - if (token == WEIGHT){ - exist_weight = atoi(s.tokenBuffer); - match(WEIGHT); - } - int null_weight = 100 - exist_weight; - Node * t1 = node; - Node * t2 = new LiteralNode(""); - Alternation2Node * t = new Alternation2Node(); - t->append(t1, exist_weight); - t->append(t2, null_weight); - node = t; - return true; + if (match(QUESTION_MARK)){ + int exist_weight = 50; + if (token == WEIGHT){ + exist_weight = atoi(s.tokenBuffer); + match(WEIGHT); } - delete node; - node = NULL; - return false; + int null_weight = 100 - exist_weight; + Node * t1 = node; + Node * t2 = new LiteralNode(""); + Alternation2Node * t = new Alternation2Node(); + t->append(t1, exist_weight); + t->append(t2, null_weight); + node = t; + return TRUE; } - }; // class Parser + delete node; + node = NULL; + return FALSE; + } +}; // class Parser + +class RandomLanguageGenerator{ +public: + //NOTE: start cannot be a magic node + RandomLanguageGenerator(const char *const bnf_definition, + const char *const start, + const char *const magic_name = NULL, + Node *const magic_ref = NULL){ + + srand((unsigned)time( NULL )); + // our random sequence is start from here. + // side effect: It's a global C function! + + Parser p(bnf_definition, &symbols); + if (!p.parse()) {return;} // how can we break when encounter error? + root = symbols.get_var_ref(start); + put_magic(magic_name, magic_ref); + } + + void put_magic(const char *const magic_name, Node *const magic_ref){ + symbols.put_var(magic_name, magic_ref); + } + + // Return a null-terminated c-string. The buffer is owned by callee. + const char * get_a_string(){ + return root->getTargetString(); + } +private: + Node * root; + NodeSymbolTable symbols; +}; + +UBool TestScanner(void){ + //const char str1[] = "$root = $command{0,5} $reset $mostRules{1,20};"; + //const char str1_r[][20] = {"$root", "=", "$command", "{", "0", ",", "5", "}", + // "$reset", "$mostRules", "{", "1", ",", "20", "}", ";"}; + + const char str2[] = "$p2 =('\\' $s $string $s)? 25%;"; + const char str2_r[][20] = {"$p2", "=", "(", "\\", "$s", "$string", "$s", ")", "?", "25%", ";"}; + + const char *str = str2; + const char (*str_r)[20] = str2_r; + int tokenNum = sizeof(str2_r)/sizeof(char[20]); + + Scanner t(str); + UBool pass = TRUE; + t.getNextToken(); + int i = 0; + while (pass){ + if (t.tokenType == STREAM_END){ + pass = pass? i == tokenNum : FALSE; + break;//while + } else if (t.tokenType == ERROR){ + pass = FALSE; + break;//while + } else { + pass = strcmp(t.tokenBuffer, str_r[i++]) == 0 ; + t.getNextToken(); + } + } + if (pass){ + printf("TestScanner passed.\n"); + } else { + printf("TestScanner FAILED!!!\n"); + t.dumpCurrentPoint(); + } + return pass; +} + +UBool TestLiteralizer(){ + const char *const str = "This ' A !,z| qq [] .new\tline"; + const char *const str_r = "This \\' A '!,'z'|' qq '[]' '.'new\tline"; + //// + //// :( we must quote our string to following C syntax + //// cannot type the literal here, it makes our code rather human unreadable + //// very very unconformable! + //// + ///* + //*/ + + //const char *const s1 = "ab'c"; + //const char (* s1_r1) [] = { "ab''c", // ab''c + // "ab\\'c", // ab\'c + // };// + ///* + // . '.' \. + // .. \.\. '.'\. '.'\. '..' // '.''.' wrong + //*/ + + //const char *const s2 = "a..'.b"; // a..'.b + //const char (*s2_r) [] = { "a'..''.'b" // a'..''.'b + // ,"a'..\\'.'b" // a'..\'.'b + // ,"a'..'\\''.'b" // a'..'\''.'b + // };// + + //const char *const s3 = "a..\\.b"; // a..\.b + //const char (*s3_r) [] = { "a'..\\\\.'b" // a'..\\.'b + // ,"a'..'\\\\'.'b" // a'..'\\'.'b + // };// + + // // no catact operation, no choice, must be compact + + srand((unsigned)time( NULL )); - class RandomLanguageGenerator{ - public: - //NOTE: start cannot be a magic node - RandomLanguageGenerator(const char *const bnf_definition, - const char *const start, - const char *const magic_name = NULL, - Node *const magic_ref = NULL){ + //LiteralToEscape l(LiteralToEscape::NO, LiteralToEscape::NO, LiteralToEscape::RAND_ESC); + LiteralToEscape l; - srand((unsigned)time( NULL )); - // our random sequence is start from here. - // side effect: It's a global C function! - - Parser p(bnf_definition, &symbols); - if (!p.parse()) {return;} // how can we break when encounter error? - root = symbols.get_var_ref(start); - put_magic(magic_name, magic_ref); - } - - void put_magic(const char *const magic_name, Node *const magic_ref){ - symbols.put_var(magic_name, magic_ref); - } - - // Return a null-terminated c-string. The buffer is owned by callee. - const char * get_a_string(){ - return root->getTargetString(); - } - private: - Node * root; - SymbolTable symbols; - }; - -}//namespace anonymous.parser.code - - -namespace {//anonymous.parser.test - bool TestScanner(void){ - //const char str1[] = "$root = $command{0,5} $reset $mostRules{1,20};"; - //const char str1_r[][20] = {"$root", "=", "$command", "{", "0", ",", "5", "}", - // "$reset", "$mostRules", "{", "1", ",", "20", "}", ";"}; - - const char str2[] = "$p2 =('\\' $s $string $s)? 25%;"; - const char str2_r[][20] = {"$p2", "=", "(", "\\", "$s", "$string", "$s", ")", "?", "25%", ";"}; - - const char *str = str2; - const char (*str_r)[20] = str2_r; - int tokenNum = sizeof(str2_r)/sizeof(char[20]); - - Scanner t(str); - bool pass = true; - t.getNextToken(); - int i = 0; - while (pass){ - if (t.tokenType == STREAM_END){ - pass = pass? i == tokenNum : false; - break;//while - } else if (t.tokenType == ERROR){ - pass = false; - break;//while - } else { - pass = strcmp(t.tokenBuffer, str_r[i++]) == 0 ; - t.getNextToken(); - } - } - if (pass){ - printf("TestScanner passed.\n"); - } else { - printf("TestScanner FAILED!!!\n"); - t.dumpCurrentPoint(); - } - return pass; + printf("\n========TestLiteralier start=======\n"); + printf(str); + printf("\n-----------------------------------\n"); + //printf(r); + for (int i=0; i<10; i++){ + const char * s = l(str); + fwrite(s, strlen(s), 1, stdout); + printf("\n"); } + printf("\n~~~~~~~~TestLiteralier end~~~~~~~~~~\n"); - bool TestLiteralizer(){ - const char *const str = "This ' A !,z| qq [] .new\tline"; - const char *const str_r = "This \\' A '!,'z'|' qq '[]' '.'new\tline"; - //// - //// :( we must quote our string to following C syntax - //// cannot type the literal here, it makes our code rather human unreadable - //// very very unconformable! - //// - ///* - //*/ + // UBool pass = strcmp(str_r,l(str)) == 0; - //const char *const s1 = "ab'c"; - //const char (* s1_r1) [] = { "ab''c", // ab''c - // "ab\\'c", // ab\'c - // };// - ///* - // . '.' \. - // .. \.\. '.'\. '.'\. '..' // '.''.' wrong - //*/ + //if (pass){ + // printf("TestLiteralier passed.\n"); + // } else { + // printf("TestLiteralier FAILED!!!\n"); + // } + // return pass; + return FALSE; +} +UBool TestLiteralNode(){ + const char * s = "test string99."; + LiteralNode n(s); + const char * r = n.getTargetString(); - //const char *const s2 = "a..'.b"; // a..'.b - //const char (*s2_r) [] = { "a'..''.'b" // a'..''.'b - // ,"a'..\\'.'b" // a'..\'.'b - // ,"a'..'\\''.'b" // a'..'\''.'b - // };// + UBool pass = strcmp(s,r) == 0; - //const char *const s3 = "a..\\.b"; // a..\.b - //const char (*s3_r) [] = { "a'..\\\\.'b" // a'..\\.'b - // ,"a'..'\\\\'.'b" // a'..'\\'.'b - // };// - - // // no catact operation, no choice, must be compact - - srand((unsigned)time( NULL )); - - //LiteralToEscape l(LiteralToEscape::NO, LiteralToEscape::NO, LiteralToEscape::RAND_ESC); - LiteralToEscape l; - - printf("\n========TestLiteralier start=======\n"); - printf(str); - printf("\n-----------------------------------\n"); - //printf(r); - for (int i=0; i<10; i++){ - const char * s = l(str); - fwrite(s, strlen(s), 1, stdout); - printf("\n"); - } - printf("\n~~~~~~~~TestLiteralier end~~~~~~~~~~\n"); - - // bool pass = strcmp(str_r,l(str)) == 0; - - //if (pass){ - // printf("TestLiteralier passed.\n"); - // } else { - // printf("TestLiteralier FAILED!!!\n"); - // } - // return pass; - return false; - } - bool TestLiteralNode(){ - const char * s = "test string99."; - LiteralNode n(s); - const char * r = n.getTargetString(); - - bool pass = strcmp(s,r) == 0; - - if (pass){ - printf("TestLiteralNode passed.\n"); - } else { - printf("TestLiteralNode FAILED!!!\n"); - } - return pass; + if (pass){ + printf("TestLiteralNode passed.\n"); + } else { + printf("TestLiteralNode FAILED!!!\n"); } + return pass; +} - bool TestMagicNode(){ - MagicNode n; - - printf("\n========TestMagicNode start=======\n"); - for (int i=0; i < 10 ; i++){ +UBool TestMagicNode(){ + MagicNode n; + + printf("\n========TestMagicNode start=======\n"); + for (int i=0; i < 10 ; i++){ + printf(n.getTargetString()); + printf("\n------------------\n"); + } + printf("\n~~~~~~~~TestMagicNode end~~~~~~~~~~\n"); + return FALSE; +} +UBool TestSequenceNode(){ + SequenceNode n; + LiteralNode * n1 = new LiteralNode("abc "); + LiteralNode * n2 = new LiteralNode(", s"); + n.append(n1); + n.append(n2); + const char * r = n.getTargetString(); + char * s = "abc , s"; + + UBool pass = strcmp(s,r) == 0; + + if (pass){ + printf("TestSequenceNode passed.\n"); + } else { + printf("TestSequenceNode FAILED!!!\n"); + } + return pass; +} + +UBool TestAlternation1Node(){ + srand((unsigned)time( NULL )); + Alternation1Node n; + LiteralNode * a = new LiteralNode("a"); + LiteralNode * b = new LiteralNode("b"); + LiteralNode * c = new LiteralNode("c"); + LiteralNode * d = new LiteralNode("c"); + n.append(a); + n.append(b); + n.append(c); + n.append(d); + printf("\n========= TestAlternation1Node =============\n"); + for(int i=0; i<10; i++){ printf(n.getTargetString()); - printf("\n------------------\n"); - } - printf("\n~~~~~~~~TestMagicNode end~~~~~~~~~~\n"); - return false; + printf("\n"); } - bool TestSequenceNode(){ - SequenceNode n; - LiteralNode * n1 = new LiteralNode("abc "); - LiteralNode * n2 = new LiteralNode(", s"); - n.append(n1); - n.append(n2); - const char * r = n.getTargetString(); - char * s = "abc , s"; - - bool pass = strcmp(s,r) == 0; - - if (pass){ - printf("TestSequenceNode passed.\n"); - } else { - printf("TestSequenceNode FAILED!!!\n"); - } - return pass; - } - - bool TestAlternation1Node(){ - srand((unsigned)time( NULL )); - Alternation1Node n; - LiteralNode * a = new LiteralNode("a"); - LiteralNode * b = new LiteralNode("b"); - LiteralNode * c = new LiteralNode("c"); - LiteralNode * d = new LiteralNode("c"); - n.append(a); - n.append(b); - n.append(c); - n.append(d); - printf("\n========= TestAlternation1Node =============\n"); - for(int i=0; i<10; i++){ - printf(n.getTargetString()); - printf("\n"); - } - printf("~~~~~~~~~ TestAlternation1Node ~~~~~~~~~~~~~\n"); - return false; - } - bool TestAlternation2Node(){ - srand((unsigned)time( NULL )); - Alternation2Node n; - LiteralNode * n1 = new LiteralNode("boy"); - LiteralNode * n2 = new LiteralNode("gggirl"); - n.append(n1,10); - n.append(n2,20); - printf("\n========= TestAlternation2Node = 10, 20 =====\n"); - for(int i=0; i<10; i++){ - printf(n.getTargetString()); - printf("\n"); - } - printf("~~~~~~~~~ TestAlternation2Node ~~~~~~~~~~~~~\n"); - return false; - } - - bool TestRepeatNode(){ - srand((unsigned)time( NULL )); - LiteralNode * n1 = new LiteralNode("abc "); - RepeatNode n(n1, 1, 4); - printf("\n========= TestRepeatNode =============\n"); - for(int i=0; i<10; i++){ - printf(n.getTargetString()); - printf("\n"); - } - printf("~~~~~~~~~ TestRepeatNode ~~~~~~~~~~~~~\n"); - return false; - } - bool TestVariableNode(){ - printf("\n========TestVariableNode===========\n"); - VariableNode n("aaa", NULL); + printf("~~~~~~~~~ TestAlternation1Node ~~~~~~~~~~~~~\n"); + return FALSE; +} +UBool TestAlternation2Node(){ + srand((unsigned)time( NULL )); + Alternation2Node n; + LiteralNode * n1 = new LiteralNode("boy"); + LiteralNode * n2 = new LiteralNode("gggirl"); + n.append(n1,10); + n.append(n2,20); + printf("\n========= TestAlternation2Node = 10, 20 =====\n"); + for(int i=0; i<10; i++){ printf(n.getTargetString()); - printf("\n~~~~~~~~~ TestVariableNode ~~~~~~~~~~~~~\n"); - return false; + printf("\n"); } - bool TestSymbolTable(){ - LiteralNode * n1 = new LiteralNode("uvw"); - LiteralNode * n2 = new LiteralNode("xyz"); - SymbolTable t; - t.put_var("abc", n1); - t.put_var("$aaa", n2); - t.put_var("bbb"); + printf("~~~~~~~~~ TestAlternation2Node ~~~~~~~~~~~~~\n"); + return FALSE; +} - bool pass; - pass = t.is_var_exist("abc"); - pass = pass && t.is_var_exist("$aaa"); - pass = pass && t.is_var_exist("bbb"); - pass = pass && !t.is_var_exist("ccc"); - pass = pass && t.does_var_has_ref("abc"); - pass = pass && t.does_var_has_ref("$aaa"); - pass = pass && !t.does_var_has_ref("bbb"); - pass = pass && !t.does_var_has_ref("zz"); - - t.reset(); - pass = pass && !t.does_var_has_ref("abc"); - if (pass){ - printf("TestSymbolTable passed.\n"); - } else { - printf("TestSymbolTable FAILED!!!\n"); - } - return pass; +UBool TestRepeatNode(){ + srand((unsigned)time( NULL )); + LiteralNode * n1 = new LiteralNode("abc "); + RepeatNode n(n1, 1, 4); + printf("\n========= TestRepeatNode =============\n"); + for(int i=0; i<10; i++){ + printf(n.getTargetString()); + printf("\n"); } + printf("~~~~~~~~~ TestRepeatNode ~~~~~~~~~~~~~\n"); + return FALSE; +} +UBool TestVariableNode(){ + printf("\n========TestVariableNode===========\n"); + VariableNode n("aaa", NULL); + printf(n.getTargetString()); + printf("\n~~~~~~~~~ TestVariableNode ~~~~~~~~~~~~~\n"); + return FALSE; +} +UBool TestSymbolTable(){ + LiteralNode * n1 = new LiteralNode("uvw"); + LiteralNode * n2 = new LiteralNode("xyz"); + NodeSymbolTable t; + t.put_var("abc", n1); + t.put_var("$aaa", n2); + t.put_var("bbb"); - bool TestParser1(){ - const char *const str1 = - "$s = ' ' ? 50%;" - //"$relationList = '<' | '<<' | ';' | '<<<' | ',' | '=';" - "$p1 = ($string $s '|' $s)? 25%;" - "$p2 = ('\\' $s $string $s)? 25%;" - "$rel2 = $p1 $string $s $p2;" - "$relation = $relationList $s ($rel1 | $rel2) $crlf;" - "$command = $commandList $crlf;" - //Raymond: Test code in Java source should be fixed to adapt current syntax - "$reset = '&' $s ($beforeList $s)? 10% ($positionList 100% | $string 10%) $crlf;" - "$mostRules = $command 1% | $reset 5% | $relation 25%;" - "$root = $command{0,5} $reset $mostRules{1,20};" - - //"$x = ($var {1,2}) 3%;" // legal. - //"$x = $var {1,2} 3% | b 4%;" // illegal. 3% - //"$x = $var {1,2} 3%;" // illegal. 3% - //"$m = $c ? 2% 4% | $r 5% | $n 25%;" // should failed at '4%' - //"$a = b ? 2% | c 5%;" // should failed at '5%' - //"$x = A B 5% C 10% | D;" // illegal. 5% - //"$x = aa 45% | bb 5% cc;" // illegal. cc - //"$x = (b 5%) (c 6%);" // legal. - //"$x = (b 5%) c 6%;" // legal? illegal. - //"$x = b 5% (c 6%);" // legal? illegal. - //"$x = b 5% c 6%;" // legal? illegal, should failed at 'c' - //"$x = b 5%;" // legal - //"$x = aa 45% | bb 5% cc;" // should failed at 'cc' - //"$x = a | b | c 4% | d 5%;" // should failed at '4%' - //"$s = ' ' ? 50% abc;" // legal. - ; - SymbolTable symbol_table; - - Parser p(str1, &symbol_table); - - bool pass = p.parse(); - - symbol_table.reset(); - if (pass){ - printf("TestParser passed.\n"); - } else { - printf("TestParser FAILED!!!\n"); - } - return pass; + UBool pass; + pass = t.is_var_exist("abc"); + pass = pass && t.is_var_exist("$aaa"); + pass = pass && t.is_var_exist("bbb"); + pass = pass && !t.is_var_exist("ccc"); + pass = pass && t.does_var_has_ref("abc"); + pass = pass && t.does_var_has_ref("$aaa"); + pass = pass && !t.does_var_has_ref("bbb"); + pass = pass && !t.does_var_has_ref("zz"); + t.reset(); + pass = pass && !t.does_var_has_ref("abc"); + if (pass){ + printf("TestSymbolTable passed.\n"); + } else { + printf("TestSymbolTable FAILED!!!\n"); } - bool TestRandomLanguageGenerator(){ - const char *const def = - "$a = $b;" - "$b = $c;" - "$c = $t;" - "$t = abc z{2,2};" - "$k = a | b | c | d | e | f | g ;" - "$z = a 0% | b 1% | c 10%;" - ; // end of string - const char * s = "abczz"; + return pass; +} +UBool TestParser1(){ + const char *const str1 = + "$s = ' ' ? 50%;" + //"$relationList = '<' | '<<' | ';' | '<<<' | ',' | '=';" + "$p1 = ($string $s '|' $s)? 25%;" + "$p2 = ('\\' $s $string $s)? 25%;" + "$rel2 = $p1 $string $s $p2;" + "$relation = $relationList $s ($rel1 | $rel2) $crlf;" + "$command = $commandList $crlf;" + //Raymond: Test code in Java source should be fixed to adapt current syntax + "$reset = '&' $s ($beforeList $s)? 10% ($positionList 100% | $string 10%) $crlf;" + "$mostRules = $command 1% | $reset 5% | $relation 25%;" + "$root = $command{0,5} $reset $mostRules{1,20};" - //RandomLanguageGenerator g(def, "$a"); - RandomLanguageGenerator g(collationBNF, "$root", "$magic", new MagicNode()); - - printf("\n_________ TestRandomLanguageGenerator _____________\n"); - for (int i= 0; i< 5; i++){ - //for (int j = 0; j < 99999999; j++); - const char * r = g.get_a_string(); - fwrite(r, strlen(r), 1, stdout); - printf("_____________________________________________________\n"); - } - printf("~~~~~~~~~ TestRandomLanguageGenerator ~~~~~~~~~~~~~\n"); - return false; + //"$x = ($var {1,2}) 3%;" // legal. + //"$x = $var {1,2} 3% | b 4%;" // illegal. 3% + //"$x = $var {1,2} 3%;" // illegal. 3% + //"$m = $c ? 2% 4% | $r 5% | $n 25%;" // should failed at '4%' + //"$a = b ? 2% | c 5%;" // should failed at '5%' + //"$x = A B 5% C 10% | D;" // illegal. 5% + //"$x = aa 45% | bb 5% cc;" // illegal. cc + //"$x = (b 5%) (c 6%);" // legal. + //"$x = (b 5%) c 6%;" // legal? illegal. + //"$x = b 5% (c 6%);" // legal? illegal. + //"$x = b 5% c 6%;" // legal? illegal, should failed at 'c' + //"$x = b 5%;" // legal + //"$x = aa 45% | bb 5% cc;" // should failed at 'cc' + //"$x = a | b | c 4% | d 5%;" // should failed at '4%' + //"$s = ' ' ? 50% abc;" // legal. + ; + NodeSymbolTable symbol_table; - ////bool pass = strcmp(s,r) == 0; + Parser p(str1, &symbol_table); - //if (pass){ - // printf("TestRandomLanguageGenerator passed.\n"); - //} else { - // printf("TestRandomLanguageGenerator FAILED!!!\n"); - //} - //return pass; + UBool pass = p.parse(); + + symbol_table.reset(); + if (pass){ + printf("TestParser passed.\n"); + } else { + printf("TestParser FAILED!!!\n"); } + return pass; - void Test2(){ //Raymond: C++ study, Can I keep the name as Test() ?? - TestScanner(); - TestLiteralizer(); - TestLiteralNode(); - TestMagicNode(); - TestSequenceNode(); - TestAlternation1Node(); - TestAlternation2Node(); - TestRepeatNode(); - TestVariableNode(); - TestSymbolTable(); - TestParser1(); - TestRandomLanguageGenerator(); - // How can I test memory leak?? +} +UBool TestRandomLanguageGenerator(){ + const char *const def = + "$a = $b;" + "$b = $c;" + "$c = $t;" + "$t = abc z{2,2};" + "$k = a | b | c | d | e | f | g ;" + "$z = a 0% | b 1% | c 10%;" + ; // end of string + const char * s = "abczz"; + + + //RandomLanguageGenerator g(def, "$a"); + RandomLanguageGenerator g(collationBNF, "$root", "$magic", new MagicNode()); + + printf("\n_________ TestRandomLanguageGenerator _____________\n"); + for (int i= 0; i< 5; i++){ + //for (int j = 0; j < 99999999; j++); + const char * r = g.get_a_string(); + fwrite(r, strlen(r), 1, stdout); + printf("_____________________________________________________\n"); } -}//namespace anonymous.parser.test + printf("~~~~~~~~~ TestRandomLanguageGenerator ~~~~~~~~~~~~~\n"); + return FALSE; + ////UBool pass = strcmp(s,r) == 0; + + //if (pass){ + // printf("TestRandomLanguageGenerator passed.\n"); + //} else { + // printf("TestRandomLanguageGenerator FAILED!!!\n"); + //} + //return pass; +} + +void RandomCollatorTest::Test2(){ + TestScanner(); + TestLiteralizer(); + TestLiteralNode(); + TestMagicNode(); + TestSequenceNode(); + TestAlternation1Node(); + TestAlternation2Node(); + TestRepeatNode(); + TestVariableNode(); + TestSymbolTable(); + TestParser1(); + TestRandomLanguageGenerator(); +} -#include "rndmcoll.h" void RandomCollatorTest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* par){ if (exec) logln("TestSuite RandomCollatorTest: "); switch (index) { - case 0: name = "Test"; if (exec) Test(); break; + TESTCASE(0, Test); default: name = ""; break; } } void RandomCollatorTest::Test(){ - //TestRandomLanguageGenerator(); - //return; - - logln("RandomCollatorTest.Test"); - RandomLanguageGenerator test_rule(collationBNF, "$root", "$magic", new MagicNode()); //class TestColltorCompare{ //public: - // bool operator()(Collator &coll, int count = 1000){ + // UBool operator()(Collator &coll, int count = 1000){ // UnicodeString a(test_string.get_a_string()); // UnicodeString b(test_string.get_a_string()); // UnicodeString c(test_string.get_a_string()); @@ -1589,12 +1576,12 @@ void RandomCollatorTest::Test(){ // } // }while(count-- >= 0 ); - // return false; + // return FALSE; // } // TestColltorCompare():test_string("$s = $c{1,8};", "$s", "$c", new Magic_SelectOneChar("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ[]&<")){ // } //private: - // bool check_transitivity(const Collator & coll, const UnicodeString &a, const UnicodeString &b, const UnicodeString &c){ + // UBool check_transitivity(const Collator & coll, const UnicodeString &a, const UnicodeString &b, const UnicodeString &c){ // int ab = coll.compare(a,b), ba = coll.compare(b,a); // int bc = coll.compare(b,c), cb = coll.compare(c,b); // int ca = coll.compare(c,a), ac = coll.compare(a,c); @@ -1612,16 +1599,16 @@ void RandomCollatorTest::Test(){ // ||(bc <=0 && ca <=0 && ba >0) // ||(ca <=0 && ab <=0 && cb >0) // ){ - // return false; + // return FALSE; // } - // return true; + // return TRUE; // } // RandomLanguageGenerator test_string; //} coll_test; - const int CONSTRUCT_RANDOM_COUNT = 10; + static const int CONSTRUCT_RANDOM_COUNT = 10; for (int i=0; i < CONSTRUCT_RANDOM_COUNT; i++){ const char * rule = test_rule.get_a_string(); logln("\n-----------------------------------%d\n",i); @@ -1644,8 +1631,7 @@ void RandomCollatorTest::Test(){ ////} delete c; } - - //Test2(); } +#endif /* #if !UCONFIG_NO_COLLATION */ diff --git a/icu4c/source/test/intltest/rndmcoll.h b/icu4c/source/test/intltest/rndmcoll.h index 862e578872..2a531a9c67 100644 --- a/icu4c/source/test/intltest/rndmcoll.h +++ b/icu4c/source/test/intltest/rndmcoll.h @@ -1,6 +1,6 @@ /******************************************************************** * COPYRIGHT: - * Copyright (c) 2002-2005, International Business Machines Corporation and + * Copyright (c) 2005-2005, International Business Machines Corporation and * others. All Rights Reserved. ********************************************************************/ @@ -22,6 +22,7 @@ public: virtual void runIndexedTest( int32_t index, UBool exec, const char* &name, char* par = NULL ); virtual ~RandomCollatorTest(){} void Test(); + void Test2(); }; #endif /* #if !UCONFIG_NO_COLLATION */