ICU-315 S/390 and EBCDIC updates

X-SVN-Rev: 970
This commit is contained in:
Markus Scherer 2000-03-22 01:57:16 +00:00
parent 54921c0d79
commit 258dbe98d4
10 changed files with 475 additions and 447 deletions

View File

@ -29,292 +29,292 @@
static UCollator *myCollation;
static const UChar DEFAULTRULEARRAY[] =
{
'=', '\'', (UChar)0x200B, '\'', '=', (UChar)0x200C, '=', (UChar)0x200D, '=', (UChar)0x200E, '=', (UChar)0x200F
, '=', (UChar)0x0001, '=', (UChar)0x0002, '=', (UChar)0x0003, '=', (UChar)0x0004
, '=', (UChar)0x0005, '=', (UChar)0x0006, '=', (UChar)0x0007, '=', (UChar)0x0008, '=', '\'', (UChar)0x0009, '\''
, '=', '\'', (UChar)0x000b, '\'', '=', (UChar)0x000e /* vt,, so */
, '=', (UChar)0x000f, '=', '\'', (UChar)0x0010, '\'', '=', (UChar)0x0011, '=', (UChar)0x0012, '=', (UChar)0x0013 /* si, dle, dc1, dc2, dc3 */
, '=', (UChar)0x0014, '=', (UChar)0x0015, '=', (UChar)0x0016, '=', (UChar)0x0017, '=', (UChar)0x0018 /* dc4, nak, syn, etb, can */
, '=', (UChar)0x0019, '=', (UChar)0x001a, '=', (UChar)0x001b, '=', (UChar)0x001c, '=', (UChar)0x001d /* em, sub, esc, fs, gs */
, '=', (UChar)0x001e, '=', (UChar)0x001f, '=', (UChar)0x007f /* rs, us, del */
0x3d, 0x27, (UChar)0x200B, 0x27, 0x3d, (UChar)0x200C, 0x3d, (UChar)0x200D, 0x3d, (UChar)0x200E, 0x3d, (UChar)0x200F
, 0x3d, (UChar)0x0001, 0x3d, (UChar)0x0002, 0x3d, (UChar)0x0003, 0x3d, (UChar)0x0004
, 0x3d, (UChar)0x0005, 0x3d, (UChar)0x0006, 0x3d, (UChar)0x0007, 0x3d, (UChar)0x0008, 0x3d, 0x27, (UChar)0x0009, 0x27
, 0x3d, 0x27, (UChar)0x000b, 0x27, 0x3d, (UChar)0x000e /* vt,, so */
, 0x3d, (UChar)0x000f, 0x3d, 0x27, (UChar)0x0010, 0x27, 0x3d, (UChar)0x0011, 0x3d, (UChar)0x0012, 0x3d, (UChar)0x0013 /* si, dle, dc1, dc2, dc3 */
, 0x3d, (UChar)0x0014, 0x3d, (UChar)0x0015, 0x3d, (UChar)0x0016, 0x3d, (UChar)0x0017, 0x3d, (UChar)0x0018 /* dc4, nak, syn, etb, can */
, 0x3d, (UChar)0x0019, 0x3d, (UChar)0x001a, 0x3d, (UChar)0x001b, 0x3d, (UChar)0x001c, 0x3d, (UChar)0x001d /* em, sub, esc, fs, gs */
, 0x3d, (UChar)0x001e, 0x3d, (UChar)0x001f, 0x3d, (UChar)0x007f /* rs, us, del */
/* ....then the C1 Latin 1 reserved control codes */
, '=', (UChar)0x0080, '=', (UChar)0x0081, '=', (UChar)0x0082, '=', (UChar)0x0083, '=', (UChar)0x0084, '=', (UChar)0x0085
, '=', (UChar)0x0086, '=', (UChar)0x0087, '=', (UChar)0x0088, '=', (UChar)0x0089, '=', (UChar)0x008a, '=', (UChar)0x008b
, '=', (UChar)0x008c, '=', (UChar)0x008d, '=', (UChar)0x008e, '=', (UChar)0x008f, '=', (UChar)0x0090, '=', (UChar)0x0091
, '=', (UChar)0x0092, '=', (UChar)0x0093, '=', (UChar)0x0094, '=', (UChar)0x0095, '=', (UChar)0x0096, '=', (UChar)0x0097
, '=', (UChar)0x0098, '=', (UChar)0x0099, '=', (UChar)0x009a, '=', (UChar)0x009b, '=', (UChar)0x009c, '=', (UChar)0x009d
, '=', (UChar)0x009e, '=', (UChar)0x009f
, 0x3d, (UChar)0x0080, 0x3d, (UChar)0x0081, 0x3d, (UChar)0x0082, 0x3d, (UChar)0x0083, 0x3d, (UChar)0x0084, 0x3d, (UChar)0x0085
, 0x3d, (UChar)0x0086, 0x3d, (UChar)0x0087, 0x3d, (UChar)0x0088, 0x3d, (UChar)0x0089, 0x3d, (UChar)0x008a, 0x3d, (UChar)0x008b
, 0x3d, (UChar)0x008c, 0x3d, (UChar)0x008d, 0x3d, (UChar)0x008e, 0x3d, (UChar)0x008f, 0x3d, (UChar)0x0090, 0x3d, (UChar)0x0091
, 0x3d, (UChar)0x0092, 0x3d, (UChar)0x0093, 0x3d, (UChar)0x0094, 0x3d, (UChar)0x0095, 0x3d, (UChar)0x0096, 0x3d, (UChar)0x0097
, 0x3d, (UChar)0x0098, 0x3d, (UChar)0x0099, 0x3d, (UChar)0x009a, 0x3d, (UChar)0x009b, 0x3d, (UChar)0x009c, 0x3d, (UChar)0x009d
, 0x3d, (UChar)0x009e, 0x3d, (UChar)0x009f
/* IGNORE except for secondary, tertiary difference */
/* Spaces */
, ';', '\'', (UChar)0x0020, '\'', ';', '\'', (UChar)0x00A0, '\'' /* spaces */
, ';', '\'', (UChar)0x2000, '\'', ';', '\'', (UChar)0x2001, '\'', ';', '\'', (UChar)0x2002, '\'', ';', '\'', (UChar)0x2003, '\'', ';', '\'', (UChar)0x2004, '\'' /* spaces */
, ';', '\'', (UChar)0x2005, '\'', ';', '\'', (UChar)0x2006, '\'', ';', '\'', (UChar)0x2007, '\'', ';', '\'', (UChar)0x2008, '\'', ';', '\'', (UChar)0x2009, '\'' /* spaces */
, ';', '\'', (UChar)0x200A, '\'', ';', '\'', (UChar)0x3000, '\'', ';', '\'', (UChar)0xFEFF, '\'' /* spaces */
, ';', '\'', '\r', '\'', ';', '\'', '\t', '\'', ';', '\'', '\n', '\'', ';', '\'', '\f', '\'', ';', '\'', (UChar)0x000b, '\'' /* whitespace */
, 0x3b, 0x27, (UChar)0x0020, 0x27, 0x3b, 0x27, (UChar)0x00A0, 0x27 /* spaces */
, 0x3b, 0x27, (UChar)0x2000, 0x27, 0x3b, 0x27, (UChar)0x2001, 0x27, 0x3b, 0x27, (UChar)0x2002, 0x27, 0x3b, 0x27, (UChar)0x2003, 0x27, 0x3b, 0x27, (UChar)0x2004, 0x27 /* spaces */
, 0x3b, 0x27, (UChar)0x2005, 0x27, 0x3b, 0x27, (UChar)0x2006, 0x27, 0x3b, 0x27, (UChar)0x2007, 0x27, 0x3b, 0x27, (UChar)0x2008, 0x27, 0x3b, 0x27, (UChar)0x2009, 0x27 /* spaces */
, 0x3b, 0x27, (UChar)0x200A, 0x27, 0x3b, 0x27, (UChar)0x3000, 0x27, 0x3b, 0x27, (UChar)0xFEFF, 0x27 /* spaces */
, 0x3b, 0x27, 0xd, 0x27, 0x3b, 0x27, 9, 0x27, 0x3b, 0x27, 0xa, 0x27, 0x3b, 0x27, 0xc, 0x27, 0x3b, 0x27, (UChar)0x000b, 0x27 /* whitespace */
/* Non-spacing accents */
, ';', (UChar)0x0301 /* non-spacing acute accent */
, ';', (UChar)0x0300 /* non-spacing grave accent */
, ';', (UChar)0x0306 /* non-spacing breve accent */
, ';', (UChar)0x0302 /* non-spacing circumflex accent */
, ';', (UChar)0x030c /* non-spacing caron/hacek accent */
, ';', (UChar)0x030a /* non-spacing ring above accent */
, ';', (UChar)0x030d /* non-spacing vertical line above */
, ';', (UChar)0x0308 /* non-spacing diaeresis accent */
, ';', (UChar)0x030b /* non-spacing double acute accent */
, ';', (UChar)0x0303 /* non-spacing tilde accent */
, ';', (UChar)0x0307 /* non-spacing dot above/overdot accent */
, ';', (UChar)0x0304 /* non-spacing macron accent */
, ';', (UChar)0x0337 /* non-spacing short slash overlay (overstruck diacritic) */
, ';', (UChar)0x0327 /* non-spacing cedilla accent */
, ';', (UChar)0x0328 /* non-spacing ogonek accent */
, ';', (UChar)0x0323 /* non-spacing dot-below/underdot accent */
, ';', (UChar)0x0332 /* non-spacing underscore/underline accent */
, 0x3b, (UChar)0x0301 /* non-spacing acute accent */
, 0x3b, (UChar)0x0300 /* non-spacing grave accent */
, 0x3b, (UChar)0x0306 /* non-spacing breve accent */
, 0x3b, (UChar)0x0302 /* non-spacing circumflex accent */
, 0x3b, (UChar)0x030c /* non-spacing caron/hacek accent */
, 0x3b, (UChar)0x030a /* non-spacing ring above accent */
, 0x3b, (UChar)0x030d /* non-spacing vertical line above */
, 0x3b, (UChar)0x0308 /* non-spacing diaeresis accent */
, 0x3b, (UChar)0x030b /* non-spacing double acute accent */
, 0x3b, (UChar)0x0303 /* non-spacing tilde accent */
, 0x3b, (UChar)0x0307 /* non-spacing dot above/overdot accent */
, 0x3b, (UChar)0x0304 /* non-spacing macron accent */
, 0x3b, (UChar)0x0337 /* non-spacing short slash overlay (overstruck diacritic) */
, 0x3b, (UChar)0x0327 /* non-spacing cedilla accent */
, 0x3b, (UChar)0x0328 /* non-spacing ogonek accent */
, 0x3b, (UChar)0x0323 /* non-spacing dot-below/underdot accent */
, 0x3b, (UChar)0x0332 /* non-spacing underscore/underline accent */
/* with the rest of the general diacritical marks in binary order */
, ';', (UChar)0x0305 /* non-spacing overscore/overline */
, ';', (UChar)0x0309 /* non-spacing hook above */
, ';', (UChar)0x030e /* non-spacing double vertical line above */
, ';', (UChar)0x030f /* non-spacing double grave */
, ';', (UChar)0x0310 /* non-spacing chandrabindu */
, ';', (UChar)0x0311 /* non-spacing inverted breve */
, ';', (UChar)0x0312 /* non-spacing turned comma above/cedilla above */
, ';', (UChar)0x0313 /* non-spacing comma above */
, ';', (UChar)0x0314 /* non-spacing reversed comma above */
, ';', (UChar)0x0315 /* non-spacing comma above right */
, ';', (UChar)0x0316 /* non-spacing grave below */
, ';', (UChar)0x0317 /* non-spacing acute below */
, ';', (UChar)0x0318 /* non-spacing left tack below */
, ';', (UChar)0x0319 /* non-spacing tack below */
, ';', (UChar)0x031a /* non-spacing left angle above */
, ';', (UChar)0x031b /* non-spacing horn */
, ';', (UChar)0x031c /* non-spacing left half ring below */
, ';', (UChar)0x031d /* non-spacing up tack below */
, ';', (UChar)0x031e /* non-spacing down tack below */
, ';', (UChar)0x031f /* non-spacing plus sign below */
, ';', (UChar)0x0320 /* non-spacing minus sign below */
, ';', (UChar)0x0321 /* non-spacing palatalized hook below */
, ';', (UChar)0x0322 /* non-spacing retroflex hook below */
, ';', (UChar)0x0324 /* non-spacing double dot below */
, ';', (UChar)0x0325 /* non-spacing ring below */
, ';', (UChar)0x0326 /* non-spacing comma below */
, ';', (UChar)0x0329 /* non-spacing vertical line below */
, ';', (UChar)0x032a /* non-spacing bridge below */
, ';', (UChar)0x032b /* non-spacing inverted double arch below */
, ';', (UChar)0x032c /* non-spacing hacek below */
, ';', (UChar)0x032d /* non-spacing circumflex below */
, ';', (UChar)0x032e /* non-spacing breve below */
, ';', (UChar)0x032f /* non-spacing inverted breve below */
, ';', (UChar)0x0330 /* non-spacing tilde below */
, ';', (UChar)0x0331 /* non-spacing macron below */
, ';', (UChar)0x0333 /* non-spacing double underscore */
, ';', (UChar)0x0334 /* non-spacing tilde overlay */
, ';', (UChar)0x0335 /* non-spacing short bar overlay */
, ';', (UChar)0x0336 /* non-spacing long bar overlay */
, ';', (UChar)0x0338 /* non-spacing long slash overlay */
, ';', (UChar)0x0339 /* non-spacing right half ring below */
, ';', (UChar)0x033a /* non-spacing inverted bridge below */
, ';', (UChar)0x033b /* non-spacing square below */
, ';', (UChar)0x033c /* non-spacing seagull below */
, ';', (UChar)0x033d /* non-spacing x above */
, ';', (UChar)0x033e /* non-spacing vertical tilde */
, ';', (UChar)0x033f /* non-spacing double overscore */
, ';', (UChar)0x0340 /* non-spacing grave tone mark */
, ';', (UChar)0x0341 /* non-spacing acute tone mark */
, ';', (UChar)0x0342, ';', (UChar)0x0343, ';', (UChar)0x0344, ';', (UChar)0x0345, ';', (UChar)0x0360, ';', (UChar)0x0361 /* newer */
, ';', (UChar)0x0483, ';', (UChar)0x0484, ';', (UChar)0x0485, ';', (UChar)0x0486 /* Cyrillic accents */
, 0x3b, (UChar)0x0305 /* non-spacing overscore/overline */
, 0x3b, (UChar)0x0309 /* non-spacing hook above */
, 0x3b, (UChar)0x030e /* non-spacing double vertical line above */
, 0x3b, (UChar)0x030f /* non-spacing double grave */
, 0x3b, (UChar)0x0310 /* non-spacing chandrabindu */
, 0x3b, (UChar)0x0311 /* non-spacing inverted breve */
, 0x3b, (UChar)0x0312 /* non-spacing turned comma above/cedilla above */
, 0x3b, (UChar)0x0313 /* non-spacing comma above */
, 0x3b, (UChar)0x0314 /* non-spacing reversed comma above */
, 0x3b, (UChar)0x0315 /* non-spacing comma above right */
, 0x3b, (UChar)0x0316 /* non-spacing grave below */
, 0x3b, (UChar)0x0317 /* non-spacing acute below */
, 0x3b, (UChar)0x0318 /* non-spacing left tack below */
, 0x3b, (UChar)0x0319 /* non-spacing tack below */
, 0x3b, (UChar)0x031a /* non-spacing left angle above */
, 0x3b, (UChar)0x031b /* non-spacing horn */
, 0x3b, (UChar)0x031c /* non-spacing left half ring below */
, 0x3b, (UChar)0x031d /* non-spacing up tack below */
, 0x3b, (UChar)0x031e /* non-spacing down tack below */
, 0x3b, (UChar)0x031f /* non-spacing plus sign below */
, 0x3b, (UChar)0x0320 /* non-spacing minus sign below */
, 0x3b, (UChar)0x0321 /* non-spacing palatalized hook below */
, 0x3b, (UChar)0x0322 /* non-spacing retroflex hook below */
, 0x3b, (UChar)0x0324 /* non-spacing double dot below */
, 0x3b, (UChar)0x0325 /* non-spacing ring below */
, 0x3b, (UChar)0x0326 /* non-spacing comma below */
, 0x3b, (UChar)0x0329 /* non-spacing vertical line below */
, 0x3b, (UChar)0x032a /* non-spacing bridge below */
, 0x3b, (UChar)0x032b /* non-spacing inverted double arch below */
, 0x3b, (UChar)0x032c /* non-spacing hacek below */
, 0x3b, (UChar)0x032d /* non-spacing circumflex below */
, 0x3b, (UChar)0x032e /* non-spacing breve below */
, 0x3b, (UChar)0x032f /* non-spacing inverted breve below */
, 0x3b, (UChar)0x0330 /* non-spacing tilde below */
, 0x3b, (UChar)0x0331 /* non-spacing macron below */
, 0x3b, (UChar)0x0333 /* non-spacing double underscore */
, 0x3b, (UChar)0x0334 /* non-spacing tilde overlay */
, 0x3b, (UChar)0x0335 /* non-spacing short bar overlay */
, 0x3b, (UChar)0x0336 /* non-spacing long bar overlay */
, 0x3b, (UChar)0x0338 /* non-spacing long slash overlay */
, 0x3b, (UChar)0x0339 /* non-spacing right half ring below */
, 0x3b, (UChar)0x033a /* non-spacing inverted bridge below */
, 0x3b, (UChar)0x033b /* non-spacing square below */
, 0x3b, (UChar)0x033c /* non-spacing seagull below */
, 0x3b, (UChar)0x033d /* non-spacing x above */
, 0x3b, (UChar)0x033e /* non-spacing vertical tilde */
, 0x3b, (UChar)0x033f /* non-spacing double overscore */
, 0x3b, (UChar)0x0340 /* non-spacing grave tone mark */
, 0x3b, (UChar)0x0341 /* non-spacing acute tone mark */
, 0x3b, (UChar)0x0342, 0x3b, (UChar)0x0343, 0x3b, (UChar)0x0344, 0x3b, (UChar)0x0345, 0x3b, (UChar)0x0360, 0x3b, (UChar)0x0361 /* newer */
, 0x3b, (UChar)0x0483, 0x3b, (UChar)0x0484, 0x3b, (UChar)0x0485, 0x3b, (UChar)0x0486 /* Cyrillic accents */
, ';', (UChar)0x20D0, ';', (UChar)0x20D1, ';', (UChar)0x20D2 /* symbol accents */
, ';', (UChar)0x20D3, ';', (UChar)0x20D4, ';', (UChar)0x20D5 /* symbol accents */
, ';', (UChar)0x20D6, ';', (UChar)0x20D7, ';', (UChar)0x20D8 /* symbol accents */
, ';', (UChar)0x20D9, ';', (UChar)0x20DA, ';', (UChar)0x20DB /* symbol accents */
, ';', (UChar)0x20DC, ';', (UChar)0x20DD, ';', (UChar)0x20DE /* symbol accents */
, ';', (UChar)0x20DF, ';', (UChar)0x20E0, ';', (UChar)0x20E1 /* symbol accents */
, 0x3b, (UChar)0x20D0, 0x3b, (UChar)0x20D1, 0x3b, (UChar)0x20D2 /* symbol accents */
, 0x3b, (UChar)0x20D3, 0x3b, (UChar)0x20D4, 0x3b, (UChar)0x20D5 /* symbol accents */
, 0x3b, (UChar)0x20D6, 0x3b, (UChar)0x20D7, 0x3b, (UChar)0x20D8 /* symbol accents */
, 0x3b, (UChar)0x20D9, 0x3b, (UChar)0x20DA, 0x3b, (UChar)0x20DB /* symbol accents */
, 0x3b, (UChar)0x20DC, 0x3b, (UChar)0x20DD, 0x3b, (UChar)0x20DE /* symbol accents */
, 0x3b, (UChar)0x20DF, 0x3b, (UChar)0x20E0, 0x3b, (UChar)0x20E1 /* symbol accents */
, ',', '\'', (UChar)0x002D, '\'', ';', (UChar)0x00AD /* dashes */
, ';', (UChar)0x2010, ';', (UChar)0x2011, ';', (UChar)0x2012 /* dashes */
, ';', (UChar)0x2013, ';', (UChar)0x2014, ';', (UChar)0x2015 /* dashes */
, ';', (UChar)0x2212 /* dashes */
, 0x2c, 0x27, (UChar)0x002D, 0x27, 0x3b, (UChar)0x00AD /* dashes */
, 0x3b, (UChar)0x2010, 0x3b, (UChar)0x2011, 0x3b, (UChar)0x2012 /* dashes */
, 0x3b, (UChar)0x2013, 0x3b, (UChar)0x2014, 0x3b, (UChar)0x2015 /* dashes */
, 0x3b, (UChar)0x2212 /* dashes */
/* other punctuation */
, '<', '\'', (UChar)0x005f, '\'' /* underline/underscore (spacing) */
, '<', (UChar)0x00af /* overline or macron (spacing) */
/* , '<', (UChar)0x00ad /* syllable hyphen (SHY) or soft hyphen */
, '<', '\'', (UChar)0x002c, '\'' /* comma (spacing) */
, '<', '\'', (UChar)0x003b, '\'' /* semicolon */
, '<', '\'', (UChar)0x003a, '\'' /* colon */
, '<', '\'', (UChar)0x0021, '\'' /* exclamation point */
, '<', (UChar)0x00a1 /* inverted exclamation point */
, '<', '\'', (UChar)0x003f, '\'' /* question mark */
, '<', (UChar)0x00bf /* inverted question mark */
, '<', '\'', (UChar)0x002f, '\'' /* slash */
, '<', '\'', (UChar)0x002e, '\'' /* period/full stop */
, '<', (UChar)0x00b4 /* acute accent (spacing) */
, '<', '\'', (UChar)0x0060, '\'' /* grave accent (spacing) */
, '<', '\'', (UChar)0x005e, '\'' /* circumflex accent (spacing) */
, '<', (UChar)0x00a8 /* diaresis/umlaut accent (spacing) */
, '<', '\'', (UChar)0x007e, '\'' /* tilde accent (spacing) */
, '<', (UChar)0x00b7 /* middle dot (spacing) */
, '<', (UChar)0x00b8 /* cedilla accent (spacing) */
, '<', '\'', (UChar)0x0027, '\'' /* apostrophe */
, '<', '\'', '"', '\'' /* quotation marks */
, '<', (UChar)0x00ab /* left angle quotes */
, '<', (UChar)0x00bb /* right angle quotes */
, '<', '\'', (UChar)0x0028, '\'' /* left parenthesis */
, '<', '\'', (UChar)0x0029, '\'' /* right parenthesis */
, '<', '\'', (UChar)0x005b, '\'' /* left bracket */
, '<', '\'', (UChar)0x005d, '\'' /* right bracket */
, '<', '\'', (UChar)0x007b, '\'' /* left brace */
, '<', '\'', (UChar)0x007d, '\'' /* right brace */
, '<', (UChar)0x00a7 /* section symbol */
, '<', (UChar)0x00b6 /* paragraph symbol */
, '<', (UChar)0x00a9 /* copyright symbol */
, '<', (UChar)0x00ae /* registered trademark symbol */
, '<', '\'', (UChar)0x0040, '\'' /* at sign */
, '<', (UChar)0x00a4 /* international currency symbol */
, '<', (UChar)0x00a2 /* cent sign */
, '<', '\'', (UChar)0x0024, '\'' /* dollar sign */
, '<', (UChar)0x00a3 /* pound-sterling sign */
, '<', (UChar)0x00a5 /* yen sign */
, '<', '\'', (UChar)0x002a, '\'' /* asterisk */
, '<', '\'', (UChar)0x005c, '\'' /* backslash */
, '<', '\'', (UChar)0x0026, '\'' /* ampersand */
, '<', '\'', (UChar)0x0023, '\'' /* number sign */
, '<', '\'', (UChar)0x0025, '\'' /* percent sign */
, '<', '\'', (UChar)0x002b, '\'' /* plus sign */
/* , '<', (UChar)0x002d */ /* hyphen or minus sign */
, '<', (UChar)0x00b1 /* plus-or-minus sign */
, '<', (UChar)0x00f7 /* divide sign */
, '<', (UChar)0x00d7 /* multiply sign */
, '<', '\'', (UChar)0x003c, '\'' /* less-than sign */
, '<', '\'', (UChar)0x003d, '\'' /* equal sign */
, '<', '\'', (UChar)0x003e, '\'' /* greater-than sign */
, '<', (UChar)0x00ac /* end of line symbol/logical NOT symbol */
, '<', '\'', (UChar)0x007c, '\'' /* vertical line/logical OR symbol */
, '<', (UChar)0x00a6 /* broken vertical line */
, '<', (UChar)0x00b0 /* degree symbol */
, '<', (UChar)0x00b5 /* micro symbol */
, 0x3c, 0x27, (UChar)0x005f, 0x27 /* underline/underscore (spacing) */
, 0x3c, (UChar)0x00af /* overline or macron (spacing) */
/* , 0x3c, (UChar)0x00ad /* syllable hyphen (SHY) or soft hyphen */
, 0x3c, 0x27, (UChar)0x002c, 0x27 /* comma (spacing) */
, 0x3c, 0x27, (UChar)0x003b, 0x27 /* semicolon */
, 0x3c, 0x27, (UChar)0x003a, 0x27 /* colon */
, 0x3c, 0x27, (UChar)0x0021, 0x27 /* exclamation point */
, 0x3c, (UChar)0x00a1 /* inverted exclamation point */
, 0x3c, 0x27, (UChar)0x003f, 0x27 /* question mark */
, 0x3c, (UChar)0x00bf /* inverted question mark */
, 0x3c, 0x27, (UChar)0x002f, 0x27 /* slash */
, 0x3c, 0x27, (UChar)0x002e, 0x27 /* period/full stop */
, 0x3c, (UChar)0x00b4 /* acute accent (spacing) */
, 0x3c, 0x27, (UChar)0x0060, 0x27 /* grave accent (spacing) */
, 0x3c, 0x27, (UChar)0x005e, 0x27 /* circumflex accent (spacing) */
, 0x3c, (UChar)0x00a8 /* diaresis/umlaut accent (spacing) */
, 0x3c, 0x27, (UChar)0x007e, 0x27 /* tilde accent (spacing) */
, 0x3c, (UChar)0x00b7 /* middle dot (spacing) */
, 0x3c, (UChar)0x00b8 /* cedilla accent (spacing) */
, 0x3c, 0x27, (UChar)0x0027, 0x27 /* apostrophe */
, 0x3c, 0x27, 0x22, 0x27 /* quotation marks */
, 0x3c, (UChar)0x00ab /* left angle quotes */
, 0x3c, (UChar)0x00bb /* right angle quotes */
, 0x3c, 0x27, (UChar)0x0028, 0x27 /* left parenthesis */
, 0x3c, 0x27, (UChar)0x0029, 0x27 /* right parenthesis */
, 0x3c, 0x27, (UChar)0x005b, 0x27 /* left bracket */
, 0x3c, 0x27, (UChar)0x005d, 0x27 /* right bracket */
, 0x3c, 0x27, (UChar)0x007b, 0x27 /* left brace */
, 0x3c, 0x27, (UChar)0x007d, 0x27 /* right brace */
, 0x3c, (UChar)0x00a7 /* section symbol */
, 0x3c, (UChar)0x00b6 /* paragraph symbol */
, 0x3c, (UChar)0x00a9 /* copyright symbol */
, 0x3c, (UChar)0x00ae /* registered trademark symbol */
, 0x3c, 0x27, (UChar)0x0040, 0x27 /* at sign */
, 0x3c, (UChar)0x00a4 /* international currency symbol */
, 0x3c, (UChar)0x00a2 /* cent sign */
, 0x3c, 0x27, (UChar)0x0024, 0x27 /* dollar sign */
, 0x3c, (UChar)0x00a3 /* pound-sterling sign */
, 0x3c, (UChar)0x00a5 /* yen sign */
, 0x3c, 0x27, (UChar)0x002a, 0x27 /* asterisk */
, 0x3c, 0x27, (UChar)0x005c, 0x27 /* backslash */
, 0x3c, 0x27, (UChar)0x0026, 0x27 /* ampersand */
, 0x3c, 0x27, (UChar)0x0023, 0x27 /* number sign */
, 0x3c, 0x27, (UChar)0x0025, 0x27 /* percent sign */
, 0x3c, 0x27, (UChar)0x002b, 0x27 /* plus sign */
/* , 0x3c, (UChar)0x002d */ /* hyphen or minus sign */
, 0x3c, (UChar)0x00b1 /* plus-or-minus sign */
, 0x3c, (UChar)0x00f7 /* divide sign */
, 0x3c, (UChar)0x00d7 /* multiply sign */
, 0x3c, 0x27, (UChar)0x003c, 0x27 /* less-than sign */
, 0x3c, 0x27, (UChar)0x003d, 0x27 /* equal sign */
, 0x3c, 0x27, (UChar)0x003e, 0x27 /* greater-than sign */
, 0x3c, (UChar)0x00ac /* end of line symbol/logical NOT symbol */
, 0x3c, 0x27, (UChar)0x007c, 0x27 /* vertical line/logical OR symbol */
, 0x3c, (UChar)0x00a6 /* broken vertical line */
, 0x3c, (UChar)0x00b0 /* degree symbol */
, 0x3c, (UChar)0x00b5 /* micro symbol */
/* NUMERICS */
, '<', '0', '<', '1', '<', '2', '<', '3', '<', '4', '<', '5', '<', '6', '<', '7', '<', '8', '<', '9'
, '<', (UChar)0x00bc, '<', (UChar)0x00bd, '<', (UChar)0x00be /* 1/4,1/2,3/4 fractions */
, 0x3c, 0x30, 0x3c, 0x31, 0x3c, 0x32, 0x3c, 0x33, 0x3c, 0x34, 0x3c, 0x35, 0x3c, 0x36, 0x3c, 0x37, 0x3c, 0x38, 0x3c, 0x39
, 0x3c, (UChar)0x00bc, 0x3c, (UChar)0x00bd, 0x3c, (UChar)0x00be /* 1/4,1/2,3/4 fractions */
/* NON-IGNORABLES */
, '<', 'a', ',', 'A'
, '<', 'b', ',', 'B'
, '<', 'c', ',', 'C'
, '<', 'd', ',', 'D'
, '<', (UChar)0x00F0, ',', (UChar)0x00D0 /* eth */
, '<', 'e', ',', 'E'
, '<', 'f', ',', 'F'
, '<', 'g', ',', 'G'
, '<', 'h', ',', 'H'
, '<', 'i', ',', 'I'
, '<', 'j', ',', 'J'
, '<', 'k', ',', 'K'
, '<', 'l', ',', 'L'
, '<', 'm', ',', 'M'
, '<', 'n', ',', 'N'
, '<', 'o', ',', 'O'
, '<', 'p', ',', 'P'
, '<', 'q', ',', 'Q'
, '<', 'r', ',', 'R'
, '<', 's', ',', 'S', '&', 'S', 'S', ',', (UChar)0x00DF /* s-zet */
, '<', 't', ',', 'T'
, '&', 'T', 'H', ',', 0x00FE, '&', 'T', 'H', ',', (UChar)0x00DE /* thorn */
, '<', 'u', ',', 'U'
, '<', 'v', ',', 'V'
, '<', 'w', ',', 'W'
, '<', 'x', ',', 'X'
, '<', 'y', ',', 'Y'
, '<', 'z', ',', 'Z'
, '&', 'A', 'E', ',', (UChar)0x00C6 /* ae & AE ligature */
, '&', 'A', 'E', ',', (UChar)0x00E6
, '&', 'O', 'E', ',', (UChar)0x0152 /* oe & OE ligature */
, '&', 'O', 'E', ',', (UChar)0x0153
, 0x3c, 0x61, 0x2c, 0x41
, 0x3c, 0x62, 0x2c, 0x42
, 0x3c, 0x63, 0x2c, 0x43
, 0x3c, 0x64, 0x2c, 0x44
, 0x3c, (UChar)0x00F0, 0x2c, (UChar)0x00D0 /* eth */
, 0x3c, 0x65, 0x2c, 0x45
, 0x3c, 0x66, 0x2c, 0x46
, 0x3c, 0x67, 0x2c, 0x47
, 0x3c, 0x68, 0x2c, 0x48
, 0x3c, 0x69, 0x2c, 0x49
, 0x3c, 0x6a, 0x2c, 0x4a
, 0x3c, 0x6b, 0x2c, 0x4b
, 0x3c, 0x6c, 0x2c, 0x4c
, 0x3c, 0x6d, 0x2c, 0x4d
, 0x3c, 0x6e, 0x2c, 0x4e
, 0x3c, 0x6f, 0x2c, 0x4f
, 0x3c, 0x70, 0x2c, 0x50
, 0x3c, 0x71, 0x2c, 0x51
, 0x3c, 0x72, 0x2c, 0x52
, 0x3c, 0x73, 0x2c, 0x53, 0x26, 0x53, 0x53, 0x2c, (UChar)0x00DF /* s-zet */
, 0x3c, 0x74, 0x2c, 0x54
, 0x26, 0x54, 0x48, 0x2c, 0x00FE, 0x26, 0x54, 0x48, 0x2c, (UChar)0x00DE /* thorn */
, 0x3c, 0x75, 0x2c, 0x55
, 0x3c, 0x76, 0x2c, 0x56
, 0x3c, 0x77, 0x2c, 0x57
, 0x3c, 0x78, 0x2c, 0x58
, 0x3c, 0x79, 0x2c, 0x59
, 0x3c, 0x7a, 0x2c, 0x5a
, 0x26, 0x41, 0x45, 0x2c, (UChar)0x00C6 /* ae & AE ligature */
, 0x26, 0x41, 0x45, 0x2c, (UChar)0x00E6
, 0x26, 0x4f, 0x45, 0x2c, (UChar)0x0152 /* oe & OE ligature */
, 0x26, 0x4f, 0x45, 0x2c, (UChar)0x0153
, (UChar)0x0000
};
const UChar testSourceCases[][MAX_TOKEN_LEN] = {
{'a', 'b', '\'', 'c', 0},
{'c', 'o', '-', 'o', 'p', 0},
{'a', 'b', 0},
{'a', 'm', 'p', 'e', 'r', 's', 'a', 'd', 0},
{'a', 'l', 'l', 0},
{'f', 'o', 'u', 'r', 0},
{'f', 'i', 'v', 'e', 0},
{'1', 0},
{'1', 0},
{'1', 0}, /* 10 */
{'2', 0},
{'2', 0},
{'H', 'e', 'l', 'l', 'o', 0},
{'a', '<', 'b', 0},
{'a', '<', 'b', 0},
{'a', 'c', 'c', 0},
{'a', 'c', 'H', 'c', 0}, /* simple test */
{'p', 0x00EA, 'c', 'h', 'e', 0},
{'a', 'b', 'c', 0},
{'a', 'b', 'c', 0}, /* 20 */
{'a', 'b', 'c', 0},
{'a', 'b', 'c', 0},
{'a', 'b', 'c', 0},
{'a', 0x00E6, 'c', 0},
{'a', 'c', 'H', 'c', 0}, /* primary test */
{'b', 'l', 'a', 'c', 'k', 0},
{'f', 'o', 'u', 'r', 0},
{'f', 'i', 'v', 'e', 0},
{'1', 0},
{'a', 'b', 'c', 0}, /* 30 */
{'a', 'b', 'c', 0},
{'a', 'b', 'c', 'H', 0},
{'a', 'b', 'c', 0},
{'a', 'c', 'H', 'c', 0}, /* 34 */
{'a', 'c', 'e', '0'},
{'1', '0'},
{'p', 0x00EA,'0'} /* 37 */
{0x61, 0x62, 0x27, 0x63, 0},
{0x63, 0x6f, 0x2d, 0x6f, 0x70, 0},
{0x61, 0x62, 0},
{0x61, 0x6d, 0x70, 0x65, 0x72, 0x73, 0x61, 0x64, 0},
{0x61, 0x6c, 0x6c, 0},
{0x66, 0x6f, 0x75, 0x72, 0},
{0x66, 0x69, 0x76, 0x65, 0},
{0x31, 0},
{0x31, 0},
{0x31, 0}, /* 10 */
{0x32, 0},
{0x32, 0},
{0x48, 0x65, 0x6c, 0x6c, 0x6f, 0},
{0x61, 0x3c, 0x62, 0},
{0x61, 0x3c, 0x62, 0},
{0x61, 0x63, 0x63, 0},
{0x61, 0x63, 0x48, 0x63, 0}, /* simple test */
{0x70, 0x00EA, 0x63, 0x68, 0x65, 0},
{0x61, 0x62, 0x63, 0},
{0x61, 0x62, 0x63, 0}, /* 20 */
{0x61, 0x62, 0x63, 0},
{0x61, 0x62, 0x63, 0},
{0x61, 0x62, 0x63, 0},
{0x61, 0x00E6, 0x63, 0},
{0x61, 0x63, 0x48, 0x63, 0}, /* primary test */
{0x62, 0x6c, 0x61, 0x63, 0x6b, 0},
{0x66, 0x6f, 0x75, 0x72, 0},
{0x66, 0x69, 0x76, 0x65, 0},
{0x31, 0},
{0x61, 0x62, 0x63, 0}, /* 30 */
{0x61, 0x62, 0x63, 0},
{0x61, 0x62, 0x63, 0x48, 0},
{0x61, 0x62, 0x63, 0},
{0x61, 0x63, 0x48, 0x63, 0}, /* 34 */
{0x61, 0x63, 0x65, 0x30},
{0x31, 0x30},
{0x70, 0x00EA,0x30} /* 37 */
};
const UChar testTargetCases[][MAX_TOKEN_LEN] = {
{'a', 'b', 'c', '\'', 0},
{'C', 'O', 'O', 'P', 0},
{'a', 'b', 'c', 0},
{'&', 0},
{'&', 0},
{'4', 0},
{'5', 0},
{'o', 'n', 'e', 0},
{'n', 'n', 'e', 0},
{'p', 'n', 'e', 0}, /* 10 */
{'t', 'w', 'o', 0},
{'u', 'w', 'o', 0},
{'h', 'e', 'l', 'l', 'O', 0},
{'a', '<', '=', 'b', 0},
{'a', 'b', 'c', 0},
{'a', 'C', 'H', 'c', 0},
{'a', 'C', 'H', 'c', 0}, /* simple test */
{'p', (UChar)0x00E9, 'c', 'h', 0x00E9, 0},
{'a', 'b', 'c', 0},
{'a', 'B', 'C', 0}, /* 20 */
{'a', 'b', 'c', 'h', 0},
{'a', 'b', 'd', 0},
{(UChar)0x00E4, 'b', 'c', 0},
{'a', (UChar)0x00C6, 'c', 0},
{'a', 'C', 'H', 'c', 0}, /* primary test */
{'b', 'l', 'a', 'c', 'k', '-', 'b', 'i', 'r', 'd', 0},
{'4', 0},
{'5', 0},
{'o', 'n', 'e', 0},
{'a', 'b', 'c', 0},
{'a', 'B', 'c', 0}, /* 30 */
{'a', 'b', 'c', 'h', 0},
{'a', 'b', 'd', 0},
{'a', 'C', 'H', 'c', 0}, /* 34 */
{'a', 'c', 'e', '0'},
{'1', '0'},
{'p', (UChar)0x00EB,'0'} /* 37 */
{0x61, 0x62, 0x63, 0x27, 0},
{0x43, 0x4f, 0x4f, 0x50, 0},
{0x61, 0x62, 0x63, 0},
{0x26, 0},
{0x26, 0},
{0x34, 0},
{0x35, 0},
{0x6f, 0x6e, 0x65, 0},
{0x6e, 0x6e, 0x65, 0},
{0x70, 0x6e, 0x65, 0}, /* 10 */
{0x74, 0x77, 0x6f, 0},
{0x75, 0x77, 0x6f, 0},
{0x68, 0x65, 0x6c, 0x6c, 0x4f, 0},
{0x61, 0x3c, 0x3d, 0x62, 0},
{0x61, 0x62, 0x63, 0},
{0x61, 0x43, 0x48, 0x63, 0},
{0x61, 0x43, 0x48, 0x63, 0}, /* simple test */
{0x70, (UChar)0x00E9, 0x63, 0x68, 0x00E9, 0},
{0x61, 0x62, 0x63, 0},
{0x61, 0x42, 0x43, 0}, /* 20 */
{0x61, 0x62, 0x63, 0x68, 0},
{0x61, 0x62, 0x64, 0},
{(UChar)0x00E4, 0x62, 0x63, 0},
{0x61, (UChar)0x00C6, 0x63, 0},
{0x61, 0x43, 0x48, 0x63, 0}, /* primary test */
{0x62, 0x6c, 0x61, 0x63, 0x6b, 0x2d, 0x62, 0x69, 0x72, 0x64, 0},
{0x34, 0},
{0x35, 0},
{0x6f, 0x6e, 0x65, 0},
{0x61, 0x62, 0x63, 0},
{0x61, 0x42, 0x63, 0}, /* 30 */
{0x61, 0x62, 0x63, 0x68, 0},
{0x61, 0x62, 0x64, 0},
{0x61, 0x43, 0x48, 0x63, 0}, /* 34 */
{0x61, 0x63, 0x65, 0x30},
{0x31, 0x30},
{0x70, (UChar)0x00EB,0x30} /* 37 */
};
const UCollationResult results[] = {
@ -361,17 +361,17 @@ const UCollationResult results[] = {
const UChar testCases[][MAX_TOKEN_LEN] =
{
{'a', 0},
{'A', 0},
{'a', 'e', 0},
{'a', 'E', 0},
{'A', 'e', 0},
{'A', 'E', 0},
{0x61, 0},
{0x41, 0},
{0x61, 0x65, 0},
{0x61, 0x45, 0},
{0x41, 0x65, 0},
{0x41, 0x45, 0},
{(UChar)0x00e6, 0},
{(UChar)0x00c6, 0},
{'b', 0},
{'c', 0},
{'z', 0}
{0x62, 0},
{0x63, 0},
{0x7a, 0}
};

View File

@ -30,6 +30,18 @@
#define MAX_FILE_LEN 1024*20
#define UCS_FILE_NAME_SIZE 100
/*
 * Converter name that the ucnv_getDefaultName() checks in this test
 * compare against; selected per platform at compile time.
 */
#if defined(WIN32)
    /* presumes a Western European Windows installation */
#   define DEFAULT_CODEPAGE "IBM-1252"
#elif defined(OS390)
#   define DEFAULT_CODEPAGE "ibm-37-s390"
#elif defined(OS400)
#   define DEFAULT_CODEPAGE "ibm-37"
#else
    /* none of the platform macros above is defined */
#   define DEFAULT_CODEPAGE "LATIN_1"
#endif
/*writes an entire UChar* (string) along with a BOM to a file*/
void WriteToFile(const UChar *a, FILE *myfile);
/*Case insensitive compare*/
@ -45,7 +57,6 @@ void addTestConvert(TestNode** root)
}
void TestConvert()
{
char myptr[4];
@ -315,34 +326,19 @@ void TestConvert()
/* Testing ucnv_getName()*/
/*default code page */
#ifdef WIN32
if ((strcmp(ucnv_getName(someConverters[0], &err), "IBM-1252")==0)&&
(strcmp(ucnv_getName(someConverters[1], &err), "IBM-1252")==0))
if ((stricmp(ucnv_getName(someConverters[0], &err), DEFAULT_CODEPAGE)==0)&&
(stricmp(ucnv_getName(someConverters[1], &err), DEFAULT_CODEPAGE)==0))
log_verbose("getName ok\n");
else
log_err("getName failed\n");
#else
if ((strcmp(ucnv_getName(someConverters[0], &err), "LATIN_1")==0)&&
(strcmp(ucnv_getName(someConverters[1], &err), "LATIN_1")==0))
log_verbose("getName ok\n");
else
log_err("getName failed\n");
#endif
/*Testing ucnv_getDefaultName() and ucnv_setDefaultName()*/
#ifdef WIN32
if(strcmp(ucnv_getDefaultName(), "ibm-1252")==0)
if(stricmp(ucnv_getDefaultName(), DEFAULT_CODEPAGE)==0)
log_verbose("getDefaultName o.k.\n");
else
log_err("getDefaultName failed \n");
#else
if(strcmp(ucnv_getDefaultName(), "LATIN_1")==0)
log_verbose("getDefaultName o.k.\n");
else
log_err("getDefaultName failed\n");
#endif
/*chnage the default name by setting it */
/*change the default name by setting it */
ucnv_setDefaultName("changed");
if(strcmp(ucnv_getDefaultName(), "changed")==0)
log_verbose("setDefaultName o.k");
@ -350,7 +346,7 @@ void TestConvert()
log_err("setDefaultName failed");
/*set it back to the original name */
ucnv_setDefaultName("LATIN_1");
ucnv_setDefaultName(DEFAULT_CODEPAGE);

View File

@ -119,6 +119,51 @@ UChar* appendCompareResult(UCollationResult result, UChar* target)
return target;
}
#if U_CHARSET_FAMILY==U_EBCDIC_FAMILY
/*
* These maps for ASCII to/from EBCDIC are copied from putil.c.
* For more information, see there.
*/
/*
 * EBCDIC -> ASCII lookup table, indexed by the EBCDIC byte value (0..255).
 * CharsToUChars() reads it to translate each plain char of its input when
 * the build uses the EBCDIC charset family.
 * Apart from index 0, a 0x00 entry marks a byte for which the table carries
 * no ASCII value.
 * The table is only ever read, so it is declared const.
 */
static const uint8_t asciiFromEbcdic[256]={
0x00, 0x01, 0x02, 0x03, 0x00, 0x09, 0x00, 0x7F, 0x00, 0x00, 0x00, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
0x10, 0x11, 0x12, 0x13, 0x00, 0x00, 0x08, 0x00, 0x18, 0x19, 0x00, 0x00, 0x1C, 0x1D, 0x1E, 0x1F,
0x00, 0x00, 0x00, 0x00, 0x00, 0x0A, 0x17, 0x1B, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x06, 0x07,
0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x14, 0x15, 0x00, 0x1A,
/* 0x40: space and punctuation */
0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2E, 0x3C, 0x28, 0x2B, 0x7C,
0x26, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x21, 0x24, 0x2A, 0x29, 0x3B, 0x5E,
0x2D, 0x2F, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2C, 0x25, 0x5F, 0x3E, 0x3F,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x3A, 0x23, 0x40, 0x27, 0x3D, 0x22,
/* 0x80: lowercase letters a-i, j-r, s-z */
0x00, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x7E, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x00, 0x00, 0x00, 0x5B, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x5D, 0x00, 0x00,
/* 0xC0: uppercase letters A-I, J-R, S-Z, then digits 0-9 */
0x7B, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x7D, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x5C, 0x00, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
/*
 * ASCII -> EBCDIC lookup table, indexed by the ASCII byte value (0..255).
 * Indices 0x00..0x7F hold the EBCDIC byte for that ASCII code; every entry
 * from 0x80 upward is 0.
 * The table is only ever read, so it is declared const.
 */
static const uint8_t ebcdicFromAscii[256]={
/* 0x00: control codes */
0x00, 0x01, 0x02, 0x03, 0x37, 0x2D, 0x2E, 0x2F, 0x16, 0x05, 0x25, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
0x10, 0x11, 0x12, 0x13, 0x3C, 0x3D, 0x32, 0x26, 0x18, 0x19, 0x3F, 0x27, 0x1C, 0x1D, 0x1E, 0x1F,
/* 0x20: space, punctuation, digits */
0x40, 0x5A, 0x7F, 0x7B, 0x5B, 0x6C, 0x50, 0x7D, 0x4D, 0x5D, 0x5C, 0x4E, 0x6B, 0x60, 0x4B, 0x61,
0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0x7A, 0x5E, 0x4C, 0x7E, 0x6E, 0x6F,
/* 0x40: '@', uppercase letters, brackets */
0x7C, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6,
0xD7, 0xD8, 0xD9, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xAD, 0xE0, 0xBD, 0x5F, 0x6D,
/* 0x60: '`', lowercase letters, braces, DEL */
0x79, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96,
0x97, 0x98, 0x99, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xC0, 0x4F, 0xD0, 0xA1, 0x07,
/* 0x80..0xFF: all zero */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
#endif
UChar* CharsToUChars(const char* chars)
{
int unicode;
@ -150,7 +195,13 @@ UChar* CharsToUChars(const char* chars)
i += 6;
++alias;
} else {
*alias = (UChar)chars[i];
#if U_CHARSET_FAMILY==U_ASCII_FAMILY
*alias = (UChar)(uint8_t)chars[i];
#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
*alias = asciiFromEbcdic[(uint8_t)chars[i]];
#else
# error U_CHARSET_FAMILY is not valid
#endif
++alias;
++i;
}

View File

@ -51,17 +51,14 @@ int main ( int argc, const char **argv )
void
ctest_pathnameInContext( char* fullname, int32_t maxsize, const char* relPath )
{
char mainDirBuffer[200];
char* mainDir;
char sepChar;
const char inpSepChar = '|';
char* tmp;
char sepString[2] ;
int32_t lenMainDir;
int32_t lenRelPath ;
#if defined(_WIN32) || defined(WIN32) || defined(__OS2__) || defined(OS2)
char mainDirBuffer[200];
mainDir = getenv("ICU_DATA");
if(mainDir!=NULL) {
strcpy(mainDirBuffer, mainDir);
@ -70,14 +67,6 @@ ctest_pathnameInContext( char* fullname, int32_t maxsize, const char* relPath )
mainDirBuffer[0]='\0';
}
mainDir=mainDirBuffer;
sepChar = '\\';
#elif defined(_AIX) || defined(SOLARIS) || defined(LINUX) || defined(HPUX) || defined(POSIX)
char mainDirBuffer[200];
strcpy(mainDirBuffer, u_getDataDirectory());
strcat(mainDirBuffer, "/../");
mainDir = mainDirBuffer;
sepChar = '/';
#elif defined(XP_MAC)
Str255 volName;
int16_t volNum;
@ -85,29 +74,32 @@ ctest_pathnameInContext( char* fullname, int32_t maxsize, const char* relPath )
if (err != noErr) volName[0] = 0;
mainDir = (char*) &(volName[1]);
mainDir[volName[0]] = 0;
sepChar = ':';
#else
mainDir = "";
sepChar = '/';
strcpy(mainDirBuffer, u_getDataDirectory());
strcat(mainDirBuffer, ".." U_FILE_SEP_STRING);
mainDir = mainDirBuffer;
#endif
sepString[0] = sepChar;
sepString[1] = 0;
if (relPath[0] == '|') relPath++;
lenMainDir = strlen( mainDir );
lenRelPath = strlen( relPath );
if(lenMainDir > 0 && mainDir[lenMainDir - 1] != U_FILE_SEP_CHAR) {
mainDir[lenMainDir++] = U_FILE_SEP_CHAR;
mainDir[lenMainDir] = 0;
}
if (relPath[0] == '|') relPath++;
lenRelPath = strlen( relPath );
if (maxsize < lenMainDir + lenRelPath + 2) { fullname[0] = 0; return; }
strcpy( fullname, mainDir );
strcat( fullname, sepString );
strcat( fullname, U_FILE_SEP_STRING );
strcat( fullname, relPath );
strchr( fullname, inpSepChar );
tmp = strchr(fullname, inpSepChar);
while (tmp) {
*tmp = sepChar;
*tmp = U_FILE_SEP_CHAR;
tmp = strchr( tmp+1, inpSepChar );
}
}
const char*
ctest_getTestDirectory()
{

View File

@ -20,6 +20,7 @@
#include <string.h>
#include "unicode/ustring.h"
#include "cintltst.h"
#include "ccolltst.h"
void PrintDataTable();
@ -628,13 +629,13 @@ static char* rawData2[23][5] = {
{ "English (United States)", "French (France)", "Croatian (Croatia)", "Greek (Greece)", "Norwegian (Norway, Nynorsk)" },
/* display language (French) */
{ "anglais", "français", "", "grec", "norvégien" },
{ "anglais", "fran\\u00E7ais", "", "grec", "norv\\u00E9gien" },
/* display country (French) */
{ "États-Unis", "France", "", "Grèce", "Norvège" },
{ "\\u00C9tats-Unis", "France", "", "Gr\\u00E8ce", "Norv\\u00E8ge" },
/* display variant (French) */
{ "", "", "", "", "Nynorsk" },
/* display name (French) */
{ "anglais (États-Unis)", "français (France)", "", "grec (Grèce)", "norvégien (Norvège, Nynorsk)" },
{ "anglais (\\u00C9tats-Unis)", "fran\\u00E7ais (France)", "", "grec (Gr\\u00E8ce)", "norv\\u00E9gien (Norv\\u00E8ge, Nynorsk)" },
/* display language (Croatian) */
{ "", "", "hrvatski", "", "" },
@ -658,7 +659,7 @@ static char* rawData2[23][5] = {
static UChar greekDisplayLanguage[] = { 0x03b5, 0x03bb, 0x03bb, 0x03b7, 0x03bd, 0x03b9, 0x03ba, 0x03ac, 0 };
static UChar greekDisplayCountry[] = { 0x0395, 0x03bb, 0x03bb, 0x03ac, 0x03b4, 0x03b1, 0 };
static UChar greekDisplayName[] = { 0x03b5, 0x03bb, 0x03bb, 0x03b7, 0x03bd, 0x03b9, 0x03ba,
0x03ac, ' ', '(', 0x0395, 0x03bb, 0x03bb, 0x03ac, 0x03b4, 0x03b1, ')', 0 };
0x03ac, 0x20, 0x28, 0x0395, 0x03bb, 0x03bb, 0x03ac, 0x03b4, 0x03b1, 0x29, 0 };
void setUpDataTable()
@ -669,8 +670,7 @@ void setUpDataTable()
for (i = 0; i < 23; i++) {
dataTable[i] = calloc(sizeof(UChar*),5);
for (j = 0; j < 5; j++){
dataTable[i][j] = (UChar*) malloc(sizeof(UChar)*(strlen(rawData2[i][j])+1));
u_uastrcpy(dataTable[i][j],rawData2[i][j]);
dataTable[i][j] = CharsToUChars(rawData2[i][j]);
}
}
dataTable[DLANG_EL][GREEKS]=(UChar*)realloc(dataTable[DLANG_EL][GREEKS],sizeof(UChar)*(u_strlen(greekDisplayLanguage)+1));

View File

@ -176,7 +176,11 @@ void TestExponential()
UChar uvalfor[20], ulvalfor[20];
double a;
UErrorCode status = U_ZERO_ERROR;
#ifdef OS390
double val[] = { 0.01234, 123456789, 1.23e75, -3.141592653e-78 };
#else
double val[] = { 0.01234, 123456789, 1.23e300, -3.141592653e-271 };
#endif
char* pat[] = { "0.####E0", "00.000E00", "##0.######E000", "0.###E0;[0.###E0]" };
int32_t lval[] = { 0, -1, 1, 123456789 };
@ -196,17 +200,24 @@ void TestExponential()
};
double valParse[] =
{
#ifdef OS390
0.01234, 123460000, 1.23E75, -3.1416E-78,
0.01234, 123460000, 1.23E75, -3.1416E-78,
0.01234, 123456800, 1.23E75, -3.141593E-78,
0.01234, 123500000, 1.23E75, -3.142E-78
#else
0.01234, 123460000, 1.23E300, -3.1416E-271,
0.01234, 123460000, 1.23E300, -3.1416E-271,
0.01234, 123456800, 1.23E300, -3.141593E-271,
0.01234, 123500000, 1.23E300, -3.142E-271,
0.01234, 123500000, 1.23E300, -3.142E-271
#endif
};
int32_t lvalParse[] =
{
0, -1, 1, 123460000,
0, -1, 1, 123460000,
0, -1, 1, 123456800,
0, -1, 1, 123500000,
0, -1, 1, 123500000
};
@ -311,8 +322,9 @@ void TestCurrencySign()
UChar *res;
UFieldPosition pos;
UErrorCode status = U_ZERO_ERROR;
pattern=(UChar*)malloc(sizeof(UChar) * (strlen("\xA4#,##0.00;-\xA4#,##0.00") + 1) );
u_uastrcpy(pattern, "\xA4#,##0.00;-\xA4#,##0.00");
pattern=(UChar*)malloc(sizeof(UChar) * (strlen("*#,##0.00;-*#,##0.00") + 1) );
u_uastrcpy(pattern, "*#,##0.00;-*#,##0.00");
pattern[0]=pattern[11]=0xa4; /* insert latin-1 currency symbol */
fmt = unum_openPattern(pattern, u_strlen(pattern), "en_US", &status);
if(U_FAILURE(status)){
log_err("Error in number format construction with pattern \"\\xA4#,##0.00;-\\xA4#,##0.00\\\" \n");

View File

@ -15,6 +15,8 @@
/* C FUNCTIONALITY AND REGRESSION TEST FOR BREAKITERATOR */
#include <stdio.h>
#include <string.h>
#include "unicode/uloc.h"
#include "unicode/ubrk.h"
#include "unicode/uchar.h"
@ -23,9 +25,7 @@
#include "unicode/ustring.h"
#include "cintltst.h"
#include "cregrtst.h"
#include<stdio.h>
#include<string.h>
#include "ccolltst.h"
/* -------------------------------------------------------------------------------------- */
/**
@ -87,45 +87,6 @@ UChar* elementAt(Vector *q, int32_t pos)
}
/* Just to make it easier to use with UChar array.*/
UChar* CharsToUCharArray(const char* chars)
{
int unicode;
int i;
UChar *buffer;
UChar *alias;
int len = strlen(chars);
int count = 0;
/* preflight */
for (i = 0; i < len;) {
if ((chars[i] == '\\') && (i+1 < len) && (chars[i+1] == 'u')) {
++count;
i += 6;
} else {
++count;
i++;
}
}
buffer = (UChar*) malloc(sizeof(UChar) * (count + 1));
alias = buffer;
for (i = 0; i < len;) {
if ((chars[i] == '\\') && (i+1 < len) && (chars[i+1] == 'u')) {
sscanf(&(chars[i+2]), "%4X", &unicode);
*alias = (UChar)unicode;
i += 6;
++alias;
} else {
*alias = (UChar)chars[i];
++alias;
++i;
}
}
*alias = 0x0000;
return buffer;
}
UChar* UCharToUCharArray(const UChar uchar)
{
UChar *buffer;
@ -277,7 +238,7 @@ void addTestWordData()
addElement(wordSelectionData, " ");
/* to test for bug #4097779 */
addElement2(wordSelectionData, CharsToUCharArray("aa\\u0300a"));
addElement2(wordSelectionData, CharsToUChars("aa\\u0300a"));
addElement(wordSelectionData, " ");
/* to test for bug #4098467
@ -286,28 +247,28 @@ void addTestWordData()
it correctly), first as precomposed syllables, and then as conjoining jamo.
Both sequences should be semantically identical and break the same way.
precomposed syllables... */
addElement2(wordSelectionData, CharsToUCharArray("\\uc0c1\\ud56d"));
addElement2(wordSelectionData, CharsToUChars("\\uc0c1\\ud56d"));
addElement(wordSelectionData, " ");
addElement2(wordSelectionData, CharsToUCharArray("\\ud55c\\uc778"));
addElement2(wordSelectionData, CharsToUChars("\\ud55c\\uc778"));
addElement(wordSelectionData, " ");
addElement2(wordSelectionData, CharsToUCharArray("\\uc5f0\\ud569"));
addElement2(wordSelectionData, CharsToUChars("\\uc5f0\\ud569"));
addElement(wordSelectionData, " ");
addElement2(wordSelectionData, CharsToUCharArray("\\uc7a5\\ub85c\\uad50\\ud68c"));
addElement2(wordSelectionData, CharsToUChars("\\uc7a5\\ub85c\\uad50\\ud68c"));
addElement(wordSelectionData, " ");
/* conjoining jamo... */
addElement2(wordSelectionData, CharsToUCharArray("\\u1109\\u1161\\u11bc\\u1112\\u1161\\u11bc"));
addElement2(wordSelectionData, CharsToUChars("\\u1109\\u1161\\u11bc\\u1112\\u1161\\u11bc"));
addElement(wordSelectionData, " ");
addElement2(wordSelectionData, CharsToUCharArray("\\u1112\\u1161\\u11ab\\u110b\\u1175\\u11ab"));
addElement2(wordSelectionData, CharsToUChars("\\u1112\\u1161\\u11ab\\u110b\\u1175\\u11ab"));
addElement(wordSelectionData, " ");
addElement2(wordSelectionData, CharsToUCharArray("\\u110b\\u1167\\u11ab\\u1112\\u1161\\u11b8"));
addElement2(wordSelectionData, CharsToUChars("\\u110b\\u1167\\u11ab\\u1112\\u1161\\u11b8"));
addElement(wordSelectionData, " ");
addElement2(wordSelectionData, CharsToUCharArray("\\u110c\\u1161\\u11bc\\u1105\\u1169\\u1100\\u116d\\u1112\\u116c"));
addElement2(wordSelectionData, CharsToUChars("\\u110c\\u1161\\u11bc\\u1105\\u1169\\u1100\\u116d\\u1112\\u116c"));
addElement(wordSelectionData, " ");
/* this is a test for bug #4117554: the ideographic iteration mark (U+3005) should
count as a Kanji character for the purposes of word breaking */
addElement(wordSelectionData, "abc");
addElement2(wordSelectionData, CharsToUCharArray("\\u4e01\\u4e02\\u3005\\u4e03\\u4e03"));
addElement2(wordSelectionData, CharsToUChars("\\u4e01\\u4e02\\u3005\\u4e03\\u4e03"));
addElement(wordSelectionData, "abc");
elems= Count(wordSelectionData);
@ -373,42 +334,42 @@ void addTestSentenceData()
/* test for bug #4117554: Treat fullwidth variants of .!? the same as their
normal counterparts */
addElement2(sentenceSelectionData, CharsToUCharArray("I know I'm right\\uff0e "));
addElement2(sentenceSelectionData, CharsToUCharArray("Right\\uff1f "));
addElement2(sentenceSelectionData, CharsToUCharArray("Right\\uff01 "));
addElement2(sentenceSelectionData, CharsToUChars("I know I'm right\\uff0e "));
addElement2(sentenceSelectionData, CharsToUChars("Right\\uff1f "));
addElement2(sentenceSelectionData, CharsToUChars("Right\\uff01 "));
/* test for bug #4117554: Break sentence between a sentence terminator and
opening punctuation */
addElement(sentenceSelectionData, "no?");
u_uastrcpy(temp, "(yes)");
u_strcat(temp, CharsToUCharArray("\\u2029"));
u_strcat(temp, CharsToUChars("\\u2029"));
addElement2(sentenceSelectionData, temp);
/* test for bug #4158381: Don't break sentence after period if it isn't
followed by a space */
addElement(sentenceSelectionData, "Test <code>Flags.Flag</code> class. ");
u_uastrcpy(temp, "Another test.");
u_strcat(temp, CharsToUCharArray("\\u2029"));
u_strcat(temp, CharsToUChars("\\u2029"));
addElement2(sentenceSelectionData, temp);
/* test for bug #4158381: No breaks when there are no terminators around */
addElement(sentenceSelectionData, "<P>Provides a set of &quot;lightweight&quot; (all-java<FONT SIZE=\"-2\"><SUP>TM</SUP></FONT> language) components that, to the maximum degree possible, work the same on all platforms. ");
u_uastrcpy(temp, "Another test.");
u_strcat(temp, CharsToUCharArray("\\u2029"));
u_strcat(temp, CharsToUChars("\\u2029"));
addElement2(sentenceSelectionData, temp);
/* test for bug #4143071: Make sure sentences that end with digits work right */
addElement(sentenceSelectionData, "Today is the 27th of May, 1998. ");
addElement(sentenceSelectionData, "Tomorrow with be 28 May 1998. ");
u_uastrcpy(temp, "The day after will be the 30th.");
u_strcat(temp, CharsToUCharArray("\\u2029"));
u_strcat(temp, CharsToUChars("\\u2029"));
addElement2(sentenceSelectionData, temp);
/* test for bug #4152416: Make sure sentences ending with a capital
letter are treated correctly */
addElement(sentenceSelectionData, "The type of all primitive <code>boolean</code> values accessed in the target VM. ");
u_uastrcpy(temp, "Calls to xxx will return an implementor of this interface.");
u_strcat(temp, CharsToUCharArray("\\u2029"));
u_strcat(temp, CharsToUChars("\\u2029"));
addElement2(sentenceSelectionData, temp);
@ -417,7 +378,7 @@ void addTestSentenceData()
addElement(sentenceSelectionData, "Constructs a randomly generated BigInteger, uniformly distributed over the range <tt>0</tt> to <tt>(2<sup>numBits</sup> - 1)</tt>, inclusive. ");
addElement(sentenceSelectionData, "The uniformity of the distribution assumes that a fair source of random bits is provided in <tt>rnd</tt>. ");
u_uastrcpy(temp, "Note that this constructor always constructs a non-negative BigInteger.");
u_strcat(temp, CharsToUCharArray("\\u2029"));
u_strcat(temp, CharsToUChars("\\u2029"));
addElement2(sentenceSelectionData, temp);
elems = Count(sentenceSelectionData);
@ -458,7 +419,7 @@ void addTestLineData()
addElement(lineSelectionData, "are\r");
addElement2(lineSelectionData, CharsToUCharArray("you\\u2028")); /* lineSeperator */
addElement2(lineSelectionData, CharsToUChars("you\\u2028")); /* lineSeperator */
addElement(lineSelectionData, "fine.\t");
addElement(lineSelectionData, "good. ");
@ -473,22 +434,22 @@ void addTestLineData()
addElement(lineSelectionData, "all ");
/* to test for bug #4068133 */
addElement2(lineSelectionData, CharsToUCharArray("\\u96f6"));
addElement2(lineSelectionData, CharsToUCharArray("\\u4e00\\u3002"));
addElement2(lineSelectionData, CharsToUCharArray("\\u4e8c\\u3001"));
addElement2(lineSelectionData, CharsToUCharArray("\\u4e09\\u3002\\u3001"));
addElement2(lineSelectionData, CharsToUCharArray("\\u56db\\u3001\\u3002\\u3001"));
addElement2(lineSelectionData, CharsToUChars("\\u96f6"));
addElement2(lineSelectionData, CharsToUChars("\\u4e00\\u3002"));
addElement2(lineSelectionData, CharsToUChars("\\u4e8c\\u3001"));
addElement2(lineSelectionData, CharsToUChars("\\u4e09\\u3002\\u3001"));
addElement2(lineSelectionData, CharsToUChars("\\u56db\\u3001\\u3002\\u3001"));
addElement2(lineSelectionData, CharsToUCharArray("\\u4e94,"));
addElement2(lineSelectionData, CharsToUChars("\\u4e94,"));
addElement2(lineSelectionData, CharsToUCharArray("\\u516d."));
addElement2(lineSelectionData, CharsToUChars("\\u516d."));
addElement2(lineSelectionData, CharsToUCharArray("\\u4e03.\\u3001,\\u3002"));
addElement2(lineSelectionData, CharsToUCharArray("\\u516b"));
addElement2(lineSelectionData, CharsToUChars("\\u4e03.\\u3001,\\u3002"));
addElement2(lineSelectionData, CharsToUChars("\\u516b"));
/* to test for bug #4086052 */
addElement2(lineSelectionData, CharsToUCharArray("foo\\u00a0bar "));
addElement2(lineSelectionData, CharsToUChars("foo\\u00a0bar "));
/* to test for bug #4097920 */
addElement(lineSelectionData, "dog,");
@ -511,20 +472,20 @@ void addTestLineData()
it correctly), first as precomposed syllables, and then as conjoining jamo.
Both sequences should be semantically identical and break the same way.
precomposed syllables... */
addElement2(lineSelectionData, CharsToUCharArray("\\uc0c1\\ud56d "));
addElement2(lineSelectionData, CharsToUCharArray("\\ud55c\\uc778 "));
addElement2(lineSelectionData, CharsToUCharArray("\\uc5f0\\ud569 "));
addElement2(lineSelectionData, CharsToUCharArray("\\uc7a5\\ub85c\\uad50\\ud68c "));
addElement2(lineSelectionData, CharsToUChars("\\uc0c1\\ud56d "));
addElement2(lineSelectionData, CharsToUChars("\\ud55c\\uc778 "));
addElement2(lineSelectionData, CharsToUChars("\\uc5f0\\ud569 "));
addElement2(lineSelectionData, CharsToUChars("\\uc7a5\\ub85c\\uad50\\ud68c "));
/* conjoining jamo... */
addElement2(lineSelectionData, CharsToUCharArray("\\u1109\\u1161\\u11bc\\u1112\\u1161\\u11bc "));
addElement2(lineSelectionData, CharsToUCharArray("\\u1112\\u1161\\u11ab\\u110b\\u1175\\u11ab "));
addElement2(lineSelectionData, CharsToUCharArray("\\u110b\\u1167\\u11ab\\u1112\\u1161\\u11b8 "));
addElement2(lineSelectionData, CharsToUCharArray("\\u110c\\u1161\\u11bc\\u1105\\u1169\\u1100\\u116d\\u1112\\u116c"));
addElement2(lineSelectionData, CharsToUChars("\\u1109\\u1161\\u11bc\\u1112\\u1161\\u11bc "));
addElement2(lineSelectionData, CharsToUChars("\\u1112\\u1161\\u11ab\\u110b\\u1175\\u11ab "));
addElement2(lineSelectionData, CharsToUChars("\\u110b\\u1167\\u11ab\\u1112\\u1161\\u11b8 "));
addElement2(lineSelectionData, CharsToUChars("\\u110c\\u1161\\u11bc\\u1105\\u1169\\u1100\\u116d\\u1112\\u116c"));
/* to test for bug #4117554: Fullwidth .!? should be treated as postJwrd */
addElement2(lineSelectionData, CharsToUCharArray("\\u4e01\\uff0e"));
addElement2(lineSelectionData, CharsToUCharArray("\\u4e02\\uff01"));
addElement2(lineSelectionData, CharsToUCharArray("\\u4e03\\uff1f"));
addElement2(lineSelectionData, CharsToUChars("\\u4e01\\uff0e"));
addElement2(lineSelectionData, CharsToUChars("\\u4e02\\uff01"));
addElement2(lineSelectionData, CharsToUChars("\\u4e03\\uff1f"));
elems = Count(lineSelectionData);
log_verbose("In line: the no: of lines are %d\n", elems);
@ -607,34 +568,34 @@ void addTestCharacterData()
it correctly), first as precomposed syllables, and then as conjoining jamo.
Both sequences should be semantically identical and break the same way.
precomposed syllables... */
addElement2(characterSelectionData, CharsToUCharArray("\\uc0c1"));
addElement2(characterSelectionData, CharsToUCharArray("\\ud56d"));
addElement2(characterSelectionData, CharsToUChars("\\uc0c1"));
addElement2(characterSelectionData, CharsToUChars("\\ud56d"));
addElement(characterSelectionData, " ");
addElement2(characterSelectionData, CharsToUCharArray("\\ud55c"));
addElement2(characterSelectionData, CharsToUCharArray("\\uc778"));
addElement2(characterSelectionData, CharsToUChars("\\ud55c"));
addElement2(characterSelectionData, CharsToUChars("\\uc778"));
addElement(characterSelectionData, " ");
addElement2(characterSelectionData, CharsToUCharArray("\\uc5f0"));
addElement2(characterSelectionData, CharsToUCharArray("\\ud569"));
addElement2(characterSelectionData, CharsToUChars("\\uc5f0"));
addElement2(characterSelectionData, CharsToUChars("\\ud569"));
addElement(characterSelectionData, " ");
addElement2(characterSelectionData, CharsToUCharArray("\\uc7a5"));
addElement2(characterSelectionData, CharsToUCharArray("\\ub85c"));
addElement2(characterSelectionData, CharsToUCharArray("\\uad50"));
addElement2(characterSelectionData, CharsToUCharArray("\\ud68c"));
addElement2(characterSelectionData, CharsToUChars("\\uc7a5"));
addElement2(characterSelectionData, CharsToUChars("\\ub85c"));
addElement2(characterSelectionData, CharsToUChars("\\uad50"));
addElement2(characterSelectionData, CharsToUChars("\\ud68c"));
addElement(characterSelectionData, " ");
/* conjoining jamo... */
addElement2(characterSelectionData, CharsToUCharArray("\\u1109\\u1161\\u11bc"));
addElement2(characterSelectionData, CharsToUCharArray("\\u1112\\u1161\\u11bc"));
addElement2(characterSelectionData, CharsToUChars("\\u1109\\u1161\\u11bc"));
addElement2(characterSelectionData, CharsToUChars("\\u1112\\u1161\\u11bc"));
addElement(characterSelectionData, " ");
addElement2(characterSelectionData, CharsToUCharArray("\\u1112\\u1161\\u11ab"));
addElement2(characterSelectionData, CharsToUCharArray("\\u110b\\u1175\\u11ab"));
addElement2(characterSelectionData, CharsToUChars("\\u1112\\u1161\\u11ab"));
addElement2(characterSelectionData, CharsToUChars("\\u110b\\u1175\\u11ab"));
addElement(characterSelectionData, " ");
addElement2(characterSelectionData, CharsToUCharArray("\\u110b\\u1167\\u11ab"));
addElement2(characterSelectionData, CharsToUCharArray("\\u1112\\u1161\\u11b8"));
addElement2(characterSelectionData, CharsToUChars("\\u110b\\u1167\\u11ab"));
addElement2(characterSelectionData, CharsToUChars("\\u1112\\u1161\\u11b8"));
addElement(characterSelectionData, " ");
addElement2(characterSelectionData, CharsToUCharArray("\\u110c\\u1161\\u11bc"));
addElement2(characterSelectionData, CharsToUCharArray("\\u1105\\u1169"));
addElement2(characterSelectionData, CharsToUCharArray("\\u1100\\u116d"));
addElement2(characterSelectionData, CharsToUCharArray("\\u1112\\u116c"));
addElement2(characterSelectionData, CharsToUChars("\\u110c\\u1161\\u11bc"));
addElement2(characterSelectionData, CharsToUChars("\\u1105\\u1169"));
addElement2(characterSelectionData, CharsToUChars("\\u1100\\u116d"));
addElement2(characterSelectionData, CharsToUChars("\\u1112\\u116c"));
elems = Count(characterSelectionData);
log_verbose("In character: the no: of characters are %d", elems);
@ -769,7 +730,7 @@ AllocateTextBoundary();
x=u_strlen(cannedTestChars);
s=(UChar*)malloc(sizeof(UChar) * (x + 15));
u_strcpy(s, cannedTestChars);
u_strcat(s, CharsToUCharArray(".,\\u3001\\u3002\\u3041\\u3042\\u3043\\ufeff"));
u_strcat(s, CharsToUChars(".,\\u3001\\u3002\\u3041\\u3042\\u3043\\ufeff"));
log_verbose("Testing sentence Other invariants.....\n");
doOtherInvariantTest(UBRK_SENTENCE, s);
free(s);
@ -885,11 +846,11 @@ AllocateTextBoundary();
x=u_strlen(cannedTestChars);
s=(UChar*)malloc(sizeof(UChar) * (x + 15));
u_strcpy(s, cannedTestChars);
u_strcat(s, CharsToUCharArray("\',.\\u3041\\u3042\\u3043\\u309b\\u309c\\u30a1\\u30a2\\u30a3\\u4e00\\u4e01\\u4e02"));
u_strcat(s, CharsToUChars("\',.\\u3041\\u3042\\u3043\\u309b\\u309c\\u30a1\\u30a2\\u30a3\\u4e00\\u4e01\\u4e02"));
log_verbose("Testing word break invariant.....\n");
doBreakInvariantTest(UBRK_WORD, s);
u_strcpy(s, cannedTestChars);
u_strcat(s, CharsToUCharArray("\',.\\u3041\\u3042\\u3043\\u309b\\u309c\\u30a1\\u30a2\\u30a3\\u4e00\\u4e01\\u4e02"));
u_strcat(s, CharsToUChars("\',.\\u3041\\u3042\\u3043\\u309b\\u309c\\u30a1\\u30a2\\u30a3\\u4e00\\u4e01\\u4e02"));
doOtherInvariantTest(UBRK_WORD, s);
free(s);
FreeTextBoundary();
@ -1007,7 +968,7 @@ void TestLineInvariants()
AllocateTextBoundary();
s=(UChar*)malloc(sizeof(UChar) * (u_strlen(cannedTestChars) + 20));
u_strcpy(s, cannedTestChars);
u_strcat(s, CharsToUCharArray(".,;:\\u3001\\u3002\\u3041\\u3042\\u3043\\u3044\\u3045\\u30a3\\u4e00\\u4e01\\u4e02"));
u_strcat(s, CharsToUChars(".,;:\\u3001\\u3002\\u3041\\u3042\\u3043\\u3044\\u3045\\u30a3\\u4e00\\u4e01\\u4e02"));
log_verbose("Testing line break Invariant.....\n");
doBreakInvariantTest(UBRK_LINE, s);
log_verbose("Testing line other Invariant....\n");
@ -1020,7 +981,7 @@ AllocateTextBoundary();
e = ubrk_open(UBRK_LINE, "en_US", work, u_strlen(work), &status);
errorCount=0;
status=U_ZERO_ERROR;
u_strcpy(noBreak, CharsToUCharArray("\\u00a0\\u2007\\u2011\\ufeff"));
u_strcpy(noBreak, CharsToUChars("\\u00a0\\u2007\\u2011\\ufeff"));
u_uastrcpy(work, "aaa");
for (i = 0; i < u_strlen(s); i++) {
c = s[i];
@ -1051,7 +1012,7 @@ AllocateTextBoundary();
ubrk_close(e);
/* it does break after hyphens (unless they're followed by a digit, a non-spacing mark,
a currency symbol, a non-breaking space, or a line or paragraph separator) */
u_strcpy(dashes, CharsToUCharArray("-\\u00ad\\u2010\\u2012\\u2013\\u2014"));
u_strcpy(dashes, CharsToUChars("-\\u00ad\\u2010\\u2012\\u2013\\u2014"));
for (i = 0; i < u_strlen(s); i++) {
work[0] = s[i];
for (j = 0; j < u_strlen(dashes); j++) {
@ -1201,11 +1162,11 @@ void TestCharacterInvariants()
AllocateTextBoundary();
s=(UChar*)malloc(sizeof(UChar) * (u_strlen(cannedTestChars) + 15));
u_strcpy(s, cannedTestChars);
u_strcat(s, CharsToUCharArray("\\u1100\\u1101\\u1102\\u1160\\u1161\\u1162\\u11a8\\u11a9\\u11aa"));
u_strcat(s, CharsToUChars("\\u1100\\u1101\\u1102\\u1160\\u1161\\u1162\\u11a8\\u11a9\\u11aa"));
log_verbose("Testing character break invariant.....\n");
doBreakInvariantTest(UBRK_CHARACTER, s);
u_strcpy(s, cannedTestChars);
u_strcat(s, CharsToUCharArray("\\u1100\\u1101\\u1102\\u1160\\u1161\\u1162\\u11a8\\u11a9\\u11aa"));
u_strcat(s, CharsToUChars("\\u1100\\u1101\\u1102\\u1160\\u1161\\u1162\\u11a8\\u11a9\\u11aa"));
log_verbose("Testing character other invariant.....\n");
doOtherInvariantTest(UBRK_CHARACTER, s);
free(s);
@ -1526,7 +1487,7 @@ void doBreakInvariantTest(UBreakIteratorType type, UChar* testChars)
log_verbose("doBreakInvariantTest text of length: %d\n", u_strlen(testChars));
/* a break should always occur after CR (unless followed by LF), LF, PS, and LS */
u_strcpy(breaks, CharsToUCharArray("\r\n\\u2029\\u2028"));
u_strcpy(breaks, CharsToUChars("\r\n\\u2029\\u2028"));
tb = ubrk_open(type, "en_US", work, u_strlen(work), &status);

View File

@ -111,9 +111,13 @@ void addUnicodeTest(TestNode** root)
/*==================================================== */
void TestUpperLower()
{
static char* upperTest = "abcdefg123hij.?:klmno";
static char* lowerTest = "ABCDEFG123HIJ.?:KLMNO";
U_STRING_DECL(upperTest, "abcdefg123hij.?:klmno", 21);
U_STRING_DECL(lowerTest, "ABCDEFG123HIJ.?:KLMNO", 21);
int i;
U_STRING_INIT(upperTest, "abcdefg123hij.?:klmno", 21);
U_STRING_INIT(lowerTest, "ABCDEFG123HIJ.?:KLMNO", 21);
log_verbose("testing upper lower\n");
for (i = 0; i < 21; i++) {
@ -198,7 +202,7 @@ void TestLetterNumber()
void TestMisc()
{
const UChar sampleSpaces[] = {0x0020, 0x00a0, 0x2000, 0x2001, 0x2005};
const UChar sampleNonSpaces[] = {'a', 'b', 'c', 'd', 't'};
const UChar sampleNonSpaces[] = {0x61, 0x62, 0x63, 0x64, 0x74};
const UChar sampleUndefined[] = {0xfff1, 0xfff7, 0xfa30};
const UChar sampleDefined[] = {0x523E, 0x4f88, 0xfffd};
const UChar sampleBase[] = {0x0061, 0x0031, 0x03d2};
@ -289,7 +293,7 @@ void TestMisc()
void TestControlPrint()
{
const UChar sampleControl[] = {0x001b, 0x0097, 0x0082};
const UChar sampleNonControl[] = {'a', 0x0031, 0x00e2};
const UChar sampleNonControl[] = {0x61, 0x0031, 0x00e2};
const UChar samplePrintable[] = {0x0042, 0x005f, 0x2014};
const UChar sampleNonPrintable[] = {0x200c, 0x009f, 0x001c};
int i;

View File

@ -239,16 +239,18 @@ void TestSubWithValue(int32_t inputsize, int32_t outputsize)
const char sampleTxtToU[]= { (char)0x00, (char)0x9f, (char)0xaf, (char)0xff, (char)0x89, (char)0xd3 };
UChar IBM_943toUnicode[] = { 0x0000, 0x6D63, '%', 'X', 'F', 'F', 0x6D66};
UChar IBM_943toUnicode[] = { 0x0000, 0x6D63, 0x25, 0x58, 0x46, 0x46, 0x6D66};
int32_t fromIBM943Offs [] = { 0, 1, 3, 3, 3, 3, 4};
const char expsubwvalIBM_949[]= {
(char)0x00, (char)0xb0, (char)0xa1, (char)0xb0, (char)0xa2, '%', 'U', 'E', 'F', '6', '7', (char)0xc8, (char)0xd3 };
(char)0x00, (char)0xb0, (char)0xa1, (char)0xb0, (char)0xa2,
(char)0x25, (char)0x55, (char)0x45, (char)0x46, (char)0x36, (char)0x37, (char)0xc8, (char)0xd3 };
const char expsubwvalIBM_943[]= {
(char)0x9f, (char)0xaf, (char)0x9f, (char)0xb1, '%', 'U', '6', 'D', '6', '5', (char)0x89, (char)0x59 };
(char)0x9f, (char)0xaf, (char)0x9f, (char)0xb1,
(char)0x25, (char)0x55, (char)0x36, (char)0x44, (char)0x36, (char)0x35, (char)0x89, (char)0x59 };
const char expsubwvalIBM_930[] = {
(char)0x0e, (char)0x5d, (char)0x5f, (char)0x5d, (char)0x63, (char)0x0f, (char)0x6c, (char)0xe4, (char)0xf6, (char)0xc4, (char)0xf6, (char)0xf5, (char)0x46, (char)0x6b };
@ -291,7 +293,7 @@ void TestLegalAndOthers(int32_t inputsize, int32_t outputsize)
const char text943[] = {
(char)0x82, (char)0xa9, (char)0x82, (char)0x20, /*(char)0xc8,*/ 'a', (char)0x8a, (char)0xbf, (char)0x8e, (char)0x9a };
(char)0x82, (char)0xa9, (char)0x82, (char)0x20, /*(char)0xc8,*/ (char)0x61, (char)0x8a, (char)0xbf, (char)0x8e, (char)0x9a };
UChar toUnicode943sub[] = { 0x304b, 0xfffd, /*0xff88,*/ 0x0061, 0x6f22, 0x5b57};
UChar toUnicode943skip[]= { 0x304b, /*0xff88,*/ 0x0061, 0x6f22, 0x5b57};
UChar toUnicode943stop[]= { 0x304b};
@ -328,7 +330,9 @@ void TestLegalAndOthers(int32_t inputsize, int32_t outputsize)
}
void TestSingleByte(int32_t inputsize, int32_t outputsize)
{
const char sampleText[] = {(char)0x82, (char)0xa9, 'a', 'b', 'c' , (char)0x82, (char)0xff, /*(char)0x82, (char)0xa9,*/ '2', '3'};
const char sampleText[] = {
(char)0x82, (char)0xa9, (char)0x61, (char)0x62, (char)0x63 , (char)0x82,
(char)0xff, /*(char)0x82, (char)0xa9,*/ (char)0x32, (char)0x33};
UChar toUnicode943sub[] = {0x304b, 0x0061, 0x0062, 0x0063, 0xfffd,/*0x304b,*/ 0x0032, 0x0033};
int32_t fromIBM943Offssub[] = {0, 2, 3, 4, 5, 7, 8};
/*checking illegal value for ibm-943 with substitute*/

View File

@ -627,7 +627,15 @@ void TestAmbiguous()
{
UErrorCode status = U_ZERO_ERROR;
UConverter *ascii_cnv = 0, *sjis_cnv = 0;
const char *target = "\\usr\\local\\share\\data\\icutest.txt";
const char target[] = {
/* "\\usr\\local\\share\\data\\icutest.txt" */
0x5c, 0x75, 0x73, 0x72,
0x5c, 0x6c, 0x6f, 0x63, 0x61, 0x6c,
0x5c, 0x73, 0x68, 0x61, 0x72, 0x65,
0x5c, 0x64, 0x61, 0x74, 0x61,
0x5c, 0x69, 0x63, 0x75, 0x74, 0x65, 0x73, 0x74, 0x2e, 0x74, 0x78, 0x74,
0
};
UChar *asciiResult = 0, *sjisResult = 0;
int32_t asciiLength = 0, sjisLength = 0;