scuffed-code/icu4c/source/test/intltest/itrbnf.cpp

630 lines
23 KiB
C++
Raw Normal View History

/*
*******************************************************************************
* Copyright (C) 1996-2000, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*/
#include "itrbnf.h"
#include "unicode/tblcoll.h"
#include "unicode/coleitr.h"
// import com.ibm.text.RuleBasedNumberFormat;
// import com.ibm.test.TestFmwk;
// import java.util.Locale;
// import java.text.NumberFormat;
// TESTCASE(id, test) expands to a single `case id:` arm for the switch in
// runIndexedTest(). The arm stores the stringized test name in `name` and,
// when `exec` is true, logs "<test>---" plus an empty line and then calls
// test(). It therefore requires `name`, `exec` and logln() to be in scope at
// the expansion site. The expansion ends in `break` without a semicolon, so
// each use must be followed by `;`.
// current macro not in icu1.8.1
#define TESTCASE(id,test) \
case id: \
name = #test; \
if (exec) { \
logln(#test "---"); \
logln((UnicodeString)""); \
test(); \
} \
break
/**
 * Dispatches one test of this suite by numeric index.
 *
 * @param index  Index of the test to select (0..8 are defined here).
 * @param exec   When true the selected test is run; when false only its
 *               name is reported through @p name.
 * @param name   Receives the name of the selected test, or "" when
 *               @p index does not correspond to any test.
 * @param par    Unused.
 */
void IntlTestRBNF::runIndexedTest(int32_t index, UBool exec, const char* &name, char* /*par*/)
{
    if (exec) {
        logln("TestSuite RuleBasedNumberFormat");
    }

    // Each TESTCASE line expands to a complete `case` arm (see the macro).
    switch (index) {
        TESTCASE(0, TestEnglishSpellout);
        TESTCASE(1, TestOrdinalAbbreviations);
        TESTCASE(2, TestDurations);
        TESTCASE(3, TestSpanishSpellout);
        TESTCASE(4, TestFrenchSpellout);
        TESTCASE(5, TestSwissFrenchSpellout);
        TESTCASE(6, TestItalianSpellout);
        TESTCASE(7, TestGermanSpellout);
        TESTCASE(8, TestThaiSpellout);

        default:
            // An empty name signals that there is no test at this index.
            name = "";
            break;
    }
}
void
IntlTestRBNF::TestEnglishSpellout()
{
#if 0
// temporary test code
int32_t result = 0;
UErrorCode status = U_ZERO_ERROR;
Collator* temp = Collator::createInstance(Locale::US, status);
if (U_SUCCESS(status) &&
temp->getDynamicClassID() == RuleBasedCollator::getStaticClassID()) {
RuleBasedCollator* collator = (RuleBasedCollator*)temp;
UnicodeString rules(collator->getRules());
UnicodeString tailoring("&'\\u0000' << ' ' << '-'\n");
tailoring = tailoring.unescape();
rules.append(tailoring);
collator = new RuleBasedCollator(rules, status);
if (U_SUCCESS(status)) {
collator->setDecomposition(Normalizer::DECOMP);
UnicodeString prefix(" hundred");
UnicodeString str("hundred-fifty");
CollationElementIterator* strIter = collator->createCollationElementIterator(str);
CollationElementIterator* prefixIter = collator->createCollationElementIterator(prefix);
// match collation elements between the strings
int32_t oStr = strIter->next(status);
int32_t oPrefix = prefixIter->next(status);
while (oPrefix != CollationElementIterator::NULLORDER) {
// skip over ignorable characters in the target string
while (CollationElementIterator::primaryOrder(oStr) == 0
&& oStr != CollationElementIterator::NULLORDER) {
oStr = strIter->next(status);
}
// skip over ignorable characters in the prefix
while (CollationElementIterator::primaryOrder(oPrefix) == 0
&& oPrefix != CollationElementIterator::NULLORDER) {
oPrefix = prefixIter->next(status);
}
// if skipping over ignorables brought us to the end
// of the target string, we didn't match and return 0
if (oStr == CollationElementIterator::NULLORDER) {
result = -1;
break;
}
// if skipping over ignorables brought to the end of
// the prefix, we DID match: drop out of the loop
else if (oPrefix == CollationElementIterator::NULLORDER) {
break;
}
// match collation elements from the two strings
// (considering only primary differences). If we
// get a mismatch, dump out and return 0
if (CollationElementIterator::primaryOrder(oStr)
!= CollationElementIterator::primaryOrder(oPrefix)) {
result = -1;
break;
// otherwise, advance to the next character in each string
// and loop (we drop out of the loop when we exhaust
// collation elements in the prefix)
} else {
oStr = strIter->next(status);
oPrefix = prefixIter->next(status);
}
}
if (result == 0) {
result = strIter->getOffset();
}
delete prefixIter;
delete strIter;
}
delete collator;
}
delete temp;
printf("result: %d\n", result);
#endif
UErrorCode status = U_ZERO_ERROR;
RuleBasedNumberFormat* formatter
= new RuleBasedNumberFormat(URBNF_SPELLOUT, Locale::US, status);
if (U_FAILURE(status)) {
errln("FAIL: could not construct formatter");
} else {
static const char* testData[][2] = {
{ "1", "one" },
{ "2", "two" },
{ "15", "fifteen" },
{ "20", "twenty" },
{ "23", "twenty-three" },
{ "73", "seventy-three" },
{ "88", "eighty-eight" },
{ "100", "one hundred" },
{ "106", "one hundred and six" },
{ "127", "one hundred and twenty-seven" },
{ "200", "two hundred" },
{ "579", "five hundred and seventy-nine" },
{ "1,000", "one thousand" },
{ "2,000", "two thousand" },
{ "3,004", "three thousand and four" },
{ "4,567", "four thousand five hundred and sixty-seven" },
{ "15,943", "fifteen thousand nine hundred and forty-three" },
{ "2,345,678", "two million, three hundred and forty-five thousand, six hundred and seventy-eight" },
{ "-36", "minus thirty-six" },
{ "234.567", "two hundred and thirty-four point five six seven" },
{ NULL, NULL}
};
doTest(formatter, testData, TRUE);
formatter->setLenient(TRUE);
static const char* lpTestData[][2] = {
{ "fifty-7", "57" },
{ " fifty-7", "57" },
{ " fifty-7", "57" },
{ "2 thousand six HUNDRED fifty-7", "2,657" },
{ "fifteen hundred and zero", "1,500" },
{ "FOurhundred thiRTY six", "436" },
{ NULL, NULL}
};
doLenientParseTest(formatter, lpTestData);
}
2001-10-17 16:07:55 +00:00
delete formatter;
}
void
IntlTestRBNF::TestOrdinalAbbreviations()
{
UErrorCode status = U_ZERO_ERROR;
RuleBasedNumberFormat* formatter
= new RuleBasedNumberFormat(URBNF_ORDINAL, Locale::US, status);
if (U_FAILURE(status)) {
errln("FAIL: could not construct formatter");
} else {
static const char* testData[][2] = {
{ "1", "1st" },
{ "2", "2nd" },
{ "3", "3rd" },
{ "4", "4th" },
{ "7", "7th" },
{ "10", "10th" },
{ "11", "11th" },
{ "13", "13th" },
{ "20", "20th" },
{ "21", "21st" },
{ "22", "22nd" },
{ "23", "23rd" },
{ "24", "24th" },
{ "33", "33rd" },
{ "102", "102nd" },
{ "312", "312th" },
{ "12,345", "12,345th" },
{ NULL, NULL}
};
doTest(formatter, testData, FALSE);
}
2001-10-17 16:07:55 +00:00
delete formatter;
}
void
IntlTestRBNF::TestDurations()
{
UErrorCode status = U_ZERO_ERROR;
RuleBasedNumberFormat* formatter
= new RuleBasedNumberFormat(URBNF_DURATION, Locale::US, status);
if (U_FAILURE(status)) {
errln("FAIL: could not construct formatter");
} else {
static const char* testData[][2] = {
{ "3,600", "1:00:00" }, //move me and I fail
{ "0", "0 sec." },
{ "1", "1 sec." },
{ "24", "24 sec." },
{ "60", "1:00" },
{ "73", "1:13" },
{ "145", "2:25" },
{ "666", "11:06" },
// { "3,600", "1:00:00" },
{ "3,740", "1:02:20" },
{ "10,293", "2:51:33" },
{ NULL, NULL}
};
doTest(formatter, testData, TRUE);
formatter->setLenient(TRUE);
static const char* lpTestData[][2] = {
{ "2-51-33", "10,293" },
{ NULL, NULL}
};
doLenientParseTest(formatter, lpTestData);
}
2001-10-17 16:07:55 +00:00
delete formatter;
}
void
IntlTestRBNF::TestSpanishSpellout()
{
UErrorCode status = U_ZERO_ERROR;
RuleBasedNumberFormat* formatter
= new RuleBasedNumberFormat(URBNF_SPELLOUT, Locale("es", "ES", ""), status);
if (U_FAILURE(status)) {
errln("FAIL: could not construct formatter");
} else {
static const char* testData[][2] = {
{ "1", "uno" },
{ "6", "seis" },
{ "16", "diecis\\u00e9is" },
{ "20", "veinte" },
{ "24", "veinticuatro" },
{ "26", "veintis\\u00e9is" },
{ "73", "setenta y tres" },
{ "88", "ochenta y ocho" },
{ "100", "cien" },
{ "106", "ciento seis" },
{ "127", "ciento veintisiete" },
{ "200", "doscientos" },
{ "579", "quinientos setenta y nueve" },
{ "1,000", "mil" },
{ "2,000", "dos mil" },
{ "3,004", "tres mil cuatro" },
{ "4,567", "cuatro mil quinientos sesenta y siete" },
{ "15,943", "quince mil novecientos cuarenta y tres" },
{ "2,345,678", "dos mill\\u00f3n trescientos cuarenta y cinco mil seiscientos setenta y ocho"},
{ "-36", "menos treinta y seis" },
{ "234.567", "doscientos treinta y cuatro punto cinco seis siete" },
{ NULL, NULL}
};
doTest(formatter, testData, TRUE);
}
2001-10-17 16:07:55 +00:00
delete formatter;
}
void
IntlTestRBNF::TestFrenchSpellout()
{
UErrorCode status = U_ZERO_ERROR;
RuleBasedNumberFormat* formatter
= new RuleBasedNumberFormat(URBNF_SPELLOUT, Locale::FRANCE, status);
if (U_FAILURE(status)) {
errln("FAIL: could not construct formatter");
} else {
static const char* testData[][2] = {
{ "1", "un" },
{ "15", "quinze" },
{ "20", "vingt" },
{ "21", "vingt-et-un" },
{ "23", "vingt-trois" },
{ "62", "soixante-deux" },
{ "70", "soixante-dix" },
{ "71", "soixante et onze" },
{ "73", "soixante-treize" },
{ "80", "quatre-vingts" },
{ "88", "quatre-vingt-huit" },
{ "100", "cent" },
{ "106", "cent six" },
{ "127", "cent vingt-sept" },
{ "200", "deux cents" },
{ "579", "cinq cents soixante-dix-neuf" },
{ "1,000", "mille" },
{ "1,123", "onze cents vingt-trois" },
{ "1,594", "mille cinq cents quatre-vingt-quatorze" },
{ "2,000", "deux mille" },
{ "3,004", "trois mille quatre" },
{ "4,567", "quatre mille cinq cents soixante-sept" },
{ "15,943", "quinze mille neuf cents quarante-trois" },
{ "2,345,678", "deux million trois cents quarante-cinq mille six cents soixante-dix-huit" },
{ "-36", "moins trente-six" },
{ "234.567", "deux cents trente-quatre virgule cinq six sept" },
{ NULL, NULL}
};
doTest(formatter, testData, TRUE);
formatter->setLenient(TRUE);
static const char* lpTestData[][2] = {
{ "trente-un", "31" },
{ "un cents quatre vingt dix huit", "198" },
{ NULL, NULL}
};
doLenientParseTest(formatter, lpTestData);
}
2001-10-17 16:07:55 +00:00
delete formatter;
}
2001-10-17 16:07:55 +00:00
void
IntlTestRBNF::TestSwissFrenchSpellout()
{
UErrorCode status = U_ZERO_ERROR;
RuleBasedNumberFormat* formatter
= new RuleBasedNumberFormat(URBNF_SPELLOUT, Locale("fr", "CH", ""), status);
if (U_FAILURE(status)) {
errln("FAIL: could not construct formatter");
} else {
static const char* testData[][2] = {
{ "1", "un" },
{ "15", "quinze" },
{ "20", "vingt" },
{ "21", "vingt-et-un" },
{ "23", "vingt-trois" },
{ "62", "soixante-deux" },
{ "70", "septante" },
{ "71", "septante-et-un" },
{ "73", "septante-trois" },
{ "80", "octante" },
{ "88", "octante-huit" },
{ "100", "cent" },
{ "106", "cent six" },
{ "127", "cent vingt-sept" },
{ "200", "deux cents" },
{ "579", "cinq cents septante-neuf" },
{ "1,000", "mille" },
{ "1,123", "onze cents vingt-trois" },
{ "1,594", "mille cinq cents nonante-quatre" },
{ "2,000", "deux mille" },
{ "3,004", "trois mille quatre" },
{ "4,567", "quatre mille cinq cents soixante-sept" },
{ "15,943", "quinze mille neuf cents quarante-trois" },
{ "2,345,678", "deux million trois cents quarante-cinq mille six cents septante-huit" },
{ "-36", "moins trente-six" },
{ "234.567", "deux cents trente-quatre virgule cinq six sept" },
{ NULL, NULL}
};
doTest(formatter, testData, TRUE);
}
2001-10-17 16:07:55 +00:00
delete formatter;
}
void
IntlTestRBNF::TestItalianSpellout()
{
UErrorCode status = U_ZERO_ERROR;
RuleBasedNumberFormat* formatter
= new RuleBasedNumberFormat(URBNF_SPELLOUT, Locale::ITALIAN, status);
if (U_FAILURE(status)) {
errln("FAIL: could not construct formatter");
} else {
static const char* testData[][2] = {
{ "1", "uno" },
{ "15", "quindici" },
{ "20", "venti" },
{ "23", "ventitre" },
{ "73", "settantatre" },
{ "88", "ottantotto" },
{ "100", "cento" },
{ "106", "centosei" },
{ "108", "centotto" },
{ "127", "centoventisette" },
{ "181", "centottantuno" },
{ "200", "duecento" },
{ "579", "cinquecentosettantanove" },
{ "1,000", "mille" },
{ "2,000", "duemila" },
{ "3,004", "tremilaquattro" },
{ "4,567", "quattromilacinquecentosessantasette" },
{ "15,943", "quindicimilanovecentoquarantatre" },
{ "-36", "meno trentisei" },
{ "234.567", "duecentotrentiquattro virgola cinque sei sette" },
{ NULL, NULL}
};
doTest(formatter, testData, TRUE);
}
2001-10-17 16:07:55 +00:00
delete formatter;
}
void
IntlTestRBNF::TestGermanSpellout()
{
UErrorCode status = U_ZERO_ERROR;
RuleBasedNumberFormat* formatter
= new RuleBasedNumberFormat(URBNF_SPELLOUT, Locale::GERMANY, status);
if (U_FAILURE(status)) {
errln("FAIL: could not construct formatter");
} else {
static const char* testData[][2] = {
{ "1", "eins" },
{ "15", "f\\u00fcnfzehn" },
{ "20", "zwanzig" },
{ "23", "dreiundzwanzig" },
{ "73", "dreiundsiebzig" },
{ "88", "achtundachtzig" },
{ "100", "hundert" },
{ "106", "hundertsechs" },
{ "127", "hundertsiebenundzwanzig" },
{ "200", "zweihundert" },
{ "579", "f\\u00fcnfhundertneunundsiebzig" },
{ "1,000", "tausend" },
{ "2,000", "zweitausend" },
{ "3,004", "dreitausendvier" },
{ "4,567", "viertausendf\\u00fcnfhundertsiebenundsechzig" },
{ "15,943", "f\\u00fcnfzehntausendneunhundertdreiundvierzig" },
{ "2,345,678", "zwei Millionen dreihundertf\\u00fcnfundvierzigtausendsechshundertachtundsiebzig" },
{ NULL, NULL}
};
doTest(formatter, testData, TRUE);
formatter->setLenient(TRUE);
static const char* lpTestData[][2] = {
{ "ein Tausend sechs Hundert fuenfunddreissig", "1,635" },
{ NULL, NULL}
};
doLenientParseTest(formatter, lpTestData);
}
2001-10-17 16:07:55 +00:00
delete formatter;
}
void
IntlTestRBNF::TestThaiSpellout()
{
UErrorCode status = U_ZERO_ERROR;
RuleBasedNumberFormat* formatter
= new RuleBasedNumberFormat(URBNF_SPELLOUT, Locale("th"), status);
if (U_FAILURE(status)) {
errln("FAIL: could not construct formatter");
} else {
static const char* testData[][2] = {
{ "0", "\\u0e28\\u0e39\\u0e19\\u0e22\\u0e4c" },
{ "1", "\\u0e2b\\u0e19\\u0e36\\u0e48\\u0e07" },
{ "10", "\\u0e2a\\u0e34\\u0e1a" },
{ "11", "\\u0e2a\\u0e34\\u0e1a\\u0e40\\u0e2d\\u0e47\\u0e14" },
{ "21", "\\u0e22\\u0e35\\u0e48\\u0e2a\\u0e34\\u0e1a\\u0e40\\u0e2d\\u0e47\\u0e14" },
{ "101", "\\u0e2b\\u0e19\\u0e36\\u0e48\\u0e07\\u0e23\\u0e49\\u0e2d\\u0e22\\u0e2b\\u0e19\\u0e36\\u0e48\\u0e07" },
{ "1.234", "\\u0e2b\\u0e19\\u0e36\\u0e48\\u0e07\\u0e08\\u0e38\\u0e14\\u0e2a\\u0e2d\\u0e07\\u0e2a\\u0e32\\u0e21\\u0e2a\\u0e35\\u0e48" },
{ NULL, NULL}
};
doTest(formatter, testData, TRUE);
}
2001-10-17 16:07:55 +00:00
delete formatter;
}
/**
 * Runs a table of format (and optionally parse) checks against a formatter.
 *
 * For every row, the number text is parsed with a US NumberFormat, formatted
 * with @p formatter, and compared with the expected text after the expected
 * text has been passed through UnicodeString::unescape(). When
 * @p testParsing is true, the formatted text is additionally parsed back
 * with @p formatter and compared with the original number.
 *
 * The first failure is reported through errln() and ends the run; remaining
 * rows are not examined.
 *
 * @param formatter    Formatter under test.
 * @param testData     Rows of { number as US-formatted digits, expected
 *                     text }; the table ends at the first row whose first
 *                     element is NULL.
 * @param testParsing  Whether to also verify the parse round trip.
 */
void
IntlTestRBNF::doTest(RuleBasedNumberFormat* formatter, const char* testData[][2], UBool testParsing)
{
    // man, error reporting would be easier with printf-style syntax for unicode string and formattable
    UErrorCode status = U_ZERO_ERROR;
    NumberFormat* decFmt = NumberFormat::createInstance(Locale::US, status);
    if (U_FAILURE(status)) {
        errln("FAIL: could not create NumberFormat");
    } else {
        for (int i = 0; testData[i][0]; ++i) {
            const char* numString = testData[i][0];
            const char* expectedWords = testData[i][1];

            Formattable expectedNumber;
            decFmt->parse(numString, expectedNumber, status);
            if (U_FAILURE(status)) {
                errln("FAIL: decFmt could not parse %s", numString);
                break;
            }

            UnicodeString actualString;
            formatter->format(expectedNumber, actualString, status);
            if (U_FAILURE(status)) {
                UnicodeString msg = "Fail: formatter could not format ";
                // ICU calls do nothing when handed an already-failed status,
                // so use a fresh one or the number would be missing from the
                // message.
                UErrorCode msgStatus = U_ZERO_ERROR;
                decFmt->format(expectedNumber, msg, msgStatus);
                errln(msg);
                break;
            }

            UnicodeString expectedString = UnicodeString(expectedWords).unescape();
            if (actualString != expectedString) {
                UnicodeString msg = "FAIL: check failed for ";
                decFmt->format(expectedNumber, msg, status);
                msg.append(", expected ");
                msg.append(expectedString);
                msg.append(" but got ");
                msg.append(actualString);
                errln(msg);
                break;
            }

            if (testParsing) {
                Formattable parsedNumber;
                formatter->parse(actualString, parsedNumber, status);
                if (U_FAILURE(status)) {
                    UnicodeString msg = "FAIL: formatter could not parse ";
                    msg.append(actualString);
                    msg.append(" status code: " );
                    char buffer[32];
                    // %x expects an unsigned int; the status is an enum.
                    sprintf(buffer, "0x%x", (unsigned int)status);
                    msg.append(buffer);
                    errln(msg);
                    break;
                }
                if (parsedNumber != expectedNumber) {
                    UnicodeString msg = "FAIL: parse failed for ";
                    msg.append(actualString);
                    msg.append(", expected ");
                    decFmt->format(expectedNumber, msg, status);
                    msg.append(", but got ");
                    decFmt->format(parsedNumber, msg, status);
                    errln(msg);
                    break;
                }
            }
        }
    }
    // Released on every path (deleting a null pointer is a no-op), matching
    // how the callers release their formatter.
    delete decFmt;
}
/**
 * Runs a table of parse checks against a formatter.
 *
 * For every row, the spelled-out text (after UnicodeString::unescape()) is
 * parsed with @p formatter and the expected number text is parsed with a US
 * NumberFormat. Both results are then formatted with the US NumberFormat and
 * the two strings are compared.
 *
 * The first failure is reported through errln() and ends the run; remaining
 * rows are not examined. This function does not itself enable lenient
 * parsing; the callers in this file do so before calling it.
 *
 * @param formatter  Formatter under test.
 * @param testData   Rows of { spelled-out input, expected value as
 *                   US-formatted digits }; the table ends at the first row
 *                   whose first element is NULL.
 */
void
IntlTestRBNF::doLenientParseTest(RuleBasedNumberFormat* formatter, const char* testData[][2])
{
    UErrorCode status = U_ZERO_ERROR;
    NumberFormat* decFmt = NumberFormat::createInstance(Locale::US, status);
    if (U_FAILURE(status)) {
        errln("FAIL: could not create NumberFormat");
    } else {
        for (int i = 0; testData[i][0]; ++i) {
            const char* spelledNumber = testData[i][0]; // spelled-out number
            const char* asciiUSNumber = testData[i][1]; // number as ascii digits formatted for US locale

            UnicodeString spelledNumberString = UnicodeString(spelledNumber).unescape();
            Formattable actualNumber;
            formatter->parse(spelledNumberString, actualNumber, status);
            if (U_FAILURE(status)) {
                UnicodeString msg = "FAIL: formatter could not parse ";
                msg.append(spelledNumberString);
                errln(msg);
                break;
            }

            // The Java original compares strings. Comparing the Formattables
            // directly was tried here, but they do not compare equal, so
            // both values are formatted and the strings are compared.
            UnicodeString asciiUSNumberString = asciiUSNumber;
            Formattable expectedNumber;
            decFmt->parse(asciiUSNumberString, expectedNumber, status);
            if (U_FAILURE(status)) {
                UnicodeString msg = "FAIL: decFmt could not parse ";
                msg.append(asciiUSNumberString);
                errln(msg);
                break;
            }

            UnicodeString actualNumberString;
            UnicodeString expectedNumberString;
            decFmt->format(actualNumber, actualNumberString, status);
            decFmt->format(expectedNumber, expectedNumberString, status);
            if (actualNumberString != expectedNumberString) {
                // Trailing space keeps the number from running into "parsing".
                UnicodeString msg = "FAIL: parsing ";
                msg.append(asciiUSNumberString);
                msg.append("\n");
                msg.append(" lenient parse failed for ");
                msg.append(spelledNumberString);
                msg.append(", expected ");
                msg.append(expectedNumberString);
                msg.append(", but got ");
                msg.append(actualNumberString);
                errln(msg);
                break;
            }
        }
    }
    // Released on every path (deleting a null pointer is a no-op), matching
    // how the callers release their formatter.
    delete decFmt;
}