ICU-1271 Port RuleBasedNumberFormat to ICU

X-SVN-Rev: 6172
This commit is contained in:
Doug Felt 2001-10-10 21:43:11 +00:00
parent 00ac42f02f
commit 7258dac736
52 changed files with 8762 additions and 14 deletions

View File

@ -519,4 +519,58 @@ de {
"Latf", // ISO 15924 Name
"Latg", // ISO 15924 Name
}
//------------------------------------------------------------
// Rule Based Number Format Support
//------------------------------------------------------------
// * RuleBasedNumberFormat data for German
// again, I'm not 100% sure of these rules. I think both "hundert" and
// "einhundert" are correct for 100, but I'm not sure which is preferable
// in situations where this framework is likely to be used. Also, is it
// really true that numbers are run together into compound words all the
// time?
SpelloutRules {
// German spellout rules. %alt-ones is the public entry point; %%main holds
// the actual number words and is private to this rule-set collection.
// 1 is "eins" when by itself, but turns into "ein" in most
// combinations, so %alt-ones spells 0 and 1 itself and hands every
// value from 2 upward to %%main
"%alt-ones:\n"
" -x: minus >>;\n"
" x.x: << komma >>;\n"
" null; eins; =%%main=;\n"
"%%main:\n"
// words for numbers from 0 to 12. Notice that most of the values
// from 13 to 19 can be derived algorithmically (ones digit + "zehn"),
// unlike in most other languages
" null; ein; zwei; drei; vier; f\u00fcnf; sechs; sieben; acht; neun;\n"
" zehn; elf; zw\u00f6lf; >>zehn;\n"
// 16 and 17 are irregular: the ones word is shortened ("sechzehn",
// "siebzehn", not "sechszehn"/"siebenzehn"), so they are spelled out
// explicitly; the algorithmic rule is then restated for 18 and 19
" 16: sechzehn; siebzehn; >>zehn;\n"
// rules for the multiples of 10. Notice that the ones digit
// goes on the front
" 20: [>>und]zwanzig;\n"
" 30: [>>und]drei\u00dfig;\n"
" 40: [>>und]vierzig;\n"
" 50: [>>und]f\u00fcnfzig;\n"
" 60: [>>und]sechzig;\n"
" 70: [>>und]siebzig;\n"
" 80: [>>und]achtzig;\n"
" 90: [>>und]neunzig;\n"
// hundreds and thousands: the multiplier is omitted for exactly one
// hundred / one thousand, and the remainder goes back through
// %alt-ones so that a trailing 1 is rendered as "eins"
" 100: hundert[>%alt-ones>];\n"
" 200: <<hundert[>%alt-ones>];\n"
" 1000: tausend[>%alt-ones>];\n"
" 2000: <<tausend[>%alt-ones>];\n"
// from a million on, the unit is a separate word with distinct
// singular and plural forms
" 1,000,000: eine Million[ >%alt-ones>];\n"
" 2,000,000: << Millionen[ >%alt-ones>];\n"
" 1,000,000,000: eine Milliarde[ >%alt-ones>];\n"
" 2,000,000,000: << Milliarden[ >%alt-ones>];\n"
" 1,000,000,000,000: eine Billion[ >%alt-ones>];\n"
" 2,000,000,000,000: << Billionen[ >%alt-ones>];\n"
// overflow rule: larger values are shown in digits
" 1,000,000,000,000,000: =#,##0=;"
// collation rules used for lenient parsing: spaces and hyphens are
// listed after \u0000, and each umlaut vowel is related to its
// two-letter ae/oe/ue spelling
// NOTE(review): unlike the root rules, these lines have no terminating
// ';' -- confirm the rule parser accepts that
"%%lenient-parse:\n"
" &\u0000 << ' ' << '-'\n"
" & ae , \u00e4 & ae , \u00c4\n"
" & oe , \u00f6 & oe , \u00d6\n"
" & ue , \u00fc & ue , \u00dc\n"
}
}

View File

@ -116,4 +116,53 @@ el {
"Greek",// Script Name
"Grek" // ISO 15924 Name
}
//------------------------------------------------------------
// Rule Based Number Format Support
//------------------------------------------------------------
// * Spellout rules for Greek. Again in Greek we have to supply the words
// * for the multiples of 100 because they can't be derived algorithmically.
// * Also, the tens digit changes form when followed by a ones digit: an
// * accent mark disappears from the tens digit and moves to the ones digit.
// * Therefore, instead of using the [] notation, we actually have to use
// * two separate rules for each multiple of 10 to show the two forms of
// * the word.
// Can someone supply me with information on negatives and decimals?
// I'm also missing the word for zero. Can someone clue me in?
SpelloutRules {
// words for 0 to 9 (the word for zero is still a placeholder).
// The word for 3 is spelled with an initial tau (U+03C4); it is also
// reused via >> to build 13, 23, 33, ...
"zero (incomplete data); \u03ad\u03bd\u03b1; \u03b4\u03cd\u03bf; \u03c4\u03c1\u03af\u03b1; "
"\u03c4\u03ad\u03c3\u03c3\u03b5\u03c1\u03b1; \u03c0\u03ad\u03bd\u03c4\u03b5; "
"\u03ad\u03be\u03b9; \u03b5\u03c0\u03c4\u03ac; \u03bf\u03ba\u03c4\u03ce; "
"\u03b5\u03bd\u03bd\u03ad\u03b1;\n"
// 10, 11 and 12 have their own words; 13 to 19 are the unaccented
// form of 10 followed by the ones digit
"10: \u03b4\u03ad\u03ba\u03b1; "
"\u03ad\u03bd\u03b4\u03b5\u03ba\u03b1; \u03b4\u03ce\u03b4\u03b5\u03ba\u03b1; "
"\u03b4\u03b5\u03ba\u03b1>>;\n"
// each multiple of 10 has two rules: the accented stand-alone form,
// then (at base value + 1) the unaccented form followed by the ones
// digit
"20: \u03b5\u03af\u03ba\u03bf\u03c3\u03b9; \u03b5\u03b9\u03ba\u03bf\u03c3\u03b9>>;\n"
"30: \u03c4\u03c1\u03b9\u03ac\u03bd\u03c4\u03b1; \u03c4\u03c1\u03b9\u03b1\u03bd\u03c4\u03b1>>;\n"
"40: \u03c3\u03b1\u03c1\u03ac\u03bd\u03c4\u03b1; \u03c3\u03b1\u03c1\u03b1\u03bd\u03c4\u03b1>>;\n"
"50: \u03c0\u03b5\u03bd\u03ae\u03bd\u03c4\u03b1; \u03c0\u03b5\u03bd\u03b7\u03bd\u03c4\u03b1>>;\n"
"60: \u03b5\u03be\u03ae\u03bd\u03c4\u03b1; \u03b5\u03be\u03b7\u03bd\u03c4\u03b1>>;\n"
"70: \u03b5\u03b2\u03b4\u03bf\u03bc\u03ae\u03bd\u03c4\u03b1; "
"\u03b5\u03b2\u03b4\u03bf\u03bc\u03b7\u03bd\u03c4\u03b1>>;\n"
"80: \u03bf\u03b3\u03b4\u03cc\u03bd\u03c4\u03b1; \u03bf\u03b3\u03b4\u03bf\u03bd\u03c4\u03b1>>;\n"
"90: \u03b5\u03bd\u03bd\u03b5\u03bd\u03ae\u03bd\u03c4\u03b1; "
"\u03b5\u03bd\u03bd\u03b5\u03bd\u03b7\u03bd\u03c4\u03b1>>;\n"
// every multiple of 100 is listed explicitly; 100 itself gains a final
// nu when more digits follow
"100: \u03b5\u03ba\u03b1\u03c4\u03cc[\u03bd >>];\n"
"200: \u03b4\u03b9\u03b1\u03ba\u03cc\u03c3\u03b9\u03b1[ >>];\n"
"300: \u03c4\u03c1\u03b9\u03b1\u03ba\u03cc\u03c3\u03b9\u03b1[ >>];\n"
"400: \u03c4\u03b5\u03c4\u03c1\u03b1\u03ba\u03cc\u03c3\u03b9\u03b1[ >>];\n"
"500: \u03c0\u03b5\u03bd\u03c4\u03b1\u03ba\u03cc\u03c3\u03b9\u03b1[ >>];\n"
"600: \u03b5\u03be\u03b1\u03ba\u03cc\u03c3\u03b9\u03b1[ >>];\n"
"700: \u03b5\u03c0\u03c4\u03b1\u03ba\u03cc\u03c3\u03b9\u03b1[ >>];\n"
"800: \u03bf\u03ba\u03c4\u03b1\u03ba\u03cc\u03c3\u03b9\u03b1[ >>];\n"
"900: \u03b5\u03bd\u03bd\u03b9\u03b1\u03ba\u03cc\u03c3\u03b9\u03b1[ >>];\n"
// thousands, millions and billions; the multiplier is omitted only
// for exactly one thousand
"1000: \u03c7\u03af\u03bb\u03b9\u03b1[ >>];\n"
"2000: << \u03c7\u03af\u03bb\u03b9\u03b1[ >>];\n"
"1,000,000: << \u03b5\u03ba\u03b1\u03c4\u03bf\u03bc\u03bc\u03b9\u03cc\u03c1\u03b9\u03bf[ >>];\n"
"1,000,000,000: << \u03b4\u03b9\u03c3\u03b5\u03ba\u03b1\u03c4\u03bf\u03bc\u03bc\u03b9\u03cc\u03c1\u03b9\u03bf[ >>];\n"
// overflow rule: larger values are shown in digits
"1,000,000,000,000: =#,##0="
}
}

View File

@ -233,4 +233,11 @@ en {
"Latf", // ISO 15924 Name
"Latg", // ISO 15924 Name
}
//------------------------------------------------------------
// Rule Based Number Format Support
//------------------------------------------------------------
// inherited from root
}

View File

@ -49,4 +49,70 @@ en_GB {
"BST",
}
}
//------------------------------------------------------------
// Rule Based Number Format Support
//------------------------------------------------------------
// * Spellout rules for U.K. English. U.K. English has one significant
// * difference from U.S. English: the names for values of 1,000,000,000
// * and higher. In American English, each successive "-illion" is 1,000
// * times greater than the preceding one: 1,000,000,000 is "one billion"
// * and 1,000,000,000,000 is "one trillion." In British English, each
// * successive "-illion" is one million times greater than the one before:
// * "one billion" is 1,000,000,000,000 (or what Americans would call a
// * "trillion"), and "one trillion" is 1,000,000,000,000,000,000.
// * 1,000,000,000 in British English is "one thousand million." (This
// * value is sometimes called a "milliard," but this word seems to have
// * fallen into disuse.)
// Could someone please correct me if I'm wrong about "milliard" falling
// into disuse, or have missed any other details of how large numbers
// are rendered. Also, could someone please provide me with information
// on which other English-speaking countries use which system? Right now,
// I'm assuming that the U.S. system is used in Canada and that all the
// other English-speaking countries follow the British system. Can
// someone out there confirm this?
SpelloutRules {
// plain spellout with no "and" and no commas: 289 is
// "two hundred eighty-nine"
"%simplified:\n"
// negative-number and fraction rules
" -x: minus >>;\n"
" x.x: << point >>;\n"
// words for the values from 0 to 19
" zero; one; two; three; four; five; six; seven; eight; nine;\n"
" ten; eleven; twelve; thirteen; fourteen; fifteen; sixteen;\n"
" seventeen; eighteen; nineteen;\n"
// multiples of 10; the bracketed hyphen and ones digit are omitted
// for even multiples of 10
" 20: twenty[->>];\n"
" 30: thirty[->>];\n"
" 40: forty[->>];\n"
" 50: fifty[->>];\n"
" 60: sixty[->>];\n"
" 70: seventy[->>];\n"
" 80: eighty[->>];\n"
" 90: ninety[->>];\n"
" 100: << hundred[ >>];\n"
" 1000: << thousand[ >>];\n"
// there is deliberately no rule at 1,000,000,000: that value falls
// under the million rule and comes out as "one thousand million";
// "billion" starts at 1,000,000,000,000
" 1,000,000: << million[ >>];\n"
" 1,000,000,000,000: << billion[ >>];\n"
// overflow rule: larger values are shown in digits
" 1,000,000,000,000,000: =#,##0=;\n"
// same as %simplified, but interposes "and" and commas
"%default:\n"
// restated here because %default is an entry point too
" -x: minus >>;\n"
" x.x: << point >>;\n"
// values below 100 are delegated to %simplified
" =%simplified=;\n"
// %%and decides whether "and" goes before the remainder
" 100: << hundred[ >%%and>];\n"
" 1000: << thousand[ >%%and>];\n"
// from 100,000 up, %%commas inserts the commas (and the "and")
" 100,000>>: << thousand[>%%commas>];\n"
" 1,000,000: << million[>%%commas>];\n"
" 1,000,000,000,000: << billion[>%%commas>];\n"
" 1,000,000,000,000,000: =#,##0=;\n"
// remainders below 100 get "and" in front; remainders of 100 or more
// are formatted as they are
"%%and:\n"
" and =%default=;\n"
" 100: =%default=;\n"
"%%commas:\n"
// the leading apostrophe makes the space after it significant, since
// the calling rules put no space before the substitution
" ' and =%default=;\n"
" 100: , =%default=;\n"
" 1000: , <%default< thousand, >%default>;\n"
" 1,000,000: , =%default=;"
// collation rules for lenient parsing; they relate the comma to the
// space character
"%%lenient-parse:\n"
" & ' ' , ',' ;\n"
}
}

View File

@ -140,4 +140,31 @@ eo {
"Latf", // ISO 15924 Name
"Latg", // ISO 15924 Name
}
//------------------------------------------------------------
// Rule Based Number Format Support
//------------------------------------------------------------
// data from 'Esperanto-programita 1' courtesy of Markus Scherer
SpelloutRules {
// negative-number and fraction rules
"-x: minus >>;\n"
"x.x: << komo >>;\n"
// words for the values from 0 to 9
"nulo; unu; du; tri; kvar; kvin; ses; sep; ok; na\u016d;\n"
// tens, hundreds and thousands: the multiplier is omitted for exactly
// one ten / hundred / thousand and otherwise joined directly onto the
// unit word
"10: dek[ >>];\n"
"20: <<dek[ >>];\n"
"100: cent[ >>];\n"
"200: <<cent[ >>];\n"
"1000: mil[ >>];\n"
"2000: <<mil[ >>];\n"
// 10,000 to 10,999 is the single word "dekmil"; from 11,000 the
// multiplier and "mil" are separate words. The > after 11000 lowers
// the rule's exponent so that its divisor is 1,000 rather than 10,000
"10000: dekmil[ >>];\n"
"11000>: << mil[ >>];\n"
// millions and up have singular and plural (-j) forms
"1,000,000: miliono[ >>];\n"
"2,000,000: << milionoj[ >>];\n"
"1,000,000,000: miliardo[ >>];\n"
"2,000,000,000: << miliardoj[ >>];\n"
"1,000,000,000,000: biliono[ >>];\n"
"2,000,000,000,000: << bilionoj[ >>];\n"
// overflow rule: larger values are shown in digits
"1,000,000,000,000,000: =#,##0=;\n"
}
}

View File

@ -258,4 +258,69 @@ es {
"Latf", // ISO 15924 Name
"Latg", // ISO 15924 Name
}
//------------------------------------------------------------
// Rule Based Number Format Support
//------------------------------------------------------------
// * Spellout rules for Spanish. The Spanish rules are quite similar to
// * the English rules, but there are some important differences:
// * First, we have to provide separate rules for most of the twenties
// * because the ones digit frequently picks up an accent mark that it
// * doesn't have when standing alone. Second, each multiple of 100 has
// * to be specified separately because the multiplier on 100 very often
// * changes form in the contraction: 500 is "quinientos," not
// * "cincocientos." In addition, the word for 100 is "cien" when
// * standing alone, but changes to "ciento" when followed by more digits.
// * There are also some other differences.
// The Spanish rules are incomplete. I'm missing information on negative
// numbers and numbers with fractional parts. I also don't have
// information on numbers higher than the millions.
SpelloutRules {
// negative-number and fraction rules
"-x: menos >>;\n"
"x.x: << punto >>;\n"
// words for values from 0 to 19
"cero; uno; dos; tres; cuatro; cinco; seis; siete; ocho; nueve;\n"
"diez; once; doce; trece; catorce; quince; diecis\u00e9is;\n"
" diecisiete; dieciocho; diecinueve;\n"
// words for values from 20 to 29 (necessary because the ones digit
// often picks up an accent mark it doesn't have when standing alone)
"veinte; veintiuno; veintid\u00f3s; veintitr\u00e9s; veinticuatro;\n"
" veinticinco; veintis\u00e9is; veintisiete; veintiocho;\n"
" veintinueve;\n"
// words for multiples of 10 (notice that the tens digit is separated
// from the ones digit by the word "y".)
"30: treinta[ y >>];\n"
"40: cuarenta[ y >>];\n"
"50: cincuenta[ y >>];\n"
"60: sesenta[ y >>];\n"
"70: setenta[ y >>];\n"
"80: ochenta[ y >>];\n"
"90: noventa[ y >>];\n"
// 100 by itself is "cien," but 100 followed by something is "ciento"
"100: cien;\n"
"101: ciento >>;\n"
// words for multiples of 100 (must be stated because they're
// rarely simple concatenations)
"200: doscientos[ >>];\n"
"300: trescientos[ >>];\n"
"400: cuatrocientos[ >>];\n"
"500: quinientos[ >>];\n"
"600: seiscientos[ >>];\n"
"700: setecientos[ >>];\n"
"800: ochocientos[ >>];\n"
"900: novecientos[ >>];\n"
// for 1,000, the multiplier on "mil" is omitted: 2,000 is "dos mil,"
// but 1,000 is just "mil."
"1000: mil[ >>];\n"
"2000: << mil[ >>];\n"
// 1,000,000 is "un millon," not "uno millon"; from 2,000,000 on, the
// plural "millones" is required ("dos millones")
"1,000,000: un mill\u00f3n[ >>];\n"
"2,000,000: << millones[ >>];\n"
// overflow rule
"1,000,000,000: =#,##0= (incomplete data);"
}
}

View File

@ -190,10 +190,73 @@ fr {
"Latf", // ISO 15924 Name
"Latg", // ISO 15924 Name
}
//------------------------------------------------------------
// Rule Based Number Format Support
//------------------------------------------------------------
// * Spellout rules for French. French adds some interesting quirks of its
// * own: 1) The word "et" is interposed between the tens and ones digits,
// * but only if the ones digit is 1: 20 is "vingt," and 22 is "vingt-deux,"
// * but 21 is "vingt-et-un." 2) There are no words for 70, 80, or 90.
// * "quatre-vingts" ("four twenties") is used for 80, and values proceed
// * by score from 60 to 99 (e.g., 73 is "soixante-treize" ["sixty-thirteen"]).
// * Numbers from 1,100 to 1,199 are rendered as hundreds rather than
// * thousands: 1,100 is "onze cents" ("eleven hundred"), rather than
// * "mille cent" ("one thousand one hundred")
SpelloutRules {
// the main rule set
"%main:\n"
// negative-number and fraction rules
" -x: moins >>;\n"
" x.x: << virgule >>;\n"
// words for numbers from 0 to 19
" z\u00e9ro; un; deux; trois; quatre; cinq; six; sept; huit; neuf;\n"
" dix; onze; douze; treize; quatorze; quinze; seize;\n"
" dix-sept; dix-huit; dix-neuf;\n"
// words for the multiples of 10: %%alt-ones inserts "et"
// when needed
" 20: vingt[->%%alt-ones>];\n"
" 30: trente[->%%alt-ones>];\n"
" 40: quarante[->%%alt-ones>];\n"
" 50: cinquante[->%%alt-ones>];\n"
// rule for 60. The /20 causes this rule's multiplier to be
// 20 rather than 10, allowing us to recurse for all values
// from 60 to 79...
" 60/20: soixante[->%%alt-ones>];\n"
// ...except for 71, which must be special-cased
" 71: soixante et onze;\n"
// at 72, we have to repeat the rule for 60 to get us to 79
" 72/20: soixante->%%alt-ones>;\n"
// at 80, we state a new rule with the phrase for 80. Since
// it changes form when there's a ones digit, we need a second
// rule at 81. This rule also includes "/20," allowing it to
// be used correctly for all values up to 99
" 80: quatre-vingts; 81/20: quatre-vingt->>;\n"
// "cent" becomes plural when preceded by a multiplier, and
// the multiplier is omitted from the singular form
" 100: cent[ >>];\n"
" 200: << cents[ >>];\n"
" 1000: mille[ >>];\n"
// values from 1,100 to 1,199 are rendered as "onze cents..."
// instead of "mille cent..." The > after "1100" decreases
// the rule's exponent, causing its multiplier to be 100 instead
// of 1,000. This prevents us from getting "onze cents cent
// vingt-deux" ("eleven hundred one hundred twenty-two").
" 1100>: onze cents[ >>];\n"
// at 1,200, we go back to formatting in thousands, so we
// repeat the rule for 1,000
" 1200: mille >>;\n"
// at 2,000, the multiplier is added
" 2000: << mille[ >>];\n"
// the word for 1,000,000,000 is "milliard" (no final "e")
" 1,000,000: << million[ >>];\n"
" 1,000,000,000: << milliard[ >>];\n"
" 1,000,000,000,000: << billion[ >>];\n"
// overflow rule: larger values are shown in digits
" 1,000,000,000,000,000: =#,##0=;\n"
// %%alt-ones is used to insert "et" when the ones digit is 1
"%%alt-ones:\n"
" ; et-un; =%main=;\n"
// collation rules for lenient parsing: spaces, commas and hyphens
// are listed after \u0000
"%%lenient-parse:\n"
" &\u0000 << ' ' << ',' << '-';\n"
}
}

View File

@ -56,4 +56,51 @@ fr_CH {
"GMT",
}
}
//------------------------------------------------------------
// Rule Based Number Format Support
//------------------------------------------------------------
// * Spellout rules for Swiss French. Swiss French differs from French French
// * in that it does have words for 70, 80, and 90. This rule set shows them,
// * and is simpler as a result.
// again, I'm missing information on negative numbers and decimals for
// these two rule sets. Also, I'm not 100% sure about Swiss French. Is
// this correct? Is "onze cents" commonly used for 1,100 in both France
// and Switzerland? Can someone fill me in on the rules for the other
// French-speaking countries? I've heard conflicting opinions on which
// version is used in Canada, and I understand there's an alternate set
// of words for 70, 80, and 90 that is used somewhere, but I don't know
// what those words are or where they're used.
SpelloutRules {
"%main:\n"
// negative-number and fraction rules
" -x: moins >>;\n"
" x.x: << virgule >>;\n"
// words for numbers from 0 to 19
" z\u00e9ro; un; deux; trois; quatre; cinq; six; sept; huit; neuf;\n"
" dix; onze; douze; treize; quatorze; quinze; seize;\n"
" dix-sept; dix-huit; dix-neuf;\n"
// multiples of 10: %%alt-ones inserts "et" when the ones digit is 1
" 20: vingt[->%%alt-ones>];\n"
" 30: trente[->%%alt-ones>];\n"
" 40: quarante[->%%alt-ones>];\n"
" 50: cinquante[->%%alt-ones>];\n"
" 60: soixante[->%%alt-ones>];\n"
// notice new words for 70, 80, and 90
" 70: septante[->%%alt-ones>];\n"
" 80: octante[->%%alt-ones>];\n"
" 90: nonante[->%%alt-ones>];\n"
// "cent" is plural after a multiplier; the multiplier is omitted for
// exactly 100 and exactly 1,000
" 100: cent[ >>];\n"
" 200: << cents[ >>];\n"
" 1000: mille[ >>];\n"
// 1,100 to 1,199 is "onze cents ..."; the > lowers the rule's exponent
// so its divisor is 100, and the rule at 1200 returns to thousands
" 1100>: onze cents[ >>];\n"
" 1200: mille >>;\n"
" 2000: << mille[ >>];\n"
// the word for 1,000,000,000 is "milliard" (no final "e")
" 1,000,000: << million[ >>];\n"
" 1,000,000,000: << milliard[ >>];\n"
" 1,000,000,000,000: << billion[ >>];\n"
// overflow rule: larger values are shown in digits
" 1,000,000,000,000,000: =#,##0=;\n"
// 0 yields nothing, 1 yields "et-un", everything else goes to %main
"%%alt-ones:\n"
" ; et-un; =%main=;"
}
}

View File

@ -102,4 +102,38 @@ he {
"Hebrew", // Script Name
"Hebr" // ISO 15924 Name
}
//------------------------------------------------------------
// Rule Based Number Format Support
//------------------------------------------------------------
// * Spellout rules for Hebrew. Hebrew actually has inflected forms for
// * most of the lower-order numbers. The masculine forms are shown
// * here.
// This data is woefully incomplete. Can someone fill me in on the
// various inflected forms of the numbers, which seem to be necessary
// to do Hebrew correctly? Can someone supply me with data for values
// from 1,000,000 on up? What about the word for zero? What about
// information on negatives and decimals?
SpelloutRules {
// NOTE(review): several of the words below look as if visually similar
// Hebrew letters were swapped (e.g. U+05D4 where U+05D7 would be expected
// in the word for 1, U+05D2 for U+05E0 in the word for 2, U+05E6 for
// U+05E2 in the words for 7, 9 and 10) -- have a Hebrew speaker verify
// every string before relying on this data.
// words for 0 to 9; the word for zero is still a placeholder
"zero (incomplete data); \u05d0\u05d4\u05d3; \u05e9\u05d2\u05d9\u05d9\u05dd; \u05e9\u05dc\u05d5\u05e9\u05d4;\n"
"4: \u05d0\u05d3\u05d1\u05e6\u05d4; \u05d7\u05d2\u05d5\u05d9\u05e9\u05d4; \u05e9\u05e9\u05d4;\n"
"7: \u05e9\u05d1\u05e6\u05d4; \u05e9\u05de\u05d5\u05d2\u05d4; \u05ea\u05e9\u05e6\u05d4;\n"
// multiples of 10, each followed by a space and the ones digit when
// there is one
"10: \u05e6\u05e9\u05d3\u05d4[ >>];\n"
"20: \u05e6\u05e9\u05d3\u05d9\u05dd[ >>];\n"
"30: \u05e9\u05dc\u05d5\u05e9\u05d9\u05dd[ >>];\n"
"40: \u05d0\u05d3\u05d1\u05e6\u05d9\u05dd[ >>];\n"
"50: \u05d7\u05de\u05d9\u05e9\u05d9\u05dd[ >>];\n"
"60: \u05e9\u05e9\u05d9\u05dd[ >>];\n"
"70: \u05e9\u05d1\u05e6\u05d9\u05dd[ >>];\n"
"80: \u05e9\u05de\u05d5\u05d2\u05d9\u05dd[ >>];\n"
"90: \u05ea\u05e9\u05e6\u05d9\u05dd[ >>];\n"
// hundreds and thousands: the multiplier is omitted for exactly one
// hundred / one thousand
"100: \u05de\u05d0\u05d4[ >>];\n"
"200: << \u05de\u05d0\u05d4[ >>];\n"
"1000: \u05d0\u05dc\u05e3[ >>];\n"
"2000: << \u05d0\u05dc\u05e3[ >>];\n"
// overflow rule: values of 1,000,000 and up are shown in digits
"1,000,000: =#,##0= (incomplete data);"
}
}

View File

@ -117,4 +117,106 @@ it {
"Latf", // ISO 15924 Name
"Latg", // ISO 15924 Name
}
//------------------------------------------------------------
// Rule Based Number Format Support
//------------------------------------------------------------
// * Spellout rules for Italian. Like German, most Italian numbers are
// * written as single words. What makes these rules complicated is the rule
// * that says that when a word ending in a vowel and a word beginning with
// * a vowel are combined into a compound, the vowel is dropped from the
// * end of the first word: 180 is "centottanta," not "centoottanta."
// * The complexity of this rule set is to produce this behavior.
// Can someone confirm that I did the vowel-eliding thing right? I'm
// not 100% sure I'm doing it in all the right places, or completely
// correctly. Also, I don't have information for negatives and decimals,
// and I lack words for values from 1,000,000 on up.
SpelloutRules {
// main rule set. Follows the patterns of the preceding rule sets,
// except that the final vowel is omitted from words ending in
// vowels when they are followed by another word; instead, we have
// separate rule sets that are identical to this one, except that
// all the words that don't begin with a vowel have a vowel tacked
// onto them at the front. A word ending in a vowel calls a
// substitution that will supply that vowel, unless that vowel is to
// be elided.
// (17 is spelled "diciassette", with a double s, in every rule set.)
"%main:\n"
" -x: meno >>;\n"
" x.x: << virgola >>;\n"
" zero; uno; due; tre; quattro; cinque; sei; sette; otto;\n"
" nove;\n"
" dieci; undici; dodici; tredici; quattordici; quindici; sedici;\n"
" diciassette; diciotto; diciannove;\n"
" 20: venti; vent>%%with-i>;\n"
" 30: trenta; trent>%%with-i>;\n"
" 40: quaranta; quarant>%%with-a>;\n"
" 50: cinquanta; cinquant>%%with-a>;\n"
" 60: sessanta; sessant>%%with-a>;\n"
" 70: settanta; settant>%%with-a>;\n"
" 80: ottanta; ottant>%%with-a>;\n"
" 90: novanta; novant>%%with-a>;\n"
" 100: cento; cent[>%%with-o>];\n"
" 200: <<cento; <<cent[>%%with-o>];\n"
" 1000: mille; mill[>%%with-i>];\n"
" 2000: <<mila; <<mil[>%%with-a>];\n"
" 100,000>>: <<mila[ >>];\n"
" 1,000,000: =#,##0= (incomplete data);\n"
// same as %main, but words that begin with a consonant are prefixed
// with "a" (the vowel dropped by the caller)
"%%with-a:\n"
" azero; uno; adue; atre; aquattro; acinque; asei; asette; otto;\n"
" anove;\n"
" adieci; undici; adodici; atredici; aquattordici; aquindici; asedici;\n"
" adiciassette; adiciotto; adiciannove;\n"
" 20: aventi; avent>%%with-i>;\n"
" 30: atrenta; atrent>%%with-i>;\n"
" 40: aquaranta; aquarant>%%with-a>;\n"
" 50: acinquanta; acinquant>%%with-a>;\n"
" 60: asessanta; asessant>%%with-a>;\n"
" 70: asettanta; asettant>%%with-a>;\n"
" 80: ottanta; ottant>%%with-a>;\n"
" 90: anovanta; anovant>%%with-a>;\n"
" 100: acento; acent[>%%with-o>];\n"
" 200: <%%with-a<cento; <%%with-a<cent[>%%with-o>];\n"
" 1000: amille; amill[>%%with-i>];\n"
" 2000: <%%with-a<mila; <%%with-a<mil[>%%with-a>];\n"
" 100,000: =%main=;\n"
// same as %main, but words that begin with a consonant are prefixed
// with "i"
"%%with-i:\n"
" izero; uno; idue; itre; iquattro; icinque; isei; isette; otto;\n"
" inove;\n"
" idieci; undici; idodici; itredici; iquattordici; iquindici; isedici;\n"
" idiciassette; idiciotto; idiciannove;\n"
" 20: iventi; ivent>%%with-i>;\n"
" 30: itrenta; itrent>%%with-i>;\n"
" 40: iquaranta; iquarant>%%with-a>;\n"
" 50: icinquanta; icinquant>%%with-a>;\n"
" 60: isessanta; isessant>%%with-a>;\n"
" 70: isettanta; isettant>%%with-a>;\n"
" 80: ottanta; ottant>%%with-a>;\n"
" 90: inovanta; inovant>%%with-a>;\n"
" 100: icento; icent[>%%with-o>];\n"
" 200: <%%with-i<cento; <%%with-i<cent[>%%with-o>];\n"
" 1000: imille; imill[>%%with-i>];\n"
" 2000: <%%with-i<mila; <%%with-i<mil[>%%with-a>];\n"
" 100,000: =%main=;\n"
// same as %main, but words that begin with a consonant are prefixed
// with "o"
"%%with-o:\n"
" ozero; uno; odue; otre; oquattro; ocinque; osei; osette; otto;\n"
" onove;\n"
" odieci; undici; ododici; otredici; oquattordici; oquindici; osedici;\n"
" odiciassette; odiciotto; odiciannove;\n"
" 20: oventi; ovent>%%with-i>;\n"
" 30: otrenta; otrent>%%with-i>;\n"
" 40: oquaranta; oquarant>%%with-a>;\n"
" 50: ocinquanta; ocinquant>%%with-a>;\n"
" 60: osessanta; osessant>%%with-a>;\n"
" 70: osettanta; osettant>%%with-a>;\n"
" 80: ottanta; ottant>%%with-a>;\n"
" 90: onovanta; onovant>%%with-a>;\n"
" 100: ocento; ocent[>%%with-o>];\n"
" 200: <%%with-o<cento; <%%with-o<cent[>%%with-o>];\n"
" 1000: omille; omill[>%%with-i>];\n"
" 2000: <%%with-o<mila; <%%with-o<mil[>%%with-a>];\n"
" 100,000: =%main=;\n"
}
}

View File

@ -859,4 +859,47 @@ ja {
"JST",
}
}
//------------------------------------------------------------
// Rule Based Number Format Support
//------------------------------------------------------------
// * Spellout rules for Japanese. In Japanese, there really isn't any
// * distinction between a number written out in digits and a number
// * written out in words: the ideographic characters are both digits
// * and words. This rule set provides two variants: %traditional
// * uses the traditional CJK numerals (which are also used in China
// * and Korea). %financial uses alternate ideographs for many numbers
// * that are harder to alter than the traditional numerals (one could
// * fairly easily change a one to
// * a three just by adding two strokes, for example). This is also done in
// * the other countries using Chinese ideographs, but different ideographs
// * are used in those places.
// Can someone supply me with the right fraud-proof ideographs for
// Simplified and Traditional Chinese, and for Korean? Can someone
// supply me with information on negatives and decimals?
SpelloutRules {
// variant using the alternate, harder-to-alter ideographs for 1, 2, 3,
// 5 and 10
"%financial:\n"
// digits 0 to 9, then the rule for 10 (its base value is implied by
// its position)
" \u96f6; \u58f1; \u5f10; \u53c2; \u56db; \u4f0d; \u516d; \u4e03; \u516b; \u4e5d;\n"
" \u62fe[>>];\n"
// from 20 up, the multiplier is written in front of the unit
" 20: <<\u62fe[>>];\n"
" 100: <<\u767e[>>];\n"
" 1000: <<\u5343[>>];\n"
// above 1,000 the units step by a factor of 10,000
" 10,000: <<\u4e07[>>];\n"
" 100,000,000: <<\u5104[>>];\n"
" 1,000,000,000,000: <<\u5146[>>];\n"
// overflow rule: larger values are shown in digits
" 10,000,000,000,000,000: =#,##0=;\n"
// variant using the traditional numerals; structure is identical to
// %financial
"%traditional:\n"
" \u96f6; \u4e00; \u4e8c; \u4e09; \u56db; \u4e94; \u516d; \u4e03; \u516b; \u4e5d;\n"
" \u5341[>>];\n"
" 20: <<\u5341[>>];\n"
" 100: <<\u767e[>>];\n"
" 1000: <<\u5343[>>];\n"
" 10,000: <<\u4e07[>>];\n"
" 100,000,000: <<\u5104[>>];\n"
" 1,000,000,000,000: <<\u5146[>>];\n"
" 10,000,000,000,000,000: =#,##0=;"
}
}

View File

@ -108,4 +108,32 @@ nl {
"Latg", // ISO 15924 Name
}
//------------------------------------------------------------
// Rule Based Number Format Support
//------------------------------------------------------------
// * Spellout rules for Dutch
// can someone supply me with information on negatives and decimals?
SpelloutRules {
// negative-number and fraction rules
" -x: min >>;\n"
"x.x: << komma >>;\n"
// words for the values from 0 to 19
"nul; een; twee; drie; vier; vijf; zes; zeven; acht; negen;\n"
"tien; elf; twaalf; dertien; veertien; vijftien; zestien;\n"
"zeventien; achttien; negentien;\n"
// multiples of 10: the ones digit goes in front, joined by "en"
"20: [>> en ]twintig;\n"
"30: [>> en ]dertig;\n"
"40: [>> en ]veertig;\n"
"50: [>> en ]vijftig;\n"
"60: [>> en ]zestig;\n"
"70: [>> en ]zeventig;\n"
"80: [>> en ]tachtig;\n"
"90: [>> en ]negentig;\n"
"100: << honderd[ >>];\n"
"1000: << duizend[ >>];\n"
"1,000,000: << miljoen[ >>];\n"
// Dutch uses the long scale: 1,000,000,000 is "miljard" ("biljoen"
// would be 1,000,000,000,000)
"1,000,000,000: << miljard[ >>];\n"
// overflow rule: larger values are shown in digits
"1,000,000,000,000: =#,##0="
}
}

View File

@ -1180,11 +1180,221 @@ root {
"Anchorage",
}
}
LocaleScript{
"Latin",
"Latn",
"Latf",
"Latg"
}
//------------------------------------------------------------
// Rule Based Number Format Support
//------------------------------------------------------------
// * Spellout rules for U.S. English. This rule set has two variants:
// * %simplified is a set of rules showing the simple method of spelling
// * out numbers in English: 289 is formatted as "two hundred eighty-nine".
// * %default uses a more complicated algorithm to format
// * numbers in a more natural way: 289 is formatted as "two hundred AND
// * eighty-nine" and commas are inserted between the thousands groups for
// * values above 100,000.
SpelloutRules {
// This rule set shows the normal simple formatting rules for English
"%simplified:\n"
// negative number rule. This rule is used to format negative
// numbers. The result of formatting the number's absolute
// value is placed where the >> is.
" -x: minus >>;\n"
// fraction rule. This rule is used for formatting numbers
// with fractional parts. The result of formatting the
// number's integral part is substituted for the <<, and
// the result of formatting the number's fractional part
// (one digit at a time, e.g., 0.123 is "zero point one two
// three") replaces the >>.
" x.x: << point >>;\n"
// the rules for the values from 0 to 19 are simply the
// words for those numbers
" zero; one; two; three; four; five; six; seven; eight; nine;\n"
" ten; eleven; twelve; thirteen; fourteen; fifteen; sixteen;\n"
" seventeen; eighteen; nineteen;\n"
// beginning at 20, we use the >> to mark the position where
// the result of formatting the number's ones digit is to be
// inserted. Thus, we only need a new rule at every multiple
// of 10. Text in brackets is omitted if the value being
// formatted is an even multiple of 10.
" 20: twenty[->>];\n"
" 30: thirty[->>];\n"
" 40: forty[->>];\n"
" 50: fifty[->>];\n"
" 60: sixty[->>];\n"
" 70: seventy[->>];\n"
" 80: eighty[->>];\n"
" 90: ninety[->>];\n"
// beginning at 100, we can use << to mark the position where
// the result of formatting the multiple of 100 is to be
// inserted. Notice also that the meaning of >> has shifted:
// here, it refers to both the ones place and the tens place.
// The meanings of the << and >> tokens depend on the base value
// of the rule. A rule's divisor is (usually) the highest
// power of 10 that is less than or equal to the rule's base
// value. The value being formatted is divided by the rule's
// divisor, and the integral quotient is used to get the text
// for <<, while the remainder is used to produce the text
// for >>. Again, text in brackets is omitted if the value
// being formatted is an even multiple of the rule's divisor
// (in this case, an even multiple of 100)
" 100: << hundred[ >>];\n"
// The rules for the higher numbers work the same way as the
// rule for 100: Again, the << and >> tokens depend on the
// rule's divisor, which for all these rules is also the rule's
// base value. To group by thousand, we simply don't have any
// rules between 1,000 and 1,000,000.
" 1000: << thousand[ >>];\n"
" 1,000,000: << million[ >>];\n"
" 1,000,000,000: << billion[ >>];\n"
" 1,000,000,000,000: << trillion[ >>];\n"
// overflow rule. This rule specifies that values of a
// quadrillion or more are shown in numerals rather than words.
// The == token means to format (with new rules) the value
// being formatted by this rule and place the result where
// the == is. The #,##0 inside the == signs is a
// DecimalFormat pattern. It specifies that the value should
// be formatted with a DecimalFormat object, and that it
// should be formatted with no decimal places, at least one
// digit, and a thousands separator.
" 1,000,000,000,000,000: =#,##0=;\n"
// %default is a more elaborate form of %simplified; it is basically
// the same, except that it introduces "and" before the ones digit
// when appropriate (basically, between the tens and ones digits) and
// separates the thousands groups with commas in values over 100,000.
"%default:\n"
// negative-number and fraction rules. These are the same
// as those for %simplified, but have to be stated here too
// because this is an entry point
" -x: minus >>;\n"
" x.x: << point >>;\n"
// just use %simplified for values below 100
" =%simplified=;\n"
// for values from 100 to 99,999 use %%and to decide whether or
// not to interpose the "and"
" 100: << hundred[ >%%and>];\n"
" 1000: << thousand[ >%%and>];\n"
// for values of 100,000 and up, use %%commas to interpose the
// commas in the right places (and also to interpose the "and")
" 100,000>>: << thousand[>%%commas>];\n"
" 1,000,000: << million[>%%commas>];\n"
" 1,000,000,000: << billion[>%%commas>];\n"
" 1,000,000,000,000: << trillion[>%%commas>];\n"
" 1,000,000,000,000,000: =#,##0=;\n"
// if the value passed to this rule set is 100 or more, don't
// add the "and"; if it's less than 100, add "and" before the last
// digits
"%%and:\n"
" and =%default=;\n"
" 100: =%default=;\n"
// this rule set is used to place the commas
"%%commas:\n"
// for values below 100, add "and" (the apostrophe at the
// beginning is ignored, but causes the space that follows it
// to be significant: this is necessary because the rules
// calling %%commas don't put a space before it)
" ' and =%default=;\n"
// put a comma after the thousands (or whatever preceded the
// hundreds)
" 100: , =%default=;\n"
// put a comma after the millions (or whatever precedes the
// thousands)
" 1000: , <%default< thousand, >%default>;\n"
// and so on...
" 1,000,000: , =%default=;"
// %%lenient-parse isn't really a set of number formatting rules;
// it's a set of collation rules. Lenient-parse mode uses a Collator
// object to compare fragments of the text being parsed to the text
// in the rules, allowing more leeway in the matching text. This set
// of rules tells the formatter to ignore commas when parsing (it
// already ignores spaces, which is why we refer to the space; it also
// ignores hyphens, making "twenty one" and "twenty-one" parse
// identically)
"%%lenient-parse:\n"
// earlier form of the rule, kept for reference:
// " & ' ' , ',' ;\n"
" &\u0000 << ' ' << ',' << '-'; \n"
}
// * This rule set adds an English ordinal abbreviation to the end of a
// * number. For example, 2 is formatted as "2nd". Parsing doesn't work with
// * this rule set. To parse, use DecimalFormat on the numeral.
OrdinalRules {
// this rule set formats the numeral and calls %%abbrev to
// supply the abbreviation
"%main:\n"
" =#,##0==%%abbrev=;\n"
// this rule set supplies the abbreviation
"%%abbrev:\n"
// the abbreviations for 0 through 4. The last "th" rule stays in
// effect until the next rule, so everything from 4 to 19 ends in "th"
" th; st; nd; rd; th;\n"
// at 20, we begin repeating the cycle every 10 (13 is "13th",
// but 23 and 33 are "23rd" and "33rd"). We do this by
// ignoring all but the ones digit in selecting the abbreviation
" 20: >>;\n"
// at 100, we repeat the whole cycle by considering only the
// tens and ones digits in picking an abbreviation
" 100: >>;\n"
}
// * This rule set formats a number of seconds in sexagesimal notation
// * (i.e., hours, minutes, and seconds). %with-words formats it with
// * words (3,740 is "1 hour, 2 minutes, 20 seconds") and %in-numerals
// * formats it entirely in numerals (3,740 is "1:02:20").
DurationRules {
// main rule set for formatting with words
"%with-words:\n"
// take care of singular and plural forms of "second"
" 0 seconds; 1 second; =0= seconds;\n"
// use %%min to format values of 60 seconds or more
" 60/60: <%%min<[, >>];\n"
// use %%hr to format values of 3,600 seconds or more
// (the ">>>" below causes us to see the number of minutes
// when there are zero minutes)
" 3600/60: <%%hr<[, >>>];\n"
// this rule set takes care of the singular and plural forms
// of "minute"
"%%min:\n"
" 0 minutes; 1 minute; =0= minutes;\n"
// this rule set takes care of the singular and plural forms
// of "hour"
"%%hr:\n"
" 0 hours; 1 hour; =0= hours;\n"
// main rule set for formatting in numerals
"%in-numerals:\n"
// values below 60 seconds are shown with "sec."
" =0= sec.;\n"
// higher values are shown with colons: %%min-sec is used for
// values below 3,600 seconds...
" 60: =%%min-sec=;\n"
// ...and %%hr-min-sec is used for values of 3,600 seconds
// and above
" 3600: =%%hr-min-sec=;\n"
// this rule causes values of less than 10 minutes to show without
// a leading zero
"%%min-sec:\n"
// the seconds are always shown with two digits, preceded by a colon
" 0: :=00=;\n"
" 60/60: <0<>>;\n"
// this rule set is used for values of 3,600 or more. Minutes are always
// shown, and always shown with two digits
"%%hr-min-sec:\n"
" 0: :=00=;\n"
" 60/60: <00<>>;\n"
" 3600/60: <#,##0<:>>>;\n"
// the lenient-parse rules allow several different characters to be used
// as delimiters between hours, minutes, and seconds
"%%lenient-parse:\n"
" & ':' = '.' = ' ' = '-';\n"
}
}

View File

@ -122,4 +122,44 @@ ru {
"Cyrs" // ISO 15924 Name
}
//------------------------------------------------------------
// Rule Based Number Format Support
//------------------------------------------------------------
// * Spellout rules for Russian.
// Can someone supply me with information on negatives and decimals?
// How about words for billions and trillions?
SpelloutRules {
// words for 0 to 9. Note that 5, 6, and 9 end in a soft sign
// (\u044c), which was missing from the original data.
"\u043d\u043e\u043b\u044c; \u043e\u0434\u0438\u043d; \u0434\u0432\u0430; \u0442\u0440\u0438; "
"\u0447\u0435\u0442\u044b\u0440\u0435; \u043f\u044f\u0442\u044c; \u0448\u0435\u0441\u0442\u044c; "
"\u0441\u0435\u043c\u044c; \u0432\u043e\u0441\u0435\u043c\u044c; \u0434\u0435\u0432\u044f\u0442\u044c;\n"
// words for 10 to 19 (10 also ends in a soft sign; 12 is spelled
// with a single \u043d)
"10: \u0434\u0435\u0441\u044f\u0442\u044c; "
"\u043e\u0434\u0438\u043d\u043d\u0430\u0434\u0446\u0430\u0442\u044c;\n"
"\u0434\u0432\u0435\u043d\u0430\u0434\u0446\u0430\u0442\u044c; "
"\u0442\u0440\u0438\u043d\u0430\u0434\u0446\u0430\u0442\u044c; "
"\u0447\u0435\u0442\u044b\u0440\u043d\u0430\u0434\u0446\u0430\u0442\u044c;\n"
"15: \u043f\u044f\u0442\u043d\u0430\u0434\u0446\u0430\u0442\u044c; "
"\u0448\u0435\u0441\u0442\u043d\u0430\u0434\u0446\u0430\u0442\u044c; "
"\u0441\u0435\u043c\u043d\u0430\u0434\u0446\u0430\u0442\u044c; "
"\u0432\u043e\u0441\u0435\u043c\u043d\u0430\u0434\u0446\u0430\u0442\u044c; "
"\u0434\u0435\u0432\u044f\u0442\u043d\u0430\u0434\u0446\u0430\u0442\u044c;\n"
// words for the multiples of 10; the ones digit, if any, follows
// after a space
"20: \u0434\u0432\u0430\u0434\u0446\u0430\u0442\u044c[ >>];\n"
"30: \u0442\u0440\u0438\u0434\u0446\u0430\u0442\u044c[ >>];\n"
"40: \u0441\u043e\u0440\u043e\u043a[ >>];\n"
"50: \u043f\u044f\u0442\u044c\u0434\u0435\u0441\u044f\u0442[ >>];\n"
"60: \u0448\u0435\u0441\u0442\u044c\u0434\u0435\u0441\u044f\u0442[ >>];\n"
"70: \u0441\u0435\u043c\u044c\u0434\u0435\u0441\u044f\u0442[ >>];\n"
"80: \u0432\u043e\u0441\u0435\u043c\u044c\u0434\u0435\u0441\u044f\u0442[ >>];\n"
"90: \u0434\u0435\u0432\u044f\u043d\u043e\u0441\u0442\u043e[ >>];\n"
// hundreds, thousands, and millions. The multiplier is omitted for
// exactly one hundred/thousand/million.
// NOTE(review): these rules do not inflect the word for hundred,
// thousand, or million after a multiplier -- data still incomplete.
"100: \u0441\u0442\u043e[ >>];\n"
"200: << \u0441\u0442\u043e[ >>];\n"
"1000: \u0442\u044b\u0441\u044f\u0447\u0430[ >>];\n"
"2000: << \u0442\u044b\u0441\u044f\u0447\u0430[ >>];\n"
"1,000,000: \u043c\u0438\u043b\u043b\u0438\u043e\u043d[ >>];\n"
"2,000,000: << \u043c\u0438\u043b\u043b\u0438\u043e\u043d[ >>];\n"
// overflow rule: larger values are formatted in numerals
"1,000,000,000: =#,##0=;"
}
}

View File

@ -118,4 +118,36 @@ sv {
"Latf", // ISO 15924 Name
"Latg", // ISO 15924 Name
}
//------------------------------------------------------------
// Rule Based Number Format Support
//------------------------------------------------------------
// * Spellout rules for Swedish.
// can someone supply me with information on negatives and decimals?
SpelloutRules {
// words for 0 to 19 (7 is "sju", matching "sjutton" and "sjuttio")
"noll; ett; tv\u00e5; tre; fyra; fem; sex; sju; \u00e5tta; nio;\n"
"tio; elva; tolv; tretton; fjorton; femton; sexton; sjutton; arton; nitton;\n"
// multiples of 10; the ones digit is appended without a space
"20: tjugo[>>];\n"
"30: trettio[>>];\n"
"40: fyrtio[>>];\n"
"50: femtio[>>];\n"
"60: sextio[>>];\n"
"70: sjuttio[>>];\n"
"80: \u00e5ttio[>>];\n"
"90: nittio[>>];\n"
// hundreds are run together with what follows; the multiplier is
// omitted for exactly one hundred
"100: hundra[>>];\n"
"200: <<hundra[>>];\n"
"1000: tusen[ >>];\n"
"2000: << tusen[ >>];\n"
// "miljon", "miljard", and "biljon" take a plural ending when
// preceded by a multiplier of two or more
"1,000,000: en miljon[ >>];\n"
"2,000,000: << miljoner[ >>];\n"
"1,000,000,000: en miljard[ >>];\n"
"2,000,000,000: << miljarder[ >>];\n"
"1,000,000,000,000: en biljon[ >>];\n"
"2,000,000,000,000: << biljoner[ >>];\n"
// overflow rule: larger values are formatted in numerals
"1,000,000,000,000,000: =#,##0="
}
}

View File

@ -245,4 +245,43 @@ th {
"Thai", // Script Name
"Thai" // ISO 15924 Name
}
//------------------------------------------------------------
// Rule Based Number Format Support
//------------------------------------------------------------
// Spellout rules for Thai. Data from Suwit Srivilairith, IBM Thailand
SpelloutRules {
// main (and only public) rule set
"%default:\n"
// negative-number and fraction rules
" -x: \u0e25\u0e1a>>;\n"
" x.x: <<\u0e08\u0e38\u0e14>>>;\n"
// words for 0 to 19
" \u0e28\u0e39\u0e19\u0e22\u0e4c; \u0e2b\u0e19\u0e36\u0e48\u0e07; \u0e2a\u0e2d\u0e07; \u0e2a\u0e32\u0e21;\n"
" \u0e2a\u0e35\u0e48; \u0e2b\u0e49\u0e32; \u0e2b\u0e01; \u0e40\u0e08\u0e47\u0e14; \u0e41\u0e1b\u0e14;\n"
" \u0e40\u0e01\u0e49\u0e32; \u0e2a\u0e34\u0e1a; \u0e2a\u0e34\u0e1a\u0e40\u0e2d\u0e47\u0e14;\n"
" \u0e2a\u0e34\u0e1a\u0e2a\u0e2d\u0e07; \u0e2a\u0e34\u0e1a\u0e2a\u0e32\u0e21;\n"
" \u0e2a\u0e34\u0e1a\u0e2a\u0e35\u0e48; \u0e2a\u0e34\u0e1a\u0e2b\u0e49\u0e32;\n"
" \u0e2a\u0e34\u0e1a\u0e2b\u0e01; \u0e2a\u0e34\u0e1a\u0e40\u0e08\u0e47\u0e14;\n"
" \u0e2a\u0e34\u0e1a\u0e41\u0e1b\u0e14; \u0e2a\u0e34\u0e1a\u0e40\u0e01\u0e49\u0e32;\n"
// multiples of 10; the ones digit is formatted by %%alt-ones, which
// supplies a different word for a trailing 1
" 20: \u0e22\u0e35\u0e48\u0e2a\u0e34\u0e1a[>%%alt-ones>];\n"
" 30: \u0e2a\u0e32\u0e21\u0e2a\u0e34\u0e1a[>%%alt-ones>];\n"
" 40: \u0e2a\u0e35\u0e48\u0e2a\u0e34\u0e1a[>%%alt-ones>];\n"
" 50: \u0e2b\u0e49\u0e32\u0e2a\u0e34\u0e1a[>%%alt-ones>];\n"
" 60: \u0e2b\u0e01\u0e2a\u0e34\u0e1a[>%%alt-ones>];\n"
" 70: \u0e40\u0e08\u0e47\u0e14\u0e2a\u0e34\u0e1a[>%%alt-ones>];\n"
" 80: \u0e41\u0e1b\u0e14\u0e2a\u0e34\u0e1a[>%%alt-ones>];\n"
" 90: \u0e40\u0e01\u0e49\u0e32\u0e2a\u0e34\u0e1a[>%%alt-ones>];\n"
// higher powers of 10, each preceded by its multiplier
" 100: <<\u0e23\u0e49\u0e2d\u0e22[>>];\n"
" 1000: <<\u0e1e\u0e31\u0e19[>>];\n"
" 10000: <<\u0e2b\u0e21\u0e37\u0e48\u0e19[>>];\n"
" 100000: <<\u0e41\u0e2a\u0e19[>>];\n"
" 1,000,000: <<\u0e25\u0e49\u0e32\u0e19[>>];\n"
" 1,000,000,000: <<\u0e1e\u0e31\u0e19\u0e25\u0e49\u0e32\u0e19[>>];\n"
" 1,000,000,000,000: <<\u0e25\u0e49\u0e32\u0e19\u0e25\u0e49\u0e32\u0e19[>>];\n"
// overflow rule: larger values are formatted in numerals
" 1,000,000,000,000,000: =#,##0=;\n"
// %%alt-ones: alternate word for 1 when it follows a tens word;
// everything from 2 up falls back to %default
"%%alt-ones:\n"
" \u0e28\u0e39\u0e19\u0e22\u0e4c;\n"
" \u0e40\u0e2d\u0e47\u0e14;\n"
// (the rule text already ends with ';' inside the string; the stray
// ';' that followed the closing quote has been removed)
" =%default=;\n"
}
}

View File

@ -519,4 +519,58 @@ de {
"Latf", // ISO 15924 Name
"Latg", // ISO 15924 Name
}
//------------------------------------------------------------
// Rule Based Number Format Support
//------------------------------------------------------------
// * RuleBasedNumberFormat data for German
// again, I'm not 100% sure of these rules. I think both "hundert" and
// "einhundert" are correct for 100, but I'm not sure which is preferable
// in situations where this framework is likely to be used. Also, is it
// really true that numbers are run together into compound words all the
// time?
SpelloutRules {
// 1 is "eins" when by itself, but turns into "ein" in most
// combinations
"%alt-ones:\n"
" -x: minus >>;\n"
" x.x: << komma >>;\n"
" null; eins; =%%main=;\n"
"%%main:\n"
// words for numbers from 0 to 12. Notice that the values
// from 13 to 19 can be derived algorithmically, unlike in most
// other languages
" null; ein; zwei; drei; vier; f\u00fcnf; sechs; sieben; acht; neun;\n"
" zehn; elf; zw\u00f6lf; >>zehn;\n"
// ...except for 16 and 17, where the ones word is shortened
// ("sechzehn", "siebzehn" rather than "sechszehn", "siebenzehn");
// at 18 we go back to the algorithmic rule
" 16: sechzehn; siebzehn; >>zehn;\n"
// rules for the multiples of 10. Notice that the ones digit
// goes on the front
" 20: [>>und]zwanzig;\n"
" 30: [>>und]drei\u00dfig;\n"
" 40: [>>und]vierzig;\n"
" 50: [>>und]f\u00fcnfzig;\n"
" 60: [>>und]sechzig;\n"
" 70: [>>und]siebzig;\n"
" 80: [>>und]achtzig;\n"
" 90: [>>und]neunzig;\n"
// hundreds and thousands are run together with what follows; the
// remainder is formatted with %alt-ones so a trailing 1 is "eins"
" 100: hundert[>%alt-ones>];\n"
" 200: <<hundert[>%alt-ones>];\n"
" 1000: tausend[>%alt-ones>];\n"
" 2000: <<tausend[>%alt-ones>];\n"
// millions and above are separate words with singular and plural forms
" 1,000,000: eine Million[ >%alt-ones>];\n"
" 2,000,000: << Millionen[ >%alt-ones>];\n"
" 1,000,000,000: eine Milliarde[ >%alt-ones>];\n"
" 2,000,000,000: << Milliarden[ >%alt-ones>];\n"
" 1,000,000,000,000: eine Billion[ >%alt-ones>];\n"
" 2,000,000,000,000: << Billionen[ >%alt-ones>];\n"
// overflow rule: larger values are formatted in numerals
" 1,000,000,000,000,000: =#,##0=;"
// collation rules for lenient parsing: ignore spaces and hyphens, and
// treat "ae", "oe", "ue" as variants of the corresponding umlauts
"%%lenient-parse:\n"
" &\u0000 << ' ' << '-'\n"
" & ae , \u00e4 & ae , \u00c4\n"
" & oe , \u00f6 & oe , \u00d6\n"
" & ue , \u00fc & ue , \u00dc\n"
}
}

View File

@ -116,4 +116,53 @@ el {
"Greek",// Script Name
"Grek" // ISO 15924 Name
}
//------------------------------------------------------------
// Rule Based Number Format Support
//------------------------------------------------------------
// * Spellout rules for Greek. Again in Greek we have to supply the words
// * for the multiples of 100 because they can't be derived algorithmically.
// * Also, the tens digit changes form when followed by a ones digit: an
// * accent mark disappears from the tens digit and moves to the ones digit.
// * Therefore, instead of using the [] notation, we actually have to use
// * two separate rules for each multiple of 10 to show the two forms of
// * the word.
// Can someone supply me with information on negatives and decimals?
// I'm also missing the word for zero. Can someone clue me in?
SpelloutRules {
// words for 0 to 9 (the word for zero is a placeholder). 3 begins
// with tau (\u03c4), not delta.
"zero (incomplete data); \u03ad\u03bd\u03b1; \u03b4\u03cd\u03bf; \u03c4\u03c1\u03af\u03b1; "
"\u03c4\u03ad\u03c3\u03c3\u03b5\u03c1\u03b1; \u03c0\u03ad\u03bd\u03c4\u03b5; "
"\u03ad\u03be\u03b9; \u03b5\u03c0\u03c4\u03ac; \u03bf\u03ba\u03c4\u03ce; "
"\u03b5\u03bd\u03bd\u03ad\u03b1;\n"
// 10, 11, and 12 have their own words; 13 to 19 are the unaccented
// word for 10 followed by the ones digit
"10: \u03b4\u03ad\u03ba\u03b1; "
"\u03ad\u03bd\u03b4\u03b5\u03ba\u03b1; \u03b4\u03ce\u03b4\u03b5\u03ba\u03b1; "
"\u03b4\u03b5\u03ba\u03b1>>;\n"
// each multiple of 10 has two rules: the accented form used when it
// stands alone, and the unaccented form used before a ones digit
"20: \u03b5\u03af\u03ba\u03bf\u03c3\u03b9; \u03b5\u03b9\u03ba\u03bf\u03c3\u03b9>>;\n"
"30: \u03c4\u03c1\u03b9\u03ac\u03bd\u03c4\u03b1; \u03c4\u03c1\u03b9\u03b1\u03bd\u03c4\u03b1>>;\n"
"40: \u03c3\u03b1\u03c1\u03ac\u03bd\u03c4\u03b1; \u03c3\u03b1\u03c1\u03b1\u03bd\u03c4\u03b1>>;\n"
"50: \u03c0\u03b5\u03bd\u03ae\u03bd\u03c4\u03b1; \u03c0\u03b5\u03bd\u03b7\u03bd\u03c4\u03b1>>;\n"
"60: \u03b5\u03be\u03ae\u03bd\u03c4\u03b1; \u03b5\u03be\u03b7\u03bd\u03c4\u03b1>>;\n"
"70: \u03b5\u03b2\u03b4\u03bf\u03bc\u03ae\u03bd\u03c4\u03b1; "
"\u03b5\u03b2\u03b4\u03bf\u03bc\u03b7\u03bd\u03c4\u03b1>>;\n"
"80: \u03bf\u03b3\u03b4\u03cc\u03bd\u03c4\u03b1; \u03bf\u03b3\u03b4\u03bf\u03bd\u03c4\u03b1>>;\n"
"90: \u03b5\u03bd\u03bd\u03b5\u03bd\u03ae\u03bd\u03c4\u03b1; "
"\u03b5\u03bd\u03bd\u03b5\u03bd\u03b7\u03bd\u03c4\u03b1>>;\n"
// the multiples of 100 are spelled out individually because they
// can't be derived algorithmically
"100: \u03b5\u03ba\u03b1\u03c4\u03cc[\u03bd >>];\n"
"200: \u03b4\u03b9\u03b1\u03ba\u03cc\u03c3\u03b9\u03b1[ >>];\n"
"300: \u03c4\u03c1\u03b9\u03b1\u03ba\u03cc\u03c3\u03b9\u03b1[ >>];\n"
"400: \u03c4\u03b5\u03c4\u03c1\u03b1\u03ba\u03cc\u03c3\u03b9\u03b1[ >>];\n"
"500: \u03c0\u03b5\u03bd\u03c4\u03b1\u03ba\u03cc\u03c3\u03b9\u03b1[ >>];\n"
"600: \u03b5\u03be\u03b1\u03ba\u03cc\u03c3\u03b9\u03b1[ >>];\n"
"700: \u03b5\u03c0\u03c4\u03b1\u03ba\u03cc\u03c3\u03b9\u03b1[ >>];\n"
"800: \u03bf\u03ba\u03c4\u03b1\u03ba\u03cc\u03c3\u03b9\u03b1[ >>];\n"
"900: \u03b5\u03bd\u03bd\u03b9\u03b1\u03ba\u03cc\u03c3\u03b9\u03b1[ >>];\n"
// thousands, millions, and billions
// NOTE(review): the spelling of the words for million and billion
// below should be confirmed by a native speaker
"1000: \u03c7\u03af\u03bb\u03b9\u03b1[ >>];\n"
"2000: << \u03c7\u03af\u03bb\u03b9\u03b1[ >>];\n"
"1,000,000: << \u03b5\u03ba\u03b1\u03c4\u03bf\u03bc\u03bc\u03b9\u03cc\u03c1\u03b9\u03bf[ >>];\n"
"1,000,000,000: << \u03b4\u03b9\u03c3\u03b5\u03ba\u03b1\u03c4\u03bf\u03bc\u03bc\u03b9\u03cc\u03c1\u03b9\u03bf[ >>];\n"
// overflow rule: larger values are formatted in numerals
"1,000,000,000,000: =#,##0="
}
}

View File

@ -233,4 +233,11 @@ en {
"Latf", // ISO 15924 Name
"Latg", // ISO 15924 Name
}
//------------------------------------------------------------
// Rule Based Number Format Support
//------------------------------------------------------------
// inherited from root
}

View File

@ -49,4 +49,70 @@ en_GB {
"BST",
}
}
//------------------------------------------------------------
// Rule Based Number Format Support
//------------------------------------------------------------
// * Spellout rules for U.K. English. U.K. English has one significant
// * difference from U.S. English: the names for values of 1,000,000,000
// * and higher. In American English, each successive "-illion" is 1,000
// * times greater than the preceding one: 1,000,000,000 is "one billion"
// * and 1,000,000,000,000 is "one trillion." In British English, each
// * successive "-illion" is one million times greater than the one before:
// * "one billion" is 1,000,000,000,000 (or what Americans would call a
// * "trillion"), and "one trillion" is 1,000,000,000,000,000,000.
// * 1,000,000,000 in British English is "one thousand million." (This
// * value is sometimes called a "milliard," but this word seems to have
// * fallen into disuse.)
// Could someone please correct me if I'm wrong about "milliard" falling
// into disuse, or have missed any other details of how large numbers
// are rendered. Also, could someone please provide me with information
// on which other English-speaking countries use which system? Right now,
// I'm assuming that the U.S. system is used in Canada and that all the
// other English-speaking countries follow the British system. Can
// someone out there confirm this?
SpelloutRules {
// simple formatting: 289 is "two hundred eighty-nine"
"%simplified:\n"
// negative-number and fraction rules
" -x: minus >>;\n"
" x.x: << point >>;\n"
// words for 0 to 19
" zero; one; two; three; four; five; six; seven; eight; nine;\n"
" ten; eleven; twelve; thirteen; fourteen; fifteen; sixteen;\n"
" seventeen; eighteen; nineteen;\n"
// multiples of 10; the bracketed hyphen and ones digit are omitted
// for even multiples of 10
" 20: twenty[->>];\n"
" 30: thirty[->>];\n"
" 40: forty[->>];\n"
" 50: fifty[->>];\n"
" 60: sixty[->>];\n"
" 70: seventy[->>];\n"
" 80: eighty[->>];\n"
" 90: ninety[->>];\n"
" 100: << hundred[ >>];\n"
" 1000: << thousand[ >>];\n"
// British usage: there is no separate word at 1,000,000,000 (it is
// rendered as "one thousand million"), and "billion" is 10^12
" 1,000,000: << million[ >>];\n"
" 1,000,000,000,000: << billion[ >>];\n"
// overflow rule: larger values are formatted in numerals
" 1,000,000,000,000,000: =#,##0=;\n"
// more natural formatting, with "and" and commas inserted
"%default:\n"
" -x: minus >>;\n"
" x.x: << point >>;\n"
// values below 100 are formatted by %simplified
" =%simplified=;\n"
// %%and inserts "and" before the last two digits
" 100: << hundred[ >%%and>];\n"
" 1000: << thousand[ >%%and>];\n"
// from 100,000 up, %%commas places commas between the groups
" 100,000>>: << thousand[>%%commas>];\n"
" 1,000,000: << million[>%%commas>];\n"
" 1,000,000,000,000: << billion[>%%commas>];\n"
" 1,000,000,000,000,000: =#,##0=;\n"
// add "and" if the value is less than 100; otherwise just format it
"%%and:\n"
" and =%default=;\n"
" 100: =%default=;\n"
// place the commas (the leading apostrophe makes the space that
// follows it significant)
"%%commas:\n"
" ' and =%default=;\n"
" 100: , =%default=;\n"
" 1000: , <%default< thousand, >%default>;\n"
" 1,000,000: , =%default=;"
// collation rules used in lenient-parse mode
"%%lenient-parse:\n"
" & ' ' , ',' ;\n"
}
}

View File

@ -140,4 +140,31 @@ eo {
"Latf", // ISO 15924 Name
"Latg", // ISO 15924 Name
}
//------------------------------------------------------------
// Rule Based Number Format Support
//------------------------------------------------------------
// data from 'Esperanto-programita 1' courtesy of Markus Scherer
SpelloutRules {
// negative-number and fraction rules
"-x: minus >>;\n"
"x.x: << komo >>;\n"
// words for 0 to 9
"nulo; unu; du; tri; kvar; kvin; ses; sep; ok; na\u016d;\n"
// tens, hundreds, and thousands: the multiplier is omitted when it
// is 1 and is otherwise written together with the unit word
"10: dek[ >>];\n"
"20: <<dek[ >>];\n"
"100: cent[ >>];\n"
"200: <<cent[ >>];\n"
"1000: mil[ >>];\n"
"2000: <<mil[ >>];\n"
// 10,000 is special-cased as a single word; from 11,000 up the
// multiplier is a separate word
// NOTE(review): presumably the ">" after 11000 lowers the rule's
// exponent so that its divisor is 1,000 -- confirm
"10000: dekmil[ >>];\n"
"11000>: << mil[ >>];\n"
// millions and above have singular and plural forms
"1,000,000: miliono[ >>];\n"
"2,000,000: << milionoj[ >>];\n"
"1,000,000,000: miliardo[ >>];\n"
"2,000,000,000: << miliardoj[ >>];\n"
"1,000,000,000,000: biliono[ >>];\n"
"2,000,000,000,000: << bilionoj[ >>];\n"
// overflow rule: larger values are formatted in numerals
"1,000,000,000,000,000: =#,##0=;\n"
}
}

View File

@ -258,4 +258,69 @@ es {
"Latf", // ISO 15924 Name
"Latg", // ISO 15924 Name
}
//------------------------------------------------------------
// Rule Based Number Format Support
//------------------------------------------------------------
// * Spellout rules for Spanish. The Spanish rules are quite similar to
// * the English rules, but there are some important differences:
// * First, we have to provide separate rules for most of the twenties
// * because the ones digit frequently picks up an accent mark that it
// * doesn't have when standing alone. Second, each multiple of 100 has
// * to be specified separately because the multiplier on 100 very often
// * changes form in the contraction: 500 is "quinientos," not
// * "cincocientos." In addition, the word for 100 is "cien" when
// * standing alone, but changes to "ciento" when followed by more digits.
// * There are also some other differences.
// The Spanish rules are incomplete. I'm missing information on negative
// numbers and numbers with fractional parts. I also don't have
// information on numbers higher than the millions.
SpelloutRules {
// negative-number and fraction rules
"-x: menos >>;\n"
"x.x: << punto >>;\n"
// words for values from 0 to 19
"cero; uno; dos; tres; cuatro; cinco; seis; siete; ocho; nueve;\n"
"diez; once; doce; trece; catorce; quince; diecis\u00e9is;\n"
" diecisiete; dieciocho; diecinueve;\n"
// words for values from 20 to 29 (necessary because the ones digit
// often picks up an accent mark it doesn't have when standing alone)
"veinte; veintiuno; veintid\u00f3s; veintitr\u00e9s; veinticuatro;\n"
" veinticinco; veintis\u00e9is; veintisiete; veintiocho;\n"
" veintinueve;\n"
// words for multiples of 10 (notice that the tens digit is separated
// from the ones digit by the word "y".)
"30: treinta[ y >>];\n"
"40: cuarenta[ y >>];\n"
"50: cincuenta[ y >>];\n"
"60: sesenta[ y >>];\n"
"70: setenta[ y >>];\n"
"80: ochenta[ y >>];\n"
"90: noventa[ y >>];\n"
// 100 by itself is "cien," but 100 followed by something is "ciento"
"100: cien;\n"
"101: ciento >>;\n"
// words for multiples of 100 (must be stated because they're
// rarely simple concatenations)
"200: doscientos[ >>];\n"
"300: trescientos[ >>];\n"
"400: cuatrocientos[ >>];\n"
"500: quinientos[ >>];\n"
"600: seiscientos[ >>];\n"
"700: setecientos[ >>];\n"
"800: ochocientos[ >>];\n"
"900: novecientos[ >>];\n"
// for 1,000, the multiplier on "mil" is omitted: 2,000 is "dos mil,"
// but 1,000 is just "mil."
"1000: mil[ >>];\n"
"2000: << mil[ >>];\n"
// 1,000,000 is "un millon," not "uno millon"; with a multiplier of
// two or more the plural "millones" is used
"1,000,000: un mill\u00f3n[ >>];\n"
"2,000,000: << millones[ >>];\n"
// overflow rule
"1,000,000,000: =#,##0= (incomplete data);"
}
}

View File

@ -190,10 +190,73 @@ fr {
"Latf", // ISO 15924 Name
"Latg", // ISO 15924 Name
}
//------------------------------------------------------------
// Rule Based Number Format Support
//------------------------------------------------------------
// * Spellout rules for French. French adds some interesting quirks of its
// * own: 1) The word "et" is interposed between the tens and ones digits,
// * but only if the ones digit is 1: 20 is "vingt," and 22 is "vingt-deux,"
// * but 21 is "vingt-et-un." 2) There are no words for 70, 80, or 90.
// * "quatre-vingts" ("four twenties") is used for 80, and values proceed
// * by score from 60 to 99 (e.g., 73 is "soixante-treize" ["sixty-thirteen"]).
// * Numbers from 1,100 to 1,199 are rendered as hundreds rather than
// * thousands: 1,100 is "onze cents" ("eleven hundred"), rather than
// * "mille cent" ("one thousand one hundred")
SpelloutRules {
// the main rule set
"%main:\n"
// negative-number and fraction rules
" -x: moins >>;\n"
" x.x: << virgule >>;\n"
// words for numbers from 0 to 19
" z\u00e9ro; un; deux; trois; quatre; cinq; six; sept; huit; neuf;\n"
" dix; onze; douze; treize; quatorze; quinze; seize;\n"
" dix-sept; dix-huit; dix-neuf;\n"
// words for the multiples of 10: %%alt-ones inserts "et"
// when needed
" 20: vingt[->%%alt-ones>];\n"
" 30: trente[->%%alt-ones>];\n"
" 40: quarante[->%%alt-ones>];\n"
" 50: cinquante[->%%alt-ones>];\n"
// rule for 60. The /20 causes this rule's multiplier to be
// 20 rather than 10, allowing us to recurse for all values
// from 60 to 79...
" 60/20: soixante[->%%alt-ones>];\n"
// ...except for 71, which must be special-cased
" 71: soixante et onze;\n"
// at 72, we have to repeat the rule for 60 to get us to 79
" 72/20: soixante->%%alt-ones>;\n"
// at 80, we state a new rule with the phrase for 80. Since
// it changes form when there's a ones digit, we need a second
// rule at 81. This rule also includes "/20," allowing it to
// be used correctly for all values up to 99
" 80: quatre-vingts; 81/20: quatre-vingt->>;\n"
// "cent" becomes plural when preceded by a multiplier, and
// the multiplier is omitted from the singular form
" 100: cent[ >>];\n"
" 200: << cents[ >>];\n"
" 1000: mille[ >>];\n"
// values from 1,100 to 1,199 are rendered as "onze cents..."
// instead of "mille cent..." The > after "1100" decreases
// the rule's exponent, causing its multiplier to be 100 instead
// of 1,000. This prevents us from getting "onze cents cent
// vingt-deux" ("eleven hundred one hundred twenty-two").
" 1100>: onze cents[ >>];\n"
// at 1,200, we go back to formatting in thousands, so we
// repeat the rule for 1,000
" 1200: mille >>;\n"
// at 2,000, the multiplier is added
" 2000: << mille[ >>];\n"
" 1,000,000: << million[ >>];\n"
// the French word for 1,000,000,000 is "milliard" (no final "e")
" 1,000,000,000: << milliard[ >>];\n"
" 1,000,000,000,000: << billion[ >>];\n"
// overflow rule: larger values are formatted in numerals
" 1,000,000,000,000,000: =#,##0=;\n"
// %%alt-ones is used to insert "et" when the ones digit is 1
"%%alt-ones:\n"
" ; et-un; =%main=;\n"
// collation rules for lenient parsing: ignore spaces, commas, and
// hyphens
"%%lenient-parse:\n"
" &\u0000 << ' ' << ',' << '-';\n"
}
}

View File

@ -56,4 +56,51 @@ fr_CH {
"GMT",
}
}
//------------------------------------------------------------
// Rule Based Number Format Support
//------------------------------------------------------------
// * Spellout rules for Swiss French. Swiss French differs from French French
// * in that it does have words for 70, 80, and 90. This rule set shows them,
// * and is simpler as a result.
// again, I'm missing information on negative numbers and decimals for
// these two rule sets. Also, I'm not 100% sure about Swiss French. Is
// this correct? Is "onze cents" commonly used for 1,100 in both France
// and Switzerland? Can someone fill me in on the rules for the other
// French-speaking countries? I've heard conflicting opinions on which
// version is used in Canada, and I understand there's an alternate set
// of words for 70, 80, and 90 that is used somewhere, but I don't know
// what those words are or where they're used.
SpelloutRules {
// the main rule set
"%main:\n"
// negative-number and fraction rules
" -x: moins >>;\n"
" x.x: << virgule >>;\n"
// words for numbers from 0 to 19
" z\u00e9ro; un; deux; trois; quatre; cinq; six; sept; huit; neuf;\n"
" dix; onze; douze; treize; quatorze; quinze; seize;\n"
" dix-sept; dix-huit; dix-neuf;\n"
// words for the multiples of 10: %%alt-ones inserts "et" when the
// ones digit is 1
" 20: vingt[->%%alt-ones>];\n"
" 30: trente[->%%alt-ones>];\n"
" 40: quarante[->%%alt-ones>];\n"
" 50: cinquante[->%%alt-ones>];\n"
" 60: soixante[->%%alt-ones>];\n"
// notice new words for 70, 80, and 90
" 70: septante[->%%alt-ones>];\n"
" 80: octante[->%%alt-ones>];\n"
" 90: nonante[->%%alt-ones>];\n"
// "cent" becomes plural when preceded by a multiplier
" 100: cent[ >>];\n"
" 200: << cents[ >>];\n"
" 1000: mille[ >>];\n"
// 1,100 to 1,199 are rendered as "onze cents..."; at 1,200 we go
// back to formatting in thousands
" 1100>: onze cents[ >>];\n"
" 1200: mille >>;\n"
" 2000: << mille[ >>];\n"
" 1,000,000: << million[ >>];\n"
// the French word for 1,000,000,000 is "milliard" (no final "e")
" 1,000,000,000: << milliard[ >>];\n"
" 1,000,000,000,000: << billion[ >>];\n"
// overflow rule: larger values are formatted in numerals
" 1,000,000,000,000,000: =#,##0=;\n"
// %%alt-ones is used to insert "et" when the ones digit is 1
"%%alt-ones:\n"
" ; et-un; =%main=;"
}
}

View File

@ -102,4 +102,38 @@ he {
"Hebrew", // Script Name
"Hebr" // ISO 15924 Name
}
//------------------------------------------------------------
// Rule Based Number Format Support
//------------------------------------------------------------
// * Spellout rules for Hebrew. Hebrew actually has inflected forms for
// * most of the lower-order numbers. The masculine forms are shown
// * here.
// This data is woefully incomplete. Can someone fill me in on the
// various inflected forms of the numbers, which seem to be necessary
// to do Hebrew correctly? Can someone supply me with data for values
// from 1,000,000 on up? What about the word for zero? What about
// information on negatives and decimals?
SpelloutRules {
// words for 0 to 9 (the word for zero is a placeholder); the explicit
// "4:" and "7:" base values just restart the numbering on a new line
"zero (incomplete data); \u05d0\u05d4\u05d3; \u05e9\u05d2\u05d9\u05d9\u05dd; \u05e9\u05dc\u05d5\u05e9\u05d4;\n"
"4: \u05d0\u05d3\u05d1\u05e6\u05d4; \u05d7\u05d2\u05d5\u05d9\u05e9\u05d4; \u05e9\u05e9\u05d4;\n"
"7: \u05e9\u05d1\u05e6\u05d4; \u05e9\u05de\u05d5\u05d2\u05d4; \u05ea\u05e9\u05e6\u05d4;\n"
// words for the multiples of 10; the ones digit, if any, follows
// after a space
"10: \u05e6\u05e9\u05d3\u05d4[ >>];\n"
"20: \u05e6\u05e9\u05d3\u05d9\u05dd[ >>];\n"
"30: \u05e9\u05dc\u05d5\u05e9\u05d9\u05dd[ >>];\n"
"40: \u05d0\u05d3\u05d1\u05e6\u05d9\u05dd[ >>];\n"
"50: \u05d7\u05de\u05d9\u05e9\u05d9\u05dd[ >>];\n"
"60: \u05e9\u05e9\u05d9\u05dd[ >>];\n"
"70: \u05e9\u05d1\u05e6\u05d9\u05dd[ >>];\n"
"80: \u05e9\u05de\u05d5\u05d2\u05d9\u05dd[ >>];\n"
"90: \u05ea\u05e9\u05e6\u05d9\u05dd[ >>];\n"
// hundreds and thousands; the multiplier is omitted when it is 1
"100: \u05de\u05d0\u05d4[ >>];\n"
"200: << \u05de\u05d0\u05d4[ >>];\n"
"1000: \u05d0\u05dc\u05e3[ >>];\n"
"2000: << \u05d0\u05dc\u05e3[ >>];\n"
// overflow rule: values from 1,000,000 up are formatted in numerals
"1,000,000: =#,##0= (incomplete data);"
}
}

View File

@ -117,4 +117,106 @@ it {
"Latf", // ISO 15924 Name
"Latg", // ISO 15924 Name
}
//------------------------------------------------------------
// Rule Based Number Format Support
//------------------------------------------------------------
// * Spellout rules for Italian. Like German, most Italian numbers are
// * written as single words. What makes these rules complicated is the rule
// * that says that when a word ending in a vowel and a word beginning with
// * a vowel are combined into a compound, the vowel is dropped from the
// * end of the first word: 180 is "centottanta," not "centoottanta."
// * The complexity of this rule set is to produce this behavior.
// Can someone confirm that I did the vowel-eliding thing right? I'm
// not 100% sure I'm doing it in all the right places, or completely
// correctly. Also, I don't have information for negatives and decimals,
// and I lack words for values from 1,000,000 on up.
SpelloutRules {
// main rule set. Follows the patterns of the preceding rule sets,
// except that the final vowel is omitted from words ending in
// vowels when they are followed by another word; instead, we have
// separate rule sets that are identical to this one, except that
// all the words that don't begin with a vowel have a vowel tacked
// onto them at the front. A word ending in a vowel calls a
// substitution that will supply that vowel, unless that vowel is to
// be elided.
"%main:\n"
" -x: meno >>;\n"
" x.x: << virgola >>;\n"
" zero; uno; due; tre; quattro; cinque; sei; sette; otto;\n"
" nove;\n"
// 17 is "diciassette" (double s)
" dieci; undici; dodici; tredici; quattordici; quindici; sedici;\n"
" diciassette; diciotto; diciannove;\n"
// "venti" ends in "i", so it calls %%with-i; "trenta" and the other
// tens words end in "a", so they call %%with-a (32 is "trentadue")
" 20: venti; vent>%%with-i>;\n"
" 30: trenta; trent>%%with-a>;\n"
" 40: quaranta; quarant>%%with-a>;\n"
" 50: cinquanta; cinquant>%%with-a>;\n"
" 60: sessanta; sessant>%%with-a>;\n"
" 70: settanta; settant>%%with-a>;\n"
" 80: ottanta; ottant>%%with-a>;\n"
" 90: novanta; novant>%%with-a>;\n"
" 100: cento; cent[>%%with-o>];\n"
" 200: <<cento; <<cent[>%%with-o>];\n"
// NOTE(review): "mille" ends in "e", but no %%with-e rule set exists;
// the %%with-i call here is kept from the original data -- confirm
" 1000: mille; mill[>%%with-i>];\n"
" 2000: <<mila; <<mil[>%%with-a>];\n"
" 100,000>>: <<mila[ >>];\n"
" 1,000,000: =#,##0= (incomplete data);\n"
// same as %main, but words beginning with a consonant are prefixed
// with "a"
"%%with-a:\n"
" azero; uno; adue; atre; aquattro; acinque; asei; asette; otto;\n"
" anove;\n"
" adieci; undici; adodici; atredici; aquattordici; aquindici; asedici;\n"
" adiciassette; adiciotto; adiciannove;\n"
" 20: aventi; avent>%%with-i>;\n"
" 30: atrenta; atrent>%%with-a>;\n"
" 40: aquaranta; aquarant>%%with-a>;\n"
" 50: acinquanta; acinquant>%%with-a>;\n"
" 60: asessanta; asessant>%%with-a>;\n"
" 70: asettanta; asettant>%%with-a>;\n"
" 80: ottanta; ottant>%%with-a>;\n"
" 90: anovanta; anovant>%%with-a>;\n"
" 100: acento; acent[>%%with-o>];\n"
" 200: <%%with-a<cento; <%%with-a<cent[>%%with-o>];\n"
" 1000: amille; amill[>%%with-i>];\n"
" 2000: <%%with-a<mila; <%%with-a<mil[>%%with-a>];\n"
" 100,000: =%main=;\n"
// same as %main, but words beginning with a consonant are prefixed
// with "i"
"%%with-i:\n"
" izero; uno; idue; itre; iquattro; icinque; isei; isette; otto;\n"
" inove;\n"
" idieci; undici; idodici; itredici; iquattordici; iquindici; isedici;\n"
" idiciassette; idiciotto; idiciannove;\n"
" 20: iventi; ivent>%%with-i>;\n"
" 30: itrenta; itrent>%%with-a>;\n"
" 40: iquaranta; iquarant>%%with-a>;\n"
" 50: icinquanta; icinquant>%%with-a>;\n"
" 60: isessanta; isessant>%%with-a>;\n"
" 70: isettanta; isettant>%%with-a>;\n"
" 80: ottanta; ottant>%%with-a>;\n"
" 90: inovanta; inovant>%%with-a>;\n"
" 100: icento; icent[>%%with-o>];\n"
" 200: <%%with-i<cento; <%%with-i<cent[>%%with-o>];\n"
" 1000: imille; imill[>%%with-i>];\n"
" 2000: <%%with-i<mila; <%%with-i<mil[>%%with-a>];\n"
" 100,000: =%main=;\n"
// same as %main, but words beginning with a consonant are prefixed
// with "o"
"%%with-o:\n"
" ozero; uno; odue; otre; oquattro; ocinque; osei; osette; otto;\n"
" onove;\n"
" odieci; undici; ododici; otredici; oquattordici; oquindici; osedici;\n"
" odiciassette; odiciotto; odiciannove;\n"
" 20: oventi; ovent>%%with-i>;\n"
" 30: otrenta; otrent>%%with-a>;\n"
" 40: oquaranta; oquarant>%%with-a>;\n"
" 50: ocinquanta; ocinquant>%%with-a>;\n"
" 60: osessanta; osessant>%%with-a>;\n"
" 70: osettanta; osettant>%%with-a>;\n"
" 80: ottanta; ottant>%%with-a>;\n"
" 90: onovanta; onovant>%%with-a>;\n"
" 100: ocento; ocent[>%%with-o>];\n"
" 200: <%%with-o<cento; <%%with-o<cent[>%%with-o>];\n"
" 1000: omille; omill[>%%with-i>];\n"
" 2000: <%%with-o<mila; <%%with-o<mil[>%%with-a>];\n"
" 100,000: =%main=;\n"
}
}

View File

@ -859,4 +859,47 @@ ja {
"JST",
}
}
//------------------------------------------------------------
// Rule Based Number Format Support
//------------------------------------------------------------
// * Spellout rules for Japanese. In Japanese, there really isn't any
// * distinction between a number written out in digits and a number
// * written out in words: the ideographic characters are both digits
// * and words. This rule set provides two variants: %traditional
// * uses the traditional CJK numerals (which are also used in China
// * and Korea). %financial uses alternate ideographs for many numbers
// * that are harder to alter than the traditional numerals (one could
// * fairly easily change a one to
// * a three just by adding two strokes, for example). This is also done in
// * the other countries using Chinese ideographs, but different ideographs
// * are used in those places.
// Can someone supply me with the right fraud-proof ideographs for
// Simplified and Traditional Chinese, and for Korean? Can someone
// supply me with information on negatives and decimals?
SpelloutRules {
// variant using the alternate, harder-to-alter ideographs
"%financial:\n"
// ideographs for 0 to 9
" \u96f6; \u58f1; \u5f10; \u53c2; \u56db; \u4f0d; \u516d; \u4e03; \u516b; \u4e5d;\n"
// 10 is written without a multiplier; from 20 up the multiplier
// precedes the ideograph for ten
" \u62fe[>>];\n"
" 20: <<\u62fe[>>];\n"
" 100: <<\u767e[>>];\n"
" 1000: <<\u5343[>>];\n"
// above 1,000 a new ideograph is introduced every four digits
" 10,000: <<\u4e07[>>];\n"
" 100,000,000: <<\u5104[>>];\n"
" 1,000,000,000,000: <<\u5146[>>];\n"
// overflow rule: larger values are formatted in numerals
" 10,000,000,000,000,000: =#,##0=;\n"
// variant using the traditional CJK numerals; same structure as
// %financial
"%traditional:\n"
" \u96f6; \u4e00; \u4e8c; \u4e09; \u56db; \u4e94; \u516d; \u4e03; \u516b; \u4e5d;\n"
" \u5341[>>];\n"
" 20: <<\u5341[>>];\n"
" 100: <<\u767e[>>];\n"
" 1000: <<\u5343[>>];\n"
" 10,000: <<\u4e07[>>];\n"
" 100,000,000: <<\u5104[>>];\n"
" 1,000,000,000,000: <<\u5146[>>];\n"
" 10,000,000,000,000,000: =#,##0=;"
}
}

View File

@ -108,4 +108,32 @@ nl {
"Latg", // ISO 15924 Name
}
//------------------------------------------------------------
// Rule Based Number Format Support
//------------------------------------------------------------
// * Spellout rules for Dutch
// can someone supply me with information on negatives and decimals?
SpelloutRules {
// negative-number and fraction rules
" -x: min >>;\n"
"x.x: << komma >>;\n"
// words for 0 to 19 (zero is "nul"; 18 is "achttien", with double t)
"nul; een; twee; drie; vier; vijf; zes; zeven; acht; negen;\n"
"tien; elf; twaalf; dertien; veertien; vijftien; zestien;\n"
"zeventien; achttien; negentien;\n"
// multiples of 10; the ones digit goes on the front, joined by "en"
"20: [>> en ]twintig;\n"
"30: [>> en ]dertig;\n"
"40: [>> en ]veertig;\n"
"50: [>> en ]vijftig;\n"
"60: [>> en ]zestig;\n"
"70: [>> en ]zeventig;\n"
"80: [>> en ]tachtig;\n"
"90: [>> en ]negentig;\n"
"100: << honderd[ >>];\n"
"1000: << duizend[ >>];\n"
"1,000,000: << miljoen[ >>];\n"
// 1,000,000,000 is "miljard" in Dutch ("biljoen" is 10^12)
"1,000,000,000: << miljard[ >>];\n"
// overflow rule: larger values are formatted in numerals
"1,000,000,000,000: =#,##0="
}
}

View File

@ -1180,11 +1180,221 @@ root {
"Anchorage",
}
}
LocaleScript{
"Latin",
"Latn",
"Latf",
"Latg"
}
//------------------------------------------------------------
// Rule Based Number Format Support
//------------------------------------------------------------
// * Spellout rules for U.S. English. This rule set has two variants:
// * %simplified is a set of rules showing the simple method of spelling
// * out numbers in English: 289 is formatted as "two hundred eighty-nine".
// * %default uses a more complicated algorithm to format
// * numbers in a more natural way: 289 is formatted as "two hundred AND
// * eighty-nine" and commas are inserted between the thousands groups for
// * values above 100,000.
SpelloutRules {
// This rule set shows the normal simple formatting rules for English
"%simplified:\n"
// negative number rule. This rule is used to format negative
// numbers. The result of formatting the number's absolute
// value is placed where the >> is.
" -x: minus >>;\n"
// fraction rule. This rule is used for formatting numbers
// with fractional parts. The result of formatting the
// number's integral part is substituted for the <<, and
// the result of formatting the number's fractional part
// (one digit at a time, e.g., 0.123 is "zero point one two
// three") replaces the >>.
" x.x: << point >>;\n"
// the rules for the values from 0 to 19 are simply the
// words for those numbers
" zero; one; two; three; four; five; six; seven; eight; nine;\n"
" ten; eleven; twelve; thirteen; fourteen; fifteen; sixteen;\n"
" seventeen; eighteen; nineteen;\n"
// beginning at 20, we use the >> to mark the position where
// the result of formatting the number's ones digit is inserted.
// Thus, we only need a new rule at every multiple of 10. Text in
// brackets is omitted if the value being formatted is an
// even multiple of 10.
" 20: twenty[->>];\n"
" 30: thirty[->>];\n"
" 40: forty[->>];\n"
" 50: fifty[->>];\n"
" 60: sixty[->>];\n"
" 70: seventy[->>];\n"
" 80: eighty[->>];\n"
" 90: ninety[->>];\n"
// beginning at 100, we can use << to mark the position where
// the result of formatting the multiple of 100 is to be
// inserted. Notice also that the meaning of >> has shifted:
// here, it refers to both the ones place and the tens place.
// The meanings of the << and >> tokens depend on the base value
// of the rule. A rule's divisor is (usually) the highest
// power of 10 that is less than or equal to the rule's base
// value. The value being formatted is divided by the rule's
// divisor, and the integral quotient is used to get the text
// for <<, while the remainder is used to produce the text
// for >>. Again, text in brackets is omitted if the value
// being formatted is an even multiple of the rule's divisor
// (in this case, an even multiple of 100)
" 100: << hundred[ >>];\n"
// The rules for the higher numbers work the same way as the
// rule for 100: Again, the << and >> tokens depend on the
// rule's divisor, which for all these rules is also the rule's
// base value. To group by thousand, we simply don't have any
// rules between 1,000 and 1,000,000.
" 1000: << thousand[ >>];\n"
" 1,000,000: << million[ >>];\n"
" 1,000,000,000: << billion[ >>];\n"
" 1,000,000,000,000: << trillion[ >>];\n"
// overflow rule. This rule specifies that values of a
// quadrillion or more are shown in numerals rather than words.
// The == token means to format (with new rules) the value
// being formatted by this rule and place the result where
// the == is. The #,##0 inside the == signs is a
// DecimalFormat pattern. It specifies that the value should
// be formatted with a DecimalFormat object, and that it
// should be formatted with no decimal places, at least one
// digit, and a thousands separator.
" 1,000,000,000,000,000: =#,##0=;\n"
// %default is a more elaborate form of %simplified; It is basically
// the same, except that it introduces "and" before the ones digit
// when appropriate (basically, between the tens and ones digits) and
// separates the thousands groups with commas in values over 100,000.
"%default:\n"
// negative-number and fraction rules. These are the same
// as those for %simplified, but have to be stated here too
// because this is an entry point
" -x: minus >>;\n"
" x.x: << point >>;\n"
// just use %simplified for values below 100
" =%simplified=;\n"
// for values from 100 to 9,999 use %%and to decide whether or
// not to interpose the "and"
" 100: << hundred[ >%%and>];\n"
" 1000: << thousand[ >%%and>];\n"
// for values of 100,000 and up, use %%commas to interpose the
// commas in the right places (and also to interpose the "and")
" 100,000>>: << thousand[>%%commas>];\n"
" 1,000,000: << million[>%%commas>];\n"
" 1,000,000,000: << billion[>%%commas>];\n"
" 1,000,000,000,000: << trillion[>%%commas>];\n"
" 1,000,000,000,000,000: =#,##0=;\n"
// if the value passed to this rule set is greater than 100, don't
// add the "and"; if it's less than 100, add "and" before the last
// digits
"%%and:\n"
" and =%default=;\n"
" 100: =%default=;\n"
// this rule set is used to place the commas
"%%commas:\n"
// for values below 100, add "and" (the apostrophe at the
// beginning is ignored, but causes the space that follows it
// to be significant: this is necessary because the rules
// calling %%commas don't put a space before it)
" ' and =%default=;\n"
// put a comma after the thousands (or whatever preceded the
// hundreds)
" 100: , =%default=;\n"
// put a comma after the millions (or whatever precedes the
// thousands)
" 1000: , <%default< thousand, >%default>;\n"
// and so on...
" 1,000,000: , =%default=;"
// %%lenient-parse isn't really a set of number formatting rules;
// it's a set of collation rules. Lenient-parse mode uses a Collator
// object to compare fragments of the text being parsed to the text
// in the rules, allowing more leeway in the matching text. This set
// of rules tells the formatter to ignore commas when parsing (it
// already ignores spaces, which is why we refer to the space; it also
// ignores hyphens, making "twenty one" and "twenty-one" parse
// identically)
"%%lenient-parse:\n"
// " & ' ' , ',' ;\n"
" &\u0000 << ' ' << ',' << '-'; \n"
}
// * This rule set adds an English ordinal abbreviation to the end of a
// * number. For example, 2 is formatted as "2nd". Parsing doesn't work with
// * this rule set. To parse, use DecimalFormat on the numeral.
OrdinalRules {
// this rule set formats the numeral and calls %%abbrev to
// supply the abbreviation
"%main:\n"
" =#,##0==%%abbrev=;\n"
// this rule set supplies the abbreviation
"%%abbrev:\n"
// the abbreviations for 0 through 4. Everything from 4 to 19 ends in
// "th" because the rule for 4 stays in effect until the rule for 20
" th; st; nd; rd; th;\n"
// at 20, we begin repeating the cycle every 10 (13 is "13th",
// but 23 and 33 are "23rd" and "33rd") We do this by
// ignoring all but the ones digit in selecting the abbreviation
" 20: >>;\n"
// at 100, we repeat the whole cycle by considering only the
// tens and ones digits in picking an abbreviation
" 100: >>;\n"
}
// * This rule set formats a number of seconds in sexagesimal notation
// * (i.e., hours, minutes, and seconds). %with-words formats it with
// * words (3,740 is "1 hour, 2 minutes, 20 seconds") and %in-numerals
// * formats it entirely in numerals (3,740 is "1:02:20").
DurationRules {
// main rule set for formatting with words
"%with-words:\n"
// take care of singular and plural forms of "second"
" 0 seconds; 1 second; =0= seconds;\n"
// use %%min to format values greater than 60 seconds
// (NOTE(review): the "/60" after the base value presumably sets the
// rule's radix to 60 so the divisor is a power of 60 -- confirm against
// the RuleBasedNumberFormat documentation)
" 60/60: <%%min<[, >>];\n"
// use %%hr to format values greater than 3,600 seconds
// (the ">>>" below causes us to see the number of minutes
// even when there are zero minutes)
" 3600/60: <%%hr<[, >>>];\n"
// this rule set takes care of the singular and plural forms
// of "minute"
"%%min:\n"
" 0 minutes; 1 minute; =0= minutes;\n"
// this rule set takes care of the singular and plural forms
// of "hour"
"%%hr:\n"
" 0 hours; 1 hour; =0= hours;\n"
// main rule set for formatting in numerals
"%in-numerals:\n"
// values below 60 seconds are shown with "sec."
" =0= sec.;\n"
// higher values are shown with colons: %%min-sec is used for
// values below 3,600 seconds...
" 60: =%%min-sec=;\n"
// ...and %%hr-min-sec is used for values of 3,600 seconds
// and above
" 3600: =%%hr-min-sec=;\n"
// this rule causes values of less than 10 minutes to show without
// a leading zero
"%%min-sec:\n"
// seconds are always two digits, preceded by the colon
" 0: :=00=;\n"
" 60/60: <0<>>;\n"
// this rule set is used for values of 3,600 or more. Minutes are always
// shown, and always shown with two digits
"%%hr-min-sec:\n"
" 0: :=00=;\n"
" 60/60: <00<>>;\n"
" 3600/60: <#,##0<:>>>;\n"
// the lenient-parse rules allow several different characters to be used
// as delimiters between hours, minutes, and seconds
"%%lenient-parse:\n"
" & ':' = '.' = ' ' = '-';\n"
}
}

View File

@ -122,4 +122,44 @@ ru {
"Cyrs" // ISO 15924 Name
}
//------------------------------------------------------------
// Rule Based Number Format Support
//------------------------------------------------------------
// * Spellout rules for Russian.
// Can someone supply me with information on negatives and decimals?
// How about words for billions and trillions?
// NOTE(review): only spelling errors are corrected here. The rules still
// form 200-900 as "<digit> \u0441\u0442\u043e" and do not inflect the words
// for thousand and million; those need additional rules.
SpelloutRules {
    // 0 through 9 (5, 6 and 9 now end in the soft sign \u044c)
    "\u043d\u043e\u043b\u044c; \u043e\u0434\u0438\u043d; \u0434\u0432\u0430; \u0442\u0440\u0438; "
    "\u0447\u0435\u0442\u044b\u0440\u0435; \u043f\u044f\u0442\u044c; \u0448\u0435\u0441\u0442\u044c; "
    "\u0441\u0435\u043c\u044c; \u0432\u043e\u0441\u0435\u043c\u044c; \u0434\u0435\u0432\u044f\u0442\u044c;\n"
    // 10 through 14 (10 now ends in \u044c; 12 is spelled with a single \u043d)
    "10: \u0434\u0435\u0441\u044f\u0442\u044c; "
    "\u043e\u0434\u0438\u043d\u043d\u0430\u0434\u0446\u0430\u0442\u044c;\n"
    "\u0434\u0432\u0435\u043d\u0430\u0434\u0446\u0430\u0442\u044c; "
    "\u0442\u0440\u0438\u043d\u0430\u0434\u0446\u0430\u0442\u044c; "
    "\u0447\u0435\u0442\u044b\u0440\u043d\u0430\u0434\u0446\u0430\u0442\u044c;\n"
    // 15 through 19
    "15: \u043f\u044f\u0442\u043d\u0430\u0434\u0446\u0430\u0442\u044c; "
    "\u0448\u0435\u0441\u0442\u043d\u0430\u0434\u0446\u0430\u0442\u044c; "
    "\u0441\u0435\u043c\u043d\u0430\u0434\u0446\u0430\u0442\u044c; "
    "\u0432\u043e\u0441\u0435\u043c\u043d\u0430\u0434\u0446\u0430\u0442\u044c; "
    "\u0434\u0435\u0432\u044f\u0442\u043d\u0430\u0434\u0446\u0430\u0442\u044c;\n"
    // multiples of ten (30 previously had \u043b where \u0438 belongs)
    "20: \u0434\u0432\u0430\u0434\u0446\u0430\u0442\u044c[ >>];\n"
    "30: \u0442\u0440\u0438\u0434\u0446\u0430\u0442\u044c[ >>];\n"
    "40: \u0441\u043e\u0440\u043e\u043a[ >>];\n"
    "50: \u043f\u044f\u0442\u044c\u0434\u0435\u0441\u044f\u0442[ >>];\n"
    "60: \u0448\u0435\u0441\u0442\u044c\u0434\u0435\u0441\u044f\u0442[ >>];\n"
    "70: \u0441\u0435\u043c\u044c\u0434\u0435\u0441\u044f\u0442[ >>];\n"
    "80: \u0432\u043e\u0441\u0435\u043c\u044c\u0434\u0435\u0441\u044f\u0442[ >>];\n"
    "90: \u0434\u0435\u0432\u044f\u043d\u043e\u0441\u0442\u043e[ >>];\n"
    // hundreds, thousands and millions: the first multiple has no leading
    // count, later multiples are prefixed with it via <<
    "100: \u0441\u0442\u043e[ >>];\n"
    "200: << \u0441\u0442\u043e[ >>];\n"
    "1000: \u0442\u044b\u0441\u044f\u0447\u0430[ >>];\n"
    "2000: << \u0442\u044b\u0441\u044f\u0447\u0430[ >>];\n"
    "1,000,000: \u043c\u0438\u043b\u043b\u0438\u043e\u043d[ >>];\n"
    "2,000,000: << \u043c\u0438\u043b\u043b\u0438\u043e\u043d[ >>];\n"
    // overflow rule: larger values are shown in numerals
    "1,000,000,000: =#,##0=;"
}
}

View File

@ -118,4 +118,36 @@ sv {
"Latf", // ISO 15924 Name
"Latg", // ISO 15924 Name
}
//------------------------------------------------------------
// Rule Based Number Format Support
//------------------------------------------------------------
// * Spellout rules for Swedish.
// can someone supply me with information on negatives and decimals?
SpelloutRules {
    // the words for 0 through 19 (7 is "sju"; it was misspelled "sjo")
    "noll; ett; tv\u00e5; tre; fyra; fem; sex; sju; \u00e5tta; nio;\n"
    "tio; elva; tolv; tretton; fjorton; femton; sexton; sjutton; arton; nitton;\n"
    // multiples of ten, with the ones digit appended directly
    "20: tjugo[>>];\n"
    "30: trettio[>>];\n"
    "40: fyrtio[>>];\n"
    "50: femtio[>>];\n"
    "60: sextio[>>];\n"
    "70: sjuttio[>>];\n"
    "80: \u00e5ttio[>>];\n"
    "90: nittio[>>];\n"
    // the first multiple of each power has its own rule so that no
    // leading count is produced for it
    "100: hundra[>>];\n"
    "200: <<hundra[>>];\n"
    "1000: tusen[ >>];\n"
    "2000: << tusen[ >>];\n"
    // million, milliard and billion take the plural ending "-er" when
    // preceded by a count of two or more
    "1,000,000: en miljon[ >>];\n"
    "2,000,000: << miljoner[ >>];\n"
    "1,000,000,000: en miljard[ >>];\n"
    "2,000,000,000: << miljarder[ >>];\n"
    "1,000,000,000,000: en biljon[ >>];\n"
    "2,000,000,000,000: << biljoner[ >>];\n"
    // overflow rule: larger values are shown in numerals
    "1,000,000,000,000,000: =#,##0="
}
}
}

View File

@ -245,4 +245,43 @@ th {
"Thai", // Script Name
"Thai" // ISO 15924 Name
}
//------------------------------------------------------------
// Rule Based Number Format Support
//------------------------------------------------------------
// Spellout rules for Thai. Data from Suwit Srivilairith, IBM Thailand
SpelloutRules {
    "%default:\n"
    // negative numbers and numbers with a fractional part
    " -x: \u0e25\u0e1a>>;\n"
    " x.x: <<\u0e08\u0e38\u0e14>>>;\n"
    // the words for 0 through 19
    " \u0e28\u0e39\u0e19\u0e22\u0e4c; \u0e2b\u0e19\u0e36\u0e48\u0e07; \u0e2a\u0e2d\u0e07; \u0e2a\u0e32\u0e21;\n"
    " \u0e2a\u0e35\u0e48; \u0e2b\u0e49\u0e32; \u0e2b\u0e01; \u0e40\u0e08\u0e47\u0e14; \u0e41\u0e1b\u0e14;\n"
    " \u0e40\u0e01\u0e49\u0e32; \u0e2a\u0e34\u0e1a; \u0e2a\u0e34\u0e1a\u0e40\u0e2d\u0e47\u0e14;\n"
    " \u0e2a\u0e34\u0e1a\u0e2a\u0e2d\u0e07; \u0e2a\u0e34\u0e1a\u0e2a\u0e32\u0e21;\n"
    " \u0e2a\u0e34\u0e1a\u0e2a\u0e35\u0e48; \u0e2a\u0e34\u0e1a\u0e2b\u0e49\u0e32;\n"
    " \u0e2a\u0e34\u0e1a\u0e2b\u0e01; \u0e2a\u0e34\u0e1a\u0e40\u0e08\u0e47\u0e14;\n"
    " \u0e2a\u0e34\u0e1a\u0e41\u0e1b\u0e14; \u0e2a\u0e34\u0e1a\u0e40\u0e01\u0e49\u0e32;\n"
    // multiples of ten: the ones digit is formatted with %%alt-ones so
    // that a trailing 1 uses \u0e40\u0e2d\u0e47\u0e14 instead of
    // \u0e2b\u0e19\u0e36\u0e48\u0e07
    " 20: \u0e22\u0e35\u0e48\u0e2a\u0e34\u0e1a[>%%alt-ones>];\n"
    " 30: \u0e2a\u0e32\u0e21\u0e2a\u0e34\u0e1a[>%%alt-ones>];\n"
    " 40: \u0e2a\u0e35\u0e48\u0e2a\u0e34\u0e1a[>%%alt-ones>];\n"
    " 50: \u0e2b\u0e49\u0e32\u0e2a\u0e34\u0e1a[>%%alt-ones>];\n"
    " 60: \u0e2b\u0e01\u0e2a\u0e34\u0e1a[>%%alt-ones>];\n"
    " 70: \u0e40\u0e08\u0e47\u0e14\u0e2a\u0e34\u0e1a[>%%alt-ones>];\n"
    " 80: \u0e41\u0e1b\u0e14\u0e2a\u0e34\u0e1a[>%%alt-ones>];\n"
    " 90: \u0e40\u0e01\u0e49\u0e32\u0e2a\u0e34\u0e1a[>%%alt-ones>];\n"
    // a separate word for every power of ten up to a million, then
    // compounds built from thousand and million
    " 100: <<\u0e23\u0e49\u0e2d\u0e22[>>];\n"
    " 1000: <<\u0e1e\u0e31\u0e19[>>];\n"
    " 10000: <<\u0e2b\u0e21\u0e37\u0e48\u0e19[>>];\n"
    " 100000: <<\u0e41\u0e2a\u0e19[>>];\n"
    " 1,000,000: <<\u0e25\u0e49\u0e32\u0e19[>>];\n"
    " 1,000,000,000: <<\u0e1e\u0e31\u0e19\u0e25\u0e49\u0e32\u0e19[>>];\n"
    " 1,000,000,000,000: <<\u0e25\u0e49\u0e32\u0e19\u0e25\u0e49\u0e32\u0e19[>>];\n"
    // overflow rule: larger values are shown in numerals
    " 1,000,000,000,000,000: =#,##0=;\n"
    // 0 and 1 get their own words here; everything else falls back to
    // %default
    "%%alt-ones:\n"
    " \u0e28\u0e39\u0e19\u0e22\u0e4c;\n"
    " \u0e40\u0e2d\u0e47\u0e14;\n"
    // the stray ';' that followed this literal, outside the quotes, has
    // been removed
    " =%default=;\n"
}
}

View File

@ -70,7 +70,9 @@ uniset.o unifltlg.o unirange.o translit.o utrans.o \
cpdtrans.o hextouni.o rbt.o rbt_data.o rbt_pars.o rbt_rule.o rbt_set.o \
dbbi.o dbbi_tbl.o rbbi.o rbbi_tbl.o nultrans.o \
remtrans.o titletrn.o tolowtrn.o toupptrn.o xformtrn.o \
name2uni.o uni2name.o unitohex.o nortrans.o unifilt.o quant.o transreg.o
name2uni.o uni2name.o unitohex.o nortrans.o unifilt.o quant.o transreg.o \
llong.o nfrs.o nfrule.o nfsubs.o rbnf.o
STATIC_OBJECTS = $(OBJECTS:.o=.$(STATIC_O))

View File

@ -166,6 +166,10 @@ SOURCE=.\hextouni.cpp
# End Source File
# Begin Source File
SOURCE=.\llong.cpp
# End Source File
# Begin Source File
SOURCE=.\msgfmt.cpp
# End Source File
# Begin Source File
@ -174,6 +178,18 @@ SOURCE=.\name2uni.cpp
# End Source File
# Begin Source File
SOURCE=.\nfrs.cpp
# End Source File
# Begin Source File
SOURCE=.\nfrule.cpp
# End Source File
# Begin Source File
SOURCE=.\nfsubs.cpp
# End Source File
# Begin Source File
SOURCE=.\nortrans.cpp
# End Source File
# Begin Source File
@ -198,6 +214,10 @@ SOURCE=.\rbbi_tbl.cpp
# End Source File
# Begin Source File
SOURCE=.\rbnf.cpp
# End Source File
# Begin Source File
SOURCE=.\rbt.cpp
# End Source File
# Begin Source File
@ -814,6 +834,10 @@ InputPath=.\unicode\hextouni.h
# End Source File
# Begin Source File
SOURCE=.\llong.h
# End Source File
# Begin Source File
SOURCE=.\unicode\msgfmt.h
!IF "$(CFG)" == "i18n - Win32 Release"
@ -860,6 +884,22 @@ InputPath=.\unicode\name2uni.h
# End Source File
# Begin Source File
SOURCE=.\nfrlist.h
# End Source File
# Begin Source File
SOURCE=.\nfrs.h
# End Source File
# Begin Source File
SOURCE=.\nfrule.h
# End Source File
# Begin Source File
SOURCE=.\nfsubs.h
# End Source File
# Begin Source File
SOURCE=.\unicode\nortrans.h
!IF "$(CFG)" == "i18n - Win32 Release"
@ -1022,6 +1062,25 @@ SOURCE=.\rbbi_tbl.h
# End Source File
# Begin Source File
SOURCE=.\unicode\rbnf.h
!IF "$(CFG)" == "i18n - Win32 Release"
!ELSEIF "$(CFG)" == "i18n - Win32 Debug"
# Begin Custom Build
InputPath=.\unicode\rbnf.h
"..\..\include\unicode\rbnf.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy unicode\rbnf.h ..\..\include\unicode
# End Custom Build
!ENDIF
# End Source File
# Begin Source File
SOURCE=.\unicode\rbt.h
!IF "$(CFG)" == "i18n - Win32 Release"

301
icu4c/source/i18n/llong.cpp Normal file
View File

@ -0,0 +1,301 @@
#include "llong.h"
// Definitions of llong's static constants.
//
// The disabled variant below binds each class-level reference directly to a
// temporary; the enabled variant binds it to a file-static object instead.
#if 0
/*
* This should work, I think, but SOLARISCC -xO3 can't handle it.
* Works with SOLARISGCC, SOLARISCC -g, Win32...
*
*/
const llong& llong::kMaxValue = llong(0x7fffffff, 0xffffffff);
const llong& llong::kMinValue = llong(0x80000000, 0x0);
const llong& llong::kMinusOne = llong(0xffffffff, 0xffffffff);
const llong& llong::kZero = llong(0x0, 0x0);
const llong& llong::kOne = llong(0x0, 0x1);
const llong& llong::kTwo = llong(0x0, 0x2);
const llong& llong::kMaxDouble = llong(0x200000, 0x0);
const llong& llong::kMinDouble = -kMaxDouble;
#endif
// Backing objects, built with the (high word, low word) constructor.
static llong kMaxValueObj(0x7fffffff, 0xffffffff);
static llong kMinValueObj(0x80000000, 0x0);
static llong kMinusOneObj(0xffffffff, 0xffffffff);
static llong kZeroObj(0x0, 0x0);
static llong kOneObj(0x0, 0x1);
static llong kTwoObj(0x0, 0x2);
// high word 0x200000 (2^21) with a zero low word, i.e. 2^53
static llong kMaxDoubleObj(0x200000, 0x0);
static llong kMinDoubleObj(-kMaxDoubleObj);
// The class constants are references to the objects above.
const llong& llong::kMaxValue = kMaxValueObj;
const llong& llong::kMinValue = kMinValueObj;
const llong& llong::kMinusOne = kMinusOneObj;
const llong& llong::kZero = kZeroObj;
const llong& llong::kOne = kOneObj;
const llong& llong::kTwo = kTwoObj;
const llong& llong::kMaxDouble = kMaxDoubleObj;
const llong& llong::kMinDouble = kMinDoubleObj;
// Operands below this bound have a product below 2^31 (46340 * 46340 <
// 2^31), so operator*= can multiply the low words directly.
#define SQRT231 46340
// 2^32 as a double (0xffffffff + 1)
const double llong::kD32 = ((double)(0xffffffffu)) + 1;
// double equivalents of kMaxDouble and its negation
const double llong::kDMax = llong_asDouble(kMaxDouble);
const double llong::kDMin = -kDMax;
/**
 * Multiplies this value by rhs in place.
 *
 * Small operands (both magnitudes below SQRT231, so the product fits in
 * 31 bits) are multiplied directly in the low word. Everything else uses
 * shift-and-add on the magnitudes, and the sign is applied afterwards.
 * Results that do not fit in 64 bits wrap.
 *
 * @param rhs the multiplier; may alias *this
 * @return *this
 */
llong& llong::operator*=(const llong& rhs)
{
    // optimize small positive multiplications
    if (hi == 0 && rhs.hi == 0 && lo < SQRT231 && rhs.lo < SQRT231) {
        lo *= rhs.lo;
    } else {
        int retry = 0;
        llong a(*this);
        if (a.isNegative()) {
            retry = 1;
            a.negate();
        }
        llong b(rhs);
        if (b.isNegative()) {
            retry = 1;
            b.negate();
        }
        llong r;
        // optimize small negative multiplications
        if (retry && a.hi == 0 && b.hi == 0 && a.lo < SQRT231 && b.lo < SQRT231) {
            r.lo = a.lo * b.lo;
        } else {
            // iterate over the bits of the smaller operand
            if (a < b) {
                llong t = a;
                a = b;
                b = t;
            }
            while (b.notZero()) {
                if (b.lo & 0x1) {
                    r += a;
                }
                // Shift WITHOUT sign extension. negate() leaves kMinValue
                // negative, and an arithmetic shift would then converge on
                // -1 instead of 0, so this loop would never terminate.
                b.ushr(1);
                a <<= 1;
            }
        }
        if (isNegative() != rhs.isNegative()) {
            r.negate();
        }
        *this = r;
    }
    return *this;
}
/**
 * Divides this value by rhs in place, truncating toward zero.
 *
 * Special cases, kept from the original implementation:
 *  - a zero dividend is returned unchanged, even when rhs is zero;
 *  - division of a nonzero value by zero yields kMaxValue or kMinValue,
 *    according to the sign of the dividend.
 *
 * The division is carried out on the magnitudes, treated as unsigned
 * 64-bit quantities so that kMinValue (whose magnitude, 2^63, does not fit
 * in a signed value) is handled as well. kMinValue / -1 wraps to kMinValue.
 *
 * @param rhs the divisor; may alias *this
 * @return *this
 */
llong& llong::operator/=(const llong& rhs)
{
    if (isZero()) {
        return *this;
    }

    // decide the result's sign before anything is modified
    const int negative = isNegative() != rhs.isNegative();

    llong a(*this);
    if (a.isNegative()) {
        a.negate();
    }
    llong b(rhs);
    if (b.isNegative()) {
        b.negate();
    }

    if (b.isZero()) { // should throw div by zero error
        *this = negative ? kMinValue : kMaxValue;
        return *this;
    }

    llong r;
    if (a.hi == 0 && b.hi == 0) {
        // Both magnitudes fit in 32 bits. Keep the quotient unsigned: going
        // through int32_t would turn quotients of 2^31 or more negative.
        r.lo = a.lo / b.lo;
    } else {
        // Restoring (shift-and-subtract) division, one quotient bit per
        // step, on plain unsigned words so no signed overflow can occur.
        const uint32_t numHi = (uint32_t)a.hi;
        const uint32_t numLo = a.lo;
        const uint32_t divHi = (uint32_t)b.hi;
        const uint32_t divLo = b.lo;
        uint32_t remHi = 0;
        uint32_t remLo = 0;
        uint32_t quoHi = 0;
        uint32_t quoLo = 0;
        for (int32_t bit = 63; bit >= 0; --bit) {
            // bring the next dividend bit down into the running remainder;
            // the remainder is below the divisor (at most 2^63) here, so
            // doubling it cannot overflow 64 bits
            const uint32_t word = bit >= 32 ? numHi : numLo;
            const uint32_t next = (word >> (bit & 31)) & 1u;
            remHi = (remHi << 1) | (remLo >> 31);
            remLo = (remLo << 1) | next;
            if (remHi > divHi || (remHi == divHi && remLo >= divLo)) {
                const uint32_t borrow = remLo < divLo ? 1u : 0u;
                remLo -= divLo;
                remHi -= divHi + borrow;
                if (bit >= 32) {
                    quoHi |= 1u << (bit - 32);
                } else {
                    quoLo |= 1u << bit;
                }
            }
        }
        r.hi = (int32_t)quoHi;
        r.lo = quoLo;
    }

    if (negative) {
        r.negate();
    }
    *this = r;
    return *this;
}
// Digit characters for values 0..35: ASCII '0'-'9' (0x30-0x39) followed by
// 'a'-'z' (0x61-0x7a). Indexed by digit value in lltoa/u_lltoa.
static uint8_t asciiDigits[] = {
(char)0x30, (char)0x31, (char)0x32, (char)0x33, (char)0x34, (char)0x35, (char)0x36, (char)0x37,
(char)0x38, (char)0x39, (char)0x61, (char)0x62, (char)0x63, (char)0x64, (char)0x65, (char)0x66,
(char)0x67, (char)0x68, (char)0x69, (char)0x6a, (char)0x6b, (char)0x6c, (char)0x6d, (char)0x6e,
(char)0x6f, (char)0x70, (char)0x71, (char)0x72, (char)0x73, (char)0x74, (char)0x75, (char)0x76,
(char)0x77, (char)0x78, (char)0x79, (char)0x7a,
};
// ASCII hyphen-minus, as a UChar and as a char
static UChar kUMinus = (UChar)0x002d;
static char kMinus = (char)0x2d;
// Digit lookup for the 128 ASCII code points, indexed by character code.
// A zero entry means "not a digit". A digit entry has bit 0x80 set (so that
// the digit 0 is distinguishable from "not a digit") and the digit's value
// in the low seven bits. Upper- and lower-case letters map to the same
// values, 10 through 35.
static uint8_t digitInfo[] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0, 0, 0, 0, 0, 0,
0, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98,
0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0, 0, 0, 0, 0,
0, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98,
0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0, 0, 0, 0, 0,
};
/**
 * Parses an optionally '-'-prefixed run of digits from a NUL-terminated
 * char string. Parsing stops at the first character that is not a digit of
 * the given radix (letters of either case stand for digits 10 through 35).
 *
 * @param str   the text to parse
 * @param radix the number base; values outside 2..36 are clamped to it
 * @return the parsed value, or zero if no digits were found
 */
llong atoll(const char* str, uint32_t radix)
{
    if (radix > 36) {
        radix = 36;
    } else if (radix < 2) {
        radix = 2;
    }
    llong lradix(radix);

    int neg = 0;
    if (*str == kMinus) {
        ++str;
        neg = 1;
    }
    llong result;
    for (;;) {
        // Go through an unsigned byte and range-check it: digitInfo has
        // only 128 entries, and a plain char may be negative or above 127.
        const uint8_t c = (uint8_t)*str++;
        if (c >= 0x80) {
            break;
        }
        uint8_t b = digitInfo[c];
        if (b == 0) {           // not a digit (this also stops at the NUL)
            break;
        }
        b &= 0x7f;              // strip the "is a digit" flag
        if (b >= radix) {
            break;
        }
        result *= lradix;
        result += (int32_t)b;
    }
    if (neg) {
        result.negate();
    }
    return result;
}
/**
 * UChar counterpart of atoll: parses an optionally '-'-prefixed run of
 * ASCII digits. Parsing stops at the first code unit that is outside the
 * ASCII range or is not a digit of the given radix.
 *
 * @param str   the text to parse
 * @param radix the number base; values outside 2..36 are clamped to it
 * @return the parsed value, or zero if no digits were found
 */
llong u_atoll(const UChar* str, uint32_t radix)
{
    if (radix < 2) {
        radix = 2;
    } else if (radix > 36) {
        radix = 36;
    }
    llong lradix(radix);

    int neg = 0;
    if (*str == kUMinus) {
        neg = 1;
        ++str;
    }

    llong result;
    for (;;) {
        const UChar unit = *str++;
        if (!(unit < 0x0080)) {
            break;              // outside the range covered by digitInfo
        }
        uint8_t info = digitInfo[unit];
        if (!info) {
            break;              // not a digit
        }
        info &= 0x7f;           // strip the "is a digit" flag
        if (!(info < radix)) {
            break;              // digit too large for this radix
        }
        result *= lradix;
        result += (int32_t)info;
    }

    if (neg) {
        result.negate();
    }
    return result;
}
/**
 * Formats val into buf as digits of the given radix, with a leading '-'
 * for negative values. Output is truncated to len characters; a
 * terminating NUL is appended only if room remains.
 *
 * Zero is written as a single zero digit (it used to produce an empty
 * string), and buf is never read when len is 0.
 *
 * @param val   the value to format
 * @param buf   destination buffer of at least len chars
 * @param len   capacity of buf
 * @param radix the number base; values outside 2..36 are clamped to it
 * @param raw   if true, store digit values (0..radix-1) instead of
 *              their ASCII characters
 * @return the number of chars written, not counting the terminating NUL
 */
uint32_t lltoa(const llong& val, char* buf, uint32_t len, uint32_t radix, UBool raw)
{
    if (radix > 36) {
        radix = 36;
    } else if (radix < 2) {
        radix = 2;
    }
    llong base(radix);

    char* p = buf;
    char* digits = buf;     // where the digit run starts (after any sign)
    llong w(val);
    if (len && w.isNegative()) {
        w.negate();
        *p++ = kMinus;
        --len;
        digits = p;
    } else if (len && w.isZero()) {
        // the digit loop below emits nothing for zero
        *p++ = raw ? (char)0 : (char)asciiDigits[0];
        --len;
    }

    // emit digits least-significant first
    while (len && w.notZero()) {
        llong n = w / base;
        llong m = n * base;
        int32_t d = llong_asInt(w-m);
        *p++ = raw ? (char)d : asciiDigits[d];
        w = n;
        --len;
    }
    if (len) {
        *p = 0; // null terminate if room for caller convenience
    }

    len = p - buf;

    // reverse the digit run in place so the most significant digit is first
    if (p > digits) {
        char* head = digits;
        char* tail = p - 1;
        while (head < tail) {
            char c = *tail;
            *tail = *head;
            *head = c;
            ++head;
            --tail;
        }
    }

    return len;
}
/**
 * UChar counterpart of lltoa: formats val into buf as digits of the given
 * radix, with a leading '-' for negative values. Output is truncated to len
 * code units; a terminating zero is appended only if room remains.
 *
 * Zero is written as a single zero digit (it used to produce an empty
 * string), and buf is never read when len is 0.
 *
 * @param val   the value to format
 * @param buf   destination buffer of at least len UChars
 * @param len   capacity of buf
 * @param radix the number base; values outside 2..36 are clamped to it
 * @param raw   if true, store digit values (0..radix-1) instead of
 *              their ASCII characters
 * @return the number of UChars written, not counting the terminator
 */
uint32_t u_lltoa(const llong& val, UChar* buf, uint32_t len, uint32_t radix, UBool raw)
{
    if (radix > 36) {
        radix = 36;
    } else if (radix < 2) {
        radix = 2;
    }
    llong base(radix);

    UChar* p = buf;
    UChar* digits = buf;    // where the digit run starts (after any sign)
    llong w(val);
    if (len && w.isNegative()) {
        w.negate();
        *p++ = kUMinus;
        --len;
        digits = p;
    } else if (len && w.isZero()) {
        // the digit loop below emits nothing for zero
        *p++ = (UChar)(raw ? 0 : asciiDigits[0]);
        --len;
    }

    // emit digits least-significant first
    while (len && w.notZero()) {
        llong n = w / base;
        llong m = n * base;
        int32_t d = llong_asInt(w-m);
        *p++ = (UChar)(raw ? d : asciiDigits[d]);
        w = n;
        --len;
    }
    if (len) {
        *p = 0; // null terminate if room for caller convenience
    }

    len = p - buf;

    // reverse the digit run in place so the most significant digit is first
    if (p > digits) {
        UChar* head = digits;
        UChar* tail = p - 1;
        while (head < tail) {
            UChar c = *tail;
            *tail = *head;
            *head = c;
            ++head;
            --tail;
        }
    }

    return len;
}

312
icu4c/source/i18n/llong.h Normal file
View File

@ -0,0 +1,312 @@
// thanks to Mike Cowlishaw
#ifndef LLONG_H
#define LLONG_H
// debug
#include <stdio.h>
#include "unicode/utypes.h"
// machine dependent value, need to move
#define __u_IntBits 32
/**
 * A signed 64-bit integer stored as two 32-bit halves (two's complement),
 * for use where no native 64-bit integer type is assumed.
 *
 * Shift counts are reduced modulo 64. A count of zero is handled explicitly
 * because the cross-word term would otherwise shift a 32-bit word by 32
 * bits, which is undefined.
 */
class llong {
public:
    uint32_t lo;    // low 32 bits
    int32_t hi;     // high 32 bits, carries the sign

private:
    enum {
        MASK32 = 0xffffffffu
    };

    static const double kD32;  // 2^32 as a double
    static const double kDMin; // -(2^53), the double value of kMinDouble
    static const double kDMax; // 2^53, the double value of kMaxDouble

    // private constructor
    // should be private, but we can't construct the way we want using SOLARISCC
    // so make public in order that file statics can access this constructor
public:
    llong(int32_t h, uint32_t l) : lo(l), hi(h) {}
private:

    // convenience, size reduction in inline code
    llong& nnot() { hi = ~hi; lo = ~lo; return *this; }
    // two's complement negation in place; kMinValue negates to itself
    llong& negate() { hi = ~hi; lo = ~lo; if (!++lo) ++hi; return *this; }
    llong& abs() { if (hi < 0) negate(); return *this; }
    UBool notZero() const { return (hi | lo) != 0; }
    UBool isZero() const { return (hi | lo) == 0; }
    UBool isNegative() const { return hi < 0; }

public:
    llong() : lo(0), hi(0) {}
    // signed sources are sign-extended into hi
    llong(const int32_t l) : lo((unsigned)l), hi(l < 0 ? -1 : 0) {}
    llong(const int16_t l) : lo((unsigned)l), hi(l < 0 ? -1 : 0) {}
    llong(const int8_t l) : lo((unsigned)l), hi(l < 0 ? -1 : 0) {}
#if __u_IntBits == 64
    llong(const int i) : lo(i & MASK32), hi(i >> 32) {}
#endif
    // unsigned sources are zero-extended
    llong(uint16_t s) : lo(s), hi(0) {}
    llong(uint32_t l) : lo(l), hi(0) {}
#if __u_IntBits == 64
    llong(unsigned int i) : lo(i & MASK32), hi(i >> 32) {}
#endif
    // NaN converts to zero; values beyond kDMin/kDMax are pinned to
    // kMinDouble/kMaxDouble; anything else is truncated toward zero
    llong(double d) { // avoid dependency on bit representation of double
        if (uprv_isNaN(d)) {
            *this = llong::kZero;
        } else if (d < kDMin) {
            *this = llong::kMinDouble;
        } else if (d > kDMax) {
            *this = llong::kMaxDouble;
        } else {
            int neg = d < 0;
            if (neg) d = -d;
            d = uprv_floor(d);
            hi = (int32_t)uprv_floor(d / kD32);
            d -= kD32 * hi;
            lo = (uint32_t)d;
            if (neg) negate();
        }
    }

    llong(const llong& rhs) : lo(rhs.lo), hi(rhs.hi) {}

    // the following cause ambiguities in binary expressions,
    // even if we overload all methods on all args!
    // so you have to use global functions
    // operator const int32_t() const;
    // operator const uint32_t() const;
    // operator const double() const;

    friend int32_t llong_asInt(const llong& lhs);
    friend uint32_t llong_asUInt(const llong& lhs);
    friend double llong_asDouble(const llong& lhs);

    llong& operator=(const llong& rhs) { lo = rhs.lo; hi = rhs.hi; return *this; }

    // left shift
    llong& operator<<=(int32_t shift) {
        shift &= 63; // like java spec
        if (shift == 0) {
            // nothing to do; also keeps "32 - shift" below from reaching 32
        } else if (shift < 32) {
            hi = (signed)(((uint32_t)hi << shift) | (lo >> (32 - shift))); // no sign extension on lo since unsigned
            lo <<= shift;
        } else {
            hi = (signed)(lo << (shift - 32));
            lo = 0;
        }
        return *this;
    }
    llong operator<<(int32_t shift) const { llong r(*this); r <<= shift; return r; }

    // right shift with sign extension
    llong& operator>>=(int32_t shift) {
        shift &= 63; // like java spec
        if (shift == 0) {
            // nothing to do; also keeps "32 - shift" below from reaching 32
        } else if (shift < 32) {
            lo >>= shift;
            lo |= ((uint32_t)hi << (32 - shift));
            hi = hi >> shift; // note sign extension
        } else {
            lo = (unsigned)(hi >> (shift - 32)); // note sign extension
            hi = hi < 0 ? -1 : 0;
        }
        return *this;
    }
    llong operator>>(int32_t shift) const { llong r(*this); r >>= shift; return r; }

    // unsigned right shift
    friend llong ushr(const llong& lhs, int32_t shift);

    // bit operations
    friend llong operator&(const llong& lhs, const llong& rhs);
    friend llong operator|(const llong& lhs, const llong& rhs);
    friend llong operator^(const llong& lhs, const llong& rhs);

    friend llong operator&(const llong& lhs, const uint32_t rhs);
    friend llong operator|(const llong& lhs, const uint32_t rhs);
    friend llong operator^(const llong& lhs, const uint32_t rhs);

    llong operator~() const { return llong(~hi, ~lo); }
    // is this useful?
    // UBool operator!() const { return !(hi | lo); }

    llong& operator&=(const llong& rhs) { hi &= rhs.hi; lo &= rhs.lo; return *this; }
    llong& operator|=(const llong& rhs) { hi |= rhs.hi; lo |= rhs.lo; return *this; }
    llong& operator^=(const llong& rhs) { hi ^= rhs.hi; lo ^= rhs.lo; return *this; }

    // a uint32_t operand is zero-extended, so AND clears the high word
    llong& operator&=(const uint32_t rhs) { hi = 0; lo &= rhs; return *this; }
    llong& operator|=(const uint32_t rhs) { lo |= rhs; return *this; }
    llong& operator^=(const uint32_t rhs) { lo ^= rhs; return *this; }

    // no logical ops since we can't enforce order of evaluation, not much use anyway?

    // comparison
    friend UBool operator==(const llong& lhs, const llong& rhs);
    friend UBool operator!=(const llong& lhs, const llong& rhs);
    friend UBool operator> (const llong& lhs, const llong& rhs);
    friend UBool operator< (const llong& lhs, const llong& rhs);
    friend UBool operator>=(const llong& lhs, const llong& rhs);
    friend UBool operator<=(const llong& lhs, const llong& rhs);

    // overload comparison to native int to avoid conversion to llong for common comparisons
    friend UBool operator==(const llong& lhs, const int32_t rhs);
    friend UBool operator!=(const llong& lhs, const int32_t rhs);
    friend UBool operator> (const llong& lhs, const int32_t rhs);
    friend UBool operator< (const llong& lhs, const int32_t rhs);
    friend UBool operator>=(const llong& lhs, const int32_t rhs);
    friend UBool operator<=(const llong& lhs, const int32_t rhs);

    // unsigned comparison
    friend UBool ugt(const llong& lhs, const llong& rhs);
    friend UBool ult(const llong& lhs, const llong& rhs);
    friend UBool uge(const llong& lhs, const llong& rhs);
    friend UBool ule(const llong& lhs, const llong& rhs);

    // prefix inc/dec
    llong& operator++() { if (!++lo) ++hi; return *this; }
    llong& operator--() { if (!lo--) --hi; return *this; }

    // postfix inc/dec
    llong operator++(int) { llong r(*this); if (!++lo) ++hi; return r; }
    llong operator--(int) { llong r(*this); if (!lo--) --hi; return r; }

    // unary minus
    llong operator-() const { uint32_t l = ~lo + 1; return llong(l ? ~hi : ~hi + 1, l); }

    // addition and subtraction
    llong& operator-=(const llong& rhs) { hi -= rhs.hi; if (lo < rhs.lo) --hi; lo -= rhs.lo; return *this; }
    friend llong operator-(const llong& lhs, const llong& rhs);

    llong& operator+=(const llong& rhs) { return *this -= -rhs; }
    friend llong operator+(const llong& lhs, const llong& rhs);

    // pluttification and fizzen'
    llong& operator*=(const llong& rhs);
    friend llong operator*(const llong& lhs, const llong& rhs);

    llong& operator/=(const llong& rhs);
    friend llong operator/(const llong& lhs, const llong& rhs);

    llong& operator%=(const llong& rhs) { return operator-=((*this / rhs) * rhs); }
    friend llong operator%(const llong& lhs, const llong& rhs);

    // power function, positive integral powers only
    friend llong llong_pow(const llong& lhs, uint32_t n);

    // absolute value
    friend llong llong_abs(const llong& lhs);

    // simple construction from ASCII and Unicode strings
    friend llong atoll(const char* str, uint32_t radix = 10);
    friend llong u_atoll(const UChar* str, uint32_t radix = 10);

    // output as ASCII or Unicode strings or as raw values, preceding '-' if signed
    friend uint32_t lltoa(const llong& lhs, char* buffer, uint32_t buflen, uint32_t radix = 10, UBool raw = FALSE);
    friend uint32_t u_lltoa(const llong& lhs, UChar* buffer, uint32_t buflen, uint32_t radix = 10, UBool raw = FALSE);

    // useful public constants - perhaps should not have class statics
    static const llong& kMaxValue;
    static const llong& kMinValue;
    static const llong& kMinusOne;
    static const llong& kZero;
    static const llong& kOne;
    static const llong& kTwo;

private:
    // limits used by the double constructor
    static const llong& kMaxDouble;
    static const llong& kMinDouble;

    // right shift without sign extension
    llong& ushr(int32_t shift) {
        shift &= 63; // same reduction as the other shifts (was 0x63, which dropped bits 2-4 of the count)
        if (shift == 0) {
            // nothing to do; also keeps "32 - shift" below from reaching 32
        } else if (shift < 32) {
            lo >>= shift;
            lo |= ((uint32_t)hi << (32 - shift));
            hi = (signed)(((unsigned)hi) >> shift);
        } else {
            lo = (unsigned)(((unsigned)hi) >> (shift - 32));
            hi = 0;
        }
        return *this;
    }

    // back door for test
    friend void llong_test();
};
// Bitwise operators. The llong/llong forms combine the high and low words
// independently.
inline llong operator& (const llong& lhs, const llong& rhs) { return llong(lhs.hi & rhs.hi, lhs.lo & rhs.lo); }
inline llong operator| (const llong& lhs, const llong& rhs) { return llong(lhs.hi | rhs.hi, lhs.lo | rhs.lo); }
inline llong operator^ (const llong& lhs, const llong& rhs) { return llong(lhs.hi ^ rhs.hi, lhs.lo ^ rhs.lo); }
// The uint32_t forms treat rhs as having a zero high word: AND therefore
// clears the high word of the result, while OR and XOR leave it unchanged.
inline llong operator& (const llong& lhs, const uint32_t rhs) { return llong(0, lhs.lo & rhs); }
inline llong operator| (const llong& lhs, const uint32_t rhs) { return llong(lhs.hi, lhs.lo | rhs); }
inline llong operator^ (const llong& lhs, const uint32_t rhs) { return llong(lhs.hi, lhs.lo ^ rhs); }
// Signed comparisons between two llongs: the high words decide (compared as
// signed values) unless they are equal, in which case the low words decide
// (compared as unsigned values).
inline UBool operator==(const llong& lhs, const llong& rhs) { return lhs.lo == rhs.lo && lhs.hi == rhs.hi; }
inline UBool operator!=(const llong& lhs, const llong& rhs) { return lhs.lo != rhs.lo || lhs.hi != rhs.hi; }
inline UBool operator> (const llong& lhs, const llong& rhs) { return lhs.hi == rhs.hi ? lhs.lo > rhs.lo : lhs.hi > rhs.hi; }
inline UBool operator< (const llong& lhs, const llong& rhs) { return lhs.hi == rhs.hi ? lhs.lo < rhs.lo : lhs.hi < rhs.hi; }
inline UBool operator>=(const llong& lhs, const llong& rhs) { return lhs.hi == rhs.hi ? lhs.lo >= rhs.lo : lhs.hi >= rhs.hi; }
inline UBool operator<=(const llong& lhs, const llong& rhs) { return lhs.hi == rhs.hi ? lhs.lo <= rhs.lo : lhs.hi <= rhs.hi; }
// Signed comparisons against an int32_t. rhs is treated as sign-extended:
// its implied high word is -1 when rhs is negative and 0 otherwise, and its
// low word is the bit pattern of rhs reinterpreted as unsigned.
inline UBool operator==(const llong& lhs, const int32_t rhs) { return lhs.lo == (unsigned)rhs && lhs.hi == (rhs < 0 ? -1 : 0); }
inline UBool operator!=(const llong& lhs, const int32_t rhs) { return lhs.lo != (unsigned)rhs || lhs.hi != (rhs < 0 ? -1 : 0); }
inline UBool operator> (const llong& lhs, const int32_t rhs) { return rhs < 0 ? (lhs.hi == -1 ? lhs.lo > (unsigned)rhs : lhs.hi > -1)
: (lhs.hi == 0 ? lhs.lo > (unsigned)rhs : lhs.hi > 0); }
inline UBool operator< (const llong& lhs, const int32_t rhs) { return rhs < 0 ? (lhs.hi == -1 ? lhs.lo < (unsigned)rhs : lhs.hi < -1)
: (lhs.hi == 0 ? lhs.lo < (unsigned)rhs : lhs.hi < 0); }
inline UBool operator>=(const llong& lhs, const int32_t rhs) { return rhs < 0 ? (lhs.hi == -1 ? lhs.lo >= (unsigned)rhs : lhs.hi > -1)
: (lhs.hi == 0 ? lhs.lo >= (unsigned)rhs : lhs.hi > 0); }
inline UBool operator<=(const llong& lhs, const int32_t rhs) { return rhs < 0 ? (lhs.hi == -1 ? lhs.lo <= (unsigned)rhs : lhs.hi < -1)
: (lhs.hi == 0 ? lhs.lo <= (unsigned)rhs : lhs.hi < 0); }
// Unsigned comparisons: same scheme as the signed operators, except that the
// high words are also compared as unsigned values.
inline UBool ugt(const llong& lhs, const llong& rhs) { return lhs.hi == rhs.hi ? lhs.lo > rhs.lo : (unsigned)lhs.hi > (unsigned)rhs.hi; }
inline UBool ult(const llong& lhs, const llong& rhs) { return lhs.hi == rhs.hi ? lhs.lo < rhs.lo : (unsigned)lhs.hi < (unsigned)rhs.hi; }
inline UBool uge(const llong& lhs, const llong& rhs) { return lhs.hi == rhs.hi ? lhs.lo >= rhs.lo : (unsigned)lhs.hi >= (unsigned)rhs.hi; }
inline UBool ule(const llong& lhs, const llong& rhs) { return lhs.hi == rhs.hi ? lhs.lo <= rhs.lo : (unsigned)lhs.hi <= (unsigned)rhs.hi; }
// Non-mutating form of the member ushr(): shifts a copy of lhs and returns it.
inline llong ushr(const llong& lhs, int32_t shift) { llong r(lhs); r.ushr(shift); return r; }
// Binary arithmetic. Subtraction borrows one from the high word when the low
// word of lhs is smaller than that of rhs; addition is subtraction of the
// negated operand; the remaining operators copy lhs and apply the matching
// compound assignment.
inline llong operator-(const llong& lhs, const llong& rhs) { return llong(lhs.lo < rhs.lo ? lhs.hi - rhs.hi - 1 : lhs.hi - rhs.hi, lhs.lo - rhs.lo); }
inline llong operator+(const llong& lhs, const llong& rhs) { return lhs - -rhs; }
inline llong operator*(const llong& lhs, const llong& rhs) { llong r(lhs); r *= rhs; return r; }
inline llong operator/(const llong& lhs, const llong& rhs) { llong r(lhs); r /= rhs; return r; }
inline llong operator%(const llong& lhs, const llong& rhs) { llong r(lhs); r %= rhs; return r; }
// Narrowing conversions. llong_asInt returns the low word with its top bit
// forced on when the value is negative; llong_asUInt returns the low word
// unchanged.
inline int32_t llong_asInt(const llong& lhs) { return (int32_t)(lhs.lo | (lhs.hi < 0 ? 0x80000000 : 0)); }
inline uint32_t llong_asUInt(const llong& lhs) { return lhs.lo; }
// NOTE(review): kD32 is declared outside this section; presumably 2^32 as a
// double, which makes this hi * 2^32 + lo - confirm
inline double llong_asDouble(const llong& lhs) { return llong::kD32 * lhs.hi + lhs.lo; }
// Raises lhs to the non-negative integral power n by repeated multiplication.
// A zero base yields zero for every n (including n == 0); otherwise a zero
// exponent yields one.
inline llong llong_pow(const llong& lhs, uint32_t n) {
    if (lhs.isZero()) {
        return llong::kZero;
    }
    if (n == 0) {
        return llong::kOne;
    }
    // start from lhs^1 and multiply in the remaining n - 1 factors
    llong product(lhs);
    for (uint32_t remaining = n - 1; remaining > 0; --remaining) {
        product *= lhs;
    }
    return product;
}
// Absolute value: negates lhs when it is negative, otherwise returns a copy.
inline llong llong_abs(const llong& lhs) {
    if (lhs.isNegative()) {
        return -lhs;
    }
    return lhs;
}
// Originally, I thought that overloading on int32_t was too complex or too large to get inlined, and
// since I mainly wanted to optimize comparisons to zero, I overloaded on uint32_t instead
// since it has a simpler implementation.
// But this means that llong(-1) != -1 (since the comparison treats the rhs as unsigned, but
// the constructor does not). So I am using the signed versions after all.
#if 0
inline UBool operator==(const llong& lhs, const uint32_t rhs) { return lhs.lo == rhs && lhs.hi == 0; }
inline UBool operator!=(const llong& lhs, const uint32_t rhs) { return lhs.lo != rhs || lhs.hi != 0; }
inline UBool operator> (const llong& lhs, const uint32_t rhs) { return lhs.hi == 0 ? lhs.lo > rhs : lhs.hi > 0; }
inline UBool operator< (const llong& lhs, const uint32_t rhs) { return lhs.hi == 0 ? lhs.lo < rhs : lhs.hi < 0; }
inline UBool operator>=(const llong& lhs, const uint32_t rhs) { return lhs.hi == 0 ? lhs.lo >= rhs : lhs.hi >= 0; }
inline UBool operator<=(const llong& lhs, const uint32_t rhs) { return lhs.hi == 0 ? lhs.lo <= rhs : lhs.hi <= 0; }
#endif
// LLONG_H
#endif

View File

@ -0,0 +1,68 @@
/*
*******************************************************************************
* Copyright (C) 1997-2001, International Business Machines Corporation and others. All Rights Reserved.
*******************************************************************************
*/
#ifndef NFRLIST_H
#define NFRLIST_H
#include "cmemory.h"
#include "unicode/umachine.h"
#include "nfrule.h"
U_NAMESPACE_BEGIN
// unsafe class for internal use only. assume memory allocations succeed, indexes are valid.
// should be a template, but we can't use them
/**
 * Minimal growable array of NFRule pointers.
 *
 * The list owns the rules stored in it: the destructor deletes every rule
 * still in the list and frees the pointer array. Ownership of a rule passes
 * back to the caller through remove(), and ownership of the whole array
 * passes to the caller through release().
 *
 * No bounds checking is done and memory allocations are assumed to succeed.
 */
class NFRuleList {
protected:
    NFRule** fStuff;      // pointer array, NULL when nothing is allocated
    uint32_t fCount;      // number of slots in use
    uint32_t fCapacity;   // number of slots allocated
public:
    /**
     * Creates an empty list.
     * @param capacity number of slots to allocate up front; zero or a
     *        negative value defers allocation to the first add()
     */
    NFRuleList(int capacity = 10)
        : fStuff(capacity > 0 ? (NFRule**)uprv_malloc(capacity * sizeof(NFRule*)) : NULL)
        , fCount(0)
        , fCapacity(capacity > 0 ? capacity : 0) {}

    /** Deletes every rule still held, then frees the pointer array. */
    ~NFRuleList() {
        if (fStuff) {
            for (uint32_t i = 0; i < fCount; ++i) {
                delete fStuff[i];
            }
            uprv_free(fStuff);
        }
    }

    /** Returns the rule at index; index is not range-checked. */
    NFRule* operator[](uint32_t index) const { return fStuff[index]; }

    /**
     * Removes the rule at index, closing the gap, and returns it.
     * The caller becomes the owner of the returned rule.
     * index is not range-checked.
     */
    NFRule* remove(uint32_t index) {
        NFRule* result = fStuff[index];
        fCount -= 1;
        for (uint32_t i = index; i < fCount; ++i) { // assumes small arrays
            fStuff[i] = fStuff[i+1];
        }
        return result;
    }

    /** Appends thing, growing the array by ten slots when it is full. */
    void add(NFRule* thing) {
        if (fCount == fCapacity) {
            fCapacity += 10;
            fStuff = (NFRule**)uprv_realloc(fStuff, fCapacity * sizeof(NFRule*)); // assume success
        }
        fStuff[fCount++] = thing;
    }

    /** Number of rules currently in the list. */
    uint32_t size() const { return fCount; }

    /** Last rule in the list, or NULL when the list is empty. */
    NFRule* last() const { return fCount > 0 ? fStuff[fCount-1] : NULL; }

    /**
     * Hands the NULL-terminated pointer array to the caller and leaves this
     * list empty. The caller becomes responsible for the array and the rules.
     */
    NFRule** release() {
        add(NULL); // ensure null termination
        NFRule** result = fStuff;
        fStuff = NULL;
        fCount = 0;
        fCapacity = 0;
        return result;
    }

private:
    // Copying is forbidden: a member-wise copy would leave two lists that
    // both delete the same rules and free the same array.
    NFRuleList(const NFRuleList& other);            // not implemented
    NFRuleList& operator=(const NFRuleList& other); // not implemented
};
U_NAMESPACE_END
// NFRLIST_H
#endif

659
icu4c/source/i18n/nfrs.cpp Normal file
View File

@ -0,0 +1,659 @@
/*
*******************************************************************************
* Copyright (C) 1997-2001, International Business Machines Corporation and others. All Rights Reserved.
*******************************************************************************
*/
#include <math.h>
#include "nfrs.h"
#include "nfrule.h"
#include "nfrlist.h"
#include "cmemory.h"
U_NAMESPACE_BEGIN
#if 0
// euclid's algorithm works with doubles
// note, doubles only get us up to one quadrillion or so, which
// isn't as much range as we get with longs. We probably still
// want either 64-bit math, or BigInteger.
static llong
util_lcm(llong x, llong y)
{
x.abs();
y.abs();
if (x == 0 || y == 0) {
return 0;
} else {
do {
if (x < y) {
llong t = x; x = y; y = t;
}
x -= y * (x/y);
} while (x != 0);
return y;
}
}
#else
/**
 * Calculates the least common multiple of x and y.
 *
 * The greatest common divisor is found with the binary gcd algorithm and the
 * result is x / gcd * y. If either argument is zero the result is zero.
 * NOTE(review): negative arguments are not handled specially; callers
 * presumably pass positive rule base values only - confirm.
 */
static llong
util_lcm(llong x, llong y)
{
    // lcm(0, n) is 0. Without this check two zero arguments keep the
    // power-of-two loop below spinning forever, and a zero x produces a zero
    // gcd that is then used as a divisor.
    if (x == 0 || y == 0) {
        return llong::kZero;
    }

    // binary gcd algorithm from Knuth, "The Art of Computer Programming,"
    // vol. 2, 1st ed., pp. 298-299
    llong x1 = x;
    llong y1 = y;

    // strip the power of two common to both values; it is restored below
    int p2 = 0;
    while ((x1 & 1) == 0 && (y1 & 1) == 0) {
        ++p2;
        x1 >>= 1;
        y1 >>= 1;
    }

    llong t;
    if ((x1 & 1) == 1) {
        t = -y1;
    } else {
        t = x1;
    }

    while (t != 0) {
        // t's remaining factors of two cannot be part of the gcd
        while ((t & 1) == 0) {
            t >>= 1;
        }
        if (t > 0) {
            x1 = t;
        } else {
            y1 = -t;
        }
        t = x1 - y1;
    }

    llong gcd = x1 << p2;

    // x * y == gcd(x, y) * lcm(x, y)
    return x / gcd * y;
}
#endif
// Characters and strings used when parsing rule set descriptions and when
// writing them back out in appendRules().
static const UChar gPercent = 0x0025;   // '%'
static const UChar gColon = 0x003a;     // ':'
static const UChar gSemicolon = 0x003b; // ';'
// LINE FEED is U+000A. The previous value, 0x0010, is the DLE control
// character, so rule text was written with DLE where line breaks belong.
static const UChar gLineFeed = 0x000a;
// indentation written in front of each rule: four spaces, as the name says
static const UnicodeString gFourSpaces("    ");
static const UnicodeString gPercentPercent("%%");
/**
 * Creates a rule set from descriptions[index], consuming the rule set name.
 *
 * A description that starts with '%' must carry a name terminated by ':'.
 * The name is copied into this object, and the name plus the whitespace
 * after the colon are removed from the description in place. A description
 * without a leading '%' gets the name "%default". The rules themselves are
 * not parsed here; parseRules() does that.
 *
 * status is set to U_PARSE_ERROR when the name has no terminating colon or
 * when nothing is left of the description.
 */
NFRuleSet::NFRuleSet(UnicodeString* descriptions, int32_t index, UErrorCode& status)
    : name()
    , rules(0)
    , negativeNumberRule(NULL)
    , fIsFractionRuleSet(FALSE)
    , fIsPublic(FALSE)
{
    fractionRules[0] = NULL;
    fractionRules[1] = NULL;
    fractionRules[2] = NULL;

    if (U_FAILURE(status)) {
        return;
    }

    UnicodeString& description = descriptions[index]; // !!! make sure index is valid

    if (description.charAt(0) != gPercent) {
        // unnamed rule set
        name.setTo("%default");
    } else {
        UTextOffset colonPos = description.indexOf(gColon);
        if (colonPos != -1) {
            name.setTo(description, 0, colonPos);
            // step over the colon and any whitespace that follows it
            UTextOffset bodyStart = colonPos;
            while (bodyStart < description.length()) {
                ++bodyStart;
                if (!u_isWhitespace(description.charAt(bodyStart))) {
                    break;
                }
            }
            description.remove(0, bodyStart);
        } else {
            // throw new IllegalArgumentException("Rule set name doesn't end in colon");
            status = U_PARSE_ERROR;
        }
    }

    if (description.length() == 0) {
        // throw new IllegalArgumentException("Empty rule set description");
        status = U_PARSE_ERROR;
    }

    // names that begin with "%%" mark rule sets that are not public
    fIsPublic = name.indexOf(gPercentPercent) != 0;

    // all of the other members of NFRuleSet are initialized
    // by parseRules()
}
/**
 * Builds this rule set's rules from a description string.
 *
 * The description is split at semicolons (there is no escape facility: ALL
 * semicolons are rule delimiters) and each piece is handed to
 * NFRule::makeRules(), which appends the resulting rules to the rule list.
 * A second pass then assigns default base values, moves the special rules
 * (negative-number, improper-fraction, proper-fraction, master) out of the
 * list into their own members, and checks that the remaining rules are in
 * ascending base-value order.
 *
 * @param description rule text with the rule set name already removed
 * @param owner       the formatter these rules belong to
 * @param status      set to U_PARSE_ERROR when the rules are out of order;
 *                    nothing is done if it already indicates failure
 */
void
NFRuleSet::parseRules(UnicodeString& description, const RuleBasedNumberFormat* owner, UErrorCode& status)
{
    if (U_FAILURE(status)) {
        return;
    }

    // dlf - the original code kept a separate description array for no reason,
    // so I got rid of it. The loop was too complex so I simplified it.
    UnicodeString currentDescription;
    UTextOffset oldP = 0;
    while (oldP < description.length()) {
        UTextOffset p = description.indexOf(gSemicolon, oldP);
        if (p == -1) {
            p = description.length();
        }
        currentDescription.setTo(description, oldP, p - oldP);
        NFRule::makeRules(currentDescription, this, rules.last(), owner, rules, status);
        oldP = p + 1;
    }

    // for rules that didn't specify a base value, their base values
    // were initialized to 0. Make another pass through the list and
    // set all those rules' base values. We also remove any special
    // rules from the list and put them into their own member variables
    llong defaultBaseValue = (int32_t)0;

    // (this isn't a for loop because we might be deleting items from
    // the vector-- we want to make sure we only increment i when
    // we _didn't_ delete anything from the vector)
    uint32_t i = 0;
    while (i < rules.size()) {
        NFRule* rule = rules[i];

        switch (rule->getType()) {
        // if the rule's base value is 0, fill in a default
        // base value (this will be 1 plus the preceding
        // rule's base value for regular rule sets, and the
        // same as the preceding rule's base value in fraction
        // rule sets)
        case NFRule::kNoBase:
            rule->setBaseValue(defaultBaseValue);
            if (!isFractionRuleSet()) {
                ++defaultBaseValue;
            }
            ++i;
            break;

        // For each special rule below: the rule is moved out of the list
        // into its own member. If the description contained the same kind of
        // special rule more than once, the earlier one is deleted first so
        // that it is not leaked; the last one wins.

        // negative-number rule
        case NFRule::kNegativeNumberRule:
            delete negativeNumberRule;
            negativeNumberRule = rules.remove(i);
            break;

        // improper fraction rule
        case NFRule::kImproperFractionRule:
            delete fractionRules[0];
            fractionRules[0] = rules.remove(i);
            break;

        // proper fraction rule
        case NFRule::kProperFractionRule:
            delete fractionRules[1];
            fractionRules[1] = rules.remove(i);
            break;

        // master rule
        case NFRule::kMasterRule:
            delete fractionRules[2];
            fractionRules[2] = rules.remove(i);
            break;

        // if it's a regular rule that already knows its base value,
        // check to make sure the rules are in order, and update
        // the default base value for the next rule
        default:
            if (rule->getBaseValue() < defaultBaseValue) {
                // throw new IllegalArgumentException("Rules are not in order");
                status = U_PARSE_ERROR;
                return;
            }
            defaultBaseValue = rule->getBaseValue();
            if (!isFractionRuleSet()) {
                ++defaultBaseValue;
            }
            ++i;
            break;
        }
    }
}
// Deletes the special rules held directly by this object. The regular rules
// live in the `rules` list, whose own destructor deletes them.
NFRuleSet::~NFRuleSet()
{
    delete negativeNumberRule;
    for (int slot = 0; slot < 3; ++slot) {
        delete fractionRules[slot];
    }
}
// Compares two possibly-NULL rules: two NULLs are equal, a NULL never equals
// a non-NULL rule, and two non-NULL rules are compared by value.
UBool
util_equalRules(const NFRule* rule1, const NFRule* rule2)
{
    if (rule1 && rule2) {
        return *rule1 == *rule2;
    }
    if (!rule1 && !rule2) {
        return TRUE;
    }
    return FALSE;
}
// Two rule sets are equal when they have the same name, the same
// fraction-rule-set flag, equal special rules, and equal regular rules in the
// same order.
UBool
NFRuleSet::operator==(const NFRuleSet& rhs) const
{
    // cheap scalar checks and the special rules first
    if (!(rules.size() == rhs.rules.size()) ||
        !(fIsFractionRuleSet == rhs.fIsFractionRuleSet) ||
        !(name == rhs.name) ||
        !util_equalRules(negativeNumberRule, rhs.negativeNumberRule) ||
        !util_equalRules(fractionRules[0], rhs.fractionRules[0]) ||
        !util_equalRules(fractionRules[1], rhs.fractionRules[1]) ||
        !util_equalRules(fractionRules[2], rhs.fractionRules[2])) {
        return FALSE;
    }

    // then the regular rules, pairwise
    const uint32_t ruleCount = rules.size();
    for (uint32_t idx = 0; idx < ruleCount; ++idx) {
        if (*rules[idx] != *rhs.rules[idx]) {
            return FALSE;
        }
    }
    return TRUE;
}
/**
 * Formats an integral number: finds the applicable rule and lets it insert
 * its text into toAppendTo at pos. Nothing is inserted when the rule set has
 * no rule that applies to the number (a malformed rule set).
 */
void
NFRuleSet::format(llong number, UnicodeString& toAppendTo, int32_t pos) const
{
    NFRule *rule = findNormalRule(number);
    if (rule) { // NULL means the rule set has no applicable rule
        rule->doFormat(number, toAppendTo, pos);
    }
}

/**
 * Formats a floating-point number: finds the applicable rule and lets it
 * insert its text into toAppendTo at pos. Nothing is inserted when the rule
 * set has no rule that applies to the number (a malformed rule set).
 */
void
NFRuleSet::format(double number, UnicodeString& toAppendTo, int32_t pos) const
{
    NFRule *rule = findDoubleRule(number);
    if (rule) { // NULL means the rule set has no applicable rule
        rule->doFormat(number, toAppendTo, pos);
    }
}

/**
 * Selects the rule used to format a floating-point number.
 * @return the rule, or NULL when findNormalRule() finds no applicable rule
 */
NFRule*
NFRuleSet::findDoubleRule(double number) const
{
    // if this is a fraction rule set, use findFractionRuleSetRule()
    if (isFractionRuleSet()) {
        return findFractionRuleSetRule(number);
    }

    // if the number is negative, return the negative number rule
    // (if there isn't a negative-number rule, we pretend it's a
    // positive number)
    if (number < 0) {
        if (negativeNumberRule) {
            return negativeNumberRule;
        } else {
            number = -number;
        }
    }

    // if the number isn't an integer, we use one of the fraction rules...
    if (number != uprv_floor(number)) {
        // if the number is between 0 and 1, return the proper
        // fraction rule
        if (number < 1 && fractionRules[1]) {
            return fractionRules[1];
        }
        // otherwise, return the improper fraction rule
        else if (fractionRules[0]) {
            return fractionRules[0];
        }
    }

    // if there's a master rule, use it to format the number
    if (fractionRules[2]) {
        return fractionRules[2];
    }

    // and if we haven't yet returned a rule, use findNormalRule()
    // to find the applicable rule (the value is rounded to the nearest
    // integer first)
    llong r = number + 0.5;
    return findNormalRule(r);
}

/**
 * Selects the rule used to format an integral number.
 * @return the rule, or NULL when the rule set is malformed: either no rule
 *         has a base value at or below the number, or the selected rule asks
 *         to roll back and there is no earlier rule to roll back to
 */
NFRule *
NFRuleSet::findNormalRule(llong number) const
{
    // if this is a fraction rule set, use findFractionRuleSetRule()
    // to find the rule (we should only go into this clause if the
    // value is 0)
    if (fIsFractionRuleSet) {
        return findFractionRuleSetRule(llong_asDouble(number));
    }

    // if the number is negative, return the negative-number rule
    // (if there isn't one, pretend the number is positive)
    if (number < 0) {
        if (negativeNumberRule) {
            return negativeNumberRule;
        } else {
            number = -number;
        }
    }

    // we have to repeat the preceding two checks, even though we
    // do them in findDoubleRule(), because the version of format() that
    // takes an llong bypasses findDoubleRule() and goes straight to this
    // function. This function does skip the fraction rules since
    // we know the value is an integer (it also skips the master
    // rule, since it's considered a fraction rule. Skipping the
    // master rule in this function is also how we avoid infinite
    // recursion)

    // binary-search the rule list for the applicable rule
    // (a rule is used for all values from its base value to
    // the next rule's base value)
    int32_t lo = 0;
    int32_t hi = rules.size();
    while (lo < hi) {
        int32_t mid = (lo + hi) / 2;
        if (rules[mid]->getBaseValue() == number) {
            return rules[mid];
        }
        else if (rules[mid]->getBaseValue() > number) {
            hi = mid;
        }
        else {
            lo = mid + 1;
        }
    }

    // hi == 0 means the list is empty or every base value is above the
    // number (this includes a number that is still negative after the
    // negation above). Indexing rules[hi - 1] would read before the start
    // of the array.
    if (hi == 0) {
        return NULL;
    }
    NFRule *result = rules[hi - 1];

    // use shouldRollBack() to see whether we need to invoke the
    // rollback rule (see shouldRollBack()'s documentation for
    // an explanation of the rollback rule). If we do, roll back
    // one rule and return that one instead of the one we'd normally
    // return
    if (result->shouldRollBack(llong_asDouble(number))) {
        if (hi == 1) {
            // no earlier rule to roll back to
            return NULL;
        }
        result = rules[hi - 2];
    }
    return result;
}
/**
* If this rule set is a fraction rule set, this function is used by
* findDoubleRule() and findNormalRule() to select the most appropriate
* rule for formatting the number. Basically, the base value of each rule
* in the rule set is treated as the denominator of a fraction. Whichever
* denominator can produce the fraction closest in value to the
* number passed in is the result. If there's a tie, the earlier
* one in the list wins. (If there are two rules in a row with the
* same base value, the first one is used when the numerator of the
* fraction would be 1, and the second rule is used the rest of the
* time.)
* NOTE(review): rules[0] is read unconditionally, so the rule list is
* presumably never empty here - confirm against callers.
* @param number The number being formatted (which will always be
* a number between 0 and 1)
* @return The rule to use to format this number
*/
NFRule*
NFRuleSet::findFractionRuleSetRule(double number) const
{
// the obvious way to do this (multiply the value being formatted
// by each rule's base value until you get an integral result)
// doesn't work because of rounding error. This method is more
// accurate
// find the least common multiple of the rules' base values
// and multiply this by the number being formatted. This is
// all the precision we need, and we can do all of the rest
// of the math using integer arithmetic
llong leastCommonMultiple = rules[0]->getBaseValue();
llong numerator;
{
for (uint32_t i = 1; i < rules.size(); ++i) {
leastCommonMultiple = util_lcm(leastCommonMultiple, rules[i]->getBaseValue());
}
// adding 0.5 rounds to the nearest integer when the double is converted
numerator = number * llong_asDouble(leastCommonMultiple) + 0.5;
}
// for each rule, do the following...
llong tempDifference;
llong difference = llong::kMaxValue;
int32_t winner = 0;
for (uint32_t i = 0; i < rules.size(); ++i) {
// "numerator" is the numerator of the fraction if the
// denominator is the LCD. The numerator if the rule's
// base value is the denominator is "numerator" times the
// base value divided by the LCD. Here we check to see if
// that's an integer, and if not, how close it is to being
// an integer.
tempDifference = numerator * rules[i]->getBaseValue() % leastCommonMultiple;
// normalize the result of the above calculation: we want
// the numerator's distance from the CLOSEST multiple
// of the LCD
if (leastCommonMultiple - tempDifference < tempDifference) {
tempDifference = leastCommonMultiple - tempDifference;
}
// if this is as close as we've come, keep track of how close
// that is, and the line number of the rule that did it. If
// we've scored a direct hit, we don't have to look at any more
// rules
if (tempDifference < difference) {
difference = tempDifference;
winner = i;
if (difference == 0) {
break;
}
}
}
// if we have two successive rules that both have the winning base
// value, then the first one (the one we found above) is used if
// the numerator of the fraction is 1 and the second one is used if
// the numerator of the fraction is anything else (this lets us
// do things like "one third"/"two thirds" without having to define
// a whole bunch of extra rule sets)
if ((unsigned)(winner + 1) < rules.size() &&
rules[winner + 1]->getBaseValue() == rules[winner]->getBaseValue()) {
// n approximates the numerator for the winning denominator; anything
// outside [0.5, 2) is treated as "not 1"
double n = llong_asDouble(rules[winner]->getBaseValue()) * number;
if (n < 0.5 || n >= 2) {
++winner;
}
}
// finally, return the winning rule
return rules[winner];
}
/**
* Parses a string. Matches the string to be parsed against each
* of its rules (with a base value less than upperBound) and returns
* the value produced by the rule that matched the most characters
* in the source string.
* @param text The string to parse
* @param parsePosition The initial position is ignored and assumed
* to be 0. On exit, this object has been updated to point to the
* first character position this rule set didn't consume.
* @param upperBound Limits the rules that can be allowed to match.
* Only rules whose base values are strictly less than upperBound
* are considered.
* @return The numerical result of parsing this string. This will
* be the matching rule's base value, composed appropriately with
* the results of matching any of its substitutions. The object
* will be an instance of Long if it's an integral value; otherwise,
* it will be an instance of Double. This function always returns
* a valid object: If nothing matched the input string at all,
* this function returns new Long(0), and the parse position is
* left unchanged.
*/
#ifdef RBNF_DEBUG
// Debug helper (RBNF_DEBUG builds only): writes the contents of us to f by
// way of a temporary, zero-terminated char buffer.
static void dumpUS(FILE* f, const UnicodeString& us) {
    const int charCount = us.length();
    char* chars = new char[charCount + 1];
    us.extract(0, charCount, chars);
    chars[charCount] = 0; // terminate explicitly
    fprintf(f, "%s", chars);
    delete[] chars;
}
#endif
/**
 * Parses text against every rule in this rule set and keeps the result of
 * the rule that consumed the most characters.
 *
 * The negative-number rule is tried first, then the three fraction rules,
 * then the regular rules from last to first. Regular rules whose base value
 * is not below upperBound are skipped unless this is a fraction rule set.
 *
 * @param text       the string to parse
 * @param pos        on exit, set to the position reached by the best match
 *                   (index 0 when nothing matched)
 * @param upperBound limit on the base values of the regular rules tried
 * @param result     receives the parsed value; set to 0 when nothing matched
 * @return FALSE when text is empty, TRUE otherwise (even if no rule matched)
 */
UBool
NFRuleSet::parse(const UnicodeString& text, ParsePosition& pos, double upperBound, Formattable& result) const
{
    // try matching each rule in the rule set against the text being
    // parsed. Whichever one matches the most characters is the one
    // that determines the value we return.
    result.setLong(0);

    // dump out if there's no text to parse
    if (text.length() == 0) {
        return FALSE;
    }

    ParsePosition highWaterMark;
    ParsePosition workingPos = pos;

#ifdef RBNF_DEBUG
    // pointers are printed with %p; the old "%x" did not match the argument type
    fprintf(stderr, "<nfrs> %p '", (const void*)this);
    dumpUS(stderr, name);
    fprintf(stderr, "' text '");
    dumpUS(stderr, text);
    fprintf(stderr, "'\n");
    // a stray `this` argument used to be consumed by the %d here
    fprintf(stderr, " parse negative: %d\n", negativeNumberRule != 0);
#endif

    // start by trying the negative number rule (if there is one)
    if (negativeNumberRule) {
        Formattable tempResult;
#ifdef RBNF_DEBUG
        fprintf(stderr, " <nfrs before negative> %p ub: %g\n", (const void*)negativeNumberRule, upperBound);
#endif
        UBool success = negativeNumberRule->doParse(text, workingPos, 0, upperBound, tempResult);
#ifdef RBNF_DEBUG
        fprintf(stderr, " <nfrs after negative> success: %d wpi: %d\n", success, workingPos.getIndex());
#endif
        if (success && workingPos.getIndex() > highWaterMark.getIndex()) {
            result = tempResult;
            highWaterMark = workingPos;
        }
        workingPos = pos;
    }

#ifdef RBNF_DEBUG
    fprintf(stderr, "<nfrs> continue fractional with text '");
    dumpUS(stderr, text);
    fprintf(stderr, "' hwm: %d\n", highWaterMark.getIndex());
#endif

    // then try each of the fraction rules
    {
        for (int i = 0; i < 3; i++) {
            if (fractionRules[i]) {
                Formattable tempResult;
                UBool success = fractionRules[i]->doParse(text, workingPos, 0, upperBound, tempResult);
                if (success && (workingPos.getIndex() > highWaterMark.getIndex())) {
                    result = tempResult;
                    highWaterMark = workingPos;
                }
                workingPos = pos;
            }
        }
    }

#ifdef RBNF_DEBUG
    fprintf(stderr, "<nfrs> continue other with text '");
    dumpUS(stderr, text);
    fprintf(stderr, "' hwm: %d\n", highWaterMark.getIndex());
#endif

    // finally, go through the regular rules one at a time. We start
    // at the end of the list because we want to try matching the most
    // significant rule first (this helps ensure that we parse
    // "five thousand three hundred six" as
    // "(five thousand) (three hundred) (six)" rather than
    // "((five thousand three) hundred) (six)"). Skip rules whose
    // base values are higher than the upper bound (again, this helps
    // limit ambiguity by making sure the rules that match a rule's
    // substitutions are less significant than the rule containing the
    // substitutions)
    {
        llong ub(upperBound);
#ifdef RBNF_DEBUG
        {
            char ubstr[64];
            lltoa(ub, ubstr, 64);
            fprintf(stderr, "ub: %g, ll: %s(%x/%x)\n", upperBound, ubstr, ub.hi, ub.lo);
        }
#endif
        // stop early once some rule has consumed the whole text
        for (int32_t i = rules.size(); --i >= 0 && highWaterMark.getIndex() < text.length();) {
            if ((!fIsFractionRuleSet) && (rules[i]->getBaseValue() >= ub)) {
                continue;
            }
            Formattable tempResult;
            UBool success = rules[i]->doParse(text, workingPos, fIsFractionRuleSet, upperBound, tempResult);
            if (success && workingPos.getIndex() > highWaterMark.getIndex()) {
                result = tempResult;
                highWaterMark = workingPos;
            }
            workingPos = pos;
        }
    }

#ifdef RBNF_DEBUG
    fprintf(stderr, "<nfrs> exit\n");
#endif

    // finally, update the parse position we were passed to point to the
    // first character we didn't use, and return the result that
    // corresponds to that string of characters
    pos = highWaterMark;
    return TRUE;
}
// Appends a textual description of this rule set to result: the name followed
// by a colon and a line break, then one line per rule, each prefixed with
// gFourSpaces. Regular rules come first, then the negative-number rule, then
// the fraction rules in array order; special rules that are absent are
// skipped.
void
NFRuleSet::appendRules(UnicodeString& result) const
{
    // header line
    result.append(name);
    result.append(gColon);
    result.append(gLineFeed);

    // regular rules
    const uint32_t regularCount = rules.size();
    for (uint32_t r = 0; r < regularCount; ++r) {
        result.append(gFourSpaces);
        rules[r]->appendRuleText(result);
        result.append(gLineFeed);
    }

    // special rules, where present
    if (negativeNumberRule != NULL) {
        result.append(gFourSpaces);
        negativeNumberRule->appendRuleText(result);
        result.append(gLineFeed);
    }

    for (uint32_t f = 0; f < 3; ++f) {
        const NFRule* special = fractionRules[f];
        if (special == NULL) {
            continue;
        }
        result.append(gFourSpaces);
        special->appendRuleText(result);
        result.append(gLineFeed);
    }
}
U_NAMESPACE_END

61
icu4c/source/i18n/nfrs.h Normal file
View File

@ -0,0 +1,61 @@
/*
*******************************************************************************
* Copyright (C) 1997-2001, International Business Machines Corporation and others. All Rights Reserved.
*******************************************************************************
*/
#ifndef NFRS_H
#define NFRS_H
#include "unicode/utypes.h"
#include "unicode/umisc.h"
#include "unicode/rbnf.h"
#include "nfrlist.h"
#include "llong.h"
U_NAMESPACE_BEGIN
/**
 * A named collection of rules for a rule-based number format.
 *
 * A rule set holds an ordered list of regular rules plus up to four special
 * rules (negative-number, improper-fraction, proper-fraction and master).
 * It owns all of them and deletes them in its destructor.
 */
class NFRuleSet {
public:
    /** Extracts the rule set name from descriptions[index]; see parseRules() for the rules. */
    NFRuleSet(UnicodeString* descriptions, int32_t index, UErrorCode& status);
    /** Builds the rules from a description whose name has already been removed. */
    void parseRules(UnicodeString& rules, const RuleBasedNumberFormat* owner, UErrorCode& status);
    /** Marks this rule set as a fraction rule set. */
    void makeIntoFractionRuleSet() { fIsFractionRuleSet = TRUE; }

    ~NFRuleSet();

    UBool operator==(const NFRuleSet& rhs) const;
    UBool operator!=(const NFRuleSet& rhs) const { return !operator==(rhs); }

    UBool isPublic() const { return fIsPublic; }
    UBool isFractionRuleSet() const { return fIsFractionRuleSet; }

    /** Copies this rule set's name into result. */
    void getName(UnicodeString& result) const { result.setTo(name); }
    UBool isNamed(const UnicodeString& _name) const { return this->name == _name; }

    void format(llong number, UnicodeString& toAppendTo, int32_t pos) const;
    void format(double number, UnicodeString& toAppendTo, int32_t pos) const;

    UBool parse(const UnicodeString& text, ParsePosition& pos, double upperBound, Formattable& result) const;

    void appendRules(UnicodeString& result) const; // toString

private:
    NFRule * findNormalRule(llong number) const;
    NFRule * findDoubleRule(double number) const;
    NFRule * findFractionRuleSetRule(double number) const;

private:
    UnicodeString name;
    NFRuleList rules;            // regular rules
    NFRule *negativeNumberRule;  // NULL when the rule set has none
    NFRule *fractionRules[3];    // [0] improper fraction, [1] proper fraction, [2] master rule
    UBool fIsFractionRuleSet;
    UBool fIsPublic;

    // Copying is forbidden: a member-wise copy would leave two rule sets
    // that both delete the same rules.
    NFRuleSet(const NFRuleSet& other);            // not implemented
    NFRuleSet& operator=(const NFRuleSet& other); // not implemented
};
U_NAMESPACE_END
// NFRS_H
#endif

1377
icu4c/source/i18n/nfrule.cpp Normal file

File diff suppressed because it is too large Load Diff

104
icu4c/source/i18n/nfrule.h Normal file
View File

@ -0,0 +1,104 @@
/*
*******************************************************************************
* Copyright (C) 1997-2001, International Business Machines Corporation and others. All Rights Reserved.
*******************************************************************************
*/
#ifndef NFRULE_H
#define NFRULE_H
#include "unicode/utypes.h"
#include "unicode/unistr.h"
#include "llong.h"
#include <math.h>
U_NAMESPACE_BEGIN
class FieldPosition;
class Formattable;
class NFRuleList;
class NFRuleSet;
class NFSubstitution;
class ParsePosition;
class RuleBasedNumberFormat;
class UnicodeString;
// A single rule of a rule set. The declaration below shows a base value, a
// radix/exponent pair, the rule text, up to two substitutions and the owning
// formatter; the behavior is implemented in nfrule.cpp.
class NFRule {
public:
// Rule kinds. Special rules are encoded as non-positive base values (see
// getType() and setType()); kOtherRule is what getType() reports for any
// rule with a positive base value.
enum ERuleType {
kNoBase = 0,
kNegativeNumberRule = -1,
kImproperFractionRule = -2,
kProperFractionRule = -3,
kMasterRule = -4,
kOtherRule = -5
};
// Parses one rule definition and adds the resulting rule(s) to ruleList.
static void makeRules(UnicodeString& definition,
const NFRuleSet* ruleSet,
const NFRule* predecessor,
const RuleBasedNumberFormat* rbnf,
NFRuleList& ruleList,
UErrorCode& status);
NFRule(const RuleBasedNumberFormat* rbnf);
~NFRule();
UBool operator==(const NFRule& rhs) const;
UBool operator!=(const NFRule& rhs) const { return !operator==(rhs); }
// A base value of zero or less is itself the rule type; any positive base
// value means kOtherRule.
ERuleType getType() const { return (ERuleType)(baseValue <= 0 ? llong_asInt(baseValue) : kOtherRule); }
// Stores the rule type in the base value.
void setType(ERuleType ruleType) { baseValue = (int32_t)ruleType; }
llong getBaseValue() const { return baseValue; }
void setBaseValue(llong value);
// The divisor is radix raised to the power exponent.
double getDivisor() const { return pow(radix, exponent); }
void doFormat(llong number, UnicodeString& toAppendTo, int32_t pos) const;
void doFormat(double number, UnicodeString& toAppendTo, int32_t pos) const;
UBool doParse(const UnicodeString& text,
ParsePosition& pos,
UBool isFractional,
double upperBound,
Formattable& result) const;
UBool shouldRollBack(double number) const;
void appendRuleText(UnicodeString& result) const;
private:
// helpers used while building a rule from its description
void parseRuleDescriptor(UnicodeString& descriptor, UErrorCode& status);
void extractSubstitutions(const NFRuleSet* ruleSet, const NFRule* predecessor, const RuleBasedNumberFormat* rbnf, UErrorCode& status);
NFSubstitution* extractSubstitution(const NFRuleSet* ruleSet, const NFRule* predecessor, const RuleBasedNumberFormat* rbnf, UErrorCode& status);
int16_t expectedExponent() const;
int32_t indexOfAny(const UnicodeString* strings[]) const;
// helpers used while parsing text
double matchToDelimiter(const UnicodeString& text, int32_t startPos, double baseValue,
const UnicodeString& delimiter, ParsePosition& pp, const NFSubstitution* sub,
double upperBound) const;
void stripPrefix(UnicodeString& text, const UnicodeString& prefix, ParsePosition& pp) const;
int32_t prefixLength(const UnicodeString& str, const UnicodeString& prefix) const;
UBool allIgnorable(const UnicodeString& str) const;
int32_t findText(const UnicodeString& str, const UnicodeString& key,
int32_t startingAt, int32_t* resultCount) const;
private:
llong baseValue; // doubles as the rule type when zero or negative
int16_t radix;
int16_t exponent;
UnicodeString ruleText;
NFSubstitution* sub1;
NFSubstitution* sub2;
const RuleBasedNumberFormat* formatter;
};
U_NAMESPACE_END
// NFRULE_H
#endif

View File

@ -0,0 +1,892 @@
#include "nfsubs.h"
// Characters and token strings that appear in substitution descriptions.
static const UChar gLessThan = 0x003c; // '<'
static const UChar gEquals = 0x003d; // '='
static const UChar gGreaterThan = 0x003e; // '>'
static const UChar gPercent = 0x0025; // '%'
static const UChar gPound = 0x0023; // '#'
static const UChar gZero = 0x0030; // '0'
static const UChar gSpace = 0x0020; // ' '
static const UnicodeString gEqualsEquals("==");
static const UnicodeString gGreaterGreaterGreaterThan(">>>");
static const UnicodeString gGreaterGreaterThan(">>");
/**
 * Factory: creates the substitution object that matches a substitution
 * description and the kind of rule that contains it.
 *
 * An empty description yields a NullSubstitution. Otherwise the first
 * character of the description selects the family ('<', '>' or '=') and the
 * owning rule's base value and rule set select the concrete class.
 *
 * @return the new substitution, or NULL with status set to U_PARSE_ERROR when
 *         the description starts with any other character, when '<' is used
 *         in a negative-number rule, or when '>' is used in a regular rule of
 *         a fraction rule set
 */
NFSubstitution*
NFSubstitution::makeSubstitution(int32_t pos,
                                 const NFRule* rule,
                                 const NFRule* predecessor,
                                 const NFRuleSet* ruleSet,
                                 const RuleBasedNumberFormat* formatter,
                                 const UnicodeString& description,
                                 UErrorCode& status)
{
    // an empty description means "no substitution"
    if (description.length() == 0) {
        return new NullSubstitution(pos, ruleSet, formatter, description, status);
    }

    const UChar leadChar = description.charAt(0);

    // '=' always maps to a SameValueSubstitution
    if (leadChar == gEquals) {
        return new SameValueSubstitution(pos, ruleSet, formatter, description, status);
    }

    // anything other than '<' or '>' is illegal
    if (leadChar != gLessThan && leadChar != gGreaterThan) {
        // throw new IllegalArgumentException("Illegal substitution character");
        status = U_PARSE_ERROR;
        return NULL;
    }

    // classify the owning rule once; both remaining families need it
    const llong base = rule->getBaseValue();
    const UBool inNegativeRule = base == NFRule::kNegativeNumberRule;
    const UBool inFractionRule = base == NFRule::kImproperFractionRule
        || base == NFRule::kProperFractionRule
        || base == NFRule::kMasterRule;

    if (leadChar == gLessThan) {
        if (inNegativeRule) {
            // throw new IllegalArgumentException("<< not allowed in negative-number rule");
            status = U_PARSE_ERROR;
            return NULL;
        }
        if (inFractionRule) {
            // integral part of the number
            return new IntegralPartSubstitution(pos, ruleSet, formatter, description, status);
        }
        if (ruleSet->isFractionRuleSet()) {
            // regular rule inside a fraction rule set: numerator
            return new NumeratorSubstitution(pos, llong_asDouble(base),
                formatter->getDefaultRuleSet(), formatter, description, status);
        }
        // regular rule: number divided by the rule's divisor
        return new MultiplierSubstitution(pos, rule->getDivisor(), ruleSet,
            formatter, description, status);
    }

    // leadChar == gGreaterThan
    if (inNegativeRule) {
        return new AbsoluteValueSubstitution(pos, ruleSet, formatter, description, status);
    }
    if (inFractionRule) {
        // fractional part of the number
        return new FractionalPartSubstitution(pos, ruleSet, formatter, description, status);
    }
    if (ruleSet->isFractionRuleSet()) {
        // throw new IllegalArgumentException(">> not allowed in fraction rule set");
        status = U_PARSE_ERROR;
        return NULL;
    }
    // regular rule: remainder after dividing by the rule's divisor
    return new ModulusSubstitution(pos, rule->getDivisor(), predecessor,
        ruleSet, formatter, description, status);
}
/**
 * Base-class constructor: records the position and works out, from the
 * token text, whether this substitution formats with a rule set or with
 * a DecimalFormat.
 *
 * A non-empty description must be at least two characters long and must
 * begin and end with the same character; those two delimiters are
 * stripped and the remainder decides what is used:
 *   - nothing left (e.g. "<<", ">>"): the owning rule set
 *   - begins with '%': the rule set of that name, looked up in the formatter
 *   - begins with '#' or '0': a DecimalFormat built from the pattern,
 *     using the formatter's DecimalFormatSymbols
 *   - begins with '>' (i.e. the token was ">>>"): the owning rule set
 *   - anything else: U_PARSE_ERROR
 *
 * @param _pos        Position of the substitution in the rule text
 * @param _ruleSet    The rule set that owns the rule containing the token
 * @param formatter   The owning formatter
 * @param description The complete token text, delimiters included
 * @param status      Receives U_PARSE_ERROR on a malformed token
 */
NFSubstitution::NFSubstitution(int32_t _pos,
                               const NFRuleSet* _ruleSet,
                               const RuleBasedNumberFormat* formatter,
                               const UnicodeString& description,
                               UErrorCode& status)
    : pos(_pos), ruleSet(NULL), numberFormat(NULL)
{
    const int32_t tokenLength = description.length();
    UnicodeString body(description);

    if (tokenLength != 0) {
        // the delimiters must be present and must match
        // (Java original: throw new IllegalArgumentException("Illegal substitution syntax"))
        if (tokenLength < 2
            || description.charAt(0) != description.charAt(tokenLength - 1)) {
            status = U_PARSE_ERROR;
            return;
        }
        // makeSubstitution() was the only code that cared about the
        // delimiters, so drop them
        body.remove(tokenLength - 1, 1);
        body.remove(0, 1);
    }

    // a bare pair of delimiters formats with the owning rule set
    if (body.length() == 0) {
        this->ruleSet = _ruleSet;
        return;
    }

    switch (body.charAt(0)) {
    case gPercent:
        // a rule set name: fetch that rule set from the formatter
        this->ruleSet = formatter->findRuleSet(body, status);
        break;

    case gPound:
    case gZero:
        // a DecimalFormat pattern, built with the formatter's symbols
        this->numberFormat = new DecimalFormat(body, *(formatter->getDecimalFormatSymbols()), status);
        // this->numberFormat->setDecimalFormatSymbols(formatter->getDecimalFormatSymbols());
        break;

    case gGreaterThan:
        // the token was ">>>": the substitution skips the usual rule
        // search and uses the rule preceding its own (for place-value
        // notations, where a part of the number is shown even when 0).
        // Leaving ruleSet NULL here caused problems when >>> was used
        // in a fractional-part substitution, so the owning set is kept.
        this->ruleSet = _ruleSet;
        this->numberFormat = NULL;
        break;

    default:
        // none of the recognized forms
        // (Java original: throw new IllegalArgumentException("Illegal substitution syntax"))
        status = U_PARSE_ERROR;
        break;
    }
}
/**
 * Destroys the DecimalFormat created by the constructor, if there is one.
 */
NFSubstitution::~NFSubstitution()
{
    // the member is a pointer-to-const, so the const is cast away
    // before the object is deleted
    NumberFormat* owned = (NumberFormat*)numberFormat;
    numberFormat = NULL;
    delete owned;
}
/**
 * Sets the substitution's divisor. Used by NFRule.setBaseValue().
 * This base-class version has an empty body and ignores both arguments;
 * it is a no-op for all substitutions except multiplier and modulus
 * substitutions.
 * @param radix The radix of the divisor
 * @param exponent The exponent of the divisor
 */
void
NFSubstitution::setDivisor(int32_t radix, int32_t exponent) {
// a no-op for all substitutions except multiplier and modulus substitutions
}
//-----------------------------------------------------------------------
// boilerplate
//-----------------------------------------------------------------------
// Definition of the static class-ID tag for NFSubstitution.
// NOTE(review): getStaticClassID() is declared outside this part of the
// file; presumably it returns the address of this char -- confirm in the header.
char NFSubstitution::fgClassID;
// Returns whatever getStaticClassID() returns. operator==() compares the
// dynamic class IDs of two substitutions to check they have the same class.
UClassID
NFSubstitution::getDynamicClassID() const {
return getStaticClassID();
}
/**
 * Compares two substitutions for equality.
 *
 * Checks the class and all of the fields that every substitution has in
 * common; subclasses should call this before performing their own
 * equality tests.
 *
 * Either substitution may have no DecimalFormat (numberFormat is NULL
 * whenever the substitution formats with a rule set), so the formats are
 * only dereferenced when both are present. Two substitutions of which
 * exactly one has a DecimalFormat are unequal.
 *
 * @param rhs The substitution to compare this one to
 * @return TRUE if the two substitutions are functionally equivalent
 */
UBool
NFSubstitution::operator==(const NFSubstitution& rhs) const
{
    if (getDynamicClassID() != rhs.getDynamicClassID()
        || pos != rhs.pos
        || ruleSet != rhs.ruleSet) {
        return FALSE;
    }

    // never dereference a missing DecimalFormat
    if (numberFormat == NULL || rhs.numberFormat == NULL) {
        return numberFormat == rhs.numberFormat;
    }

    return *numberFormat == *rhs.numberFormat;
}
/**
 * Produces a textual description of the substitution.
 *
 * The result is the character returned by tokenChar(), followed by either
 * the name of the rule set the substitution uses or the pattern of the
 * DecimalFormat it uses, followed by tokenChar() again. This might not be
 * identical to the description the substitution was created from, but
 * it'll produce the same result.
 *
 * If the substitution has neither a rule set nor a DecimalFormat (which
 * is the state the constructor leaves behind when it reports an error),
 * only the two token characters are produced.
 *
 * @param text Replaced with the description; any previous content is
 *             discarded
 */
void
NFSubstitution::toString(UnicodeString& text) const {
    text.remove();
    text.append(tokenChar());

    UnicodeString temp;
    if (ruleSet != NULL) {
        ruleSet->getName(temp);
    } else if (numberFormat != NULL) {
        // guard: numberFormat is NULL when construction failed
        numberFormat->toPattern(temp);
    }
    text.append(temp);

    text.append(tokenChar());
}
//-----------------------------------------------------------------------
// formatting
//-----------------------------------------------------------------------
/**
 * Formats an integral value for this substitution and inserts the text
 * into toInsertInto.
 *
 * The number is first passed through transformNumber() (the operation
 * depends on the kind of substitution) and then formatted with either
 * the substitution's rule set or its DecimalFormat.
 *
 * @param number The number being formatted.
 * @param toInsertInto The string we insert the result into
 * @param _pos The position in toInsertInto where the owning rule's
 * rule text begins (this value is added to this substitution's
 * position to determine exactly where to insert the new text)
 */
void
NFSubstitution::doSubstitution(llong number, UnicodeString& toInsertInto, int32_t _pos) const
{
    if (ruleSet == NULL) {
        // no rule set, so the DecimalFormat does the work: transform the
        // value as a double, drop its fractional part when the format
        // shows no fraction digits, then format and insert it
        double value = transformNumber(llong_asDouble(number));
        if (numberFormat->getMaximumFractionDigits() == 0) {
            value = floor(value);
        }

        UnicodeString formatted;
        numberFormat->format(value, formatted);
        toInsertInto.insert(_pos + this->pos, formatted);
        return;
    }

    // with a rule set we stay in integer space: transform the value and
    // let the rule set insert its text at the right place
    llong value = transformNumber(number);
    ruleSet->format(value, toInsertInto, _pos + this->pos);
}
/**
* Performs a mathematical operation on the number, formats it using
* either ruleSet or decimalFormat, and inserts the result into
* toInsertInto.
* @param number The number being formatted.
* @param toInsertInto The string we insert the result into
* @param pos The position in toInsertInto where the owning rule's
* rule text begins (this value is added to this substitution's
* position to determine exactly where to insert the new text)
*/
void
NFSubstitution::doSubstitution(double number, UnicodeString& toInsertInto, int32_t _pos) const {
// perform a transformation on the number being formatted that
// is dependent on the type of substitution this is
double numberToFormat = transformNumber(number);
// if the result is an integer, from here on out we work in integer
// space (saving time and memory and preserving accuracy)
if (numberToFormat == floor(numberToFormat) && ruleSet != NULL) {
ruleSet->format(llong(numberToFormat), toInsertInto, _pos + this->pos);
// if the result isn't an integer, then call either our rule set's
// format() method or our DecimalFormat's format() method to
// format the result
} else {
if (ruleSet != NULL) {
ruleSet->format(numberToFormat, toInsertInto, _pos + this->pos);
} else {
UnicodeString temp;
numberFormat->format(numberToFormat, temp);
toInsertInto.insert(_pos + this->pos, temp);
}
}
}
//-----------------------------------------------------------------------
// parsing
//-----------------------------------------------------------------------
/**
 * Parses a string using the rule set or DecimalFormat belonging to this
 * substitution and combines the parsed value with baseValue.
 *
 * The upper bound is first adjusted with calcUpperBound(): multiplier,
 * modulus, and numerator substitutions restrict the search to rules with
 * base values lower than their own; same-value substitutions leave the
 * upper bound wherever it was, and the others allow any rule to match.
 *
 * When the match succeeds, composeRuleValue() produces the full parse
 * result from the partial result passed in by the caller (either the
 * owning rule's base value, or that base value already composed with
 * the rule's other substitution) and the value matched here. How the
 * two are combined depends on the kind of substitution: for a regular
 * rule, the result is the multiplier substitution's result times the
 * rule's divisor plus the modulus substitution's result; for a
 * negative-number rule, the negative of the substitution's result; for a
 * fraction rule, the sum of the two substitution results; for a rule in
 * a fraction rule set, the numerator substitution's result divided by
 * the rule's base value. Results from same-value substitutions
 * propagate back upward, and null substitutions don't affect the result.
 *
 * @param text The string to parse
 * @param parsePosition On entry, ignored, but assumed to be 0.
 * On exit, this is updated to point to the first unmatched
 * character (or 0 if the substitution didn't match)
 * @param baseValue A partial parse result that should be
 * combined with the result of this parse
 * @param upperBound When searching the rule set for a rule
 * matching the string passed in, only rules with base values
 * lower than this are considered
 * @param lenientParse If true and matching against rules fails,
 * the substitution will also try matching the text against
 * numerals using a default-constructed NumberFormat. If false,
 * no extra work is done. (This value is false whenever the
 * formatter isn't in lenient-parse mode, but is also false
 * under some conditions even when the formatter _is_ in
 * lenient-parse mode.)
 * @param result On a match, set to the composed value as a double;
 * otherwise set to the long value 0
 * @return TRUE if text was matched (parsePosition is non-zero on exit),
 * FALSE otherwise
 */
UBool
NFSubstitution::doParse(const UnicodeString& text,
                        ParsePosition& parsePosition,
                        double baseValue,
                        double upperBound,
                        UBool lenientParse,
                        Formattable& result) const
{
#ifdef RBNF_DEBUG
    fprintf(stderr, "<nfsubs> %x bv: %g ub: %g\n", this, baseValue, upperBound);
#endif

    // highest base value a rule may have and still be tried
    upperBound = calcUpperBound(upperBound);

    if (ruleSet == NULL) {
        // no rule set: the DecimalFormat parses the text
        numberFormat->parse(text, result, parsePosition);
    } else {
        ruleSet->parse(text, parsePosition, upperBound, result);

        // if the rules matched nothing and the caller allows it, fall
        // back to a default NumberFormat (never for fraction rule sets)
        const UBool retryWithNumerals =
            lenientParse && !ruleSet->isFractionRuleSet() && parsePosition.getIndex() == 0;
        if (retryWithNumerals) {
            UErrorCode status = U_ZERO_ERROR;
            NumberFormat* fallback = NumberFormat::createInstance(status);
            if (U_SUCCESS(status)) {
                fallback->parse(text, result, parsePosition);
            }
            delete fallback;
        }
    }

    // nothing matched: report 0
    if (parsePosition.getIndex() == 0) {
        result.setLong(0);
        return FALSE;
    }

    // the caller's parse position has already been advanced (this
    // function has none of its own); turn the matched value into a
    // double and compose it with the partial result
    double matched;
    if (result.getType() == Formattable::kLong) {
        matched = (double)result.getLong();
    } else {
        matched = result.getDouble();
    }

    double composed = composeRuleValue(matched, baseValue);
    result.setDouble(composed);
    return TRUE;
}
/**
 * Base-class answer to "is this a null substitution?": always FALSE.
 * NOTE(review): presumably NullSubstitution overrides this to return
 * TRUE -- the override is not visible in this part of the file.
 * @return FALSE
 */
UBool
NFSubstitution::isNullSubstitution() const {
return FALSE;
}
/**
 * Returns true if this is a modulus substitution. (We didn't do this
 * with instanceof partially because it causes source files to
 * proliferate and partially because we have to port this to C++.)
 * This base-class version always returns FALSE.
 * NOTE(review): presumably ModulusSubstitution overrides it to return
 * TRUE -- the override is not visible in this part of the file.
 * @return true if this object is an instance of ModulusSubstitution
 */
UBool
NFSubstitution::isModulusSubstitution() const {
return FALSE;
}
//===================================================================
// SameValueSubstitution
//===================================================================
/**
 * Constructs a substitution that passes the value passed to it through
 * unchanged (written between a pair of '=' characters in rule
 * descriptions).
 *
 * All arguments are forwarded to the NFSubstitution constructor. A token
 * consisting of nothing but "==" names neither a rule set nor a pattern
 * and is rejected with U_PARSE_ERROR.
 */
SameValueSubstitution::SameValueSubstitution(int32_t _pos,
                                             const NFRuleSet* _ruleSet,
                                             const RuleBasedNumberFormat* formatter,
                                             const UnicodeString& description,
                                             UErrorCode& status)
    : NFSubstitution(_pos, _ruleSet, formatter, description, status)
{
    const UBool isBareToken = (description == gEqualsEquals);
    if (isBareToken) {
        // Java original: throw new IllegalArgumentException("== is not a legal token");
        status = U_PARSE_ERROR;
    }
}
// Definition of the static class-ID tag for SameValueSubstitution.
char SameValueSubstitution::fgClassID;
// Returns whatever getStaticClassID() returns for this class.
UClassID
SameValueSubstitution::getDynamicClassID() const {
return getStaticClassID();
}
//===================================================================
// MultiplierSubstitution
//===================================================================
// Definition of the static class-ID tag for MultiplierSubstitution.
char MultiplierSubstitution::fgClassID;
// Returns whatever getStaticClassID() returns for this class.
UClassID
MultiplierSubstitution::getDynamicClassID() const {
return getStaticClassID();
}
/**
 * Two multiplier substitutions are equal when the base-class comparison
 * succeeds and their divisors are the same.
 * @param rhs The substitution to compare this one to
 * @return TRUE if the two substitutions are equivalent
 */
UBool MultiplierSubstitution::operator==(const NFSubstitution& rhs) const
{
    // the base-class test includes a check of the dynamic class ID, so
    // rhs is only treated as a MultiplierSubstitution after it passed
    if (!NFSubstitution::operator==(rhs)) {
        return FALSE;
    }

    const MultiplierSubstitution* other = (const MultiplierSubstitution*)&rhs;
    return divisor == other->divisor;
}
//===================================================================
// ModulusSubstitution
//===================================================================
/**
 * Constructs a modulus substitution: a substitution that divides the
 * number being formatted by its rule's divisor and formats the
 * remainder. Represented by ">>" in a regular rule.
 *
 * Rather than keeping a back pointer to the owning rule, the
 * substitution keeps its own copies of the rule's divisor (one in
 * "divisor", one in "ldivisor").
 *
 * The ">>>" form doesn't alter how the value used for formatting and
 * parsing is calculated, but it changes what is done with that value
 * afterwards: it short-circuits the rule search and goes straight to
 * the predecessor rule, which is remembered in ruleToUse. For every
 * other description ruleToUse stays NULL.
 *
 * @param _pos        Position of the substitution in the rule text
 * @param _divisor    The owning rule's divisor
 * @param predecessor The rule preceding the owning rule in its rule set
 * @param _ruleSet    The rule set containing the owning rule
 * @param formatter   The owning formatter
 * @param description The text of the substitution token
 * @param status      Error code, passed on to the base-class constructor
 */
ModulusSubstitution::ModulusSubstitution(int32_t _pos,
                                         double _divisor,
                                         const NFRule* predecessor,
                                         const NFRuleSet* _ruleSet,
                                         const RuleBasedNumberFormat* formatter,
                                         const UnicodeString& description,
                                         UErrorCode& status)
    : NFSubstitution(_pos, _ruleSet, formatter, description, status)
    , divisor(_divisor)
    , ruleToUse(description == gGreaterGreaterGreaterThan ? predecessor : NULL)
{
    // second copy of the divisor
    ldivisor = _divisor;
}
// Definition of the static class-ID tag for ModulusSubstitution.
char ModulusSubstitution::fgClassID;
// Returns whatever getStaticClassID() returns for this class.
UClassID
ModulusSubstitution::getDynamicClassID() const {
return getStaticClassID();
}
/**
 * Two modulus substitutions are equal when the base-class comparison
 * succeeds, their divisors are the same, and they point at the same
 * rule for the ">>>" form (or both at none).
 * @param rhs The substitution to compare this one to
 * @return TRUE if the two substitutions are equivalent
 */
UBool ModulusSubstitution::operator==(const NFSubstitution& rhs) const
{
    // the base-class test includes a check of the dynamic class ID, so
    // rhs is only treated as a ModulusSubstitution after it passed
    if (!NFSubstitution::operator==(rhs)) {
        return FALSE;
    }

    const ModulusSubstitution* other = (const ModulusSubstitution*)&rhs;
    if (!(divisor == other->divisor)) {
        return FALSE;
    }
    return ruleToUse == other->ruleToUse;
}
//-----------------------------------------------------------------------
// formatting
//-----------------------------------------------------------------------
/**
 * Formats an integral value. A ">>>" substitution hands the transformed
 * value directly to ruleToUse; every other modulus substitution uses the
 * inherited implementation (rule set or DecimalFormat).
 * @param number The number being formatted
 * @param toInsertInto The string to insert the result of this
 * substitution into
 * @param _pos The position of the rule text in toInsertInto
 */
void
ModulusSubstitution::doSubstitution(llong number, UnicodeString& toInsertInto, int32_t _pos) const
{
    if (ruleToUse != NULL) {
        // ">>>": skip the rule search and format with the fixed rule
        llong remainder = transformNumber(number);
        ruleToUse->doFormat(remainder, toInsertInto, _pos + getPos());
        return;
    }

    // ordinary ">>": the base class does everything
    NFSubstitution::doSubstitution(number, toInsertInto, _pos);
}
/**
 * Formats a floating-point value. A ">>>" substitution hands the
 * transformed value directly to ruleToUse; every other modulus
 * substitution uses the inherited implementation (rule set or
 * DecimalFormat).
 * @param number The number being formatted
 * @param toInsertInto The string to insert the result of this
 * substitution into
 * @param _pos The position of the rule text in toInsertInto
 */
void
ModulusSubstitution::doSubstitution(double number, UnicodeString& toInsertInto, int32_t _pos) const
{
    if (ruleToUse != NULL) {
        // ">>>": skip the rule search and format with the fixed rule
        double remainder = transformNumber(number);
        ruleToUse->doFormat(remainder, toInsertInto, _pos + getPos());
        return;
    }

    // ordinary ">>": the base class does everything
    NFSubstitution::doSubstitution(number, toInsertInto, _pos);
}
//-----------------------------------------------------------------------
// parsing
//-----------------------------------------------------------------------
/**
 * If this is a ">>>" substitution, match only against ruleToUse.
 * Otherwise, use the superclass function.
 *
 * In the ">>>" case the specific rule's doParse() is called directly and
 * this function then does the part of the work that the inherited
 * doParse() would otherwise do: when something was matched, the parsed
 * value is composed with baseValue and stored in result as a double.
 * The parsed value is read according to the type actually stored in
 * result (long or double), in the same way as NFSubstitution::doParse().
 *
 * @param text The string to parse
 * @param parsePosition Ignored on entry, updated on exit to point to
 * the first unmatched character.
 * @param baseValue The partial parse result prior to calling this
 * routine.
 * @param upperBound Passed through to the parse routine that is used
 * @param lenientParse Only used when the inherited doParse() is called;
 * the ">>>" branch always parses with lenient parsing off
 * @param result Receives the parse result
 * @return The result of the inherited doParse() for ordinary
 * substitutions; always TRUE for a ">>>" substitution, whether or not
 * anything was matched
 */
UBool
ModulusSubstitution::doParse(const UnicodeString& text,
                             ParsePosition& parsePosition,
                             double baseValue,
                             double upperBound,
                             UBool lenientParse,
                             Formattable& result) const
{
    // if this isn't a >>> substitution, we can just use the
    // inherited parse() routine to do the parsing
    if (ruleToUse == NULL) {
        return NFSubstitution::doParse(text, parsePosition, baseValue, upperBound, lenientParse, result);
    }

    // but if it IS a >>> substitution, we have to do it here: we
    // use the specific rule's doParse() method, and then we have to
    // do some of the other work of NFRuleSet.parse()
    ruleToUse->doParse(text, parsePosition, FALSE, upperBound, result);

    if (parsePosition.getIndex() != 0) {
        // read the value through the accessor matching its stored type
        double tempResult = (result.getType() == Formattable::kLong) ?
            (double)result.getLong() :
            result.getDouble();

        tempResult = composeRuleValue(tempResult, baseValue);
        result.setDouble(tempResult);
    }

    return TRUE;
}
//===================================================================
// IntegralPartSubstitution
//===================================================================
// Definition of the static class-ID tag for IntegralPartSubstitution.
char IntegralPartSubstitution::fgClassID;
// Returns whatever getStaticClassID() returns for this class.
UClassID
IntegralPartSubstitution::getDynamicClassID() const {
return getStaticClassID();
}
//===================================================================
// FractionalPartSubstitution
//===================================================================
/**
 * Constructs a FractionalPartSubstitution. The object keeps a flag
 * telling whether it formats digit by digit, and a flag telling whether
 * the digits are separated by spaces.
 *
 * Digit-by-digit mode is chosen when the token is ">>" or ">>>", or when
 * the rule set selected by the base-class constructor is the owning rule
 * set itself; ">>>" additionally turns the separating spaces off. In all
 * other cases the rule set the substitution refers to is marked as a
 * fraction rule set.
 */
FractionalPartSubstitution::FractionalPartSubstitution(int32_t _pos,
                                                       const NFRuleSet* _ruleSet,
                                                       const RuleBasedNumberFormat* formatter,
                                                       const UnicodeString& description,
                                                       UErrorCode& status)
    : NFSubstitution(_pos, _ruleSet, formatter, description, status)
    , byDigits(FALSE)
    , useSpaces(TRUE)
{
    // note that the rule set may have been changed by the superclass
    // constructor, hence the comparison against getRuleSet()
    const UBool isTripleToken = (description == gGreaterGreaterGreaterThan);
    const UBool formatsWithOwnSet = isTripleToken
        || description == gGreaterGreaterThan
        || _ruleSet == getRuleSet();

    if (!formatsWithOwnSet) {
        // a different rule set is called: flag it (casting away const)
        ((NFRuleSet*)getRuleSet())->makeIntoFractionRuleSet();
        return;
    }

    byDigits = TRUE;
    if (isTripleToken) {
        useSpaces = FALSE;
    }

    // TODO: Thai doesn't use spaces, so spelling out decimals with
    // spaces between the words for each digit is incorrect.
    // The rules don't seem to accommodate this, at least I can't figure
    // out how to handle it using the rules. Need to provide better
    // control over fractional part formatting.
    // For now, just check if locale uses the Thai language.
    // useSpaces = strcmp(formatter->locale.getLanguage(), "th") != 0;
}
//-----------------------------------------------------------------------
// formatting
//-----------------------------------------------------------------------
/**
* If in "by digits" mode, fills in the substitution one decimal digit
* at a time using the rule set containing this substitution.
* Otherwise, uses the superclass function.
* @param number The number being formatted
* @param toInsertInto The string to insert the result of formatting
* the substitution into
* @param pos The position of the owning rule's rule text in
* toInsertInto
*/
void
FractionalPartSubstitution::doSubstitution(double number, UnicodeString& toInsertInto, int32_t _pos) const
{
// if we're not in "byDigits" mode, just use the inherited
// doSubstitution() routine
if (!byDigits) {
NFSubstitution::doSubstitution(number, toInsertInto, _pos);
// if we're in "byDigits" mode, transform the value into an integer
// by moving the decimal point eight places to the right and
// pulling digits off the right one at a time, formatting each digit
// as an integer using this substitution's owning rule set
// (this is slower, but more accurate, than doing it from the
// other end)
} else {
int32_t numberToFormat = (int32_t)round(transformNumber(number) * pow(10, kMaxDecimalDigits));
// this flag keeps us from formatting trailing zeros. It starts
// out false because we're pulling from the right, and switches
// to true the first time we encounter a non-zero digit
UBool doZeros = FALSE;
for (int32_t i = 0; i < kMaxDecimalDigits; i++) {
int32_t digit = numberToFormat % 10;
if (digit != 0 || doZeros) {
if (doZeros && useSpaces) {
toInsertInto.insert(_pos + getPos(), gSpace);
}
doZeros = TRUE;
getRuleSet()->format(digit, toInsertInto, _pos + getPos());
}
numberToFormat /= 10;
}
}
}
//-----------------------------------------------------------------------
// parsing
//-----------------------------------------------------------------------
/**
 * If in "by digits" mode, parses the string as if it were a string
 * of individual digits; otherwise, uses the superclass function (with
 * an upper bound of 0).
 *
 * In "by digits" mode the text is consumed one digit at a time: each
 * round parses the front of the remaining text with this substitution's
 * rule set and an upper bound of 10 and, if that matches nothing and
 * lenientParse is set, with a default NumberFormat. Every matched digit
 * contributes digit * 10^-k for the k-th digit, the matched text and any
 * spaces following it are dropped, and the loop stops at the first text
 * that doesn't match. The number of characters consumed is added to
 * parsePosition.
 *
 * The value of each parsed digit is read according to the type actually
 * stored in the Formattable (long or double), for the rule-set parse and
 * the lenient fallback alike.
 *
 * @param text The string to parse
 * @param parsePosition Ignored on entry (assumed to be 0), but updated
 * on exit to point to the first unmatched character
 * @param baseValue The partial parse result prior to entering this
 * function
 * @param upperBound Not used by this override
 * @param lenientParse If true, try matching the text as numerals if
 * matching as words doesn't work
 * @param resVal Receives the parse result; in "by digits" mode this is
 * the digits' value composed with baseValue, stored as a double
 * @return The result of the inherited doParse() when not in "by digits"
 * mode; always TRUE in "by digits" mode
 */
UBool
FractionalPartSubstitution::doParse(const UnicodeString& text,
                                    ParsePosition& parsePosition,
                                    double baseValue,
                                    double upperBound,
                                    UBool lenientParse,
                                    Formattable& resVal) const
{
    // if we're not in byDigits mode, we can just use the inherited
    // doParse()
    if (!byDigits) {
        return NFSubstitution::doParse(text, parsePosition, baseValue, 0, lenientParse, resVal);
    }

    // if we ARE in byDigits mode, parse the text one digit at a time
    // using this substitution's rule set (we do this by passing an
    // upper bound of 10 to the rule set's parse() ) until we reach
    // nonmatching text
    UnicodeString workText(text);
    ParsePosition workPos(1); // non-zero so that the loop is entered
    double result = 0;
    int32_t digit;
    double p10 = 0.1;
    NumberFormat* fmt = NULL; // created lazily, at most once

    while (workText.length() > 0 && workPos.getIndex() != 0) {
        workPos.setIndex(0);
        Formattable temp;
        getRuleSet()->parse(workText, workPos, 10, temp);
        digit = temp.getType() == Formattable::kLong ?
            temp.getLong() :
            (int32_t)temp.getDouble();

        if (lenientParse && workPos.getIndex() == 0) {
            if (!fmt) {
                UErrorCode status = U_ZERO_ERROR;
                fmt = NumberFormat::createInstance(status);
                if (U_FAILURE(status)) {
                    delete fmt;
                    fmt = NULL;
                }
            }
            if (fmt) {
                fmt->parse(workText, temp, workPos);
                // the fallback parser may produce a long or a double,
                // so pick the accessor that matches what was stored
                digit = temp.getType() == Formattable::kLong ?
                    temp.getLong() :
                    (int32_t)temp.getDouble();
            }
        }

        if (workPos.getIndex() != 0) {
            result += digit * p10;
            p10 /= 10;
            parsePosition.setIndex(parsePosition.getIndex() + workPos.getIndex());
            workText.removeBetween(0, workPos.getIndex());
            // skip the spaces separating this digit from the next one
            while (workText.length() > 0 && workText.charAt(0) == gSpace) {
                workText.removeBetween(0, 1);
                parsePosition.setIndex(parsePosition.getIndex() + 1);
            }
        }
    }
    delete fmt;

    result = composeRuleValue(result, baseValue);
    resVal.setDouble(result);
    return TRUE;
}
/**
 * Two fractional-part substitutions are equal when the base-class
 * comparison succeeds and both have the same "by digits" setting.
 * @param rhs The substitution to compare this one to
 * @return TRUE if the two substitutions are equivalent
 */
UBool
FractionalPartSubstitution::operator==(const NFSubstitution& rhs) const
{
    // the base-class test includes a check of the dynamic class ID, so
    // rhs is only treated as a FractionalPartSubstitution after it passed
    if (!NFSubstitution::operator==(rhs)) {
        return FALSE;
    }

    const FractionalPartSubstitution* other = (const FractionalPartSubstitution*)&rhs;
    return other->byDigits == byDigits;
}
// Definition of the static class-ID tag for FractionalPartSubstitution.
char FractionalPartSubstitution::fgClassID;
// Returns whatever getStaticClassID() returns for this class.
UClassID
FractionalPartSubstitution::getDynamicClassID() const {
return getStaticClassID();
}
//===================================================================
// AbsoluteValueSubstitution
//===================================================================
// Definition of the static class-ID tag for AbsoluteValueSubstitution.
char AbsoluteValueSubstitution::fgClassID;
// Returns whatever getStaticClassID() returns for this class.
UClassID
AbsoluteValueSubstitution::getDynamicClassID() const {
return getStaticClassID();
}
//===================================================================
// NumeratorSubstitution
//===================================================================
/**
 * Two numerator substitutions are equal when the base-class comparison
 * succeeds and their denominators are the same.
 * @param rhs The substitution to compare this one to
 * @return TRUE if the two substitutions are equivalent
 */
UBool
NumeratorSubstitution::operator==(const NFSubstitution& rhs) const
{
    // the base-class test includes a check of the dynamic class ID, so
    // rhs is only treated as a NumeratorSubstitution after it passed
    if (!NFSubstitution::operator==(rhs)) {
        return FALSE;
    }

    const NumeratorSubstitution* other = (const NumeratorSubstitution*)&rhs;
    return denominator == other->denominator;
}
// Definition of the static class-ID tag for NumeratorSubstitution.
char NumeratorSubstitution::fgClassID;
// Returns whatever getStaticClassID() returns for this class.
UClassID
NumeratorSubstitution::getDynamicClassID() const {
return getStaticClassID();
}
//===================================================================
// NullSubstitution
//===================================================================
// Definition of the static class-ID tag for NullSubstitution.
char NullSubstitution::fgClassID;
// Returns whatever getStaticClassID() returns for this class.
UClassID
NullSubstitution::getDynamicClassID() const {
return getStaticClassID();
}

498
icu4c/source/i18n/nfsubs.h Normal file
View File

@ -0,0 +1,498 @@
/*
*******************************************************************************
* Copyright (C) 1997-2001, International Business Machines Corporation and others. All Rights Reserved.
*******************************************************************************
*/
#ifndef NFSUBS_H
#define NFSUBS_H
#include "unicode/utypes.h"
#include "unicode/decimfmt.h"
#include "nfrs.h"
#include "nfrule.h"
#include "llong.h"
U_NAMESPACE_BEGIN
// The largest finite IEEE 754 double-precision value. Declared static
// and non-const in a header, so every source file that includes this
// header gets its own modifiable copy.
// NOTE(review): its uses are not visible in this part of the file.
static double MAX_DOUBLE = 1.7976931348623157e+308;
/**
 * Floating-point remainder with the semantics of Java's '%' operator.
 *
 * C doesn't define '%' for floating point, but Java does. From the Java
 * language spec 15.17: "In the remaining cases, where neither an
 * infinity, nor a zero, nor NaN is involved, the floating-point
 * remainder r from the division of a dividend n by a divisor d is
 * defined by the mathematical relation r = n - (d . q) where q is an
 * integer that is negative only if n/d is negative and positive only if
 * n/d is positive, and whose magnitude is as large as possible without
 * exceeding the magnitude of the true mathematical quotient of n and d."
 *
 * The C library function fmod() computes exactly this remainder: the
 * result has the sign of the dividend and a magnitude smaller than that
 * of the divisor. It is used here instead of evaluating n - d * q by
 * hand because the hand-written form gets some cases wrong: the rounded
 * quotient n/d can reach the next integer (1.0 and 0.1 gave 0 instead
 * of just under 0.1), and an infinite divisor gave NaN instead of n.
 *
 * @param n The dividend
 * @param d The divisor
 * @return The remainder of n / d, with the sign of n; NaN if d is zero
 *         or n is infinite; n itself if n is finite and d is infinite
 */
static double java_fmod(double n, double d)
{
    return fmod(n, d);
}
// Rounds n to a whole number by computing floor(n + .5). Values exactly
// halfway between two integers therefore go toward positive infinity
// (e.g. -2.5 becomes -2).
// NOTE(review): this has the same name and signature as the C99/C++11
// library function round(), which rounds halfway cases away from zero;
// if that function is visible in the same scope the two declarations
// will clash -- confirm against the supported compilers.
static double round(double n)
{
return floor(n + .5);
}
class NFSubstitution {
int32_t pos;
const NFRuleSet* ruleSet;
const DecimalFormat* numberFormat;
protected:
NFSubstitution(int32_t pos,
const NFRuleSet* ruleSet,
const RuleBasedNumberFormat* rbnf,
const UnicodeString& description,
UErrorCode& status);
const NFRuleSet* getRuleSet() const { return ruleSet; }
const DecimalFormat* getNumberFormat() const { return numberFormat; }
public:
static NFSubstitution* makeSubstitution(int32_t pos,
const NFRule* rule,
const NFRule* predecessor,
const NFRuleSet* ruleSet,
const RuleBasedNumberFormat* rbnf,
const UnicodeString& description,
UErrorCode& status);
virtual ~NFSubstitution();
virtual UBool operator==(const NFSubstitution& rhs) const;
UBool operator!=(const NFSubstitution& rhs) const { return !operator==(rhs); }
/**
* Sets the substitution's divisor. Used by NFRule.setBaseValue().
* A no-op for all substitutions except multiplier and modulus
* substitutions.
* @param radix The radix of the divisor
* @param exponent The exponent of the divisor
*/
virtual void setDivisor(int32_t radix, int32_t exponent);
/**
* Replaces result with the string describing the substitution.
*/
virtual void toString(UnicodeString& result) const;
//-----------------------------------------------------------------------
// formatting
//-----------------------------------------------------------------------
/**
* Performs a mathematical operation on the number, formats it using
* either ruleSet or decimalFormat, and inserts the result into
* toInsertInto.
* @param number The number being formatted.
* @param toInsertInto The string we insert the result into
* @param pos The position in toInsertInto where the owning rule's
* rule text begins (this value is added to this substitution's
* position to determine exactly where to insert the new text)
*/
virtual void doSubstitution(llong number, UnicodeString& toInsertInto, int32_t pos) const;
virtual void doSubstitution(double number, UnicodeString& toInsertInto, int32_t pos) const;
protected:
/**
* Subclasses override this function to perform some kind of
* mathematical operation on the number. The result of this operation
* is formatted using the rule set or DecimalFormat that this
* substitution refers to, and the result is inserted into the result
* string.
* @param The number being formatted
* @return The result of performing the opreration on the number
*/
virtual llong transformNumber(llong number) const = 0;
virtual double transformNumber(double number) const = 0;
public:
//-----------------------------------------------------------------------
// parsing
//-----------------------------------------------------------------------
/**
* Parses a string using the rule set or DecimalFormat belonging
* to this substitution. If there's a match, a mathematical
* operation (the inverse of the one used in formatting) is
* performed on the result of the parse and the value passed in
* and returned as the result. The parse position is updated to
* point to the first unmatched character in the string.
* @param text The string to parse
* @param parsePosition On entry, ignored, but assumed to be 0.
* On exit, this is updated to point to the first unmatched
* character (or 0 if the substitution didn't match)
* @param baseValue A partial parse result that should be
* combined with the result of this parse
* @param upperBound When searching the rule set for a rule
* matching the string passed in, only rules with base values
* lower than this are considered
* @param lenientParse If true and matching against rules fails,
* the substitution will also try matching the text against
* numerals using a default-constructed NumberFormat. If false,
* no extra work is done. (This value is false whenever the
* formatter isn't in lenient-parse mode, but is also false
* under some conditions even when the formatter _is_ in
* lenient-parse mode.)
* @return If there's a match, this is the result of composing
* baseValue with whatever was returned from matching the
* characters. This will be either a Long or a Double. If there's
* no match this is new Long(0) (not null), and parsePosition
* is left unchanged.
*/
virtual UBool doParse(const UnicodeString& text,
ParsePosition& parsePosition,
double baseValue,
double upperBound,
UBool lenientParse,
Formattable& result) const;
/**
* Derives a new value from the two values passed in. The two values
* are typically either the base values of two rules (the one containing
* the substitution and the one matching the substitution) or partial
* parse results derived in some other way. The operation is generally
* the inverse of the operation performed by transformNumber().
* @param newRuleValue The value produced by matching this substitution
* @param oldRuleValue The value that was passed to the substitution
* by the rule that owns it
* @return A third value derived from the other two, representing a
* partial parse result
*/
virtual double composeRuleValue(double newRuleValue, double oldRuleValue) const = 0;
/**
* Calculates an upper bound when searching for a rule that matches
* this substitution. Rules with base values greater than or equal
* to upperBound are not considered.
* @param oldUpperBound The current upper-bound setting. The new
* upper bound can't be any higher.
*/
virtual double calcUpperBound(double oldUpperBound) const = 0;
//-----------------------------------------------------------------------
// simple accessors
//-----------------------------------------------------------------------
/**
* Returns the substitution's position in the rule that owns it.
* @return The substitution's position in the rule that owns it.
*/
int32_t getPos() const { return pos; }
/**
* Returns the character used in the textual representation of
* substitutions of this type. Used by toString().
* @return This substitution's token character.
*/
virtual UChar tokenChar() const = 0;
/**
* Returns true if this is a null substitution. (We didn't do this
* with instanceof partially because it causes source files to
* proliferate and partially because we have to port this to C++.)
* @return true if this object is an instance of NullSubstitution
*/
virtual UBool isNullSubstitution() const;
/**
* Returns true if this is a modulus substitution. (We didn't do this
* with instanceof partially because it causes source files to
* proliferate and partially because we have to port this to C++.)
* @return true if this object is an instance of ModulusSubstitution
*/
virtual UBool isModulusSubstitution() const;
private:
static char fgClassID;
public:
static UClassID getStaticClassID(void) { return (UClassID)&fgClassID; }
virtual UClassID getDynamicClassID(void) const;
};
/**
 * Substitution that hands the number through unchanged when formatting
 * and, when parsing, keeps the newly matched value as the result.
 * Its token character is '='.
 */
class SameValueSubstitution : public NFSubstitution {
public:
    SameValueSubstitution(int32_t pos,
                          const NFRuleSet* ruleset,
                          const RuleBasedNumberFormat* formatter,
                          const UnicodeString& description,
                          UErrorCode& status);

    // formatting: the value is used exactly as given
    virtual llong transformNumber(llong number) const
    {
        return number;
    }
    virtual double transformNumber(double number) const
    {
        return number;
    }

    // parsing: the matched value wins; the upper bound is passed through
    virtual double composeRuleValue(double newRuleValue, double oldRuleValue) const
    {
        return newRuleValue;
    }
    virtual double calcUpperBound(double oldUpperBound) const
    {
        return oldUpperBound;
    }

    // '='
    virtual UChar tokenChar() const
    {
        return (UChar)0x003d;
    }

private:
    static char fgClassID;

public:
    static UClassID getStaticClassID(void)
    {
        return (UClassID)&fgClassID;
    }
    virtual UClassID getDynamicClassID(void) const;
};
/**
 * Substitution that divides the number by a divisor before formatting
 * (integer division for llong, floor(number / divisor) for double) and
 * multiplies the matched value by the divisor when parsing.
 * Its token character is '<'.
 */
class MultiplierSubstitution : public NFSubstitution {
    double divisor;   // divisor used by the double code path
    llong ldivisor;   // the same divisor, used by the llong code path
public:
    MultiplierSubstitution(int32_t _pos,
                           double _divisor,
                           const NFRuleSet* _ruleSet,
                           const RuleBasedNumberFormat* formatter,
                           const UnicodeString& description,
                           UErrorCode& status)
        : NFSubstitution(_pos, _ruleSet, formatter, description, status), divisor(_divisor)
    {
        ldivisor = _divisor;
    }

    /**
     * Recomputes the divisor as radix raised to exponent.
     * @param radix The radix of the divisor
     * @param exponent The exponent of the divisor
     */
    void setDivisor(int32_t radix, int32_t exponent) {
        // Both arguments are converted explicitly: with two integer
        // arguments the call is ambiguous among the C++98 pow() overloads.
        divisor = pow((double)radix, (double)exponent);
        ldivisor = divisor;
    }

    UBool operator==(const NFSubstitution& rhs) const;

    // formatting: number / divisor, truncated (llong) or floored (double)
    llong transformNumber(llong number) const {
        return number / ldivisor;
    }
    double transformNumber(double number) const {
        return floor(number / divisor);
    }

    // parsing: scale the matched value back up by the divisor
    double composeRuleValue(double newRuleValue, double oldRuleValue) const {
        return newRuleValue * divisor;
    }
    // the incoming bound is ignored; the divisor is the bound
    double calcUpperBound(double oldUpperBound) const { return divisor; }

    UChar tokenChar() const { return (UChar)0x003c; } // '<'

private:
    static char fgClassID;

public:
    static UClassID getStaticClassID(void) { return (UClassID)&fgClassID; }
    virtual UClassID getDynamicClassID(void) const;
};
/**
 * Substitution that formats the remainder of the number with respect to
 * a divisor (number % divisor for llong, java_fmod for double).
 * Its token character is '>'.
 */
class ModulusSubstitution : public NFSubstitution {
    double divisor;          // divisor used by the double code path
    llong ldivisor;          // the same divisor, used by the llong code path
    const NFRule* ruleToUse; // set up by the out-of-line constructor; NOTE(review): semantics not visible here
public:
    ModulusSubstitution(int32_t pos,
                        double _divisor,
                        const NFRule* rulePredecessor,
                        const NFRuleSet* ruleSet,
                        const RuleBasedNumberFormat* formatter,
                        const UnicodeString& description,
                        UErrorCode& status);

    /**
     * Recomputes the divisor as radix raised to exponent.
     * @param radix The radix of the divisor
     * @param exponent The exponent of the divisor
     */
    void setDivisor(int32_t radix, int32_t exponent) {
        // Both arguments are converted explicitly: with two integer
        // arguments the call is ambiguous among the C++98 pow() overloads.
        divisor = pow((double)radix, (double)exponent);
        ldivisor = divisor;
    }

    UBool operator==(const NFSubstitution& rhs) const;

    void doSubstitution(llong number, UnicodeString& toInsertInto, int32_t pos) const;
    void doSubstitution(double number, UnicodeString& toInsertInto, int32_t pos) const;

    // formatting: remainder after dividing by the divisor
    llong transformNumber(llong number) const { return number % ldivisor; }
    double transformNumber(double number) const { return java_fmod(number, divisor); }

    UBool doParse(const UnicodeString& text,
                  ParsePosition& parsePosition,
                  double baseValue,
                  double upperBound,
                  UBool lenientParse,
                  Formattable& result) const;

    // parsing: replace the remainder part of oldRuleValue with the matched value
    double composeRuleValue(double newRuleValue, double oldRuleValue) const {
        return oldRuleValue - java_fmod(oldRuleValue, divisor) + newRuleValue;
    }
    // the incoming bound is ignored; the divisor is the bound
    double calcUpperBound(double oldUpperBound) const { return divisor; }

    UBool isModulusSubstitution() const { return TRUE; }

    UChar tokenChar() const { return (UChar)0x003e; } // '>'

private:
    static char fgClassID;

public:
    static UClassID getStaticClassID(void) { return (UClassID)&fgClassID; }
    virtual UClassID getDynamicClassID(void) const;
};
/**
 * Substitution that formats the integral part of the number: an llong
 * is used as-is, a double is floored. When parsing, the matched value
 * is added to the value passed in. Its token character is '<'.
 */
class IntegralPartSubstitution : public NFSubstitution {
public:
    IntegralPartSubstitution(int32_t _pos,
                             const NFRuleSet* _ruleSet,
                             const RuleBasedNumberFormat* formatter,
                             const UnicodeString& description,
                             UErrorCode& status)
        : NFSubstitution(_pos, _ruleSet, formatter, description, status)
    {
    }

    // formatting
    virtual llong transformNumber(llong number) const
    {
        return number;
    }
    virtual double transformNumber(double number) const
    {
        return floor(number);
    }

    // parsing: sum of both values; the bound is always MAX_DOUBLE
    virtual double composeRuleValue(double newRuleValue, double oldRuleValue) const
    {
        return newRuleValue + oldRuleValue;
    }
    virtual double calcUpperBound(double oldUpperBound) const
    {
        return MAX_DOUBLE;
    }

    // '<'
    virtual UChar tokenChar() const
    {
        return (UChar)0x003c;
    }

private:
    static char fgClassID;

public:
    static UClassID getStaticClassID(void)
    {
        return (UClassID)&fgClassID;
    }
    virtual UClassID getDynamicClassID(void) const;
};
/**
 * Substitution that formats the fractional part of the number
 * (number - floor(number) for a double, always zero for an llong).
 * When parsing, the matched value is added to the value passed in.
 * Its token character is '>'.
 */
class FractionalPartSubstitution : public NFSubstitution {
    // flags initialised by the out-of-line constructor
    UBool byDigits;
    UBool useSpaces;
    enum { kMaxDecimalDigits = 8 };

public:
    FractionalPartSubstitution(int32_t pos,
                               const NFRuleSet* ruleSet,
                               const RuleBasedNumberFormat* formatter,
                               const UnicodeString& description,
                               UErrorCode& status);

    virtual UBool operator==(const NFSubstitution& rhs) const;

    virtual void doSubstitution(double number, UnicodeString& toInsertInto, int32_t pos) const;

    // formatting: an integer has no fractional part
    virtual llong transformNumber(llong number) const
    {
        return llong::kZero;
    }
    virtual double transformNumber(double number) const
    {
        return number - floor(number);
    }

    virtual UBool doParse(const UnicodeString& text,
                          ParsePosition& parsePosition,
                          double baseValue,
                          double upperBound,
                          UBool lenientParse,
                          Formattable& result) const;

    // parsing: sum of both values; the bound is always 0
    virtual double composeRuleValue(double newRuleValue, double oldRuleValue) const
    {
        return newRuleValue + oldRuleValue;
    }
    virtual double calcUpperBound(double oldUpperBound) const
    {
        return 0;
    }

    // '>'
    virtual UChar tokenChar() const
    {
        return (UChar)0x003e;
    }

private:
    static char fgClassID;

public:
    static UClassID getStaticClassID(void)
    {
        return (UClassID)&fgClassID;
    }
    virtual UClassID getDynamicClassID(void) const;
};
/**
 * Substitution that formats the absolute value of the number and, when
 * parsing, negates the matched value. Its token character is '>'.
 */
class AbsoluteValueSubstitution : public NFSubstitution {
public:
    AbsoluteValueSubstitution(int32_t _pos,
                              const NFRuleSet* _ruleSet,
                              const RuleBasedNumberFormat* formatter,
                              const UnicodeString& description,
                              UErrorCode& status)
        : NFSubstitution(_pos, _ruleSet, formatter, description, status)
    {
    }

    // formatting: drop the sign
    virtual llong transformNumber(llong number) const
    {
        return llong_abs(number);
    }
    virtual double transformNumber(double number) const
    {
        return fabs(number);
    }

    // parsing: the result is the negated match; the bound is always MAX_DOUBLE
    virtual double composeRuleValue(double newRuleValue, double oldRuleValue) const
    {
        return -newRuleValue;
    }
    virtual double calcUpperBound(double oldUpperBound) const
    {
        return MAX_DOUBLE;
    }

    // '>'
    virtual UChar tokenChar() const
    {
        return (UChar)0x003e;
    }

private:
    static char fgClassID;

public:
    static UClassID getStaticClassID(void)
    {
        return (UClassID)&fgClassID;
    }
    virtual UClassID getDynamicClassID(void) const;
};
/**
 * Substitution that multiplies the number by a denominator before
 * formatting (rounding the product for a double) and, when parsing,
 * divides the matched value by the value passed in.
 * Its token character is '<'.
 */
class NumeratorSubstitution : public NFSubstitution {
    double denominator;   // factor used by the double code path
    llong ldenominator;   // the same factor, used by the llong code path

public:
    NumeratorSubstitution(int32_t _pos,
                          double _denominator,
                          const NFRuleSet* _ruleSet,
                          const RuleBasedNumberFormat* formatter,
                          const UnicodeString& description,
                          UErrorCode& status)
        : NFSubstitution(_pos, _ruleSet, formatter, description, status),
          denominator(_denominator)
    {
        ldenominator = _denominator;
    }

    virtual UBool operator==(const NFSubstitution& rhs) const;

    // formatting: scale up by the denominator
    virtual llong transformNumber(llong number) const
    {
        return number * ldenominator;
    }
    virtual double transformNumber(double number) const
    {
        return round(number * denominator);
    }

    /**
     * Delegates to NFSubstitution::doParse() with lenient parsing forced
     * off; nothing else is special about parsing here, but leaving lenient
     * parsing on seriously disturbs the algorithm.
     */
    virtual UBool doParse(const UnicodeString& text,
                          ParsePosition& parsePosition,
                          double baseValue,
                          double upperBound,
                          UBool lenientParse,
                          Formattable& result) const
    {
        return NFSubstitution::doParse(text, parsePosition, baseValue, upperBound, FALSE, result);
    }

    // parsing: matched value divided by the value passed in
    virtual double composeRuleValue(double newRuleValue, double oldRuleValue) const
    {
        return newRuleValue / oldRuleValue;
    }
    // the incoming bound is ignored; the denominator is the bound
    virtual double calcUpperBound(double oldUpperBound) const
    {
        return denominator;
    }

    // '<'
    virtual UChar tokenChar() const
    {
        return (UChar)0x003c;
    }

private:
    static char fgClassID;

public:
    static UClassID getStaticClassID(void)
    {
        return (UClassID)&fgClassID;
    }
    virtual UClassID getDynamicClassID(void) const;
};
/**
 * Placeholder substitution that does nothing: toString() and both
 * doSubstitution() overloads are empty, transformNumber() yields zero,
 * and doParse() succeeds with baseValue as its result.
 */
class NullSubstitution : public NFSubstitution {
public:
    NullSubstitution(int32_t _pos,
                     const NFRuleSet* _ruleSet,
                     const RuleBasedNumberFormat* formatter,
                     const UnicodeString& description,
                     UErrorCode& status)
        : NFSubstitution(_pos, _ruleSet, formatter, description, status)
    {
    }

    // nothing to describe and nothing to insert
    virtual void toString(UnicodeString& result) const
    {
    }
    virtual void doSubstitution(double number, UnicodeString& toInsertInto, int32_t _pos) const
    {
    }
    virtual void doSubstitution(llong number, UnicodeString& toInsertInto, int32_t _pos) const
    {
    }

    virtual llong transformNumber(llong number) const
    {
        return llong::kZero;
    }
    virtual double transformNumber(double number) const
    {
        return 0;
    }

    // parsing always "matches": the incoming base value becomes the result
    virtual UBool doParse(const UnicodeString& text,
                          ParsePosition& parsePosition,
                          double baseValue,
                          double upperBound,
                          UBool lenientParse,
                          Formattable& result) const
    {
        result.setDouble(baseValue);
        return TRUE;
    }

    // never called
    virtual double composeRuleValue(double newRuleValue, double oldRuleValue) const
    {
        return 0;
    }
    // never called
    virtual double calcUpperBound(double oldUpperBound) const
    {
        return 0;
    }

    virtual UBool isNullSubstitution() const
    {
        return TRUE;
    }

    // ' ' never called
    virtual UChar tokenChar() const
    {
        return (UChar)0x0020;
    }

private:
    static char fgClassID;

public:
    static UClassID getStaticClassID(void)
    {
        return (UClassID)&fgClassID;
    }
    virtual UClassID getDynamicClassID(void) const;
};
U_NAMESPACE_END
// NFSUBS_H
#endif

623
icu4c/source/i18n/rbnf.cpp Normal file
View File

@ -0,0 +1,623 @@
/*
*******************************************************************************
* Copyright (C) 1997-2001, International Business Machines Corporation and others. All Rights Reserved.
*******************************************************************************
*/
#include "unicode/rbnf.h"
#include "nfrs.h"
#include "cmemory.h"
#include "cstring.h"
#include "unicode/normlzr.h"
#include "unicode/tblcoll.h"
#include "unicode/uchar.h"
#include "unicode/ucol.h"
#include "unicode/uloc.h"
#include "unicode/unum.h"
#include "unicode/ures.h"
#include "unicode/ustring.h"
#include "unicode/utf16.h"
#include <stdio.h>
// Prefix of internal rule set names. The format() overloads that take a
// rule set name reject any name that starts with this prefix.
static const UnicodeString gPercentPercent("%%");
// kMaxDouble is kHalfMaxDouble squared, i.e. (1 << 22) * (1 << 22) = 2^44.
// parse() passes it to NFRuleSet::parse() as the initial upper bound.
#define kSomeNumberOfBitsDiv2 22
#define kHalfMaxDouble (double)(1 << kSomeNumberOfBitsDiv2)
#define kMaxDouble (kHalfMaxDouble * kHalfMaxDouble)
// Definition of the static class-ID member.
// NOTE(review): presumably its address is what getStaticClassID() returns,
// as in the NFSubstitution classes -- confirm against rbnf.h.
const char RuleBasedNumberFormat::fgClassID = 0;
/**
 * Builds a formatter from a textual rule description.
 * Every pointer member starts out NULL and lenient parsing is off;
 * all the real work is delegated to init().
 * @param description The rule description to parse
 * @param alocale The locale stored in the formatter
 * @param perror Passed through to init()
 * @param status Passed through to init(), which reports failures in it
 */
RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
                                             const Locale& alocale,
                                             UParseError& perror,
                                             UErrorCode& status)
    : ruleSets(NULL),
      defaultRuleSet(NULL),
      locale(alocale),
      collator(NULL),
      decimalFormatSymbols(NULL),
      lenient(FALSE),
      lenientParseRules(NULL)
{
    init(description, perror, status);
}
/**
 * Builds one of the predefined formatters by loading its rule
 * description from the resource bundle of the given locale.
 * @param tag Selects the resource: URBNF_SPELLOUT ("SpelloutRules"),
 *   URBNF_ORDINAL ("OrdinalRules") or URBNF_DURATION ("DurationRules");
 *   any other value sets status to U_ILLEGAL_ARGUMENT_ERROR
 * @param alocale The locale whose resource bundle is opened
 * @param status Does nothing if already a failure; otherwise receives
 *   any error from the resource lookup or from init()
 */
RuleBasedNumberFormat::RuleBasedNumberFormat(URBNFRuleSetTag tag,
                                             const Locale& alocale,
                                             UErrorCode& status)
    : ruleSets(NULL),
      defaultRuleSet(NULL),
      locale(alocale),
      collator(NULL),
      decimalFormatSymbols(NULL),
      lenient(FALSE),
      lenientParseRules(NULL)
{
    if (U_FAILURE(status)) {
        return;
    }

    // map the tag onto the resource key holding the rule text
    const char* resourceKey = "";
    if (tag == URBNF_SPELLOUT) {
        resourceKey = "SpelloutRules";
    } else if (tag == URBNF_ORDINAL) {
        resourceKey = "OrdinalRules";
    } else if (tag == URBNF_DURATION) {
        resourceKey = "DurationRules";
    } else {
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }

    UResourceBundle* bundle = ures_open(NULL, locale.getName(), &status);
    int32_t ruleLength = 0;
    const UChar* ruleText = ures_getStringByKey(bundle, resourceKey, &ruleLength, &status);
    if (U_SUCCESS(status)) {
        UnicodeString desc(ruleText, ruleLength);
        UParseError perror;
        init(desc, perror, status);
    }
    // the bundle is closed whether or not the lookup succeeded
    ures_close(bundle);
}
/**
 * Copy constructor. Starts from an empty formatter (all pointer members
 * NULL) and then copies rhs through the assignment operator.
 * @param rhs The formatter to copy
 */
RuleBasedNumberFormat::RuleBasedNumberFormat(const RuleBasedNumberFormat& rhs)
    : ruleSets(NULL)
    , defaultRuleSet(NULL)
    , locale(rhs.locale)
    , collator(NULL)
    , decimalFormatSymbols(NULL)
    , lenient(FALSE)
    , lenientParseRules(NULL)
{
    // Assignment, not comparison: calling operator== here merely compared
    // the two objects and left the new one without any rule sets.
    this->operator=(rhs);
}
/**
 * Assignment operator. Discards the current state, then rebuilds this
 * formatter from rhs's locale and rule text and copies its lenient flag.
 * An error raised while re-parsing the rules cannot be reported from
 * here and is dropped.
 * @param rhs The formatter to copy
 * @return *this
 */
RuleBasedNumberFormat&
RuleBasedNumberFormat::operator=(const RuleBasedNumberFormat& rhs)
{
    // Self-assignment must be a no-op: dispose() would destroy the very
    // rule sets that getRules() is about to read.
    if (this == &rhs) {
        return *this;
    }

    UErrorCode status = U_ZERO_ERROR;
    dispose();
    // dispose() has deleted these objects; clear the pointers so nothing
    // stale survives if init() does not replace them (otherwise the
    // destructor would delete them a second time).
    defaultRuleSet = NULL;
    collator = NULL;
    decimalFormatSymbols = NULL;
    lenientParseRules = NULL;

    locale = rhs.locale;
    UnicodeString rules = rhs.getRules();
    UParseError perror;
    init(rules, perror, status);
    lenient = rhs.lenient;
    return *this;
}
/**
 * Destructor. Everything the formatter owns is released by dispose().
 */
RuleBasedNumberFormat::~RuleBasedNumberFormat()
{
    this->dispose();
}
/**
 * Creates a copy of this formatter by re-parsing its rule text with the
 * same locale, then copying the lenient flag.
 * @return A new formatter owned by the caller, or NULL if it could not
 *   be allocated or its rules failed to parse.
 */
Format*
RuleBasedNumberFormat::clone(void) const
{
    // The rule text is handed over as a UnicodeString. The former detour
    // through a raw UChar buffer left that buffer without a terminating
    // NUL, so converting it back to a UnicodeString read past its end.
    UnicodeString rules = getRules();
    UErrorCode status = U_ZERO_ERROR;
    UParseError perror;
    RuleBasedNumberFormat* result = new RuleBasedNumberFormat(rules, locale, perror, status);
    if (result == NULL) {
        return NULL;
    }
    if (U_FAILURE(status)) {
        delete result;
        return NULL;
    }
    result->lenient = lenient;
    return result;
}
/**
 * Compares this formatter with another Format.
 * @param other The object to compare against
 * @return TRUE if other is this object, or is a RuleBasedNumberFormat
 *   with an equal locale, the same lenient setting and pairwise-equal
 *   rule sets; FALSE otherwise.
 */
UBool
RuleBasedNumberFormat::operator==(const Format& other) const
{
    if (this == &other) {
        return TRUE;
    }
    if (other.getDynamicClassID() == getStaticClassID()) {
        const RuleBasedNumberFormat& rhs = (const RuleBasedNumberFormat&)other;
        if (locale == rhs.locale &&
            lenient == rhs.lenient) {
            NFRuleSet** p = ruleSets;
            NFRuleSet** q = rhs.ruleSets;
            // A formatter whose initialization failed has no rule-set
            // array at all; two such formatters compare equal, and one
            // never equals a formatter that does have rule sets.
            if (p == NULL || q == NULL) {
                return p == q;
            }
            // walk both NULL-terminated arrays in step
            while (*p && *q && (**p == **q)) {
                ++p;
                ++q;
            }
            // equal only if both arrays ended at the same time
            return *q == NULL && *p == NULL;
        }
    }
    return FALSE;
}
/**
 * Builds the rule text of this formatter by letting every rule set
 * append its own rules, in order.
 * @return The combined rule text; empty if the formatter has no rule
 *   sets (for example after a failed initialization).
 */
UnicodeString
RuleBasedNumberFormat::getRules() const
{
    UnicodeString result;
    // ruleSets stays NULL when init() failed, so it must be checked
    // before the array is walked
    if (ruleSets != NULL) {
        for (NFRuleSet** p = ruleSets; *p; ++p) {
            (*p)->appendRules(result);
        }
    }
    return result;
}
/**
 * Returns the name of the index-th public rule set, counting from zero
 * in declaration order.
 * @param index Zero-based position among the public rule sets
 * @return The rule set's name, or an empty string if there is no public
 *   rule set at that position.
 */
UnicodeString
RuleBasedNumberFormat::getRuleSetName(int32_t index) const
{
    // ruleSets stays NULL when init() failed
    if (ruleSets != NULL) {
        for (NFRuleSet** p = ruleSets; *p; ++p) {
            NFRuleSet* rs = *p;
            if (rs->isPublic()) {
                // count down once per public rule set until index is used up
                if (--index == -1) {
                    UnicodeString result;
                    rs->getName(result);
                    return result;
                }
            }
        }
    }
    // Not found. The function returns by value, so an empty string is
    // returned here; the previous code dereferenced a NULL pointer.
    return UnicodeString();
}
/**
 * Counts the public rule sets of this formatter.
 * @return The number of rule sets for which isPublic() is true; 0 if
 *   the formatter has no rule sets at all.
 */
int32_t
RuleBasedNumberFormat::getNumberOfRuleSetNames() const
{
    int32_t result = 0;
    // ruleSets stays NULL when init() failed
    if (ruleSets != NULL) {
        for (NFRuleSet** p = ruleSets; *p; ++p) {
            if ((**p).isPublic()) {
                ++result;
            }
        }
    }
    return result;
}
/**
 * Looks up a rule set by name.
 * @param name The name to look for
 * @param status If already a failure, nothing is done. Set to
 *   U_ILLEGAL_ARGUMENT_ERROR when no rule set has that name.
 * @return The matching rule set (still owned by this formatter), or
 *   NULL if there is none or status was already a failure.
 */
NFRuleSet*
RuleBasedNumberFormat::findRuleSet(const UnicodeString& name, UErrorCode& status) const
{
    if (U_SUCCESS(status)) {
        // ruleSets stays NULL when init() failed; that is treated like
        // "no rule set with this name"
        if (ruleSets != NULL) {
            for (NFRuleSet** p = ruleSets; *p; ++p) {
                NFRuleSet* rs = *p;
                if (rs->isNamed(name)) {
                    return rs;
                }
            }
        }
        status = U_ILLEGAL_ARGUMENT_ERROR;
    }
    return NULL;
}
/**
 * Formats an integer with the default rule set and appends the text.
 * @param number The value to format
 * @param toAppendTo The string the formatted text is added to
 * @param pos Not used
 * @return toAppendTo; unchanged if the formatter has no default rule
 *   set (for example after a failed initialization)
 */
UnicodeString&
RuleBasedNumberFormat::format(int32_t number,
                              UnicodeString& toAppendTo,
                              FieldPosition& pos) const
{
    // defaultRuleSet stays NULL when init() failed
    if (defaultRuleSet != NULL) {
        defaultRuleSet->format(llong(number), toAppendTo, toAppendTo.length());
    }
    return toAppendTo;
}
// Compiled out: llong overload of format() using the default rule set.
#if 0
UnicodeString&
RuleBasedNumberFormat::format(llong number,
UnicodeString& toAppendTo,
FieldPosition& pos) const
{
defaultRuleSet->format(number, toAppendTo, toAppendTo.length());
return toAppendTo;
}
#endif
/**
 * Formats a double with the default rule set and appends the text.
 * @param number The value to format
 * @param toAppendTo The string the formatted text is added to
 * @param pos Not used
 * @return toAppendTo; unchanged if the formatter has no default rule
 *   set (for example after a failed initialization)
 */
UnicodeString&
RuleBasedNumberFormat::format(double number,
                              UnicodeString& toAppendTo,
                              FieldPosition& pos) const
{
    // defaultRuleSet stays NULL when init() failed
    if (defaultRuleSet != NULL) {
        defaultRuleSet->format(number, toAppendTo, toAppendTo.length());
    }
    return toAppendTo;
}
/**
 * Formats an integer with the named rule set and appends the text.
 * @param number The value to format
 * @param ruleSetName Name of the rule set to use; names starting with
 *   "%%" are internal and rejected with U_ILLEGAL_ARGUMENT_ERROR
 * @param toAppendTo The string the formatted text is added to
 * @param pos Not used
 * @param status Nothing is done if already a failure; otherwise
 *   receives the error for an internal or unknown rule set name
 * @return toAppendTo
 */
UnicodeString&
RuleBasedNumberFormat::format(int32_t number,
                              const UnicodeString& ruleSetName,
                              UnicodeString& toAppendTo,
                              FieldPosition& pos,
                              UErrorCode& status) const
{
    // (would simply forward to the llong overload if that were compiled in)
    if (U_FAILURE(status)) {
        return toAppendTo;
    }

    // internal rule sets may not be used directly
    if (ruleSetName.indexOf(gPercentPercent) == 0) {
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return toAppendTo;
    }

    NFRuleSet* target = findRuleSet(ruleSetName, status);
    if (target != NULL) {
        target->format(llong(number), toAppendTo, toAppendTo.length());
    }
    return toAppendTo;
}
// Compiled out: llong overload of format() that takes a rule set name.
#if 0
UnicodeString&
RuleBasedNumberFormat::format(llong number,
const UnicodeString& ruleSetName,
UnicodeString& toAppendTo,
FieldPosition& pos,
UErrorCode& status) const
{
if (U_SUCCESS(status)) {
if (ruleSetName.indexOf(gPercentPercent) == 0) {
// throw new IllegalArgumentException("Can't use internal rule set");
status = U_ILLEGAL_ARGUMENT_ERROR;
} else {
NFRuleSet *rs = findRuleSet(ruleSetName, status);
if (rs) {
rs->format(number, toAppendTo, toAppendTo.length());
}
}
}
return toAppendTo;
}
#endif
/**
 * Formattable overload. It exists only to keep the linker happy and
 * forwards straight to the NumberFormat implementation.
 * @param obj The value to format
 * @param toAppendTo The string the formatted text is added to
 * @param pos Passed through to NumberFormat::format()
 * @param status Passed through to NumberFormat::format()
 * @return Whatever NumberFormat::format() returns
 */
UnicodeString&
RuleBasedNumberFormat::format(const Formattable& obj,
                              UnicodeString& toAppendTo,
                              FieldPosition& pos,
                              UErrorCode& status) const
{
    return this->NumberFormat::format(obj, toAppendTo, pos, status);
}
/**
 * Formats a double with the named rule set and appends the text.
 * @param number The value to format
 * @param ruleSetName Name of the rule set to use; names starting with
 *   "%%" are internal and rejected with U_ILLEGAL_ARGUMENT_ERROR
 * @param toAppendTo The string the formatted text is added to
 * @param pos Not used
 * @param status Nothing is done if already a failure; otherwise
 *   receives the error for an internal or unknown rule set name
 * @return toAppendTo
 */
UnicodeString&
RuleBasedNumberFormat::format(double number,
                              const UnicodeString& ruleSetName,
                              UnicodeString& toAppendTo,
                              FieldPosition& pos,
                              UErrorCode& status) const
{
    if (U_FAILURE(status)) {
        return toAppendTo;
    }

    // internal rule sets may not be used directly
    if (ruleSetName.indexOf(gPercentPercent) == 0) {
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return toAppendTo;
    }

    NFRuleSet* target = findRuleSet(ruleSetName, status);
    if (target != NULL) {
        target->format(number, toAppendTo, toAppendTo.length());
    }
    return toAppendTo;
}
void
RuleBasedNumberFormat::parse(const UnicodeString& text,
Formattable& result,
ParsePosition& parsePosition) const
{
ParsePosition high_pp;
Formattable high_result;
for (NFRuleSet** p = ruleSets; *p; ++p) {
NFRuleSet *rp = *p;
if (rp->isPublic()) {
ParsePosition working_pp = parsePosition;
Formattable working_result;
rp->parse(text, working_pp, kMaxDouble, working_result);
if (working_pp.getIndex() > high_pp.getIndex()) {
high_pp = working_pp;
high_result = working_result;
if (high_pp.getIndex() == text.length()) {
break;
}
}
}
}
parsePosition = high_pp;
result = high_result;
if (result.getType() == Formattable::kDouble) {
int32_t r = (int32_t)result.getDouble();
if ((double)r == result.getDouble()) {
result.setLong(r);
}
}
}
/**
 * Turns lenient parsing on or off. Turning it off also discards the
 * collator, if one has been created.
 * @param enabled The new lenient-parse setting
 */
void
RuleBasedNumberFormat::setLenient(UBool enabled)
{
    lenient = enabled;

    // the collator is kept when lenient parsing stays on, and there is
    // nothing to discard when none exists
    if (enabled || collator == NULL) {
        return;
    }

    delete collator;
    collator = NULL;
}
// All urbnf objects are created through openRules, so we init all of the
// Unicode string constants required by rbnf, nfrs, or nfr here.
// Header that introduces the lenient-parse rules inside a description.
static const UnicodeString gLenientParse("%%lenient-parse:");
// ';' -- the character that ends a rule.
static const UChar gSemiColon = 0x003B;
// ";%" -- the end of one rule set followed by the start of the next.
static const UnicodeString gSemiPercent(";%");
/**
 * Parses a rule description and builds the formatter's rule sets.
 * The lenient-parse rules, if present, are split off into
 * lenientParseRules; the rest is divided into rule sets at every ";%",
 * the default rule set is chosen, and each rule set parses its rules.
 * @param rules The rule description
 * @param perror Not filled in yet (see TODO)
 * @param status Nothing is done if already a failure. Set to
 *   U_MEMORY_ALLOCATION_ERROR for an empty description or when an
 *   allocation fails; rule-set construction and parsing may set it too.
 */
void
RuleBasedNumberFormat::init(const UnicodeString& rules, UParseError& perror, UErrorCode& status)
{
    // TODO: implement perror
    if (U_FAILURE(status)) {
        return;
    }

    UnicodeString description(rules);
    if (!description.length()) {
        // NOTE(review): an empty description is reported with the
        // allocation error code; kept because callers may rely on it
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }

    // start by stripping the trailing whitespace from all the rules
    // (this is all the whitespace following each semicolon in the
    // description). This allows us to look for rule-set boundaries
    // by searching for ";%" without having to worry about whitespace
    // between the ; and the %
    stripWhitespace(description);

    // check to see if there's a set of lenient-parse rules. If there
    // is, pull them out into our temporary holding place for them,
    // and delete them from the description before the real description-
    // parsing code sees them
    UTextOffset lp = description.indexOf(gLenientParse);
    if (lp != -1) {
        // we've got to make sure we're not in the middle of a rule
        // (where "%%lenient-parse" would actually get treated as
        // rule text)
        if (lp == 0 || description.charAt(lp - 1) == gSemiColon) {
            // locate the beginning and end of the actual collation
            // rules (there may be whitespace between the name and
            // the first token in the description)
            int lpEnd = description.indexOf(gSemiPercent, lp);
            if (lpEnd == -1) {
                lpEnd = description.length() - 1;
            }
            int lpStart = lp + gLenientParse.length();
            while (u_isWhitespace(description.charAt(lpStart))) {
                ++lpStart;
            }

            // copy out the lenient-parse rules and delete them
            // from the description
            lenientParseRules = new UnicodeString();
            if (lenientParseRules == NULL) {
                status = U_MEMORY_ALLOCATION_ERROR;
                return;
            }
            lenientParseRules->setTo(description, lpStart, lpEnd - lpStart);
            description.remove(lp, lpEnd + 1 - lp);
        }
    }

    // pre-flight parsing the description and count the number of
    // rule sets (";%" marks the end of one rule set and the beginning
    // of the next)
    int numRuleSets = 0;
    for (UTextOffset p = description.indexOf(gSemiPercent); p != -1; p = description.indexOf(gSemiPercent, p)) {
        ++numRuleSets;
        ++p;
    }
    ++numRuleSets;

    // our rule list is an array of the appropriate size, with one extra
    // slot so that it is always NULL-terminated
    ruleSets = new NFRuleSet*[numRuleSets + 1];
    if (ruleSets == NULL) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }
    for (int i = 0; i <= numRuleSets; ++i) {
        ruleSets[i] = NULL;
    }

    // divide up the descriptions into individual rule-set descriptions
    // and store them in a temporary array. At each step, we also
    // new up a rule set, but all this does is initialize its name
    // and remove it from its description. We can't actually parse
    // the rest of the descriptions and finish initializing everything
    // because we have to know the names and locations of all the rule
    // sets before we can actually set everything up
    UnicodeString* ruleSetDescriptions = new UnicodeString[numRuleSets];
    if (ruleSetDescriptions == NULL) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }

    {
        int curRuleSet = 0;
        UTextOffset start = 0;
        for (UTextOffset p = description.indexOf(gSemiPercent); p != -1; p = description.indexOf(gSemiPercent, start)) {
            ruleSetDescriptions[curRuleSet].setTo(description, start, p + 1 - start);
            ruleSets[curRuleSet] = new NFRuleSet(ruleSetDescriptions, curRuleSet, status);
            if (ruleSets[curRuleSet] == NULL) {
                // the rule sets built so far stay in the NULL-terminated
                // array and are released by dispose()
                status = U_MEMORY_ALLOCATION_ERROR;
                delete[] ruleSetDescriptions;
                return;
            }
            ++curRuleSet;
            start = p + 1;
        }
        ruleSetDescriptions[curRuleSet].setTo(description, start, description.length() - start);
        ruleSets[curRuleSet] = new NFRuleSet(ruleSetDescriptions, curRuleSet, status);
        if (ruleSets[curRuleSet] == NULL) {
            status = U_MEMORY_ALLOCATION_ERROR;
            delete[] ruleSetDescriptions;
            return;
        }
    }

    // now we can take note of the formatter's default rule set, which
    // is the last public rule set in the description (it's the last
    // rather than the first so that a user can create a new formatter
    // from an existing formatter and change its default behavior just
    // by appending more rule sets to the end)
    // setDefaultRuleSet
    {
        defaultRuleSet = ruleSets[numRuleSets - 1];
        if (!defaultRuleSet->isPublic()) {
            for (int i = numRuleSets - 2; i >= 0; --i) {
                if (ruleSets[i]->isPublic()) {
                    defaultRuleSet = ruleSets[i];
                    break;
                }
            }
        }
    }

    // finally, we can go back through the temporary descriptions
    // list and finish setting up the substructure
    {
        for (int i = 0; i < numRuleSets; i++) {
            ruleSets[i]->parseRules(ruleSetDescriptions[i], this, status);
        }
    }

    delete[] ruleSetDescriptions;
}
/**
 * Removes the whitespace at the very start of the description and the
 * whitespace that follows each semicolon. Whitespace inside a rule is
 * kept.
 * @param description The rule description; replaced by the stripped text
 */
void
RuleBasedNumberFormat::stripWhitespace(UnicodeString& description)
{
    UnicodeString stripped;
    const int32_t total = description.length();
    int32_t cursor = 0;

    // copy the description one rule (text up to and including a
    // semicolon) at a time
    while (cursor < total) {
        // skip the whitespace in front of the next rule
        while (cursor < total && u_isWhitespace(description.charAt(cursor))) {
            ++cursor;
        }

        UTextOffset semi = description.indexOf(gSemiColon, cursor);
        if (semi == -1) {
            // last rule, not terminated by a semicolon: take the rest
            stripped.append(description, cursor, total - cursor);
            break;
        }

        // take the rule together with its semicolon
        stripped.append(description, cursor, semi + 1 - cursor);
        cursor = semi + 1;
    }

    description.setTo(stripped);
}
void
RuleBasedNumberFormat::dispose()
{
if (ruleSets) {
for (NFRuleSet** p = ruleSets; *p; ++p) {
delete *p;
}
delete[] ruleSets;
ruleSets = NULL;
}
delete collator;
delete decimalFormatSymbols;
delete lenientParseRules;
}
//-----------------------------------------------------------------------
// package-internal API
//-----------------------------------------------------------------------
/**
 * Returns the collator to use for lenient parsing. The collator is lazily created:
 * this function creates it the first time it's called while lenient parsing is on.
 * @return The collator to use for lenient parsing, or NULL if lenient parsing
 * is turned off or the collator could not be created. The formatter keeps
 * ownership.
 */
Collator*
RuleBasedNumberFormat::getCollator() const
{
    // lazy-evaluate the collator
    if (collator == NULL && lenient) {
        // create a default collator based on the formatter's locale,
        // then pull out that collator's rules, append any additional
        // rules specified in the description, and create a _new_
        // collator based on the combination of those rules
        UErrorCode status = U_ZERO_ERROR;
        Collator* temp = Collator::createInstance(locale, status);
        if (U_SUCCESS(status) && temp != NULL &&
            temp->getDynamicClassID() == RuleBasedCollator::getStaticClassID()) {
            RuleBasedCollator* newCollator = (RuleBasedCollator*)temp;
            if (lenientParseRules) {
                UnicodeString rules(newCollator->getRules());
                rules.append(*lenientParseRules);
                // temp still owns the locale collator and is deleted below
                newCollator = new RuleBasedCollator(rules, status);
            } else {
                // the locale collator itself is kept, so it must not be
                // deleted through temp
                temp = NULL;
            }
            // new may have returned NULL without touching status
            if (U_SUCCESS(status) && newCollator != NULL) {
                newCollator->setDecomposition(Normalizer::DECOMP);
                // cast away const
                ((RuleBasedNumberFormat*)this)->collator = newCollator;
            } else {
                delete newCollator;
            }
        }
        delete temp;
    }

    // if lenient-parse mode is off, this will be null
    // (see setLenient())
    return collator;
}
/**
 * Returns the DecimalFormatSymbols object meant to be shared by all
 * DecimalFormat instances belonging to this formatter. It is created
 * for the formatter's locale on the first call and cached.
 * @return The cached DecimalFormatSymbols object, or NULL if creating
 * it failed.
 */
DecimalFormatSymbols*
RuleBasedNumberFormat::getDecimalFormatSymbols() const
{
    // already created on an earlier call
    if (decimalFormatSymbols != NULL) {
        return decimalFormatSymbols;
    }

    UErrorCode status = U_ZERO_ERROR;
    DecimalFormatSymbols* fresh = new DecimalFormatSymbols(locale, status);
    if (U_FAILURE(status)) {
        // nothing is cached on failure
        delete fresh;
        return decimalFormatSymbols;
    }

    // cast away const to store the lazily created object
    ((RuleBasedNumberFormat*)this)->decimalFormatSymbols = fresh;
    return decimalFormatSymbols;
}

View File

@ -0,0 +1,828 @@
/*
*******************************************************************************
* Copyright (C) 1997-2001, International Business Machines Corporation and others. All Rights Reserved.
*******************************************************************************
*/
#ifndef RBNF_H
#define RBNF_H
#include "unicode/coll.h"
#include "unicode/dcfmtsym.h"
#include "unicode/fmtable.h"
#include "unicode/locid.h"
#include "unicode/numfmt.h"
#include "unicode/unistr.h"
#include "unicode/utypes.h"
U_NAMESPACE_BEGIN
class NFRuleSet;
/**
* \file
* \brief C++ API: RuleBasedNumberFormat
*
* <h2> Rule Based Number Format C++ API </h2>
*
* <p>A class that formats numbers according to a set of rules. This number formatter is
* typically used for spelling out numeric values in words (e.g., 25,376 as
* &quot;twenty-five thousand three hundred seventy-six&quot; or &quot;vingt-cinq mille trois
* cents soixante-seize&quot; or
* &quot;f&uuml;nfundzwanzigtausenddreihundertsechsundsiebzig&quot;), but can also be used for
* other complicated formatting tasks, such as formatting a number of seconds as hours,
* minutes and seconds (e.g., 3,730 as &quot;1:02:10&quot;).</p>
*
* <p>The resources contain three predefined formatters for each locale: spellout, which
* spells out a value in words (123 is &quot;one hundred twenty-three&quot;); ordinal, which
* appends an ordinal suffix to the end of a numeral (123 is &quot;123rd&quot;); and
* duration, which shows a duration in seconds as hours, minutes, and seconds (123 is
* &quot;2:03&quot;).&nbsp; The client can also define more specialized <tt>RuleBasedNumberFormat</tt>s
* by supplying programmer-defined rule sets.</p>
*
* <p>The behavior of a <tt>RuleBasedNumberFormat</tt> is specified by a textual description
* that is either passed to the constructor as a <tt>UnicodeString</tt> or loaded from a resource
* bundle. In its simplest form, the description consists of a semicolon-delimited list of <em>rules.</em>
* Each rule has a string of output text and a value or range of values it is applicable to.
* In a typical spellout rule set, the first twenty rules are the words for the numbers from
* 0 to 19:</p>
*
* <pre>zero; one; two; three; four; five; six; seven; eight; nine;
* ten; eleven; twelve; thirteen; fourteen; fifteen; sixteen; seventeen; eighteen; nineteen;</pre>
*
* <p>For larger numbers, we can use the preceding set of rules to format the ones place, and
* we only have to supply the words for the multiples of 10:</p>
*
* <pre> 20: twenty[-&gt;&gt;];
* 30: thirty[-&gt;&gt;];
* 40: forty[-&gt;&gt;];
* 50: fifty[-&gt;&gt;];
* 60: sixty[-&gt;&gt;];
* 70: seventy[-&gt;&gt;];
* 80: eighty[-&gt;&gt;];
* 90: ninety[-&gt;&gt;];</pre>
*
* <p>In these rules, the <em>base value</em> is spelled out explicitly and set off from the
* rule's output text with a colon. The rules are in a sorted list, and a rule is applicable
* to all numbers from its own base value to one less than the next rule's base value. The
* &quot;&gt;&gt;&quot; token is called a <em>substitution</em> and tells the formatter to
* isolate the number's ones digit, format it using this same set of rules, and place the
* result at the position of the &quot;&gt;&gt;&quot; token. Text in brackets is omitted if
* the number being formatted is an even multiple of 10 (the hyphen is a literal hyphen; 24
* is &quot;twenty-four,&quot; not &quot;twenty four&quot;).</p>
*
* <p>For even larger numbers, we can actually look up several parts of the number in the
* list:</p>
*
* <pre>100: &lt;&lt; hundred[ &gt;&gt;];</pre>
*
* <p>The &quot;&lt;&lt;&quot; represents a new kind of substitution. The &lt;&lt; isolates
* the hundreds digit (and any digits to its left), formats it using this same rule set, and
* places the result where the &quot;&lt;&lt;&quot; was. Notice also that the meaning of
* &gt;&gt; has changed: it now refers to both the tens and the ones digits. The meaning of
* both substitutions depends on the rule's base value. The base value determines the rule's <em>divisor,</em>
* which is the highest power of 10 that is less than or equal to the base value (the user
* can change this). To fill in the substitutions, the formatter divides the number being
* formatted by the divisor. The integral quotient is used to fill in the &lt;&lt;
* substitution, and the remainder is used to fill in the &gt;&gt; substitution. The meaning
* of the brackets changes similarly: text in brackets is omitted if the value being
* formatted is an even multiple of the rule's divisor. The rules are applied recursively, so
* if a substitution is filled in with text that includes another substitution, that
* substitution is also filled in.</p>
*
* <p>This rule covers values up to 999, at which point we add another rule:</p>
*
* <pre>1000: &lt;&lt; thousand[ &gt;&gt;];</pre>
*
* <p>Again, the meanings of the brackets and substitution tokens shift because the rule's
* base value is a higher power of 10, changing the rule's divisor. This rule can actually be
* used all the way up to 999,999. This allows us to finish out the rules as follows:</p>
*
* <pre> 1,000,000: &lt;&lt; million[ &gt;&gt;];
* 1,000,000,000: &lt;&lt; billion[ &gt;&gt;];
* 1,000,000,000,000: &lt;&lt; trillion[ &gt;&gt;];
* 1,000,000,000,000,000: OUT OF RANGE!;</pre>
*
* <p>Commas, periods, and spaces can be used in the base values to improve legibility and
* are ignored by the rule parser. The last rule in the list is customarily treated as an
* &quot;overflow rule,&quot; applying to everything from its base value on up, and often (as
* in this example) being used to print out an error message or default representation.
* Notice also that the size of the major groupings in large numbers is controlled by the
* spacing of the rules: because in English we group numbers by thousand, the higher rules
* are separated from each other by a factor of 1,000.</p>
*
* <p>To see how these rules actually work in practice, consider the following example:
* Formatting 25,340 with this rule set would work like this:</p>
*
* <table border="0" width="630">
* <tr>
* <td width="21"></td>
* <td width="257" valign="top"><strong>&lt;&lt; thousand &gt;&gt;</strong></td>
* <td width="340" valign="top">[the rule whose base value is 1,000 is applicable to 25,340]</td>
* </tr>
* <tr>
* <td width="21"></td>
* <td width="257" valign="top"><strong>twenty-&gt;&gt;</strong> thousand &gt;&gt;</td>
* <td width="340" valign="top">[25,340 over 1,000 is 25. The rule for 20 applies.]</td>
* </tr>
* <tr>
* <td width="21"></td>
* <td width="257" valign="top">twenty-<strong>five</strong> thousand &gt;&gt;</td>
* <td width="340" valign="top">[25 mod 10 is 5. The rule for 5 is &quot;five.&quot;]</td>
* </tr>
* <tr>
* <td width="21"></td>
* <td width="257" valign="top">twenty-five thousand <strong>&lt;&lt; hundred &gt;&gt;</strong></td>
* <td width="340" valign="top">[25,340 mod 1,000 is 340. The rule for 100 applies.]</td>
* </tr>
* <tr>
* <td width="21"></td>
* <td width="257" valign="top">twenty-five thousand <strong>three</strong> hundred &gt;&gt;</td>
* <td width="340" valign="top">[340 over 100 is 3. The rule for 3 is &quot;three.&quot;]</td>
* </tr>
* <tr>
* <td width="21"></td>
* <td width="257" valign="top">twenty-five thousand three hundred <strong>forty</strong></td>
* <td width="340" valign="top">[340 mod 100 is 40. The rule for 40 applies. Since 40 divides
* evenly by 10, the hyphen and substitution in the brackets are omitted.]</td>
* </tr>
* </table>
*
* <p>The above syntax suffices only to format positive integers. To format negative numbers,
* we add a special rule:</p>
*
* <pre>-x: minus &gt;&gt;;</pre>
*
* <p>This is called a <em>negative-number rule,</em> and is identified by &quot;-x&quot;
* where the base value would be. This rule is used to format all negative numbers. The
* &gt;&gt; token here means &quot;find the number's absolute value, format it with these
* rules, and put the result here.&quot;</p>
*
* <p>We also add a special rule called a <em>fraction rule </em>for numbers with fractional
* parts:</p>
*
* <pre>x.x: &lt;&lt; point &gt;&gt;;</pre>
*
* <p>This rule is used for all positive non-integers (negative non-integers pass through the
* negative-number rule first and then through this rule). Here, the &lt;&lt; token refers to
* the number's integral part, and the &gt;&gt; to the number's fractional part. The
* fractional part is formatted as a series of single-digit numbers (e.g., 123.456 would be
* formatted as &quot;one hundred twenty-three point four five six&quot;).</p>
*
* <p>To see how this rule syntax is applied to various languages, examine the resource data.</p>
*
* <p>There is actually much more flexibility built into the rule language than the
* description above shows. A formatter may own multiple rule sets, which can be selected by
* the caller, and which can use each other to fill in their substitutions. Substitutions can
* also be filled in with digits, using a DecimalFormat object. There is syntax that can be
* used to alter a rule's divisor in various ways. And there is provision for much more
* flexible fraction handling. A complete description of the rule syntax follows:</p>
*
* <hr>
*
* <p>The description of a <tt>RuleBasedNumberFormat</tt>'s behavior consists of one or more <em>rule
* sets.</em> Each rule set consists of a name, a colon, and a list of <em>rules.</em> A rule
* set name must begin with a % sign. Rule sets with names that begin with a single % sign
* are <em>public:</em> the caller can specify that they be used to format and parse numbers.
* Rule sets with names that begin with %% are <em>private:</em> they exist only for the use
* of other rule sets. If a formatter only has one rule set, the name may be omitted.</p>
*
* <p>The user can also specify a special &quot;rule set&quot; named <tt>%%lenient-parse</tt>.
* The body of <tt>%%lenient-parse</tt> isn't a set of number-formatting rules, but a <tt>RuleBasedCollator</tt>
* description which is used to define equivalences for lenient parsing. For more information
* on the syntax, see <tt>RuleBasedCollator</tt>. For more information on lenient parsing,
* see <tt>setLenient()</tt>.</p>
*
* <p>The body of a rule set consists of an ordered, semicolon-delimited list of <em>rules.</em>
* Internally, every rule has a base value, a divisor, rule text, and zero, one, or two <em>substitutions.</em>
* These parameters are controlled by the description syntax, which consists of a <em>rule
* descriptor,</em> a colon, and a <em>rule body.</em></p>
*
* <p>A rule descriptor can take one of the following forms (text in <em>italics</em> is the
* name of a token):</p>
*
* <table border="0" width="100%">
* <tr>
* <td width="5%" valign="top"></td>
* <td width="8%" valign="top"><em>bv</em>:</td>
* <td valign="top"><em>bv</em> specifies the rule's base value. <em>bv</em> is a decimal
* number expressed using ASCII digits. <em>bv</em> may contain spaces, period, and commas,
* which are ignored. The rule's divisor is the highest power of 10 less than or equal to
* the base value.</td>
* </tr>
* <tr>
* <td width="5%" valign="top"></td>
* <td width="8%" valign="top"><em>bv</em>/<em>rad</em>:</td>
* <td valign="top"><em>bv</em> specifies the rule's base value. The rule's divisor is the
* highest power of <em>rad</em> less than or equal to the base value.</td>
* </tr>
* <tr>
* <td width="5%" valign="top"></td>
* <td width="8%" valign="top"><em>bv</em>&gt;:</td>
* <td valign="top"><em>bv</em> specifies the rule's base value. To calculate the divisor,
* let the radix be 10, and the exponent be the highest exponent of the radix that yields a
* result less than or equal to the base value. Every &gt; character after the base value
* decreases the exponent by 1. If the exponent is positive or 0, the divisor is the radix
* raised to the power of the exponent; otherwise, the divisor is 1.</td>
* </tr>
* <tr>
* <td width="5%" valign="top"></td>
* <td width="8%" valign="top"><em>bv</em>/<em>rad</em>&gt;:</td>
* <td valign="top"><em>bv</em> specifies the rule's base value. To calculate the divisor,
* let the radix be <em>rad</em>, and the exponent be the highest exponent of the radix that
* yields a result less than or equal to the base value. Every &gt; character after the radix
* decreases the exponent by 1. If the exponent is positive or 0, the divisor is the radix
* raised to the power of the exponent; otherwise, the divisor is 1.</td>
* </tr>
* <tr>
* <td width="5%" valign="top"></td>
* <td width="8%" valign="top">-x:</td>
* <td valign="top">The rule is a negative-number rule.</td>
* </tr>
* <tr>
* <td width="5%" valign="top"></td>
* <td width="8%" valign="top">x.x:</td>
* <td valign="top">The rule is an <em>improper fraction rule.</em></td>
* </tr>
* <tr>
* <td width="5%" valign="top"></td>
* <td width="8%" valign="top">0.x:</td>
* <td valign="top">The rule is a <em>proper fraction rule.</em></td>
* </tr>
* <tr>
* <td width="5%" valign="top"></td>
* <td width="8%" valign="top">x.0:</td>
* <td valign="top">The rule is a <em>master rule.</em></td>
* </tr>
* <tr>
* <td width="5%" valign="top"></td>
* <td width="8%" valign="top"><em>nothing</em></td>
* <td valign="top">If the rule's rule descriptor is left out, the base value is one plus the
* preceding rule's base value (or zero if this is the first rule in the list) in a normal
* rule set.&nbsp; In a fraction rule set, the base value is the same as the preceding rule's
* base value.</td>
* </tr>
* </table>
*
* <p>A rule set may be either a regular rule set or a <em>fraction rule set,</em> depending
* on whether it is used to format a number's integral part (or the whole number) or a
* number's fractional part. Using a rule set to format a rule's fractional part makes it a
* fraction rule set.</p>
*
* <p>Which rule is used to format a number is defined according to one of the following
* algorithms: If the rule set is a regular rule set, do the following:
*
* <ul>
* <li>If the rule set includes a master rule (and the number was passed in as a <tt>double</tt>),
* use the master rule.&nbsp; (If the number being formatted was passed in as a <tt>long</tt>,
* the master rule is ignored.)</li>
* <li>If the number is negative, use the negative-number rule.</li>
* <li>If the number has a fractional part and is greater than 1, use the improper fraction
* rule.</li>
* <li>If the number has a fractional part and is between 0 and 1, use the proper fraction
* rule.</li>
* <li>Binary-search the rule list for the rule with the highest base value less than or equal
* to the number. If that rule has two substitutions, its base value is not an even multiple
* of its divisor, and the number <em>is</em> an even multiple of the rule's divisor, use the
* rule that precedes it in the rule list. Otherwise, use the rule itself.</li>
* </ul>
*
* <p>If the rule set is a fraction rule set, do the following:
*
* <ul>
* <li>Ignore negative-number and fraction rules.</li>
* <li>For each rule in the list, multiply the number being formatted (which will always be
* between 0 and 1) by the rule's base value. Keep track of the distance between the result
* and the nearest integer.</li>
* <li>Use the rule that produced the result closest to zero in the above calculation. In the
* event of a tie or a direct hit, use the first matching rule encountered. (The idea here is
* to try each rule's base value as a possible denominator of a fraction. Whichever
* denominator produces the fraction closest in value to the number being formatted wins.) If
* the rule following the matching rule has the same base value, use it if the numerator of
* the fraction is anything other than 1; if the numerator is 1, use the original matching
* rule. (This is to allow singular and plural forms of the rule text without a lot of extra
* hassle.)</li>
* </ul>
*
* <p>A rule's body consists of a string of characters terminated by a semicolon. The rule
* may include zero, one, or two <em>substitution tokens,</em> and a range of text in
* brackets. The brackets denote optional text (and may also include one or both
* substitutions). The exact meanings of the substitution tokens, and under what conditions
* optional text is omitted, depend on the syntax of the substitution token and the context.
* The rest of the text in a rule body is literal text that is output when the rule matches
* the number being formatted.</p>
*
* <p>A substitution token begins and ends with a <em>token character.</em> The token
* character and the context together specify a mathematical operation to be performed on the
* number being formatted. An optional <em>substitution descriptor </em>specifies how the
* value resulting from that operation is used to fill in the substitution. The position of
* the substitution token in the rule body specifies the location of the resultant text in
* the original rule text.</p>
*
* <p>The meanings of the substitution token characters are as follows:</p>
*
* <table border="0" width="100%">
* <tr>
* <td width="37"></td>
* <td width="23">&gt;&gt;</td>
* <td width="165" valign="top">in normal rule</td>
* <td>Divide the number by the rule's divisor and format the remainder</td>
* </tr>
* <tr>
* <td width="37"></td>
* <td width="23"></td>
* <td width="165" valign="top">in negative-number rule</td>
* <td>Find the absolute value of the number and format the result</td>
* </tr>
* <tr>
* <td width="37"></td>
* <td width="23"></td>
* <td width="165" valign="top">in fraction or master rule</td>
* <td>Isolate the number's fractional part and format it.</td>
* </tr>
* <tr>
* <td width="37"></td>
* <td width="23"></td>
* <td width="165" valign="top">in rule in fraction rule set</td>
* <td>Not allowed.</td>
* </tr>
* <tr>
* <td width="37"></td>
* <td width="23">&gt;&gt;&gt;</td>
* <td width="165" valign="top">in normal rule</td>
* <td>Divide the number by the rule's divisor and format the remainder,
* but bypass the normal rule-selection process and just use the
* rule that precedes this one in this rule list.</td>
* </tr>
* <tr>
* <td width="37"></td>
* <td width="23"></td>
* <td width="165" valign="top">in all other rules</td>
* <td>Not allowed.</td>
* </tr>
* <tr>
* <td width="37"></td>
* <td width="23">&lt;&lt;</td>
* <td width="165" valign="top">in normal rule</td>
* <td>Divide the number by the rule's divisor and format the quotient</td>
* </tr>
* <tr>
* <td width="37"></td>
* <td width="23"></td>
* <td width="165" valign="top">in negative-number rule</td>
* <td>Not allowed.</td>
* </tr>
* <tr>
* <td width="37"></td>
* <td width="23"></td>
* <td width="165" valign="top">in fraction or master rule</td>
* <td>Isolate the number's integral part and format it.</td>
* </tr>
* <tr>
* <td width="37"></td>
* <td width="23"></td>
* <td width="165" valign="top">in rule in fraction rule set</td>
* <td>Multiply the number by the rule's base value and format the result.</td>
* </tr>
* <tr>
* <td width="37"></td>
* <td width="23">==</td>
* <td width="165" valign="top">in all rule sets</td>
* <td>Format the number unchanged</td>
* </tr>
* <tr>
* <td width="37"></td>
* <td width="23">[]</td>
* <td width="165" valign="top">in normal rule</td>
* <td>Omit the optional text if the number is an even multiple of the rule's divisor</td>
* </tr>
* <tr>
* <td width="37"></td>
* <td width="23"></td>
* <td width="165" valign="top">in negative-number rule</td>
* <td>Not allowed.</td>
* </tr>
* <tr>
* <td width="37"></td>
* <td width="23"></td>
* <td width="165" valign="top">in improper-fraction rule</td>
* <td>Omit the optional text if the number is between 0 and 1 (same as specifying both an
* x.x rule and a 0.x rule)</td>
* </tr>
* <tr>
* <td width="37"></td>
* <td width="23"></td>
* <td width="165" valign="top">in master rule</td>
* <td>Omit the optional text if the number is an integer (same as specifying both an x.x
* rule and an x.0 rule)</td>
* </tr>
* <tr>
* <td width="37"></td>
* <td width="23"></td>
* <td width="165" valign="top">in proper-fraction rule</td>
* <td>Not allowed.</td>
* </tr>
* <tr>
* <td width="37"></td>
* <td width="23"></td>
* <td width="165" valign="top">in rule in fraction rule set</td>
* <td>Omit the optional text if multiplying the number by the rule's base value yields 1.</td>
* </tr>
* </table>
*
* <p>The substitution descriptor (i.e., the text between the token characters) may take one
* of three forms:</p>
*
* <table border="0" width="100%">
* <tr>
* <td width="42"></td>
* <td width="166" valign="top">a rule set name</td>
* <td>Perform the mathematical operation on the number, and format the result using the
* named rule set.</td>
* </tr>
* <tr>
* <td width="42"></td>
* <td width="166" valign="top">a DecimalFormat pattern</td>
* <td>Perform the mathematical operation on the number, and format the result using a
* DecimalFormat with the specified pattern.&nbsp; The pattern must begin with 0 or #.</td>
* </tr>
* <tr>
* <td width="42"></td>
* <td width="166" valign="top">nothing</td>
* <td>Perform the mathematical operation on the number, and format the result using the rule
* set containing the current rule, except:<ul>
* <li>You can't have an empty substitution descriptor with a == substitution.</li>
* <li>If you omit the substitution descriptor in a &gt;&gt; substitution in a fraction rule,
* format the result one digit at a time using the rule set containing the current rule.</li>
* <li>If you omit the substitution descriptor in a &lt;&lt; substitution in a rule in a
* fraction rule set, format the result using the default rule set for this formatter.</li>
* </ul>
* </td>
* </tr>
* </table>
*
* <p>Whitespace is ignored between a rule set name and a rule set body, between a rule
* descriptor and a rule body, or between rules. If a rule body begins with an apostrophe,
* the apostrophe is ignored, but all text after it becomes significant (this is how you can
* have a rule's rule text begin with whitespace). There is no escape function: the semicolon
* is not allowed in rule set names or in rule text, and the colon is not allowed in rule set
* names. The characters beginning a substitution token are always treated as the beginning
* of a substitution token.</p>
*
* <p>See the resource data and the demo program for annotated examples of real rule sets
* using these features.</p>
*
* @author Richard Gillam
* @see NumberFormat
* @see DecimalFormat
* @draft
*/
/**
 * Selector tags for the predefined rule sets that can be requested when
 * constructing a formatter from resource data.
 */
enum URBNFRuleSetTag {
    /** Spells out a value in words. */
    URBNF_SPELLOUT = 0,
    /** Attaches an ordinal suffix to the end of a numeral (e.g. "123rd"). */
    URBNF_ORDINAL,
    /** Formats a duration in seconds as hours, minutes, and seconds. */
    URBNF_DURATION,
    /** Equal to the number of tags that precede it; not one of the three selectors. */
    URBNF_COUNT
};
/**
* A NumberFormat subclass that formats and parses numbers according to a
* textual set of rules.  See the file-level documentation for the rule syntax.
*/
class U_I18N_API RuleBasedNumberFormat : public NumberFormat {
public:
//-----------------------------------------------------------------------
// constructors
//-----------------------------------------------------------------------
/**
* Creates a RuleBasedNumberFormat that behaves according to the rules
* passed in. The formatter uses the specified locale to determine the
* characters to use when formatting numerals, and to define equivalences
* for lenient parsing.
* @param rules The formatter rules.
* See the class documentation for a complete explanation of the rule
* syntax.
* @param locale A locale that governs which characters are used for
* formatting values in numerals, and which characters are equivalent in
* lenient parsing.
* @param perror The parse error if an error was encountered.
* @param status The status indicating whether the constructor succeeded.
* @draft
*/
RuleBasedNumberFormat(const UnicodeString& rules, const Locale& locale,
UParseError& perror, UErrorCode& status);
/**
* Creates a RuleBasedNumberFormat from a predefined ruleset. The selector
* code chooses among three possible predefined formats: spellout, ordinal,
* and duration.
* @param tag A selector code specifying which kind of formatter to create for that
* locale. There are three legal values: URBNF_SPELLOUT, which creates a formatter that
* spells out a value in words in the desired language, URBNF_ORDINAL, which attaches
* an ordinal suffix from the desired language to the end of a number (e.g. "123rd"),
* and URBNF_DURATION, which formats a duration in seconds as hours, minutes, and seconds.
* @param locale The locale for the formatter.
* @param status The status indicating whether the constructor succeeded.
* @draft
*/
RuleBasedNumberFormat(URBNFRuleSetTag tag, const Locale& locale, UErrorCode& status);
//-----------------------------------------------------------------------
// boilerplate
//-----------------------------------------------------------------------
/**
* Copy constructor
* @param rhs the formatter to copy
*/
RuleBasedNumberFormat(const RuleBasedNumberFormat& rhs);
/**
* Assignment operator
* @param rhs the formatter to assign from
* @return a reference to this formatter
*/
RuleBasedNumberFormat& operator=(const RuleBasedNumberFormat& rhs);
/**
* Release memory allocated for a RuleBasedNumberFormat when you are finished with it.
*/
virtual ~RuleBasedNumberFormat();
/**
* Clone this object polymorphically. The caller is responsible
* for deleting the result when done.
* @return a copy of this object, owned by the caller
*/
virtual Format* clone(void) const;
/**
* Return true if the given Format objects are semantically equal.
* Objects of different subclasses are considered unequal.
* @param other the Format to compare with this one
*/
virtual UBool operator==(const Format& other) const;
//-----------------------------------------------------------------------
// public API functions
//-----------------------------------------------------------------------
/**
* Returns the rule description of this formatter.
* @return the rules that were provided to the RuleBasedNumberFormat.
* @draft
*/
virtual UnicodeString getRules() const;
/**
* Return the name of the index'th public ruleSet.
* NOTE(review): the original text says an invalid index makes the function
* "return null"; the return type is a UnicodeString by value, so confirm
* against the implementation what is actually returned in that case.
* @param index the index of the ruleset
* @return the name of the index'th public ruleSet.
* @draft
*/
virtual UnicodeString getRuleSetName(int32_t index) const;
/**
* Return the number of public rule set names.
* @return the number of public rule set names.
* @draft
*/
virtual int32_t getNumberOfRuleSetNames() const;
/**
* Formats the specified integer using the default ruleset.
* @param number The number to format.
* @param toAppendTo the string that will hold the (appended) result
* @param pos the fieldposition
* @return A textual representation of the number.
* @draft
*/
virtual UnicodeString& format(int32_t number,
UnicodeString& toAppendTo,
FieldPosition& pos) const;
/**
* Formats the specified floating-point number using the default ruleset.
* @param number The number to format.
* @param toAppendTo the string that will hold the (appended) result
* @param pos the fieldposition
* @return A textual representation of the number.
* @draft
*/
virtual UnicodeString& format(double number,
UnicodeString& toAppendTo,
FieldPosition& pos) const;
/**
* Formats the specified integer using the named ruleset.
* @param number The number to format.
* @param ruleSetName The name of the rule set to format the number with.
* This must be the name of a valid public rule set for this formatter.
* @param toAppendTo the string that will hold the (appended) result
* @param pos the fieldposition
* @param status the status
* @return A textual representation of the number.
* @draft
*/
virtual UnicodeString& format(int32_t number,
const UnicodeString& ruleSetName,
UnicodeString& toAppendTo,
FieldPosition& pos,
UErrorCode& status) const;
/**
* Formats the specified floating-point number using the named ruleset.
* @param number The number to format.
* @param ruleSetName The name of the rule set to format the number with.
* This must be the name of a valid public rule set for this formatter.
* @param toAppendTo the string that will hold the (appended) result
* @param pos the fieldposition
* @param status the status
* @return A textual representation of the number.
* @draft
*/
virtual UnicodeString& format(double number,
const UnicodeString& ruleSetName,
UnicodeString& toAppendTo,
FieldPosition& pos,
UErrorCode& status) const;
/**
* Formats the specified number using the default ruleset.
* @param obj The number to format.
* @param toAppendTo the string that will hold the (appended) result
* @param pos the fieldposition
* @param status the status
* @return A textual representation of the number.
* @draft
*/
virtual UnicodeString& format(const Formattable& obj,
UnicodeString& toAppendTo,
FieldPosition& pos,
UErrorCode& status) const;
/**
* Redeclared Format method.
* @stable
*/
UnicodeString& format(const Formattable& obj,
UnicodeString& result,
UErrorCode& status) const;
/**
* Redeclared NumberFormat method.
* @stable
*/
UnicodeString& format(double number,
UnicodeString& output) const;
/**
* Redeclared NumberFormat method.
* @stable
*/
UnicodeString& format(int32_t number,
UnicodeString& output) const;
/**
* Parses the specified string, beginning at the specified position, according
* to this formatter's rules. This will match the string against all of the
* formatter's public rule sets and return the value corresponding to the longest
* parseable substring. This function's behavior is affected by the lenient
* parse mode.
* @param text The string to parse
* @param result the result of the parse, either a double or a long.
* @param parsePosition On entry, contains the position of the first character
* in "text" to examine. On exit, has been updated to contain the position
* of the first character in "text" that wasn't consumed by the parse.
* @see #setLenient
* @draft
*/
virtual void parse(const UnicodeString& text,
Formattable& result,
ParsePosition& parsePosition) const;
/**
* Redeclared Format method.
* @stable
*/
virtual inline void parse(const UnicodeString& text,
Formattable& result,
UErrorCode& status) const;
/**
* Turns lenient parse mode on and off.
*
* When in lenient parse mode, the formatter uses a Collator for parsing the text.
* Only primary differences are treated as significant. This means that case
* differences, accent differences, alternate spellings of the same letter
* (e.g., ae and a-umlaut in German), ignorable characters, etc. are ignored in
* matching the text. In many cases, numerals will be accepted in place of words
* or phrases as well.
*
* For example, all of the following will correctly parse as 255 in English in
* lenient-parse mode:
* <br>"two hundred fifty-five"
* <br>"two hundred fifty five"
* <br>"TWO HUNDRED FIFTY-FIVE"
* <br>"twohundredfiftyfive"
* <br>"2 hundred fifty-5"
*
* The Collator used is determined by the locale that was
* passed to this object on construction. The description passed to this object
* on construction may supply additional collation rules that are appended to the
* end of the default collator for the locale, enabling additional equivalences
* (such as adding more ignorable characters or permitting spelled-out versions of
* symbols; see the demo program for examples).
*
* It's important to emphasize that even strict parsing is relatively lenient: it
* will accept some text that it won't produce as output. In English, for example,
* it will correctly parse "two hundred zero" and "fifteen hundred".
*
* @param enabled If true, turns lenient-parse mode on; if false, turns it off.
* @see RuleBasedCollator
* @draft
*/
virtual void setLenient(UBool enabled);
/**
* Returns true if lenient-parse mode is turned on. Lenient parsing is off
* by default.
* @return true if lenient-parse mode is turned on.
* @see #setLenient
* @draft
*/
virtual inline UBool isLenient(void) const;
private:
// Private implementation helpers; their definitions are outside this header.
void init(const UnicodeString& rules, UParseError& perror, UErrorCode& status);
void dispose();
void stripWhitespace(UnicodeString& src);
void setDefaultRuleSet();
void format(double number, NFRuleSet& ruleSet);
NFRuleSet* findRuleSet(const UnicodeString& name, UErrorCode& status) const;
/* friend access */
// The accessors below are private; the friend classes declared here are
// the only outside code that can reach them.
friend class NFSubstitution;
friend class NFRule;
friend class FractionalPartSubstitution;
// Returns the defaultRuleSet member.
inline NFRuleSet * getDefaultRuleSet() const;
// Returns the collator member; this is null when lenient-parse mode is off.
Collator * getCollator() const;
// Returns the shared DecimalFormatSymbols object, creating it on first use.
DecimalFormatSymbols * getDecimalFormatSymbols() const;
private:
// The address of this static member serves as the class ID.
static const char fgClassID;
public:
/** Returns the class ID shared by all RuleBasedNumberFormat objects. */
static UClassID getStaticClassID(void) { return (UClassID)&fgClassID; }
/** Returns the class ID of this object; same value as getStaticClassID(). */
virtual UClassID getDynamicClassID(void) const { return getStaticClassID(); }
private:
// The rule sets belonging to this formatter.
// NOTE(review): presumably an array of owned NFRuleSet pointers -- confirm in the implementation.
NFRuleSet **ruleSets;
// The rule set handed out by getDefaultRuleSet().
NFRuleSet *defaultRuleSet;
// The locale used when building the DecimalFormatSymbols object.
Locale locale;
// Collator returned by getCollator(); null while lenient-parse mode is off.
Collator* collator;
// Lazily created by getDecimalFormatSymbols(); NULL until then.
DecimalFormatSymbols* decimalFormatSymbols;
// Lenient-parse flag reported by isLenient().
UBool lenient;
// Extra collation rules; when non-NULL, getCollator() appends them to the
// collator's own rules before building the lenient-parse collator.
UnicodeString* lenientParseRules;
};
// ---------------
/**
 * Redeclared Format method: formats obj into result by handing the call
 * straight to the NumberFormat implementation.
 */
inline UnicodeString&
RuleBasedNumberFormat::format(const Formattable& obj,
                              UnicodeString& result,
                              UErrorCode& status) const
{
    // Qualify with the immediate base class (NumberFormat) rather than
    // Format, so the call goes through whatever NumberFormat provides.
    UnicodeString& formatted = NumberFormat::format(obj, result, status);
    return formatted;
}
// Convenience overload for doubles: formats through the three-argument
// format() using a throwaway FieldPosition (field 0) that is discarded.
inline UnicodeString&
RuleBasedNumberFormat::format(double number, UnicodeString& output) const {
    FieldPosition ignoredPosition(0);
    return format(number, output, ignoredPosition);
}
// Convenience overload for 32-bit integers: formats through the
// three-argument format() using a throwaway FieldPosition (field 0).
inline UnicodeString&
RuleBasedNumberFormat::format(int32_t number, UnicodeString& output) const {
    FieldPosition ignoredPosition(0);
    return format(number, output, ignoredPosition);
}
// Parses `text` into `result` by forwarding, unchanged, to
// NumberFormat::parse(text, result, status); any failure is reported by
// that call through `status`.
inline void
RuleBasedNumberFormat::parse(const UnicodeString& text, Formattable& result, UErrorCode& status) const {
NumberFormat::parse(text, result, status);
}
// Returns the current value of the `lenient` flag (the lenient-parse mode
// switch documented on the declaration).
inline UBool
RuleBasedNumberFormat::isLenient(void) const {
return lenient;
}
// Returns the stored `defaultRuleSet` pointer as-is; no copy is made.
inline NFRuleSet*
RuleBasedNumberFormat::getDefaultRuleSet() const {
return defaultRuleSet;
}
U_NAMESPACE_END
/* RBNF_H */
#endif

View File

@ -48,7 +48,8 @@ tsmthred.o tsmutex.o tsnmfmt.o tsputil.o tstnorm.o tzbdtest.o \
tzregts.o tztest.o ucdtest.o usettest.o ustrtest.o transtst.o strtest.o thcoll.o \
itrbbi.o rbbiapts.o rbbitst.o ittrans.o transapi.o cpdtrtst.o unhxtrts.o hxuntrts.o \
ufltlgts.o testutil.o transrt.o normconf.o sfwdchit.o indictrn.o\
jamotest.o srchtest.o
jamotest.o srchtest.o \
itrbnf.o itrbnfrt.o
DEPS = $(OBJECTS:.o=.d)

View File

@ -217,6 +217,14 @@ SOURCE=.\itrbbi.cpp
# End Source File
# Begin Source File
SOURCE=.\itrbnf.cpp
# End Source File
# Begin Source File
SOURCE=.\itrbnfrt.cpp
# End Source File
# Begin Source File
SOURCE=.\ittrans.cpp
# End Source File
# Begin Source File
@ -545,6 +553,14 @@ SOURCE=.\itrbbi.h
# End Source File
# Begin Source File
SOURCE=.\itrbnf.h
# End Source File
# Begin Source File
SOURCE=.\itrbnfrt.h
# End Source File
# Begin Source File
SOURCE=.\ittrans.h
# End Source File
# Begin Source File

View File

@ -24,6 +24,8 @@
#include "itconv.h"
#include "ittrans.h"
#include "itrbbi.h"
#include "itrbnf.h"
#include "itrbnfrt.h"
#include "normconf.h"
#include "tstnorm.h"
@ -102,6 +104,20 @@ void MajorTestLevel::runIndexedTest( int32_t index, UBool exec, const char* &nam
callTest( test, par );
}
break;
case 8: name = "rbnf";
if (exec) {
logln("TestSuite RuleBasedNumberFormat----"); logln();
IntlTestRBNF test;
callTest(test, par);
}
break;
case 9: name = "rbnfrt";
if (exec) {
logln("TestSuite RuleBasedNumberFormat RT----"); logln();
RbnfRoundTripTest test;
callTest(test, par);
}
break;
default: name = ""; break;
}

View File

@ -0,0 +1,618 @@
/*
*******************************************************************************
* Copyright (C) 1996-2000, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*/
#include "itrbnf.h"
#include "unicode/tblcoll.h"
#include "unicode/coleitr.h"
// import com.ibm.text.RuleBasedNumberFormat;
// import com.ibm.test.TestFmwk;
// import java.util.Locale;
// import java.text.NumberFormat;
// TESTCASE(id, test) expands to one complete `case` of the switch in
// runIndexedTest: it stores the stringized test name in `name` and, when
// `exec` is true, logs a "<test>---" banner plus an empty line and then
// calls the member function `test`. The expansion ends in `break` with no
// trailing semicolon, so each use site supplies the `;`.
// current macro not in icu1.8.1
#define TESTCASE(id,test) \
case id: \
name = #test; \
if (exec) { \
logln(#test "---"); \
logln((UnicodeString)""); \
test(); \
} \
break
/**
 * IntlTest dispatch hook. Maps `index` to one of the RBNF spot-check tests,
 * reports its name through `name`, and runs it when `exec` is true.
 * An index past the last test sets `name` to the empty string.
 * `par` is not used by this suite.
 */
void IntlTestRBNF::runIndexedTest(int32_t index, UBool exec, const char* &name, char* par)
{
    if (exec) {
        logln("TestSuite RuleBasedNumberFormat");
    }

    switch (index) {
        TESTCASE(0, TestEnglishSpellout);
        TESTCASE(1, TestOrdinalAbbreviations);
        TESTCASE(2, TestDurations);
        TESTCASE(3, TestSpanishSpellout);
        TESTCASE(4, TestFrenchSpellout);
        TESTCASE(5, TestSwissFrenchSpellout);
        TESTCASE(6, TestItalianSpellout);
        TESTCASE(7, TestGermanSpellout);
        TESTCASE(8, TestThaiSpellout);

    default:
        // Signals "no more tests" to the caller.
        name = "";
        break;
    }
}
void
IntlTestRBNF::TestEnglishSpellout()
{
#if 0
// temporary test code
{
int32_t result = 0;
UErrorCode status = U_ZERO_ERROR;
Collator* temp = Collator::createInstance(Locale::US, status);
if (U_SUCCESS(status) &&
temp->getDynamicClassID() == RuleBasedCollator::getStaticClassID()) {
RuleBasedCollator* collator = (RuleBasedCollator*)temp;
UnicodeString rules(collator->getRules());
UnicodeString tailoring("&'\\u0000' << ' ' << '-'\n");
tailoring = tailoring.unescape();
rules.append(tailoring);
collator = new RuleBasedCollator(rules, status);
if (U_SUCCESS(status)) {
collator->setDecomposition(Normalizer::DECOMP);
UnicodeString prefix(" hundred");
UnicodeString str("hundred-fifty");
CollationElementIterator* strIter = collator->createCollationElementIterator(str);
CollationElementIterator* prefixIter = collator->createCollationElementIterator(prefix);
// match collation elements between the strings
int32_t oStr = strIter->next(status);
int32_t oPrefix = prefixIter->next(status);
while (oPrefix != CollationElementIterator::NULLORDER) {
// skip over ignorable characters in the target string
while (CollationElementIterator::primaryOrder(oStr) == 0
&& oStr != CollationElementIterator::NULLORDER) {
oStr = strIter->next(status);
}
// skip over ignorable characters in the prefix
while (CollationElementIterator::primaryOrder(oPrefix) == 0
&& oPrefix != CollationElementIterator::NULLORDER) {
oPrefix = prefixIter->next(status);
}
// if skipping over ignorables brought us to the end
// of the target string, we didn't match and return 0
if (oStr == CollationElementIterator::NULLORDER) {
result = -1;
break;
}
// if skipping over ignorables brought to the end of
// the prefix, we DID match: drop out of the loop
else if (oPrefix == CollationElementIterator::NULLORDER) {
break;
}
// match collation elements from the two strings
// (considering only primary differences). If we
// get a mismatch, dump out and return 0
if (CollationElementIterator::primaryOrder(oStr)
!= CollationElementIterator::primaryOrder(oPrefix)) {
result = -1;
break;
// otherwise, advance to the next character in each string
// and loop (we drop out of the loop when we exhaust
// collation elements in the prefix)
} else {
oStr = strIter->next(status);
oPrefix = prefixIter->next(status);
}
}
if (result == 0) {
result = strIter->getOffset();
}
delete prefixIter;
delete strIter;
}
delete collator;
}
delete temp;
printf("result: %d\n", result);
}
#endif
UErrorCode status = U_ZERO_ERROR;
RuleBasedNumberFormat* formatter
= new RuleBasedNumberFormat(URBNF_SPELLOUT, Locale::US, status);
if (U_FAILURE(status)) {
errln("FAIL: could not construct formatter");
} else {
const char* testData[][2] = {
{ "1", "one" },
{ "2", "two" },
{ "15", "fifteen" },
{ "20", "twenty" },
{ "23", "twenty-three" },
{ "73", "seventy-three" },
{ "88", "eighty-eight" },
{ "100", "one hundred" },
{ "106", "one hundred and six" },
{ "127", "one hundred and twenty-seven" },
{ "200", "two hundred" },
{ "579", "five hundred and seventy-nine" },
{ "1,000", "one thousand" },
{ "2,000", "two thousand" },
{ "3,004", "three thousand and four" },
{ "4,567", "four thousand five hundred and sixty-seven" },
{ "15,943", "fifteen thousand nine hundred and forty-three" },
{ "2,345,678", "two million, three hundred and forty-five thousand, six hundred and seventy-eight" },
{ "-36", "minus thirty-six" },
{ "234.567", "two hundred and thirty-four point five six seven" },
NULL
};
doTest(formatter, testData, TRUE);
formatter->setLenient(TRUE);
const char* lpTestData[][2] = {
{ "2 thousand six HUNDRED fifty-7", "2,657" },
{ "fifteen hundred and zero", "1,500" },
{ "FOurhundred thiRTY six", "436" },
NULL
};
doLenientParseTest(formatter, lpTestData);
}
}
void
IntlTestRBNF::TestOrdinalAbbreviations()
{
UErrorCode status = U_ZERO_ERROR;
RuleBasedNumberFormat* formatter
= new RuleBasedNumberFormat(URBNF_ORDINAL, Locale::US, status);
if (U_FAILURE(status)) {
errln("FAIL: could not construct formatter");
} else {
const char* testData[][2] = {
{ "1", "1st" },
{ "2", "2nd" },
{ "3", "3rd" },
{ "4", "4th" },
{ "7", "7th" },
{ "10", "10th" },
{ "11", "11th" },
{ "13", "13th" },
{ "20", "20th" },
{ "21", "21st" },
{ "22", "22nd" },
{ "23", "23rd" },
{ "24", "24th" },
{ "33", "33rd" },
{ "102", "102nd" },
{ "312", "312th" },
{ "12,345", "12,345th" },
NULL
};
doTest(formatter, testData, FALSE);
}
}
void
IntlTestRBNF::TestDurations()
{
UErrorCode status = U_ZERO_ERROR;
RuleBasedNumberFormat* formatter
= new RuleBasedNumberFormat(URBNF_DURATION, Locale::US, status);
if (U_FAILURE(status)) {
errln("FAIL: could not construct formatter");
} else {
const char* testData[][2] = {
{ "3,600", "1:00:00" }, //move me and I fail
{ "0", "0 sec." },
{ "1", "1 sec." },
{ "24", "24 sec." },
{ "60", "1:00" },
{ "73", "1:13" },
{ "145", "2:25" },
{ "666", "11:06" },
// { "3,600", "1:00:00" },
{ "3,740", "1:02:20" },
{ "10,293", "2:51:33" },
NULL
};
doTest(formatter, testData, TRUE);
formatter->setLenient(TRUE);
const char* lpTestData[][2] = {
{ "2-51-33", "10,293" },
NULL
};
doLenientParseTest(formatter, lpTestData);
}
}
void
IntlTestRBNF::TestSpanishSpellout()
{
UErrorCode status = U_ZERO_ERROR;
RuleBasedNumberFormat* formatter
= new RuleBasedNumberFormat(URBNF_SPELLOUT, Locale("es", "ES", ""), status);
if (U_FAILURE(status)) {
errln("FAIL: could not construct formatter");
} else {
const char* testData[][2] = {
{ "1", "uno" },
{ "6", "seis" },
{ "16", "diecis\\u00e9is" },
{ "20", "veinte" },
{ "24", "veinticuatro" },
{ "26", "veintis\\u00e9is" },
{ "73", "setenta y tres" },
{ "88", "ochenta y ocho" },
{ "100", "cien" },
{ "106", "ciento seis" },
{ "127", "ciento veintisiete" },
{ "200", "doscientos" },
{ "579", "quinientos setenta y nueve" },
{ "1,000", "mil" },
{ "2,000", "dos mil" },
{ "3,004", "tres mil cuatro" },
{ "4,567", "cuatro mil quinientos sesenta y siete" },
{ "15,943", "quince mil novecientos cuarenta y tres" },
{ "2,345,678", "dos mill\\u00f3n trescientos cuarenta y cinco mil seiscientos setenta y ocho"},
{ "-36", "menos treinta y seis" },
{ "234.567", "doscientos treinta y cuatro punto cinco seis siete" },
NULL
};
doTest(formatter, testData, TRUE);
}
}
void
IntlTestRBNF::TestFrenchSpellout()
{
UErrorCode status = U_ZERO_ERROR;
RuleBasedNumberFormat* formatter
= new RuleBasedNumberFormat(URBNF_SPELLOUT, Locale::FRANCE, status);
if (U_FAILURE(status)) {
errln("FAIL: could not construct formatter");
} else {
const char* testData[][2] = {
{ "1", "un" },
{ "15", "quinze" },
{ "20", "vingt" },
{ "21", "vingt-et-un" },
{ "23", "vingt-trois" },
{ "62", "soixante-deux" },
{ "70", "soixante-dix" },
{ "71", "soixante et onze" },
{ "73", "soixante-treize" },
{ "80", "quatre-vingts" },
{ "88", "quatre-vingt-huit" },
{ "100", "cent" },
{ "106", "cent six" },
{ "127", "cent vingt-sept" },
{ "200", "deux cents" },
{ "579", "cinq cents soixante-dix-neuf" },
{ "1,000", "mille" },
{ "1,123", "onze cents vingt-trois" },
{ "1,594", "mille cinq cents quatre-vingt-quatorze" },
{ "2,000", "deux mille" },
{ "3,004", "trois mille quatre" },
{ "4,567", "quatre mille cinq cents soixante-sept" },
{ "15,943", "quinze mille neuf cents quarante-trois" },
{ "2,345,678", "deux million trois cents quarante-cinq mille six cents soixante-dix-huit" },
{ "-36", "moins trente-six" },
{ "234.567", "deux cents trente-quatre virgule cinq six sept" },
NULL
};
doTest(formatter, testData, TRUE);
formatter->setLenient(TRUE);
const char* lpTestData[][2] = {
{ "trente-un", "31" },
{ "un cents quatre vingt dix huit", "198" },
NULL
};
doLenientParseTest(formatter, lpTestData);
}
}
void
IntlTestRBNF::TestSwissFrenchSpellout()
{
UErrorCode status = U_ZERO_ERROR;
RuleBasedNumberFormat* formatter
= new RuleBasedNumberFormat(URBNF_SPELLOUT, Locale("fr", "CH", ""), status);
if (U_FAILURE(status)) {
errln("FAIL: could not construct formatter");
} else {
const char* testData[][2] = {
{ "1", "un" },
{ "15", "quinze" },
{ "20", "vingt" },
{ "21", "vingt-et-un" },
{ "23", "vingt-trois" },
{ "62", "soixante-deux" },
{ "70", "septante" },
{ "71", "septante-et-un" },
{ "73", "septante-trois" },
{ "80", "octante" },
{ "88", "octante-huit" },
{ "100", "cent" },
{ "106", "cent six" },
{ "127", "cent vingt-sept" },
{ "200", "deux cents" },
{ "579", "cinq cents septante-neuf" },
{ "1,000", "mille" },
{ "1,123", "onze cents vingt-trois" },
{ "1,594", "mille cinq cents nonante-quatre" },
{ "2,000", "deux mille" },
{ "3,004", "trois mille quatre" },
{ "4,567", "quatre mille cinq cents soixante-sept" },
{ "15,943", "quinze mille neuf cents quarante-trois" },
{ "2,345,678", "deux million trois cents quarante-cinq mille six cents septante-huit" },
{ "-36", "moins trente-six" },
{ "234.567", "deux cents trente-quatre virgule cinq six sept" },
NULL
};
doTest(formatter, testData, TRUE);
}
}
void
IntlTestRBNF::TestItalianSpellout()
{
UErrorCode status = U_ZERO_ERROR;
RuleBasedNumberFormat* formatter
= new RuleBasedNumberFormat(URBNF_SPELLOUT, Locale::ITALIAN, status);
if (U_FAILURE(status)) {
errln("FAIL: could not construct formatter");
} else {
const char* testData[][2] = {
{ "1", "uno" },
{ "15", "quindici" },
{ "20", "venti" },
{ "23", "ventitre" },
{ "73", "settantatre" },
{ "88", "ottantotto" },
{ "100", "cento" },
{ "106", "centosei" },
{ "108", "centotto" },
{ "127", "centoventisette" },
{ "181", "centottantuno" },
{ "200", "duecento" },
{ "579", "cinquecentosettantanove" },
{ "1,000", "mille" },
{ "2,000", "duemila" },
{ "3,004", "tremilaquattro" },
{ "4,567", "quattromilacinquecentosessantasette" },
{ "15,943", "quindicimilanovecentoquarantatre" },
{ "-36", "meno trentisei" },
{ "234.567", "duecentotrentiquattro virgola cinque sei sette" },
NULL
};
doTest(formatter, testData, TRUE);
}
}
void
IntlTestRBNF::TestGermanSpellout()
{
UErrorCode status = U_ZERO_ERROR;
RuleBasedNumberFormat* formatter
= new RuleBasedNumberFormat(URBNF_SPELLOUT, Locale::GERMANY, status);
if (U_FAILURE(status)) {
errln("FAIL: could not construct formatter");
} else {
const char* testData[][2] = {
{ "1", "eins" },
{ "15", "f\\u00fcnfzehn" },
{ "20", "zwanzig" },
{ "23", "dreiundzwanzig" },
{ "73", "dreiundsiebzig" },
{ "88", "achtundachtzig" },
{ "100", "hundert" },
{ "106", "hundertsechs" },
{ "127", "hundertsiebenundzwanzig" },
{ "200", "zweihundert" },
{ "579", "f\\u00fcnfhundertneunundsiebzig" },
{ "1,000", "tausend" },
{ "2,000", "zweitausend" },
{ "3,004", "dreitausendvier" },
{ "4,567", "viertausendf\\u00fcnfhundertsiebenundsechzig" },
{ "15,943", "f\\u00fcnfzehntausendneunhundertdreiundvierzig" },
{ "2,345,678", "zwei Millionen dreihundertf\\u00fcnfundvierzigtausendsechshundertachtundsiebzig" },
NULL
};
doTest(formatter, testData, TRUE);
formatter->setLenient(TRUE);
const char* lpTestData[][2] = {
{ "ein Tausend sechs Hundert fuenfunddreissig", "1,635" },
NULL
};
doLenientParseTest(formatter, lpTestData);
}
}
void
IntlTestRBNF::TestThaiSpellout()
{
UErrorCode status = U_ZERO_ERROR;
RuleBasedNumberFormat* formatter
= new RuleBasedNumberFormat(URBNF_SPELLOUT, Locale("th"), status);
if (U_FAILURE(status)) {
errln("FAIL: could not construct formatter");
} else {
const char* testData[][2] = {
{ "0", "\\u0e28\\u0e39\\u0e19\\u0e22\\u0e4c" },
{ "1", "\\u0e2b\\u0e19\\u0e36\\u0e48\\u0e07" },
{ "10", "\\u0e2a\\u0e34\\u0e1a" },
{ "11", "\\u0e2a\\u0e34\\u0e1a\\u0e40\\u0e2d\\u0e47\\u0e14" },
{ "21", "\\u0e22\\u0e35\\u0e48\\u0e2a\\u0e34\\u0e1a\\u0e40\\u0e2d\\u0e47\\u0e14" },
{ "101", "\\u0e2b\\u0e19\\u0e36\\u0e48\\u0e07\\u0e23\\u0e49\\u0e2d\\u0e22\\u0e2b\\u0e19\\u0e36\\u0e48\\u0e07" },
{ "1.234", "\\u0e2b\\u0e19\\u0e36\\u0e48\\u0e07\\u0e08\\u0e38\\u0e14\\u0e2a\\u0e2d\\u0e07\\u0e2a\\u0e32\\u0e21\\u0e2a\\u0e35\\u0e48" },
NULL
};
doTest(formatter, testData, TRUE);
}
}
/**
 * Checks a formatter against a table of expected results.
 *
 * For each row, the number string is parsed with an en_US NumberFormat,
 * formatted with `formatter`, and compared with the expected text after
 * its \uXXXX escapes are expanded. When `testParsing` is true the
 * formatted text is also parsed back and compared with the original
 * number. The first failure is reported with errln and ends the run.
 *
 * @param formatter   formatter under test; not owned by this function
 * @param testData    rows of { number string, expected text }, terminated
 *                    by a row whose first element is NULL
 * @param testParsing if true, also verify the parse round trip
 */
void
IntlTestRBNF::doTest(RuleBasedNumberFormat* formatter, const char* testData[][2], UBool testParsing)
{
    // man, error reporting would be easier with printf-style syntax for unicode string and formattable
    UErrorCode status = U_ZERO_ERROR;
    NumberFormat* decFmt = NumberFormat::createInstance(Locale::US, status);
    if (U_FAILURE(status)) {
        errln("FAIL: could not create NumberFormat");
        delete decFmt;
        return;
    }

    for (int i = 0; testData[i][0]; ++i) {
        const char* numString = testData[i][0];
        const char* expectedWords = testData[i][1];

        // Turn the textual number into the value we will format.
        Formattable expectedNumber;
        decFmt->parse(numString, expectedNumber, status);
        if (U_FAILURE(status)) {
            errln("FAIL: decFmt could not parse %s", numString);
            break;
        }

        UnicodeString actualString;
        formatter->format(expectedNumber, actualString, status);
        if (U_FAILURE(status)) {
            UnicodeString msg = "Fail: formatter could not format ";
            decFmt->format(expectedNumber, msg, status);
            errln(msg);
            break;
        }

        UnicodeString expectedString = UnicodeString(expectedWords).unescape();
        if (actualString != expectedString) {
            UnicodeString msg = "FAIL: check failed for ";
            decFmt->format(expectedNumber, msg, status);
            msg.append(", expected ");
            msg.append(expectedString);
            msg.append(" but got ");
            msg.append(actualString);
            errln(msg);
            break;
        }

        if (!testParsing) {
            continue;
        }

        // Round trip: the text we just produced must parse back to the
        // number we started from.
        Formattable parsedNumber;
        formatter->parse(actualString, parsedNumber, status);
        if (U_FAILURE(status)) {
            UnicodeString msg = "FAIL: formatter could not parse ";
            msg.append(actualString);
            msg.append(" status code: " );
            char buffer[32];
            sprintf(buffer, "0x%x", (unsigned int)status);
            msg.append(buffer);
            errln(msg);
            break;
        }

        if (parsedNumber != expectedNumber) {
            UnicodeString msg = "FAIL: parse failed for ";
            msg.append(actualString);
            msg.append(", expected ");
            decFmt->format(expectedNumber, msg, status);
            msg.append(", but got ");
            decFmt->format(parsedNumber, msg, status);
            errln(msg);
            break;
        }
    }

    delete decFmt;
}
/**
 * Checks that a formatter parses a table of loosely written inputs to
 * the expected values.
 *
 * Each input is parsed with `formatter` after its \uXXXX escapes are
 * expanded. The expected value is parsed from its en_US string with a
 * NumberFormat. The two results are compared by formatting both with that
 * same NumberFormat and comparing the strings. The first failure is
 * reported with errln and ends the run.
 *
 * @param formatter formatter under test; not owned by this function. The
 *                  callers in this file enable lenient mode before calling.
 * @param testData  rows of { text to parse, expected number string },
 *                  terminated by a row whose first element is NULL
 */
void
IntlTestRBNF::doLenientParseTest(RuleBasedNumberFormat* formatter, const char* testData[][2])
{
    UErrorCode status = U_ZERO_ERROR;
    NumberFormat* decFmt = NumberFormat::createInstance(Locale::US, status);
    if (U_FAILURE(status)) {
        errln("FAIL: could not create NumberFormat");
        delete decFmt;
        return;
    }

    for (int i = 0; testData[i][0]; ++i) {
        const char* spelledNumber = testData[i][0]; // spelled-out number
        const char* asciiUSNumber = testData[i][1]; // number as ascii digits formatted for US locale

        UnicodeString spelledNumberString = UnicodeString(spelledNumber).unescape();
        Formattable actualNumber;
        formatter->parse(spelledNumberString, actualNumber, status);
        if (U_FAILURE(status)) {
            UnicodeString msg = "FAIL: formatter could not parse ";
            msg.append(spelledNumberString);
            errln(msg);
            break;
        }

        // I changed the logic of this test somewhat from Java-- instead of comparing the
        // strings, I compare the Formattables.  Hmmm, but the Formattables don't compare,
        // so change it back.
        UnicodeString asciiUSNumberString = asciiUSNumber;
        Formattable expectedNumber;
        decFmt->parse(asciiUSNumberString, expectedNumber, status);
        if (U_FAILURE(status)) {
            UnicodeString msg = "FAIL: decFmt could not parse ";
            msg.append(asciiUSNumberString);
            errln(msg);
            break;
        }

        UnicodeString actualNumberString;
        UnicodeString expectedNumberString;
        decFmt->format(actualNumber, actualNumberString, status);
        decFmt->format(expectedNumber, expectedNumberString, status);
        if (actualNumberString != expectedNumberString) {
            // Trailing space keeps the number from running into "parsing".
            UnicodeString msg = "FAIL: parsing ";
            msg.append(asciiUSNumberString);
            msg.append("\n");
            msg.append("  lenient parse failed for ");
            msg.append(spelledNumberString);
            msg.append(", expected ");
            msg.append(expectedNumberString);
            msg.append(", but got ");
            msg.append(actualNumberString);
            errln(msg);
            break;
        }
    }

    delete decFmt;
}

View File

@ -0,0 +1,74 @@
/*
*******************************************************************************
* Copyright (C) 1996-2000, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*/
#ifndef ITRBNF_H
#define ITRBNF_H
#include "intltest.h"
#include "unicode/utypes.h"
#include "unicode/rbnf.h"
/**
 * Spot-check tests for RuleBasedNumberFormat: each Test* method formats a
 * small table of numbers with one locale's rules and compares the output
 * with expected text.
 */
class IntlTestRBNF : public IntlTest {
public:
// IntlTest override
// Maps a test index to a Test* method, reports its name, and runs it when
// exec is true.
virtual void runIndexedTest(int32_t index, UBool exec, const char* &name, char* par);
/**
* Perform a simple spot check on the English spellout rules
*/
virtual void TestEnglishSpellout();
/**
* Perform a simple spot check on the English ordinal-abbreviation rules
*/
virtual void TestOrdinalAbbreviations();
/**
* Perform a simple spot check on the duration-formatting rules
*/
virtual void TestDurations();
/**
* Perform a simple spot check on the Spanish spellout rules
*/
virtual void TestSpanishSpellout();
/**
* Perform a simple spot check on the French spellout rules
*/
virtual void TestFrenchSpellout();
/**
* Perform a simple spot check on the Swiss French spellout rules
*/
virtual void TestSwissFrenchSpellout();
/**
* Perform a simple spot check on the Italian spellout rules
*/
virtual void TestItalianSpellout();
/**
* Perform a simple spot check on the German spellout rules
*/
virtual void TestGermanSpellout();
/**
* Perform a simple spot check on the Thai spellout rules
*/
virtual void TestThaiSpellout();
protected:
/**
* Format each number in testData with formatter and compare with the
* expected text; when testParsing is true, also parse the formatted text
* back and compare with the original number.
* testData rows are { number string, expected text }; the table ends at
* the first row whose first element is NULL.
*/
virtual void doTest(RuleBasedNumberFormat* formatter, const char* testData[][2], UBool testParsing);
/**
* Parse each input text in testData with formatter and compare the result
* with the expected number.
* testData rows are { text to parse, expected number string }; the table
* ends at the first row whose first element is NULL.
*/
virtual void doLenientParseTest(RuleBasedNumberFormat* formatter, const char* testData[][2]);
};
// endif ITRBNF_H
#endif

View File

@ -0,0 +1,341 @@
/*
*******************************************************************************
* Copyright (C) 1996-2000, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*/
#include "itrbnfrt.h"
#include "unicode/fmtable.h"
#include "math.h" // fabs
// TESTCASE(id, test) expands to one complete `case` of the switch in
// runIndexedTest: it stores the stringized test name in `name` and, when
// `exec` is true, logs a "<test>---" banner plus an empty line and then
// calls the member function `test`. The expansion ends in `break` with no
// trailing semicolon, so each use site supplies the `;`.
// current macro not in icu1.8.1
#define TESTCASE(id,test) \
case id: \
name = #test; \
if (exec) { \
logln(#test "---"); \
logln((UnicodeString)""); \
test(); \
} \
break
/**
 * IntlTest dispatch hook. Maps `index` to one of the round-trip tests,
 * reports its name through `name`, and runs it when `exec` is true.
 * An index past the last test sets `name` to the empty string.
 * `par` is not used by this suite.
 */
void RbnfRoundTripTest::runIndexedTest(int32_t index, UBool exec, const char* &name, char* par)
{
    if (exec) {
        logln("TestSuite RuleBasedNumberFormatRT");
    }

    switch (index) {
        TESTCASE(0, TestEnglishSpelloutRT);
        TESTCASE(1, TestDurationsRT);
        TESTCASE(2, TestSpanishSpelloutRT);
        TESTCASE(3, TestFrenchSpelloutRT);
        TESTCASE(4, TestSwissFrenchSpelloutRT);
        TESTCASE(5, TestItalianSpelloutRT);
        TESTCASE(6, TestGermanSpelloutRT);
        TESTCASE(7, TestSwedishSpelloutRT);
        TESTCASE(8, TestDutchSpelloutRT);
        TESTCASE(9, TestJapaneseSpelloutRT);
        TESTCASE(10, TestRussianSpelloutRT);
        TESTCASE(11, TestGreekSpelloutRT);

    default:
        // Signals "no more tests" to the caller.
        name = "";
        break;
    }
}
/**
* Perform an exhaustive round-trip test on the English spellout rules
*/
void
RbnfRoundTripTest::TestEnglishSpelloutRT()
{
UErrorCode status = U_ZERO_ERROR;
RuleBasedNumberFormat* formatter
= new RuleBasedNumberFormat(URBNF_SPELLOUT, Locale::US, status);
if (U_FAILURE(status)) {
errln("failed to construct formatter");
} else {
doTest(formatter, -12345678, 12345678);
}
delete formatter;
}
/**
* Perform an exhaustive round-trip test on the duration-formatting rules
*/
void
RbnfRoundTripTest::TestDurationsRT()
{
UErrorCode status = U_ZERO_ERROR;
RuleBasedNumberFormat* formatter
= new RuleBasedNumberFormat(URBNF_DURATION, Locale::US, status);
if (U_FAILURE(status)) {
errln("failed to construct formatter");
} else {
doTest(formatter, 0, 12345678);
}
delete formatter;
}
/**
 * Round-trip (format, then parse back) check of the Spanish (es_ES)
 * spellout rules over -12345678 .. 12345678.
 */
void
RbnfRoundTripTest::TestSpanishSpelloutRT()
{
    UErrorCode status = U_ZERO_ERROR;
    // Region code written in upper case, matching the es_ES locale used by
    // IntlTestRBNF::TestSpanishSpellout.
    RuleBasedNumberFormat* formatter
        = new RuleBasedNumberFormat(URBNF_SPELLOUT, Locale("es", "ES"), status);

    if (U_FAILURE(status)) {
        errln("failed to construct formatter");
    } else {
        doTest(formatter, -12345678, 12345678);
    }
    delete formatter;
}
/**
* Perform an exhaustive round-trip test on the French spellout rules
*/
void
RbnfRoundTripTest::TestFrenchSpelloutRT()
{
UErrorCode status = U_ZERO_ERROR;
RuleBasedNumberFormat* formatter
= new RuleBasedNumberFormat(URBNF_SPELLOUT, Locale::FRANCE, status);
if (U_FAILURE(status)) {
errln("failed to construct formatter");
} else {
doTest(formatter, -12345678, 12345678);
}
delete formatter;
}
/**
* Perform an exhaustive round-trip test on the Swiss French spellout rules
*/
void
RbnfRoundTripTest::TestSwissFrenchSpelloutRT()
{
UErrorCode status = U_ZERO_ERROR;
RuleBasedNumberFormat* formatter
= new RuleBasedNumberFormat(URBNF_SPELLOUT, Locale("fr", "CH"), status);
if (U_FAILURE(status)) {
errln("failed to construct formatter");
} else {
doTest(formatter, -12345678, 12345678);
}
delete formatter;
}
/**
* Perform an exhaustive round-trip test on the Italian spellout rules
*/
void
RbnfRoundTripTest::TestItalianSpelloutRT()
{
UErrorCode status = U_ZERO_ERROR;
RuleBasedNumberFormat* formatter
= new RuleBasedNumberFormat(URBNF_SPELLOUT, Locale::ITALIAN, status);
if (U_FAILURE(status)) {
errln("failed to construct formatter");
} else {
doTest(formatter, -999999, 999999);
}
delete formatter;
}
/**
* Perform an exhaustive round-trip test on the German spellout rules
*/
void
RbnfRoundTripTest::TestGermanSpelloutRT()
{
UErrorCode status = U_ZERO_ERROR;
RuleBasedNumberFormat* formatter
= new RuleBasedNumberFormat(URBNF_SPELLOUT, Locale::GERMANY, status);
if (U_FAILURE(status)) {
errln("failed to construct formatter");
} else {
doTest(formatter, 0, 12345678);
}
delete formatter;
}
/**
* Perform an exhaustive round-trip test on the Swedish spellout rules
*/
void
RbnfRoundTripTest::TestSwedishSpelloutRT()
{
UErrorCode status = U_ZERO_ERROR;
RuleBasedNumberFormat* formatter
= new RuleBasedNumberFormat(URBNF_SPELLOUT, Locale("sv", "SE"), status);
if (U_FAILURE(status)) {
errln("failed to construct formatter");
} else {
doTest(formatter, 0, 12345678);
}
delete formatter;
}
/**
* Perform an exhaustive round-trip test on the Dutch spellout rules
*/
void
RbnfRoundTripTest::TestDutchSpelloutRT()
{
UErrorCode status = U_ZERO_ERROR;
RuleBasedNumberFormat* formatter
= new RuleBasedNumberFormat(URBNF_SPELLOUT, Locale("nl", "NL"), status);
if (U_FAILURE(status)) {
errln("failed to construct formatter");
} else {
doTest(formatter, -12345678, 12345678);
}
delete formatter;
}
/**
* Perform an exhaustive round-trip test on the Japanese spellout rules
*/
void
RbnfRoundTripTest::TestJapaneseSpelloutRT()
{
UErrorCode status = U_ZERO_ERROR;
RuleBasedNumberFormat* formatter
= new RuleBasedNumberFormat(URBNF_SPELLOUT, Locale::JAPAN, status);
if (U_FAILURE(status)) {
errln("failed to construct formatter");
} else {
doTest(formatter, 0, 12345678);
}
delete formatter;
}
/**
* Perform an exhaustive round-trip test on the Russian spellout rules
*/
void
RbnfRoundTripTest::TestRussianSpelloutRT()
{
UErrorCode status = U_ZERO_ERROR;
RuleBasedNumberFormat* formatter
= new RuleBasedNumberFormat(URBNF_SPELLOUT, Locale("ru", "RU"), status);
if (U_FAILURE(status)) {
errln("failed to construct formatter");
} else {
doTest(formatter, 0, 12345678);
}
delete formatter;
}
/**
* Perform an exhaustive round-trip test on the Greek spellout rules
*/
void
RbnfRoundTripTest::TestGreekSpelloutRT()
{
UErrorCode status = U_ZERO_ERROR;
RuleBasedNumberFormat* formatter
= new RuleBasedNumberFormat(URBNF_SPELLOUT, Locale("el", "GR"), status);
if (U_FAILURE(status)) {
errln("failed to construct formatter");
} else {
doTest(formatter, 0, 12345678);
}
delete formatter;
}
void
RbnfRoundTripTest::doTest(const RuleBasedNumberFormat* formatter,
double lowLimit,
double highLimit)
{
char buf[128];
uint32_t count = 0;
double increment = 1;
for (double i = lowLimit; i <= highLimit; i += increment) {
if (count % 1000 == 0) {
sprintf(buf, "%.12g", i);
logln(buf);
}
if (fabs(i) < 5000)
increment = 1;
else if (fabs(i) < 500000)
increment = 2737;
else
increment = 267437;
UnicodeString formatResult;
formatter->format(i, formatResult);
UErrorCode status = U_ZERO_ERROR;
Formattable parseResult;
formatter->parse(formatResult, parseResult, status);
if (U_FAILURE(status)) {
sprintf(buf, "Round-trip status failure: %.12g, status: %d", i, status);
errln(buf);
return;
} else {
double rt = (parseResult.getType() == Formattable::kDouble) ?
parseResult.getDouble() :
(double)parseResult.getLong();
if (rt != i) {
sprintf(buf, "Round-trip failed: %.12g -> %.12g", i, rt);
errln(buf);
return;
}
}
++count;
}
if (lowLimit < 0) {
double d = 1.234;
while (d < 1000) {
UnicodeString formatResult;
formatter->format(d, formatResult);
UErrorCode status = U_ZERO_ERROR;
Formattable parseResult;
formatter->parse(formatResult, parseResult, status);
if (U_FAILURE(status)) {
sprintf(buf, "Round-trip status failure: %.12g, status: %d", d, status);
errln(buf);
return;
} else {
double rt = (parseResult.getType() == Formattable::kDouble) ?
parseResult.getDouble() :
(double)parseResult.getLong();
if (rt != d) {
UnicodeString msg;
sprintf(buf, "Round-trip failed: %.12g -> ", d);
msg.append(buf);
msg.append(formatResult);
sprintf(buf, " -> %.12g", rt);
msg.append(buf);
errln(msg);
return;
}
}
d *= 10;
}
}
}

View File

@ -0,0 +1,86 @@
/*
*******************************************************************************
* Copyright (C) 1996-2000, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*/
#ifndef ITRBNFRT_H
#define ITRBNFRT_H
#include "intltest.h"
#include "unicode/utypes.h"
#include "unicode/rbnf.h"
/**
 * Round-trip tests for RuleBasedNumberFormat: each Test*RT method formats
 * a range of numbers with one locale's rules and checks that parsing the
 * text returns the original value.
 */
class RbnfRoundTripTest : public IntlTest {
public:
    // IntlTest override
    // Maps a test index to a Test*RT method, reports its name, and runs it
    // when exec is true.
    virtual void runIndexedTest(int32_t index, UBool exec, const char* &name, char* par);

    /**
     * Perform a round-trip test on the English spellout rules
     */
    virtual void TestEnglishSpelloutRT();

    /**
     * Perform a round-trip test on the duration-formatting rules
     */
    virtual void TestDurationsRT();

    /**
     * Perform a round-trip test on the Spanish spellout rules
     */
    virtual void TestSpanishSpelloutRT();

    /**
     * Perform a round-trip test on the French spellout rules
     */
    virtual void TestFrenchSpelloutRT();

    /**
     * Perform a round-trip test on the Swiss French spellout rules
     */
    virtual void TestSwissFrenchSpelloutRT();

    /**
     * Perform a round-trip test on the Italian spellout rules
     */
    virtual void TestItalianSpelloutRT();

    /**
     * Perform a round-trip test on the German spellout rules
     */
    virtual void TestGermanSpelloutRT();

    /**
     * Perform a round-trip test on the Swedish spellout rules
     */
    virtual void TestSwedishSpelloutRT();

    /**
     * Perform a round-trip test on the Dutch spellout rules
     */
    virtual void TestDutchSpelloutRT();

    /**
     * Perform a round-trip test on the Japanese spellout rules
     */
    virtual void TestJapaneseSpelloutRT();

    /**
     * Perform a round-trip test on the Russian spellout rules
     */
    virtual void TestRussianSpelloutRT();

    /**
     * Perform a round-trip test on the Greek spellout rules
     */
    virtual void TestGreekSpelloutRT();

protected:
    /**
     * Format sampled values between lowLimit and highLimit (inclusive) with
     * formatter, parse each result back, and report the first value that
     * does not survive the round trip. The formatter is not owned.
     */
    void doTest(const RuleBasedNumberFormat* formatter, double lowLimit, double highLimit);
};
// endif ITRBNFRT_H
#endif