ICU-1271 Port RuleBasedNumberFormat to ICU
X-SVN-Rev: 6172
This commit is contained in:
parent
00ac42f02f
commit
7258dac736
@ -519,4 +519,58 @@ de {
|
||||
"Latf", // ISO 15924 Name
|
||||
"Latg", // ISO 15924 Name
|
||||
}
|
||||
|
||||
//------------------------------------------------------------
|
||||
// Rule Based Number Format Support
|
||||
//------------------------------------------------------------
|
||||
|
||||
|
||||
// * RuleBasedNumberFormat data for German
|
||||
|
||||
// again, I'm not 100% sure of these rules. I think both "hundert" and
|
||||
// "einhundert" are correct for 100, but I'm not sure which is preferable
|
||||
// in situations where this framework is likely to be used. Also, is it
|
||||
// really true that numbers are run together into compound words all the
|
||||
// time?
|
||||
|
||||
SpelloutRules {
|
||||
// 1 is "eins" when by itself, but turns into "ein" in most
|
||||
// combinations
|
||||
"%alt-ones:\n"
|
||||
" -x: minus >>;\n"
|
||||
" x.x: << komma >>;\n"
|
||||
" null; eins; =%%main=;\n"
|
||||
"%%main:\n"
|
||||
// words for numbers from 0 to 12. Notice that the values
|
||||
// from 13 to 19 can be derived algorithmically, unlike in most
|
||||
// other languages
|
||||
" null; ein; zwei; drei; vier; f\u00fcnf; sechs; sieben; acht; neun;\n"
|
||||
" zehn; elf; zw\u00f6lf; >>zehn;\n"
|
||||
// rules for the multiples of 10. Notice that the ones digit
|
||||
// goes on the front
|
||||
" 20: [>>und]zwanzig;\n"
|
||||
" 30: [>>und]drei\u00dfig;\n"
|
||||
" 40: [>>und]vierzig;\n"
|
||||
" 50: [>>und]f\u00fcnfzig;\n"
|
||||
" 60: [>>und]sechzig;\n"
|
||||
" 70: [>>und]siebzig;\n"
|
||||
" 80: [>>und]achtzig;\n"
|
||||
" 90: [>>und]neunzig;\n"
|
||||
" 100: hundert[>%alt-ones>];\n"
|
||||
" 200: <<hundert[>%alt-ones>];\n"
|
||||
" 1000: tausend[>%alt-ones>];\n"
|
||||
" 2000: <<tausend[>%alt-ones>];\n"
|
||||
" 1,000,000: eine Million[ >%alt-ones>];\n"
|
||||
" 2,000,000: << Millionen[ >%alt-ones>];\n"
|
||||
" 1,000,000,000: eine Milliarde[ >%alt-ones>];\n"
|
||||
" 2,000,000,000: << Milliarden[ >%alt-ones>];\n"
|
||||
" 1,000,000,000,000: eine Billion[ >%alt-ones>];\n"
|
||||
" 2,000,000,000,000: << Billionen[ >%alt-ones>];\n"
|
||||
" 1,000,000,000,000,000: =#,##0=;"
|
||||
"%%lenient-parse:\n"
|
||||
" &\u0000 << ' ' << '-'\n"
|
||||
" & ae , \u00e4 & ae , \u00c4\n"
|
||||
" & oe , \u00f6 & oe , \u00d6\n"
|
||||
" & ue , \u00fc & ue , \u00dc\n"
|
||||
}
|
||||
}
|
||||
|
@ -116,4 +116,53 @@ el {
|
||||
"Greek",// Script Name
|
||||
"Grek" // ISO 15924 Name
|
||||
}
|
||||
|
||||
//------------------------------------------------------------
|
||||
// Rule Based Number Format Support
|
||||
//------------------------------------------------------------
|
||||
|
||||
// * Spellout rules for Greek. Again in Greek we have to supply the words
|
||||
// * for the multiples of 100 because they can't be derived algorithmically.
|
||||
// * Also, the tens digit changes form when followed by a ones digit: an
|
||||
// * accent mark disappears from the tens digit and moves to the ones digit.
|
||||
// * Therefore, instead of using the [] notation, we actually have to use
|
||||
// * two separate rules for each multiple of 10 to show the two forms of
|
||||
// * the word.
|
||||
|
||||
// Can someone supply me with information on negatives and decimals?
|
||||
// I'm also missing the word for zero. Can someone clue me in?
|
||||
|
||||
SpelloutRules {
|
||||
"zero (incomplete data); \u03ad\u03bd\u03b1; \u03b4\u03cd\u03bf; \u03b4\u03c1\u03af\u03b1; "
|
||||
"\u03c4\u03ad\u03c3\u03c3\u03b5\u03c1\u03b1; \u03c0\u03ad\u03bd\u03c4\u03b5; "
|
||||
"\u03ad\u03be\u03b9; \u03b5\u03c0\u03c4\u03ac; \u03bf\u03ba\u03c4\u03ce; "
|
||||
"\u03b5\u03bd\u03bd\u03ad\u03b1;\n"
|
||||
"10: \u03b4\u03ad\u03ba\u03b1; "
|
||||
"\u03ad\u03bd\u03b4\u03b5\u03ba\u03b1; \u03b4\u03ce\u03b4\u03b5\u03ba\u03b1; "
|
||||
"\u03b4\u03b5\u03ba\u03b1>>;\n"
|
||||
"20: \u03b5\u03af\u03ba\u03bf\u03c3\u03b9; \u03b5\u03b9\u03ba\u03bf\u03c3\u03b9>>;\n"
|
||||
"30: \u03c4\u03c1\u03b9\u03ac\u03bd\u03c4\u03b1; \u03c4\u03c1\u03b9\u03b1\u03bd\u03c4\u03b1>>;\n"
|
||||
"40: \u03c3\u03b1\u03c1\u03ac\u03bd\u03c4\u03b1; \u03c3\u03b1\u03c1\u03b1\u03bd\u03c4\u03b1>>;\n"
|
||||
"50: \u03c0\u03b5\u03bd\u03ae\u03bd\u03c4\u03b1; \u03c0\u03b5\u03bd\u03b7\u03bd\u03c4\u03b1>>;\n"
|
||||
"60: \u03b5\u03be\u03ae\u03bd\u03c4\u03b1; \u03b5\u03be\u03b7\u03bd\u03c4\u03b1>>;\n"
|
||||
"70: \u03b5\u03b2\u03b4\u03bf\u03bc\u03ae\u03bd\u03c4\u03b1; "
|
||||
"\u03b5\u03b2\u03b4\u03bf\u03bc\u03b7\u03bd\u03c4\u03b1>>;\n"
|
||||
"80: \u03bf\u03b3\u03b4\u03cc\u03bd\u03c4\u03b1; \u03bf\u03b3\u03b4\u03bf\u03bd\u03c4\u03b1>>;\n"
|
||||
"90: \u03b5\u03bd\u03bd\u03b5\u03bd\u03ae\u03bd\u03c4\u03b1; "
|
||||
"\u03b5\u03bd\u03bd\u03b5\u03bd\u03b7\u03bd\u03c4\u03b1>>;\n"
|
||||
"100: \u03b5\u03ba\u03b1\u03c4\u03cc[\u03bd >>];\n"
|
||||
"200: \u03b4\u03b9\u03b1\u03ba\u03cc\u03c3\u03b9\u03b1[ >>];\n"
|
||||
"300: \u03c4\u03c1\u03b9\u03b1\u03ba\u03cc\u03c3\u03b9\u03b1[ >>];\n"
|
||||
"400: \u03c4\u03b5\u03c4\u03c1\u03b1\u03ba\u03cc\u03c3\u03b9\u03b1[ >>];\n"
|
||||
"500: \u03c0\u03b5\u03bd\u03c4\u03b1\u03ba\u03cc\u03c3\u03b9\u03b1[ >>];\n"
|
||||
"600: \u03b5\u03be\u03b1\u03ba\u03cc\u03c3\u03b9\u03b1[ >>];\n"
|
||||
"700: \u03b5\u03c0\u03c4\u03b1\u03ba\u03cc\u03c3\u03b9\u03b1[ >>];\n"
|
||||
"800: \u03bf\u03ba\u03c4\u03b1\u03ba\u03cc\u03c3\u03b9\u03b1[ >>];\n"
|
||||
"900: \u03b5\u03bd\u03bd\u03b9\u03b1\u03ba\u03cc\u03c3\u03b9\u03b1[ >>];\n"
|
||||
"1000: \u03c7\u03af\u03bb\u03b9\u03b1[ >>];\n"
|
||||
"2000: << \u03c7\u03af\u03bb\u03b9\u03b1[ >>];\n"
|
||||
"1,000,000: << \u03b5\u03ba\u03b1\u03c4\u03bf\u03bc\u03bc\u03b9\u03cc\u03c1\u03b9\u03bf[ >>];\n"
|
||||
"1,000,000,000: << \u03b4\u03b9\u03c3\u03b5\u03ba\u03b1\u03c4\u03bf\u03bc\u03bc\u03b9\u03cc\u03c1\u03b9\u03bf[ >>];\n"
|
||||
"1,000,000,000,000: =#,##0="
|
||||
}
|
||||
}
|
||||
|
@ -233,4 +233,11 @@ en {
|
||||
"Latf", // ISO 15924 Name
|
||||
"Latg", // ISO 15924 Name
|
||||
}
|
||||
|
||||
//------------------------------------------------------------
|
||||
// Rule Based Number Format Support
|
||||
//------------------------------------------------------------
|
||||
|
||||
// inherited from root
|
||||
|
||||
}
|
||||
|
@ -49,4 +49,70 @@ en_GB {
|
||||
"BST",
|
||||
}
|
||||
}
|
||||
|
||||
//------------------------------------------------------------
|
||||
// Rule Based Number Format Support
|
||||
//------------------------------------------------------------
|
||||
|
||||
// * Spellout rules for U.K. English. U.K. English has one significant
|
||||
// * difference from U.S. English: the names for values of 1,000,000,000
|
||||
// * and higher. In American English, each successive "-illion" is 1,000
|
||||
// * times greater than the preceding one: 1,000,000,000 is "one billion"
|
||||
// * and 1,000,000,000,000 is "one trillion." In British English, each
|
||||
// * successive "-illion" is one million times greater than the one before:
|
||||
// * "one billion" is 1,000,000,000,000 (or what Americans would call a
|
||||
// * "trillion"), and "one trillion" is 1,000,000,000,000,000,000.
|
||||
// * 1,000,000,000 in British English is "one thousand million." (This
|
||||
// * value is sometimes called a "milliard," but this word seems to have
|
||||
// * fallen into disuse.)
|
||||
|
||||
// Could someone please correct me if I'm wrong about "milliard" falling
|
||||
// into disuse, or have missed any other details of how large numbers
|
||||
// are rendered. Also, could someone please provide me with information
|
||||
// on which other English-speaking countries use which system? Right now,
|
||||
// I'm assuming that the U.S. system is used in Canada and that all the
|
||||
// other English-speaking countries follow the British system. Can
|
||||
// someone out there confirm this?
|
||||
|
||||
SpelloutRules {
|
||||
"%simplified:\n"
|
||||
" -x: minus >>;\n"
|
||||
" x.x: << point >>;\n"
|
||||
" zero; one; two; three; four; five; six; seven; eight; nine;\n"
|
||||
" ten; eleven; twelve; thirteen; fourteen; fifteen; sixteen;\n"
|
||||
" seventeen; eighteen; nineteen;\n"
|
||||
" 20: twenty[->>];\n"
|
||||
" 30: thirty[->>];\n"
|
||||
" 40: forty[->>];\n"
|
||||
" 50: fifty[->>];\n"
|
||||
" 60: sixty[->>];\n"
|
||||
" 70: seventy[->>];\n"
|
||||
" 80: eighty[->>];\n"
|
||||
" 90: ninety[->>];\n"
|
||||
" 100: << hundred[ >>];\n"
|
||||
" 1000: << thousand[ >>];\n"
|
||||
" 1,000,000: << million[ >>];\n"
|
||||
" 1,000,000,000,000: << billion[ >>];\n"
|
||||
" 1,000,000,000,000,000: =#,##0=;\n"
|
||||
"%default:\n"
|
||||
" -x: minus >>;\n"
|
||||
" x.x: << point >>;\n"
|
||||
" =%simplified=;\n"
|
||||
" 100: << hundred[ >%%and>];\n"
|
||||
" 1000: << thousand[ >%%and>];\n"
|
||||
" 100,000>>: << thousand[>%%commas>];\n"
|
||||
" 1,000,000: << million[>%%commas>];\n"
|
||||
" 1,000,000,000,000: << billion[>%%commas>];\n"
|
||||
" 1,000,000,000,000,000: =#,##0=;\n"
|
||||
"%%and:\n"
|
||||
" and =%default=;\n"
|
||||
" 100: =%default=;\n"
|
||||
"%%commas:\n"
|
||||
" ' and =%default=;\n"
|
||||
" 100: , =%default=;\n"
|
||||
" 1000: , <%default< thousand, >%default>;\n"
|
||||
" 1,000,000: , =%default=;"
|
||||
"%%lenient-parse:\n"
|
||||
" & ' ' , ',' ;\n"
|
||||
}
|
||||
}
|
||||
|
@ -140,4 +140,31 @@ eo {
|
||||
"Latf", // ISO 15924 Name
|
||||
"Latg", // ISO 15924 Name
|
||||
}
|
||||
|
||||
//------------------------------------------------------------
|
||||
// Rule Based Number Format Support
|
||||
//------------------------------------------------------------
|
||||
|
||||
// data from 'Esperanto-programita 1' courtesy of Markus Scherer
|
||||
|
||||
SpelloutRules {
|
||||
"-x: minus >>;\n"
|
||||
"x.x: << komo >>;\n"
|
||||
"nulo; unu; du; tri; kvar; kvin; ses; sep; ok; na\u016d;\n"
|
||||
"10: dek[ >>];\n"
|
||||
"20: <<dek[ >>];\n"
|
||||
"100: cent[ >>];\n"
|
||||
"200: <<cent[ >>];\n"
|
||||
"1000: mil[ >>];\n"
|
||||
"2000: <<mil[ >>];\n"
|
||||
"10000: dekmil[ >>];\n"
|
||||
"11000>: << mil[ >>];\n"
|
||||
"1,000,000: miliono[ >>];\n"
|
||||
"2,000,000: << milionoj[ >>];\n"
|
||||
"1,000,000,000: miliardo[ >>];\n"
|
||||
"2,000,000,000: << miliardoj[ >>];\n"
|
||||
"1,000,000,000,000: biliono[ >>];\n"
|
||||
"2,000,000,000,000: << bilionoj[ >>];\n"
|
||||
"1,000,000,000,000,000: =#,##0=;\n"
|
||||
}
|
||||
}
|
||||
|
@ -258,4 +258,69 @@ es {
|
||||
"Latf", // ISO 15924 Name
|
||||
"Latg", // ISO 15924 Name
|
||||
}
|
||||
|
||||
//------------------------------------------------------------
|
||||
// Rule Based Number Format Support
|
||||
//------------------------------------------------------------
|
||||
|
||||
// * Spellout rules for Spanish. The Spanish rules are quite similar to
|
||||
// * the English rules, but there are some important differences:
|
||||
// * First, we have to provide separate rules for most of the twenties
|
||||
// * because the ones digit frequently picks up an accent mark that it
|
||||
// * doesn't have when standing alone. Second, each multiple of 100 has
|
||||
// * to be specified separately because the multiplier on 100 very often
|
||||
// * changes form in the contraction: 500 is "quinientos," not
|
||||
// * "cincocientos." In addition, the word for 100 is "cien" when
|
||||
// * standing alone, but changes to "ciento" when followed by more digits.
|
||||
// * There are also some other differences.
|
||||
|
||||
// The Spanish rules are incomplete. I'm missing information on negative
|
||||
// numbers and numbers with fractional parts. I also don't have
|
||||
// information on numbers higher than the millions.
|
||||
|
||||
SpelloutRules {
|
||||
// negative-number and fraction rules
|
||||
"-x: menos >>;\n"
|
||||
"x.x: << punto >>;\n"
|
||||
// words for values from 0 to 19
|
||||
"cero; uno; dos; tres; cuatro; cinco; seis; siete; ocho; nueve;\n"
|
||||
"diez; once; doce; trece; catorce; quince; diecis\u00e9is;\n"
|
||||
" diecisiete; dieciocho; diecinueve;\n"
|
||||
// words for values from 20 to 29 (necessary because the ones digit
|
||||
// often picks up an accent mark it doesn't have when standing alone)
|
||||
"veinte; veintiuno; veintid\u00f3s; veintitr\u00e9s; veinticuatro;\n"
|
||||
" veinticinco; veintis\u00e9is; veintisiete; veintiocho;\n"
|
||||
" veintinueve;\n"
|
||||
// words for multiples of 10 (notice that the tens digit is separated
|
||||
// from the ones digit by the word "y".)
|
||||
"30: treinta[ y >>];\n"
|
||||
"40: cuarenta[ y >>];\n"
|
||||
"50: cincuenta[ y >>];\n"
|
||||
"60: sesenta[ y >>];\n"
|
||||
"70: setenta[ y >>];\n"
|
||||
"80: ochenta[ y >>];\n"
|
||||
"90: noventa[ y >>];\n"
|
||||
// 100 by itself is "cien," but 100 followed by something is "ciento"
|
||||
"100: cien;\n"
|
||||
"101: ciento >>;\n"
|
||||
// words for multiples of 100 (must be stated because they're
|
||||
// rarely simple concatenations)
|
||||
"200: doscientos[ >>];\n"
|
||||
"300: trescientos[ >>];\n"
|
||||
"400: cuatrocientos[ >>];\n"
|
||||
"500: quinientos[ >>];\n"
|
||||
"600: seiscientos[ >>];\n"
|
||||
"700: setecientos[ >>];\n"
|
||||
"800: ochocientos[ >>];\n"
|
||||
"900: novecientos[ >>];\n"
|
||||
// for 1,000, the multiplier on "mil" is omitted: 2,000 is "dos mil,"
|
||||
// but 1,000 is just "mil."
|
||||
"1000: mil[ >>];\n"
|
||||
"2000: << mil[ >>];\n"
|
||||
// 1,000,000 is "un millon," not "uno millon"
|
||||
"1,000,000: un mill\u00f3n[ >>];\n"
|
||||
"2,000,000: << mill\u00f3n[ >>];\n"
|
||||
// overflow rule
|
||||
"1,000,000,000: =#,##0= (incomplete data);"
|
||||
}
|
||||
}
|
||||
|
@ -190,10 +190,73 @@ fr {
|
||||
"Latf", // ISO 15924 Name
|
||||
"Latg", // ISO 15924 Name
|
||||
}
|
||||
|
||||
//------------------------------------------------------------
|
||||
// Rule Based Number Format Support
|
||||
//------------------------------------------------------------
|
||||
|
||||
// * Spellout rules for French. French adds some interesting quirks of its
|
||||
// * own: 1) The word "et" is interposed between the tens and ones digits,
|
||||
// * but only if the ones digit is 1: 20 is "vingt," and 22 is "vingt-deux,"
|
||||
// * but 21 is "vingt-et-un." 2) There are no words for 70, 80, or 90.
|
||||
// * "quatre-vingts" ("four twenties") is used for 80, and values proceed
|
||||
// * by score from 60 to 99 (e.g., 73 is "soixante-treize" ["sixty-thirteen"]).
|
||||
// * Numbers from 1,100 to 1,199 are rendered as hundreds rather than
|
||||
// * thousands: 1,100 is "onze cents" ("eleven hundred"), rather than
|
||||
// * "mille cent" ("one thousand one hundred")
|
||||
|
||||
SpelloutRules {
|
||||
// the main rule set
|
||||
"%main:\n"
|
||||
" -x: moins >>;\n"
|
||||
" x.x: << virgule >>;\n"
|
||||
// words for numbers from 0 to 19
|
||||
" z\u00e9ro; un; deux; trois; quatre; cinq; six; sept; huit; neuf;\n"
|
||||
" dix; onze; douze; treize; quatorze; quinze; seize;\n"
|
||||
" dix-sept; dix-huit; dix-neuf;\n"
|
||||
// words for the multiples of 10: %%alt-ones inserts "et"
|
||||
// when needed
|
||||
" 20: vingt[->%%alt-ones>];\n"
|
||||
" 30: trente[->%%alt-ones>];\n"
|
||||
" 40: quarante[->%%alt-ones>];\n"
|
||||
" 50: cinquante[->%%alt-ones>];\n"
|
||||
// rule for 60. The /20 causes this rule's multiplier to be
|
||||
// 20 rather than 10, allowing us to recurse for all values
|
||||
// from 60 to 79...
|
||||
" 60/20: soixante[->%%alt-ones>];\n"
|
||||
// ...except for 71, which must be special-cased
|
||||
" 71: soixante et onze;\n"
|
||||
// at 72, we have to repeat the rule for 60 to get us to 79
|
||||
" 72/20: soixante->%%alt-ones>;\n"
|
||||
// at 80, we state a new rule with the phrase for 80. Since
|
||||
// it changes form when there's a ones digit, we need a second
|
||||
// rule at 81. This rule also includes "/20," allowing it to
|
||||
// be used correctly for all values up to 99
|
||||
" 80: quatre-vingts; 81/20: quatre-vingt->>;\n"
|
||||
// "cent" becomes plural when preceded by a multiplier, and
|
||||
// the multiplier is omitted from the singular form
|
||||
" 100: cent[ >>];\n"
|
||||
" 200: << cents[ >>];\n"
|
||||
" 1000: mille[ >>];\n"
|
||||
// values from 1,100 to 1,199 are rendered as "onze cents..."
|
||||
// instead of "mille cent..." The > after "1000" decreases
|
||||
// the rule's exponent, causing its multiplier to be 100 instead
|
||||
// of 1,000. This prevents us from getting "onze cents cent
|
||||
// vingt-deux" ("eleven hundred one hundred twenty-two").
|
||||
" 1100>: onze cents[ >>];\n"
|
||||
// at 1,200, we go back to formatting in thousands, so we
|
||||
// repeat the rule for 1,000
|
||||
" 1200: mille >>;\n"
|
||||
// at 2,000, the multiplier is added
|
||||
" 2000: << mille[ >>];\n"
|
||||
" 1,000,000: << million[ >>];\n"
|
||||
" 1,000,000,000: << milliarde[ >>];\n"
|
||||
" 1,000,000,000,000: << billion[ >>];\n"
|
||||
" 1,000,000,000,000,000: =#,##0=;\n"
|
||||
// %%alt-ones is used to insert "et" when the ones digit is 1
|
||||
"%%alt-ones:\n"
|
||||
" ; et-un; =%main=;\n"
|
||||
"%%lenient-parse:\n"
|
||||
" &\u0000 << ' ' << ',' << '-';\n"
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
@ -56,4 +56,51 @@ fr_CH {
|
||||
"GMT",
|
||||
}
|
||||
}
|
||||
|
||||
//------------------------------------------------------------
|
||||
// Rule Based Number Format Support
|
||||
//------------------------------------------------------------
|
||||
|
||||
// * Spellout rules for Swiss French. Swiss French differs from French French
|
||||
// * in that it does have words for 70, 80, and 90. This rule set shows them,
|
||||
// * and is simpler as a result.
|
||||
|
||||
// again, I'm missing information on negative numbers and decimals for
|
||||
// these two rule sets. Also, I'm not 100% sure about Swiss French. Is
|
||||
// this correct? Is "onze cents" commonly used for 1,100 in both France
|
||||
// and Switzerland? Can someone fill me in on the rules for the other
|
||||
// French-speaking countries? I've heard conflicting opinions on which
|
||||
// version is used in Canada, and I understand there's an alternate set
|
||||
// of words for 70, 80, and 90 that is used somewhere, but I don't know
|
||||
// what those words are or where they're used.
|
||||
|
||||
SpelloutRules {
|
||||
"%main:\n"
|
||||
" -x: moins >>;\n"
|
||||
" x.x: << virgule >>;\n"
|
||||
" z\u00e9ro; un; deux; trois; quatre; cinq; six; sept; huit; neuf;\n"
|
||||
" dix; onze; douze; treize; quatorze; quinze; seize;\n"
|
||||
" dix-sept; dix-huit; dix-neuf;\n"
|
||||
" 20: vingt[->%%alt-ones>];\n"
|
||||
" 30: trente[->%%alt-ones>];\n"
|
||||
" 40: quarante[->%%alt-ones>];\n"
|
||||
" 50: cinquante[->%%alt-ones>];\n"
|
||||
" 60: soixante[->%%alt-ones>];\n"
|
||||
// notice new words for 70, 80, and 90
|
||||
" 70: septante[->%%alt-ones>];\n"
|
||||
" 80: octante[->%%alt-ones>];\n"
|
||||
" 90: nonante[->%%alt-ones>];\n"
|
||||
" 100: cent[ >>];\n"
|
||||
" 200: << cents[ >>];\n"
|
||||
" 1000: mille[ >>];\n"
|
||||
" 1100>: onze cents[ >>];\n"
|
||||
" 1200: mille >>;\n"
|
||||
" 2000: << mille[ >>];\n"
|
||||
" 1,000,000: << million[ >>];\n"
|
||||
" 1,000,000,000: << milliarde[ >>];\n"
|
||||
" 1,000,000,000,000: << billion[ >>];\n"
|
||||
" 1,000,000,000,000,000: =#,##0=;\n"
|
||||
"%%alt-ones:\n"
|
||||
" ; et-un; =%main=;"
|
||||
}
|
||||
}
|
||||
|
@ -102,4 +102,38 @@ he {
|
||||
"Hebrew", // Script Name
|
||||
"Hebr" // ISO 15924 Name
|
||||
}
|
||||
|
||||
//------------------------------------------------------------
|
||||
// Rule Based Number Format Support
|
||||
//------------------------------------------------------------
|
||||
|
||||
// * Spellout rules for Hebrew. Hebrew actually has inflected forms for
|
||||
// * most of the lower-order numbers. The masculine forms are shown
|
||||
// * here.
|
||||
|
||||
// This data is woefully incomplete. Can someone fill me in on the
|
||||
// various inflected forms of the numbers, which seem to be necessary
|
||||
// to do Hebrew correctly? Can someone supply me with data for values
|
||||
// from 1,000,000 on up? What about the word for zero? What about
|
||||
// information on negatives and decimals?
|
||||
|
||||
SpelloutRules {
|
||||
"zero (incomplete data); \u05d0\u05d4\u05d3; \u05e9\u05d2\u05d9\u05d9\u05dd; \u05e9\u05dc\u05d5\u05e9\u05d4;\n"
|
||||
"4: \u05d0\u05d3\u05d1\u05e6\u05d4; \u05d7\u05d2\u05d5\u05d9\u05e9\u05d4; \u05e9\u05e9\u05d4;\n"
|
||||
"7: \u05e9\u05d1\u05e6\u05d4; \u05e9\u05de\u05d5\u05d2\u05d4; \u05ea\u05e9\u05e6\u05d4;\n"
|
||||
"10: \u05e6\u05e9\u05d3\u05d4[ >>];\n"
|
||||
"20: \u05e6\u05e9\u05d3\u05d9\u05dd[ >>];\n"
|
||||
"30: \u05e9\u05dc\u05d5\u05e9\u05d9\u05dd[ >>];\n"
|
||||
"40: \u05d0\u05d3\u05d1\u05e6\u05d9\u05dd[ >>];\n"
|
||||
"50: \u05d7\u05de\u05d9\u05e9\u05d9\u05dd[ >>];\n"
|
||||
"60: \u05e9\u05e9\u05d9\u05dd[ >>];\n"
|
||||
"70: \u05e9\u05d1\u05e6\u05d9\u05dd[ >>];\n"
|
||||
"80: \u05e9\u05de\u05d5\u05d2\u05d9\u05dd[ >>];\n"
|
||||
"90: \u05ea\u05e9\u05e6\u05d9\u05dd[ >>];\n"
|
||||
"100: \u05de\u05d0\u05d4[ >>];\n"
|
||||
"200: << \u05de\u05d0\u05d4[ >>];\n"
|
||||
"1000: \u05d0\u05dc\u05e3[ >>];\n"
|
||||
"2000: << \u05d0\u05dc\u05e3[ >>];\n"
|
||||
"1,000,000: =#,##0= (incomplete data);"
|
||||
}
|
||||
}
|
||||
|
@ -117,4 +117,106 @@ it {
|
||||
"Latf", // ISO 15924 Name
|
||||
"Latg", // ISO 15924 Name
|
||||
}
|
||||
|
||||
//------------------------------------------------------------
|
||||
// Rule Based Number Format Support
|
||||
//------------------------------------------------------------
|
||||
|
||||
// * Spellout rules for Italian. Like German, most Italian numbers are
|
||||
// * written as single words. What makes these rules complicated is the rule
|
||||
// * that says that when a word ending in a vowel and a word beginning with
|
||||
// * a vowel are combined into a compound, the vowel is dropped from the
|
||||
// * end of the first word: 180 is "centottanta," not "centoottanta."
|
||||
// * The complexity of this rule set is to produce this behavior.
|
||||
|
||||
// Can someone confirm that I did the vowel-eliding thing right? I'm
|
||||
// not 100% sure I'm doing it in all the right places, or completely
|
||||
// correctly. Also, I don't have information for negatives and decimals,
|
||||
// and I lack words for values from 1,000,000 on up.
|
||||
|
||||
SpelloutRules {
|
||||
// main rule set. Follows the patterns of the preceding rule sets,
|
||||
// except that the final vowel is omitted from words ending in
|
||||
// vowels when they are followed by another word; instead, we have
|
||||
// separate rule sets that are identical to this one, except that
|
||||
// all the words that don't begin with a vowel have a vowel tacked
|
||||
// onto them at the front. A word ending in a vowel calls a
|
||||
// substitution that will supply that vowel, unless that vowel is to
|
||||
// be elided.
|
||||
"%main:\n"
|
||||
" -x: meno >>;\n"
|
||||
" x.x: << virgola >>;\n"
|
||||
" zero; uno; due; tre; quattro; cinque; sei; sette; otto;\n"
|
||||
" nove;\n"
|
||||
" dieci; undici; dodici; tredici; quattordici; quindici; sedici;\n"
|
||||
" diciasette; diciotto; diciannove;\n"
|
||||
" 20: venti; vent>%%with-i>;\n"
|
||||
" 30: trenta; trent>%%with-i>;\n"
|
||||
" 40: quaranta; quarant>%%with-a>;\n"
|
||||
" 50: cinquanta; cinquant>%%with-a>;\n"
|
||||
" 60: sessanta; sessant>%%with-a>;\n"
|
||||
" 70: settanta; settant>%%with-a>;\n"
|
||||
" 80: ottanta; ottant>%%with-a>;\n"
|
||||
" 90: novanta; novant>%%with-a>;\n"
|
||||
" 100: cento; cent[>%%with-o>];\n"
|
||||
" 200: <<cento; <<cent[>%%with-o>];\n"
|
||||
" 1000: mille; mill[>%%with-i>];\n"
|
||||
" 2000: <<mila; <<mil[>%%with-a>];\n"
|
||||
" 100,000>>: <<mila[ >>];\n"
|
||||
" 1,000,000: =#,##0= (incomplete data);\n"
|
||||
"%%with-a:\n"
|
||||
" azero; uno; adue; atre; aquattro; acinque; asei; asette; otto;\n"
|
||||
" anove;\n"
|
||||
" adieci; undici; adodici; atredici; aquattordici; aquindici; asedici;\n"
|
||||
" adiciasette; adiciotto; adiciannove;\n"
|
||||
" 20: aventi; avent>%%with-i>;\n"
|
||||
" 30: atrenta; atrent>%%with-i>;\n"
|
||||
" 40: aquaranta; aquarant>%%with-a>;\n"
|
||||
" 50: acinquanta; acinquant>%%with-a>;\n"
|
||||
" 60: asessanta; asessant>%%with-a>;\n"
|
||||
" 70: asettanta; asettant>%%with-a>;\n"
|
||||
" 80: ottanta; ottant>%%with-a>;\n"
|
||||
" 90: anovanta; anovant>%%with-a>;\n"
|
||||
" 100: acento; acent[>%%with-o>];\n"
|
||||
" 200: <%%with-a<cento; <%%with-a<cent[>%%with-o>];\n"
|
||||
" 1000: amille; amill[>%%with-i>];\n"
|
||||
" 2000: <%%with-a<mila; <%%with-a<mil[>%%with-a>];\n"
|
||||
" 100,000: =%main=;\n"
|
||||
"%%with-i:\n"
|
||||
" izero; uno; idue; itre; iquattro; icinque; isei; isette; otto;\n"
|
||||
" inove;\n"
|
||||
" idieci; undici; idodici; itredici; iquattordici; iquindici; isedici;\n"
|
||||
" idiciasette; idiciotto; idiciannove;\n"
|
||||
" 20: iventi; ivent>%%with-i>;\n"
|
||||
" 30: itrenta; itrent>%%with-i>;\n"
|
||||
" 40: iquaranta; iquarant>%%with-a>;\n"
|
||||
" 50: icinquanta; icinquant>%%with-a>;\n"
|
||||
" 60: isessanta; isessant>%%with-a>;\n"
|
||||
" 70: isettanta; isettant>%%with-a>;\n"
|
||||
" 80: ottanta; ottant>%%with-a>;\n"
|
||||
" 90: inovanta; inovant>%%with-a>;\n"
|
||||
" 100: icento; icent[>%%with-o>];\n"
|
||||
" 200: <%%with-i<cento; <%%with-i<cent[>%%with-o>];\n"
|
||||
" 1000: imille; imill[>%%with-i>];\n"
|
||||
" 2000: <%%with-i<mila; <%%with-i<mil[>%%with-a>];\n"
|
||||
" 100,000: =%main=;\n"
|
||||
"%%with-o:\n"
|
||||
" ozero; uno; odue; otre; oquattro; ocinque; osei; osette; otto;\n"
|
||||
" onove;\n"
|
||||
" odieci; undici; ododici; otredici; oquattordici; oquindici; osedici;\n"
|
||||
" odiciasette; odiciotto; odiciannove;\n"
|
||||
" 20: oventi; ovent>%%with-i>;\n"
|
||||
" 30: otrenta; otrent>%%with-i>;\n"
|
||||
" 40: oquaranta; oquarant>%%with-a>;\n"
|
||||
" 50: ocinquanta; ocinquant>%%with-a>;\n"
|
||||
" 60: osessanta; osessant>%%with-a>;\n"
|
||||
" 70: osettanta; osettant>%%with-a>;\n"
|
||||
" 80: ottanta; ottant>%%with-a>;\n"
|
||||
" 90: onovanta; onovant>%%with-a>;\n"
|
||||
" 100: ocento; ocent[>%%with-o>];\n"
|
||||
" 200: <%%with-o<cento; <%%with-o<cent[>%%with-o>];\n"
|
||||
" 1000: omille; omill[>%%with-i>];\n"
|
||||
" 2000: <%%with-o<mila; <%%with-o<mil[>%%with-a>];\n"
|
||||
" 100,000: =%main=;\n"
|
||||
}
|
||||
}
|
||||
|
@ -859,4 +859,47 @@ ja {
|
||||
"JST",
|
||||
}
|
||||
}
|
||||
|
||||
//------------------------------------------------------------
|
||||
// Rule Based Number Format Support
|
||||
//------------------------------------------------------------
|
||||
|
||||
// * Spellout rules for Japanese. In Japanese, there really isn't any
|
||||
// * distinction between a number written out in digits and a number
|
||||
// * written out in words: the ideographic characters are both digits
|
||||
// * and words. This rule set provides two variants: %traditional
|
||||
// * uses the traditional CJK numerals (which are also used in China
|
||||
// * and Korea). %financial uses alternate ideographs for many numbers
|
||||
// * that are harder to alter than the traditional numerals (one could
|
||||
// * fairly easily change a one to
|
||||
// * a three just by adding two strokes, for example). This is also done in
|
||||
// * the other countries using Chinese ideographs, but different ideographs
|
||||
// * are used in those places.
|
||||
|
||||
// Can someone supply me with the right fraud-proof ideographs for
|
||||
// Simplified and Traditional Chinese, and for Korean? Can someone
|
||||
// supply me with information on negatives and decimals?
|
||||
|
||||
SpelloutRules {
|
||||
"%financial:\n"
|
||||
" \u96f6; \u58f1; \u5f10; \u53c2; \u56db; \u4f0d; \u516d; \u4e03; \u516b; \u4e5d;\n"
|
||||
" \u62fe[>>];\n"
|
||||
" 20: <<\u62fe[>>];\n"
|
||||
" 100: <<\u767e[>>];\n"
|
||||
" 1000: <<\u5343[>>];\n"
|
||||
" 10,000: <<\u4e07[>>];\n"
|
||||
" 100,000,000: <<\u5104[>>];\n"
|
||||
" 1,000,000,000,000: <<\u5146[>>];\n"
|
||||
" 10,000,000,000,000,000: =#,##0=;\n"
|
||||
"%traditional:\n"
|
||||
" \u96f6; \u4e00; \u4e8c; \u4e09; \u56db; \u4e94; \u516d; \u4e03; \u516b; \u4e5d;\n"
|
||||
" \u5341[>>];\n"
|
||||
" 20: <<\u5341[>>];\n"
|
||||
" 100: <<\u767e[>>];\n"
|
||||
" 1000: <<\u5343[>>];\n"
|
||||
" 10,000: <<\u4e07[>>];\n"
|
||||
" 100,000,000: <<\u5104[>>];\n"
|
||||
" 1,000,000,000,000: <<\u5146[>>];\n"
|
||||
" 10,000,000,000,000,000: =#,##0=;"
|
||||
}
|
||||
}
|
||||
|
@ -108,4 +108,32 @@ nl {
|
||||
"Latg", // ISO 15924 Name
|
||||
}
|
||||
|
||||
//------------------------------------------------------------
|
||||
// Rule Based Number Format Support
|
||||
//------------------------------------------------------------
|
||||
|
||||
// * Spellout rules for Dutch
|
||||
|
||||
// can someone supply me with information on negatives and decimals?
|
||||
|
||||
SpelloutRules {
|
||||
" -x: min >>;\n"
|
||||
"x.x: << komma >>;\n"
|
||||
"(zero?); een; twee; drie; vier; vijf; zes; zeven; acht; negen;\n"
|
||||
"tien; elf; twaalf; dertien; veertien; vijftien; zestien;\n"
|
||||
"zeventien; achtien; negentien;\n"
|
||||
"20: [>> en ]twintig;\n"
|
||||
"30: [>> en ]dertig;\n"
|
||||
"40: [>> en ]veertig;\n"
|
||||
"50: [>> en ]vijftig;\n"
|
||||
"60: [>> en ]zestig;\n"
|
||||
"70: [>> en ]zeventig;\n"
|
||||
"80: [>> en ]tachtig;\n"
|
||||
"90: [>> en ]negentig;\n"
|
||||
"100: << honderd[ >>];\n"
|
||||
"1000: << duizend[ >>];\n"
|
||||
"1,000,000: << miljoen[ >>];\n"
|
||||
"1,000,000,000: << biljoen[ >>];\n"
|
||||
"1,000,000,000,000: =#,##0="
|
||||
}
|
||||
}
|
||||
|
@ -1180,11 +1180,221 @@ root {
|
||||
"Anchorage",
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
LocaleScript{
|
||||
"Latin",
|
||||
"Latn",
|
||||
"Latf",
|
||||
"Latg"
|
||||
}
|
||||
|
||||
//------------------------------------------------------------
|
||||
// Rule Based Number Format Support
|
||||
//------------------------------------------------------------
|
||||
|
||||
// * Spellout rules for U.S. English. This rule set has two variants:
|
||||
// * %simplified is a set of rules showing the simple method of spelling
|
||||
// * out numbers in English: 289 is formatted as "two hundred eighty-nine".
|
||||
// * %default uses a more complicated algorithm to format
|
||||
// * numbers in a more natural way: 289 is formatted as "two hundred AND
|
||||
// * eighty-nine" and commas are inserted between the thousands groups for
|
||||
// * values above 100,000.
|
||||
|
||||
SpelloutRules {
|
||||
// This rule set shows the normal simple formatting rules for English
|
||||
"%simplified:\n"
|
||||
// negative number rule. This rule is used to format negative
|
||||
// numbers. The result of formatting the number's absolute
|
||||
// value is placed where the >> is.
|
||||
" -x: minus >>;\n"
|
||||
// fraction rule. This rule is used for formatting numbers
|
||||
// with fractional parts. The result of formatting the
|
||||
// number's integral part is substituted for the <<, and
|
||||
// the result of formatting the number's fractional part
|
||||
// (one digit at a time, e.g., 0.123 is "zero point one two
|
||||
// three") replaces the >>.
|
||||
" x.x: << point >>;\n"
|
||||
// the rules for the values from 0 to 19 are simply the
|
||||
// words for those numbers
|
||||
" zero; one; two; three; four; five; six; seven; eight; nine;\n"
|
||||
" ten; eleven; twelve; thirteen; fourteen; fifteen; sixteen;\n"
|
||||
" seventeen; eighteen; nineteen;\n"
|
||||
// beginning at 20, we use the >> to mark the position where
|
||||
// the result of formatting the number's ones digit goes. Thus,
|
||||
// we only need a new rule at every multiple of 10. Text in
|
||||
// brackets is omitted if the value being formatted is an
|
||||
// even multiple of 10.
|
||||
" 20: twenty[->>];\n"
|
||||
" 30: thirty[->>];\n"
|
||||
" 40: forty[->>];\n"
|
||||
" 50: fifty[->>];\n"
|
||||
" 60: sixty[->>];\n"
|
||||
" 70: seventy[->>];\n"
|
||||
" 80: eighty[->>];\n"
|
||||
" 90: ninety[->>];\n"
|
||||
// beginning at 100, we can use << to mark the position where
|
||||
// the result of formatting the multiple of 100 is to be
|
||||
// inserted. Notice also that the meaning of >> has shifted:
|
||||
// here, it refers to both the ones place and the tens place.
|
||||
// The meanings of the << and >> tokens depend on the base value
|
||||
// of the rule. A rule's divisor is (usually) the highest
|
||||
// power of 10 that is less than or equal to the rule's base
|
||||
// value. The value being formatted is divided by the rule's
|
||||
// divisor, and the integral quotient is used to get the text
|
||||
// for <<, while the remainder is used to produce the text
|
||||
// for >>. Again, text in brackets is omitted if the value
|
||||
// being formatted is an even multiple of the rule's divisor
|
||||
// (in this case, an even multiple of 100)
|
||||
" 100: << hundred[ >>];\n"
|
||||
// The rules for the higher numbers work the same way as the
|
||||
// rule for 100: Again, the << and >> tokens depend on the
|
||||
// rule's divisor, which for all these rules is also the rule's
|
||||
// base value. To group by thousand, we simply don't have any
|
||||
// rules between 1,000 and 1,000,000.
|
||||
" 1000: << thousand[ >>];\n"
|
||||
" 1,000,000: << million[ >>];\n"
|
||||
" 1,000,000,000: << billion[ >>];\n"
|
||||
" 1,000,000,000,000: << trillion[ >>];\n"
|
||||
// overflow rule. This rule specifies that values of a
|
||||
// quadrillion or more are shown in numerals rather than words.
|
||||
// The == token means to format (with new rules) the value
|
||||
// being formatted by this rule and place the result where
|
||||
// the == is. The #,##0 inside the == signs is a
|
||||
// DecimalFormat pattern. It specifies that the value should
|
||||
// be formatted with a DecimalFormat object, and that it
|
||||
// should be formatted with no decimal places, at least one
|
||||
// digit, and a thousands separator.
|
||||
" 1,000,000,000,000,000: =#,##0=;\n"
|
||||
|
||||
// %default is a more elaborate form of %simplified; It is basically
|
||||
// the same, except that it introduces "and" before the ones digit
|
||||
// when appropriate (basically, between the tens and ones digits) and
|
||||
// separates the thousands groups with commas in values over 100,000.
|
||||
"%default:\n"
|
||||
// negative-number and fraction rules. These are the same
|
||||
// as those for %simplified, but have to be stated here too
|
||||
// because this is an entry point
|
||||
" -x: minus >>;\n"
|
||||
" x.x: << point >>;\n"
|
||||
// just use %simplified for values below 100
|
||||
" =%simplified=;\n"
|
||||
// for values from 100 to 99,999 use %%and to decide whether or
|
||||
// not to interpose the "and"
|
||||
" 100: << hundred[ >%%and>];\n"
|
||||
" 1000: << thousand[ >%%and>];\n"
|
||||
// for values of 100,000 and up, use %%commas to interpose the
|
||||
// commas in the right places (and also to interpose the "and")
|
||||
" 100,000>>: << thousand[>%%commas>];\n"
|
||||
" 1,000,000: << million[>%%commas>];\n"
|
||||
" 1,000,000,000: << billion[>%%commas>];\n"
|
||||
" 1,000,000,000,000: << trillion[>%%commas>];\n"
|
||||
" 1,000,000,000,000,000: =#,##0=;\n"
|
||||
// if the value passed to this rule set is greater than 100, don't
|
||||
// add the "and"; if it's less than 100, add "and" before the last
|
||||
// digits
|
||||
"%%and:\n"
|
||||
" and =%default=;\n"
|
||||
" 100: =%default=;\n"
|
||||
// this rule set is used to place the commas
|
||||
"%%commas:\n"
|
||||
// for values below 100, add "and" (the apostrophe at the
|
||||
// beginning is ignored, but causes the space that follows it
|
||||
// to be significant: this is necessary because the rules
|
||||
// calling %%commas don't put a space before it)
|
||||
" ' and =%default=;\n"
|
||||
// put a comma after the thousands (or whatever preceded the
|
||||
// hundreds)
|
||||
" 100: , =%default=;\n"
|
||||
// put a comma after the millions (or whatever precedes the
|
||||
// thousands)
|
||||
" 1000: , <%default< thousand, >%default>;\n"
|
||||
// and so on...
|
||||
" 1,000,000: , =%default=;"
|
||||
// %%lenient-parse isn't really a set of number formatting rules;
|
||||
// it's a set of collation rules. Lenient-parse mode uses a Collator
|
||||
// object to compare fragments of the text being parsed to the text
|
||||
// in the rules, allowing more leeway in the matching text. This set
|
||||
// of rules tells the formatter to ignore commas when parsing (it
|
||||
// already ignores spaces, which is why we refer to the space; it also
|
||||
// ignores hyphens, making "twenty one" and "twenty-one" parse
|
||||
// identically)
|
||||
"%%lenient-parse:\n"
|
||||
// " & ' ' , ',' ;\n"
|
||||
" &\u0000 << ' ' << ',' << '-'; \n"
|
||||
}
|
||||
|
||||
|
||||
// * This rule set adds an English ordinal abbreviation to the end of a
|
||||
// * number. For example, 2 is formatted as "2nd". Parsing doesn't work with
|
||||
// * this rule set. To parse, use DecimalFormat on the numeral.
|
||||
OrdinalRules {
|
||||
// this rule set formats the numeral and calls %%abbrev to
|
||||
// supply the abbreviation
|
||||
"%main:\n"
|
||||
" =#,##0==%%abbrev=;\n"
|
||||
// this rule set supplies the abbreviation
|
||||
"%%abbrev:\n"
|
||||
// the abbreviations. Everything from 4 to 19 ends in "th"
|
||||
" th; st; nd; rd; th;\n"
|
||||
// at 20, we begin repeating the cycle every 10 (13 is "13th",
|
||||
// but 23 and 33 are "23rd" and "33rd") We do this by
|
||||
// ignoring all but the ones digit in selecting the abbreviation
|
||||
" 20: >>;\n"
|
||||
// at 100, we repeat the whole cycle by considering only the
|
||||
// tens and ones digits in picking an abbreviation
|
||||
" 100: >>;\n"
|
||||
}
|
||||
|
||||
// * This rule set formats a number of seconds in sexagesimal notation
|
||||
// * (i.e., hours, minutes, and seconds). %with-words formats it with
|
||||
// * words (3,740 is "1 hour, 2 minutes, 20 seconds") and %in-numerals
|
||||
// * formats it entirely in numerals (3,740 is "1:02:20").
|
||||
DurationRules {
|
||||
// main rule set for formatting with words
|
||||
"%with-words:\n"
|
||||
// take care of singular and plural forms of "second"
|
||||
" 0 seconds; 1 second; =0= seconds;\n"
|
||||
// use %%min to format values greater than 60 seconds
|
||||
" 60/60: <%%min<[, >>];\n"
|
||||
// use %%hr to format values greater than 3,600 seconds
|
||||
// (the ">>>" below causes us to see the number of minutes
|
||||
// when there are zero minutes)
|
||||
" 3600/60: <%%hr<[, >>>];\n"
|
||||
// this rule set takes care of the singular and plural forms
|
||||
// of "minute"
|
||||
"%%min:\n"
|
||||
" 0 minutes; 1 minute; =0= minutes;\n"
|
||||
// this rule set takes care of the singular and plural forms
|
||||
// of "hour"
|
||||
"%%hr:\n"
|
||||
" 0 hours; 1 hour; =0= hours;\n"
|
||||
|
||||
// main rule set for formatting in numerals
|
||||
"%in-numerals:\n"
|
||||
// values below 60 seconds are shown with "sec."
|
||||
" =0= sec.;\n"
|
||||
// higher values are shown with colons: %%min-sec is used for
|
||||
// values below 3,600 seconds...
|
||||
" 60: =%%min-sec=;\n"
|
||||
// ...and %%hr-min-sec is used for values of 3,600 seconds
|
||||
// and above
|
||||
" 3600: =%%hr-min-sec=;\n"
|
||||
// this rule causes values of less than 10 minutes to show without
|
||||
// a leading zero
|
||||
"%%min-sec:\n"
|
||||
" 0: :=00=;\n"
|
||||
" 60/60: <0<>>;\n"
|
||||
// this rule set is used for values of 3,600 or more. Minutes are always
|
||||
// shown, and always shown with two digits
|
||||
"%%hr-min-sec:\n"
|
||||
" 0: :=00=;\n"
|
||||
" 60/60: <00<>>;\n"
|
||||
" 3600/60: <#,##0<:>>>;\n"
|
||||
// the lenient-parse rules allow several different characters to be used
|
||||
// as delimiters between hours, minutes, and seconds
|
||||
"%%lenient-parse:\n"
|
||||
" & ':' = '.' = ' ' = '-';\n"
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -122,4 +122,44 @@ ru {
|
||||
"Cyrs" // ISO 15924 Name
|
||||
|
||||
}
|
||||
|
||||
//------------------------------------------------------------
|
||||
// Rule Based Number Format Support
|
||||
//------------------------------------------------------------
|
||||
|
||||
// * Spellout rules for Russian.
|
||||
|
||||
// Can someone supply me with information on negatives and decimals?
|
||||
// How about words for billions and trillions?
|
||||
|
||||
SpelloutRules {
|
||||
"\u043d\u043e\u043b\u044c; \u043e\u0434\u0438\u043d; \u0434\u0432\u0430; \u0442\u0440\u0438; "
|
||||
"\u0447\u0435\u0442\u044b\u0440\u0435; \u043f\u044f\u0442\u044c; \u0448\u0435\u0441\u0442\u044c; "
|
||||
"\u0441\u0435\u043c\u044c; \u0432\u043e\u0441\u0435\u043c\u044c; \u0434\u0435\u0432\u044f\u0442\u044c;\n"
|
||||
"10: \u0434\u0435\u0441\u044f\u0442\u044c; "
|
||||
"\u043e\u0434\u0438\u043d\u043d\u0430\u0434\u0446\u0430\u0442\u044c;\n"
|
||||
"\u0434\u0432\u0435\u043d\u0430\u0434\u0446\u0430\u0442\u044c; "
|
||||
"\u0442\u0440\u0438\u043d\u0430\u0434\u0446\u0430\u0442\u044c; "
|
||||
"\u0447\u0435\u0442\u044b\u0440\u043d\u0430\u0434\u0446\u0430\u0442\u044c;\n"
|
||||
"15: \u043f\u044f\u0442\u043d\u0430\u0434\u0446\u0430\u0442\u044c; "
|
||||
"\u0448\u0435\u0441\u0442\u043d\u0430\u0434\u0446\u0430\u0442\u044c; "
|
||||
"\u0441\u0435\u043c\u043d\u0430\u0434\u0446\u0430\u0442\u044c; "
|
||||
"\u0432\u043e\u0441\u0435\u043c\u043d\u0430\u0434\u0446\u0430\u0442\u044c; "
|
||||
"\u0434\u0435\u0432\u044f\u0442\u043d\u0430\u0434\u0446\u0430\u0442\u044c;\n"
|
||||
"20: \u0434\u0432\u0430\u0434\u0446\u0430\u0442\u044c[ >>];\n"
|
||||
"30: \u0442\u0440\u0438\u0434\u0446\u0430\u0442\u044c[ >>];\n"
|
||||
"40: \u0441\u043e\u0440\u043e\u043a[ >>];\n"
|
||||
"50: \u043f\u044f\u0442\u044c\u0434\u0435\u0441\u044f\u0442[ >>];\n"
|
||||
"60: \u0448\u0435\u0441\u0442\u044c\u0434\u0435\u0441\u044f\u0442[ >>];\n"
|
||||
"70: \u0441\u0435\u043c\u044c\u0434\u0435\u0441\u044f\u0442[ >>];\n"
|
||||
"80: \u0432\u043e\u0441\u0435\u043c\u044c\u0434\u0435\u0441\u044f\u0442[ >>];\n"
|
||||
"90: \u0434\u0435\u0432\u044f\u043d\u043e\u0441\u0442\u043e[ >>];\n"
|
||||
"100: \u0441\u0442\u043e[ >>];\n"
|
||||
"200: << \u0441\u0442\u043e[ >>];\n"
|
||||
"1000: \u0442\u044b\u0441\u044f\u0447\u0430[ >>];\n"
|
||||
"2000: << \u0442\u044b\u0441\u044f\u0447\u0430[ >>];\n"
|
||||
"1,000,000: \u043c\u0438\u043b\u043b\u0438\u043e\u043d[ >>];\n"
|
||||
"2,000,000: << \u043c\u0438\u043b\u043b\u0438\u043e\u043d[ >>];\n"
|
||||
"1,000,000,000: =#,##0=;"
|
||||
}
|
||||
}
|
||||
|
@ -118,4 +118,36 @@ sv {
|
||||
"Latf", // ISO 15924 Name
|
||||
"Latg", // ISO 15924 Name
|
||||
}
|
||||
|
||||
//------------------------------------------------------------
|
||||
// Rule Based Number Format Support
|
||||
//------------------------------------------------------------
|
||||
|
||||
// * Spellout rules for Swedish.
|
||||
|
||||
// can someone supply me with information on negatives and decimals?
|
||||
|
||||
SpelloutRules {
|
||||
"noll; ett; tv\u00e5; tre; fyra; fem; sex; sju; \u00e5tta; nio;\n"
|
||||
"tio; elva; tolv; tretton; fjorton; femton; sexton; sjutton; arton; nitton;\n"
|
||||
"20: tjugo[>>];\n"
|
||||
"30: trettio[>>];\n"
|
||||
"40: fyrtio[>>];\n"
|
||||
"50: femtio[>>];\n"
|
||||
"60: sextio[>>];\n"
|
||||
"70: sjuttio[>>];\n"
|
||||
"80: \u00e5ttio[>>];\n"
|
||||
"90: nittio[>>];\n"
|
||||
"100: hundra[>>];\n"
|
||||
"200: <<hundra[>>];\n"
|
||||
"1000: tusen[ >>];\n"
|
||||
"2000: << tusen[ >>];\n"
|
||||
"1,000,000: en miljon[ >>];\n"
|
||||
"2,000,000: << miljon[ >>];\n"
|
||||
"1,000,000,000: en miljard[ >>];\n"
|
||||
"2,000,000,000: << miljard[ >>];\n"
|
||||
"1,000,000,000,000: en biljon[ >>];\n"
|
||||
"2,000,000,000,000: << biljon[ >>];\n"
|
||||
"1,000,000,000,000,000: =#,##0="
|
||||
}
|
||||
}
|
||||
|
@ -245,4 +245,43 @@ th {
|
||||
"Thai", // Script Name
|
||||
"Thai" // ISO 15924 Name
|
||||
}
|
||||
|
||||
//------------------------------------------------------------
|
||||
// Rule Based Number Format Support
|
||||
//------------------------------------------------------------
|
||||
|
||||
// Spellout rules for Thai. Data from Suwit Srivilairith, IBM Thailand
|
||||
|
||||
SpelloutRules {
|
||||
"%default:\n"
|
||||
" -x: \u0e25\u0e1a>>;\n"
|
||||
" x.x: <<\u0e08\u0e38\u0e14>>>;\n"
|
||||
" \u0e28\u0e39\u0e19\u0e22\u0e4c; \u0e2b\u0e19\u0e36\u0e48\u0e07; \u0e2a\u0e2d\u0e07; \u0e2a\u0e32\u0e21;\n"
|
||||
" \u0e2a\u0e35\u0e48; \u0e2b\u0e49\u0e32; \u0e2b\u0e01; \u0e40\u0e08\u0e47\u0e14; \u0e41\u0e1b\u0e14;\n"
|
||||
" \u0e40\u0e01\u0e49\u0e32; \u0e2a\u0e34\u0e1a; \u0e2a\u0e34\u0e1a\u0e40\u0e2d\u0e47\u0e14;\n"
|
||||
" \u0e2a\u0e34\u0e1a\u0e2a\u0e2d\u0e07; \u0e2a\u0e34\u0e1a\u0e2a\u0e32\u0e21;\n"
|
||||
" \u0e2a\u0e34\u0e1a\u0e2a\u0e35\u0e48; \u0e2a\u0e34\u0e1a\u0e2b\u0e49\u0e32;\n"
|
||||
" \u0e2a\u0e34\u0e1a\u0e2b\u0e01; \u0e2a\u0e34\u0e1a\u0e40\u0e08\u0e47\u0e14;\n"
|
||||
" \u0e2a\u0e34\u0e1a\u0e41\u0e1b\u0e14; \u0e2a\u0e34\u0e1a\u0e40\u0e01\u0e49\u0e32;\n"
|
||||
" 20: \u0e22\u0e35\u0e48\u0e2a\u0e34\u0e1a[>%%alt-ones>];\n"
|
||||
" 30: \u0e2a\u0e32\u0e21\u0e2a\u0e34\u0e1a[>%%alt-ones>];\n"
|
||||
" 40: \u0e2a\u0e35\u0e48\u0e2a\u0e34\u0e1a[>%%alt-ones>];\n"
|
||||
" 50: \u0e2b\u0e49\u0e32\u0e2a\u0e34\u0e1a[>%%alt-ones>];\n"
|
||||
" 60: \u0e2b\u0e01\u0e2a\u0e34\u0e1a[>%%alt-ones>];\n"
|
||||
" 70: \u0e40\u0e08\u0e47\u0e14\u0e2a\u0e34\u0e1a[>%%alt-ones>];\n"
|
||||
" 80: \u0e41\u0e1b\u0e14\u0e2a\u0e34\u0e1a[>%%alt-ones>];\n"
|
||||
" 90: \u0e40\u0e01\u0e49\u0e32\u0e2a\u0e34\u0e1a[>%%alt-ones>];\n"
|
||||
" 100: <<\u0e23\u0e49\u0e2d\u0e22[>>];\n"
|
||||
" 1000: <<\u0e1e\u0e31\u0e19[>>];\n"
|
||||
" 10000: <<\u0e2b\u0e21\u0e37\u0e48\u0e19[>>];\n"
|
||||
" 100000: <<\u0e41\u0e2a\u0e19[>>];\n"
|
||||
" 1,000,000: <<\u0e25\u0e49\u0e32\u0e19[>>];\n"
|
||||
" 1,000,000,000: <<\u0e1e\u0e31\u0e19\u0e25\u0e49\u0e32\u0e19[>>];\n"
|
||||
" 1,000,000,000,000: <<\u0e25\u0e49\u0e32\u0e19\u0e25\u0e49\u0e32\u0e19[>>];\n"
|
||||
" 1,000,000,000,000,000: =#,##0=;\n"
|
||||
"%%alt-ones:\n"
|
||||
" \u0e28\u0e39\u0e19\u0e22\u0e4c;\n"
|
||||
" \u0e40\u0e2d\u0e47\u0e14;\n"
|
||||
" =%default=;\n"
|
||||
}
|
||||
}
|
||||
|
@ -519,4 +519,58 @@ de {
|
||||
"Latf", // ISO 15924 Name
|
||||
"Latg", // ISO 15924 Name
|
||||
}
|
||||
|
||||
//------------------------------------------------------------
|
||||
// Rule Based Number Format Support
|
||||
//------------------------------------------------------------
|
||||
|
||||
|
||||
// * RuleBasedNumberFormat data for German
|
||||
|
||||
// again, I'm not 100% sure of these rules. I think both "hundert" and
|
||||
// "einhundert" are correct for 100, but I'm not sure which is preferable
|
||||
// in situations where this framework is likely to be used. Also, is it
|
||||
// really true that numbers are run together into compound words all the
|
||||
// time?
|
||||
|
||||
SpelloutRules {
|
||||
// 1 is "eins" when by itself, but turns into "ein" in most
|
||||
// combinations
|
||||
"%alt-ones:\n"
|
||||
" -x: minus >>;\n"
|
||||
" x.x: << komma >>;\n"
|
||||
" null; eins; =%%main=;\n"
|
||||
"%%main:\n"
|
||||
// words for numbers from 0 to 12. Notice that the values
|
||||
// from 13 to 19 can be derived algorithmically, unlike in most
|
||||
// other languages
|
||||
" null; ein; zwei; drei; vier; f\u00fcnf; sechs; sieben; acht; neun;\n"
|
||||
" zehn; elf; zw\u00f6lf; >>zehn;\n"
|
||||
// rules for the multiples of 10. Notice that the ones digit
|
||||
// goes on the front
|
||||
" 20: [>>und]zwanzig;\n"
|
||||
" 30: [>>und]drei\u00dfig;\n"
|
||||
" 40: [>>und]vierzig;\n"
|
||||
" 50: [>>und]f\u00fcnfzig;\n"
|
||||
" 60: [>>und]sechzig;\n"
|
||||
" 70: [>>und]siebzig;\n"
|
||||
" 80: [>>und]achtzig;\n"
|
||||
" 90: [>>und]neunzig;\n"
|
||||
" 100: hundert[>%alt-ones>];\n"
|
||||
" 200: <<hundert[>%alt-ones>];\n"
|
||||
" 1000: tausend[>%alt-ones>];\n"
|
||||
" 2000: <<tausend[>%alt-ones>];\n"
|
||||
" 1,000,000: eine Million[ >%alt-ones>];\n"
|
||||
" 2,000,000: << Millionen[ >%alt-ones>];\n"
|
||||
" 1,000,000,000: eine Milliarde[ >%alt-ones>];\n"
|
||||
" 2,000,000,000: << Milliarden[ >%alt-ones>];\n"
|
||||
" 1,000,000,000,000: eine Billion[ >%alt-ones>];\n"
|
||||
" 2,000,000,000,000: << Billionen[ >%alt-ones>];\n"
|
||||
" 1,000,000,000,000,000: =#,##0=;"
|
||||
"%%lenient-parse:\n"
|
||||
" &\u0000 << ' ' << '-'\n"
|
||||
" & ae , \u00e4 & ae , \u00c4\n"
|
||||
" & oe , \u00f6 & oe , \u00d6\n"
|
||||
" & ue , \u00fc & ue , \u00dc\n"
|
||||
}
|
||||
}
|
||||
|
@ -116,4 +116,53 @@ el {
|
||||
"Greek",// Script Name
|
||||
"Grek" // ISO 15924 Name
|
||||
}
|
||||
|
||||
//------------------------------------------------------------
|
||||
// Rule Based Number Format Support
|
||||
//------------------------------------------------------------
|
||||
|
||||
// * Spellout rules for Greek. Again in Greek we have to supply the words
|
||||
// * for the multiples of 100 because they can't be derived algorithmically.
|
||||
// * Also, the tens digit changes form when followed by a ones digit: an
|
||||
// * accent mark disappears from the tens digit and moves to the ones digit.
|
||||
// * Therefore, instead of using the [] notation, we actually have to use
|
||||
// * two separate rules for each multiple of 10 to show the two forms of
|
||||
// * the word.
|
||||
|
||||
// Can someone supply me with information on negatives and decimals?
|
||||
// I'm also missing the word for zero. Can someone clue me in?
|
||||
|
||||
SpelloutRules {
|
||||
"zero (incomplete data); \u03ad\u03bd\u03b1; \u03b4\u03cd\u03bf; \u03c4\u03c1\u03af\u03b1; "
|
||||
"\u03c4\u03ad\u03c3\u03c3\u03b5\u03c1\u03b1; \u03c0\u03ad\u03bd\u03c4\u03b5; "
|
||||
"\u03ad\u03be\u03b9; \u03b5\u03c0\u03c4\u03ac; \u03bf\u03ba\u03c4\u03ce; "
|
||||
"\u03b5\u03bd\u03bd\u03ad\u03b1;\n"
|
||||
"10: \u03b4\u03ad\u03ba\u03b1; "
|
||||
"\u03ad\u03bd\u03b4\u03b5\u03ba\u03b1; \u03b4\u03ce\u03b4\u03b5\u03ba\u03b1; "
|
||||
"\u03b4\u03b5\u03ba\u03b1>>;\n"
|
||||
"20: \u03b5\u03af\u03ba\u03bf\u03c3\u03b9; \u03b5\u03b9\u03ba\u03bf\u03c3\u03b9>>;\n"
|
||||
"30: \u03c4\u03c1\u03b9\u03ac\u03bd\u03c4\u03b1; \u03c4\u03c1\u03b9\u03b1\u03bd\u03c4\u03b1>>;\n"
|
||||
"40: \u03c3\u03b1\u03c1\u03ac\u03bd\u03c4\u03b1; \u03c3\u03b1\u03c1\u03b1\u03bd\u03c4\u03b1>>;\n"
|
||||
"50: \u03c0\u03b5\u03bd\u03ae\u03bd\u03c4\u03b1; \u03c0\u03b5\u03bd\u03b7\u03bd\u03c4\u03b1>>;\n"
|
||||
"60: \u03b5\u03be\u03ae\u03bd\u03c4\u03b1; \u03b5\u03be\u03b7\u03bd\u03c4\u03b1>>;\n"
|
||||
"70: \u03b5\u03b2\u03b4\u03bf\u03bc\u03ae\u03bd\u03c4\u03b1; "
|
||||
"\u03b5\u03b2\u03b4\u03bf\u03bc\u03b7\u03bd\u03c4\u03b1>>;\n"
|
||||
"80: \u03bf\u03b3\u03b4\u03cc\u03bd\u03c4\u03b1; \u03bf\u03b3\u03b4\u03bf\u03bd\u03c4\u03b1>>;\n"
|
||||
"90: \u03b5\u03bd\u03bd\u03b5\u03bd\u03ae\u03bd\u03c4\u03b1; "
|
||||
"\u03b5\u03bd\u03bd\u03b5\u03bd\u03b7\u03bd\u03c4\u03b1>>;\n"
|
||||
"100: \u03b5\u03ba\u03b1\u03c4\u03cc[\u03bd >>];\n"
|
||||
"200: \u03b4\u03b9\u03b1\u03ba\u03cc\u03c3\u03b9\u03b1[ >>];\n"
|
||||
"300: \u03c4\u03c1\u03b9\u03b1\u03ba\u03cc\u03c3\u03b9\u03b1[ >>];\n"
|
||||
"400: \u03c4\u03b5\u03c4\u03c1\u03b1\u03ba\u03cc\u03c3\u03b9\u03b1[ >>];\n"
|
||||
"500: \u03c0\u03b5\u03bd\u03c4\u03b1\u03ba\u03cc\u03c3\u03b9\u03b1[ >>];\n"
|
||||
"600: \u03b5\u03be\u03b1\u03ba\u03cc\u03c3\u03b9\u03b1[ >>];\n"
|
||||
"700: \u03b5\u03c0\u03c4\u03b1\u03ba\u03cc\u03c3\u03b9\u03b1[ >>];\n"
|
||||
"800: \u03bf\u03ba\u03c4\u03b1\u03ba\u03cc\u03c3\u03b9\u03b1[ >>];\n"
|
||||
"900: \u03b5\u03bd\u03bd\u03b9\u03b1\u03ba\u03cc\u03c3\u03b9\u03b1[ >>];\n"
|
||||
"1000: \u03c7\u03af\u03bb\u03b9\u03b1[ >>];\n"
|
||||
"2000: << \u03c7\u03af\u03bb\u03b9\u03b1[ >>];\n"
|
||||
"1,000,000: << \u03b5\u03ba\u03b1\u03c4\u03bf\u03bc\u03bc\u03b9\u03cc\u03c1\u03b9\u03bf[ >>];\n"
|
||||
"1,000,000,000: << \u03b4\u03b9\u03c3\u03b5\u03ba\u03b1\u03c4\u03bf\u03bc\u03bc\u03b9\u03cc\u03c1\u03b9\u03bf[ >>];\n"
|
||||
"1,000,000,000,000: =#,##0="
|
||||
}
|
||||
}
|
||||
|
@ -233,4 +233,11 @@ en {
|
||||
"Latf", // ISO 15924 Name
|
||||
"Latg", // ISO 15924 Name
|
||||
}
|
||||
|
||||
//------------------------------------------------------------
|
||||
// Rule Based Number Format Support
|
||||
//------------------------------------------------------------
|
||||
|
||||
// inherited from root
|
||||
|
||||
}
|
||||
|
@ -49,4 +49,70 @@ en_GB {
|
||||
"BST",
|
||||
}
|
||||
}
|
||||
|
||||
//------------------------------------------------------------
|
||||
// Rule Based Number Format Support
|
||||
//------------------------------------------------------------
|
||||
|
||||
// * Spellout rules for U.K. English. U.K. English has one significant
|
||||
// * difference from U.S. English: the names for values of 1,000,000,000
|
||||
// * and higher. In American English, each successive "-illion" is 1,000
|
||||
// * times greater than the preceding one: 1,000,000,000 is "one billion"
|
||||
// * and 1,000,000,000,000 is "one trillion." In British English, each
|
||||
// * successive "-illion" is one million times greater than the one before:
|
||||
// * "one billion" is 1,000,000,000,000 (or what Americans would call a
|
||||
// * "trillion"), and "one trillion" is 1,000,000,000,000,000,000.
|
||||
// * 1,000,000,000 in British English is "one thousand million." (This
|
||||
// * value is sometimes called a "milliard," but this word seems to have
|
||||
// * fallen into disuse.)
|
||||
|
||||
// Could someone please correct me if I'm wrong about "milliard" falling
|
||||
// into disuse, or have missed any other details of how large numbers
|
||||
// are rendered. Also, could someone please provide me with information
|
||||
// on which other English-speaking countries use which system? Right now,
|
||||
// I'm assuming that the U.S. system is used in Canada and that all the
|
||||
// other English-speaking countries follow the British system. Can
|
||||
// someone out there confirm this?
|
||||
|
||||
SpelloutRules {
|
||||
"%simplified:\n"
|
||||
" -x: minus >>;\n"
|
||||
" x.x: << point >>;\n"
|
||||
" zero; one; two; three; four; five; six; seven; eight; nine;\n"
|
||||
" ten; eleven; twelve; thirteen; fourteen; fifteen; sixteen;\n"
|
||||
" seventeen; eighteen; nineteen;\n"
|
||||
" 20: twenty[->>];\n"
|
||||
" 30: thirty[->>];\n"
|
||||
" 40: forty[->>];\n"
|
||||
" 50: fifty[->>];\n"
|
||||
" 60: sixty[->>];\n"
|
||||
" 70: seventy[->>];\n"
|
||||
" 80: eighty[->>];\n"
|
||||
" 90: ninety[->>];\n"
|
||||
" 100: << hundred[ >>];\n"
|
||||
" 1000: << thousand[ >>];\n"
|
||||
" 1,000,000: << million[ >>];\n"
|
||||
" 1,000,000,000,000: << billion[ >>];\n"
|
||||
" 1,000,000,000,000,000: =#,##0=;\n"
|
||||
"%default:\n"
|
||||
" -x: minus >>;\n"
|
||||
" x.x: << point >>;\n"
|
||||
" =%simplified=;\n"
|
||||
" 100: << hundred[ >%%and>];\n"
|
||||
" 1000: << thousand[ >%%and>];\n"
|
||||
" 100,000>>: << thousand[>%%commas>];\n"
|
||||
" 1,000,000: << million[>%%commas>];\n"
|
||||
" 1,000,000,000,000: << billion[>%%commas>];\n"
|
||||
" 1,000,000,000,000,000: =#,##0=;\n"
|
||||
"%%and:\n"
|
||||
" and =%default=;\n"
|
||||
" 100: =%default=;\n"
|
||||
"%%commas:\n"
|
||||
" ' and =%default=;\n"
|
||||
" 100: , =%default=;\n"
|
||||
" 1000: , <%default< thousand, >%default>;\n"
|
||||
" 1,000,000: , =%default=;"
|
||||
"%%lenient-parse:\n"
|
||||
" & ' ' , ',' ;\n"
|
||||
}
|
||||
}
|
||||
|
@ -140,4 +140,31 @@ eo {
|
||||
"Latf", // ISO 15924 Name
|
||||
"Latg", // ISO 15924 Name
|
||||
}
|
||||
|
||||
//------------------------------------------------------------
|
||||
// Rule Based Number Format Support
|
||||
//------------------------------------------------------------
|
||||
|
||||
// data from 'Esperanto-programita 1' courtesy of Markus Scherer
|
||||
|
||||
SpelloutRules {
|
||||
"-x: minus >>;\n"
|
||||
"x.x: << komo >>;\n"
|
||||
"nulo; unu; du; tri; kvar; kvin; ses; sep; ok; na\u016d;\n"
|
||||
"10: dek[ >>];\n"
|
||||
"20: <<dek[ >>];\n"
|
||||
"100: cent[ >>];\n"
|
||||
"200: <<cent[ >>];\n"
|
||||
"1000: mil[ >>];\n"
|
||||
"2000: <<mil[ >>];\n"
|
||||
"10000: dekmil[ >>];\n"
|
||||
"11000>: << mil[ >>];\n"
|
||||
"1,000,000: miliono[ >>];\n"
|
||||
"2,000,000: << milionoj[ >>];\n"
|
||||
"1,000,000,000: miliardo[ >>];\n"
|
||||
"2,000,000,000: << miliardoj[ >>];\n"
|
||||
"1,000,000,000,000: biliono[ >>];\n"
|
||||
"2,000,000,000,000: << bilionoj[ >>];\n"
|
||||
"1,000,000,000,000,000: =#,##0=;\n"
|
||||
}
|
||||
}
|
||||
|
@ -258,4 +258,69 @@ es {
|
||||
"Latf", // ISO 15924 Name
|
||||
"Latg", // ISO 15924 Name
|
||||
}
|
||||
|
||||
//------------------------------------------------------------
|
||||
// Rule Based Number Format Support
|
||||
//------------------------------------------------------------
|
||||
|
||||
// * Spellout rules for Spanish. The Spanish rules are quite similar to
|
||||
// * the English rules, but there are some important differences:
|
||||
// * First, we have to provide separate rules for most of the twenties
|
||||
// * because the ones digit frequently picks up an accent mark that it
|
||||
// * doesn't have when standing alone. Second, each multiple of 100 has
|
||||
// * to be specified separately because the multiplier on 100 very often
|
||||
// * changes form in the contraction: 500 is "quinientos," not
|
||||
// * "cincocientos." In addition, the word for 100 is "cien" when
|
||||
// * standing alone, but changes to "ciento" when followed by more digits.
|
||||
// * There are also some other differences.
|
||||
|
||||
// The Spanish rules are incomplete. I'm missing information on negative
|
||||
// numbers and numbers with fractional parts. I also don't have
|
||||
// information on numbers higher than the millions.
|
||||
|
||||
SpelloutRules {
|
||||
// negative-number and fraction rules
|
||||
"-x: menos >>;\n"
|
||||
"x.x: << punto >>;\n"
|
||||
// words for values from 0 to 19
|
||||
"cero; uno; dos; tres; cuatro; cinco; seis; siete; ocho; nueve;\n"
|
||||
"diez; once; doce; trece; catorce; quince; diecis\u00e9is;\n"
|
||||
" diecisiete; dieciocho; diecinueve;\n"
|
||||
// words for values from 20 to 29 (necessary because the ones digit
|
||||
// often picks up an accent mark it doesn't have when standing alone)
|
||||
"veinte; veintiuno; veintid\u00f3s; veintitr\u00e9s; veinticuatro;\n"
|
||||
" veinticinco; veintis\u00e9is; veintisiete; veintiocho;\n"
|
||||
" veintinueve;\n"
|
||||
// words for multiples of 10 (notice that the tens digit is separated
|
||||
// from the ones digit by the word "y".)
|
||||
"30: treinta[ y >>];\n"
|
||||
"40: cuarenta[ y >>];\n"
|
||||
"50: cincuenta[ y >>];\n"
|
||||
"60: sesenta[ y >>];\n"
|
||||
"70: setenta[ y >>];\n"
|
||||
"80: ochenta[ y >>];\n"
|
||||
"90: noventa[ y >>];\n"
|
||||
// 100 by itself is "cien," but 100 followed by something is "ciento"
|
||||
"100: cien;\n"
|
||||
"101: ciento >>;\n"
|
||||
// words for multiples of 100 (must be stated because they're
|
||||
// rarely simple concatenations)
|
||||
"200: doscientos[ >>];\n"
|
||||
"300: trescientos[ >>];\n"
|
||||
"400: cuatrocientos[ >>];\n"
|
||||
"500: quinientos[ >>];\n"
|
||||
"600: seiscientos[ >>];\n"
|
||||
"700: setecientos[ >>];\n"
|
||||
"800: ochocientos[ >>];\n"
|
||||
"900: novecientos[ >>];\n"
|
||||
// for 1,000, the multiplier on "mil" is omitted: 2,000 is "dos mil,"
|
||||
// but 1,000 is just "mil."
|
||||
"1000: mil[ >>];\n"
|
||||
"2000: << mil[ >>];\n"
|
||||
// 1,000,000 is "un millon," not "uno millon"
|
||||
"1,000,000: un mill\u00f3n[ >>];\n"
|
||||
"2,000,000: << millones[ >>];\n"
|
||||
// overflow rule
|
||||
"1,000,000,000: =#,##0= (incomplete data);"
|
||||
}
|
||||
}
|
||||
|
@ -190,10 +190,73 @@ fr {
|
||||
"Latf", // ISO 15924 Name
|
||||
"Latg", // ISO 15924 Name
|
||||
}
|
||||
|
||||
//------------------------------------------------------------
|
||||
// Rule Based Number Format Support
|
||||
//------------------------------------------------------------
|
||||
|
||||
// * Spellout rules for French. French adds some interesting quirks of its
|
||||
// * own: 1) The word "et" is interposed between the tens and ones digits,
|
||||
// * but only if the ones digit is 1: 20 is "vingt," and 22 is "vingt-deux,"
|
||||
// * but 21 is "vingt-et-un." 2) There are no words for 70, 80, or 90.
|
||||
// * "quatre-vingts" ("four twenties") is used for 80, and values proceed
|
||||
// * by score from 60 to 99 (e.g., 73 is "soixante-treize" ["sixty-thirteen"]).
|
||||
// * Numbers from 1,100 to 1,199 are rendered as hundreds rather than
|
||||
// * thousands: 1,100 is "onze cents" ("eleven hundred"), rather than
|
||||
// * "mille cent" ("one thousand one hundred")
|
||||
|
||||
SpelloutRules {
|
||||
// the main rule set
|
||||
"%main:\n"
|
||||
" -x: moins >>;\n"
|
||||
" x.x: << virgule >>;\n"
|
||||
// words for numbers from 0 to 19
|
||||
" z\u00e9ro; un; deux; trois; quatre; cinq; six; sept; huit; neuf;\n"
|
||||
" dix; onze; douze; treize; quatorze; quinze; seize;\n"
|
||||
" dix-sept; dix-huit; dix-neuf;\n"
|
||||
// words for the multiples of 10: %%alt-ones inserts "et"
|
||||
// when needed
|
||||
" 20: vingt[->%%alt-ones>];\n"
|
||||
" 30: trente[->%%alt-ones>];\n"
|
||||
" 40: quarante[->%%alt-ones>];\n"
|
||||
" 50: cinquante[->%%alt-ones>];\n"
|
||||
// rule for 60. The /20 causes this rule's multiplier to be
|
||||
// 20 rather than 10, allowing us to recurse for all values
|
||||
// from 60 to 79...
|
||||
" 60/20: soixante[->%%alt-ones>];\n"
|
||||
// ...except for 71, which must be special-cased
|
||||
" 71: soixante et onze;\n"
|
||||
// at 72, we have to repeat the rule for 60 to get us to 79
|
||||
" 72/20: soixante->%%alt-ones>;\n"
|
||||
// at 80, we state a new rule with the phrase for 80. Since
|
||||
// it changes form when there's a ones digit, we need a second
|
||||
// rule at 81. This rule also includes "/20," allowing it to
|
||||
// be used correctly for all values up to 99
|
||||
" 80: quatre-vingts; 81/20: quatre-vingt->>;\n"
|
||||
// "cent" becomes plural when preceded by a multiplier, and
|
||||
// the multiplier is omitted from the singular form
|
||||
" 100: cent[ >>];\n"
|
||||
" 200: << cents[ >>];\n"
|
||||
" 1000: mille[ >>];\n"
|
||||
// values from 1,100 to 1,199 are rendered as "onze cents..."
|
||||
// instead of "mille cent..." The > after "1000" decreases
|
||||
// the rule's exponent, causing its multiplier to be 100 instead
|
||||
// of 1,000. This prevents us from getting "onze cents cent
|
||||
// vingt-deux" ("eleven hundred one hundred twenty-two").
|
||||
" 1100>: onze cents[ >>];\n"
|
||||
// at 1,200, we go back to formatting in thousands, so we
|
||||
// repeat the rule for 1,000
|
||||
" 1200: mille >>;\n"
|
||||
// at 2,000, the multiplier is added
|
||||
" 2000: << mille[ >>];\n"
|
||||
" 1,000,000: << million[ >>];\n"
|
||||
" 1,000,000,000: << milliard[ >>];\n"
|
||||
" 1,000,000,000,000: << billion[ >>];\n"
|
||||
" 1,000,000,000,000,000: =#,##0=;\n"
|
||||
// %%alt-ones is used to insert "et" when the ones digit is 1
|
||||
"%%alt-ones:\n"
|
||||
" ; et-un; =%main=;\n"
|
||||
"%%lenient-parse:\n"
|
||||
" &\u0000 << ' ' << ',' << '-';\n"
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
@ -56,4 +56,51 @@ fr_CH {
|
||||
"GMT",
|
||||
}
|
||||
}
|
||||
|
||||
//------------------------------------------------------------
|
||||
// Rule Based Number Format Support
|
||||
//------------------------------------------------------------
|
||||
|
||||
// * Spellout rules for Swiss French. Swiss French differs from French French
|
||||
// * in that it does have words for 70, 80, and 90. This rule set shows them,
|
||||
// * and is simpler as a result.
|
||||
|
||||
// again, I'm missing information on negative numbers and decimals for
|
||||
// these two rule sets. Also, I'm not 100% sure about Swiss French. Is
|
||||
// this correct? Is "onze cents" commonly used for 1,100 in both France
|
||||
// and Switzerland? Can someone fill me in on the rules for the other
|
||||
// French-speaking countries? I've heard conflicting opinions on which
|
||||
// version is used in Canada, and I understand there's an alternate set
|
||||
// of words for 70, 80, and 90 that is used somewhere, but I don't know
|
||||
// what those words are or where they're used.
|
||||
|
||||
SpelloutRules {
|
||||
"%main:\n"
|
||||
" -x: moins >>;\n"
|
||||
" x.x: << virgule >>;\n"
|
||||
" z\u00e9ro; un; deux; trois; quatre; cinq; six; sept; huit; neuf;\n"
|
||||
" dix; onze; douze; treize; quatorze; quinze; seize;\n"
|
||||
" dix-sept; dix-huit; dix-neuf;\n"
|
||||
" 20: vingt[->%%alt-ones>];\n"
|
||||
" 30: trente[->%%alt-ones>];\n"
|
||||
" 40: quarante[->%%alt-ones>];\n"
|
||||
" 50: cinquante[->%%alt-ones>];\n"
|
||||
" 60: soixante[->%%alt-ones>];\n"
|
||||
// notice new words for 70, 80, and 90
|
||||
" 70: septante[->%%alt-ones>];\n"
|
||||
" 80: octante[->%%alt-ones>];\n"
|
||||
" 90: nonante[->%%alt-ones>];\n"
|
||||
" 100: cent[ >>];\n"
|
||||
" 200: << cents[ >>];\n"
|
||||
" 1000: mille[ >>];\n"
|
||||
" 1100>: onze cents[ >>];\n"
|
||||
" 1200: mille >>;\n"
|
||||
" 2000: << mille[ >>];\n"
|
||||
" 1,000,000: << million[ >>];\n"
|
||||
" 1,000,000,000: << milliarde[ >>];\n"
|
||||
" 1,000,000,000,000: << billion[ >>];\n"
|
||||
" 1,000,000,000,000,000: =#,##0=;\n"
|
||||
"%%alt-ones:\n"
|
||||
" ; et-un; =%main=;"
|
||||
}
|
||||
}
|
||||
|
@ -102,4 +102,38 @@ he {
|
||||
"Hebrew", // Script Name
|
||||
"Hebr" // ISO 15924 Name
|
||||
}
|
||||
|
||||
//------------------------------------------------------------
|
||||
// Rule Based Number Format Support
|
||||
//------------------------------------------------------------
|
||||
|
||||
// * Spellout rules for Hebrew. Hebrew actually has inflected forms for
|
||||
// * most of the lower-order numbers. The masculine forms are shown
|
||||
// * here.
|
||||
|
||||
// This data is woefully incomplete. Can someone fill me in on the
|
||||
// various inflected forms of the numbers, which seem to be necessary
|
||||
// to do Hebrew correctly? Can someone supply me with data for values
|
||||
// from 1,000,000 on up? What about the word for zero? What about
|
||||
// information on negatives and decimals?
|
||||
|
||||
SpelloutRules {
|
||||
"zero (incomplete data); \u05d0\u05d4\u05d3; \u05e9\u05d2\u05d9\u05d9\u05dd; \u05e9\u05dc\u05d5\u05e9\u05d4;\n"
|
||||
"4: \u05d0\u05d3\u05d1\u05e6\u05d4; \u05d7\u05d2\u05d5\u05d9\u05e9\u05d4; \u05e9\u05e9\u05d4;\n"
|
||||
"7: \u05e9\u05d1\u05e6\u05d4; \u05e9\u05de\u05d5\u05d2\u05d4; \u05ea\u05e9\u05e6\u05d4;\n"
|
||||
"10: \u05e6\u05e9\u05d3\u05d4[ >>];\n"
|
||||
"20: \u05e6\u05e9\u05d3\u05d9\u05dd[ >>];\n"
|
||||
"30: \u05e9\u05dc\u05d5\u05e9\u05d9\u05dd[ >>];\n"
|
||||
"40: \u05d0\u05d3\u05d1\u05e6\u05d9\u05dd[ >>];\n"
|
||||
"50: \u05d7\u05de\u05d9\u05e9\u05d9\u05dd[ >>];\n"
|
||||
"60: \u05e9\u05e9\u05d9\u05dd[ >>];\n"
|
||||
"70: \u05e9\u05d1\u05e6\u05d9\u05dd[ >>];\n"
|
||||
"80: \u05e9\u05de\u05d5\u05d2\u05d9\u05dd[ >>];\n"
|
||||
"90: \u05ea\u05e9\u05e6\u05d9\u05dd[ >>];\n"
|
||||
"100: \u05de\u05d0\u05d4[ >>];\n"
|
||||
"200: << \u05de\u05d0\u05d4[ >>];\n"
|
||||
"1000: \u05d0\u05dc\u05e3[ >>];\n"
|
||||
"2000: << \u05d0\u05dc\u05e3[ >>];\n"
|
||||
"1,000,000: =#,##0= (incomplete data);"
|
||||
}
|
||||
}
|
||||
|
@ -117,4 +117,106 @@ it {
|
||||
"Latf", // ISO 15924 Name
|
||||
"Latg", // ISO 15924 Name
|
||||
}
|
||||
|
||||
//------------------------------------------------------------
|
||||
// Rule Based Number Format Support
|
||||
//------------------------------------------------------------
|
||||
|
||||
// * Spellout rules for Italian. Like German, most Italian numbers are
|
||||
// * written as single words. What makes these rules complicated is the rule
|
||||
// * that says that when a word ending in a vowel and a word beginning with
|
||||
// * a vowel are combined into a compound, the vowel is dropped from the
|
||||
// * end of the first word: 180 is "centottanta," not "centoottanta."
|
||||
// * The complexity of this rule set is to produce this behavior.
|
||||
|
||||
// Can someone confirm that I did the vowel-eliding thing right? I'm
|
||||
// not 100% sure I'm doing it in all the right places, or completely
|
||||
// correctly. Also, I don't have information for negatives and decimals,
|
||||
// and I lack words for values from 1,000,000 on up.
|
||||
|
||||
SpelloutRules {
|
||||
// main rule set. Follows the patterns of the preceding rule sets,
|
||||
// except that the final vowel is omitted from words ending in
|
||||
// vowels when they are followed by another word; instead, we have
|
||||
// separate rule sets that are identical to this one, except that
|
||||
// all the words that don't begin with a vowel have a vowel tacked
|
||||
// onto them at the front. A word ending in a vowel calls a
|
||||
// substitution that will supply that vowel, unless that vowel is to
|
||||
// be elided.
|
||||
"%main:\n"
|
||||
" -x: meno >>;\n"
|
||||
" x.x: << virgola >>;\n"
|
||||
" zero; uno; due; tre; quattro; cinque; sei; sette; otto;\n"
|
||||
" nove;\n"
|
||||
" dieci; undici; dodici; tredici; quattordici; quindici; sedici;\n"
|
||||
" diciasette; diciotto; diciannove;\n"
|
||||
" 20: venti; vent>%%with-i>;\n"
|
||||
" 30: trenta; trent>%%with-i>;\n"
|
||||
" 40: quaranta; quarant>%%with-a>;\n"
|
||||
" 50: cinquanta; cinquant>%%with-a>;\n"
|
||||
" 60: sessanta; sessant>%%with-a>;\n"
|
||||
" 70: settanta; settant>%%with-a>;\n"
|
||||
" 80: ottanta; ottant>%%with-a>;\n"
|
||||
" 90: novanta; novant>%%with-a>;\n"
|
||||
" 100: cento; cent[>%%with-o>];\n"
|
||||
" 200: <<cento; <<cent[>%%with-o>];\n"
|
||||
" 1000: mille; mill[>%%with-i>];\n"
|
||||
" 2000: <<mila; <<mil[>%%with-a>];\n"
|
||||
" 100,000>>: <<mila[ >>];\n"
|
||||
" 1,000,000: =#,##0= (incomplete data);\n"
|
||||
"%%with-a:\n"
|
||||
" azero; uno; adue; atre; aquattro; acinque; asei; asette; otto;\n"
|
||||
" anove;\n"
|
||||
" adieci; undici; adodici; atredici; aquattordici; aquindici; asedici;\n"
|
||||
" adiciasette; adiciotto; adiciannove;\n"
|
||||
" 20: aventi; avent>%%with-i>;\n"
|
||||
" 30: atrenta; atrent>%%with-i>;\n"
|
||||
" 40: aquaranta; aquarant>%%with-a>;\n"
|
||||
" 50: acinquanta; acinquant>%%with-a>;\n"
|
||||
" 60: asessanta; asessant>%%with-a>;\n"
|
||||
" 70: asettanta; asettant>%%with-a>;\n"
|
||||
" 80: ottanta; ottant>%%with-a>;\n"
|
||||
" 90: anovanta; anovant>%%with-a>;\n"
|
||||
" 100: acento; acent[>%%with-o>];\n"
|
||||
" 200: <%%with-a<cento; <%%with-a<cent[>%%with-o>];\n"
|
||||
" 1000: amille; amill[>%%with-i>];\n"
|
||||
" 2000: <%%with-a<mila; <%%with-a<mil[>%%with-a>];\n"
|
||||
" 100,000: =%main=;\n"
|
||||
"%%with-i:\n"
|
||||
" izero; uno; idue; itre; iquattro; icinque; isei; isette; otto;\n"
|
||||
" inove;\n"
|
||||
" idieci; undici; idodici; itredici; iquattordici; iquindici; isedici;\n"
|
||||
" idiciasette; idiciotto; idiciannove;\n"
|
||||
" 20: iventi; ivent>%%with-i>;\n"
|
||||
" 30: itrenta; itrent>%%with-i>;\n"
|
||||
" 40: iquaranta; iquarant>%%with-a>;\n"
|
||||
" 50: icinquanta; icinquant>%%with-a>;\n"
|
||||
" 60: isessanta; isessant>%%with-a>;\n"
|
||||
" 70: isettanta; isettant>%%with-a>;\n"
|
||||
" 80: ottanta; ottant>%%with-a>;\n"
|
||||
" 90: inovanta; inovant>%%with-a>;\n"
|
||||
" 100: icento; icent[>%%with-o>];\n"
|
||||
" 200: <%%with-i<cento; <%%with-i<cent[>%%with-o>];\n"
|
||||
" 1000: imille; imill[>%%with-i>];\n"
|
||||
" 2000: <%%with-i<mila; <%%with-i<mil[>%%with-a>];\n"
|
||||
" 100,000: =%main=;\n"
|
||||
"%%with-o:\n"
|
||||
" ozero; uno; odue; otre; oquattro; ocinque; osei; osette; otto;\n"
|
||||
" onove;\n"
|
||||
" odieci; undici; ododici; otredici; oquattordici; oquindici; osedici;\n"
|
||||
" odiciasette; odiciotto; odiciannove;\n"
|
||||
" 20: oventi; ovent>%%with-i>;\n"
|
||||
" 30: otrenta; otrent>%%with-i>;\n"
|
||||
" 40: oquaranta; oquarant>%%with-a>;\n"
|
||||
" 50: ocinquanta; ocinquant>%%with-a>;\n"
|
||||
" 60: osessanta; osessant>%%with-a>;\n"
|
||||
" 70: osettanta; osettant>%%with-a>;\n"
|
||||
" 80: ottanta; ottant>%%with-a>;\n"
|
||||
" 90: onovanta; onovant>%%with-a>;\n"
|
||||
" 100: ocento; ocent[>%%with-o>];\n"
|
||||
" 200: <%%with-o<cento; <%%with-o<cent[>%%with-o>];\n"
|
||||
" 1000: omille; omill[>%%with-i>];\n"
|
||||
" 2000: <%%with-o<mila; <%%with-o<mil[>%%with-a>];\n"
|
||||
" 100,000: =%main=;\n"
|
||||
}
|
||||
}
|
||||
|
@ -859,4 +859,47 @@ ja {
|
||||
"JST",
|
||||
}
|
||||
}
|
||||
|
||||
//------------------------------------------------------------
|
||||
// Rule Based Number Format Support
|
||||
//------------------------------------------------------------
|
||||
|
||||
// * Spellout rules for Japanese. In Japanese, there really isn't any
|
||||
// * distinction between a number written out in digits and a number
|
||||
// * written out in words: the ideographic characters are both digits
|
||||
// * and words. This rule set provides two variants: %traditional
|
||||
// * uses the traditional CJK numerals (which are also used in China
|
||||
// * and Korea). %financial uses alternate ideographs for many numbers
|
||||
// * that are harder to alter than the traditional numerals (one could
|
||||
// * fairly easily change a one to
|
||||
// * a three just by adding two strokes, for example). This is also done in
|
||||
// * the other countries using Chinese ideographs, but different ideographs
|
||||
// * are used in those places.
|
||||
|
||||
// Can someone supply me with the right fraud-proof ideographs for
|
||||
// Simplified and Traditional Chinese, and for Korean? Can someone
|
||||
// supply me with information on negatives and decimals?
|
||||
|
||||
SpelloutRules {
|
||||
"%financial:\n"
|
||||
" \u96f6; \u58f1; \u5f10; \u53c2; \u56db; \u4f0d; \u516d; \u4e03; \u516b; \u4e5d;\n"
|
||||
" \u62fe[>>];\n"
|
||||
" 20: <<\u62fe[>>];\n"
|
||||
" 100: <<\u767e[>>];\n"
|
||||
" 1000: <<\u5343[>>];\n"
|
||||
" 10,000: <<\u4e07[>>];\n"
|
||||
" 100,000,000: <<\u5104[>>];\n"
|
||||
" 1,000,000,000,000: <<\u5146[>>];\n"
|
||||
" 10,000,000,000,000,000: =#,##0=;\n"
|
||||
"%traditional:\n"
|
||||
" \u96f6; \u4e00; \u4e8c; \u4e09; \u56db; \u4e94; \u516d; \u4e03; \u516b; \u4e5d;\n"
|
||||
" \u5341[>>];\n"
|
||||
" 20: <<\u5341[>>];\n"
|
||||
" 100: <<\u767e[>>];\n"
|
||||
" 1000: <<\u5343[>>];\n"
|
||||
" 10,000: <<\u4e07[>>];\n"
|
||||
" 100,000,000: <<\u5104[>>];\n"
|
||||
" 1,000,000,000,000: <<\u5146[>>];\n"
|
||||
" 10,000,000,000,000,000: =#,##0=;"
|
||||
}
|
||||
}
|
||||
|
@ -108,4 +108,32 @@ nl {
|
||||
"Latg", // ISO 15924 Name
|
||||
}
|
||||
|
||||
//------------------------------------------------------------
|
||||
// Rule Based Number Format Support
|
||||
//------------------------------------------------------------
|
||||
|
||||
// * Spellout rules for Dutch
|
||||
|
||||
// can someone supply me with information on negatives and decimals?
|
||||
|
||||
SpelloutRules {
|
||||
" -x: min >>;\n"
|
||||
"x.x: << komma >>;\n"
|
||||
"(zero?); een; twee; drie; vier; vijf; zes; zeven; acht; negen;\n"
|
||||
"tien; elf; twaalf; dertien; veertien; vijftien; zestien;\n"
|
||||
"zeventien; achtien; negentien;\n"
|
||||
"20: [>> en ]twintig;\n"
|
||||
"30: [>> en ]dertig;\n"
|
||||
"40: [>> en ]veertig;\n"
|
||||
"50: [>> en ]vijftig;\n"
|
||||
"60: [>> en ]zestig;\n"
|
||||
"70: [>> en ]zeventig;\n"
|
||||
"80: [>> en ]tachtig;\n"
|
||||
"90: [>> en ]negentig;\n"
|
||||
"100: << honderd[ >>];\n"
|
||||
"1000: << duizend[ >>];\n"
|
||||
"1,000,000: << miljoen[ >>];\n"
|
||||
"1,000,000,000: << biljoen[ >>];\n"
|
||||
"1,000,000,000,000: =#,##0="
|
||||
}
|
||||
}
|
||||
|
@ -1180,11 +1180,221 @@ root {
|
||||
"Anchorage",
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
LocaleScript{
|
||||
"Latin",
|
||||
"Latn",
|
||||
"Latf",
|
||||
"Latg"
|
||||
}
|
||||
|
||||
//------------------------------------------------------------
|
||||
// Rule Based Number Format Support
|
||||
//------------------------------------------------------------
|
||||
|
||||
// * Spellout rules for U.S. English. This rule set has two variants:
|
||||
// * %simplified is a set of rules showing the simple method of spelling
|
||||
// * out numbers in English: 289 is formatted as "two hundred eighty-nine".
|
||||
// * %default uses a more complicated algorithm to format
|
||||
// * numbers in a more natural way: 289 is formatted as "two hundred AND
|
||||
// * eighty-nine" and commas are inserted between the thousands groups for
|
||||
// * values above 100,000.
|
||||
|
||||
SpelloutRules {
|
||||
// This rule set shows the normal simple formatting rules for English
|
||||
"%simplified:\n"
|
||||
// negative number rule. This rule is used to format negative
|
||||
// numbers. The result of formatting the number's absolute
|
||||
// value is placed where the >> is.
|
||||
" -x: minus >>;\n"
|
||||
// fraction rule. This rule is used for formatting numbers
|
||||
// with fractional parts. The result of formatting the
|
||||
// number's integral part is substituted for the <<, and
|
||||
// the result of formatting the number's fractional part
|
||||
// (one digit at a time, e.g., 0.123 is "zero point one two
|
||||
// three") replaces the >>.
|
||||
" x.x: << point >>;\n"
|
||||
// the rules for the values from 0 to 19 are simply the
|
||||
// words for those numbers
|
||||
" zero; one; two; three; four; five; six; seven; eight; nine;\n"
|
||||
" ten; eleven; twelve; thirteen; fourteen; fifteen; sixteen;\n"
|
||||
" seventeen; eighteen; nineteen;\n"
|
||||
// beginning at 20, we use the >> to mark the position where
|
||||
// the result of formatting the number's ones digit goes. Thus,
|
||||
// we only need a new rule at every multiple of 10. Text in
|
||||
// brackets is omitted if the value being formatted is an
|
||||
// even multiple of 10.
|
||||
" 20: twenty[->>];\n"
|
||||
" 30: thirty[->>];\n"
|
||||
" 40: forty[->>];\n"
|
||||
" 50: fifty[->>];\n"
|
||||
" 60: sixty[->>];\n"
|
||||
" 70: seventy[->>];\n"
|
||||
" 80: eighty[->>];\n"
|
||||
" 90: ninety[->>];\n"
|
||||
// beginning at 100, we can use << to mark the position where
|
||||
// the result of formatting the multiple of 100 is to be
|
||||
// inserted. Notice also that the meaning of >> has shifted:
|
||||
// here, it refers to both the ones place and the tens place.
|
||||
// The meanings of the << and >> tokens depend on the base value
|
||||
// of the rule. A rule's divisor is (usually) the highest
|
||||
// power of 10 that is less than or equal to the rule's base
|
||||
// value. The value being formatted is divided by the rule's
|
||||
// divisor, and the integral quotient is used to get the text
|
||||
// for <<, while the remainder is used to produce the text
|
||||
// for >>. Again, text in brackets is omitted if the value
|
||||
// being formatted is an even multiple of the rule's divisor
|
||||
// (in this case, an even multiple of 100)
|
||||
" 100: << hundred[ >>];\n"
|
||||
// The rules for the higher numbers work the same way as the
|
||||
// rule for 100: Again, the << and >> tokens depend on the
|
||||
// rule's divisor, which for all these rules is also the rule's
|
||||
// base value. To group by thousand, we simply don't have any
|
||||
// rules between 1,000 and 1,000,000.
|
||||
" 1000: << thousand[ >>];\n"
|
||||
" 1,000,000: << million[ >>];\n"
|
||||
" 1,000,000,000: << billion[ >>];\n"
|
||||
" 1,000,000,000,000: << trillion[ >>];\n"
|
||||
// overflow rule. This rule specifies that values of a
|
||||
// quadrillion or more are shown in numerals rather than words.
|
||||
// The == token means to format (with new rules) the value
|
||||
// being formatted by this rule and place the result where
|
||||
// the == is. The #,##0 inside the == signs is a
|
||||
// DecimalFormat pattern. It specifies that the value should
|
||||
// be formatted with a DecimalFormat object, and that it
|
||||
// should be formatted with no decimal places, at least one
|
||||
// digit, and a thousands separator.
|
||||
" 1,000,000,000,000,000: =#,##0=;\n"
|
||||
|
||||
// %default is a more elaborate form of %simplified; It is basically
|
||||
// the same, except that it introduces "and" before the ones digit
|
||||
// when appropriate (basically, between the tens and ones digits) and
|
||||
// separates the thousands groups with commas in values over 100,000.
|
||||
"%default:\n"
|
||||
// negative-number and fraction rules. These are the same
|
||||
// as those for %simplified, but have to be stated here too
|
||||
// because this is an entry point
|
||||
" -x: minus >>;\n"
|
||||
" x.x: << point >>;\n"
|
||||
// just use %simplified for values below 100
|
||||
" =%simplified=;\n"
|
||||
// for values from 100 to 9,999 use %%and to decide whether or
|
||||
// not to interpose the "and"
|
||||
" 100: << hundred[ >%%and>];\n"
|
||||
" 1000: << thousand[ >%%and>];\n"
|
||||
// for values of 100,000 and up, use %%commas to interpose the
|
||||
// commas in the right places (and also to interpose the "and")
|
||||
" 100,000>>: << thousand[>%%commas>];\n"
|
||||
" 1,000,000: << million[>%%commas>];\n"
|
||||
" 1,000,000,000: << billion[>%%commas>];\n"
|
||||
" 1,000,000,000,000: << trillion[>%%commas>];\n"
|
||||
" 1,000,000,000,000,000: =#,##0=;\n"
|
||||
// if the value passed to this rule set is greater than 100, don't
|
||||
// add the "and"; if it's less than 100, add "and" before the last
|
||||
// digits
|
||||
"%%and:\n"
|
||||
" and =%default=;\n"
|
||||
" 100: =%default=;\n"
|
||||
// this rule set is used to place the commas
|
||||
"%%commas:\n"
|
||||
// for values below 100, add "and" (the apostrophe at the
|
||||
// beginning is ignored, but causes the space that follows it
|
||||
// to be significant: this is necessary because the rules
|
||||
// calling %%commas don't put a space before it)
|
||||
" ' and =%default=;\n"
|
||||
// put a comma after the thousands (or whatever preceded the
|
||||
// hundreds)
|
||||
" 100: , =%default=;\n"
|
||||
// put a comma after the millions (or whatever precedes the
|
||||
// thousands)
|
||||
" 1000: , <%default< thousand, >%default>;\n"
|
||||
// and so on...
|
||||
" 1,000,000: , =%default=;"
|
||||
// %%lenient-parse isn't really a set of number formatting rules;
|
||||
// it's a set of collation rules. Lenient-parse mode uses a Collator
|
||||
// object to compare fragments of the text being parsed to the text
|
||||
// in the rules, allowing more leeway in the matching text. This set
|
||||
// of rules tells the formatter to ignore commas when parsing (it
|
||||
// already ignores spaces, which is why we refer to the space; it also
|
||||
// ignores hyphens, making "twenty one" and "twenty-one" parse
|
||||
// identically)
|
||||
"%%lenient-parse:\n"
|
||||
// " & ' ' , ',' ;\n"
|
||||
" &\u0000 << ' ' << ',' << '-'; \n"
|
||||
}
|
||||
|
||||
|
||||
// * This rule set adds an English ordinal abbreviation to the end of a
|
||||
// * number. For example, 2 is formatted as "2nd". Parsing doesn't work with
|
||||
// * this rule set. To parse, use DecimalFormat on the numeral.
|
||||
OrdinalRules {
|
||||
// this rule set formats the numeral and calls %%abbrev to
|
||||
// supply the abbreviation
|
||||
"%main:\n"
|
||||
" =#,##0==%%abbrev=;\n"
|
||||
// this rule set supplies the abbreviation
|
||||
"%%abbrev:\n"
|
||||
// the abbreviations. Everything from 4 to 19 ends in "th"
|
||||
" th; st; nd; rd; th;\n"
|
||||
// at 20, we begin repeating the cycle every 10 (13 is "13th",
|
||||
// but 23 and 33 are "23rd" and "33rd") We do this by
|
||||
// ignoring all but the ones digit in selecting the abbreviation
|
||||
" 20: >>;\n"
|
||||
// at 100, we repeat the whole cycle by considering only the
|
||||
// tens and ones digits in picking an abbreviation
|
||||
" 100: >>;\n"
|
||||
}
|
||||
|
||||
// * This rule set formats a number of seconds in sexagesimal notation
|
||||
// * (i.e., hours, minutes, and seconds). %with-words formats it with
|
||||
// * words (3,740 is "1 hour, 2 minutes, 20 seconds") and %in-numerals
|
||||
// * formats it entirely in numerals (3,740 is "1:02:20").
|
||||
DurationRules {
|
||||
// main rule set for formatting with words
|
||||
"%with-words:\n"
|
||||
// take care of singular and plural forms of "second"
|
||||
" 0 seconds; 1 second; =0= seconds;\n"
|
||||
// use %%min to format values greater than 60 seconds
|
||||
" 60/60: <%%min<[, >>];\n"
|
||||
// use %%hr to format values greater than 3,600 seconds
|
||||
// (the ">>>" below causes us to see the number of minutes
|
||||
// even when there are zero minutes)
|
||||
" 3600/60: <%%hr<[, >>>];\n"
|
||||
// this rule set takes care of the singular and plural forms
|
||||
// of "minute"
|
||||
"%%min:\n"
|
||||
" 0 minutes; 1 minute; =0= minutes;\n"
|
||||
// this rule set takes care of the singular and plural forms
|
||||
// of "hour"
|
||||
"%%hr:\n"
|
||||
" 0 hours; 1 hour; =0= hours;\n"
|
||||
|
||||
// main rule set for formatting in numerals
|
||||
"%in-numerals:\n"
|
||||
// values below 60 seconds are shown with "sec."
|
||||
" =0= sec.;\n"
|
||||
// higher values are shown with colons: %%min-sec is used for
|
||||
// values below 3,600 seconds...
|
||||
" 60: =%%min-sec=;\n"
|
||||
// ...and %%hr-min-sec is used for values of 3,600 seconds
|
||||
// and above
|
||||
" 3600: =%%hr-min-sec=;\n"
|
||||
// this rule causes values of less than 10 minutes to show without
|
||||
// a leading zero
|
||||
"%%min-sec:\n"
|
||||
" 0: :=00=;\n"
|
||||
" 60/60: <0<>>;\n"
|
||||
// this rule set is used for values of 3,600 or more. Minutes are always
|
||||
// shown, and always shown with two digits
|
||||
"%%hr-min-sec:\n"
|
||||
" 0: :=00=;\n"
|
||||
" 60/60: <00<>>;\n"
|
||||
" 3600/60: <#,##0<:>>>;\n"
|
||||
// the lenient-parse rules allow several different characters to be used
|
||||
// as delimiters between hours, minutes, and seconds
|
||||
"%%lenient-parse:\n"
|
||||
" & ':' = '.' = ' ' = '-';\n"
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -122,4 +122,44 @@ ru {
|
||||
"Cyrs" // ISO 15924 Name
|
||||
|
||||
}
|
||||
|
||||
//------------------------------------------------------------
|
||||
// Rule Based Number Format Support
|
||||
//------------------------------------------------------------
|
||||
|
||||
// * Spellout rules for Russian.
|
||||
|
||||
// Can someone supply me with information on negatives and decimals?
|
||||
// How about words for billions and trillions?
|
||||
|
||||
SpelloutRules {
|
||||
"\u043d\u043e\u043b\u044c; \u043e\u0434\u0438\u043d; \u0434\u0432\u0430; \u0442\u0440\u0438; "
|
||||
"\u0447\u0435\u0442\u044b\u0440\u0435; \u043f\u044f\u0442; \u0448\u0435\u0441\u0442; "
|
||||
"\u0441\u0435\u043c\u044c; \u0432\u043e\u0441\u0435\u043c\u044c; \u0434\u0435\u0432\u044f\u0442;\n"
|
||||
"10: \u0434\u0435\u0441\u044f\u0442; "
|
||||
"\u043e\u0434\u0438\u043d\u043d\u0430\u0434\u0446\u0430\u0442\u044c;\n"
|
||||
"\u0434\u0432\u0435\u043d\u043d\u0430\u0434\u0446\u0430\u0442\u044c; "
|
||||
"\u0442\u0440\u0438\u043d\u0430\u0434\u0446\u0430\u0442\u044c; "
|
||||
"\u0447\u0435\u0442\u044b\u0440\u043d\u0430\u0434\u0446\u0430\u0442\u044c;\n"
|
||||
"15: \u043f\u044f\u0442\u043d\u0430\u0434\u0446\u0430\u0442\u044c; "
|
||||
"\u0448\u0435\u0441\u0442\u043d\u0430\u0434\u0446\u0430\u0442\u044c; "
|
||||
"\u0441\u0435\u043c\u043d\u0430\u0434\u0446\u0430\u0442\u044c; "
|
||||
"\u0432\u043e\u0441\u0435\u043c\u043d\u0430\u0434\u0446\u0430\u0442\u044c; "
|
||||
"\u0434\u0435\u0432\u044f\u0442\u043d\u0430\u0434\u0446\u0430\u0442\u044c;\n"
|
||||
"20: \u0434\u0432\u0430\u0434\u0446\u0430\u0442\u044c[ >>];\n"
|
||||
"30: \u0442\u0440\u043b\u0434\u0446\u0430\u0442\u044c[ >>];\n"
|
||||
"40: \u0441\u043e\u0440\u043e\u043a[ >>];\n"
|
||||
"50: \u043f\u044f\u0442\u044c\u0434\u0435\u0441\u044f\u0442[ >>];\n"
|
||||
"60: \u0448\u0435\u0441\u0442\u044c\u0434\u0435\u0441\u044f\u0442[ >>];\n"
|
||||
"70: \u0441\u0435\u043c\u044c\u0434\u0435\u0441\u044f\u0442[ >>];\n"
|
||||
"80: \u0432\u043e\u0441\u0435\u043c\u044c\u0434\u0435\u0441\u044f\u0442[ >>];\n"
|
||||
"90: \u0434\u0435\u0432\u044f\u043d\u043e\u0441\u0442\u043e[ >>];\n"
|
||||
"100: \u0441\u0442\u043e[ >>];\n"
|
||||
"200: << \u0441\u0442\u043e[ >>];\n"
|
||||
"1000: \u0442\u044b\u0441\u044f\u0447\u0430[ >>];\n"
|
||||
"2000: << \u0442\u044b\u0441\u044f\u0447\u0430[ >>];\n"
|
||||
"1,000,000: \u043c\u0438\u043b\u043b\u0438\u043e\u043d[ >>];\n"
|
||||
"2,000,000: << \u043c\u0438\u043b\u043b\u0438\u043e\u043d[ >>];\n"
|
||||
"1,000,000,000: =#,##0=;"
|
||||
}
|
||||
}
|
||||
|
@ -118,4 +118,36 @@ sv {
|
||||
"Latf", // ISO 15924 Name
|
||||
"Latg", // ISO 15924 Name
|
||||
}
|
||||
|
||||
//------------------------------------------------------------
|
||||
// Rule Based Number Format Support
|
||||
//------------------------------------------------------------
|
||||
|
||||
// * Spellout rules for Swedish.
|
||||
|
||||
// can someone supply me with information on negatives and decimals?
|
||||
|
||||
SpelloutRules {
|
||||
"noll; ett; tv\u00e5; tre; fyra; fem; sex; sjo; \u00e5tta; nio;\n"
|
||||
"tio; elva; tolv; tretton; fjorton; femton; sexton; sjutton; arton; nitton;\n"
|
||||
"20: tjugo[>>];\n"
|
||||
"30: trettio[>>];\n"
|
||||
"40: fyrtio[>>];\n"
|
||||
"50: femtio[>>];\n"
|
||||
"60: sextio[>>];\n"
|
||||
"70: sjuttio[>>];\n"
|
||||
"80: \u00e5ttio[>>];\n"
|
||||
"90: nittio[>>];\n"
|
||||
"100: hundra[>>];\n"
|
||||
"200: <<hundra[>>];\n"
|
||||
"1000: tusen[ >>];\n"
|
||||
"2000: << tusen[ >>];\n"
|
||||
"1,000,000: en miljon[ >>];\n"
|
||||
"2,000,000: << miljon[ >>];\n"
|
||||
"1,000,000,000: en miljard[ >>];\n"
|
||||
"2,000,000,000: << miljard[ >>];\n"
|
||||
"1,000,000,000,000: en biljon[ >>];\n"
|
||||
"2,000,000,000,000: << biljon[ >>];\n"
|
||||
"1,000,000,000,000,000: =#,##0="
|
||||
}
|
||||
}
|
||||
|
@ -245,4 +245,43 @@ th {
|
||||
"Thai", // Script Name
|
||||
"Thai" // ISO 15924 Name
|
||||
}
|
||||
|
||||
//------------------------------------------------------------
|
||||
// Rule Based Number Format Support
|
||||
//------------------------------------------------------------
|
||||
|
||||
// Spellout rules for Thai. Data from Suwit Srivilairith, IBM Thailand
|
||||
|
||||
SpelloutRules {
|
||||
"%default:\n"
|
||||
" -x: \u0e25\u0e1a>>;\n"
|
||||
" x.x: <<\u0e08\u0e38\u0e14>>>;\n"
|
||||
" \u0e28\u0e39\u0e19\u0e22\u0e4c; \u0e2b\u0e19\u0e36\u0e48\u0e07; \u0e2a\u0e2d\u0e07; \u0e2a\u0e32\u0e21;\n"
|
||||
" \u0e2a\u0e35\u0e48; \u0e2b\u0e49\u0e32; \u0e2b\u0e01; \u0e40\u0e08\u0e47\u0e14; \u0e41\u0e1b\u0e14;\n"
|
||||
" \u0e40\u0e01\u0e49\u0e32; \u0e2a\u0e34\u0e1a; \u0e2a\u0e34\u0e1a\u0e40\u0e2d\u0e47\u0e14;\n"
|
||||
" \u0e2a\u0e34\u0e1a\u0e2a\u0e2d\u0e07; \u0e2a\u0e34\u0e1a\u0e2a\u0e32\u0e21;\n"
|
||||
" \u0e2a\u0e34\u0e1a\u0e2a\u0e35\u0e48; \u0e2a\u0e34\u0e1a\u0e2b\u0e49\u0e32;\n"
|
||||
" \u0e2a\u0e34\u0e1a\u0e2b\u0e01; \u0e2a\u0e34\u0e1a\u0e40\u0e08\u0e47\u0e14;\n"
|
||||
" \u0e2a\u0e34\u0e1a\u0e41\u0e1b\u0e14; \u0e2a\u0e34\u0e1a\u0e40\u0e01\u0e49\u0e32;\n"
|
||||
" 20: \u0e22\u0e35\u0e48\u0e2a\u0e34\u0e1a[>%%alt-ones>];\n"
|
||||
" 30: \u0e2a\u0e32\u0e21\u0e2a\u0e34\u0e1a[>%%alt-ones>];\n"
|
||||
" 40: \u0e2a\u0e35\u0e48\u0e2a\u0e34\u0e1a[>%%alt-ones>];\n"
|
||||
" 50: \u0e2b\u0e49\u0e32\u0e2a\u0e34\u0e1a[>%%alt-ones>];\n"
|
||||
" 60: \u0e2b\u0e01\u0e2a\u0e34\u0e1a[>%%alt-ones>];\n"
|
||||
" 70: \u0e40\u0e08\u0e47\u0e14\u0e2a\u0e34\u0e1a[>%%alt-ones>];\n"
|
||||
" 80: \u0e41\u0e1b\u0e14\u0e2a\u0e34\u0e1a[>%%alt-ones>];\n"
|
||||
" 90: \u0e40\u0e01\u0e49\u0e32\u0e2a\u0e34\u0e1a[>%%alt-ones>];\n"
|
||||
" 100: <<\u0e23\u0e49\u0e2d\u0e22[>>];\n"
|
||||
" 1000: <<\u0e1e\u0e31\u0e19[>>];\n"
|
||||
" 10000: <<\u0e2b\u0e21\u0e37\u0e48\u0e19[>>];\n"
|
||||
" 100000: <<\u0e41\u0e2a\u0e19[>>];\n"
|
||||
" 1,000,000: <<\u0e25\u0e49\u0e32\u0e19[>>];\n"
|
||||
" 1,000,000,000: <<\u0e1e\u0e31\u0e19\u0e25\u0e49\u0e32\u0e19[>>];\n"
|
||||
" 1,000,000,000,000: <<\u0e25\u0e49\u0e32\u0e19\u0e25\u0e49\u0e32\u0e19[>>];\n"
|
||||
" 1,000,000,000,000,000: =#,##0=;\n"
|
||||
"%%alt-ones:\n"
|
||||
" \u0e28\u0e39\u0e19\u0e22\u0e4c;\n"
|
||||
" \u0e40\u0e2d\u0e47\u0e14;\n"
|
||||
" =%default=;\n";
|
||||
}
|
||||
}
|
||||
|
@ -70,7 +70,9 @@ uniset.o unifltlg.o unirange.o translit.o utrans.o \
|
||||
cpdtrans.o hextouni.o rbt.o rbt_data.o rbt_pars.o rbt_rule.o rbt_set.o \
|
||||
dbbi.o dbbi_tbl.o rbbi.o rbbi_tbl.o nultrans.o \
|
||||
remtrans.o titletrn.o tolowtrn.o toupptrn.o xformtrn.o \
|
||||
name2uni.o uni2name.o unitohex.o nortrans.o unifilt.o quant.o transreg.o
|
||||
name2uni.o uni2name.o unitohex.o nortrans.o unifilt.o quant.o transreg.o \
|
||||
llong.o nfrs.o nfrule.o nfsubs.o rbnf.o
|
||||
|
||||
|
||||
|
||||
STATIC_OBJECTS = $(OBJECTS:.o=.$(STATIC_O))
|
||||
|
@ -166,6 +166,10 @@ SOURCE=.\hextouni.cpp
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\llong.cpp
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\msgfmt.cpp
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
@ -174,6 +178,18 @@ SOURCE=.\name2uni.cpp
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\nfrs.cpp
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\nfrule.cpp
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\nfsubs.cpp
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\nortrans.cpp
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
@ -198,6 +214,10 @@ SOURCE=.\rbbi_tbl.cpp
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\rbnf.cpp
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\rbt.cpp
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
@ -814,6 +834,10 @@ InputPath=.\unicode\hextouni.h
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\llong.h
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\unicode\msgfmt.h
|
||||
|
||||
!IF "$(CFG)" == "i18n - Win32 Release"
|
||||
@ -860,6 +884,22 @@ InputPath=.\unicode\name2uni.h
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\nfrlist.h
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\nfrs.h
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\nfrule.h
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\nfsubs.h
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\unicode\nortrans.h
|
||||
|
||||
!IF "$(CFG)" == "i18n - Win32 Release"
|
||||
@ -1022,6 +1062,25 @@ SOURCE=.\rbbi_tbl.h
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\unicode\rbnf.h
|
||||
|
||||
!IF "$(CFG)" == "i18n - Win32 Release"
|
||||
|
||||
!ELSEIF "$(CFG)" == "i18n - Win32 Debug"
|
||||
|
||||
# Begin Custom Build
|
||||
InputPath=.\unicode\rbnf.h
|
||||
|
||||
"..\..\include\unicode\rbnf.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy unicode\rbnf.h ..\..\include\unicode
|
||||
|
||||
# End Custom Build
|
||||
|
||||
!ENDIF
|
||||
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\unicode\rbt.h
|
||||
|
||||
!IF "$(CFG)" == "i18n - Win32 Release"
|
||||
|
301
icu4c/source/i18n/llong.cpp
Normal file
301
icu4c/source/i18n/llong.cpp
Normal file
@ -0,0 +1,301 @@
|
||||
#include "llong.h"
|
||||
|
||||
#if 0
|
||||
/*
|
||||
* This should work, I think, but SOLARISCC -xO3 can't handle it.
|
||||
* Works with SOLARISGCC, SOLARISCC -g, Win32...
|
||||
*
|
||||
*/
|
||||
const llong& llong::kMaxValue = llong(0x7fffffff, 0xffffffff);
|
||||
const llong& llong::kMinValue = llong(0x80000000, 0x0);
|
||||
const llong& llong::kMinusOne = llong(0xffffffff, 0xffffffff);
|
||||
const llong& llong::kZero = llong(0x0, 0x0);
|
||||
const llong& llong::kOne = llong(0x0, 0x1);
|
||||
const llong& llong::kTwo = llong(0x0, 0x2);
|
||||
const llong& llong::kMaxDouble = llong(0x200000, 0x0);
|
||||
const llong& llong::kMinDouble = -kMaxDouble;
|
||||
#endif
|
||||
|
||||
// File-scope objects backing the llong class constants defined further down.
// The constructor arguments appear to be (high 32 bits, low 32 bits): kOneObj
// is built from (0x0, 0x1) and kMinusOneObj from all-ones in both words.
static llong kMaxValueObj(0x7fffffff, 0xffffffff);
|
||||
static llong kMinValueObj(0x80000000, 0x0);
|
||||
static llong kMinusOneObj(0xffffffff, 0xffffffff);
|
||||
static llong kZeroObj(0x0, 0x0);
|
||||
static llong kOneObj(0x0, 0x1);
|
||||
static llong kTwoObj(0x0, 0x2);
|
||||
// High word 0x200000 is 2^21, i.e. 2^53 overall if the high word is weighted
|
||||
// by 2^32 -- presumably the range in which a double still represents every
|
||||
// integer exactly (TODO confirm intent).
static llong kMaxDoubleObj(0x200000, 0x0);
|
||||
// Negation of kMaxDoubleObj, produced with llong's unary minus.
static llong kMinDoubleObj(-kMaxDoubleObj);
|
||||
|
||||
// The public class constants are references bound to the objects above. The
// disabled (#if 0) variant earlier in this file bound them to temporaries
// instead; its comment reports that SOLARISCC -xO3 could not handle that.
const llong& llong::kMaxValue = kMaxValueObj;
|
||||
const llong& llong::kMinValue = kMinValueObj;
|
||||
const llong& llong::kMinusOne = kMinusOneObj;
|
||||
const llong& llong::kZero = kZeroObj;
|
||||
const llong& llong::kOne = kOneObj;
|
||||
const llong& llong::kTwo = kTwoObj;
|
||||
const llong& llong::kMaxDouble = kMaxDoubleObj;
|
||||
const llong& llong::kMinDouble = kMinDoubleObj;
|
||||
|
||||
// 46340 is the integer part of sqrt(2^31): the product of two values that are
// both below it is less than 2^31. Used as the threshold for the
// single-word fast paths in operator*=.
#define SQRT231 46340
|
||||
|
||||
// 0xffffffff converted to double, plus one: 2^32 as a double.
const double llong::kD32 = ((double)(0xffffffffu)) + 1;
|
||||
// kMaxDouble converted via llong_asDouble (declared elsewhere). This reads
|
||||
// kMaxDouble, which is defined earlier in this translation unit.
const double llong::kDMax = llong_asDouble(kMaxDouble);
|
||||
// Negation of kDMax.
const double llong::kDMin = -kDMax;
|
||||
|
||||
/**
 * Multiplies this value by rhs in place and returns *this.
 *
 * Small non-negative operands are multiplied directly in 32 bits.  All other
 * cases multiply the magnitudes with a shift-and-add loop and then apply the
 * sign.  The result wraps modulo 2^64 on overflow.
 *
 * The magnitude of kMinValue has its top bit set (negate() leaves it
 * unchanged), so the loop treats both magnitudes as unsigned: the multiplier
 * is shifted right logically, otherwise sign extension would keep it
 * non-zero forever.
 */
llong& llong::operator*=(const llong& rhs)
{
    // optimize small positive multiplications
    if (hi == 0 && rhs.hi == 0 && lo < SQRT231 && rhs.lo < SQRT231) {
        lo *= rhs.lo;
        return *this;
    }

    // Record the sign of the result before touching anything; rhs may alias *this.
    const UBool negativeResult = isNegative() != rhs.isNegative();

    llong a(*this);
    if (a.isNegative()) {
        a.negate();
    }

    llong b(rhs);
    if (b.isNegative()) {
        b.negate();
    }

    llong r;
    if (a.hi == 0 && b.hi == 0 && a.lo < SQRT231 && b.lo < SQRT231) {
        // optimize small negative multiplications
        r.lo = a.lo * b.lo;
    } else {
        // Iterate over the bits of the smaller magnitude (fewer iterations).
        if (ult(a, b)) {
            llong t = a;
            a = b;
            b = t;
        }
        while (b.notZero()) {
            if (b.lo & 0x1) {
                r += a;
            }
            b.ushr(1);  // logical shift: must not sign-extend
            a <<= 1;
        }
    }

    if (negativeResult) {
        r.negate();
    }
    *this = r;
    return *this;
}
|
||||
|
||||
/**
 * Divides this value by rhs in place (truncating toward zero) and returns
 * *this.
 *
 * Special cases:
 *  - a zero dividend is returned unchanged, whatever the divisor;
 *  - division by zero does not trap: the result is kMinValue when the
 *    dividend is negative and kMaxValue otherwise;
 *  - kMinValue / -1 wraps back to kMinValue.
 *
 * The division is carried out on the magnitudes of both operands, treated
 * as unsigned 64-bit values so that the magnitude of kMinValue (2^63) is
 * handled like any other.
 */
llong& llong::operator/=(const llong& rhs)
{
    if (isZero()) {
        return *this;
    }

    int32_t sign = 1;
    llong a(*this);
    if (a.isNegative()) {
        sign = -1;
        a.negate();
    }
    llong b(rhs);
    if (b.isNegative()) {
        sign = -sign;
        b.negate();
    }

    if (b.isZero()) { // should throw div by zero error
        *this = sign < 0 ? kMinValue : kMaxValue;
    } else if (a.hi == 0 && b.hi == 0) {
        // Both magnitudes fit in 32 bits.  Build the quotient as an unsigned
        // low word so that quotients >= 2^31 are not misread as negative.
        llong q(0, a.lo / b.lo);
        if (sign < 0) {
            q.negate();
        }
        *this = q;
    } else if (ugt(b, a)) {
        *this = kZero;
    } else if (b == a) {
        *this = llong(sign);
    } else {
        // Shift-and-subtract long division; here 0 < b < a as unsigned values.
        const llong divisor(b);
        llong r;
        llong m(0, 1);

        // Align b with a: double b (and the matching quotient bit m) while
        // the doubled value still fits under a.  Stop before b's top bit
        // would be shifted out.
        while (!b.isNegative()) {
            llong doubled(b);
            doubled <<= 1;
            if (ugt(doubled, a)) {
                break;
            }
            b = doubled;
            m <<= 1;
        }

        // Walk back down one bit at a time.  Once the remainder is smaller
        // than the original divisor, all remaining quotient bits are zero.
        do {
            if (uge(a, b)) {
                a -= b;
                r |= m;
            }
            m.ushr(1); // don't sign-extend!
            b.ushr(1);
        } while (m.notZero() && uge(a, divisor));

        if (sign < 0) {
            r.negate();
        }
        *this = r;
    }
    return *this;
}
|
||||
|
||||
// Digit characters for radices up to 36, indexed by digit value.  The
// entries are spelled as numeric code points (ASCII '0'-'9' then 'a'-'z')
// rather than character literals.
static uint8_t asciiDigits[] = {
    // '0' - '9'
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39,
    // 'a' - 'z'
    0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a,
    0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74,
    0x75, 0x76, 0x77, 0x78, 0x79, 0x7a
};
|
||||
|
||||
static UChar kUMinus = (UChar)0x002d;
|
||||
static char kMinus = (char)0x2d;
|
||||
|
||||
// Lookup table for the 128 ASCII code points, used when parsing digits.
// An entry of 0 means "not a digit".  Otherwise bit 0x80 marks the entry as
// valid and the low 7 bits hold the digit value (0-35); the marker bit lets
// the digit '0' be told apart from "not a digit".  Upper- and lower-case
// letters map to the same values.
static uint8_t digitInfo[] = {
    // 0x00 - 0x0f
       0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
    // 0x10 - 0x1f
       0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
    // 0x20 - 0x2f
       0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
    // 0x30 - 0x3f: '0' - '9'
    0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89,    0,    0,    0,    0,    0,    0,
    // 0x40 - 0x4f: 'A' - 'O'
       0, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98,
    // 0x50 - 0x5f: 'P' - 'Z'
    0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3,    0,    0,    0,    0,    0,
    // 0x60 - 0x6f: 'a' - 'o'
       0, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98,
    // 0x70 - 0x7f: 'p' - 'z'
    0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3,    0,    0,    0,    0,    0
};
|
||||
|
||||
/**
 * Parses a signed integer from a NUL-terminated char string.
 *
 * @param str   text to parse; an optional leading '-' is accepted
 * @param radix numeric base; values outside 2..36 are clamped to that range
 * @return the parsed value; parsing stops at the first character that is
 *         not a digit in the given radix (an empty digit run yields zero)
 */
llong atoll(const char* str, uint32_t radix)
{
    if (radix > 36) {
        radix = 36;
    } else if (radix < 2) {
        radix = 2;
    }
    llong lradix(radix);

    int neg = 0;
    if (*str == kMinus) {
        ++str;
        neg = 1;
    }

    llong result;
    for (;;) {
        // Read the byte as unsigned: plain char may be signed, and digitInfo
        // only covers 0x00-0x7f, so anything larger ends the number.
        const uint8_t c = (uint8_t)*str++;
        if (c >= 0x80) {
            break;
        }
        uint8_t b = digitInfo[c];
        if (b == 0) {
            break;              // not a digit (this also stops at the NUL)
        }
        b &= 0x7f;              // strip the "is a digit" marker bit
        if (b >= radix) {
            break;              // digit not valid in this radix
        }
        result *= lradix;
        result += (int32_t)b;
    }

    if (neg) {
        result.negate();
    }
    return result;
}
|
||||
|
||||
/**
 * Parses a signed integer from a NUL-terminated UChar string.
 *
 * @param str   text to parse; an optional leading '-' is accepted
 * @param radix numeric base; values outside 2..36 are clamped to that range
 * @return the parsed value; parsing stops at the first code unit that is
 *         not a digit in the given radix
 */
llong u_atoll(const UChar* str, uint32_t radix)
{
    if (radix < 2) {
        radix = 2;
    } else if (radix > 36) {
        radix = 36;
    }
    llong lradix(radix);

    int neg = 0;
    if (*str == kUMinus) {
        neg = 1;
        ++str;
    }

    llong result;
    for (;;) {
        UChar c = *str++;
        if (!(c < 0x0080)) {
            break;              // outside the range covered by digitInfo
        }
        uint8_t b = digitInfo[c];
        if (!b) {
            break;              // not a digit
        }
        b &= 0x7f;              // digit value without the marker bit
        if (!(b < radix)) {
            break;              // digit not valid in this radix
        }
        result *= lradix;
        result += (int32_t)b;
    }

    if (neg) {
        result.negate();
    }
    return result;
}
|
||||
|
||||
uint32_t lltoa(const llong& val, char* buf, uint32_t len, uint32_t radix, UBool raw)
|
||||
{
|
||||
if (radix > 36) {
|
||||
radix = 36;
|
||||
} else if (radix < 2) {
|
||||
radix = 2;
|
||||
}
|
||||
llong base(radix);
|
||||
|
||||
char* p = buf;
|
||||
llong w(val);
|
||||
if (len && w.isNegative()) {
|
||||
w.negate();
|
||||
*p++ = kMinus;
|
||||
--len;
|
||||
}
|
||||
|
||||
while (len && w.notZero()) {
|
||||
llong n = w / base;
|
||||
llong m = n * base;
|
||||
int32_t d = llong_asInt(w-m);
|
||||
*p++ = raw ? (char)d : asciiDigits[d];
|
||||
w = n;
|
||||
--len;
|
||||
}
|
||||
if (len) {
|
||||
*p = 0; // null terminate if room for caller convenience
|
||||
}
|
||||
|
||||
len = p - buf;
|
||||
if (*buf == kMinus) {
|
||||
++buf;
|
||||
}
|
||||
while (--p > buf) {
|
||||
char c = *p;
|
||||
*p = *buf;
|
||||
*buf = c;
|
||||
++buf;
|
||||
}
|
||||
|
||||
return len;
|
||||
}
|
||||
|
||||
uint32_t u_lltoa(const llong& val, UChar* buf, uint32_t len, uint32_t radix, UBool raw)
|
||||
{
|
||||
if (radix > 36) {
|
||||
radix = 36;
|
||||
} else if (radix < 2) {
|
||||
radix = 2;
|
||||
}
|
||||
llong base(radix);
|
||||
|
||||
UChar* p = buf;
|
||||
llong w(val);
|
||||
if (len && w.isNegative()) {
|
||||
w.negate();
|
||||
*p++ = kUMinus;
|
||||
--len;
|
||||
}
|
||||
|
||||
while (len && w.notZero()) {
|
||||
llong n = w / base;
|
||||
llong m = n * base;
|
||||
int32_t d = llong_asInt(w-m);
|
||||
*p++ = (UChar)(raw ? d : asciiDigits[d]);
|
||||
w = n;
|
||||
--len;
|
||||
}
|
||||
if (len) {
|
||||
*p = 0; // null terminate if room for caller convenience
|
||||
}
|
||||
|
||||
len = p - buf;
|
||||
if (*buf == kUMinus) {
|
||||
++buf;
|
||||
}
|
||||
while (--p > buf) {
|
||||
UChar c = *p;
|
||||
*p = *buf;
|
||||
*buf = c;
|
||||
++buf;
|
||||
}
|
||||
|
||||
return len;
|
||||
}
|
312
icu4c/source/i18n/llong.h
Normal file
312
icu4c/source/i18n/llong.h
Normal file
@ -0,0 +1,312 @@
|
||||
// thanks to Mike Cowlishaw
|
||||
|
||||
#ifndef LLONG_H
|
||||
#define LLONG_H
|
||||
|
||||
// debug
|
||||
#include <stdio.h>
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
// machine dependent value, need to move
|
||||
#define __u_IntBits 32
|
||||
|
||||
class llong {
|
||||
public:
|
||||
uint32_t lo;
|
||||
int32_t hi;
|
||||
private:
|
||||
enum {
|
||||
MASK32 = 0xffffffffu
|
||||
};
|
||||
|
||||
static const double kD32; // 2^^32 as a double
|
||||
static const double kDMin; // -(2^^54), minimum double with full integer precision
|
||||
static const double kDMax; // 2^^54, maximum double with full integer precision
|
||||
|
||||
// private constructor
|
||||
// should be private, but we can't construct the way we want using SOLARISCC
|
||||
// so make public in order that file statics can access this constructor
|
||||
public:
|
||||
llong(int32_t h, uint32_t l) : lo(l), hi(h) {}
|
||||
private:
|
||||
// convenience, size reduction in inline code
|
||||
llong& nnot() { hi = ~hi; lo = ~lo; return *this; }
|
||||
llong& negate() { hi = ~hi; lo = ~lo; if (!++lo) ++hi; return *this; }
|
||||
llong& abs() { if (hi < 0) negate(); return *this; }
|
||||
UBool notZero() const { return (hi | lo) != 0; }
|
||||
UBool isZero() const { return (hi | lo) == 0; }
|
||||
UBool isNegative() const { return hi < 0; }
|
||||
|
||||
public:
|
||||
llong() : lo(0), hi(0) {}
|
||||
llong(const int32_t l) : lo((unsigned)l), hi(l < 0 ? -1 : 0) {}
|
||||
llong(const int16_t l) : lo((unsigned)l), hi(l < 0 ? -1 : 0) {}
|
||||
llong(const int8_t l) : lo((unsigned)l), hi(l < 0 ? -1 : 0) {}
|
||||
#if __u_IntBits == 64
|
||||
llong(const int i) : lo(i & MASK32), hi(i >> 32) {}
|
||||
#endif
|
||||
llong(uint16_t s) : lo(s), hi(0) {}
|
||||
llong(uint32_t l) : lo(l), hi(0) {}
|
||||
#if __u_IntBits == 64
|
||||
llong(unsigned int i) : lo(i & MASK32), hi(i >> 32) {}
|
||||
#endif
|
||||
llong(double d) { // avoid dependency on bit representation of double
|
||||
if (uprv_isNaN(d)) {
|
||||
*this = llong::kZero;
|
||||
} else if (d < kDMin) {
|
||||
*this = llong::kMinDouble;
|
||||
} else if (d > kDMax) {
|
||||
*this = llong::kMaxDouble;
|
||||
} else {
|
||||
int neg = d < 0;
|
||||
if (neg) d = -d;
|
||||
d = uprv_floor(d);
|
||||
hi = (int32_t)uprv_floor(d / kD32);
|
||||
d -= kD32 * hi;
|
||||
lo = (uint32_t)d;
|
||||
if (neg) negate();
|
||||
}
|
||||
}
|
||||
|
||||
llong(const llong& rhs) : lo(rhs.lo), hi(rhs.hi) {}
|
||||
|
||||
// the following cause ambiguities in binary expressions,
|
||||
// even if we overload all methods on all args!
|
||||
// so you have to use global functions
|
||||
// operator const int32_t() const;
|
||||
// operator const uint32_t() const;
|
||||
// operator const double() const;
|
||||
|
||||
friend int32_t llong_asInt(const llong& lhs);
|
||||
friend uint32_t llong_asUInt(const llong& lhs);
|
||||
friend double llong_asDouble(const llong& lhs);
|
||||
|
||||
llong& operator=(const llong& rhs) { lo = rhs.lo; hi = rhs.hi; return *this; }
|
||||
|
||||
// left shift
|
||||
llong& operator<<=(int32_t shift) {
|
||||
shift &= 63; // like java spec
|
||||
if (shift < 32) {
|
||||
hi = (signed)(hi << shift | lo >> (32 - shift)); // no sign extension on lo since unsigned
|
||||
lo <<= shift;
|
||||
} else {
|
||||
hi = (signed)(lo << (shift - 32));
|
||||
lo = 0;
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
llong operator<<(int32_t shift) const { llong r(*this); r <<= shift; return r; }
|
||||
|
||||
// right shift with sign extension
|
||||
llong& operator>>=(int32_t shift) {
|
||||
shift &= 63; // like java spec
|
||||
if (shift < 32) {
|
||||
lo >>= shift;
|
||||
lo |= (hi << (32 - shift));
|
||||
hi = hi >> shift; // note sign extension
|
||||
} else {
|
||||
lo = (unsigned)(hi >> (shift - 32)); // note sign extension
|
||||
hi = hi < 0 ? -1 : 0;
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
llong operator>>(int32_t shift) const { llong r(*this); r >>= shift; return r; }
|
||||
|
||||
// unsigned right shift
|
||||
friend llong ushr(const llong& lhs, int32_t shift);
|
||||
|
||||
// bit operations
|
||||
friend llong operator&(const llong& lhs, const llong& rhs);
|
||||
friend llong operator|(const llong& lhs, const llong& rhs);
|
||||
friend llong operator^(const llong& lhs, const llong& rhs);
|
||||
|
||||
friend llong operator&(const llong& lhs, const uint32_t rhs);
|
||||
friend llong operator|(const llong& lhs, const uint32_t rhs);
|
||||
friend llong operator^(const llong& lhs, const uint32_t rhs);
|
||||
|
||||
llong operator~() const { return llong(~hi, ~lo); }
|
||||
// is this useful?
|
||||
// UBool operator!() const { return !(hi | lo); }
|
||||
|
||||
llong& operator&=(const llong& rhs) { hi &= rhs.hi; lo &= rhs.lo; return *this; }
|
||||
llong& operator|=(const llong& rhs) { hi |= rhs.hi; lo |= rhs.lo; return *this; }
|
||||
llong& operator^=(const llong& rhs) { hi ^= rhs.hi; lo ^= rhs.lo; return *this; }
|
||||
|
||||
llong& operator&=(const uint32_t rhs) { hi = 0; lo &= rhs; return *this; }
|
||||
llong& operator|=(const uint32_t rhs) { lo |= rhs; return *this; }
|
||||
llong& operator^=(const uint32_t rhs) { lo ^= rhs; return *this; }
|
||||
|
||||
// no logical ops since we can't enforce order of evaluation, not much use anyway?
|
||||
|
||||
// comparison
|
||||
friend UBool operator==(const llong& lhs, const llong& rhs);
|
||||
friend UBool operator!=(const llong& lhs, const llong& rhs);
|
||||
friend UBool operator> (const llong& lhs, const llong& rhs);
|
||||
friend UBool operator< (const llong& lhs, const llong& rhs);
|
||||
friend UBool operator>=(const llong& lhs, const llong& rhs);
|
||||
friend UBool operator<=(const llong& lhs, const llong& rhs);
|
||||
|
||||
// overload comparison to native int to avoid conversion to llong for common comparisons
|
||||
friend UBool operator==(const llong& lhs, const int32_t rhs);
|
||||
friend UBool operator!=(const llong& lhs, const int32_t rhs);
|
||||
friend UBool operator> (const llong& lhs, const int32_t rhs);
|
||||
friend UBool operator< (const llong& lhs, const int32_t rhs);
|
||||
friend UBool operator>=(const llong& lhs, const int32_t rhs);
|
||||
friend UBool operator<=(const llong& lhs, const int32_t rhs);
|
||||
|
||||
// unsigned comparison
|
||||
friend UBool ugt(const llong& lhs, const llong& rhs);
|
||||
friend UBool ult(const llong& lhs, const llong& rhs);
|
||||
friend UBool uge(const llong& lhs, const llong& rhs);
|
||||
friend UBool ule(const llong& lhs, const llong& rhs);
|
||||
|
||||
// prefix inc/dec
|
||||
llong& operator++() { if (!++lo) ++hi; return *this; }
|
||||
llong& operator--() { if (!lo--) --hi; return *this; }
|
||||
|
||||
// postfix inc/dec
|
||||
llong operator++(int) { llong r(*this); if (!++lo) ++hi; return r; }
|
||||
llong operator--(int) { llong r(*this); if (!lo--) --hi; return r; }
|
||||
|
||||
// unary minus
|
||||
llong operator-() const { uint32_t l = ~lo + 1; return llong(l ? ~hi : ~hi + 1, l); }
|
||||
|
||||
// addition and subtraction
|
||||
llong& operator-=(const llong& rhs) { hi -= rhs.hi; if (lo < rhs.lo) --hi; lo -= rhs.lo; return *this; }
|
||||
friend llong operator-(const llong& lhs, const llong& rhs);
|
||||
|
||||
llong& operator+=(const llong& rhs) { return *this -= -rhs; }
|
||||
friend llong operator+(const llong& lhs, const llong& rhs);
|
||||
|
||||
// pluttification and fizzen'
|
||||
llong& operator*=(const llong& rhs);
|
||||
friend llong operator*(const llong& lhs, const llong& rhs);
|
||||
|
||||
llong& operator/=(const llong& rhs);
|
||||
friend llong operator/(const llong& lhs, const llong& rhs);
|
||||
|
||||
llong& operator%=(const llong& rhs) { return operator-=((*this / rhs) * rhs); }
|
||||
friend llong operator%(const llong& lhs, const llong& rhs);
|
||||
|
||||
// power function, positive integral powers only
|
||||
friend llong llong_pow(const llong& lhs, uint32_t n);
|
||||
|
||||
// absolute value
|
||||
friend llong llong_abs(const llong& lhs);
|
||||
|
||||
// simple construction from ASCII and Unicode strings
|
||||
friend llong atoll(const char* str, uint32_t radix = 10);
|
||||
friend llong u_atoll(const UChar* str, uint32_t radix = 10);
|
||||
|
||||
// output as ASCII or Unicode strings or as raw values, preceeding '-' if signed
|
||||
friend uint32_t lltoa(const llong& lhs, char* buffer, uint32_t buflen, uint32_t radix = 10, UBool raw = FALSE);
|
||||
friend uint32_t u_lltoa(const llong& lhs, UChar* buffer, uint32_t buflen, uint32_t radix = 10, UBool raw = FALSE);
|
||||
|
||||
// useful public constants - perhaps should not have class statics
|
||||
static const llong& kMaxValue;
|
||||
static const llong& kMinValue;
|
||||
static const llong& kMinusOne;
|
||||
static const llong& kZero;
|
||||
static const llong& kOne;
|
||||
static const llong& kTwo;
|
||||
|
||||
private:
|
||||
static const llong& kMaxDouble;
|
||||
static const llong& kMinDouble;
|
||||
|
||||
// right shift without sign extension
|
||||
llong& ushr(int32_t shift) {
|
||||
shift &= 0x63;
|
||||
if (shift < 32) {
|
||||
lo >>= shift;
|
||||
lo |= (hi << (32 - shift));
|
||||
hi = (signed)(((unsigned)hi) >> shift);
|
||||
} else {
|
||||
lo = (unsigned)(((unsigned)hi) >> (shift - 32));
|
||||
hi = 0;
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
// back door for test
|
||||
friend void llong_test();
|
||||
};
|
||||
|
||||
// Bitwise operators on two llong values: each half is combined separately.
inline llong operator& (const llong& lhs, const llong& rhs)
{
    const int32_t upper = lhs.hi & rhs.hi;
    const uint32_t lower = lhs.lo & rhs.lo;
    return llong(upper, lower);
}

inline llong operator| (const llong& lhs, const llong& rhs)
{
    const int32_t upper = lhs.hi | rhs.hi;
    const uint32_t lower = lhs.lo | rhs.lo;
    return llong(upper, lower);
}

inline llong operator^ (const llong& lhs, const llong& rhs)
{
    const int32_t upper = lhs.hi ^ rhs.hi;
    const uint32_t lower = lhs.lo ^ rhs.lo;
    return llong(upper, lower);
}

// Bitwise operators with a 32-bit mask.  The mask is treated as having a
// zero upper word: AND clears the upper half, OR and XOR leave it alone.
inline llong operator& (const llong& lhs, const uint32_t rhs)
{
    return llong(0, lhs.lo & rhs);
}

inline llong operator| (const llong& lhs, const uint32_t rhs)
{
    return llong(lhs.hi, lhs.lo | rhs);
}

inline llong operator^ (const llong& lhs, const uint32_t rhs)
{
    return llong(lhs.hi, lhs.lo ^ rhs);
}
|
||||
|
||||
// Signed comparisons between two llong values.  The upper words decide
// (compared as signed) unless they are equal, in which case the lower words
// decide (compared as unsigned).
inline UBool operator==(const llong& lhs, const llong& rhs)
{
    return lhs.hi == rhs.hi && lhs.lo == rhs.lo;
}

inline UBool operator!=(const llong& lhs, const llong& rhs)
{
    return lhs.hi != rhs.hi || lhs.lo != rhs.lo;
}

inline UBool operator> (const llong& lhs, const llong& rhs)
{
    if (lhs.hi != rhs.hi) {
        return lhs.hi > rhs.hi;
    }
    return lhs.lo > rhs.lo;
}

inline UBool operator< (const llong& lhs, const llong& rhs)
{
    if (lhs.hi != rhs.hi) {
        return lhs.hi < rhs.hi;
    }
    return lhs.lo < rhs.lo;
}

inline UBool operator>=(const llong& lhs, const llong& rhs)
{
    if (lhs.hi != rhs.hi) {
        return lhs.hi >= rhs.hi;
    }
    return lhs.lo >= rhs.lo;
}

inline UBool operator<=(const llong& lhs, const llong& rhs)
{
    if (lhs.hi != rhs.hi) {
        return lhs.hi <= rhs.hi;
    }
    return lhs.lo <= rhs.lo;
}

// Signed comparisons against a native 32-bit int.  "ext" is the upper word
// the int would have after sign extension to 64 bits (-1 or 0).
inline UBool operator==(const llong& lhs, const int32_t rhs)
{
    const int32_t ext = rhs < 0 ? -1 : 0;
    return lhs.lo == (unsigned)rhs && lhs.hi == ext;
}

inline UBool operator!=(const llong& lhs, const int32_t rhs)
{
    const int32_t ext = rhs < 0 ? -1 : 0;
    return lhs.lo != (unsigned)rhs || lhs.hi != ext;
}

inline UBool operator> (const llong& lhs, const int32_t rhs)
{
    const int32_t ext = rhs < 0 ? -1 : 0;
    if (lhs.hi != ext) {
        return lhs.hi > ext;
    }
    return lhs.lo > (unsigned)rhs;
}

inline UBool operator< (const llong& lhs, const int32_t rhs)
{
    const int32_t ext = rhs < 0 ? -1 : 0;
    if (lhs.hi != ext) {
        return lhs.hi < ext;
    }
    return lhs.lo < (unsigned)rhs;
}

inline UBool operator>=(const llong& lhs, const int32_t rhs)
{
    const int32_t ext = rhs < 0 ? -1 : 0;
    if (lhs.hi != ext) {
        return lhs.hi > ext;
    }
    return lhs.lo >= (unsigned)rhs;
}

inline UBool operator<=(const llong& lhs, const int32_t rhs)
{
    const int32_t ext = rhs < 0 ? -1 : 0;
    if (lhs.hi != ext) {
        return lhs.hi < ext;
    }
    return lhs.lo <= (unsigned)rhs;
}

// Unsigned comparisons: both words are compared as unsigned.
inline UBool ugt(const llong& lhs, const llong& rhs)
{
    if (lhs.hi != rhs.hi) {
        return (unsigned)lhs.hi > (unsigned)rhs.hi;
    }
    return lhs.lo > rhs.lo;
}

inline UBool ult(const llong& lhs, const llong& rhs)
{
    if (lhs.hi != rhs.hi) {
        return (unsigned)lhs.hi < (unsigned)rhs.hi;
    }
    return lhs.lo < rhs.lo;
}

inline UBool uge(const llong& lhs, const llong& rhs)
{
    if (lhs.hi != rhs.hi) {
        return (unsigned)lhs.hi >= (unsigned)rhs.hi;
    }
    return lhs.lo >= rhs.lo;
}

inline UBool ule(const llong& lhs, const llong& rhs)
{
    if (lhs.hi != rhs.hi) {
        return (unsigned)lhs.hi <= (unsigned)rhs.hi;
    }
    return lhs.lo <= rhs.lo;
}
|
||||
|
||||
// Unsigned (logical) right shift of a copy of lhs.
inline llong ushr(const llong& lhs, int32_t shift)
{
    llong shifted(lhs);
    shifted.ushr(shift);
    return shifted;
}

// Subtraction: subtract the halves, borrowing one from the upper word when
// the lower word would underflow.
inline llong operator-(const llong& lhs, const llong& rhs)
{
    const int32_t borrow = lhs.lo < rhs.lo ? 1 : 0;
    return llong(lhs.hi - rhs.hi - borrow, lhs.lo - rhs.lo);
}

// Addition is expressed as subtraction of the negated operand.
inline llong operator+(const llong& lhs, const llong& rhs)
{
    const llong negated(-rhs);
    return lhs - negated;
}

// The binary forms below copy lhs and apply the compound assignment.
inline llong operator*(const llong& lhs, const llong& rhs)
{
    llong product(lhs);
    product *= rhs;
    return product;
}

inline llong operator/(const llong& lhs, const llong& rhs)
{
    llong quotient(lhs);
    quotient /= rhs;
    return quotient;
}

inline llong operator%(const llong& lhs, const llong& rhs)
{
    llong remainder(lhs);
    remainder %= rhs;
    return remainder;
}
|
||||
|
||||
inline int32_t llong_asInt(const llong& lhs) { return (int32_t)(lhs.lo | (lhs.hi < 0 ? 0x80000000 : 0)); }
|
||||
inline uint32_t llong_asUInt(const llong& lhs) { return lhs.lo; }
|
||||
inline double llong_asDouble(const llong& lhs) { return llong::kD32 * lhs.hi + lhs.lo; }
|
||||
|
||||
// Raises lhs to the n-th power by repeated multiplication.
// A zero base yields zero (even for n == 0); otherwise n == 0 yields one.
inline llong llong_pow(const llong& lhs, uint32_t n)
{
    if (lhs.isZero()) {
        return llong::kZero;
    }
    if (n == 0) {
        return llong::kOne;
    }

    // n >= 1 here: start from lhs and multiply n - 1 more times.
    llong result(lhs);
    for (uint32_t remaining = n - 1; remaining > 0; --remaining) {
        result *= lhs;
    }
    return result;
}
|
||||
|
||||
// Absolute value: the negation of a negative value, otherwise a copy.
inline llong llong_abs(const llong& lhs)
{
    if (lhs.isNegative()) {
        return -lhs;
    }
    return lhs;
}
|
||||
|
||||
// Originally, I thought that overloading on int32 was too complex or too large to get inlined, and
|
||||
// since I mainly wanted to optimize comparisons to zero, I overloaded on uint32_t instead
|
||||
// since it has a simpler implementation.
|
||||
// But this means that llong(-1) != -1 (since the comparison treats the rhs as unsigned, but
|
||||
// the constructor does not). So I am using the signed versions after all.
|
||||
|
||||
#if 0
|
||||
inline UBool operator==(const llong& lhs, const uint32_t rhs) { return lhs.lo == rhs && lhs.hi == 0; }
|
||||
inline UBool operator!=(const llong& lhs, const uint32_t rhs) { return lhs.lo != rhs || lhs.hi != 0; }
|
||||
inline UBool operator> (const llong& lhs, const uint32_t rhs) { return lhs.hi == 0 ? lhs.lo > rhs : lhs.hi > 0; }
|
||||
inline UBool operator< (const llong& lhs, const uint32_t rhs) { return lhs.hi == 0 ? lhs.lo < rhs : lhs.hi < 0; }
|
||||
inline UBool operator>=(const llong& lhs, const uint32_t rhs) { return lhs.hi == 0 ? lhs.lo >= rhs : lhs.hi >= 0; }
|
||||
inline UBool operator<=(const llong& lhs, const uint32_t rhs) { return lhs.hi == 0 ? lhs.lo <= rhs : lhs.hi <= 0; }
|
||||
#endif
|
||||
|
||||
// LLONG_H
|
||||
#endif
|
68
icu4c/source/i18n/nfrlist.h
Normal file
68
icu4c/source/i18n/nfrlist.h
Normal file
@ -0,0 +1,68 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 1997-2001, International Business Machines Corporation and others. All Rights Reserved.
|
||||
*******************************************************************************
|
||||
*/
|
||||
#ifndef NFRLIST_H
|
||||
#define NFRLIST_H
|
||||
|
||||
#include "cmemory.h"
|
||||
#include "unicode/umachine.h"
|
||||
|
||||
#include "nfrule.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
// unsafe class for internal use only. assume memory allocations succeed, indexes are valid.
|
||||
// should be a template, but we can't use them
|
||||
|
||||
class NFRuleList {
|
||||
protected:
|
||||
NFRule** fStuff;
|
||||
uint32_t fCount;
|
||||
uint32_t fCapacity;
|
||||
public:
|
||||
NFRuleList(int capacity = 10)
|
||||
: fStuff(capacity ? (NFRule**)uprv_malloc(capacity * sizeof(NFRule*)) : NULL)
|
||||
, fCount(0)
|
||||
, fCapacity(capacity) {};
|
||||
~NFRuleList() {
|
||||
if (fStuff) {
|
||||
for(uint32_t i = 0; i < fCount; ++i) {
|
||||
delete fStuff[i];
|
||||
}
|
||||
uprv_free(fStuff);
|
||||
}
|
||||
}
|
||||
NFRule* operator[](uint32_t index) const { return fStuff[index]; }
|
||||
NFRule* remove(uint32_t index) {
|
||||
NFRule* result = fStuff[index];
|
||||
fCount -= 1;
|
||||
for (uint32_t i = index; i < fCount; ++i) { // assumes small arrays
|
||||
fStuff[i] = fStuff[i+1];
|
||||
}
|
||||
return result;
|
||||
}
|
||||
void add(NFRule* thing) {
|
||||
if (fCount == fCapacity) {
|
||||
fCapacity += 10;
|
||||
fStuff = (NFRule**)uprv_realloc(fStuff, fCapacity * sizeof(NFRule*)); // assume success
|
||||
}
|
||||
fStuff[fCount++] = thing;
|
||||
}
|
||||
uint32_t size() const { return fCount; }
|
||||
NFRule* last() const { return fCount > 0 ? fStuff[fCount-1] : NULL; }
|
||||
NFRule** release() {
|
||||
add(NULL); // ensure null termination
|
||||
NFRule** result = fStuff;
|
||||
fStuff = NULL;
|
||||
fCount = 0;
|
||||
fCapacity = 0;
|
||||
return result;
|
||||
}
|
||||
};
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
// NFRLIST_H
|
||||
#endif
|
659
icu4c/source/i18n/nfrs.cpp
Normal file
659
icu4c/source/i18n/nfrs.cpp
Normal file
@ -0,0 +1,659 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 1997-2001, International Business Machines Corporation and others. All Rights Reserved.
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
#include <math.h>
|
||||
|
||||
#include "nfrs.h"
|
||||
#include "nfrule.h"
|
||||
#include "nfrlist.h"
|
||||
#include "cmemory.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
#if 0
|
||||
// euclid's algorithm works with doubles
|
||||
// note, doubles only get us up to one quadrillion or so, which
|
||||
// isn't as much range as we get with longs. We probably still
|
||||
// want either 64-bit math, or BigInteger.
|
||||
|
||||
static llong
|
||||
util_lcm(llong x, llong y)
|
||||
{
|
||||
x.abs();
|
||||
y.abs();
|
||||
|
||||
if (x == 0 || y == 0) {
|
||||
return 0;
|
||||
} else {
|
||||
do {
|
||||
if (x < y) {
|
||||
llong t = x; x = y; y = t;
|
||||
}
|
||||
x -= y * (x/y);
|
||||
} while (x != 0);
|
||||
|
||||
return y;
|
||||
}
|
||||
}
|
||||
|
||||
#else
|
||||
/**
|
||||
* Calculates the least common multiple of x and y.
|
||||
*/
|
||||
static llong
|
||||
util_lcm(llong x, llong y)
|
||||
{
|
||||
// binary gcd algorithm from Knuth, "The Art of Computer Programming,"
|
||||
// vol. 2, 1st ed., pp. 298-299
|
||||
llong x1 = x;
|
||||
llong y1 = y;
|
||||
|
||||
int p2 = 0;
|
||||
while ((x1 & 1) == 0 && (y1 & 1) == 0) {
|
||||
++p2;
|
||||
x1 >>= 1;
|
||||
y1 >>= 1;
|
||||
}
|
||||
|
||||
llong t;
|
||||
if ((x1 & 1) == 1) {
|
||||
t = -y1;
|
||||
} else {
|
||||
t = x1;
|
||||
}
|
||||
|
||||
while (t != 0) {
|
||||
while ((t & 1) == 0) {
|
||||
t >>= 1;
|
||||
}
|
||||
if (t > 0) {
|
||||
x1 = t;
|
||||
} else {
|
||||
y1 = -t;
|
||||
}
|
||||
t = x1 - y1;
|
||||
}
|
||||
|
||||
llong gcd = x1 << p2;
|
||||
|
||||
// x * y == gcd(x, y) * lcm(x, y)
|
||||
return x / gcd * y;
|
||||
}
|
||||
#endif
|
||||
|
||||
// Code points of the punctuation used when parsing rule set descriptions.
static const UChar gPercent = 0x0025;    // '%'
static const UChar gColon = 0x003a;      // ':'
static const UChar gSemicolon = 0x003b;  // ';'
static const UChar gLineFeed = 0x000a;   // LINE FEED (U+000A); 0x0010 is DATA LINK ESCAPE
|
||||
|
||||
static const UnicodeString gFourSpaces(" ");
|
||||
static const UnicodeString gPercentPercent("%%");
|
||||
|
||||
/**
 * Builds a rule set from descriptions[index].
 *
 * Only the name and the public/private flag are worked out here.  If the
 * description starts with a "%name:" prefix, the name is copied out and the
 * prefix (with the whitespace after it) is removed from the description in
 * place; otherwise the set is called "%default".  The rules themselves are
 * parsed later by parseRules().
 *
 * status is set to U_PARSE_ERROR when a name has no terminating colon or
 * when the description is empty.  Nothing is done if status already holds a
 * failure on entry.
 */
NFRuleSet::NFRuleSet(UnicodeString* descriptions, int32_t index, UErrorCode& status)
  : name()
  , rules(0)
  , negativeNumberRule(NULL)
  , fIsFractionRuleSet(FALSE)
  , fIsPublic(FALSE)
{
    for (int i = 0; i < 3; ++i) {
        fractionRules[i] = NULL;
    }

    if (U_FAILURE(status)) {
        return;
    }

    UnicodeString& description = descriptions[index]; // !!! make sure index is valid

    if (description.charAt(0) != gPercent) {
        // no explicit name (allowed when the formatter has a single rule set)
        name.setTo("%default");
    } else {
        UTextOffset pos = description.indexOf(gColon);
        if (pos == -1) {
            // rule set name doesn't end in colon
            status = U_PARSE_ERROR;
        } else {
            name.setTo(description, 0, pos);
            // step past the colon and any whitespace that follows it
            for (;;) {
                if (pos >= description.length()) {
                    break;
                }
                ++pos;
                if (!u_isWhitespace(description.charAt(pos))) {
                    break;
                }
            }
            description.remove(0, pos);
        }
    }

    if (description.length() == 0) {
        // empty rule set description
        status = U_PARSE_ERROR;
    }

    // a name beginning with "%%" marks the rule set as private
    fIsPublic = name.indexOf(gPercentPercent) != 0;

    // all of the other members of NFRuleSet are initialized
    // by parseRules()
}
|
||||
|
||||
void
|
||||
NFRuleSet::parseRules(UnicodeString& description, const RuleBasedNumberFormat* owner, UErrorCode& status)
|
||||
{
|
||||
// start by creating a Vector whose elements are Strings containing
|
||||
// the descriptions of the rules (one rule per element). The rules
|
||||
// are separated by semicolons (there's no escape facility: ALL
|
||||
// semicolons are rule delimiters)
|
||||
|
||||
if (U_FAILURE(status)) {
|
||||
return;
|
||||
}
|
||||
|
||||
// dlf - the original code kept a separate description array for no reason,
|
||||
// so I got rid of it. The loop was too complex so I simplified it.
|
||||
|
||||
UnicodeString currentDescription;
|
||||
UTextOffset oldP = 0;
|
||||
while (oldP < description.length()) {
|
||||
UTextOffset p = description.indexOf(gSemicolon, oldP);
|
||||
if (p == -1) {
|
||||
p = description.length();
|
||||
}
|
||||
currentDescription.setTo(description, oldP, p - oldP);
|
||||
NFRule::makeRules(currentDescription, this, rules.last(), owner, rules, status);
|
||||
oldP = p + 1;
|
||||
}
|
||||
|
||||
// for rules that didn't specify a base value, their base values
|
||||
// were initialized to 0. Make another pass through the list and
|
||||
// set all those rules' base values. We also remove any special
|
||||
// rules from the list and put them into their own member variables
|
||||
llong defaultBaseValue = (int32_t)0;
|
||||
|
||||
// (this isn't a for loop because we might be deleting items from
|
||||
// the vector-- we want to make sure we only increment i when
|
||||
// we _didn't_ delete aything from the vector)
|
||||
uint32_t i = 0;
|
||||
while (i < rules.size()) {
|
||||
NFRule* rule = rules[i];
|
||||
|
||||
switch (rule->getType()) {
|
||||
// if the rule's base value is 0, fill in a default
|
||||
// base value (this will be 1 plus the preceding
|
||||
// rule's base value for regular rule sets, and the
|
||||
// same as the preceding rule's base value in fraction
|
||||
// rule sets)
|
||||
case NFRule::kNoBase:
|
||||
rule->setBaseValue(defaultBaseValue);
|
||||
if (!isFractionRuleSet()) {
|
||||
++defaultBaseValue;
|
||||
}
|
||||
++i;
|
||||
break;
|
||||
|
||||
// if it's the negative-number rule, copy it into its own
|
||||
// data member and delete it from the list
|
||||
case NFRule::kNegativeNumberRule:
|
||||
negativeNumberRule = rules.remove(i);
|
||||
break;
|
||||
|
||||
// if it's the improper fraction rule, copy it into the
|
||||
// correct element of fractionRules
|
||||
case NFRule::kImproperFractionRule:
|
||||
fractionRules[0] = rules.remove(i);
|
||||
break;
|
||||
|
||||
// if it's the proper fraction rule, copy it into the
|
||||
// correct element of fractionRules
|
||||
case NFRule::kProperFractionRule:
|
||||
fractionRules[1] = rules.remove(i);
|
||||
break;
|
||||
|
||||
// if it's the master rule, copy it into the
|
||||
// correct element of fractionRules
|
||||
case NFRule::kMasterRule:
|
||||
fractionRules[2] = rules.remove(i);
|
||||
break;
|
||||
|
||||
// if it's a regular rule that already knows its base value,
|
||||
// check to make sure the rules are in order, and update
|
||||
// the default base value for the next rule
|
||||
default:
|
||||
if (rule->getBaseValue() < defaultBaseValue) {
|
||||
// throw new IllegalArgumentException("Rules are not in order");
|
||||
status = U_PARSE_ERROR;
|
||||
return;
|
||||
}
|
||||
defaultBaseValue = rule->getBaseValue();
|
||||
if (!isFractionRuleSet()) {
|
||||
++defaultBaseValue;
|
||||
}
|
||||
++i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Destructor. Frees the special rules that are held in dedicated
 * members rather than in the regular rule list: the negative-number
 * rule and the three fraction-rule slots.
 */
NFRuleSet::~NFRuleSet()
{
    delete negativeNumberRule;

    for (int32_t slot = 0; slot < 3; ++slot) {
        delete fractionRules[slot];
    }
}
|
||||
|
||||
/**
 * Null-tolerant equality test for two rule pointers.
 * @param rule1 First rule, may be null.
 * @param rule2 Second rule, may be null.
 * @return TRUE if both pointers are null, or if both are non-null and
 * the rules they point to compare equal; FALSE otherwise.
 */
UBool
util_equalRules(const NFRule* rule1, const NFRule* rule2)
{
    // at least one side is missing: equal only if both are missing
    if (!rule1 || !rule2) {
        return (!rule1 && !rule2) ? TRUE : FALSE;
    }

    // both present: defer to NFRule's own comparison
    return *rule1 == *rule2;
}
|
||||
|
||||
/**
 * Compares two rule sets for equality.
 * @param rhs The rule set to compare against.
 * @return TRUE if both sets have the same number of regular rules, the
 * same fraction-rule-set flag, the same name, equal special rules
 * (negative-number rule and the three fraction-rule slots), and
 * pairwise-equal regular rules; FALSE otherwise.
 */
UBool
NFRuleSet::operator==(const NFRuleSet& rhs) const
{
    // set-level properties
    if (rules.size() != rhs.rules.size()) {
        return FALSE;
    }
    if (fIsFractionRuleSet != rhs.fIsFractionRuleSet) {
        return FALSE;
    }
    if (!(name == rhs.name)) {
        return FALSE;
    }

    // special rules, each of which may be absent on either side
    if (!util_equalRules(negativeNumberRule, rhs.negativeNumberRule)) {
        return FALSE;
    }
    {
        for (int32_t slot = 0; slot < 3; ++slot) {
            if (!util_equalRules(fractionRules[slot], rhs.fractionRules[slot])) {
                return FALSE;
            }
        }
    }

    // regular rules, position by position
    {
        for (uint32_t idx = 0; idx < rules.size(); ++idx) {
            if (*rules[idx] != *rhs.rules[idx]) {
                return FALSE;
            }
        }
    }
    return TRUE;
}
|
||||
|
||||
void
|
||||
NFRuleSet::format(llong number, UnicodeString& toAppendTo, int32_t pos) const
|
||||
{
|
||||
NFRule *rule = findNormalRule(number);
|
||||
rule->doFormat(number, toAppendTo, pos);
|
||||
}
|
||||
|
||||
void
|
||||
NFRuleSet::format(double number, UnicodeString& toAppendTo, int32_t pos) const
|
||||
{
|
||||
NFRule *rule = findDoubleRule(number);
|
||||
rule->doFormat(number, toAppendTo, pos);
|
||||
}
|
||||
|
||||
NFRule*
|
||||
NFRuleSet::findDoubleRule(double number) const
|
||||
{
|
||||
// if this is a fraction rule set, use findFractionRuleSetRule()
|
||||
if (isFractionRuleSet()) {
|
||||
return findFractionRuleSetRule(number);
|
||||
}
|
||||
|
||||
// if the number is negative, return the negative number rule
|
||||
// (if there isn't a negative-number rule, we pretend it's a
|
||||
// positive number)
|
||||
if (number < 0) {
|
||||
if (negativeNumberRule) {
|
||||
return negativeNumberRule;
|
||||
} else {
|
||||
number = -number;
|
||||
}
|
||||
}
|
||||
|
||||
// if the number isn't an integer, we use one of the fraction rules...
|
||||
if (number != uprv_floor(number)) {
|
||||
// if the number is between 0 and 1, return the proper
|
||||
// fraction rule
|
||||
if (number < 1 && fractionRules[1]) {
|
||||
return fractionRules[1];
|
||||
}
|
||||
// otherwise, return the improper fraction rule
|
||||
else if (fractionRules[0]) {
|
||||
return fractionRules[0];
|
||||
}
|
||||
}
|
||||
|
||||
// if there's a master rule, use it to format the number
|
||||
if (fractionRules[2]) {
|
||||
return fractionRules[2];
|
||||
}
|
||||
|
||||
// and if we haven't yet returned a rule, use findNormalRule()
|
||||
// to find the applicable rule
|
||||
llong r = number + 0.5;
|
||||
return findNormalRule(r);
|
||||
}
|
||||
|
||||
NFRule *
|
||||
NFRuleSet::findNormalRule(llong number) const
|
||||
{
|
||||
// if this is a fraction rule set, use findFractionRuleSetRule()
|
||||
// to find the rule (we should only go into this clause if the
|
||||
// value is 0)
|
||||
if (fIsFractionRuleSet) {
|
||||
return findFractionRuleSetRule(llong_asDouble(number));
|
||||
}
|
||||
|
||||
// if the number is negative, return the negative-number rule
|
||||
// (if there isn't one, pretend the number is positive)
|
||||
if (number < 0) {
|
||||
if (negativeNumberRule) {
|
||||
return negativeNumberRule;
|
||||
} else {
|
||||
number = -number;
|
||||
}
|
||||
}
|
||||
|
||||
// we have to repeat the preceding two checks, even though we
|
||||
// do them in findRule(), because the version of format() that
|
||||
// takes a long bypasses findRule() and goes straight to this
|
||||
// function. This function does skip the fraction rules since
|
||||
// we know the value is an integer (it also skips the master
|
||||
// rule, since it's considered a fraction rule. Skipping the
|
||||
// master rule in this function is also how we avoid infinite
|
||||
// recursion)
|
||||
|
||||
// binary-search the rule list for the applicable rule
|
||||
// (a rule is used for all values from its base value to
|
||||
// the next rule's base value)
|
||||
int32_t lo = 0;
|
||||
int32_t hi = rules.size();
|
||||
while (lo < hi) {
|
||||
int32_t mid = (lo + hi) / 2;
|
||||
if (rules[mid]->getBaseValue() == number) {
|
||||
return rules[mid];
|
||||
}
|
||||
else if (rules[mid]->getBaseValue() > number) {
|
||||
hi = mid;
|
||||
}
|
||||
else {
|
||||
lo = mid + 1;
|
||||
}
|
||||
}
|
||||
NFRule *result = rules[hi - 1];
|
||||
|
||||
// use shouldRollBack() to see whether we need to invoke the
|
||||
// rollback rule (see shouldRollBack()'s documentation for
|
||||
// an explanation of the rollback rule). If we do, roll back
|
||||
// one rule and return that one instead of the one we'd normally
|
||||
// return
|
||||
if (result->shouldRollBack(llong_asDouble(number))) {
|
||||
result = rules[hi - 2];
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* If this rule is a fraction rule set, this function is used by
|
||||
* findRule() to select the most appropriate rule for formatting
|
||||
* the number. Basically, the base value of each rule in the rule
|
||||
* set is treated as the denominator of a fraction. Whichever
|
||||
* denominator can produce the fraction closest in value to the
|
||||
* number passed in is the result. If there's a tie, the earlier
|
||||
* one in the list wins. (If there are two rules in a row with the
|
||||
* same base value, the first one is used when the numerator of the
|
||||
* fraction would be 1, and the second rule is used the rest of the
|
||||
* time.
|
||||
* @param number The number being formatted (which will always be
|
||||
* a number between 0 and 1)
|
||||
* @return The rule to use to format this number
|
||||
*/
|
||||
NFRule*
|
||||
NFRuleSet::findFractionRuleSetRule(double number) const
|
||||
{
|
||||
// the obvious way to do this (multiply the value being formatted
|
||||
// by each rule's base value until you get an integral result)
|
||||
// doesn't work because of rounding error. This method is more
|
||||
// accurate
|
||||
|
||||
// find the least common multiple of the rules' base values
|
||||
// and multiply this by the number being formatted. This is
|
||||
// all the precision we need, and we can do all of the rest
|
||||
// of the math using integer arithmetic
|
||||
llong leastCommonMultiple = rules[0]->getBaseValue();
|
||||
llong numerator;
|
||||
{
|
||||
for (uint32_t i = 1; i < rules.size(); ++i) {
|
||||
leastCommonMultiple = util_lcm(leastCommonMultiple, rules[i]->getBaseValue());
|
||||
}
|
||||
numerator = number * llong_asDouble(leastCommonMultiple) + 0.5;
|
||||
}
|
||||
// for each rule, do the following...
|
||||
llong tempDifference;
|
||||
llong difference = llong::kMaxValue;
|
||||
int32_t winner = 0;
|
||||
for (uint32_t i = 0; i < rules.size(); ++i) {
|
||||
// "numerator" is the numerator of the fraction if the
|
||||
// denominator is the LCD. The numerator if the rule's
|
||||
// base value is the denominator is "numerator" times the
|
||||
// base value divided bythe LCD. Here we check to see if
|
||||
// that's an integer, and if not, how close it is to being
|
||||
// an integer.
|
||||
tempDifference = numerator * rules[i]->getBaseValue() % leastCommonMultiple;
|
||||
|
||||
|
||||
// normalize the result of the above calculation: we want
|
||||
// the numerator's distance from the CLOSEST multiple
|
||||
// of the LCD
|
||||
if (leastCommonMultiple - tempDifference < tempDifference) {
|
||||
tempDifference = leastCommonMultiple - tempDifference;
|
||||
}
|
||||
|
||||
// if this is as close as we've come, keep track of how close
|
||||
// that is, and the line number of the rule that did it. If
|
||||
// we've scored a direct hit, we don't have to look at any more
|
||||
// rules
|
||||
if (tempDifference < difference) {
|
||||
difference = tempDifference;
|
||||
winner = i;
|
||||
if (difference == 0) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// if we have two successive rules that both have the winning base
|
||||
// value, then the first one (the one we found above) is used if
|
||||
// the numerator of the fraction is 1 and the second one is used if
|
||||
// the numerator of the fraction is anything else (this lets us
|
||||
// do things like "one third"/"two thirds" without haveing to define
|
||||
// a whole bunch of extra rule sets)
|
||||
if ((unsigned)(winner + 1) < rules.size() &&
|
||||
rules[winner + 1]->getBaseValue() == rules[winner]->getBaseValue()) {
|
||||
double n = llong_asDouble(rules[winner]->getBaseValue()) * number;
|
||||
if (n < 0.5 || n >= 2) {
|
||||
++winner;
|
||||
}
|
||||
}
|
||||
|
||||
// finally, return the winning rule
|
||||
return rules[winner];
|
||||
}
|
||||
|
||||
/**
|
||||
* Parses a string. Matches the string to be parsed against each
|
||||
* of its rules (with a base value less than upperBound) and returns
|
||||
* the value produced by the rule that matched the most characters
|
||||
* in the source string.
|
||||
* @param text The string to parse
|
||||
* @param parsePosition The initial position is ignored and assumed
|
||||
* to be 0. On exit, this object has been updated to point to the
|
||||
* first character position this rule set didn't consume.
|
||||
* @param upperBound Limits the rules that can be allowed to match.
|
||||
* Only rules whose base values are strictly less than upperBound
|
||||
* are considered.
|
||||
* @param result Receives the numerical result of parsing this string.
* This will be the matching rule's base value, composed appropriately
* with the results of matching any of its substitutions. It is reset
* to the long value 0 before any rule is tried, so it holds 0 if
* nothing matched the input string at all (in which case the parse
* position reports that no characters were consumed).
* @return FALSE if the text to parse is empty; TRUE otherwise, even
* when no rule matched any of the text.
|
||||
*/
|
||||
#ifdef RBNF_DEBUG
// Debug-only helper (compiled only when RBNF_DEBUG is defined): extracts
// the string into a temporary char buffer and prints it to the stream.
static void dumpUS(FILE* f, const UnicodeString& us) {
    const int charCount = us.length();
    char* bytes = new char[charCount + 1];

    us.extract(0, charCount, bytes);
    bytes[charCount] = 0;   // terminate explicitly before printing
    fprintf(f, "%s", bytes);

    delete[] bytes;
}
#endif
|
||||
|
||||
/**
 * Parses text against every rule in this rule set and keeps the result
 * of the rule that consumed the most characters.
 * @param text The string to parse
 * @param pos Starting position handed to each rule; on exit it is set
 * to the furthest position any successful rule reached
 * @param upperBound Regular rules whose base value is greater than or
 * equal to this bound are skipped (not applied in fraction rule sets)
 * @param result Receives the parsed value; reset to the long value 0
 * before any rule is tried
 * @return FALSE if text is empty, TRUE otherwise
 */
UBool
NFRuleSet::parse(const UnicodeString& text, ParsePosition& pos, double upperBound, Formattable& result) const
{
    // try matching each rule in the rule set against the text being
    // parsed. Whichever one matches the most characters is the one
    // that determines the value we return.

    result.setLong(0);

    // dump out if there's no text to parse
    if (text.length() == 0) {
        return FALSE;
    }

    ParsePosition highWaterMark;
    ParsePosition workingPos = pos;

#ifdef RBNF_DEBUG
    // pointers are printed with %p; every argument has a matching
    // conversion in the format string
    fprintf(stderr, "<nfrs> %p '", (const void*)this);
    dumpUS(stderr, name);
    fprintf(stderr, "' text '");
    dumpUS(stderr, text);
    fprintf(stderr, "'\n");
    fprintf(stderr, " parse negative: %d\n", negativeNumberRule != 0);
#endif

    // start by trying the negative number rule (if there is one)
    if (negativeNumberRule) {
        Formattable tempResult;
#ifdef RBNF_DEBUG
        fprintf(stderr, " <nfrs before negative> %p ub: %g\n", (const void*)negativeNumberRule, upperBound);
#endif
        UBool success = negativeNumberRule->doParse(text, workingPos, 0, upperBound, tempResult);
#ifdef RBNF_DEBUG
        fprintf(stderr, " <nfrs after negative> success: %d wpi: %d\n", success, workingPos.getIndex());
#endif
        if (success && workingPos.getIndex() > highWaterMark.getIndex()) {
            result = tempResult;
            highWaterMark = workingPos;
        }
        workingPos = pos;
    }
#ifdef RBNF_DEBUG
    fprintf(stderr, "<nfrs> continue fractional with text '");
    dumpUS(stderr, text);
    fprintf(stderr, "' hwm: %d\n", highWaterMark.getIndex());
#endif
    // then try each of the fraction rules
    {
        for (int i = 0; i < 3; i++) {
            if (fractionRules[i]) {
                Formattable tempResult;
                UBool success = fractionRules[i]->doParse(text, workingPos, 0, upperBound, tempResult);
                if (success && (workingPos.getIndex() > highWaterMark.getIndex())) {
                    result = tempResult;
                    highWaterMark = workingPos;
                }
                workingPos = pos;
            }
        }
    }
#ifdef RBNF_DEBUG
    fprintf(stderr, "<nfrs> continue other with text '");
    dumpUS(stderr, text);
    fprintf(stderr, "' hwm: %d\n", highWaterMark.getIndex());
#endif

    // finally, go through the regular rules one at a time. We start
    // at the end of the list because we want to try matching the most
    // significant rule first (this helps ensure that we parse
    // "five thousand three hundred six" as
    // "(five thousand) (three hundred) (six)" rather than
    // "((five thousand three) hundred) (six)"). Skip rules whose
    // base values are higher than the upper bound (again, this helps
    // limit ambiguity by making sure the rules that match a rule's
    // substitutions are less significant than the rule containing the
    // substitutions). The loop also stops as soon as the whole text
    // has been consumed.
    {
        llong ub(upperBound);
#ifdef RBNF_DEBUG
        {
            char ubstr[64];
            lltoa(ub, ubstr, 64);
            fprintf(stderr, "ub: %g, ll: %s(%x/%x)\n", upperBound, ubstr, ub.hi, ub.lo);
        }
#endif
        for (int32_t i = rules.size(); --i >= 0 && highWaterMark.getIndex() < text.length();) {
            if ((!fIsFractionRuleSet) && (rules[i]->getBaseValue() >= ub)) {
                continue;
            }
            Formattable tempResult;
            UBool success = rules[i]->doParse(text, workingPos, fIsFractionRuleSet, upperBound, tempResult);
            if (success && workingPos.getIndex() > highWaterMark.getIndex()) {
                result = tempResult;
                highWaterMark = workingPos;
            }
            workingPos = pos;
        }
    }
#ifdef RBNF_DEBUG
    fprintf(stderr, "<nfrs> exit\n");
#endif
    // finally, update the parse position we were passed to point to the
    // first character we didn't use, and return the result that
    // corresponds to that string of characters
    pos = highWaterMark;

    return TRUE;
}
|
||||
|
||||
void
|
||||
NFRuleSet::appendRules(UnicodeString& result) const
|
||||
{
|
||||
// the rule set name goes first...
|
||||
result.append(name);
|
||||
result.append(gColon);
|
||||
result.append(gLineFeed);
|
||||
|
||||
// followed by the regular rules...
|
||||
for (uint32_t i = 0; i < rules.size(); i++) {
|
||||
result.append(gFourSpaces);
|
||||
rules[i]->appendRuleText(result);
|
||||
result.append(gLineFeed);
|
||||
}
|
||||
|
||||
// followed by the special rules (if they exist)
|
||||
if (negativeNumberRule) {
|
||||
result.append(gFourSpaces);
|
||||
negativeNumberRule->appendRuleText(result);
|
||||
result.append(gLineFeed);
|
||||
}
|
||||
|
||||
{
|
||||
for (uint32_t i = 0; i < 3; ++i) {
|
||||
if (fractionRules[i]) {
|
||||
result.append(gFourSpaces);
|
||||
fractionRules[i]->appendRuleText(result);
|
||||
result.append(gLineFeed);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
U_NAMESPACE_END
|
61
icu4c/source/i18n/nfrs.h
Normal file
61
icu4c/source/i18n/nfrs.h
Normal file
@ -0,0 +1,61 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 1997-2001, International Business Machines Corporation and others. All Rights Reserved.
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef NFRS_H
|
||||
#define NFRS_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/umisc.h"
|
||||
|
||||
#include "unicode/rbnf.h"
|
||||
#include "nfrlist.h"
|
||||
#include "llong.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
class NFRuleSet {
|
||||
public:
|
||||
NFRuleSet(UnicodeString* descriptions, int32_t index, UErrorCode& status);
|
||||
void parseRules(UnicodeString& rules, const RuleBasedNumberFormat* owner, UErrorCode& status);
|
||||
void makeIntoFractionRuleSet() { fIsFractionRuleSet = TRUE; }
|
||||
|
||||
~NFRuleSet();
|
||||
|
||||
UBool operator==(const NFRuleSet& rhs) const;
|
||||
UBool operator!=(const NFRuleSet& rhs) const { return !operator==(rhs); }
|
||||
|
||||
UBool isPublic() const { return fIsPublic; }
|
||||
UBool isFractionRuleSet() const { return fIsFractionRuleSet; }
|
||||
|
||||
void getName(UnicodeString& result) const { result.setTo(name); }
|
||||
UBool isNamed(const UnicodeString& _name) const { return this->name == _name; }
|
||||
|
||||
void format(llong number, UnicodeString& toAppendTo, int32_t pos) const;
|
||||
void format(double number, UnicodeString& toAppendTo, int32_t pos) const;
|
||||
|
||||
UBool parse(const UnicodeString& text, ParsePosition& pos, double upperBound, Formattable& result) const;
|
||||
|
||||
void appendRules(UnicodeString& result) const; // toString
|
||||
|
||||
private:
|
||||
NFRule * findNormalRule(llong number) const;
|
||||
NFRule * findDoubleRule(double number) const;
|
||||
NFRule * findFractionRuleSetRule(double number) const;
|
||||
|
||||
private:
|
||||
UnicodeString name;
|
||||
NFRuleList rules;
|
||||
NFRule *negativeNumberRule;
|
||||
NFRule *fractionRules[3];
|
||||
UBool fIsFractionRuleSet;
|
||||
UBool fIsPublic;
|
||||
};
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
// NFRS_H
|
||||
#endif
|
||||
|
1377
icu4c/source/i18n/nfrule.cpp
Normal file
1377
icu4c/source/i18n/nfrule.cpp
Normal file
File diff suppressed because it is too large
Load Diff
104
icu4c/source/i18n/nfrule.h
Normal file
104
icu4c/source/i18n/nfrule.h
Normal file
@ -0,0 +1,104 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 1997-2001, International Business Machines Corporation and others. All Rights Reserved.
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef NFRULE_H
|
||||
#define NFRULE_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/unistr.h"
|
||||
|
||||
#include "llong.h"
|
||||
|
||||
#include <math.h>
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
class FieldPosition;
|
||||
class Formattable;
|
||||
class NFRuleList;
|
||||
class NFRuleSet;
|
||||
class NFSubstitution;
|
||||
class ParsePosition;
|
||||
class RuleBasedNumberFormat;
|
||||
class UnicodeString;
|
||||
|
||||
class NFRule {
|
||||
public:
|
||||
|
||||
enum ERuleType {
|
||||
kNoBase = 0,
|
||||
kNegativeNumberRule = -1,
|
||||
kImproperFractionRule = -2,
|
||||
kProperFractionRule = -3,
|
||||
kMasterRule = -4,
|
||||
kOtherRule = -5
|
||||
};
|
||||
|
||||
static void makeRules(UnicodeString& definition,
|
||||
const NFRuleSet* ruleSet,
|
||||
const NFRule* predecessor,
|
||||
const RuleBasedNumberFormat* rbnf,
|
||||
NFRuleList& ruleList,
|
||||
UErrorCode& status);
|
||||
|
||||
NFRule(const RuleBasedNumberFormat* rbnf);
|
||||
~NFRule();
|
||||
|
||||
UBool operator==(const NFRule& rhs) const;
|
||||
UBool operator!=(const NFRule& rhs) const { return !operator==(rhs); }
|
||||
|
||||
ERuleType getType() const { return (ERuleType)(baseValue <= 0 ? llong_asInt(baseValue) : kOtherRule); }
|
||||
void setType(ERuleType ruleType) { baseValue = (int32_t)ruleType; }
|
||||
|
||||
llong getBaseValue() const { return baseValue; }
|
||||
void setBaseValue(llong value);
|
||||
|
||||
double getDivisor() const { return pow(radix, exponent); }
|
||||
|
||||
void doFormat(llong number, UnicodeString& toAppendTo, int32_t pos) const;
|
||||
void doFormat(double number, UnicodeString& toAppendTo, int32_t pos) const;
|
||||
|
||||
UBool doParse(const UnicodeString& text,
|
||||
ParsePosition& pos,
|
||||
UBool isFractional,
|
||||
double upperBound,
|
||||
Formattable& result) const;
|
||||
|
||||
UBool shouldRollBack(double number) const;
|
||||
|
||||
void appendRuleText(UnicodeString& result) const;
|
||||
|
||||
private:
|
||||
void parseRuleDescriptor(UnicodeString& descriptor, UErrorCode& status);
|
||||
void extractSubstitutions(const NFRuleSet* ruleSet, const NFRule* predecessor, const RuleBasedNumberFormat* rbnf, UErrorCode& status);
|
||||
NFSubstitution* extractSubstitution(const NFRuleSet* ruleSet, const NFRule* predecessor, const RuleBasedNumberFormat* rbnf, UErrorCode& status);
|
||||
|
||||
int16_t expectedExponent() const;
|
||||
int32_t indexOfAny(const UnicodeString* strings[]) const;
|
||||
double matchToDelimiter(const UnicodeString& text, int32_t startPos, double baseValue,
|
||||
const UnicodeString& delimiter, ParsePosition& pp, const NFSubstitution* sub,
|
||||
double upperBound) const;
|
||||
void stripPrefix(UnicodeString& text, const UnicodeString& prefix, ParsePosition& pp) const;
|
||||
|
||||
int32_t prefixLength(const UnicodeString& str, const UnicodeString& prefix) const;
|
||||
UBool allIgnorable(const UnicodeString& str) const;
|
||||
int32_t findText(const UnicodeString& str, const UnicodeString& key,
|
||||
int32_t startingAt, int32_t* resultCount) const;
|
||||
|
||||
private:
|
||||
llong baseValue;
|
||||
int16_t radix;
|
||||
int16_t exponent;
|
||||
UnicodeString ruleText;
|
||||
NFSubstitution* sub1;
|
||||
NFSubstitution* sub2;
|
||||
const RuleBasedNumberFormat* formatter;
|
||||
};
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
// NFRULE_H
|
||||
#endif
|
892
icu4c/source/i18n/nfsubs.cpp
Normal file
892
icu4c/source/i18n/nfsubs.cpp
Normal file
@ -0,0 +1,892 @@
|
||||
#include "nfsubs.h"
|
||||
|
||||
// Characters that are significant in substitution descriptions, given as
// hexadecimal code points (presumably so the values do not depend on the
// compiler's execution character set -- confirm before changing them to
// character literals).
static const UChar gLessThan = 0x003c; // '<'
|
||||
static const UChar gEquals = 0x003d; // '='
|
||||
static const UChar gGreaterThan = 0x003e; // '>'
|
||||
static const UChar gPercent = 0x0025; // '%'
|
||||
static const UChar gPound = 0x0023; // '#'
|
||||
static const UChar gZero = 0x0030; // '0'
|
||||
static const UChar gSpace = 0x0020; // ' '
|
||||
|
||||
// Multi-character tokens, held as UnicodeString objects.
static const UnicodeString gEqualsEquals("==");
|
||||
static const UnicodeString gGreaterGreaterGreaterThan(">>>");
|
||||
static const UnicodeString gGreaterGreaterThan(">>");
|
||||
|
||||
/**
 * Factory: creates the substitution subclass that matches a
 * substitution token and the kind of rule that owns it.
 * @param pos Position handed to the created substitution
 * @param rule The rule that owns the substitution
 * @param predecessor The rule preceding the owning rule (only handed to
 * a ModulusSubstitution)
 * @param ruleSet The rule set that contains the owning rule
 * @param formatter The formatter that owns everything
 * @param description The substitution token text; its first character
 * selects the family of substitution
 * @param status Set to U_PARSE_ERROR for an illegal token or an illegal
 * token/rule combination
 * @return The new substitution, or NULL when status was set to
 * U_PARSE_ERROR here
 */
NFSubstitution*
NFSubstitution::makeSubstitution(int32_t pos,
                                 const NFRule* rule,
                                 const NFRule* predecessor,
                                 const NFRuleSet* ruleSet,
                                 const RuleBasedNumberFormat* formatter,
                                 const UnicodeString& description,
                                 UErrorCode& status)
{
    // an empty description yields a NullSubstitution
    if (description.length() == 0) {
        return new NullSubstitution(pos, ruleSet, formatter, description, status);
    }

    const UChar token = description.charAt(0);

    // ---- '<' -------------------------------------------------------
    if (token == gLessThan) {
        // "<<" is not allowed in a negative-number rule
        if (rule->getBaseValue() == NFRule::kNegativeNumberRule) {
            status = U_PARSE_ERROR;
            return NULL;
        }

        // in one of the fraction rules it selects the integral part
        if (rule->getBaseValue() == NFRule::kImproperFractionRule
            || rule->getBaseValue() == NFRule::kProperFractionRule
            || rule->getBaseValue() == NFRule::kMasterRule) {
            return new IntegralPartSubstitution(pos, ruleSet, formatter, description, status);
        }

        // inside a fraction rule set it is the numerator
        if (ruleSet->isFractionRuleSet()) {
            return new NumeratorSubstitution(pos, llong_asDouble(rule->getBaseValue()),
                                             formatter->getDefaultRuleSet(), formatter, description, status);
        }

        // everywhere else it is a multiplier substitution
        return new MultiplierSubstitution(pos, rule->getDivisor(), ruleSet,
                                          formatter, description, status);
    }

    // ---- '>' -------------------------------------------------------
    if (token == gGreaterThan) {
        // in a negative-number rule it formats the absolute value
        if (rule->getBaseValue() == NFRule::kNegativeNumberRule) {
            return new AbsoluteValueSubstitution(pos, ruleSet, formatter, description, status);
        }

        // in one of the fraction rules it selects the fractional part
        if (rule->getBaseValue() == NFRule::kImproperFractionRule
            || rule->getBaseValue() == NFRule::kProperFractionRule
            || rule->getBaseValue() == NFRule::kMasterRule) {
            return new FractionalPartSubstitution(pos, ruleSet, formatter, description, status);
        }

        // ">>" is not allowed in a fraction rule set
        if (ruleSet->isFractionRuleSet()) {
            status = U_PARSE_ERROR;
            return NULL;
        }

        // everywhere else it is a modulus substitution
        return new ModulusSubstitution(pos, rule->getDivisor(), predecessor,
                                       ruleSet, formatter, description, status);
    }

    // ---- '=' -------------------------------------------------------
    if (token == gEquals) {
        return new SameValueSubstitution(pos, ruleSet, formatter, description, status);
    }

    // anything else is an illegal substitution character
    status = U_PARSE_ERROR;
    return NULL;
}
|
||||
|
||||
/**
 * Base-class constructor: strips the paired token characters from the
 * description and decides whether the substitution formats through a
 * rule set or through a DecimalFormat.
 * @param _pos Position stored in this substitution
 * @param _ruleSet The rule set containing the owning rule; used when the
 * description names no other rule set
 * @param formatter The owning formatter, used to look up named rule
 * sets and the decimal format symbols
 * @param description The substitution token text
 * @param status Set to U_PARSE_ERROR on a syntax error,
 * U_MISSING_RESOURCE_ERROR if the formatter supplies no decimal format
 * symbols, and U_MEMORY_ALLOCATION_ERROR if the DecimalFormat cannot
 * be allocated
 */
NFSubstitution::NFSubstitution(int32_t _pos,
                               const NFRuleSet* _ruleSet,
                               const RuleBasedNumberFormat* formatter,
                               const UnicodeString& description,
                               UErrorCode& status)
    : pos(_pos), ruleSet(NULL), numberFormat(NULL)
{
    // the description should begin and end with the same character.
    // If it doesn't that's a syntax error. Otherwise,
    // makeSubstitution() was the only thing that needed to know
    // about these characters, so strip them off
    UnicodeString workingDescription(description);
    if (description.length() >= 2 && description.charAt(0) == description.charAt(
            description.length() - 1)) {
        workingDescription.remove(description.length() - 1, 1);
        workingDescription.remove(0, 1);
    }
    else if (description.length() != 0) {
        // illegal substitution syntax
        status = U_PARSE_ERROR;
        return;
    }

    // if the description was just two paired token characters
    // (i.e., "<<" or ">>"), it uses the rule set it belongs to to
    // format its result
    if (workingDescription.length() == 0) {
        this->ruleSet = _ruleSet;
    }

    // if the description contains a rule set name, that's the rule
    // set we use to format the result: get a reference to the
    // named rule set
    else if (workingDescription.charAt(0) == gPercent) {
        this->ruleSet = formatter->findRuleSet(workingDescription, status);
    }

    // if the description begins with 0 or #, treat it as a
    // DecimalFormat pattern, and initialize a DecimalFormat with
    // that pattern and the DecimalFormatSymbols belonging to our
    // formatter
    else if (workingDescription.charAt(0) == gPound || workingDescription.charAt(0) == gZero) {
        const DecimalFormatSymbols* symbols = formatter->getDecimalFormatSymbols();
        if (symbols == NULL) {
            // nothing to dereference: report instead of crashing
            status = U_MISSING_RESOURCE_ERROR;
            return;
        }
        this->numberFormat = new DecimalFormat(workingDescription, *symbols, status);
        if (this->numberFormat == NULL) {
            status = U_MEMORY_ALLOCATION_ERROR;
        }
    }

    // if the description is ">>>", this substitution bypasses the
    // usual rule-search process and always uses the rule that precedes
    // it in its own rule set's rule list (this is used for place-value
    // notations: formats where you want to see a particular part of
    // a number even when it's 0)
    else if (workingDescription.charAt(0) == gGreaterThan) {
        // leaving ruleSet NULL here causes problems when >>> is used
        // in a FractionalPartSubstitution, so keep the owning rule set
        this->ruleSet = _ruleSet;
        this->numberFormat = NULL;
    }

    // and if the description is none of these things, it's a syntax error
    else {
        status = U_PARSE_ERROR;
    }
}
|
||||
|
||||
/**
 * Destructor. Deletes the number format held by this substitution, if
 * any, and clears the member.
 */
NFSubstitution::~NFSubstitution()
{
    // cast away const so the object can be deleted
    NumberFormat* owned = (NumberFormat*)numberFormat;
    numberFormat = NULL;
    delete owned;
}
|
||||
|
||||
/**
|
||||
* Set's the substitution's divisor. Used by NFRule.setBaseValue().
|
||||
* A no-op for all substitutions except multiplier and modulus
|
||||
* substitutions.
|
||||
* @param radix The radix of the divisor
|
||||
* @param exponent The exponent of the divisor
|
||||
*/
|
||||
void
|
||||
NFSubstitution::setDivisor(int32_t radix, int32_t exponent) {
|
||||
// a no-op for all substitutions except multiplier and modulus substitutions
|
||||
}
|
||||
|
||||
|
||||
//-----------------------------------------------------------------------
|
||||
// boilerplate
|
||||
//-----------------------------------------------------------------------
|
||||
|
||||
char NFSubstitution::fgClassID;
|
||||
|
||||
UClassID
|
||||
NFSubstitution::getDynamicClassID() const {
|
||||
return getStaticClassID();
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Compares two substitutions for equality
|
||||
* @param The substitution to compare this one to
|
||||
* @return true if the two substitutions are functionally equivalent
|
||||
*/
|
||||
UBool
|
||||
NFSubstitution::operator==(const NFSubstitution& rhs) const
|
||||
{
|
||||
// compare class and all of the fields all substitutions have
|
||||
// in common
|
||||
// this should be called by subclasses before their own equality tests
|
||||
return getDynamicClassID() == rhs.getDynamicClassID()
|
||||
&& pos == rhs.pos
|
||||
&& ruleSet == rhs.ruleSet
|
||||
&& *numberFormat == *rhs.numberFormat;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a textual description of the substitution
|
||||
* @return A textual description of the substitution. This might
|
||||
* not be identical to the description it was created from, but
|
||||
* it'll produce the same result.
|
||||
*/
|
||||
void
|
||||
NFSubstitution::toString(UnicodeString& text) const {
|
||||
// use tokenChar() to get the character at the beginning and
|
||||
// end of the substitutin token. In between them will go
|
||||
// either the name of the rule set it uses, or the pattern of
|
||||
// the DecimalFormat it uses
|
||||
text.remove();
|
||||
text.append(tokenChar());
|
||||
|
||||
UnicodeString temp;
|
||||
if (ruleSet != NULL) {
|
||||
ruleSet->getName(temp);
|
||||
} else {
|
||||
numberFormat->toPattern(temp);
|
||||
}
|
||||
text.append(temp);
|
||||
text.append(tokenChar());
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------
|
||||
// formatting
|
||||
//-----------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Performs a mathematical operation on the number, formats it using
|
||||
* either ruleSet or decimalFormat, and inserts the result into
|
||||
* toInsertInto.
|
||||
* @param number The number being formatted.
|
||||
* @param toInsertInto The string we insert the result into
|
||||
* @param pos The position in toInsertInto where the owning rule's
|
||||
* rule text begins (this value is added to this substitution's
|
||||
* position to determine exactly where to insert the new text)
|
||||
*/
|
||||
void
|
||||
NFSubstitution::doSubstitution(llong number, UnicodeString& toInsertInto, int32_t _pos) const
|
||||
{
|
||||
if (ruleSet != NULL) {
|
||||
// perform a transformation on the number that is dependent
|
||||
// on the type of substitution this is, then just call its
|
||||
// rule set's format() method to format the result
|
||||
llong numberToFormat = transformNumber(number);
|
||||
|
||||
ruleSet->format(numberToFormat, toInsertInto, _pos + this->pos);
|
||||
} else {
|
||||
// or perform the transformation on the number (preserving
|
||||
// the result's fractional part if the formatter it set
|
||||
// to show it), then use that formatter's format() method
|
||||
// to format the result
|
||||
double numberToFormat = transformNumber(llong_asDouble(number));
|
||||
if (numberFormat->getMaximumFractionDigits() == 0) {
|
||||
numberToFormat = floor(numberToFormat);
|
||||
}
|
||||
|
||||
UnicodeString temp;
|
||||
numberFormat->format(numberToFormat, temp);
|
||||
toInsertInto.insert(_pos + this->pos, temp);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Performs a mathematical operation on the number, formats it using
|
||||
* either ruleSet or decimalFormat, and inserts the result into
|
||||
* toInsertInto.
|
||||
* @param number The number being formatted.
|
||||
* @param toInsertInto The string we insert the result into
|
||||
* @param pos The position in toInsertInto where the owning rule's
|
||||
* rule text begins (this value is added to this substitution's
|
||||
* position to determine exactly where to insert the new text)
|
||||
*/
|
||||
void
|
||||
NFSubstitution::doSubstitution(double number, UnicodeString& toInsertInto, int32_t _pos) const {
|
||||
// perform a transformation on the number being formatted that
|
||||
// is dependent on the type of substitution this is
|
||||
double numberToFormat = transformNumber(number);
|
||||
|
||||
// if the result is an integer, from here on out we work in integer
|
||||
// space (saving time and memory and preserving accuracy)
|
||||
if (numberToFormat == floor(numberToFormat) && ruleSet != NULL) {
|
||||
ruleSet->format(llong(numberToFormat), toInsertInto, _pos + this->pos);
|
||||
|
||||
// if the result isn't an integer, then call either our rule set's
|
||||
// format() method or our DecimalFormat's format() method to
|
||||
// format the result
|
||||
} else {
|
||||
if (ruleSet != NULL) {
|
||||
ruleSet->format(numberToFormat, toInsertInto, _pos + this->pos);
|
||||
} else {
|
||||
UnicodeString temp;
|
||||
numberFormat->format(numberToFormat, temp);
|
||||
toInsertInto.insert(_pos + this->pos, temp);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
//-----------------------------------------------------------------------
|
||||
// parsing
|
||||
//-----------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Parses a string using the rule set or DecimalFormat belonging
|
||||
* to this substitution. If there's a match, a mathematical
|
||||
* operation (the inverse of the one used in formatting) is
|
||||
* performed on the result of the parse and the value passed in
|
||||
* and returned as the result. The parse position is updated to
|
||||
* point to the first unmatched character in the string.
|
||||
* @param text The string to parse
|
||||
* @param parsePosition On entry, ignored, but assumed to be 0.
|
||||
* On exit, this is updated to point to the first unmatched
|
||||
* character (or 0 if the substitution didn't match)
|
||||
* @param baseValue A partial parse result that should be
|
||||
* combined with the result of this parse
|
||||
* @param upperBound When searching the rule set for a rule
|
||||
* matching the string passed in, only rules with base values
|
||||
* lower than this are considered
|
||||
* @param lenientParse If true and matching against rules fails,
|
||||
* the substitution will also try matching the text against
|
||||
* numerals using a default-costructed NumberFormat. If false,
|
||||
* no extra work is done. (This value is false whenever the
|
||||
* formatter isn't in lenient-parse mode, but is also false
|
||||
* under some conditions even when the formatter _is_ in
|
||||
* lenient-parse mode.)
|
||||
* @return If there's a match, this is the result of composing
|
||||
* baseValue with whatever was returned from matching the
|
||||
* characters. This will be either a Long or a Double. If there's
|
||||
* no match this is new Long(0) (not null), and parsePosition
|
||||
* is left unchanged.
|
||||
*/
|
||||
UBool
|
||||
NFSubstitution::doParse(const UnicodeString& text,
|
||||
ParsePosition& parsePosition,
|
||||
double baseValue,
|
||||
double upperBound,
|
||||
UBool lenientParse,
|
||||
Formattable& result) const
|
||||
{
|
||||
#ifdef RBNF_DEBUG
|
||||
fprintf(stderr, "<nfsubs> %x bv: %g ub: %g\n", this, baseValue, upperBound);
|
||||
#endif
|
||||
// figure out the highest base value a rule can have and match
|
||||
// the text being parsed (this varies according to the type of
|
||||
// substitutions: multiplier, modulus, and numerator substitutions
|
||||
// restrict the search to rules with base values lower than their
|
||||
// own; same-value substitutions leave the upper bound wherever
|
||||
// it was, and the others allow any rule to match
|
||||
upperBound = calcUpperBound(upperBound);
|
||||
|
||||
// use our rule set to parse the text. If that fails and
|
||||
// lenient parsing is enabled (this is always false if the
|
||||
// formatter's lenient-parsing mode is off, but it may also
|
||||
// be false even when the formatter's lenient-parse mode is
|
||||
// on), then also try parsing the text using a default-
|
||||
// constructed NumberFormat
|
||||
if (ruleSet != NULL) {
|
||||
ruleSet->parse(text, parsePosition, upperBound, result);
|
||||
if (lenientParse && !ruleSet->isFractionRuleSet() && parsePosition.getIndex() == 0) {
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
NumberFormat* fmt = NumberFormat::createInstance(status);
|
||||
if (U_SUCCESS(status)) {
|
||||
fmt->parse(text, result, parsePosition);
|
||||
}
|
||||
delete fmt;
|
||||
}
|
||||
|
||||
// ...or use our DecimalFormat to parse the text
|
||||
} else {
|
||||
numberFormat->parse(text, result, parsePosition);
|
||||
}
|
||||
|
||||
// if the parse was successful, we've already advanced the caller's
|
||||
// parse position (this is the one function that doesn't have one
|
||||
// of its own). Derive a parse result and return it as a Long,
|
||||
// if possible, or a Double
|
||||
if (parsePosition.getIndex() != 0) {
|
||||
double tempResult = (result.getType() == Formattable::kLong) ?
|
||||
(double)result.getLong() :
|
||||
result.getDouble();
|
||||
|
||||
// composeRuleValue() produces a full parse result from
|
||||
// the partial parse result passed to this function from
|
||||
// the caller (this is either the owning rule's base value
|
||||
// or the partial result obtained from composing the
|
||||
// owning rule's base value with its other substitution's
|
||||
// parse result) and the partial parse result obtained by
|
||||
// matching the substitution (which will be the same value
|
||||
// the caller would get by parsing just this part of the
|
||||
// text with RuleBasedNumberFormat.parse() ). How the two
|
||||
// values are used to derive the full parse result depends
|
||||
// on the types of substitutions: For a regular rule, the
|
||||
// ultimate result is its multiplier substitution's result
|
||||
// times the rule's divisor (or the rule's base value) plus
|
||||
// the modulus substitution's result (which will actually
|
||||
// supersede part of the rule's base value). For a negative-
|
||||
// number rule, the result is the negative of its substitution's
|
||||
// result. For a fraction rule, it's the sum of its two
|
||||
// substitution results. For a rule in a fraction rule set,
|
||||
// it's the numerator substitution's result divided by
|
||||
// the rule's base value. Results from same-value substitutions
|
||||
// propagate back upard, and null substitutions don't affect
|
||||
// the result.
|
||||
tempResult = composeRuleValue(tempResult, baseValue);
|
||||
result.setDouble(tempResult);
|
||||
return TRUE;
|
||||
// if the parse was UNsuccessful, return 0
|
||||
} else {
|
||||
result.setLong(0);
|
||||
return FALSE;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
 * Tells whether this is a null substitution. This implementation
 * always answers FALSE.
 * @return FALSE
 */
UBool
NFSubstitution::isNullSubstitution() const {
    const UBool isNull = FALSE;
    return isNull;
}
|
||||
|
||||
/**
|
||||
* Returns true if this is a modulus substitution. (We didn't do this
|
||||
* with instanceof partially because it causes source files to
|
||||
* proliferate and partially because we have to port this to C++.)
|
||||
* @return true if this object is an instance of ModulusSubstitution
|
||||
*/
|
||||
UBool
|
||||
NFSubstitution::isModulusSubstitution() const {
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
//===================================================================
|
||||
// SameValueSubstitution
|
||||
//===================================================================
|
||||
|
||||
/**
|
||||
* A substitution that passes the value passed to it through unchanged.
|
||||
* Represented by == in rule descriptions.
|
||||
*/
|
||||
SameValueSubstitution::SameValueSubstitution(int32_t _pos,
|
||||
const NFRuleSet* _ruleSet,
|
||||
const RuleBasedNumberFormat* formatter,
|
||||
const UnicodeString& description,
|
||||
UErrorCode& status)
|
||||
: NFSubstitution(_pos, _ruleSet, formatter, description, status)
|
||||
|
||||
{
|
||||
if (description == gEqualsEquals) {
|
||||
// throw new IllegalArgumentException("== is not a legal token");
|
||||
status = U_PARSE_ERROR;
|
||||
}
|
||||
}
|
||||
|
||||
char SameValueSubstitution::fgClassID;
|
||||
|
||||
UClassID
|
||||
SameValueSubstitution::getDynamicClassID() const {
|
||||
return getStaticClassID();
|
||||
}
|
||||
|
||||
|
||||
//===================================================================
|
||||
// MultiplierSubstitution
|
||||
//===================================================================
|
||||
|
||||
char MultiplierSubstitution::fgClassID;
|
||||
|
||||
UClassID
|
||||
MultiplierSubstitution::getDynamicClassID() const {
|
||||
return getStaticClassID();
|
||||
}
|
||||
|
||||
/**
 * Two multiplier substitutions are equal when the NFSubstitution
 * comparison succeeds and their divisors match.
 */
UBool MultiplierSubstitution::operator==(const NFSubstitution& rhs) const
{
    // the base-class test runs first; rhs is only viewed as a
    // MultiplierSubstitution once that test has passed
    if (!NFSubstitution::operator==(rhs)) {
        return FALSE;
    }
    const MultiplierSubstitution* other = (const MultiplierSubstitution*)&rhs;
    return divisor == other->divisor;
}
|
||||
|
||||
|
||||
//===================================================================
|
||||
// ModulusSubstitution
|
||||
//===================================================================
|
||||
|
||||
/**
|
||||
* A substitution that divides the number being formatted by the its rule's
|
||||
* divisor and formats the remainder. Represented by ">>" in a
|
||||
* regular rule.
|
||||
*/
|
||||
ModulusSubstitution::ModulusSubstitution(int32_t _pos,
|
||||
double _divisor,
|
||||
const NFRule* predecessor,
|
||||
const NFRuleSet* _ruleSet,
|
||||
const RuleBasedNumberFormat* formatter,
|
||||
const UnicodeString& description,
|
||||
UErrorCode& status)
|
||||
: NFSubstitution(_pos, _ruleSet, formatter, description, status)
|
||||
, divisor(_divisor)
|
||||
, ruleToUse(NULL)
|
||||
{
|
||||
ldivisor = _divisor;
|
||||
|
||||
// the owning rule's divisor controls the behavior of this
|
||||
// substitution: rather than keeping a backpointer to the rule,
|
||||
// we keep a copy of the divisor
|
||||
|
||||
if (description == gGreaterGreaterGreaterThan) {
|
||||
// the >>> token doesn't alter how this substituion calculates the
|
||||
// values it uses for formatting and parsing, but it changes
|
||||
// what's done with that value after it's obtained: >>> short-
|
||||
// circuits the rule-search process and goes straight to the
|
||||
// specified rule to format the substitution value
|
||||
ruleToUse = predecessor;
|
||||
}
|
||||
}
|
||||
|
||||
char ModulusSubstitution::fgClassID;
|
||||
|
||||
UClassID
|
||||
ModulusSubstitution::getDynamicClassID() const {
|
||||
return getStaticClassID();
|
||||
}
|
||||
|
||||
/**
 * Two modulus substitutions are equal when the NFSubstitution
 * comparison succeeds, their divisors match, and they point at the
 * same ruleToUse.
 */
UBool ModulusSubstitution::operator==(const NFSubstitution& rhs) const
{
    // the base-class test runs first; rhs is only viewed as a
    // ModulusSubstitution once that test has passed
    if (!NFSubstitution::operator==(rhs)) {
        return FALSE;
    }
    const ModulusSubstitution* other = (const ModulusSubstitution*)&rhs;
    if (!(divisor == other->divisor)) {
        return FALSE;
    }
    return ruleToUse == other->ruleToUse;
}
|
||||
|
||||
//-----------------------------------------------------------------------
|
||||
// formatting
|
||||
//-----------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* If this is a >>> substitution, use ruleToUse to fill in
|
||||
* the substitution. Otherwise, just use the superclass function.
|
||||
* @param number The number being formatted
|
||||
* @toInsertInto The string to insert the result of this substitution
|
||||
* into
|
||||
* @param pos The position of the rule text in toInsertInto
|
||||
*/
|
||||
void
|
||||
ModulusSubstitution::doSubstitution(llong number, UnicodeString& toInsertInto, int32_t _pos) const
|
||||
{
|
||||
// if this isn't a >>> substitution, just use the inherited version
|
||||
// of this function (which uses either a rule set or a DecimalFormat
|
||||
// to format its substitution value)
|
||||
if (ruleToUse == NULL) {
|
||||
NFSubstitution::doSubstitution(number, toInsertInto, _pos);
|
||||
|
||||
// a >>> substitution goes straight to a particular rule to
|
||||
// format the substitution value
|
||||
} else {
|
||||
llong numberToFormat = transformNumber(number);
|
||||
ruleToUse->doFormat(numberToFormat, toInsertInto, _pos + getPos());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* If this is a >>> substitution, use ruleToUse to fill in
|
||||
* the substitution. Otherwise, just use the superclass function.
|
||||
* @param number The number being formatted
|
||||
* @toInsertInto The string to insert the result of this substitution
|
||||
* into
|
||||
* @param pos The position of the rule text in toInsertInto
|
||||
*/
|
||||
void
|
||||
ModulusSubstitution::doSubstitution(double number, UnicodeString& toInsertInto, int32_t _pos) const
|
||||
{
|
||||
// if this isn't a >>> substitution, just use the inherited version
|
||||
// of this function (which uses either a rule set or a DecimalFormat
|
||||
// to format its substitution value)
|
||||
if (ruleToUse == NULL) {
|
||||
NFSubstitution::doSubstitution(number, toInsertInto, _pos);
|
||||
|
||||
// a >>> substitution goes straight to a particular rule to
|
||||
// format the substitution value
|
||||
} else {
|
||||
double numberToFormat = transformNumber(number);
|
||||
|
||||
ruleToUse->doFormat(numberToFormat, toInsertInto, _pos + getPos());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
//-----------------------------------------------------------------------
|
||||
// parsing
|
||||
//-----------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* If this is a >>> substitution, match only against ruleToUse.
|
||||
* Otherwise, use the superclass function.
|
||||
* @param text The string to parse
|
||||
* @param parsePosition Ignored on entry, updated on exit to point to
|
||||
* the first unmatched character.
|
||||
* @param baseValue The partial parse result prior to calling this
|
||||
* routine.
|
||||
*/
|
||||
UBool
|
||||
ModulusSubstitution::doParse(const UnicodeString& text,
|
||||
ParsePosition& parsePosition,
|
||||
double baseValue,
|
||||
double upperBound,
|
||||
UBool lenientParse,
|
||||
Formattable& result) const
|
||||
{
|
||||
// if this isn't a >>> substitution, we can just use the
|
||||
// inherited parse() routine to do the parsing
|
||||
if (ruleToUse == NULL) {
|
||||
return NFSubstitution::doParse(text, parsePosition, baseValue, upperBound, lenientParse, result);
|
||||
|
||||
// but if it IS a >>> substitution, we have to do it here: we
|
||||
// use the specific rule's doParse() method, and then we have to
|
||||
// do some of the other work of NFRuleSet.parse()
|
||||
} else {
|
||||
ruleToUse->doParse(text, parsePosition, FALSE, upperBound, result);
|
||||
|
||||
if (parsePosition.getIndex() != 0) {
|
||||
double tempResult = result.getDouble();
|
||||
tempResult = composeRuleValue(tempResult, baseValue);
|
||||
result.setDouble(tempResult);
|
||||
}
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
//===================================================================
|
||||
// IntegralPartSubstitution
|
||||
//===================================================================
|
||||
|
||||
char IntegralPartSubstitution::fgClassID;
|
||||
|
||||
UClassID
|
||||
IntegralPartSubstitution::getDynamicClassID() const {
|
||||
return getStaticClassID();
|
||||
}
|
||||
|
||||
|
||||
//===================================================================
|
||||
// FractionalPartSubstitution
|
||||
//===================================================================
|
||||
|
||||
|
||||
/**
|
||||
* Constructs a FractionalPartSubstitution. This object keeps a flag
|
||||
* telling whether it should format by digits or not. In addition,
|
||||
* it marks the rule set it calls (if any) as a fraction rule set.
|
||||
*/
|
||||
FractionalPartSubstitution::FractionalPartSubstitution(int32_t _pos,
|
||||
const NFRuleSet* _ruleSet,
|
||||
const RuleBasedNumberFormat* formatter,
|
||||
const UnicodeString& description,
|
||||
UErrorCode& status)
|
||||
: NFSubstitution(_pos, _ruleSet, formatter, description, status)
|
||||
, byDigits(FALSE)
|
||||
, useSpaces(TRUE)
|
||||
|
||||
{
|
||||
// akk, ruleSet can change in superclass constructor
|
||||
if (description == gGreaterGreaterThan ||
|
||||
description == gGreaterGreaterGreaterThan ||
|
||||
_ruleSet == getRuleSet()) {
|
||||
byDigits = TRUE;
|
||||
if (description == gGreaterGreaterGreaterThan) {
|
||||
useSpaces = FALSE;
|
||||
}
|
||||
} else {
|
||||
// cast away const
|
||||
((NFRuleSet*)getRuleSet())->makeIntoFractionRuleSet();
|
||||
}
|
||||
|
||||
// TODO: Thai doesn't use spaces, so spelling out decimals with
|
||||
// spaces between the words for each digit is incorrect.
|
||||
// The rules don't seem to accomodate this, at least I can't figure
|
||||
// out how to handle it using the rules. Need to provide better
|
||||
// control over fractional part formatting.
|
||||
// For now, just check if locale uses the Thai language.
|
||||
|
||||
// useSpaces = strcmp(formatter->locale.getLanguage(), "th") != 0;
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------
|
||||
// formatting
|
||||
//-----------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* If in "by digits" mode, fills in the substitution one decimal digit
|
||||
* at a time using the rule set containing this substitution.
|
||||
* Otherwise, uses the superclass function.
|
||||
* @param number The number being formatted
|
||||
* @param toInsertInto The string to insert the result of formatting
|
||||
* the substitution into
|
||||
* @param pos The position of the owning rule's rule text in
|
||||
* toInsertInto
|
||||
*/
|
||||
void
|
||||
FractionalPartSubstitution::doSubstitution(double number, UnicodeString& toInsertInto, int32_t _pos) const
|
||||
{
|
||||
// if we're not in "byDigits" mode, just use the inherited
|
||||
// doSubstitution() routine
|
||||
if (!byDigits) {
|
||||
NFSubstitution::doSubstitution(number, toInsertInto, _pos);
|
||||
|
||||
// if we're in "byDigits" mode, transform the value into an integer
|
||||
// by moving the decimal point eight places to the right and
|
||||
// pulling digits off the right one at a time, formatting each digit
|
||||
// as an integer using this substitution's owning rule set
|
||||
// (this is slower, but more accurate, than doing it from the
|
||||
// other end)
|
||||
} else {
|
||||
int32_t numberToFormat = (int32_t)round(transformNumber(number) * pow(10, kMaxDecimalDigits));
|
||||
// this flag keeps us from formatting trailing zeros. It starts
|
||||
// out false because we're pulling from the right, and switches
|
||||
// to true the first time we encounter a non-zero digit
|
||||
UBool doZeros = FALSE;
|
||||
for (int32_t i = 0; i < kMaxDecimalDigits; i++) {
|
||||
int32_t digit = numberToFormat % 10;
|
||||
if (digit != 0 || doZeros) {
|
||||
if (doZeros && useSpaces) {
|
||||
toInsertInto.insert(_pos + getPos(), gSpace);
|
||||
}
|
||||
doZeros = TRUE;
|
||||
getRuleSet()->format(digit, toInsertInto, _pos + getPos());
|
||||
}
|
||||
numberToFormat /= 10;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------
|
||||
// parsing
|
||||
//-----------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* If in "by digits" mode, parses the string as if it were a string
|
||||
* of individual digits; otherwise, uses the superclass function.
|
||||
* @param text The string to parse
|
||||
* @param parsePosition Ignored on entry, but updated on exit to point
|
||||
* to the first unmatched character
|
||||
* @param baseValue The partial parse result prior to entering this
|
||||
* function
|
||||
* @param upperBound Only consider rules with base values lower than
|
||||
* this when filling in the substitution
|
||||
* @param lenientParse If true, try matching the text as numerals if
|
||||
* matching as words doesn't work
|
||||
* @return If the match was successful, the current partial parse
|
||||
* result; otherwise new Long(0). The result is either a Long or
|
||||
* a Double.
|
||||
*/
|
||||
UBool
|
||||
FractionalPartSubstitution::doParse(const UnicodeString& text,
|
||||
ParsePosition& parsePosition,
|
||||
double baseValue,
|
||||
double upperBound,
|
||||
UBool lenientParse,
|
||||
Formattable& resVal) const
|
||||
{
|
||||
// if we're not in byDigits mode, we can just use the inherited
|
||||
// doParse()
|
||||
if (!byDigits) {
|
||||
return NFSubstitution::doParse(text, parsePosition, baseValue, 0, lenientParse, resVal);
|
||||
|
||||
// if we ARE in byDigits mode, parse the text one digit at a time
|
||||
// using this substitution's owning rule set (we do this by setting
|
||||
// upperBound to 10 when calling doParse() ) until we reach
|
||||
// nonmatching text
|
||||
} else {
|
||||
UnicodeString workText(text);
|
||||
ParsePosition workPos(1);
|
||||
double result = 0;
|
||||
int32_t digit;
|
||||
double p10 = 0.1;
|
||||
|
||||
NumberFormat* fmt = NULL;
|
||||
while (workText.length() > 0 && workPos.getIndex() != 0) {
|
||||
workPos.setIndex(0);
|
||||
Formattable temp;
|
||||
getRuleSet()->parse(workText, workPos, 10, temp);
|
||||
digit = temp.getType() == Formattable::kLong ?
|
||||
temp.getLong() :
|
||||
(int32_t)temp.getDouble();
|
||||
|
||||
if (lenientParse && workPos.getIndex() == 0) {
|
||||
if (!fmt) {
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
fmt = NumberFormat::createInstance(status);
|
||||
if (U_FAILURE(status)) {
|
||||
delete fmt;
|
||||
fmt = NULL;
|
||||
}
|
||||
}
|
||||
if (fmt) {
|
||||
fmt->parse(workText, temp, workPos);
|
||||
digit = temp.getLong();
|
||||
}
|
||||
}
|
||||
|
||||
if (workPos.getIndex() != 0) {
|
||||
result += digit * p10;
|
||||
p10 /= 10;
|
||||
parsePosition.setIndex(parsePosition.getIndex() + workPos.getIndex());
|
||||
workText.removeBetween(0, workPos.getIndex());
|
||||
while (workText.length() > 0 && workText.charAt(0) == gSpace) {
|
||||
workText.removeBetween(0, 1);
|
||||
parsePosition.setIndex(parsePosition.getIndex() + 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
delete fmt;
|
||||
|
||||
result = composeRuleValue(result, baseValue);
|
||||
resVal.setDouble(result);
|
||||
return TRUE;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Two fractional-part substitutions are equal when the NFSubstitution
 * comparison succeeds and both formatting flags (byDigits and
 * useSpaces) match. useSpaces has to take part in the comparison
 * because it changes the formatted output: without it, a ">>" and a
 * ">>>" substitution that are otherwise alike would compare equal.
 */
UBool
FractionalPartSubstitution::operator==(const NFSubstitution& rhs) const
{
    // the base-class test runs first; rhs is only viewed as a
    // FractionalPartSubstitution once that test has passed
    if (!NFSubstitution::operator==(rhs)) {
        return FALSE;
    }
    const FractionalPartSubstitution* other = (const FractionalPartSubstitution*)&rhs;
    return other->byDigits == byDigits &&
        other->useSpaces == useSpaces;
}
|
||||
|
||||
char FractionalPartSubstitution::fgClassID;
|
||||
|
||||
UClassID
|
||||
FractionalPartSubstitution::getDynamicClassID() const {
|
||||
return getStaticClassID();
|
||||
}
|
||||
|
||||
|
||||
//===================================================================
|
||||
// AbsoluteValueSubstitution
|
||||
//===================================================================
|
||||
|
||||
char AbsoluteValueSubstitution::fgClassID;
|
||||
|
||||
UClassID
|
||||
AbsoluteValueSubstitution::getDynamicClassID() const {
|
||||
return getStaticClassID();
|
||||
}
|
||||
|
||||
//===================================================================
|
||||
// NumeratorSubstitution
|
||||
//===================================================================
|
||||
|
||||
/**
 * Two numerator substitutions are equal when the NFSubstitution
 * comparison succeeds and their denominators match.
 */
UBool
NumeratorSubstitution::operator==(const NFSubstitution& rhs) const
{
    // the base-class test runs first; rhs is only viewed as a
    // NumeratorSubstitution once that test has passed
    if (!NFSubstitution::operator==(rhs)) {
        return FALSE;
    }
    const NumeratorSubstitution* other = (const NumeratorSubstitution*)&rhs;
    return denominator == other->denominator;
}
|
||||
|
||||
char NumeratorSubstitution::fgClassID;
|
||||
|
||||
UClassID
|
||||
NumeratorSubstitution::getDynamicClassID() const {
|
||||
return getStaticClassID();
|
||||
}
|
||||
|
||||
//===================================================================
|
||||
// NullSubstitution
|
||||
//===================================================================
|
||||
|
||||
char NullSubstitution::fgClassID;
|
||||
|
||||
UClassID
|
||||
NullSubstitution::getDynamicClassID() const {
|
||||
return getStaticClassID();
|
||||
}
|
||||
|
498
icu4c/source/i18n/nfsubs.h
Normal file
498
icu4c/source/i18n/nfsubs.h
Normal file
@ -0,0 +1,498 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 1997-2001, International Business Machines Corporation and others. All Rights Reserved.
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef NFSUBS_H
|
||||
#define NFSUBS_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/decimfmt.h"
|
||||
#include "nfrs.h"
|
||||
#include "nfrule.h"
|
||||
#include "llong.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
// The largest finite double value. Declared const so that the files
// including this header cannot modify it by accident.
static const double MAX_DOUBLE = 1.7976931348623157e+308;
|
||||
/**
 * Floating-point remainder with the semantics of Java's '%' operator.
 * @param n The dividend
 * @param d The divisor
 * @return The remainder of n divided by d. It has the sign of n and a
 * magnitude smaller than that of d.
 */
static double java_fmod(double n, double d)
{
    // c doesn't define '%' for floating point, but java does.
    // from the java language spec 15.17:
    // "In the remaining cases, where neither an infinity, nor a zero,
    // nor NaN is involved, the floating-point remainder r from the
    // division of a dividend n by a divisor d is defined by the
    // mathematical relation r = n - (d . q) where q is an integer
    // that is negative only if n/d is negative and positive only if
    // n/d is positive, and whose magnitude is as large as possible
    // without exceeding the magnitude of the true mathematical
    // quotient of n and d."
    //
    // The C library's fmod() is defined by the same relation (the
    // quotient is truncated toward zero), so it is used directly.
    // Computing n - d * q by hand instead loses precision once the
    // quotient is large, and yields NaN rather than n when d is
    // infinite.
    return fmod(n, d);
}
|
||||
|
||||
static double round(double n)
|
||||
{
|
||||
return floor(n + .5);
|
||||
}
|
||||
|
||||
class NFSubstitution {
|
||||
int32_t pos;
|
||||
const NFRuleSet* ruleSet;
|
||||
const DecimalFormat* numberFormat;
|
||||
|
||||
protected:
|
||||
NFSubstitution(int32_t pos,
|
||||
const NFRuleSet* ruleSet,
|
||||
const RuleBasedNumberFormat* rbnf,
|
||||
const UnicodeString& description,
|
||||
UErrorCode& status);
|
||||
|
||||
const NFRuleSet* getRuleSet() const { return ruleSet; }
|
||||
const DecimalFormat* getNumberFormat() const { return numberFormat; }
|
||||
|
||||
public:
|
||||
static NFSubstitution* makeSubstitution(int32_t pos,
|
||||
const NFRule* rule,
|
||||
const NFRule* predecessor,
|
||||
const NFRuleSet* ruleSet,
|
||||
const RuleBasedNumberFormat* rbnf,
|
||||
const UnicodeString& description,
|
||||
UErrorCode& status);
|
||||
|
||||
virtual ~NFSubstitution();
|
||||
|
||||
virtual UBool operator==(const NFSubstitution& rhs) const;
|
||||
UBool operator!=(const NFSubstitution& rhs) const { return !operator==(rhs); }
|
||||
|
||||
/**
|
||||
* Sets the substitution's divisor. Used by NFRule.setBaseValue().
|
||||
* A no-op for all substitutions except multiplier and modulus
|
||||
* substitutions.
|
||||
* @param radix The radix of the divisor
|
||||
* @param exponent The exponent of the divisor
|
||||
*/
|
||||
virtual void setDivisor(int32_t radix, int32_t exponent);
|
||||
|
||||
/**
|
||||
* Replaces result with the string describing the substitution.
|
||||
*/
|
||||
virtual void toString(UnicodeString& result) const;
|
||||
|
||||
//-----------------------------------------------------------------------
|
||||
// formatting
|
||||
//-----------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Performs a mathematical operation on the number, formats it using
|
||||
* either ruleSet or decimalFormat, and inserts the result into
|
||||
* toInsertInto.
|
||||
* @param number The number being formatted.
|
||||
* @param toInsertInto The string we insert the result into
|
||||
* @param pos The position in toInsertInto where the owning rule's
|
||||
* rule text begins (this value is added to this substitution's
|
||||
* position to determine exactly where to insert the new text)
|
||||
*/
|
||||
virtual void doSubstitution(llong number, UnicodeString& toInsertInto, int32_t pos) const;
|
||||
virtual void doSubstitution(double number, UnicodeString& toInsertInto, int32_t pos) const;
|
||||
|
||||
protected:
|
||||
/**
|
||||
* Subclasses override this function to perform some kind of
|
||||
* mathematical operation on the number. The result of this operation
|
||||
* is formatted using the rule set or DecimalFormat that this
|
||||
* substitution refers to, and the result is inserted into the result
|
||||
* string.
|
||||
* @param number The number being formatted
|
||||
* @return The result of performing the operation on the number
|
||||
*/
|
||||
virtual llong transformNumber(llong number) const = 0;
|
||||
virtual double transformNumber(double number) const = 0;
|
||||
|
||||
public:
|
||||
//-----------------------------------------------------------------------
|
||||
// parsing
|
||||
//-----------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Parses a string using the rule set or DecimalFormat belonging
|
||||
* to this substitution. If there's a match, a mathematical
|
||||
* operation (the inverse of the one used in formatting) is
|
||||
* performed on the result of the parse and the value passed in
|
||||
* and returned as the result. The parse position is updated to
|
||||
* point to the first unmatched character in the string.
|
||||
* @param text The string to parse
|
||||
* @param parsePosition On entry, ignored, but assumed to be 0.
|
||||
* On exit, this is updated to point to the first unmatched
|
||||
* character (or 0 if the substitution didn't match)
|
||||
* @param baseValue A partial parse result that should be
|
||||
* combined with the result of this parse
|
||||
* @param upperBound When searching the rule set for a rule
|
||||
* matching the string passed in, only rules with base values
|
||||
* lower than this are considered
|
||||
* @param lenientParse If true and matching against rules fails,
|
||||
* the substitution will also try matching the text against
|
||||
* numerals using a default-constructed NumberFormat. If false,
|
||||
* no extra work is done. (This value is false whenever the
|
||||
* formatter isn't in lenient-parse mode, but is also false
|
||||
* under some conditions even when the formatter _is_ in
|
||||
* lenient-parse mode.)
|
||||
* @return If there's a match, this is the result of composing
|
||||
* baseValue with whatever was returned from matching the
|
||||
* characters. This will be either a Long or a Double. If there's
|
||||
* no match this is new Long(0) (not null), and parsePosition
|
||||
* is left unchanged.
|
||||
*/
|
||||
virtual UBool doParse(const UnicodeString& text,
|
||||
ParsePosition& parsePosition,
|
||||
double baseValue,
|
||||
double upperBound,
|
||||
UBool lenientParse,
|
||||
Formattable& result) const;
|
||||
|
||||
/**
|
||||
* Derives a new value from the two values passed in. The two values
|
||||
* are typically either the base values of two rules (the one containing
|
||||
* the substitution and the one matching the substitution) or partial
|
||||
* parse results derived in some other way. The operation is generally
|
||||
* the inverse of the operation performed by transformNumber().
|
||||
* @param newRuleValue The value produced by matching this substitution
|
||||
* @param oldRuleValue The value that was passed to the substitution
|
||||
* by the rule that owns it
|
||||
* @return A third value derived from the other two, representing a
|
||||
* partial parse result
|
||||
*/
|
||||
virtual double composeRuleValue(double newRuleValue, double oldRuleValue) const = 0;
|
||||
|
||||
/**
|
||||
* Calculates an upper bound when searching for a rule that matches
|
||||
* this substitution. Rules with base values greater than or equal
|
||||
* to upperBound are not considered.
|
||||
* @param oldUpperBound The current upper-bound setting. The new
|
||||
* upper bound can't be any higher.
|
||||
*/
|
||||
virtual double calcUpperBound(double oldUpperBound) const = 0;
|
||||
|
||||
//-----------------------------------------------------------------------
|
||||
// simple accessors
|
||||
//-----------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Returns the substitution's position in the rule that owns it.
|
||||
* @return The substitution's position in the rule that owns it.
|
||||
*/
|
||||
|
||||
// Inline read-only accessor: hands back the stored pos member unchanged.
int32_t getPos() const { return pos; }
|
||||
|
||||
/**
|
||||
* Returns the character used in the textual representation of
|
||||
* substitutions of this type. Used by toString().
|
||||
* @return This substitution's token character.
|
||||
*/
|
||||
virtual UChar tokenChar() const = 0;
|
||||
|
||||
/**
|
||||
* Returns true if this is a null substitution. (We didn't do this
|
||||
* with instanceof partially because it causes source files to
|
||||
* proliferate and partially because we have to port this to C++.)
|
||||
* @return true if this object is an instance of NullSubstitution
|
||||
*/
|
||||
virtual UBool isNullSubstitution() const;
|
||||
|
||||
/**
|
||||
* Returns true if this is a modulus substitution. (We didn't do this
|
||||
* with instanceof partially because it causes source files to
|
||||
* proliferate and partially because we have to port this to C++.)
|
||||
* @return true if this object is an instance of ModulusSubstitution
|
||||
*/
|
||||
virtual UBool isModulusSubstitution() const;
|
||||
|
||||
private:
|
||||
static char fgClassID;
|
||||
|
||||
public:
|
||||
// Returns the address of this class's static fgClassID byte, cast to UClassID.
// Callers compare it against getDynamicClassID() to test an object's class.
static UClassID getStaticClassID(void) { return (UClassID)&fgClassID; }
|
||||
virtual UClassID getDynamicClassID(void) const;
|
||||
};
|
||||
|
||||
class SameValueSubstitution : public NFSubstitution {
|
||||
public:
|
||||
SameValueSubstitution(int32_t pos,
|
||||
const NFRuleSet* ruleset,
|
||||
const RuleBasedNumberFormat* formatter,
|
||||
const UnicodeString& description,
|
||||
UErrorCode& status);
|
||||
|
||||
llong transformNumber(llong number) const { return number; }
|
||||
double transformNumber(double number) const { return number; }
|
||||
double composeRuleValue(double newRuleValue, double oldRuleValue) const { return newRuleValue; }
|
||||
double calcUpperBound(double oldUpperBound) const { return oldUpperBound; }
|
||||
UChar tokenChar() const { return (UChar)0x003d; } // '='
|
||||
private:
|
||||
static char fgClassID;
|
||||
|
||||
public:
|
||||
static UClassID getStaticClassID(void) { return (UClassID)&fgClassID; }
|
||||
virtual UClassID getDynamicClassID(void) const;
|
||||
};
|
||||
|
||||
class MultiplierSubstitution : public NFSubstitution {
|
||||
double divisor;
|
||||
llong ldivisor;
|
||||
|
||||
public:
|
||||
MultiplierSubstitution(int32_t _pos,
|
||||
double _divisor,
|
||||
const NFRuleSet* _ruleSet,
|
||||
const RuleBasedNumberFormat* formatter,
|
||||
const UnicodeString& description,
|
||||
UErrorCode& status)
|
||||
: NFSubstitution(_pos, _ruleSet, formatter, description, status), divisor(_divisor)
|
||||
{
|
||||
ldivisor = _divisor;
|
||||
}
|
||||
|
||||
void setDivisor(int32_t radix, int32_t exponent) {
|
||||
divisor = pow(radix, exponent);
|
||||
ldivisor = divisor;
|
||||
}
|
||||
|
||||
UBool operator==(const NFSubstitution& rhs) const;
|
||||
|
||||
llong transformNumber(llong number) const {
|
||||
return number / ldivisor;
|
||||
}
|
||||
|
||||
double transformNumber(double number) const {
|
||||
return floor(number / divisor);
|
||||
}
|
||||
|
||||
double composeRuleValue(double newRuleValue, double oldRuleValue) const {
|
||||
return newRuleValue * divisor;
|
||||
}
|
||||
|
||||
double calcUpperBound(double oldUpperBound) const { return divisor; }
|
||||
|
||||
UChar tokenChar() const { return (UChar)0x003c; } // '<'
|
||||
private:
|
||||
static char fgClassID;
|
||||
|
||||
public:
|
||||
static UClassID getStaticClassID(void) { return (UClassID)&fgClassID; }
|
||||
virtual UClassID getDynamicClassID(void) const;
|
||||
};
|
||||
|
||||
class ModulusSubstitution : public NFSubstitution {
|
||||
double divisor;
|
||||
llong ldivisor;
|
||||
const NFRule* ruleToUse;
|
||||
public:
|
||||
ModulusSubstitution(int32_t pos,
|
||||
double _divisor,
|
||||
const NFRule* rulePredecessor,
|
||||
const NFRuleSet* ruleSet,
|
||||
const RuleBasedNumberFormat* formatter,
|
||||
const UnicodeString& description,
|
||||
UErrorCode& status);
|
||||
|
||||
void setDivisor(int32_t radix, int32_t exponent) {
|
||||
divisor = pow(radix, exponent);
|
||||
ldivisor = divisor;
|
||||
}
|
||||
|
||||
UBool operator==(const NFSubstitution& rhs) const;
|
||||
|
||||
void doSubstitution(llong number, UnicodeString& toInsertInto, int32_t pos) const;
|
||||
void doSubstitution(double number, UnicodeString& toInsertInto, int32_t pos) const;
|
||||
|
||||
llong transformNumber(llong number) const { return number % ldivisor; }
|
||||
double transformNumber(double number) const { return java_fmod(number, divisor); }
|
||||
|
||||
UBool doParse(const UnicodeString& text,
|
||||
ParsePosition& parsePosition,
|
||||
double baseValue,
|
||||
double upperBound,
|
||||
UBool lenientParse,
|
||||
Formattable& result) const;
|
||||
|
||||
double composeRuleValue(double newRuleValue, double oldRuleValue) const {
|
||||
return oldRuleValue - java_fmod(oldRuleValue, divisor) + newRuleValue;
|
||||
}
|
||||
|
||||
double calcUpperBound(double oldUpperBound) const { return divisor; }
|
||||
|
||||
UBool isModulusSubstitution() const { return TRUE; }
|
||||
|
||||
UChar tokenChar() const { return (UChar)0x003e; } // '>'
|
||||
private:
|
||||
static char fgClassID;
|
||||
|
||||
public:
|
||||
static UClassID getStaticClassID(void) { return (UClassID)&fgClassID; }
|
||||
virtual UClassID getDynamicClassID(void) const;
|
||||
};
|
||||
|
||||
class IntegralPartSubstitution : public NFSubstitution {
|
||||
public:
|
||||
IntegralPartSubstitution(int32_t _pos,
|
||||
const NFRuleSet* _ruleSet,
|
||||
const RuleBasedNumberFormat* formatter,
|
||||
const UnicodeString& description,
|
||||
UErrorCode& status)
|
||||
: NFSubstitution(_pos, _ruleSet, formatter, description, status) {}
|
||||
|
||||
llong transformNumber(llong number) const { return number; }
|
||||
double transformNumber(double number) const { return floor(number); }
|
||||
double composeRuleValue(double newRuleValue, double oldRuleValue) const { return newRuleValue + oldRuleValue; }
|
||||
double calcUpperBound(double oldUpperBound) const { return MAX_DOUBLE; }
|
||||
UChar tokenChar() const { return (UChar)0x003c; } // '<'
|
||||
private:
|
||||
static char fgClassID;
|
||||
|
||||
public:
|
||||
static UClassID getStaticClassID(void) { return (UClassID)&fgClassID; }
|
||||
virtual UClassID getDynamicClassID(void) const;
|
||||
};
|
||||
|
||||
class FractionalPartSubstitution : public NFSubstitution {
|
||||
UBool byDigits;
|
||||
UBool useSpaces;
|
||||
enum { kMaxDecimalDigits = 8 };
|
||||
public:
|
||||
FractionalPartSubstitution(int32_t pos,
|
||||
const NFRuleSet* ruleSet,
|
||||
const RuleBasedNumberFormat* formatter,
|
||||
const UnicodeString& description,
|
||||
UErrorCode& status);
|
||||
|
||||
UBool operator==(const NFSubstitution& rhs) const;
|
||||
|
||||
void doSubstitution(double number, UnicodeString& toInsertInto, int32_t pos) const;
|
||||
llong transformNumber(llong number) const { return llong::kZero; }
|
||||
double transformNumber(double number) const { return number - floor(number); }
|
||||
|
||||
UBool doParse(const UnicodeString& text,
|
||||
ParsePosition& parsePosition,
|
||||
double baseValue,
|
||||
double upperBound,
|
||||
UBool lenientParse,
|
||||
Formattable& result) const;
|
||||
|
||||
double composeRuleValue(double newRuleValue, double oldRuleValue) const { return newRuleValue + oldRuleValue; }
|
||||
double calcUpperBound(double oldUpperBound) const { return 0; }
|
||||
UChar tokenChar() const { return (UChar)0x003e; } // '>'
|
||||
private:
|
||||
static char fgClassID;
|
||||
|
||||
public:
|
||||
static UClassID getStaticClassID(void) { return (UClassID)&fgClassID; }
|
||||
virtual UClassID getDynamicClassID(void) const;
|
||||
};
|
||||
|
||||
class AbsoluteValueSubstitution : public NFSubstitution {
|
||||
public:
|
||||
AbsoluteValueSubstitution(int32_t _pos,
|
||||
const NFRuleSet* _ruleSet,
|
||||
const RuleBasedNumberFormat* formatter,
|
||||
const UnicodeString& description,
|
||||
UErrorCode& status)
|
||||
: NFSubstitution(_pos, _ruleSet, formatter, description, status) {}
|
||||
|
||||
llong transformNumber(llong number) const { return llong_abs(number); }
|
||||
double transformNumber(double number) const { return fabs(number); }
|
||||
double composeRuleValue(double newRuleValue, double oldRuleValue) const { return -newRuleValue; }
|
||||
double calcUpperBound(double oldUpperBound) const { return MAX_DOUBLE; }
|
||||
UChar tokenChar() const { return (UChar)0x003e; } // '>'
|
||||
private:
|
||||
static char fgClassID;
|
||||
|
||||
public:
|
||||
static UClassID getStaticClassID(void) { return (UClassID)&fgClassID; }
|
||||
virtual UClassID getDynamicClassID(void) const;
|
||||
};
|
||||
|
||||
class NumeratorSubstitution : public NFSubstitution {
|
||||
double denominator;
|
||||
llong ldenominator;
|
||||
public:
|
||||
NumeratorSubstitution(int32_t _pos,
|
||||
double _denominator,
|
||||
const NFRuleSet* _ruleSet,
|
||||
const RuleBasedNumberFormat* formatter,
|
||||
const UnicodeString& description,
|
||||
UErrorCode& status)
|
||||
: NFSubstitution(_pos, _ruleSet, formatter, description, status), denominator(_denominator)
|
||||
{
|
||||
ldenominator = _denominator;
|
||||
}
|
||||
|
||||
UBool operator==(const NFSubstitution& rhs) const;
|
||||
|
||||
llong transformNumber(llong number) const { return number * ldenominator; }
|
||||
double transformNumber(double number) const { return round(number * denominator); }
|
||||
|
||||
UBool doParse(const UnicodeString& text,
|
||||
ParsePosition& parsePosition,
|
||||
double baseValue,
|
||||
double upperBound,
|
||||
UBool lenientParse,
|
||||
Formattable& result) const
|
||||
{
|
||||
// we don't have to do anything special to do the parsing here,
|
||||
// but we have to turn lenient parsing off-- if we leave it on,
|
||||
// it SERIOUSLY messes up the algorithm
|
||||
return NFSubstitution::doParse(text, parsePosition, baseValue, upperBound, FALSE, result);
|
||||
}
|
||||
double composeRuleValue(double newRuleValue, double oldRuleValue) const { return newRuleValue / oldRuleValue; }
|
||||
double calcUpperBound(double oldUpperBound) const { return denominator; }
|
||||
UChar tokenChar() const { return (UChar)0x003c; } // '<'
|
||||
private:
|
||||
static char fgClassID;
|
||||
|
||||
public:
|
||||
static UClassID getStaticClassID(void) { return (UClassID)&fgClassID; }
|
||||
virtual UClassID getDynamicClassID(void) const;
|
||||
};
|
||||
|
||||
class NullSubstitution : public NFSubstitution {
|
||||
public:
|
||||
NullSubstitution(int32_t _pos,
|
||||
const NFRuleSet* _ruleSet,
|
||||
const RuleBasedNumberFormat* formatter,
|
||||
const UnicodeString& description,
|
||||
UErrorCode& status)
|
||||
: NFSubstitution(_pos, _ruleSet, formatter, description, status) {}
|
||||
|
||||
void toString(UnicodeString& result) const {}
|
||||
void doSubstitution(double number, UnicodeString& toInsertInto, int32_t _pos) const {}
|
||||
void doSubstitution(llong number, UnicodeString& toInsertInto, int32_t _pos) const {}
|
||||
llong transformNumber(llong number) const { return llong::kZero; }
|
||||
double transformNumber(double number) const { return 0; }
|
||||
UBool doParse(const UnicodeString& text,
|
||||
ParsePosition& parsePosition,
|
||||
double baseValue,
|
||||
double upperBound,
|
||||
UBool lenientParse,
|
||||
Formattable& result) const
|
||||
{ result.setDouble(baseValue); return TRUE; }
|
||||
double composeRuleValue(double newRuleValue, double oldRuleValue) const { return 0; } // never called
|
||||
double calcUpperBound(double oldUpperBound) const { return 0; } // never called
|
||||
UBool isNullSubstitution() const { return TRUE; }
|
||||
UChar tokenChar() const { return (UChar)0x0020; } // ' ' never called
|
||||
private:
|
||||
static char fgClassID;
|
||||
|
||||
public:
|
||||
static UClassID getStaticClassID(void) { return (UClassID)&fgClassID; }
|
||||
virtual UClassID getDynamicClassID(void) const;
|
||||
};
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
// NFSUBS_H
|
||||
#endif
|
623
icu4c/source/i18n/rbnf.cpp
Normal file
623
icu4c/source/i18n/rbnf.cpp
Normal file
@ -0,0 +1,623 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 1997-2001, International Business Machines Corporation and others. All Rights Reserved.
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
#include "unicode/rbnf.h"
|
||||
|
||||
#include "nfrs.h"
|
||||
|
||||
#include "cmemory.h"
|
||||
#include "cstring.h"
|
||||
#include "unicode/normlzr.h"
|
||||
#include "unicode/tblcoll.h"
|
||||
#include "unicode/uchar.h"
|
||||
#include "unicode/ucol.h"
|
||||
#include "unicode/uloc.h"
|
||||
#include "unicode/unum.h"
|
||||
#include "unicode/ures.h"
|
||||
#include "unicode/ustring.h"
|
||||
#include "unicode/utf16.h"
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
static const UnicodeString gPercentPercent("%%");
|
||||
|
||||
#define kSomeNumberOfBitsDiv2 22
|
||||
#define kHalfMaxDouble (double)(1 << kSomeNumberOfBitsDiv2)
|
||||
#define kMaxDouble (kHalfMaxDouble * kHalfMaxDouble)
|
||||
|
||||
const char RuleBasedNumberFormat::fgClassID = 0;
|
||||
|
||||
/**
 * Builds a formatter from an explicit rule description.
 * All owned pointers start out NULL and lenient parsing starts out off;
 * the description is then handed to init(), which reports problems
 * through status.
 */
RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
                                             const Locale& alocale,
                                             UParseError& perror,
                                             UErrorCode& status)
    : ruleSets(NULL),
      defaultRuleSet(NULL),
      locale(alocale),
      collator(NULL),
      decimalFormatSymbols(NULL),
      lenient(FALSE),
      lenientParseRules(NULL)
{
    this->init(description, perror, status);
}
|
||||
|
||||
/**
 * Builds a formatter from one of the predefined rule descriptions stored
 * in the locale's resource bundle.
 * @param tag     Selects the resource: spellout, ordinal or duration rules.
 *                Any other value sets U_ILLEGAL_ARGUMENT_ERROR.
 * @param alocale Locale whose resource bundle is opened.
 * @param status  In/out error code; nothing is done if it already
 *                indicates failure.
 */
RuleBasedNumberFormat::RuleBasedNumberFormat(URBNFRuleSetTag tag,
                                             const Locale& alocale,
                                             UErrorCode& status)
    : ruleSets(NULL),
      defaultRuleSet(NULL),
      locale(alocale),
      collator(NULL),
      decimalFormatSymbols(NULL),
      lenient(FALSE),
      lenientParseRules(NULL)
{
    if (U_FAILURE(status)) {
        return;
    }

    // map the tag onto the resource key holding the rule text
    const char* resourceKey = "";
    if (tag == URBNF_SPELLOUT) {
        resourceKey = "SpelloutRules";
    } else if (tag == URBNF_ORDINAL) {
        resourceKey = "OrdinalRules";
    } else if (tag == URBNF_DURATION) {
        resourceKey = "DurationRules";
    } else {
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }

    // fetch the rule text; the bundle is closed again whether or not the
    // lookup succeeded
    UResourceBundle* bundle = ures_open(NULL, locale.getName(), &status);
    int32_t ruleLength = 0;
    const UChar* ruleText = ures_getStringByKey(bundle, resourceKey, &ruleLength, &status);
    if (U_SUCCESS(status)) {
        UnicodeString desc(ruleText, ruleLength);
        UParseError parseError;
        init(desc, parseError, status);
    }
    ures_close(bundle);
}
|
||||
|
||||
/**
 * Copy constructor. Starts from an empty formatter (all owned pointers
 * NULL) and then lets the assignment operator rebuild the rule sets from
 * rhs's rule text and copy its lenient flag.
 */
RuleBasedNumberFormat::RuleBasedNumberFormat(const RuleBasedNumberFormat& rhs)
    : ruleSets(NULL)
    , defaultRuleSet(NULL)
    , locale(rhs.locale)
    , collator(NULL)
    , decimalFormatSymbols(NULL)
    , lenient(FALSE)
    , lenientParseRules(NULL)
{
    // This must be assignment. The code previously called operator==,
    // which only compared the two objects and left the copy empty.
    this->operator=(rhs);
}
|
||||
|
||||
/**
 * Assignment. Throws away this formatter's current state, then rebuilds
 * it by re-parsing rhs's rule text under rhs's locale, and copies the
 * lenient flag. Errors from re-parsing are not reported to the caller
 * (the local status is discarded).
 * @return *this
 */
RuleBasedNumberFormat&
RuleBasedNumberFormat::operator=(const RuleBasedNumberFormat& rhs)
{
    // Self-assignment must be a no-op: dispose() would otherwise destroy
    // the very rule sets that rhs.getRules() is about to read.
    if (this == &rhs) {
        return *this;
    }

    UErrorCode status = U_ZERO_ERROR;
    dispose();
    locale = rhs.locale;
    UnicodeString rules = rhs.getRules();
    UParseError perror;
    init(rules, perror, status);
    lenient = rhs.lenient;
    return *this;
}
|
||||
|
||||
/** Destructor: everything the formatter owns is released by dispose(). */
RuleBasedNumberFormat::~RuleBasedNumberFormat()
{
    this->dispose();
}
|
||||
|
||||
/**
 * Creates a copy of this formatter by re-parsing its rule text under the
 * same locale and copying the lenient flag.
 * @return A new formatter owned by the caller, or NULL if allocation or
 *         rule parsing failed.
 */
Format*
RuleBasedNumberFormat::clone(void) const
{
    // The rule text is passed on as a UnicodeString. The earlier version
    // extracted it into a raw UChar buffer that was never NUL-terminated
    // and then converted it back through the NUL-terminated UnicodeString
    // constructor, reading past the end of the buffer.
    UnicodeString rules = getRules();
    UErrorCode status = U_ZERO_ERROR;
    UParseError perror;

    RuleBasedNumberFormat* result = new RuleBasedNumberFormat(rules, locale, perror, status);
    if (result == NULL) {
        return NULL;
    }
    if (U_FAILURE(status)) {
        delete result;
        return NULL;
    }

    result->lenient = lenient;
    return result;
}
|
||||
|
||||
/**
 * Equality test. Two formatters are equal when other is also a
 * RuleBasedNumberFormat (by class ID), locale and lenient flag match, and
 * the NULL-terminated rule set lists have the same length with pairwise
 * equal rule sets.
 * A formatter whose construction failed has no rule set list at all; two
 * such formatters compare equal to each other and unequal to a formatter
 * that has a list.
 */
UBool
RuleBasedNumberFormat::operator==(const Format& other) const
{
    if (this == &other) {
        return TRUE;
    }

    if (other.getDynamicClassID() == getStaticClassID()) {
        const RuleBasedNumberFormat& rhs = (const RuleBasedNumberFormat&)other;
        if (locale == rhs.locale &&
            lenient == rhs.lenient) {
            NFRuleSet** p = ruleSets;
            NFRuleSet** q = rhs.ruleSets;

            // guard against a missing list before dereferencing it
            if (p == NULL || q == NULL) {
                return (UBool)(p == q);
            }

            // walk both lists in step until one ends or a pair differs
            while (*p && *q && (**p == **q)) {
                ++p;
                ++q;
            }
            // equal only if both lists ended at the same time
            return *q == NULL && *p == NULL;
        }
    }

    return FALSE;
}
|
||||
|
||||
/**
 * Rebuilds the rule description by letting every rule set, in order,
 * append its rules to one string.
 * @return The combined rule text; empty if the formatter has no rule sets
 *         (for example because construction failed).
 */
UnicodeString
RuleBasedNumberFormat::getRules() const
{
    UnicodeString result;
    // ruleSets stays NULL when construction failed; don't walk it then
    if (ruleSets != NULL) {
        for (NFRuleSet** p = ruleSets; *p; ++p) {
            (*p)->appendRules(result);
        }
    }
    return result;
}
|
||||
|
||||
/**
 * Returns the name of the index-th public rule set, counting from zero in
 * the order the rule sets are stored.
 * @param index Zero-based position among the public rule sets.
 * @return The rule set's name, or an empty string when index does not
 *         identify a public rule set. (The earlier code dereferenced a
 *         NULL pointer in that case.)
 */
UnicodeString
RuleBasedNumberFormat::getRuleSetName(int32_t index) const
{
    UnicodeString result;
    if (ruleSets != NULL) {
        for (NFRuleSet** p = ruleSets; *p; ++p) {
            NFRuleSet* rs = *p;
            if (rs->isPublic()) {
                // count down once per public rule set; -1 means "this one"
                if (--index == -1) {
                    rs->getName(result);
                    return result;
                }
            }
        }
    }
    // not found: result is still empty
    return result;
}
|
||||
|
||||
/**
 * Counts the public rule sets.
 * @return The number of rule sets for which isPublic() is true; zero if
 *         the formatter has no rule sets (for example because
 *         construction failed).
 */
int32_t
RuleBasedNumberFormat::getNumberOfRuleSetNames() const
{
    int32_t result = 0;
    // ruleSets stays NULL when construction failed; don't walk it then
    if (ruleSets != NULL) {
        for (NFRuleSet** p = ruleSets; *p; ++p) {
            if ((**p).isPublic()) {
                ++result;
            }
        }
    }
    return result;
}
|
||||
|
||||
/**
 * Looks up a rule set by name.
 * @param name   The name to look for.
 * @param status In/out error code. Nothing is searched if it already
 *               indicates failure; set to U_ILLEGAL_ARGUMENT_ERROR when
 *               no rule set has the given name.
 * @return The matching rule set (still owned by the formatter), or NULL.
 */
NFRuleSet*
RuleBasedNumberFormat::findRuleSet(const UnicodeString& name, UErrorCode& status) const
{
    if (U_SUCCESS(status)) {
        // ruleSets stays NULL when construction failed; treat that like
        // "name not found" instead of dereferencing it
        if (ruleSets != NULL) {
            for (NFRuleSet** p = ruleSets; *p; ++p) {
                NFRuleSet* rs = *p;
                if (rs->isNamed(name)) {
                    return rs;
                }
            }
        }
        status = U_ILLEGAL_ARGUMENT_ERROR;
    }
    return NULL;
}
|
||||
|
||||
/**
 * Formats an int32_t with the default rule set, appending the text to
 * toAppendTo.
 * @param number     The value to format (widened to llong).
 * @param toAppendTo Receives the formatted text at its current end.
 * @param pos        Not used by this implementation.
 * @return toAppendTo. Left unchanged if the formatter has no default rule
 *         set (for example because construction failed).
 */
UnicodeString&
RuleBasedNumberFormat::format(int32_t number,
                              UnicodeString& toAppendTo,
                              FieldPosition& pos) const
{
    // defaultRuleSet stays NULL when construction failed
    if (defaultRuleSet != NULL) {
        defaultRuleSet->format(llong(number), toAppendTo, toAppendTo.length());
    }
    return toAppendTo;
}
|
||||
|
||||
// Compiled out by the surrounding #if 0: llong overload of format() that
// formats with the default rule set.
#if 0
|
||||
UnicodeString&
|
||||
RuleBasedNumberFormat::format(llong number,
|
||||
UnicodeString& toAppendTo,
|
||||
FieldPosition& pos) const
|
||||
{
|
||||
defaultRuleSet->format(number, toAppendTo, toAppendTo.length());
|
||||
return toAppendTo;
|
||||
}
|
||||
#endif
|
||||
|
||||
/**
 * Formats a double with the default rule set, appending the text to
 * toAppendTo.
 * @param number     The value to format.
 * @param toAppendTo Receives the formatted text at its current end.
 * @param pos        Not used by this implementation.
 * @return toAppendTo. Left unchanged if the formatter has no default rule
 *         set (for example because construction failed).
 */
UnicodeString&
RuleBasedNumberFormat::format(double number,
                              UnicodeString& toAppendTo,
                              FieldPosition& pos) const
{
    // defaultRuleSet stays NULL when construction failed
    if (defaultRuleSet != NULL) {
        defaultRuleSet->format(number, toAppendTo, toAppendTo.length());
    }
    return toAppendTo;
}
|
||||
|
||||
|
||||
/**
 * Formats an int32_t with the named rule set, appending the text to
 * toAppendTo.
 * @param number      The value to format (widened to llong).
 * @param ruleSetName Name of the rule set to use. Names beginning with
 *                    "%%" denote internal rule sets and are rejected with
 *                    U_ILLEGAL_ARGUMENT_ERROR.
 * @param toAppendTo  Receives the formatted text at its current end.
 * @param pos         Not used by this implementation.
 * @param status      In/out error code; nothing is done if it already
 *                    indicates failure.
 * @return toAppendTo
 */
UnicodeString&
RuleBasedNumberFormat::format(int32_t number,
                              const UnicodeString& ruleSetName,
                              UnicodeString& toAppendTo,
                              FieldPosition& pos,
                              UErrorCode& status) const
{
    if (U_FAILURE(status)) {
        return toAppendTo;
    }

    // internal rule sets can't be requested from outside
    if (ruleSetName.indexOf(gPercentPercent) == 0) {
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return toAppendTo;
    }

    // findRuleSet() sets status itself when the name is unknown
    NFRuleSet* target = findRuleSet(ruleSetName, status);
    if (target) {
        target->format(llong(number), toAppendTo, toAppendTo.length());
    }
    return toAppendTo;
}
|
||||
|
||||
// Compiled out by the surrounding #if 0: llong overload of format() that
// formats with a named rule set and rejects names starting with "%%".
#if 0
|
||||
UnicodeString&
|
||||
RuleBasedNumberFormat::format(llong number,
|
||||
const UnicodeString& ruleSetName,
|
||||
UnicodeString& toAppendTo,
|
||||
FieldPosition& pos,
|
||||
UErrorCode& status) const
|
||||
{
|
||||
if (U_SUCCESS(status)) {
|
||||
if (ruleSetName.indexOf(gPercentPercent) == 0) {
|
||||
// throw new IllegalArgumentException("Can't use internal rule set");
|
||||
status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
} else {
|
||||
NFRuleSet *rs = findRuleSet(ruleSetName, status);
|
||||
if (rs) {
|
||||
rs->format(number, toAppendTo, toAppendTo.length());
|
||||
}
|
||||
}
|
||||
}
|
||||
return toAppendTo;
|
||||
}
|
||||
#endif
|
||||
|
||||
// make linker happy
|
||||
UnicodeString&
|
||||
RuleBasedNumberFormat::format(const Formattable& obj,
|
||||
UnicodeString& toAppendTo,
|
||||
FieldPosition& pos,
|
||||
UErrorCode& status) const
|
||||
{
|
||||
return NumberFormat::format(obj, toAppendTo, pos, status);
|
||||
}
|
||||
|
||||
/**
 * Formats a double with the named rule set, appending the text to
 * toAppendTo.
 * @param number      The value to format.
 * @param ruleSetName Name of the rule set to use. Names beginning with
 *                    "%%" denote internal rule sets and are rejected with
 *                    U_ILLEGAL_ARGUMENT_ERROR.
 * @param toAppendTo  Receives the formatted text at its current end.
 * @param pos         Not used by this implementation.
 * @param status      In/out error code; nothing is done if it already
 *                    indicates failure.
 * @return toAppendTo
 */
UnicodeString&
RuleBasedNumberFormat::format(double number,
                              const UnicodeString& ruleSetName,
                              UnicodeString& toAppendTo,
                              FieldPosition& pos,
                              UErrorCode& status) const
{
    if (U_FAILURE(status)) {
        return toAppendTo;
    }

    // internal rule sets can't be requested from outside
    if (ruleSetName.indexOf(gPercentPercent) == 0) {
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return toAppendTo;
    }

    // findRuleSet() sets status itself when the name is unknown
    NFRuleSet* target = findRuleSet(ruleSetName, status);
    if (target) {
        target->format(number, toAppendTo, toAppendTo.length());
    }
    return toAppendTo;
}
|
||||
|
||||
void
|
||||
RuleBasedNumberFormat::parse(const UnicodeString& text,
|
||||
Formattable& result,
|
||||
ParsePosition& parsePosition) const
|
||||
{
|
||||
ParsePosition high_pp;
|
||||
Formattable high_result;
|
||||
|
||||
for (NFRuleSet** p = ruleSets; *p; ++p) {
|
||||
NFRuleSet *rp = *p;
|
||||
if (rp->isPublic()) {
|
||||
ParsePosition working_pp = parsePosition;
|
||||
Formattable working_result;
|
||||
|
||||
rp->parse(text, working_pp, kMaxDouble, working_result);
|
||||
if (working_pp.getIndex() > high_pp.getIndex()) {
|
||||
high_pp = working_pp;
|
||||
high_result = working_result;
|
||||
|
||||
if (high_pp.getIndex() == text.length()) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
parsePosition = high_pp;
|
||||
result = high_result;
|
||||
if (result.getType() == Formattable::kDouble) {
|
||||
int32_t r = (int32_t)result.getDouble();
|
||||
if ((double)r == result.getDouble()) {
|
||||
result.setLong(r);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
RuleBasedNumberFormat::setLenient(UBool enabled)
|
||||
{
|
||||
lenient = enabled;
|
||||
if (!enabled && collator) {
|
||||
delete collator;
|
||||
collator = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
// All urbnf objects are created through openRules, so we init all of the
|
||||
// Unicode string constants required by rbnf, nfrs, or nfr here.
|
||||
static const UnicodeString gLenientParse("%%lenient-parse:");
|
||||
static const UChar gSemiColon = 0x003B;
|
||||
static const UnicodeString gSemiPercent(";%");
|
||||
|
||||
void
|
||||
RuleBasedNumberFormat::init(const UnicodeString& rules, UParseError& perror, UErrorCode& status)
|
||||
{
|
||||
// TODO: implement perror
|
||||
if (U_FAILURE(status)) {
|
||||
return;
|
||||
}
|
||||
|
||||
UnicodeString description(rules);
|
||||
if (!description.length()) {
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
return;
|
||||
}
|
||||
|
||||
// start by stripping the trailing whitespace from all the rules
|
||||
// (this is all the whitespace follwing each semicolon in the
|
||||
// description). This allows us to look for rule-set boundaries
|
||||
// by searching for ";%" without having to worry about whitespace
|
||||
// between the ; and the %
|
||||
stripWhitespace(description);
|
||||
|
||||
// check to see if there's a set of lenient-parse rules. If there
|
||||
// is, pull them out into our temporary holding place for them,
|
||||
// and delete them from the description before the real desciption-
|
||||
// parsing code sees them
|
||||
UTextOffset lp = description.indexOf(gLenientParse);
|
||||
if (lp != -1) {
|
||||
// we've got to make sure we're not in the middle of a rule
|
||||
// (where "%%lenient-parse" would actually get treated as
|
||||
// rule text)
|
||||
if (lp == 0 || description.charAt(lp - 1) == gSemiColon) {
|
||||
// locate the beginning and end of the actual collation
|
||||
// rules (there may be whitespace between the name and
|
||||
// the first token in the description)
|
||||
int lpEnd = description.indexOf(gSemiPercent, lp);
|
||||
|
||||
if (lpEnd == -1) {
|
||||
lpEnd = description.length() - 1;
|
||||
}
|
||||
int lpStart = lp + gLenientParse.length();
|
||||
while (u_isWhitespace(description.charAt(lpStart))) {
|
||||
++lpStart;
|
||||
}
|
||||
|
||||
// copy out the lenient-parse rules and delete them
|
||||
// from the description
|
||||
lenientParseRules = new UnicodeString();
|
||||
lenientParseRules->setTo(description, lpStart, lpEnd - lpStart);
|
||||
|
||||
description.remove(lp, lpEnd + 1 - lp);
|
||||
}
|
||||
}
|
||||
|
||||
// pre-flight parsing the description and count the number of
|
||||
// rule sets (";%" marks the end of one rule set and the beginning
|
||||
// of the next)
|
||||
int numRuleSets = 0;
|
||||
for (UTextOffset p = description.indexOf(gSemiPercent); p != -1; p = description.indexOf(gSemiPercent, p)) {
|
||||
++numRuleSets;
|
||||
++p;
|
||||
}
|
||||
++numRuleSets;
|
||||
|
||||
// our rule list is an array of the appropriate size
|
||||
ruleSets = new NFRuleSet*[numRuleSets + 1];
|
||||
for (int i = 0; i <= numRuleSets; ++i) {
|
||||
ruleSets[i] = NULL;
|
||||
}
|
||||
|
||||
// divide up the descriptions into individual rule-set descriptions
|
||||
// and store them in a temporary array. At each step, we also
|
||||
// new up a rule set, but all this does is initialize its name
|
||||
// and remove it from its description. We can't actually parse
|
||||
// the rest of the descriptions and finish initializing everything
|
||||
// because we have to know the names and locations of all the rule
|
||||
// sets before we can actually set everything up
|
||||
UnicodeString* ruleSetDescriptions = new UnicodeString[numRuleSets];
|
||||
|
||||
{
|
||||
int curRuleSet = 0;
|
||||
UTextOffset start = 0;
|
||||
for (UTextOffset p = description.indexOf(gSemiPercent); p != -1; p = description.indexOf(gSemiPercent, start)) {
|
||||
ruleSetDescriptions[curRuleSet].setTo(description, start, p + 1 - start);
|
||||
ruleSets[curRuleSet] = new NFRuleSet(ruleSetDescriptions, curRuleSet, status);
|
||||
++curRuleSet;
|
||||
start = p + 1;
|
||||
}
|
||||
ruleSetDescriptions[curRuleSet].setTo(description, start, description.length() - start);
|
||||
ruleSets[curRuleSet] = new NFRuleSet(ruleSetDescriptions, curRuleSet, status);
|
||||
}
|
||||
|
||||
// now we can take note of the formatter's default rule set, which
|
||||
// is the last public rule set in the description (it's the last
|
||||
// rather than the first so that a user can create a new formatter
|
||||
// from an existing formatter and change its default behavior just
|
||||
// by appending more rule sets to the end)
|
||||
// setDefaultRuleSet
|
||||
{
|
||||
defaultRuleSet = ruleSets[numRuleSets - 1];
|
||||
if (!defaultRuleSet->isPublic()) {
|
||||
for (int i = numRuleSets - 2; i >= 0; --i) {
|
||||
if (ruleSets[i]->isPublic()) {
|
||||
defaultRuleSet = ruleSets[i];
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// finally, we can go back through the temporary descriptions
|
||||
// list and finish seting up the substructure (and we throw
|
||||
// away the temporary descriptions as we go)
|
||||
{
|
||||
for (int i = 0; i < numRuleSets; i++) {
|
||||
ruleSets[i]->parseRules(ruleSetDescriptions[i], this, status);
|
||||
}
|
||||
}
|
||||
|
||||
delete[] ruleSetDescriptions;
|
||||
}
|
||||
|
||||
void
|
||||
RuleBasedNumberFormat::stripWhitespace(UnicodeString& description)
|
||||
{
|
||||
// iterate through the characters...
|
||||
UnicodeString result;
|
||||
|
||||
int start = 0;
|
||||
while (start != -1 && start < description.length()) {
|
||||
// seek to the first non-whitespace character...
|
||||
while (start < description.length()
|
||||
&& u_isWhitespace(description.charAt(start))) {
|
||||
++start;
|
||||
}
|
||||
|
||||
// locate the next semicolon in the text and copy the text from
|
||||
// our current position up to that semicolon into the result
|
||||
UTextOffset p = description.indexOf(gSemiColon, start);
|
||||
if (p == -1) {
|
||||
// or if we don't find a semicolon, just copy the rest of
|
||||
// the string into the result
|
||||
result.append(description, start, description.length() - start);
|
||||
start = -1;
|
||||
}
|
||||
else if (p < description.length()) {
|
||||
result.append(description, start, p + 1 - start);
|
||||
start = p + 1;
|
||||
}
|
||||
|
||||
// when we get here, we've seeked off the end of the sring, and
|
||||
// we terminate the loop (we continue until *start* is -1 rather
|
||||
// than until *p* is -1, because otherwise we'd miss the last
|
||||
// rule in the description)
|
||||
else {
|
||||
start = -1;
|
||||
}
|
||||
}
|
||||
|
||||
description.setTo(result);
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
RuleBasedNumberFormat::dispose()
|
||||
{
|
||||
if (ruleSets) {
|
||||
for (NFRuleSet** p = ruleSets; *p; ++p) {
|
||||
delete *p;
|
||||
}
|
||||
delete[] ruleSets;
|
||||
ruleSets = NULL;
|
||||
}
|
||||
|
||||
delete collator;
|
||||
|
||||
delete decimalFormatSymbols;
|
||||
|
||||
delete lenientParseRules;
|
||||
}
|
||||
|
||||
|
||||
//-----------------------------------------------------------------------
|
||||
// package-internal API
|
||||
//-----------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Returns the collator to use for lenient parsing. The collator is lazily created:
|
||||
* this function creates it the first time it's called.
|
||||
* @return The collator to use for lenient parsing, or null if lenient parsing
|
||||
* is turned off.
|
||||
*/
|
||||
Collator*
|
||||
RuleBasedNumberFormat::getCollator() const
|
||||
{
|
||||
// lazy-evaulate the collator
|
||||
if (collator == NULL && lenient) {
|
||||
// create a default collator based on the formatter's locale,
|
||||
// then pull out that collator's rules, append any additional
|
||||
// rules specified in the description, and create a _new_
|
||||
// collator based on the combinaiton of those rules
|
||||
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
|
||||
Collator* temp = Collator::createInstance(locale, status);
|
||||
if (U_SUCCESS(status) &&
|
||||
temp->getDynamicClassID() == RuleBasedCollator::getStaticClassID()) {
|
||||
|
||||
RuleBasedCollator* newCollator = (RuleBasedCollator*)temp;
|
||||
if (lenientParseRules) {
|
||||
UnicodeString rules(newCollator->getRules());
|
||||
rules.append(*lenientParseRules);
|
||||
|
||||
newCollator = new RuleBasedCollator(rules, status);
|
||||
} else {
|
||||
temp = NULL;
|
||||
}
|
||||
if (U_SUCCESS(status)) {
|
||||
newCollator->setDecomposition(Normalizer::DECOMP);
|
||||
// cast away const
|
||||
((RuleBasedNumberFormat*)this)->collator = newCollator;
|
||||
} else {
|
||||
delete newCollator;
|
||||
}
|
||||
}
|
||||
delete temp;
|
||||
}
|
||||
|
||||
// if lenient-parse mode is off, this will be null
|
||||
// (see setLenientParseMode())
|
||||
return collator;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns the DecimalFormatSymbols object that should be used by all DecimalFormat
|
||||
* instances owned by this formatter. This object is lazily created: this function
|
||||
* creates it the first time it's called.
|
||||
* @return The DecimalFormatSymbols object that should be used by all DecimalFormat
|
||||
* instances owned by this formatter.
|
||||
*/
|
||||
DecimalFormatSymbols*
|
||||
RuleBasedNumberFormat::getDecimalFormatSymbols() const
|
||||
{
|
||||
// lazy-evaluate the DecimalFormatSymbols object. This object
|
||||
// is shared by all DecimalFormat instances belonging to this
|
||||
// formatter
|
||||
if (decimalFormatSymbols == NULL) {
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
DecimalFormatSymbols* temp = new DecimalFormatSymbols(locale, status);
|
||||
if (U_SUCCESS(status)) {
|
||||
((RuleBasedNumberFormat*)this)->decimalFormatSymbols = temp;
|
||||
} else {
|
||||
delete temp;
|
||||
}
|
||||
}
|
||||
return decimalFormatSymbols;
|
||||
}
|
||||
|
828
icu4c/source/i18n/unicode/rbnf.h
Normal file
828
icu4c/source/i18n/unicode/rbnf.h
Normal file
@ -0,0 +1,828 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 1997-2001, International Business Machines Corporation and others. All Rights Reserved.
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef RBNF_H
|
||||
#define RBNF_H
|
||||
|
||||
#include "unicode/coll.h"
|
||||
#include "unicode/dcfmtsym.h"
|
||||
#include "unicode/fmtable.h"
|
||||
#include "unicode/locid.h"
|
||||
#include "unicode/numfmt.h"
|
||||
#include "unicode/unistr.h"
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
class NFRuleSet;
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C++ API: RuleBasedNumberFormat
|
||||
*
|
||||
* <h2> Rule Based Number Format C++ API </h2>
|
||||
*
|
||||
* <p>A class that formats numbers according to a set of rules. This number formatter is
|
||||
* typically used for spelling out numeric values in words (e.g., 25,376 as
|
||||
* "twenty-five thousand three hundred seventy-six" or "vingt-cinq mille trois
|
||||
* cents soixante-seize" or
|
||||
* "fünfundzwanzigtausenddreihundertsechsundsiebzig"), but can also be used for
|
||||
* other complicated formatting tasks, such as formatting a number of seconds as hours,
|
||||
* minutes and seconds (e.g., 3,730 as "1:02:10").</p>
|
||||
*
|
||||
* <p>The resources contain three predefined formatters for each locale: spellout, which
|
||||
* spells out a value in words (123 is "one hundred twenty-three"); ordinal, which
|
||||
* appends an ordinal suffix to the end of a numeral (123 is "123rd"); and
|
||||
* duration, which shows a duration in seconds as hours, minutes, and seconds (123 is
|
||||
* "2:03"). The client can also define more specialized <tt>RuleBasedNumberFormat</tt>s
|
||||
* by supplying programmer-defined rule sets.</p>
|
||||
*
|
||||
* <p>The behavior of a <tt>RuleBasedNumberFormat</tt> is specified by a textual description
|
||||
* that is either passed to the constructor as a <tt>String</tt> or loaded from a resource
|
||||
* bundle. In its simplest form, the description consists of a semicolon-delimited list of <em>rules.</em>
|
||||
* Each rule has a string of output text and a value or range of values it is applicable to.
|
||||
* In a typical spellout rule set, the first twenty rules are the words for the numbers from
|
||||
* 0 to 19:</p>
|
||||
*
|
||||
* <pre>zero; one; two; three; four; five; six; seven; eight; nine;
|
||||
* ten; eleven; twelve; thirteen; fourteen; fifteen; sixteen; seventeen; eighteen; nineteen;</pre>
|
||||
*
|
||||
* <p>For larger numbers, we can use the preceding set of rules to format the ones place, and
|
||||
* we only have to supply the words for the multiples of 10:</p>
|
||||
*
|
||||
* <pre> 20: twenty[->>];
|
||||
* 30: thirty[->>];
|
||||
* 40: forty[->>];
|
||||
* 50: fifty[->>];
|
||||
* 60: sixty[->>];
|
||||
* 70: seventy[->>];
|
||||
* 80: eighty[->>];
|
||||
* 90: ninety[->>];</pre>
|
||||
*
|
||||
* <p>In these rules, the <em>base value</em> is spelled out explicitly and set off from the
|
||||
* rule's output text with a colon. The rules are in a sorted list, and a rule is applicable
|
||||
* to all numbers from its own base value to one less than the next rule's base value. The
|
||||
* ">>" token is called a <em>substitution</em> and tells the formatter to
|
||||
* isolate the number's ones digit, format it using this same set of rules, and place the
|
||||
* result at the position of the ">>" token. Text in brackets is omitted if
|
||||
* the number being formatted is an even multiple of 10 (the hyphen is a literal hyphen; 24
|
||||
* is "twenty-four," not "twenty four").</p>
|
||||
*
|
||||
* <p>For even larger numbers, we can actually look up several parts of the number in the
|
||||
* list:</p>
|
||||
*
|
||||
* <pre>100: << hundred[ >>];</pre>
|
||||
*
|
||||
* <p>The "<<" represents a new kind of substitution. The << isolates
|
||||
* the hundreds digit (and any digits to its left), formats it using this same rule set, and
|
||||
* places the result where the "<<" was. Notice also that the meaning of
|
||||
* >> has changed: it now refers to both the tens and the ones digits. The meaning of
|
||||
* both substitutions depends on the rule's base value. The base value determines the rule's <em>divisor,</em>
|
||||
* which is the highest power of 10 that is less than or equal to the base value (the user
|
||||
* can change this). To fill in the substitutions, the formatter divides the number being
|
||||
* formatted by the divisor. The integral quotient is used to fill in the <<
|
||||
* substitution, and the remainder is used to fill in the >> substitution. The meaning
|
||||
* of the brackets changes similarly: text in brackets is omitted if the value being
|
||||
* formatted is an even multiple of the rule's divisor. The rules are applied recursively, so
|
||||
* if a substitution is filled in with text that includes another substitution, that
|
||||
* substitution is also filled in.</p>
|
||||
*
|
||||
* <p>This rule covers values up to 999, at which point we add another rule:</p>
|
||||
*
|
||||
* <pre>1000: << thousand[ >>];</pre>
|
||||
*
|
||||
* <p>Again, the meanings of the brackets and substitution tokens shift because the rule's
|
||||
* base value is a higher power of 10, changing the rule's divisor. This rule can actually be
|
||||
* used all the way up to 999,999. This allows us to finish out the rules as follows:</p>
|
||||
*
|
||||
* <pre> 1,000,000: << million[ >>];
|
||||
* 1,000,000,000: << billion[ >>];
|
||||
* 1,000,000,000,000: << trillion[ >>];
|
||||
* 1,000,000,000,000,000: OUT OF RANGE!;</pre>
|
||||
*
|
||||
* <p>Commas, periods, and spaces can be used in the base values to improve legibility and
|
||||
* are ignored by the rule parser. The last rule in the list is customarily treated as an
|
||||
* "overflow rule," applying to everything from its base value on up, and often (as
|
||||
* in this example) being used to print out an error message or default representation.
|
||||
* Notice also that the size of the major groupings in large numbers is controlled by the
|
||||
* spacing of the rules: because in English we group numbers by thousand, the higher rules
|
||||
* are separated from each other by a factor of 1,000.</p>
|
||||
*
|
||||
* <p>To see how these rules actually work in practice, consider the following example:
|
||||
* Formatting 25,340 with this rule set would work like this:</p>
|
||||
*
|
||||
* <table border="0" width="630">
|
||||
* <tr>
|
||||
* <td width="21"></td>
|
||||
* <td width="257" valign="top"><strong><< thousand >></strong></td>
|
||||
* <td width="340" valign="top">[the rule whose base value is 1,000 is applicable to 25,340]</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td width="21"></td>
|
||||
* <td width="257" valign="top"><strong>twenty->></strong> thousand >></td>
|
||||
* <td width="340" valign="top">[25,340 over 1,000 is 25. The rule for 20 applies.]</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td width="21"></td>
|
||||
* <td width="257" valign="top">twenty-<strong>five</strong> thousand >></td>
|
||||
* <td width="340" valign="top">[25 mod 10 is 5. The rule for 5 is "five."]</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td width="21"></td>
|
||||
* <td width="257" valign="top">twenty-five thousand <strong><< hundred >></strong></td>
|
||||
* <td width="340" valign="top">[25,340 mod 1,000 is 340. The rule for 100 applies.]</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td width="21"></td>
|
||||
* <td width="257" valign="top">twenty-five thousand <strong>three</strong> hundred >></td>
|
||||
* <td width="340" valign="top">[340 over 100 is 3. The rule for 3 is "three."]</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td width="21"></td>
|
||||
* <td width="257" valign="top">twenty-five thousand three hundred <strong>forty</strong></td>
|
||||
* <td width="340" valign="top">[340 mod 100 is 40. The rule for 40 applies. Since 40 divides
|
||||
* evenly by 10, the hyphen and substitution in the brackets are omitted.]</td>
|
||||
* </tr>
|
||||
* </table>
|
||||
*
|
||||
* <p>The above syntax suffices only to format positive integers. To format negative numbers,
|
||||
* we add a special rule:</p>
|
||||
*
|
||||
* <pre>-x: minus >>;</pre>
|
||||
*
|
||||
* <p>This is called a <em>negative-number rule,</em> and is identified by "-x"
|
||||
* where the base value would be. This rule is used to format all negative numbers. The
|
||||
* >> token here means "find the number's absolute value, format it with these
|
||||
* rules, and put the result here."</p>
|
||||
*
|
||||
* <p>We also add a special rule called a <em>fraction rule </em>for numbers with fractional
|
||||
* parts:</p>
|
||||
*
|
||||
* <pre>x.x: << point >>;</pre>
|
||||
*
|
||||
* <p>This rule is used for all positive non-integers (negative non-integers pass through the
|
||||
* negative-number rule first and then through this rule). Here, the << token refers to
|
||||
* the number's integral part, and the >> to the number's fractional part. The
|
||||
* fractional part is formatted as a series of single-digit numbers (e.g., 123.456 would be
|
||||
* formatted as "one hundred twenty-three point four five six").</p>
|
||||
*
|
||||
* <p>To see how this rule syntax is applied to various languages, examine the resource data.</p>
|
||||
*
|
||||
* <p>There is actually much more flexibility built into the rule language than the
|
||||
* description above shows. A formatter may own multiple rule sets, which can be selected by
|
||||
* the caller, and which can use each other to fill in their substitutions. Substitutions can
|
||||
* also be filled in with digits, using a DecimalFormat object. There is syntax that can be
|
||||
* used to alter a rule's divisor in various ways. And there is provision for much more
|
||||
* flexible fraction handling. A complete description of the rule syntax follows:</p>
|
||||
*
|
||||
* <hr>
|
||||
*
|
||||
* <p>The description of a <tt>RuleBasedNumberFormat</tt>'s behavior consists of one or more <em>rule
|
||||
* sets.</em> Each rule set consists of a name, a colon, and a list of <em>rules.</em> A rule
|
||||
* set name must begin with a % sign. Rule sets with names that begin with a single % sign
|
||||
* are <em>public:</em> the caller can specify that they be used to format and parse numbers.
|
||||
* Rule sets with names that begin with %% are <em>private:</em> they exist only for the use
|
||||
* of other rule sets. If a formatter only has one rule set, the name may be omitted.</p>
|
||||
*
|
||||
* <p>The user can also specify a special "rule set" named <tt>%%lenient-parse</tt>.
|
||||
* The body of <tt>%%lenient-parse</tt> isn't a set of number-formatting rules, but a <tt>RuleBasedCollator</tt>
|
||||
* description which is used to define equivalences for lenient parsing. For more information
|
||||
* on the syntax, see <tt>RuleBasedCollator</tt>. For more information on lenient parsing,
|
||||
* see <tt>setLenientParse()</tt>.</p>
|
||||
*
|
||||
* <p>The body of a rule set consists of an ordered, semicolon-delimited list of <em>rules.</em>
|
||||
* Internally, every rule has a base value, a divisor, rule text, and zero, one, or two <em>substitutions.</em>
|
||||
* These parameters are controlled by the description syntax, which consists of a <em>rule
|
||||
* descriptor,</em> a colon, and a <em>rule body.</em></p>
|
||||
*
|
||||
* <p>A rule descriptor can take one of the following forms (text in <em>italics</em> is the
|
||||
* name of a token):</p>
|
||||
*
|
||||
* <table border="0" width="100%">
|
||||
* <tr>
|
||||
* <td width="5%" valign="top"></td>
|
||||
* <td width="8%" valign="top"><em>bv</em>:</td>
|
||||
* <td valign="top"><em>bv</em> specifies the rule's base value. <em>bv</em> is a decimal
|
||||
* number expressed using ASCII digits. <em>bv</em> may contain spaces, period, and commas,
|
||||
* which are ignored. The rule's divisor is the highest power of 10 less than or equal to
|
||||
* the base value.</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td width="5%" valign="top"></td>
|
||||
* <td width="8%" valign="top"><em>bv</em>/<em>rad</em>:</td>
|
||||
* <td valign="top"><em>bv</em> specifies the rule's base value. The rule's divisor is the
|
||||
* highest power of <em>rad</em> less than or equal to the base value.</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td width="5%" valign="top"></td>
|
||||
* <td width="8%" valign="top"><em>bv</em>>:</td>
|
||||
* <td valign="top"><em>bv</em> specifies the rule's base value. To calculate the divisor,
|
||||
* let the radix be 10, and the exponent be the highest exponent of the radix that yields a
|
||||
* result less than or equal to the base value. Every > character after the base value
|
||||
* decreases the exponent by 1. If the exponent is positive or 0, the divisor is the radix
|
||||
* raised to the power of the exponent; otherwise, the divisor is 1.</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td width="5%" valign="top"></td>
|
||||
* <td width="8%" valign="top"><em>bv</em>/<em>rad</em>>:</td>
|
||||
* <td valign="top"><em>bv</em> specifies the rule's base value. To calculate the divisor,
|
||||
* let the radix be <em>rad</em>, and the exponent be the highest exponent of the radix that
|
||||
* yields a result less than or equal to the base value. Every > character after the radix
|
||||
* decreases the exponent by 1. If the exponent is positive or 0, the divisor is the radix
|
||||
* raised to the power of the exponent; otherwise, the divisor is 1.</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td width="5%" valign="top"></td>
|
||||
* <td width="8%" valign="top">-x:</td>
|
||||
* <td valign="top">The rule is a negative-number rule.</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td width="5%" valign="top"></td>
|
||||
* <td width="8%" valign="top">x.x:</td>
|
||||
* <td valign="top">The rule is an <em>improper fraction rule.</em></td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td width="5%" valign="top"></td>
|
||||
* <td width="8%" valign="top">0.x:</td>
|
||||
* <td valign="top">The rule is a <em>proper fraction rule.</em></td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td width="5%" valign="top"></td>
|
||||
* <td width="8%" valign="top">x.0:</td>
|
||||
* <td valign="top">The rule is a <em>master rule.</em></td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td width="5%" valign="top"></td>
|
||||
* <td width="8%" valign="top"><em>nothing</em></td>
|
||||
* <td valign="top">If the rule's rule descriptor is left out, the base value is one plus the
|
||||
* preceding rule's base value (or zero if this is the first rule in the list) in a normal
|
||||
* rule set. In a fraction rule set, the base value is the same as the preceding rule's
|
||||
* base value.</td>
|
||||
* </tr>
|
||||
* </table>
|
||||
*
|
||||
* <p>A rule set may be either a regular rule set or a <em>fraction rule set,</em> depending
|
||||
* on whether it is used to format a number's integral part (or the whole number) or a
|
||||
* number's fractional part. Using a rule set to format a rule's fractional part makes it a
|
||||
* fraction rule set.</p>
|
||||
*
|
||||
* <p>Which rule is used to format a number is defined according to one of the following
|
||||
* algorithms: If the rule set is a regular rule set, do the following:
|
||||
*
|
||||
* <ul>
|
||||
* <li>If the rule set includes a master rule (and the number was passed in as a <tt>double</tt>),
|
||||
* use the master rule. (If the number being formatted was passed in as a <tt>long</tt>,
|
||||
* the master rule is ignored.)</li>
|
||||
* <li>If the number is negative, use the negative-number rule.</li>
|
||||
* <li>If the number has a fractional part and is greater than 1, use the improper fraction
|
||||
* rule.</li>
|
||||
* <li>If the number has a fractional part and is between 0 and 1, use the proper fraction
|
||||
* rule.</li>
|
||||
* <li>Binary-search the rule list for the rule with the highest base value less than or equal
|
||||
* to the number. If that rule has two substitutions, its base value is not an even multiple
|
||||
* of its divisor, and the number <em>is</em> an even multiple of the rule's divisor, use the
|
||||
* rule that precedes it in the rule list. Otherwise, use the rule itself.</li>
|
||||
* </ul>
|
||||
*
|
||||
* <p>If the rule set is a fraction rule set, do the following:
|
||||
*
|
||||
* <ul>
|
||||
* <li>Ignore negative-number and fraction rules.</li>
|
||||
* <li>For each rule in the list, multiply the number being formatted (which will always be
|
||||
* between 0 and 1) by the rule's base value. Keep track of the distance between the result
|
||||
* and the nearest integer.</li>
|
||||
* <li>Use the rule that produced the result closest to zero in the above calculation. In the
|
||||
* event of a tie or a direct hit, use the first matching rule encountered. (The idea here is
|
||||
* to try each rule's base value as a possible denominator of a fraction. Whichever
|
||||
* denominator produces the fraction closest in value to the number being formatted wins.) If
|
||||
* the rule following the matching rule has the same base value, use it if the numerator of
|
||||
* the fraction is anything other than 1; if the numerator is 1, use the original matching
|
||||
* rule. (This is to allow singular and plural forms of the rule text without a lot of extra
|
||||
* hassle.)</li>
|
||||
* </ul>
|
||||
*
|
||||
* <p>A rule's body consists of a string of characters terminated by a semicolon. The rule
|
||||
* may include zero, one, or two <em>substitution tokens,</em> and a range of text in
|
||||
* brackets. The brackets denote optional text (and may also include one or both
|
||||
* substitutions). The exact meanings of the substitution tokens, and under what conditions
|
||||
* optional text is omitted, depend on the syntax of the substitution token and the context.
|
||||
* The rest of the text in a rule body is literal text that is output when the rule matches
|
||||
* the number being formatted.</p>
|
||||
*
|
||||
* <p>A substitution token begins and ends with a <em>token character.</em> The token
|
||||
* character and the context together specify a mathematical operation to be performed on the
|
||||
* number being formatted. An optional <em>substitution descriptor </em>specifies how the
|
||||
* value resulting from that operation is used to fill in the substitution. The position of
|
||||
* the substitution token in the rule body specifies the location of the resultant text in
|
||||
* the original rule text.</p>
|
||||
*
|
||||
* <p>The meanings of the substitution token characters are as follows:</p>
|
||||
*
|
||||
* <table border="0" width="100%">
|
||||
* <tr>
|
||||
* <td width="37"></td>
|
||||
* <td width="23">>></td>
|
||||
* <td width="165" valign="top">in normal rule</td>
|
||||
* <td>Divide the number by the rule's divisor and format the remainder</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td width="37"></td>
|
||||
* <td width="23"></td>
|
||||
* <td width="165" valign="top">in negative-number rule</td>
|
||||
* <td>Find the absolute value of the number and format the result</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td width="37"></td>
|
||||
* <td width="23"></td>
|
||||
* <td width="165" valign="top">in fraction or master rule</td>
|
||||
* <td>Isolate the number's fractional part and format it.</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td width="37"></td>
|
||||
* <td width="23"></td>
|
||||
* <td width="165" valign="top">in rule in fraction rule set</td>
|
||||
* <td>Not allowed.</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td width="37"></td>
|
||||
* <td width="23">>>></td>
|
||||
* <td width="165" valign="top">in normal rule</td>
|
||||
* <td>Divide the number by the rule's divisor and format the remainder,
|
||||
* but bypass the normal rule-selection process and just use the
|
||||
* rule that precedes this one in this rule list.</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td width="37"></td>
|
||||
* <td width="23"></td>
|
||||
* <td width="165" valign="top">in all other rules</td>
|
||||
* <td>Not allowed.</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td width="37"></td>
|
||||
* <td width="23"><<</td>
|
||||
* <td width="165" valign="top">in normal rule</td>
|
||||
* <td>Divide the number by the rule's divisor and format the quotient</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td width="37"></td>
|
||||
* <td width="23"></td>
|
||||
* <td width="165" valign="top">in negative-number rule</td>
|
||||
* <td>Not allowed.</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td width="37"></td>
|
||||
* <td width="23"></td>
|
||||
* <td width="165" valign="top">in fraction or master rule</td>
|
||||
* <td>Isolate the number's integral part and format it.</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td width="37"></td>
|
||||
* <td width="23"></td>
|
||||
* <td width="165" valign="top">in rule in fraction rule set</td>
|
||||
* <td>Multiply the number by the rule's base value and format the result.</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td width="37"></td>
|
||||
* <td width="23">==</td>
|
||||
* <td width="165" valign="top">in all rule sets</td>
|
||||
* <td>Format the number unchanged</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td width="37"></td>
|
||||
* <td width="23">[]</td>
|
||||
* <td width="165" valign="top">in normal rule</td>
|
||||
* <td>Omit the optional text if the number is an even multiple of the rule's divisor</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td width="37"></td>
|
||||
* <td width="23"></td>
|
||||
* <td width="165" valign="top">in negative-number rule</td>
|
||||
* <td>Not allowed.</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td width="37"></td>
|
||||
* <td width="23"></td>
|
||||
* <td width="165" valign="top">in improper-fraction rule</td>
|
||||
* <td>Omit the optional text if the number is between 0 and 1 (same as specifying both an
|
||||
* x.x rule and a 0.x rule)</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td width="37"></td>
|
||||
* <td width="23"></td>
|
||||
* <td width="165" valign="top">in master rule</td>
|
||||
* <td>Omit the optional text if the number is an integer (same as specifying both an x.x
|
||||
* rule and an x.0 rule)</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td width="37"></td>
|
||||
* <td width="23"></td>
|
||||
* <td width="165" valign="top">in proper-fraction rule</td>
|
||||
* <td>Not allowed.</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td width="37"></td>
|
||||
* <td width="23"></td>
|
||||
* <td width="165" valign="top">in rule in fraction rule set</td>
|
||||
* <td>Omit the optional text if multiplying the number by the rule's base value yields 1.</td>
|
||||
* </tr>
|
||||
* </table>
|
||||
*
|
||||
* <p>The substitution descriptor (i.e., the text between the token characters) may take one
|
||||
* of three forms:</p>
|
||||
*
|
||||
* <table border="0" width="100%">
|
||||
* <tr>
|
||||
* <td width="42"></td>
|
||||
* <td width="166" valign="top">a rule set name</td>
|
||||
* <td>Perform the mathematical operation on the number, and format the result using the
|
||||
* named rule set.</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td width="42"></td>
|
||||
* <td width="166" valign="top">a DecimalFormat pattern</td>
|
||||
* <td>Perform the mathematical operation on the number, and format the result using a
|
||||
* DecimalFormat with the specified pattern. The pattern must begin with 0 or #.</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td width="42"></td>
|
||||
* <td width="166" valign="top">nothing</td>
|
||||
* <td>Perform the mathematical operation on the number, and format the result using the rule
|
||||
* set containing the current rule, except:<ul>
|
||||
* <li>You can't have an empty substitution descriptor with a == substitution.</li>
|
||||
* <li>If you omit the substitution descriptor in a >> substitution in a fraction rule,
|
||||
* format the result one digit at a time using the rule set containing the current rule.</li>
|
||||
* <li>If you omit the substitution descriptor in a << substitution in a rule in a
|
||||
* fraction rule set, format the result using the default rule set for this formatter.</li>
|
||||
* </ul>
|
||||
* </td>
|
||||
* </tr>
|
||||
* </table>
|
||||
*
|
||||
* <p>Whitespace is ignored between a rule set name and a rule set body, between a rule
|
||||
* descriptor and a rule body, or between rules. If a rule body begins with an apostrophe,
|
||||
* the apostrophe is ignored, but all text after it becomes significant (this is how you can
|
||||
* have a rule's rule text begin with whitespace). There is no escape function: the semicolon
|
||||
* is not allowed in rule set names or in rule text, and the colon is not allowed in rule set
|
||||
* names. The characters beginning a substitution token are always treated as the beginning
|
||||
* of a substitution token.</p>
|
||||
*
|
||||
* <p>See the resource data and the demo program for annotated examples of real rule sets
|
||||
* using these features.</p>
|
||||
*
|
||||
* @author Richard Gillam
|
||||
* @see NumberFormat
|
||||
* @see DecimalFormat
|
||||
* @draft
|
||||
*/
|
||||
|
||||
/**
 * Tags for the predefined rulesets. A tag is passed to the
 * RuleBasedNumberFormat(URBNFRuleSetTag, const Locale&, UErrorCode&)
 * constructor to select which predefined formatter to create for a locale.
 */
|
||||
enum URBNFRuleSetTag {
|
||||
URBNF_SPELLOUT, /**< formatter that spells out a value in words */
|
||||
URBNF_ORDINAL, /**< formatter that attaches an ordinal suffix to the end of a number (e.g. "123rd") */
|
||||
URBNF_DURATION, /**< formatter that shows a duration in seconds as hours, minutes, and seconds */
|
||||
URBNF_COUNT /**< NOTE(review): presumably the number of tags above, not a selectable format -- confirm */
|
||||
};
|
||||
|
||||
class U_I18N_API RuleBasedNumberFormat : public NumberFormat {
|
||||
public:
|
||||
|
||||
//-----------------------------------------------------------------------
|
||||
// constructors
|
||||
//-----------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Creates a RuleBasedNumberFormat that behaves according to the rules
|
||||
* passed in. The formatter uses the specified locale to determine the
|
||||
* characters to use when formatting numerals, and to define equivalences
|
||||
* for lenient parsing.
|
||||
* @param rules The formatter rules.
|
||||
* See the class documentation for a complete explanation of the rule
|
||||
* syntax.
|
||||
* @param locale A locale, that governs which characters are used for
|
||||
* formatting values in numerals, and which characters are equivalent in
|
||||
* lenient parsing.
|
||||
* @param perror The parse error if an error was encountered.
|
||||
* @param status The status indicating whether the constructor succeeded.
|
||||
* @draft
|
||||
*/
|
||||
RuleBasedNumberFormat(const UnicodeString& rules, const Locale& locale,
|
||||
UParseError& perror, UErrorCode& status);
|
||||
|
||||
/**
|
||||
* Creates a RuleBasedNumberFormat from a predefined ruleset. The selector
|
||||
* code chooses among three possible predefined formats: spellout, ordinal,
|
||||
* and duration.
|
||||
* @param tag A selector code specifying which kind of formatter to create for that
|
||||
* locale. There are three legal values: URBNF_SPELLOUT, which creates a formatter that
|
||||
* spells out a value in words in the desired language, URBNF_ORDINAL, which attaches
|
||||
* an ordinal suffix from the desired language to the end of a number (e.g. "123rd"),
|
||||
* and URBNF_DURATION, which formats a duration in seconds as hours, minutes, and seconds.
|
||||
* @param locale The locale for the formatter.
|
||||
* @param status The status indicating whether the constructor succeeded.
|
||||
* @draft
|
||||
*/
|
||||
RuleBasedNumberFormat(URBNFRuleSetTag tag, const Locale& locale, UErrorCode& status);
|
||||
|
||||
//-----------------------------------------------------------------------
|
||||
// boilerplate
|
||||
//-----------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Copy constructor
|
||||
*/
|
||||
RuleBasedNumberFormat(const RuleBasedNumberFormat& rhs);
|
||||
|
||||
/**
|
||||
* Assignment operator
|
||||
*/
|
||||
RuleBasedNumberFormat& operator=(const RuleBasedNumberFormat& rhs);
|
||||
|
||||
/**
|
||||
* Release memory allocated for a RuleBasedNumberFormat when you are finished with it.
|
||||
*/
|
||||
virtual ~RuleBasedNumberFormat();
|
||||
|
||||
/**
|
||||
* Clone this object polymorphically. The caller is responsible
|
||||
* for deleting the result when done.
|
||||
*/
|
||||
virtual Format* clone(void) const;
|
||||
|
||||
/**
|
||||
* Return true if the given Format objects are semantically equal.
|
||||
* Objects of different subclasses are considered unequal.
|
||||
*/
|
||||
virtual UBool operator==(const Format& other) const;
|
||||
|
||||
//-----------------------------------------------------------------------
|
||||
// public API functions
|
||||
//-----------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* @return the rules that were provided to the RuleBasedNumberFormat.
|
||||
* @return the result String that was passed in
|
||||
* @draft
|
||||
*/
|
||||
virtual UnicodeString getRules() const;
|
||||
|
||||
/**
|
||||
* Return the name of the index'th public ruleSet. If index is not valid,
|
||||
* the function returns null.
|
||||
* @param index the index of the ruleset
|
||||
* @return the name of the index'th public ruleSet.
|
||||
* @draft
|
||||
*/
|
||||
virtual UnicodeString getRuleSetName(int32_t index) const;
|
||||
|
||||
/**
|
||||
* Return the number of public rule set names.
|
||||
* @return the number of public rule set names.
|
||||
* @draft
|
||||
*/
|
||||
virtual int32_t getNumberOfRuleSetNames() const;
|
||||
|
||||
/**
|
||||
* Formats the specified number using the default ruleset.
|
||||
* @param number The number to format.
|
||||
* @param toAppendTo the string that will hold the (appended) result
|
||||
* @param pos the fieldposition
|
||||
* @return A textual representation of the number.
|
||||
* @draft
|
||||
*/
|
||||
virtual UnicodeString& format(int32_t number,
|
||||
UnicodeString& toAppendTo,
|
||||
FieldPosition& pos) const;
|
||||
/**
|
||||
* Formats the specified number using the default ruleset.
|
||||
* @param number The number to format.
|
||||
* @param toAppendTo the string that will hold the (appended) result
|
||||
* @param pos the fieldposition
|
||||
* @return A textual representation of the number.
|
||||
* @draft
|
||||
*/
|
||||
virtual UnicodeString& format(double number,
|
||||
UnicodeString& toAppendTo,
|
||||
FieldPosition& pos) const;
|
||||
|
||||
/**
|
||||
* Formats the specified number using the named ruleset.
|
||||
* @param number The number to format.
|
||||
* @param ruleSetName The name of the rule set to format the number with.
|
||||
* This must be the name of a valid public rule set for this formatter.
|
||||
* @param toAppendTo the string that will hold the (appended) result
|
||||
* @param pos the fieldposition
|
||||
* @param status the status
|
||||
* @return A textual representation of the number.
|
||||
* @draft
|
||||
*/
|
||||
virtual UnicodeString& format(int32_t number,
|
||||
const UnicodeString& ruleSetName,
|
||||
UnicodeString& toAppendTo,
|
||||
FieldPosition& pos,
|
||||
UErrorCode& status) const;
|
||||
/**
|
||||
* Formats the specified number using the named ruleset.
|
||||
* @param number The number to format.
|
||||
* @param ruleSetName The name of the rule set to format the number with.
|
||||
* This must be the name of a valid public rule set for this formatter.
|
||||
* @param toAppendTo the string that will hold the (appended) result
|
||||
* @param pos the fieldposition
|
||||
* @param status the status
|
||||
* @return A textual representation of the number.
|
||||
* @draft
|
||||
*/
|
||||
virtual UnicodeString& format(double number,
|
||||
const UnicodeString& ruleSetName,
|
||||
UnicodeString& toAppendTo,
|
||||
FieldPosition& pos,
|
||||
UErrorCode& status) const;
|
||||
|
||||
/**
|
||||
* Formats the specified number using the default ruleset.
|
||||
* @param obj The number to format.
|
||||
* @param toAppendTo the string that will hold the (appended) result
|
||||
* @param pos the fieldposition
|
||||
* @param status the status
|
||||
* @return A textual representation of the number.
|
||||
* @draft
|
||||
*/
|
||||
virtual UnicodeString& format(const Formattable& obj,
|
||||
UnicodeString& toAppendTo,
|
||||
FieldPosition& pos,
|
||||
UErrorCode& status) const;
|
||||
/**
|
||||
* Redeclared Format method.
|
||||
* @stable
|
||||
*/
|
||||
UnicodeString& format(const Formattable& obj,
|
||||
UnicodeString& result,
|
||||
UErrorCode& status) const;
|
||||
|
||||
/**
|
||||
* Redeclared NumberFormat method.
|
||||
* @stable
|
||||
*/
|
||||
UnicodeString& format(double number,
|
||||
UnicodeString& output) const;
|
||||
|
||||
/**
|
||||
* Redeclared NumberFormat method.
|
||||
* @stable
|
||||
*/
|
||||
UnicodeString& format(int32_t number,
|
||||
UnicodeString& output) const;
|
||||
|
||||
/**
|
||||
* Parses the specified string, beginning at the specified position, according
|
||||
* to this formatter's rules. This will match the string against all of the
|
||||
* formatter's public rule sets and return the value corresponding to the longest
|
||||
* parseable substring. This function's behavior is affected by the lenient
|
||||
* parse mode.
|
||||
* @param text The string to parse
|
||||
* @param result the result of the parse, either a double or a long.
|
||||
* @param parsePosition On entry, contains the position of the first character
|
||||
* in "text" to examine. On exit, has been updated to contain the position
|
||||
* of the first character in "text" that wasn't consumed by the parse.
|
||||
* @see #setLenient
|
||||
* @draft
|
||||
*/
|
||||
virtual void parse(const UnicodeString& text,
|
||||
Formattable& result,
|
||||
ParsePosition& parsePosition) const;
|
||||
|
||||
|
||||
/**
|
||||
* Redeclared NumberFormat method.
|
||||
* @stable
|
||||
*/
|
||||
virtual inline void parse(const UnicodeString& text,
|
||||
Formattable& result,
|
||||
UErrorCode& status) const;
|
||||
|
||||
|
||||
/**
|
||||
* Turns lenient parse mode on and off.
|
||||
*
|
||||
* When in lenient parse mode, the formatter uses a Collator for parsing the text.
|
||||
* Only primary differences are treated as significant. This means that case
|
||||
* differences, accent differences, alternate spellings of the same letter
|
||||
* (e.g., ae and a-umlaut in German), ignorable characters, etc. are ignored in
|
||||
* matching the text. In many cases, numerals will be accepted in place of words
|
||||
* or phrases as well.
|
||||
*
|
||||
* For example, all of the following will correctly parse as 255 in English in
|
||||
* lenient-parse mode:
|
||||
* <br>"two hundred fifty-five"
|
||||
* <br>"two hundred fifty five"
|
||||
* <br>"TWO HUNDRED FIFTY-FIVE"
|
||||
* <br>"twohundredfiftyfive"
|
||||
* <br>"2 hundred fifty-5"
|
||||
*
|
||||
* The Collator used is determined by the locale that was
|
||||
* passed to this object on construction. The description passed to this object
|
||||
* on construction may supply additional collation rules that are appended to the
|
||||
* end of the default collator for the locale, enabling additional equivalences
|
||||
* (such as adding more ignorable characters or permitting spelled-out versions of
|
||||
* symbols; see the demo program for examples).
|
||||
*
|
||||
* It's important to emphasize that even strict parsing is relatively lenient: it
|
||||
* will accept some text that it won't produce as output. In English, for example,
|
||||
* it will correctly parse "two hundred zero" and "fifteen hundred".
|
||||
*
|
||||
* @param enabled If true, turns lenient-parse mode on; if false, turns it off.
|
||||
* @see RuleBasedCollator
|
||||
* @draft
|
||||
*/
|
||||
virtual void setLenient(UBool enabled);
|
||||
|
||||
/**
|
||||
* Returns true if lenient-parse mode is turned on. Lenient parsing is off
|
||||
* by default.
|
||||
* @return true if lenient-parse mode is turned on.
|
||||
* @see #setLenient
|
||||
* @draft
|
||||
*/
|
||||
virtual inline UBool isLenient(void) const;
|
||||
|
||||
private:
|
||||
void init(const UnicodeString& rules, UParseError& perror, UErrorCode& status);
|
||||
void dispose();
|
||||
void stripWhitespace(UnicodeString& src);
|
||||
void setDefaultRuleSet();
|
||||
void format(double number, NFRuleSet& ruleSet);
|
||||
NFRuleSet* findRuleSet(const UnicodeString& name, UErrorCode& status) const;
|
||||
|
||||
/* friend access */
|
||||
friend class NFSubstitution;
|
||||
friend class NFRule;
|
||||
friend class FractionalPartSubstitution;
|
||||
|
||||
inline NFRuleSet * getDefaultRuleSet() const;
|
||||
Collator * getCollator() const;
|
||||
DecimalFormatSymbols * getDecimalFormatSymbols() const;
|
||||
|
||||
private:
|
||||
static const char fgClassID;
|
||||
|
||||
public:
|
||||
static UClassID getStaticClassID(void) { return (UClassID)&fgClassID; }
|
||||
virtual UClassID getDynamicClassID(void) const { return getStaticClassID(); }
|
||||
|
||||
private:
|
||||
NFRuleSet **ruleSets;
|
||||
NFRuleSet *defaultRuleSet;
|
||||
Locale locale;
|
||||
Collator* collator;
|
||||
DecimalFormatSymbols* decimalFormatSymbols;
|
||||
UBool lenient;
|
||||
UnicodeString* lenientParseRules;
|
||||
};
|
||||
|
||||
// ---------------
|
||||
|
||||
// Convenience overload: formats a Formattable without a FieldPosition.
// Forwards to the immediate base class (NumberFormat) rather than to
// Format, so that any behavior NumberFormat layers on top is preserved.
inline UnicodeString&
RuleBasedNumberFormat::format(const Formattable& obj,
                              UnicodeString& result,
                              UErrorCode& status) const
{
    return NumberFormat::format(obj, result, status);
}
|
||||
|
||||
// Convenience overload for doubles: the caller does not care about field
// positions, so a throwaway FieldPosition is supplied to the full overload.
inline UnicodeString&
RuleBasedNumberFormat::format(double number, UnicodeString& output) const
{
    FieldPosition ignoredPosition(0);
    return format(number, output, ignoredPosition);
}
|
||||
|
||||
// Convenience overload for 32-bit integers: the caller does not care about
// field positions, so a throwaway FieldPosition is supplied to the full overload.
inline UnicodeString&
RuleBasedNumberFormat::format(int32_t number, UnicodeString& output) const
{
    FieldPosition ignoredPosition(0);
    return format(number, output, ignoredPosition);
}
|
||||
|
||||
inline void
|
||||
RuleBasedNumberFormat::parse(const UnicodeString& text, Formattable& result, UErrorCode& status) const {
|
||||
NumberFormat::parse(text, result, status);
|
||||
}
|
||||
|
||||
// Accessor for the lenient-parse flag.
inline UBool
RuleBasedNumberFormat::isLenient(void) const
{
    return lenient;
}
|
||||
|
||||
// Accessor for the rule set used when no rule set name is given.
// The returned pointer is the member itself; no copy is made.
inline NFRuleSet*
RuleBasedNumberFormat::getDefaultRuleSet() const
{
    return defaultRuleSet;
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
/* RBNF_H */
|
||||
#endif
|
@ -48,7 +48,8 @@ tsmthred.o tsmutex.o tsnmfmt.o tsputil.o tstnorm.o tzbdtest.o \
|
||||
tzregts.o tztest.o ucdtest.o usettest.o ustrtest.o transtst.o strtest.o thcoll.o \
|
||||
itrbbi.o rbbiapts.o rbbitst.o ittrans.o transapi.o cpdtrtst.o unhxtrts.o hxuntrts.o \
|
||||
ufltlgts.o testutil.o transrt.o normconf.o sfwdchit.o indictrn.o\
|
||||
jamotest.o srchtest.o
|
||||
jamotest.o srchtest.o \
|
||||
itrbnf.o itrbnfrt.o
|
||||
|
||||
DEPS = $(OBJECTS:.o=.d)
|
||||
|
||||
|
@ -217,6 +217,14 @@ SOURCE=.\itrbbi.cpp
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\itrbnf.cpp
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\itrbnfrt.cpp
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\ittrans.cpp
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
@ -545,6 +553,14 @@ SOURCE=.\itrbbi.h
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\itrbnf.h
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\itrbnfrt.h
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\ittrans.h
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
@ -24,6 +24,8 @@
|
||||
#include "itconv.h"
|
||||
#include "ittrans.h"
|
||||
#include "itrbbi.h"
|
||||
#include "itrbnf.h"
|
||||
#include "itrbnfrt.h"
|
||||
#include "normconf.h"
|
||||
#include "tstnorm.h"
|
||||
|
||||
@ -102,6 +104,20 @@ void MajorTestLevel::runIndexedTest( int32_t index, UBool exec, const char* &nam
|
||||
callTest( test, par );
|
||||
}
|
||||
break;
|
||||
case 8: name = "rbnf";
|
||||
if (exec) {
|
||||
logln("TestSuite RuleBasedNumberFormat----"); logln();
|
||||
IntlTestRBNF test;
|
||||
callTest(test, par);
|
||||
}
|
||||
break;
|
||||
case 9: name = "rbnfrt";
|
||||
if (exec) {
|
||||
logln("TestSuite RuleBasedNumberFormat RT----"); logln();
|
||||
RbnfRoundTripTest test;
|
||||
callTest(test, par);
|
||||
}
|
||||
break;
|
||||
|
||||
default: name = ""; break;
|
||||
}
|
||||
|
618
icu4c/source/test/intltest/itrbnf.cpp
Normal file
618
icu4c/source/test/intltest/itrbnf.cpp
Normal file
@ -0,0 +1,618 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 1996-2000, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*/
|
||||
#include "itrbnf.h"
|
||||
|
||||
#include "unicode/tblcoll.h"
|
||||
#include "unicode/coleitr.h"
|
||||
|
||||
// import com.ibm.text.RuleBasedNumberFormat;
|
||||
// import com.ibm.test.TestFmwk;
|
||||
|
||||
// import java.util.Locale;
|
||||
// import java.text.NumberFormat;
|
||||
|
||||
// current macro not in icu1.8.1
|
||||
#define TESTCASE(id,test) \
|
||||
case id: \
|
||||
name = #test; \
|
||||
if (exec) { \
|
||||
logln(#test "---"); \
|
||||
logln((UnicodeString)""); \
|
||||
test(); \
|
||||
} \
|
||||
break
|
||||
|
||||
// IntlTest dispatch hook: maps a test index to a test name and, when exec
// is set, runs that test. An unknown index yields an empty name.
void IntlTestRBNF::runIndexedTest(int32_t index, UBool exec, const char* &name, char* par)
{
    if (exec) {
        logln("TestSuite RuleBasedNumberFormat");
    }

    // Each TESTCASE expands to a complete `case` (name assignment, optional
    // execution, break), so the entries below need no bodies of their own.
    switch (index) {
        TESTCASE(0, TestEnglishSpellout);
        TESTCASE(1, TestOrdinalAbbreviations);
        TESTCASE(2, TestDurations);
        TESTCASE(3, TestSpanishSpellout);
        TESTCASE(4, TestFrenchSpellout);
        TESTCASE(5, TestSwissFrenchSpellout);
        TESTCASE(6, TestItalianSpellout);
        TESTCASE(7, TestGermanSpellout);
        TESTCASE(8, TestThaiSpellout);

        default:
            name = "";
            break;
    }
}
|
||||
|
||||
void
|
||||
IntlTestRBNF::TestEnglishSpellout()
|
||||
{
|
||||
#if 0
|
||||
// temporary test code
|
||||
{
|
||||
int32_t result = 0;
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
Collator* temp = Collator::createInstance(Locale::US, status);
|
||||
if (U_SUCCESS(status) &&
|
||||
temp->getDynamicClassID() == RuleBasedCollator::getStaticClassID()) {
|
||||
|
||||
RuleBasedCollator* collator = (RuleBasedCollator*)temp;
|
||||
UnicodeString rules(collator->getRules());
|
||||
UnicodeString tailoring("&'\\u0000' << ' ' << '-'\n");
|
||||
tailoring = tailoring.unescape();
|
||||
rules.append(tailoring);
|
||||
|
||||
collator = new RuleBasedCollator(rules, status);
|
||||
if (U_SUCCESS(status)) {
|
||||
collator->setDecomposition(Normalizer::DECOMP);
|
||||
|
||||
UnicodeString prefix(" hundred");
|
||||
UnicodeString str("hundred-fifty");
|
||||
|
||||
CollationElementIterator* strIter = collator->createCollationElementIterator(str);
|
||||
CollationElementIterator* prefixIter = collator->createCollationElementIterator(prefix);
|
||||
|
||||
// match collation elements between the strings
|
||||
int32_t oStr = strIter->next(status);
|
||||
int32_t oPrefix = prefixIter->next(status);
|
||||
|
||||
while (oPrefix != CollationElementIterator::NULLORDER) {
|
||||
// skip over ignorable characters in the target string
|
||||
while (CollationElementIterator::primaryOrder(oStr) == 0
|
||||
&& oStr != CollationElementIterator::NULLORDER) {
|
||||
oStr = strIter->next(status);
|
||||
}
|
||||
|
||||
// skip over ignorable characters in the prefix
|
||||
while (CollationElementIterator::primaryOrder(oPrefix) == 0
|
||||
&& oPrefix != CollationElementIterator::NULLORDER) {
|
||||
oPrefix = prefixIter->next(status);
|
||||
}
|
||||
|
||||
// if skipping over ignorables brought us to the end
|
||||
// of the target string, we didn't match and return 0
|
||||
if (oStr == CollationElementIterator::NULLORDER) {
|
||||
result = -1;
|
||||
break;
|
||||
}
|
||||
|
||||
// if skipping over ignorables brought to the end of
|
||||
// the prefix, we DID match: drop out of the loop
|
||||
else if (oPrefix == CollationElementIterator::NULLORDER) {
|
||||
break;
|
||||
}
|
||||
|
||||
// match collation elements from the two strings
|
||||
// (considering only primary differences). If we
|
||||
// get a mismatch, dump out and return 0
|
||||
if (CollationElementIterator::primaryOrder(oStr)
|
||||
!= CollationElementIterator::primaryOrder(oPrefix)) {
|
||||
result = -1;
|
||||
break;
|
||||
|
||||
// otherwise, advance to the next character in each string
|
||||
// and loop (we drop out of the loop when we exhaust
|
||||
// collation elements in the prefix)
|
||||
} else {
|
||||
oStr = strIter->next(status);
|
||||
oPrefix = prefixIter->next(status);
|
||||
}
|
||||
}
|
||||
if (result == 0) {
|
||||
result = strIter->getOffset();
|
||||
}
|
||||
delete prefixIter;
|
||||
delete strIter;
|
||||
}
|
||||
delete collator;
|
||||
}
|
||||
delete temp;
|
||||
|
||||
printf("result: %d\n", result);
|
||||
}
|
||||
#endif
|
||||
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
RuleBasedNumberFormat* formatter
|
||||
= new RuleBasedNumberFormat(URBNF_SPELLOUT, Locale::US, status);
|
||||
|
||||
if (U_FAILURE(status)) {
|
||||
errln("FAIL: could not construct formatter");
|
||||
} else {
|
||||
const char* testData[][2] = {
|
||||
{ "1", "one" },
|
||||
{ "2", "two" },
|
||||
{ "15", "fifteen" },
|
||||
{ "20", "twenty" },
|
||||
{ "23", "twenty-three" },
|
||||
{ "73", "seventy-three" },
|
||||
{ "88", "eighty-eight" },
|
||||
{ "100", "one hundred" },
|
||||
{ "106", "one hundred and six" },
|
||||
{ "127", "one hundred and twenty-seven" },
|
||||
{ "200", "two hundred" },
|
||||
{ "579", "five hundred and seventy-nine" },
|
||||
{ "1,000", "one thousand" },
|
||||
{ "2,000", "two thousand" },
|
||||
{ "3,004", "three thousand and four" },
|
||||
{ "4,567", "four thousand five hundred and sixty-seven" },
|
||||
{ "15,943", "fifteen thousand nine hundred and forty-three" },
|
||||
{ "2,345,678", "two million, three hundred and forty-five thousand, six hundred and seventy-eight" },
|
||||
{ "-36", "minus thirty-six" },
|
||||
{ "234.567", "two hundred and thirty-four point five six seven" },
|
||||
NULL
|
||||
};
|
||||
|
||||
doTest(formatter, testData, TRUE);
|
||||
|
||||
formatter->setLenient(TRUE);
|
||||
const char* lpTestData[][2] = {
|
||||
{ "2 thousand six HUNDRED fifty-7", "2,657" },
|
||||
{ "fifteen hundred and zero", "1,500" },
|
||||
{ "FOurhundred thiRTY six", "436" },
|
||||
NULL
|
||||
};
|
||||
doLenientParseTest(formatter, lpTestData);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
IntlTestRBNF::TestOrdinalAbbreviations()
|
||||
{
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
RuleBasedNumberFormat* formatter
|
||||
= new RuleBasedNumberFormat(URBNF_ORDINAL, Locale::US, status);
|
||||
|
||||
if (U_FAILURE(status)) {
|
||||
errln("FAIL: could not construct formatter");
|
||||
} else {
|
||||
const char* testData[][2] = {
|
||||
{ "1", "1st" },
|
||||
{ "2", "2nd" },
|
||||
{ "3", "3rd" },
|
||||
{ "4", "4th" },
|
||||
{ "7", "7th" },
|
||||
{ "10", "10th" },
|
||||
{ "11", "11th" },
|
||||
{ "13", "13th" },
|
||||
{ "20", "20th" },
|
||||
{ "21", "21st" },
|
||||
{ "22", "22nd" },
|
||||
{ "23", "23rd" },
|
||||
{ "24", "24th" },
|
||||
{ "33", "33rd" },
|
||||
{ "102", "102nd" },
|
||||
{ "312", "312th" },
|
||||
{ "12,345", "12,345th" },
|
||||
NULL
|
||||
};
|
||||
|
||||
doTest(formatter, testData, FALSE);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
IntlTestRBNF::TestDurations()
|
||||
{
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
RuleBasedNumberFormat* formatter
|
||||
= new RuleBasedNumberFormat(URBNF_DURATION, Locale::US, status);
|
||||
|
||||
if (U_FAILURE(status)) {
|
||||
errln("FAIL: could not construct formatter");
|
||||
} else {
|
||||
const char* testData[][2] = {
|
||||
{ "3,600", "1:00:00" }, //move me and I fail
|
||||
{ "0", "0 sec." },
|
||||
{ "1", "1 sec." },
|
||||
{ "24", "24 sec." },
|
||||
{ "60", "1:00" },
|
||||
{ "73", "1:13" },
|
||||
{ "145", "2:25" },
|
||||
{ "666", "11:06" },
|
||||
// { "3,600", "1:00:00" },
|
||||
{ "3,740", "1:02:20" },
|
||||
{ "10,293", "2:51:33" },
|
||||
NULL
|
||||
};
|
||||
|
||||
doTest(formatter, testData, TRUE);
|
||||
|
||||
formatter->setLenient(TRUE);
|
||||
const char* lpTestData[][2] = {
|
||||
{ "2-51-33", "10,293" },
|
||||
NULL
|
||||
};
|
||||
doLenientParseTest(formatter, lpTestData);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
IntlTestRBNF::TestSpanishSpellout()
|
||||
{
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
RuleBasedNumberFormat* formatter
|
||||
= new RuleBasedNumberFormat(URBNF_SPELLOUT, Locale("es", "ES", ""), status);
|
||||
|
||||
if (U_FAILURE(status)) {
|
||||
errln("FAIL: could not construct formatter");
|
||||
} else {
|
||||
const char* testData[][2] = {
|
||||
{ "1", "uno" },
|
||||
{ "6", "seis" },
|
||||
{ "16", "diecis\\u00e9is" },
|
||||
{ "20", "veinte" },
|
||||
{ "24", "veinticuatro" },
|
||||
{ "26", "veintis\\u00e9is" },
|
||||
{ "73", "setenta y tres" },
|
||||
{ "88", "ochenta y ocho" },
|
||||
{ "100", "cien" },
|
||||
{ "106", "ciento seis" },
|
||||
{ "127", "ciento veintisiete" },
|
||||
{ "200", "doscientos" },
|
||||
{ "579", "quinientos setenta y nueve" },
|
||||
{ "1,000", "mil" },
|
||||
{ "2,000", "dos mil" },
|
||||
{ "3,004", "tres mil cuatro" },
|
||||
{ "4,567", "cuatro mil quinientos sesenta y siete" },
|
||||
{ "15,943", "quince mil novecientos cuarenta y tres" },
|
||||
{ "2,345,678", "dos mill\\u00f3n trescientos cuarenta y cinco mil seiscientos setenta y ocho"},
|
||||
{ "-36", "menos treinta y seis" },
|
||||
{ "234.567", "doscientos treinta y cuatro punto cinco seis siete" },
|
||||
NULL
|
||||
};
|
||||
|
||||
doTest(formatter, testData, TRUE);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
IntlTestRBNF::TestFrenchSpellout()
|
||||
{
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
RuleBasedNumberFormat* formatter
|
||||
= new RuleBasedNumberFormat(URBNF_SPELLOUT, Locale::FRANCE, status);
|
||||
|
||||
if (U_FAILURE(status)) {
|
||||
errln("FAIL: could not construct formatter");
|
||||
} else {
|
||||
const char* testData[][2] = {
|
||||
{ "1", "un" },
|
||||
{ "15", "quinze" },
|
||||
{ "20", "vingt" },
|
||||
{ "21", "vingt-et-un" },
|
||||
{ "23", "vingt-trois" },
|
||||
{ "62", "soixante-deux" },
|
||||
{ "70", "soixante-dix" },
|
||||
{ "71", "soixante et onze" },
|
||||
{ "73", "soixante-treize" },
|
||||
{ "80", "quatre-vingts" },
|
||||
{ "88", "quatre-vingt-huit" },
|
||||
{ "100", "cent" },
|
||||
{ "106", "cent six" },
|
||||
{ "127", "cent vingt-sept" },
|
||||
{ "200", "deux cents" },
|
||||
{ "579", "cinq cents soixante-dix-neuf" },
|
||||
{ "1,000", "mille" },
|
||||
{ "1,123", "onze cents vingt-trois" },
|
||||
{ "1,594", "mille cinq cents quatre-vingt-quatorze" },
|
||||
{ "2,000", "deux mille" },
|
||||
{ "3,004", "trois mille quatre" },
|
||||
{ "4,567", "quatre mille cinq cents soixante-sept" },
|
||||
{ "15,943", "quinze mille neuf cents quarante-trois" },
|
||||
{ "2,345,678", "deux million trois cents quarante-cinq mille six cents soixante-dix-huit" },
|
||||
{ "-36", "moins trente-six" },
|
||||
{ "234.567", "deux cents trente-quatre virgule cinq six sept" },
|
||||
NULL
|
||||
};
|
||||
|
||||
doTest(formatter, testData, TRUE);
|
||||
|
||||
formatter->setLenient(TRUE);
|
||||
const char* lpTestData[][2] = {
|
||||
{ "trente-un", "31" },
|
||||
{ "un cents quatre vingt dix huit", "198" },
|
||||
NULL
|
||||
};
|
||||
doLenientParseTest(formatter, lpTestData);
|
||||
}
|
||||
}
|
||||
void
|
||||
IntlTestRBNF::TestSwissFrenchSpellout()
|
||||
{
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
RuleBasedNumberFormat* formatter
|
||||
= new RuleBasedNumberFormat(URBNF_SPELLOUT, Locale("fr", "CH", ""), status);
|
||||
|
||||
if (U_FAILURE(status)) {
|
||||
errln("FAIL: could not construct formatter");
|
||||
} else {
|
||||
const char* testData[][2] = {
|
||||
{ "1", "un" },
|
||||
{ "15", "quinze" },
|
||||
{ "20", "vingt" },
|
||||
{ "21", "vingt-et-un" },
|
||||
{ "23", "vingt-trois" },
|
||||
{ "62", "soixante-deux" },
|
||||
{ "70", "septante" },
|
||||
{ "71", "septante-et-un" },
|
||||
{ "73", "septante-trois" },
|
||||
{ "80", "octante" },
|
||||
{ "88", "octante-huit" },
|
||||
{ "100", "cent" },
|
||||
{ "106", "cent six" },
|
||||
{ "127", "cent vingt-sept" },
|
||||
{ "200", "deux cents" },
|
||||
{ "579", "cinq cents septante-neuf" },
|
||||
{ "1,000", "mille" },
|
||||
{ "1,123", "onze cents vingt-trois" },
|
||||
{ "1,594", "mille cinq cents nonante-quatre" },
|
||||
{ "2,000", "deux mille" },
|
||||
{ "3,004", "trois mille quatre" },
|
||||
{ "4,567", "quatre mille cinq cents soixante-sept" },
|
||||
{ "15,943", "quinze mille neuf cents quarante-trois" },
|
||||
{ "2,345,678", "deux million trois cents quarante-cinq mille six cents septante-huit" },
|
||||
{ "-36", "moins trente-six" },
|
||||
{ "234.567", "deux cents trente-quatre virgule cinq six sept" },
|
||||
NULL
|
||||
};
|
||||
|
||||
doTest(formatter, testData, TRUE);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
IntlTestRBNF::TestItalianSpellout()
|
||||
{
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
RuleBasedNumberFormat* formatter
|
||||
= new RuleBasedNumberFormat(URBNF_SPELLOUT, Locale::ITALIAN, status);
|
||||
|
||||
if (U_FAILURE(status)) {
|
||||
errln("FAIL: could not construct formatter");
|
||||
} else {
|
||||
const char* testData[][2] = {
|
||||
{ "1", "uno" },
|
||||
{ "15", "quindici" },
|
||||
{ "20", "venti" },
|
||||
{ "23", "ventitre" },
|
||||
{ "73", "settantatre" },
|
||||
{ "88", "ottantotto" },
|
||||
{ "100", "cento" },
|
||||
{ "106", "centosei" },
|
||||
{ "108", "centotto" },
|
||||
{ "127", "centoventisette" },
|
||||
{ "181", "centottantuno" },
|
||||
{ "200", "duecento" },
|
||||
{ "579", "cinquecentosettantanove" },
|
||||
{ "1,000", "mille" },
|
||||
{ "2,000", "duemila" },
|
||||
{ "3,004", "tremilaquattro" },
|
||||
{ "4,567", "quattromilacinquecentosessantasette" },
|
||||
{ "15,943", "quindicimilanovecentoquarantatre" },
|
||||
{ "-36", "meno trentisei" },
|
||||
{ "234.567", "duecentotrentiquattro virgola cinque sei sette" },
|
||||
NULL
|
||||
};
|
||||
|
||||
doTest(formatter, testData, TRUE);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
IntlTestRBNF::TestGermanSpellout()
|
||||
{
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
RuleBasedNumberFormat* formatter
|
||||
= new RuleBasedNumberFormat(URBNF_SPELLOUT, Locale::GERMANY, status);
|
||||
|
||||
if (U_FAILURE(status)) {
|
||||
errln("FAIL: could not construct formatter");
|
||||
} else {
|
||||
const char* testData[][2] = {
|
||||
{ "1", "eins" },
|
||||
{ "15", "f\\u00fcnfzehn" },
|
||||
{ "20", "zwanzig" },
|
||||
{ "23", "dreiundzwanzig" },
|
||||
{ "73", "dreiundsiebzig" },
|
||||
{ "88", "achtundachtzig" },
|
||||
{ "100", "hundert" },
|
||||
{ "106", "hundertsechs" },
|
||||
{ "127", "hundertsiebenundzwanzig" },
|
||||
{ "200", "zweihundert" },
|
||||
{ "579", "f\\u00fcnfhundertneunundsiebzig" },
|
||||
{ "1,000", "tausend" },
|
||||
{ "2,000", "zweitausend" },
|
||||
{ "3,004", "dreitausendvier" },
|
||||
{ "4,567", "viertausendf\\u00fcnfhundertsiebenundsechzig" },
|
||||
{ "15,943", "f\\u00fcnfzehntausendneunhundertdreiundvierzig" },
|
||||
{ "2,345,678", "zwei Millionen dreihundertf\\u00fcnfundvierzigtausendsechshundertachtundsiebzig" },
|
||||
NULL
|
||||
};
|
||||
|
||||
doTest(formatter, testData, TRUE);
|
||||
|
||||
formatter->setLenient(TRUE);
|
||||
const char* lpTestData[][2] = {
|
||||
{ "ein Tausend sechs Hundert fuenfunddreissig", "1,635" },
|
||||
NULL
|
||||
};
|
||||
doLenientParseTest(formatter, lpTestData);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
IntlTestRBNF::TestThaiSpellout()
|
||||
{
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
RuleBasedNumberFormat* formatter
|
||||
= new RuleBasedNumberFormat(URBNF_SPELLOUT, Locale("th"), status);
|
||||
|
||||
if (U_FAILURE(status)) {
|
||||
errln("FAIL: could not construct formatter");
|
||||
} else {
|
||||
const char* testData[][2] = {
|
||||
{ "0", "\\u0e28\\u0e39\\u0e19\\u0e22\\u0e4c" },
|
||||
{ "1", "\\u0e2b\\u0e19\\u0e36\\u0e48\\u0e07" },
|
||||
{ "10", "\\u0e2a\\u0e34\\u0e1a" },
|
||||
{ "11", "\\u0e2a\\u0e34\\u0e1a\\u0e40\\u0e2d\\u0e47\\u0e14" },
|
||||
{ "21", "\\u0e22\\u0e35\\u0e48\\u0e2a\\u0e34\\u0e1a\\u0e40\\u0e2d\\u0e47\\u0e14" },
|
||||
{ "101", "\\u0e2b\\u0e19\\u0e36\\u0e48\\u0e07\\u0e23\\u0e49\\u0e2d\\u0e22\\u0e2b\\u0e19\\u0e36\\u0e48\\u0e07" },
|
||||
{ "1.234", "\\u0e2b\\u0e19\\u0e36\\u0e48\\u0e07\\u0e08\\u0e38\\u0e14\\u0e2a\\u0e2d\\u0e07\\u0e2a\\u0e32\\u0e21\\u0e2a\\u0e35\\u0e48" },
|
||||
NULL
|
||||
};
|
||||
|
||||
doTest(formatter, testData, TRUE);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
IntlTestRBNF::doTest(RuleBasedNumberFormat* formatter, const char* testData[][2], UBool testParsing)
|
||||
{
|
||||
// man, error reporting would be easier with printf-style syntax for unicode string and formattable
|
||||
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
NumberFormat* decFmt = NumberFormat::createInstance(Locale::US, status);
|
||||
if (U_FAILURE(status)) {
|
||||
errln("FAIL: could not create NumberFormat");
|
||||
} else {
|
||||
for (int i = 0; testData[i][0]; ++i) {
|
||||
const char* numString = testData[i][0];
|
||||
const char* expectedWords = testData[i][1];
|
||||
|
||||
Formattable expectedNumber;
|
||||
decFmt->parse(numString, expectedNumber, status);
|
||||
if (U_FAILURE(status)) {
|
||||
errln("FAIL: decFmt could not parse %s", numString);
|
||||
break;
|
||||
} else {
|
||||
UnicodeString actualString;
|
||||
FieldPosition pos;
|
||||
formatter->format(expectedNumber, actualString/* , pos*/, status);
|
||||
if (U_FAILURE(status)) {
|
||||
UnicodeString msg = "Fail: formatter could not format ";
|
||||
decFmt->format(expectedNumber, msg, status);
|
||||
errln(msg);
|
||||
break;
|
||||
} else {
|
||||
UnicodeString expectedString = UnicodeString(expectedWords).unescape();
|
||||
if (actualString != expectedString) {
|
||||
UnicodeString msg = "FAIL: check failed for ";
|
||||
decFmt->format(expectedNumber, msg, status);
|
||||
msg.append(", expected ");
|
||||
msg.append(expectedString);
|
||||
msg.append(" but got ");
|
||||
msg.append(actualString);
|
||||
errln(msg);
|
||||
break;
|
||||
} else if (testParsing) {
|
||||
Formattable parsedNumber;
|
||||
formatter->parse(actualString, parsedNumber, status);
|
||||
if (U_FAILURE(status)) {
|
||||
UnicodeString msg = "FAIL: formatter could not parse ";
|
||||
msg.append(actualString);
|
||||
msg.append(" status code: " );
|
||||
char buffer[32];
|
||||
sprintf(buffer, "0x%x\0", status);
|
||||
msg.append(buffer);
|
||||
errln(msg);
|
||||
break;
|
||||
} else {
|
||||
if (parsedNumber != expectedNumber) {
|
||||
UnicodeString msg = "FAIL: parse failed for ";
|
||||
msg.append(actualString);
|
||||
msg.append(", expected ");
|
||||
decFmt->format(expectedNumber, msg, status);
|
||||
msg.append(", but got ");
|
||||
decFmt->format(parsedNumber, msg, status);
|
||||
errln(msg);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
delete decFmt;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
IntlTestRBNF::doLenientParseTest(RuleBasedNumberFormat* formatter, const char* testData[][2])
|
||||
{
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
NumberFormat* decFmt = NumberFormat::createInstance(Locale::US, status);
|
||||
if (U_FAILURE(status)) {
|
||||
errln("FAIL: could not create NumberFormat");
|
||||
} else {
|
||||
for (int i = 0; testData[i][0]; ++i) {
|
||||
const char* spelledNumber = testData[i][0]; // spelled-out number
|
||||
const char* asciiUSNumber = testData[i][1]; // number as ascii digits formatted for US locale
|
||||
|
||||
UnicodeString spelledNumberString = UnicodeString(spelledNumber).unescape();
|
||||
Formattable actualNumber;
|
||||
formatter->parse(spelledNumberString, actualNumber, status);
|
||||
if (U_FAILURE(status)) {
|
||||
UnicodeString msg = "FAIL: formatter could not parse ";
|
||||
msg.append(spelledNumberString);
|
||||
errln(msg);
|
||||
break;
|
||||
} else {
|
||||
// I changed the logic of this test somewhat from Java-- instead of comparing the
|
||||
// strings, I compare the Formattables. Hmmm, but the Formattables don't compare,
|
||||
// so change it back.
|
||||
|
||||
UnicodeString asciiUSNumberString = asciiUSNumber;
|
||||
Formattable expectedNumber;
|
||||
decFmt->parse(asciiUSNumberString, expectedNumber, status);
|
||||
if (U_FAILURE(status)) {
|
||||
UnicodeString msg = "FAIL: decFmt could not parse ";
|
||||
msg.append(asciiUSNumberString);
|
||||
errln(msg);
|
||||
break;
|
||||
} else {
|
||||
UnicodeString actualNumberString;
|
||||
UnicodeString expectedNumberString;
|
||||
decFmt->format(actualNumber, actualNumberString, status);
|
||||
decFmt->format(expectedNumber, expectedNumberString, status);
|
||||
if (actualNumberString != expectedNumberString) {
|
||||
UnicodeString msg = "FAIL: parsing";
|
||||
msg.append(asciiUSNumberString);
|
||||
msg.append("\n");
|
||||
msg.append(" lenient parse failed for ");
|
||||
msg.append(spelledNumberString);
|
||||
msg.append(", expected ");
|
||||
msg.append(expectedNumberString);
|
||||
msg.append(", but got ");
|
||||
msg.append(actualNumberString);
|
||||
errln(msg);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
delete decFmt;
|
||||
}
|
||||
}
|
||||
|
74
icu4c/source/test/intltest/itrbnf.h
Normal file
74
icu4c/source/test/intltest/itrbnf.h
Normal file
@ -0,0 +1,74 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 1996-2000, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef ITRBNF_H
|
||||
#define ITRBNF_H
|
||||
|
||||
#include "intltest.h"
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/rbnf.h"
|
||||
|
||||
|
||||
class IntlTestRBNF : public IntlTest {
|
||||
public:
|
||||
|
||||
// IntlTest override
|
||||
virtual void runIndexedTest(int32_t index, UBool exec, const char* &name, char* par);
|
||||
|
||||
/**
|
||||
* Perform a simple spot check on the English spellout rules
|
||||
*/
|
||||
virtual void TestEnglishSpellout();
|
||||
|
||||
/**
|
||||
* Perform a simple spot check on the English ordinal-abbreviation rules
|
||||
*/
|
||||
virtual void TestOrdinalAbbreviations();
|
||||
|
||||
/**
|
||||
* Perform a simple spot check on the duration-formatting rules
|
||||
*/
|
||||
virtual void TestDurations();
|
||||
|
||||
/**
|
||||
* Perform a simple spot check on the Spanish spellout rules
|
||||
*/
|
||||
virtual void TestSpanishSpellout();
|
||||
|
||||
/**
|
||||
* Perform a simple spot check on the French spellout rules
|
||||
*/
|
||||
virtual void TestFrenchSpellout();
|
||||
|
||||
/**
|
||||
* Perform a simple spot check on the Swiss French spellout rules
|
||||
*/
|
||||
virtual void TestSwissFrenchSpellout();
|
||||
|
||||
/**
|
||||
* Perform a simple spot check on the Italian spellout rules
|
||||
*/
|
||||
virtual void TestItalianSpellout();
|
||||
|
||||
/**
|
||||
* Perform a simple spot check on the German spellout rules
|
||||
*/
|
||||
virtual void TestGermanSpellout();
|
||||
|
||||
/**
|
||||
* Perform a simple spot check on the Thai spellout rules
|
||||
*/
|
||||
virtual void TestThaiSpellout();
|
||||
|
||||
protected:
|
||||
virtual void doTest(RuleBasedNumberFormat* formatter, const char* testData[][2], UBool testParsing);
|
||||
virtual void doLenientParseTest(RuleBasedNumberFormat* formatter, const char* testData[][2]);
|
||||
};
|
||||
|
||||
// endif ITRBNF_H
|
||||
#endif
|
341
icu4c/source/test/intltest/itrbnfrt.cpp
Normal file
341
icu4c/source/test/intltest/itrbnfrt.cpp
Normal file
@ -0,0 +1,341 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 1996-2000, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
#include "itrbnfrt.h"
|
||||
|
||||
#include "unicode/fmtable.h"
|
||||
#include "math.h" // fabs
|
||||
|
||||
// The TESTCASE dispatch macro is not available in icu1.8.1, so it is defined here.
// Expands to a single `case` of the switch in runIndexedTest(): it always
// reports the test's name, and when `exec` is set it logs a banner and runs
// the test. The trailing `break` deliberately has no semicolon; the call site
// supplies it.
#define TESTCASE(id,test)                 \
    case id:                              \
        name = #test;                     \
        if (exec) {                       \
            logln(#test "---");           \
            logln((UnicodeString)"");     \
            test();                       \
        }                                 \
        break
|
||||
|
||||
/**
 * IntlTest dispatch hook for the round-trip suite.
 *
 * @param index  zero-based test selector
 * @param exec   when true the selected test is executed; otherwise only
 *               `name` is filled in
 * @param name   receives the selected test's name, or "" when `index`
 *               does not match any test
 * @param par    not used by this suite
 */
void RbnfRoundTripTest::runIndexedTest(int32_t index, UBool exec, const char* &name, char* par)
{
    if (exec) {
        logln("TestSuite RuleBasedNumberFormatRT");
    }

    // Indices must stay dense and in this order.
    switch (index) {
        TESTCASE(0,  TestEnglishSpelloutRT);
        TESTCASE(1,  TestDurationsRT);
        TESTCASE(2,  TestSpanishSpelloutRT);
        TESTCASE(3,  TestFrenchSpelloutRT);
        TESTCASE(4,  TestSwissFrenchSpelloutRT);
        TESTCASE(5,  TestItalianSpelloutRT);
        TESTCASE(6,  TestGermanSpelloutRT);
        TESTCASE(7,  TestSwedishSpelloutRT);
        TESTCASE(8,  TestDutchSpelloutRT);
        TESTCASE(9,  TestJapaneseSpelloutRT);
        TESTCASE(10, TestRussianSpelloutRT);
        TESTCASE(11, TestGreekSpelloutRT);

        default:
            name = "";
            break;
    }
}
|
||||
|
||||
/**
|
||||
* Perform an exhaustive round-trip test on the English spellout rules
|
||||
*/
|
||||
void
|
||||
RbnfRoundTripTest::TestEnglishSpelloutRT()
|
||||
{
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
RuleBasedNumberFormat* formatter
|
||||
= new RuleBasedNumberFormat(URBNF_SPELLOUT, Locale::US, status);
|
||||
|
||||
if (U_FAILURE(status)) {
|
||||
errln("failed to construct formatter");
|
||||
} else {
|
||||
doTest(formatter, -12345678, 12345678);
|
||||
}
|
||||
delete formatter;
|
||||
}
|
||||
|
||||
/**
|
||||
* Perform an exhaustive round-trip test on the duration-formatting rules
|
||||
*/
|
||||
void
|
||||
RbnfRoundTripTest::TestDurationsRT()
|
||||
{
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
RuleBasedNumberFormat* formatter
|
||||
= new RuleBasedNumberFormat(URBNF_DURATION, Locale::US, status);
|
||||
|
||||
if (U_FAILURE(status)) {
|
||||
errln("failed to construct formatter");
|
||||
} else {
|
||||
doTest(formatter, 0, 12345678);
|
||||
}
|
||||
delete formatter;
|
||||
}
|
||||
|
||||
/**
|
||||
* Perform an exhaustive round-trip test on the Spanish spellout rules
|
||||
*/
|
||||
void
|
||||
RbnfRoundTripTest::TestSpanishSpelloutRT()
|
||||
{
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
RuleBasedNumberFormat* formatter
|
||||
= new RuleBasedNumberFormat(URBNF_SPELLOUT, Locale("es", "es"), status);
|
||||
|
||||
if (U_FAILURE(status)) {
|
||||
errln("failed to construct formatter");
|
||||
} else {
|
||||
doTest(formatter, -12345678, 12345678);
|
||||
}
|
||||
delete formatter;
|
||||
}
|
||||
|
||||
/**
|
||||
* Perform an exhaustive round-trip test on the French spellout rules
|
||||
*/
|
||||
void
|
||||
RbnfRoundTripTest::TestFrenchSpelloutRT()
|
||||
{
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
RuleBasedNumberFormat* formatter
|
||||
= new RuleBasedNumberFormat(URBNF_SPELLOUT, Locale::FRANCE, status);
|
||||
|
||||
if (U_FAILURE(status)) {
|
||||
errln("failed to construct formatter");
|
||||
} else {
|
||||
doTest(formatter, -12345678, 12345678);
|
||||
}
|
||||
delete formatter;
|
||||
}
|
||||
|
||||
/**
|
||||
* Perform an exhaustive round-trip test on the Swiss French spellout rules
|
||||
*/
|
||||
void
|
||||
RbnfRoundTripTest::TestSwissFrenchSpelloutRT()
|
||||
{
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
RuleBasedNumberFormat* formatter
|
||||
= new RuleBasedNumberFormat(URBNF_SPELLOUT, Locale("fr", "CH"), status);
|
||||
|
||||
if (U_FAILURE(status)) {
|
||||
errln("failed to construct formatter");
|
||||
} else {
|
||||
doTest(formatter, -12345678, 12345678);
|
||||
}
|
||||
delete formatter;
|
||||
}
|
||||
|
||||
/**
|
||||
* Perform an exhaustive round-trip test on the Italian spellout rules
|
||||
*/
|
||||
void
|
||||
RbnfRoundTripTest::TestItalianSpelloutRT()
|
||||
{
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
RuleBasedNumberFormat* formatter
|
||||
= new RuleBasedNumberFormat(URBNF_SPELLOUT, Locale::ITALIAN, status);
|
||||
|
||||
if (U_FAILURE(status)) {
|
||||
errln("failed to construct formatter");
|
||||
} else {
|
||||
doTest(formatter, -999999, 999999);
|
||||
}
|
||||
delete formatter;
|
||||
}
|
||||
|
||||
/**
|
||||
* Perform an exhaustive round-trip test on the German spellout rules
|
||||
*/
|
||||
void
|
||||
RbnfRoundTripTest::TestGermanSpelloutRT()
|
||||
{
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
RuleBasedNumberFormat* formatter
|
||||
= new RuleBasedNumberFormat(URBNF_SPELLOUT, Locale::GERMANY, status);
|
||||
|
||||
if (U_FAILURE(status)) {
|
||||
errln("failed to construct formatter");
|
||||
} else {
|
||||
doTest(formatter, 0, 12345678);
|
||||
}
|
||||
delete formatter;
|
||||
}
|
||||
|
||||
/**
|
||||
* Perform an exhaustive round-trip test on the Swedish spellout rules
|
||||
*/
|
||||
void
|
||||
RbnfRoundTripTest::TestSwedishSpelloutRT()
|
||||
{
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
RuleBasedNumberFormat* formatter
|
||||
= new RuleBasedNumberFormat(URBNF_SPELLOUT, Locale("sv", "SE"), status);
|
||||
|
||||
if (U_FAILURE(status)) {
|
||||
errln("failed to construct formatter");
|
||||
} else {
|
||||
doTest(formatter, 0, 12345678);
|
||||
}
|
||||
delete formatter;
|
||||
}
|
||||
|
||||
/**
|
||||
* Perform an exhaustive round-trip test on the Dutch spellout rules
|
||||
*/
|
||||
void
|
||||
RbnfRoundTripTest::TestDutchSpelloutRT()
|
||||
{
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
RuleBasedNumberFormat* formatter
|
||||
= new RuleBasedNumberFormat(URBNF_SPELLOUT, Locale("nl", "NL"), status);
|
||||
|
||||
if (U_FAILURE(status)) {
|
||||
errln("failed to construct formatter");
|
||||
} else {
|
||||
doTest(formatter, -12345678, 12345678);
|
||||
}
|
||||
delete formatter;
|
||||
}
|
||||
|
||||
/**
|
||||
* Perform an exhaustive round-trip test on the Japanese spellout rules
|
||||
*/
|
||||
void
|
||||
RbnfRoundTripTest::TestJapaneseSpelloutRT()
|
||||
{
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
RuleBasedNumberFormat* formatter
|
||||
= new RuleBasedNumberFormat(URBNF_SPELLOUT, Locale::JAPAN, status);
|
||||
|
||||
if (U_FAILURE(status)) {
|
||||
errln("failed to construct formatter");
|
||||
} else {
|
||||
doTest(formatter, 0, 12345678);
|
||||
}
|
||||
delete formatter;
|
||||
}
|
||||
|
||||
/**
|
||||
* Perform an exhaustive round-trip test on the Russian spellout rules
|
||||
*/
|
||||
void
|
||||
RbnfRoundTripTest::TestRussianSpelloutRT()
|
||||
{
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
RuleBasedNumberFormat* formatter
|
||||
= new RuleBasedNumberFormat(URBNF_SPELLOUT, Locale("ru", "RU"), status);
|
||||
|
||||
if (U_FAILURE(status)) {
|
||||
errln("failed to construct formatter");
|
||||
} else {
|
||||
doTest(formatter, 0, 12345678);
|
||||
}
|
||||
delete formatter;
|
||||
}
|
||||
|
||||
/**
|
||||
* Perform an exhaustive round-trip test on the Greek spellout rules
|
||||
*/
|
||||
void
|
||||
RbnfRoundTripTest::TestGreekSpelloutRT()
|
||||
{
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
RuleBasedNumberFormat* formatter
|
||||
= new RuleBasedNumberFormat(URBNF_SPELLOUT, Locale("el", "GR"), status);
|
||||
|
||||
if (U_FAILURE(status)) {
|
||||
errln("failed to construct formatter");
|
||||
} else {
|
||||
doTest(formatter, 0, 12345678);
|
||||
}
|
||||
delete formatter;
|
||||
}
|
||||
|
||||
void
|
||||
RbnfRoundTripTest::doTest(const RuleBasedNumberFormat* formatter,
|
||||
double lowLimit,
|
||||
double highLimit)
|
||||
{
|
||||
char buf[128];
|
||||
|
||||
uint32_t count = 0;
|
||||
double increment = 1;
|
||||
for (double i = lowLimit; i <= highLimit; i += increment) {
|
||||
if (count % 1000 == 0) {
|
||||
sprintf(buf, "%.12g", i);
|
||||
logln(buf);
|
||||
}
|
||||
|
||||
if (fabs(i) < 5000)
|
||||
increment = 1;
|
||||
else if (fabs(i) < 500000)
|
||||
increment = 2737;
|
||||
else
|
||||
increment = 267437;
|
||||
|
||||
UnicodeString formatResult;
|
||||
formatter->format(i, formatResult);
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
Formattable parseResult;
|
||||
formatter->parse(formatResult, parseResult, status);
|
||||
if (U_FAILURE(status)) {
|
||||
sprintf(buf, "Round-trip status failure: %.12g, status: %d", i, status);
|
||||
errln(buf);
|
||||
return;
|
||||
} else {
|
||||
double rt = (parseResult.getType() == Formattable::kDouble) ?
|
||||
parseResult.getDouble() :
|
||||
(double)parseResult.getLong();
|
||||
|
||||
if (rt != i) {
|
||||
sprintf(buf, "Round-trip failed: %.12g -> %.12g", i, rt);
|
||||
errln(buf);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
++count;
|
||||
}
|
||||
|
||||
if (lowLimit < 0) {
|
||||
double d = 1.234;
|
||||
while (d < 1000) {
|
||||
UnicodeString formatResult;
|
||||
formatter->format(d, formatResult);
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
Formattable parseResult;
|
||||
formatter->parse(formatResult, parseResult, status);
|
||||
if (U_FAILURE(status)) {
|
||||
sprintf(buf, "Round-trip status failure: %.12g, status: %d", d, status);
|
||||
errln(buf);
|
||||
return;
|
||||
} else {
|
||||
double rt = (parseResult.getType() == Formattable::kDouble) ?
|
||||
parseResult.getDouble() :
|
||||
(double)parseResult.getLong();
|
||||
|
||||
if (rt != d) {
|
||||
UnicodeString msg;
|
||||
sprintf(buf, "Round-trip failed: %.12g -> ", d);
|
||||
msg.append(buf);
|
||||
msg.append(formatResult);
|
||||
sprintf(buf, " -> %.12g", rt);
|
||||
msg.append(buf);
|
||||
errln(msg);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
d *= 10;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
86
icu4c/source/test/intltest/itrbnfrt.h
Normal file
86
icu4c/source/test/intltest/itrbnfrt.h
Normal file
@ -0,0 +1,86 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 1996-2000, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef ITRBNFRT_H
|
||||
#define ITRBNFRT_H
|
||||
|
||||
#include "intltest.h"
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/rbnf.h"
|
||||
|
||||
class RbnfRoundTripTest : public IntlTest {
|
||||
|
||||
// IntlTest override
|
||||
virtual void runIndexedTest(int32_t index, UBool exec, const char* &name, char* par);
|
||||
|
||||
/**
|
||||
* Perform an exhaustive round-trip test on the English spellout rules
|
||||
*/
|
||||
virtual void TestEnglishSpelloutRT();
|
||||
|
||||
/**
|
||||
* Perform an exhaustive round-trip test on the duration-formatting rules
|
||||
*/
|
||||
virtual void TestDurationsRT();
|
||||
|
||||
/**
|
||||
* Perform an exhaustive round-trip test on the Spanish spellout rules
|
||||
*/
|
||||
virtual void TestSpanishSpelloutRT();
|
||||
|
||||
/**
|
||||
* Perform an exhaustive round-trip test on the French spellout rules
|
||||
*/
|
||||
virtual void TestFrenchSpelloutRT();
|
||||
|
||||
/**
|
||||
* Perform an exhaustive round-trip test on the Swiss French spellout rules
|
||||
*/
|
||||
virtual void TestSwissFrenchSpelloutRT();
|
||||
|
||||
/**
|
||||
* Perform an exhaustive round-trip test on the Italian spellout rules
|
||||
*/
|
||||
virtual void TestItalianSpelloutRT();
|
||||
|
||||
/**
|
||||
* Perform an exhaustive round-trip test on the German spellout rules
|
||||
*/
|
||||
virtual void TestGermanSpelloutRT();
|
||||
|
||||
/**
|
||||
* Perform an exhaustive round-trip test on the Swedish spellout rules
|
||||
*/
|
||||
virtual void TestSwedishSpelloutRT();
|
||||
|
||||
/**
|
||||
* Perform an exhaustive round-trip test on the Dutch spellout rules
|
||||
*/
|
||||
virtual void TestDutchSpelloutRT();
|
||||
|
||||
/**
|
||||
* Perform an exhaustive round-trip test on the Japanese spellout rules
|
||||
*/
|
||||
virtual void TestJapaneseSpelloutRT();
|
||||
|
||||
/**
|
||||
* Perform an exhaustive round-trip test on the Russian spellout rules
|
||||
*/
|
||||
virtual void TestRussianSpelloutRT();
|
||||
|
||||
/**
|
||||
* Perform an exhaustive round-trip test on the Greek spellout rules
|
||||
*/
|
||||
virtual void TestGreekSpelloutRT();
|
||||
|
||||
protected:
|
||||
void doTest(const RuleBasedNumberFormat* formatter, double lowLimit, double highLimit);
|
||||
};
|
||||
|
||||
// endif ITRBNFRT_H
|
||||
#endif
|
Loading…
Reference in New Issue
Block a user