scuffed-code/icu4c/source/i18n/msgfmt.cpp
2003-12-16 21:26:12 +00:00

1488 lines
46 KiB
C++

/*
*******************************************************************************
* Copyright (C) 1997-2003, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*
* File MSGFMT.CPP
*
* Modification History:
*
* Date Name Description
* 02/19/97 aliu Converted from java.
* 03/20/97 helena Finished first cut of implementation.
* 04/10/97 aliu Made to work on AIX. Added stoi to replace wtoi.
* 06/11/97 helena Fixed addPattern to take the pattern correctly.
* 06/17/97 helena Fixed the getPattern to return the correct pattern.
* 07/09/97 helena Made ParsePosition into a class.
* 02/22/99 stephen Removed character literals for EBCDIC safety
********************************************************************************
*/
#include "unicode/utypes.h"
#if !UCONFIG_NO_FORMATTING
#include "unicode/msgfmt.h"
#include "unicode/decimfmt.h"
#include "unicode/datefmt.h"
#include "unicode/smpdtfmt.h"
#include "unicode/choicfmt.h"
#include "unicode/ustring.h"
#include "unicode/ucnv_err.h"
#include "unicode/uchar.h"
#include "ustrfmt.h"
#include "cmemory.h"
#include "uprops.h"
#include "uassert.h"
// *****************************************************************************
// class MessageFormat
// *****************************************************************************
// Pattern syntax characters, written as code points rather than character
// literals (the modification history notes that character literals were
// removed for EBCDIC safety).
#define COMMA ((UChar)0x002C)
#define SINGLE_QUOTE ((UChar)0x0027)
#define LEFT_CURLY_BRACE ((UChar)0x007B)
#define RIGHT_CURLY_BRACE ((UChar)0x007D)
//---------------------------------------
// static data
// Each keyword below is a NUL-terminated UChar string, spelled out as
// code points for the same reason as the macros above.
static const UChar ID_NUMBER[] = {
0x6E, 0x75, 0x6D, 0x62, 0x65, 0x72, 0 /* "number" */
};
static const UChar ID_DATE[] = {
0x64, 0x61, 0x74, 0x65, 0 /* "date" */
};
static const UChar ID_TIME[] = {
0x74, 0x69, 0x6D, 0x65, 0 /* "time" */
};
static const UChar ID_CHOICE[] = {
0x63, 0x68, 0x6F, 0x69, 0x63, 0x65, 0 /* "choice" */
};
// MessageFormat Type List Number, Date, Time or Choice
// The index of an entry is the value makeFormat() switches on:
// 0 = no type keyword (string), 1 = number, 2 = date, 3 = time, 4 = choice.
static const UChar * const TYPE_IDS[] = {
NULL,
ID_NUMBER,
ID_DATE,
ID_TIME,
ID_CHOICE
};
static const UChar ID_CURRENCY[] = {
0x63, 0x75, 0x72, 0x72, 0x65, 0x6E, 0x63, 0x79, 0 /* "currency" */
};
static const UChar ID_PERCENT[] = {
0x70, 0x65, 0x72, 0x63, 0x65, 0x6E, 0x74, 0 /* "percent" */
};
static const UChar ID_INTEGER[] = {
0x69, 0x6E, 0x74, 0x65, 0x67, 0x65, 0x72, 0 /* "integer" */
};
// NumberFormat modifier list, default, currency, percent or integer
// Index 0 = default, 1 = currency, 2 = percent, 3 = integer (the cases
// makeFormat() switches on). The trailing NULL pads the list to five
// entries -- presumably so that every keyword list has ID_LIST_LENGTH
// entries; confirm against findKeyword().
static const UChar * const NUMBER_STYLE_IDS[] = {
NULL,
ID_CURRENCY,
ID_PERCENT,
ID_INTEGER,
NULL,
};
static const UChar ID_SHORT[] = {
0x73, 0x68, 0x6F, 0x72, 0x74, 0 /* "short" */
};
static const UChar ID_MEDIUM[] = {
0x6D, 0x65, 0x64, 0x69, 0x75, 0x6D, 0 /* "medium" */
};
static const UChar ID_LONG[] = {
0x6C, 0x6F, 0x6E, 0x67, 0 /* "long" */
};
static const UChar ID_FULL[] = {
0x66, 0x75, 0x6C, 0x6C, 0 /* "full" */
};
// DateFormat modifier list, default, short, medium, long or full
static const UChar * const DATE_STYLE_IDS[] = {
NULL,
ID_SHORT,
ID_MEDIUM,
ID_LONG,
ID_FULL
};
// Parallel to DATE_STYLE_IDS: makeFormat() uses the index found in
// DATE_STYLE_IDS to pick the DateFormat style from this table.
static const DateFormat::EStyle DATE_STYLES[] = {
DateFormat::kDefault,
DateFormat::kShort,
DateFormat::kMedium,
DateFormat::kLong,
DateFormat::kFull,
};
// Number of entries in each of the keyword lists above.
static const int32_t ID_LIST_LENGTH = 5;
// Initial capacity the constructors request for subformats[] and argTypes[].
static const int32_t DEFAULT_INITIAL_CAPACITY = 10;
U_NAMESPACE_BEGIN
// -------------------------------------
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(MessageFormat)
/**
 * A structure representing one subformat of this MessageFormat.
 * Each subformat has a Format object, an offset into the plain
 * pattern text fPattern, and an argument number. The argument
 * number corresponds to the array of arguments to be formatted.
 *
 * The structure has no constructor or destructor: the enclosing
 * MessageFormat creates, copies and deletes the format object itself.
 * @internal
 */
class MessageFormat::Subformat : public UMemory {
public:
    /**
     * Formatter for this argument, or NULL for a plain string argument.
     * @internal
     */
    Format* format; // formatter

    /**
     * Position in fPattern at which the formatted argument is inserted.
     * @internal
     */
    int32_t offset; // offset into fPattern

    /**
     * Index of the argument this subformat consumes.
     * @internal
     */
    int32_t arg; // 0-based argument number

    /**
     * Clone that.format and assign it to this.format
     * Do NOT delete this.format
     *
     * The previous value of this.format is simply overwritten, so the
     * caller must have deleted it (or never set it) beforehand.
     * @internal
     */
    Subformat& operator=(const Subformat& that) {
        format = that.format ? that.format->clone() : NULL;
        offset = that.offset;
        arg = that.arg;
        return *this;
    }

    /**
     * Two subformats are equal when offset and argument number match and
     * their formats are either the same pointer (including both NULL) or
     * compare equal as Format objects.
     * @internal
     */
    UBool operator==(const Subformat& that) const {
        // Do cheap comparisons first
        if (offset != that.offset || arg != that.arg) {
            return FALSE;
        }
        if (format == that.format) {
            return TRUE; // same object, or both NULL
        }
        // Exactly one side may still be NULL here; a string subformat never
        // equals one that carries a formatter, and must not be dereferenced.
        if (format == NULL || that.format == NULL) {
            return FALSE;
        }
        return *format == *that.format;
    }

    /**
     * Negation of operator==.
     * @internal
     */
    UBool operator!=(const Subformat& that) const {
        return !operator==(that);
    }
};
//--------------------------------------------------------------------
/**
 * Convert a string to an unsigned decimal, ignoring rule whitespace.
 *
 * At most 10 digits are accepted, and the value must fit in an int32_t;
 * anything else is reported as a failure instead of being allowed to
 * overflow. A string with no digits at all yields 0.
 *
 * @param string text holding only decimal digits and rule whitespace
 * @return a non-negative number if successful, or a negative number
 * upon failure.
 */
static int32_t stou(const UnicodeString& string) {
    const int32_t kMaxInt32 = 0x7fffffff;
    int32_t n = 0;
    int32_t count = 0;
    UChar32 c;
    for (int32_t i=0; i<string.length(); i+=U16_LENGTH(c)) {
        c = string.char32At(i);
        if (uprv_isRuleWhiteSpace(c)) {
            continue;
        }
        int32_t d = u_digit(c, 10);
        if (d < 0 || ++count > 10) {
            return -1;
        }
        // 10*n + d <= kMaxInt32  <=>  n <= (kMaxInt32 - d) / 10.
        // Checking first keeps the arithmetic below free of signed overflow
        // (a 10-digit input such as "9999999999" does not fit in 32 bits).
        if (n > (kMaxInt32 - d) / 10) {
            return -1;
        }
        n = 10*n + d;
    }
    return n;
}
/**
 * Append the decimal representation of an integer to a string.
 *
 * @param i        the value to convert
 * @param appendTo the string that receives the digits
 * @return appendTo, to allow chaining
 */
static UnicodeString& itos(int32_t i, UnicodeString& appendTo) {
    const int32_t kCapacity = 16;
    UChar digits[kCapacity];
    // radix 10, no minimum width
    uprv_itou(digits, kCapacity, i, 10, 0);
    return appendTo.append(digits);
}
// -------------------------------------
/**
 * Construct a MessageFormat for the default locale from a pattern.
 *
 * @param pattern the message pattern to apply
 * @param success set to U_MEMORY_ALLOCATION_ERROR if the internal arrays
 *                cannot be allocated; otherwise receives whatever
 *                applyPattern() reports
 */
MessageFormat::MessageFormat(const UnicodeString& pattern,
                             UErrorCode& success)
: fLocale(Locale::getDefault()), // Uses the default locale
  formatAliases(NULL),
  formatAliasesCapacity(0),
  subformats(NULL),
  subformatCount(0),
  subformatCapacity(0),
  argTypes(NULL),
  argTypeCount(0),
  argTypeCapacity(0),
  defaultNumberFormat(NULL),
  defaultDateFormat(NULL)
{
    // Both working arrays must exist before a pattern can be parsed.
    const UBool arraysReady =
        allocateSubformats(DEFAULT_INITIAL_CAPACITY) &&
        allocateArgTypes(DEFAULT_INITIAL_CAPACITY);
    if (arraysReady) {
        applyPattern(pattern, success);
    } else {
        success = U_MEMORY_ALLOCATION_ERROR;
    }
}
/**
 * Construct a MessageFormat for the given locale from a pattern.
 *
 * @param pattern   the message pattern to apply
 * @param newLocale the locale used for this object
 * @param success   set to U_MEMORY_ALLOCATION_ERROR if the internal arrays
 *                  cannot be allocated; otherwise receives whatever
 *                  applyPattern() reports
 */
MessageFormat::MessageFormat(const UnicodeString& pattern,
                             const Locale& newLocale,
                             UErrorCode& success)
: fLocale(newLocale),
  formatAliases(NULL),
  formatAliasesCapacity(0),
  subformats(NULL),
  subformatCount(0),
  subformatCapacity(0),
  argTypes(NULL),
  argTypeCount(0),
  argTypeCapacity(0),
  defaultNumberFormat(NULL),
  defaultDateFormat(NULL)
{
    // Both working arrays must exist before a pattern can be parsed.
    const UBool arraysReady =
        allocateSubformats(DEFAULT_INITIAL_CAPACITY) &&
        allocateArgTypes(DEFAULT_INITIAL_CAPACITY);
    if (arraysReady) {
        applyPattern(pattern, success);
    } else {
        success = U_MEMORY_ALLOCATION_ERROR;
    }
}
/**
 * Construct a MessageFormat for the given locale from a pattern, reporting
 * parse error details.
 *
 * @param pattern    the message pattern to apply
 * @param newLocale  the locale used for this object
 * @param parseError passed to applyPattern(); not touched when the internal
 *                   arrays cannot be allocated
 * @param success    set to U_MEMORY_ALLOCATION_ERROR if the internal arrays
 *                   cannot be allocated; otherwise receives whatever
 *                   applyPattern() reports
 */
MessageFormat::MessageFormat(const UnicodeString& pattern,
                             const Locale& newLocale,
                             UParseError& parseError,
                             UErrorCode& success)
: fLocale(newLocale),
  formatAliases(NULL),
  formatAliasesCapacity(0),
  subformats(NULL),
  subformatCount(0),
  subformatCapacity(0),
  argTypes(NULL),
  argTypeCount(0),
  argTypeCapacity(0),
  defaultNumberFormat(NULL),
  defaultDateFormat(NULL)
{
    // Both working arrays must exist before a pattern can be parsed.
    const UBool arraysReady =
        allocateSubformats(DEFAULT_INITIAL_CAPACITY) &&
        allocateArgTypes(DEFAULT_INITIAL_CAPACITY);
    if (arraysReady) {
        applyPattern(pattern, parseError, success);
    } else {
        success = U_MEMORY_ALLOCATION_ERROR;
    }
}
/**
 * Copy constructor. Starts from an empty object (no arrays, no cached
 * default formats) and then delegates all copying to the assignment
 * operator.
 */
MessageFormat::MessageFormat(const MessageFormat& that)
: Format(that),
  formatAliases(NULL),
  formatAliasesCapacity(0),
  subformats(NULL),
  subformatCount(0),
  subformatCapacity(0),
  argTypes(NULL),
  argTypeCount(0),
  argTypeCapacity(0),
  defaultNumberFormat(NULL),
  defaultDateFormat(NULL)
{
    operator=(that);
}
/**
 * Destructor. Deletes every owned subformat object, frees the three
 * internal arrays and deletes the cached default formats.
 */
MessageFormat::~MessageFormat()
{
    // The Format objects referenced from subformats[] are owned by us.
    for (int32_t k = 0; k < subformatCount; ++k) {
        delete subformats[k].format;
    }
    uprv_free(subformats);
    subformats = NULL;
    subformatCapacity = 0;
    subformatCount = 0;

    uprv_free(argTypes);
    argTypes = NULL;
    argTypeCapacity = 0;
    argTypeCount = 0;

    // formatAliases only aliases the subformat objects; free the array alone.
    uprv_free(formatAliases);

    delete defaultNumberFormat;
    delete defaultDateFormat;
}
//--------------------------------------------------------------------
// Variable-size array management
/**
 * Allocate subformats[] to at least the given capacity and return
 * TRUE if successful. If not, leave subformats[] unchanged.
 *
 * If subformats is NULL, allocate it. If it is not NULL, enlarge it
 * if necessary to be at least as large as specified. When enlarging,
 * the capacity is at least doubled.
 *
 * Every newly obtained slot is initialized to { format = NULL,
 * offset = 0, arg = 0 }, mirroring what allocateArgTypes() does for its
 * array. Callers that fill in only the format pointer (adoptFormats,
 * setFormats) therefore never expose indeterminate offset/arg values.
 *
 * @param capacity the minimum number of slots required
 * @return TRUE on success, FALSE if memory could not be obtained
 */
UBool MessageFormat::allocateSubformats(int32_t capacity) {
    int32_t firstNewSlot;
    if (subformats == NULL) {
        subformats = (Subformat*) uprv_malloc(sizeof(*subformats) * capacity);
        subformatCapacity = capacity;
        subformatCount = 0;
        if (subformats == NULL) {
            subformatCapacity = 0;
            return FALSE;
        }
        firstNewSlot = 0;
    } else if (subformatCapacity < capacity) {
        if (capacity < 2*subformatCapacity) {
            capacity = 2*subformatCapacity;
        }
        Subformat* a = (Subformat*)
            uprv_realloc(subformats, sizeof(*subformats) * capacity);
        if (a == NULL) {
            return FALSE; // request failed
        }
        firstNewSlot = subformatCapacity;
        subformats = a;
        subformatCapacity = capacity;
    } else {
        return TRUE; // already large enough
    }
    // Set the fields one by one: Subformat::operator= would clone a format.
    for (int32_t i=firstNewSlot; i<subformatCapacity; ++i) {
        subformats[i].format = NULL;
        subformats[i].offset = 0;
        subformats[i].arg = 0;
    }
    return TRUE;
}
/**
 * Make sure argTypes[] can hold at least the requested number of entries.
 *
 * A missing array is allocated at exactly the requested size; an existing
 * array that is too small is grown to the larger of the request and twice
 * its current capacity. Every slot that is new to the array is set to
 * Formattable::kString. On failure to grow, argTypes[] is left unchanged.
 *
 * @param capacity the minimum number of entries required
 * @return TRUE on success, FALSE if memory could not be obtained
 */
UBool MessageFormat::allocateArgTypes(int32_t capacity) {
    int32_t fillFrom;
    if (argTypes == NULL) {
        // First allocation
        argTypeCount = 0;
        argTypes = (Formattable::Type*) uprv_malloc(sizeof(*argTypes) * capacity);
        if (argTypes == NULL) {
            argTypeCapacity = 0;
            return FALSE;
        }
        argTypeCapacity = capacity;
        fillFrom = 0;
    } else if (capacity > argTypeCapacity) {
        // Grow, at least doubling
        const int32_t doubled = 2*argTypeCapacity;
        if (capacity < doubled) {
            capacity = doubled;
        }
        Formattable::Type* grown = (Formattable::Type*)
            uprv_realloc(argTypes, sizeof(*argTypes) * capacity);
        if (grown == NULL) {
            return FALSE; // request failed
        }
        fillFrom = argTypeCapacity;
        argTypes = grown;
        argTypeCapacity = capacity;
    } else {
        // Already big enough
        return TRUE;
    }
    for (int32_t slot = fillFrom; slot < argTypeCapacity; ++slot) {
        argTypes[slot] = Formattable::kString;
    }
    return TRUE;
}
// -------------------------------------
/**
 * Assignment operator. Copies the base Format state, the pattern text,
 * the locale, a clone of every subformat and the argument types.
 *
 * The arrays are sized before anything is modified; if either cannot be
 * sized, or on self-assignment, this object is returned unchanged.
 */
const MessageFormat&
MessageFormat::operator=(const MessageFormat& that)
{
    if (this == &that) {
        return *this;
    }
    // Reallocate the arrays BEFORE changing this object
    if (!allocateSubformats(that.subformatCount) ||
        !allocateArgTypes(that.argTypeCount)) {
        return *this;
    }

    // Calls the super class for assignment first.
    Format::operator=(that);
    fPattern = that.fPattern;
    setLocale(that.fLocale);

    // Drop the formats we currently own ...
    int32_t k = 0;
    while (k < subformatCount) {
        delete subformats[k].format;
        ++k;
    }
    subformatCount = 0;

    // ... and take clones of the source's formats.
    // Subformat::operator= does NOT delete this.format
    const int32_t newSubformatCount = that.subformatCount;
    for (k = 0; k < newSubformatCount; ++k) {
        subformats[k] = that.subformats[k];
    }
    subformatCount = newSubformatCount;

    const int32_t newArgTypeCount = that.argTypeCount;
    for (k = 0; k < newArgTypeCount; ++k) {
        argTypes[k] = that.argTypes[k];
    }
    argTypeCount = newArgTypeCount;

    return *this;
}
/**
 * Equality comparison.
 *
 * Two MessageFormat objects are equal when they have the same dynamic
 * class, equal base Format state, equal plain pattern text, equal locale,
 * the same number of subformats, and pairwise equal subformats.
 *
 * @param rhs the object to compare against
 * @return TRUE if the two objects are equal
 */
UBool
MessageFormat::operator==(const Format& rhs) const
{
    if (this == &rhs) return TRUE;

    // The cast is only relied upon after the class ID check below succeeds
    // (the || chain short-circuits before any MessageFormat member is read).
    const MessageFormat& that = (const MessageFormat&)rhs;

    // Check class ID before checking MessageFormat members
    if (!Format::operator==(rhs) ||
        getDynamicClassID() != that.getDynamicClassID() ||
        fPattern != that.fPattern ||
        fLocale != that.fLocale ||
        // fPattern holds only the literal text, so patterns such as "{0}"
        // and "{0}{1}" share the same fPattern; the counts must be compared
        // too, and the loop below must not read past that.subformatCount.
        subformatCount != that.subformatCount) {
        return FALSE;
    }

    int32_t j;
    for (j=0; j<subformatCount; ++j) {
        if (subformats[j] != that.subformats[j]) {
            return FALSE;
        }
    }

    return TRUE;
}
// -------------------------------------
/**
 * Create a copy of this MessageFormat through the copy constructor.
 * The caller owns the returned object.
 */
Format*
MessageFormat::clone() const
{
    MessageFormat* copy = new MessageFormat(*this);
    return copy;
}
// -------------------------------------
// Sets the locale of this MessageFormat object to theLocale.
void
MessageFormat::setLocale(const Locale& theLocale)
{
if (fLocale != theLocale) {
delete defaultNumberFormat;
defaultNumberFormat = NULL;
delete defaultDateFormat;
defaultDateFormat = NULL;
}
fLocale = theLocale;
}
// -------------------------------------
// Gets the locale of this MessageFormat object.
// Returns a reference to the fLocale member, i.e. the locale given at
// construction or by the most recent setLocale() call.
const Locale&
MessageFormat::getLocale() const
{
return fLocale;
}
void
MessageFormat::applyPattern(const UnicodeString& newPattern,
UErrorCode& status)
{
UParseError parseError;
applyPattern(newPattern,parseError,status);
}
// -------------------------------------
// Applies the new pattern and returns an error if the pattern
// is not correct.
//
// Does nothing if ec already indicates a failure on entry. Otherwise all
// existing subformat objects are deleted and both counts are reset BEFORE
// parsing starts, so the previous subformats are gone even if the new
// pattern turns out to be invalid.
//
// On success, fPattern receives the literal text (segment 0) and the
// subformats have been recorded through makeFormat().
// On failure, syntaxError() is called with the index reached in the
// pattern, every subformat created so far is deleted and both counts are
// reset to zero; fPattern is not modified on that path.
//
// Error codes set here: U_PATTERN_SYNTAX_ERROR for a '{' inside segment 1
// or 2, and U_UNMATCHED_BRACES when the pattern ends inside a subformat.
// Any failure reported by makeFormat() is passed through.
void
MessageFormat::applyPattern(const UnicodeString& pattern,
UParseError& parseError,
UErrorCode& ec)
{
if(U_FAILURE(ec)) {
return;
}
// The pattern is broken up into segments. Each time a subformat
// is encountered, 4 segments are recorded. For example, consider
// the pattern:
// "There {0,choice,0.0#are no files|1.0#is one file|1.0<are {0, number} files} on disk {1}."
// The first set of segments is:
// segments[0] = "There "
// segments[1] = "0"
// segments[2] = "choice"
// segments[3] = "0.0#are no files|1.0#is one file|1.0<are {0, number} files"
// During parsing, the plain text is accumulated into segments[0].
// Segments 1..3 are used to parse each subpattern. Each time a
// subpattern is parsed, it creates a format object that is stored
// in the subformats array, together with an offset and argument
// number. The offset into the plain text stored in
// segments[0].
// Quotes in segment 0 are handled normally. They are removed.
// Quotes may not occur in segments 1 or 2.
// Quotes in segment 3 are parsed and _copied_. This makes
// subformat patterns work, e.g., {1,number,'#'.##} passes
// the pattern "'#'.##" to DecimalFormat.
UnicodeString segments[4];
int32_t part = 0; // segment we are in, 0..3
// Index of the next subformat to create; it is passed to makeFormat()
// and incremented after each subformat has been made successfully.
int32_t formatNumber = 0;
UBool inQuote = FALSE;
// Nesting depth of '{' seen inside segment 3.
int32_t braceStack = 0;
// Clear error struct
parseError.offset = -1;
parseError.preContext[0] = parseError.postContext[0] = (UChar)0;
int32_t patLen = pattern.length();
int32_t i;
// Discard the subformats of the previous pattern.
for (i=0; i<subformatCount; ++i) {
delete subformats[i].format;
}
subformatCount = 0;
argTypeCount = 0;
for (i=0; i<patLen; ++i) {
UChar ch = pattern[i];
if (part == 0) {
// In segment 0, recognize and remove quotes
if (ch == SINGLE_QUOTE) {
// A doubled quote stands for one literal quote character.
if (i+1 < patLen && pattern[i+1] == SINGLE_QUOTE) {
segments[0] += ch;
++i;
} else {
inQuote = !inQuote;
}
} else if (ch == LEFT_CURLY_BRACE && !inQuote) {
// The only way we get from segment 0 to 1 is via an
// unquoted '{'.
part = 1;
} else {
segments[0] += ch;
}
} else if (inQuote) {
// In segments 1..3, recognize quoted matter, and copy it
// into the segment, together with the quotes. This takes
// care of '' as well.
segments[part] += ch;
if (ch == SINGLE_QUOTE) {
inQuote = FALSE;
}
} else {
// We have an unquoted character in segment 1..3
switch (ch) {
case COMMA:
// Commas bump us to the next segment, except for segment 3,
// which can contain commas. See example above.
if (part < 3)
part += 1;
else
segments[3] += ch;
break;
case LEFT_CURLY_BRACE:
// Handle '{' within segment 3. The initial '{'
// before segment 1 is handled above.
if (part != 3) {
ec = U_PATTERN_SYNTAX_ERROR;
goto SYNTAX_ERROR;
}
++braceStack;
segments[part] += ch;
break;
case RIGHT_CURLY_BRACE:
if (braceStack == 0) {
// This '}' closes the subformat: build it from segments 1..3,
// then clear those segments and go back to literal text.
makeFormat(formatNumber, segments, parseError,ec);
if (U_FAILURE(ec)){
goto SYNTAX_ERROR;
}
formatNumber++;
segments[1].remove();
segments[2].remove();
segments[3].remove();
part = 0;
} else {
// This '}' closes a nested '{' inside segment 3.
--braceStack;
segments[part] += ch;
}
break;
case SINGLE_QUOTE:
inQuote = TRUE;
// fall through (copy quote chars in segments 1..3)
default:
segments[part] += ch;
break;
}
}
}
if (braceStack != 0 || part != 0) {
// Unmatched braces in the pattern
ec = U_UNMATCHED_BRACES;
goto SYNTAX_ERROR;
}
fPattern = segments[0];
return;
SYNTAX_ERROR:
// Report the error position, then drop everything built so far.
syntaxError(pattern, i, parseError);
for (i=0; i<subformatCount; ++i) {
delete subformats[i].format;
}
argTypeCount = subformatCount = 0;
}
// -------------------------------------
// Converts this MessageFormat instance to a pattern.
UnicodeString&
MessageFormat::toPattern(UnicodeString& appendTo) const {
// later, make this more extensible
int32_t lastOffset = 0;
int32_t i;
for (i=0; i<subformatCount; ++i) {
copyAndFixQuotes(fPattern, lastOffset, subformats[i].offset, appendTo);
lastOffset = subformats[i].offset;
appendTo += LEFT_CURLY_BRACE;
itos(subformats[i].arg, appendTo);
Format* fmt = subformats[i].format;
if (fmt == NULL) {
// do nothing, string format
}
else if (fmt->getDynamicClassID() == DecimalFormat::getStaticClassID()) {
UErrorCode ec = U_ZERO_ERROR;
NumberFormat& formatAlias = *(NumberFormat*)fmt;
NumberFormat *defaultTemplate = NumberFormat::createInstance(fLocale, ec);
NumberFormat *currencyTemplate = NumberFormat::createCurrencyInstance(fLocale, ec);
NumberFormat *percentTemplate = NumberFormat::createPercentInstance(fLocale, ec);
NumberFormat *integerTemplate = createIntegerFormat(fLocale, ec);
appendTo += COMMA;
appendTo += ID_NUMBER;
if (formatAlias != *defaultTemplate) {
appendTo += COMMA;
if (formatAlias == *currencyTemplate) {
appendTo += ID_CURRENCY;
}
else if (formatAlias == *percentTemplate) {
appendTo += ID_PERCENT;
}
else if (formatAlias == *integerTemplate) {
appendTo += ID_INTEGER;
}
else {
UnicodeString buffer;
appendTo += ((DecimalFormat*)fmt)->toPattern(buffer);
}
}
delete defaultTemplate;
delete currencyTemplate;
delete percentTemplate;
delete integerTemplate;
}
else if (fmt->getDynamicClassID() == SimpleDateFormat::getStaticClassID()) {
DateFormat& formatAlias = *(DateFormat*)fmt;
DateFormat *defaultDateTemplate = DateFormat::createDateInstance(DateFormat::kDefault, fLocale);
DateFormat *shortDateTemplate = DateFormat::createDateInstance(DateFormat::kShort, fLocale);
DateFormat *longDateTemplate = DateFormat::createDateInstance(DateFormat::kLong, fLocale);
DateFormat *fullDateTemplate = DateFormat::createDateInstance(DateFormat::kFull, fLocale);
DateFormat *defaultTimeTemplate = DateFormat::createTimeInstance(DateFormat::kDefault, fLocale);
DateFormat *shortTimeTemplate = DateFormat::createTimeInstance(DateFormat::kShort, fLocale);
DateFormat *longTimeTemplate = DateFormat::createTimeInstance(DateFormat::kLong, fLocale);
DateFormat *fullTimeTemplate = DateFormat::createTimeInstance(DateFormat::kFull, fLocale);
appendTo += COMMA;
if (formatAlias == *defaultDateTemplate) {
appendTo += ID_DATE;
}
else if (formatAlias == *shortDateTemplate) {
appendTo += ID_DATE;
appendTo += COMMA;
appendTo += ID_SHORT;
}
else if (formatAlias == *defaultDateTemplate) {
appendTo += ID_DATE;
appendTo += COMMA;
appendTo += ID_MEDIUM;
}
else if (formatAlias == *longDateTemplate) {
appendTo += ID_DATE;
appendTo += COMMA;
appendTo += ID_LONG;
}
else if (formatAlias == *fullDateTemplate) {
appendTo += ID_DATE;
appendTo += COMMA;
appendTo += ID_FULL;
}
else if (formatAlias == *defaultTimeTemplate) {
appendTo += ID_TIME;
}
else if (formatAlias == *shortTimeTemplate) {
appendTo += ID_TIME;
appendTo += COMMA;
appendTo += ID_SHORT;
}
else if (formatAlias == *defaultTimeTemplate) {
appendTo += ID_TIME;
appendTo += COMMA;
appendTo += ID_MEDIUM;
}
else if (formatAlias == *longTimeTemplate) {
appendTo += ID_TIME;
appendTo += COMMA;
appendTo += ID_LONG;
}
else if (formatAlias == *fullTimeTemplate) {
appendTo += ID_TIME;
appendTo += COMMA;
appendTo += ID_FULL;
}
else {
UnicodeString buffer;
appendTo += ID_DATE;
appendTo += COMMA;
appendTo += ((SimpleDateFormat*)fmt)->toPattern(buffer);
}
delete defaultDateTemplate;
delete shortDateTemplate;
delete longDateTemplate;
delete fullDateTemplate;
delete defaultTimeTemplate;
delete shortTimeTemplate;
delete longTimeTemplate;
delete fullTimeTemplate;
// {sfb} there should be a more efficient way to do this!
}
else if (fmt->getDynamicClassID() == ChoiceFormat::getStaticClassID()) {
UnicodeString buffer;
appendTo += COMMA;
appendTo += ID_CHOICE;
appendTo += COMMA;
appendTo += ((ChoiceFormat*)fmt)->toPattern(buffer);
}
else {
//appendTo += ", unknown";
}
appendTo += RIGHT_CURLY_BRACE;
}
copyAndFixQuotes(fPattern, lastOffset, fPattern.length(), appendTo);
return appendTo;
}
// -------------------------------------
/**
 * Replace all subformat objects with the given ones and take ownership of
 * them. The subformat count becomes `count`.
 *
 * Nothing happens if newFormats is NULL or count is negative. If the
 * internal array cannot be sized, the incoming formats are deleted and
 * this object is left unchanged.
 *
 * @param newFormats array of `count` Format pointers to adopt
 * @param count      number of entries in newFormats
 */
void
MessageFormat::adoptFormats(Format** newFormats,
                            int32_t count) {
    if (newFormats == NULL || count < 0) {
        return;
    }

    int32_t k;
    if (!allocateSubformats(count)) {
        // An adopt method must always take ownership. Delete
        // the incoming format objects and return unchanged.
        for (k = 0; k < count; ++k) {
            delete newFormats[k];
        }
        return;
    }

    // Release what we currently own, then store the adopted pointers.
    for (k = 0; k < subformatCount; ++k) {
        delete subformats[k].format;
    }
    for (k = 0; k < count; ++k) {
        subformats[k].format = newFormats[k];
    }
    subformatCount = count;

    // TODO: What about the .offset and .arg fields?
}
// -------------------------------------
// Sets the new formats array and updates the array count.
// This MessageFormat instance maks a copy of the new formats.
void
MessageFormat::setFormats(const Format** newFormats,
int32_t count) {
if (newFormats == NULL || count < 0) {
return;
}
if (allocateSubformats(count)) {
int32_t i;
for (i=0; i<subformatCount; ++i) {
delete subformats[i].format;
}
subformatCount = 0;
for (i=0; i<count; ++i) {
subformats[i].format = newFormats[i] ? newFormats[i]->clone() : NULL;
}
subformatCount = count;
}
// TODO: What about the .offset and .arg fields?
}
// -------------------------------------
/**
 * Adopt a single format as subformat number n, deleting the format that
 * was stored there before.
 *
 * If n is not a valid subformat index the new format is deleted, because
 * an adopt method always takes ownership.
 *
 * @param n         index of the subformat to replace
 * @param newFormat the format to adopt
 */
void
MessageFormat::adoptFormat(int32_t n, Format *newFormat) {
    const UBool inRange = (n >= 0 && n < subformatCount);
    if (inRange) {
        delete subformats[n].format;
        subformats[n].format = newFormat;
    } else {
        delete newFormat;
    }
}
// -------------------------------------
/**
 * Set subformat number n to a clone of the given format, deleting the
 * format that was stored there before.
 *
 * Does nothing if n is not a valid subformat index.
 *
 * @param n         index of the subformat to replace
 * @param newFormat the format to copy
 */
void
MessageFormat::setFormat(int32_t n, const Format& newFormat) {
    if (n < 0 || n >= subformatCount) {
        return;
    }
    delete subformats[n].format;
    if (&newFormat != NULL) {
        subformats[n].format = newFormat.clone();
    } else {
        // This should never happen -- but we'll be nice if it does
        subformats[n].format = NULL;
    }
}
// -------------------------------------
/**
 * Gets the format array.
 *
 * This old API returns an array (which we hold) of Format* pointers.
 * The array is valid up to the next call to any method on this object.
 * It is built and resized on demand and only aliases the
 * subformats[i].format pointers; the caller owns nothing.
 *
 * @param cnt receives the number of formats, or 0 if the alias array
 *            could not be allocated
 * @return the alias array, or NULL if it could not be allocated
 */
const Format**
MessageFormat::getFormats(int32_t& cnt) const
{
    // The alias array is a cache, so it is updated from this const method.
    MessageFormat* self = const_cast<MessageFormat*>(this);
    cnt = 0;

    if (formatAliases == NULL) {
        // First use: at least 10 entries
        self->formatAliasesCapacity = (subformatCount<10) ? 10 : subformatCount;
        Format** fresh = (Format**)
            uprv_malloc(sizeof(Format*) * formatAliasesCapacity);
        if (fresh == NULL) {
            return NULL;
        }
        self->formatAliases = fresh;
    } else if (formatAliasesCapacity < subformatCount) {
        // Too small: grow to exactly the number of subformats
        Format** grown = (Format**)
            uprv_realloc(formatAliases, sizeof(Format*) * subformatCount);
        if (grown == NULL) {
            return NULL;
        }
        self->formatAliases = grown;
        self->formatAliasesCapacity = subformatCount;
    }

    int32_t k = 0;
    while (k < subformatCount) {
        self->formatAliases[k] = subformats[k].format;
        ++k;
    }
    cnt = subformatCount;
    return (const Format**)formatAliases;
}
// -------------------------------------
/**
 * Format an array of arguments and append the result to appendTo.
 * Forwards to the internal format() with a recursion counter of 0.
 *
 * @param source   the arguments
 * @param cnt      number of arguments
 * @param appendTo the string that receives the result
 * @param ignore   field position, passed through to the internal overload
 * @param success  error code; nothing is done if it already indicates failure
 * @return appendTo
 */
UnicodeString&
MessageFormat::format(const Formattable* source,
                      int32_t cnt,
                      UnicodeString& appendTo,
                      FieldPosition& ignore,
                      UErrorCode& success) const
{
    if (U_SUCCESS(success)) {
        format(source, cnt, appendTo, ignore, 0, success);
    }
    return appendTo;
}
// -------------------------------------
/**
 * Convenience function: build a temporary MessageFormat from the pattern
 * and use it to format the arguments into appendTo.
 *
 * @param pattern   the message pattern
 * @param arguments the arguments
 * @param cnt       number of arguments
 * @param appendTo  the string that receives the result
 * @param success   error code
 * @return appendTo
 */
UnicodeString&
MessageFormat::format( const UnicodeString& pattern,
                       const Formattable* arguments,
                       int32_t cnt,
                       UnicodeString& appendTo,
                       UErrorCode& success)
{
    MessageFormat formatter(pattern, success);
    FieldPosition unusedPosition(0);
    formatter.format(arguments, cnt, appendTo, unusedPosition, success);
    return appendTo;
}
// -------------------------------------
/**
 * Format a Formattable that holds an array of arguments and append the
 * result to appendTo.
 *
 * @param source   must be of type Formattable::kArray, otherwise success
 *                 is set to U_ILLEGAL_ARGUMENT_ERROR
 * @param appendTo the string that receives the result
 * @param ignore   field position, passed through to the internal overload
 * @param success  error code; nothing is done if it already indicates failure
 * @return appendTo
 */
UnicodeString&
MessageFormat::format(const Formattable& source,
                      UnicodeString& appendTo,
                      FieldPosition& ignore,
                      UErrorCode& success) const
{
    if (U_FAILURE(success)) {
        return appendTo;
    }
    if (source.getType() == Formattable::kArray) {
        int32_t argCount;
        const Formattable* args = source.getArray(argCount);
        return format(args, argCount, appendTo, ignore, 0, success);
    }
    success = U_ILLEGAL_ARGUMENT_ERROR;
    return appendTo;
}
// -------------------------------------
// Formats the arguments Formattable array and copy into the appendTo buffer.
// Ignore the FieldPosition result for error checking.
//
// This is the worker behind the public format() overloads. It walks the
// subformats in order, copying the literal text of fPattern that precedes
// each one and then appending the formatted argument.
//
// arguments           array of cnt arguments; may be NULL only if cnt == 0
// cnt                 number of arguments; negative is an error
// appendTo            receives the output; on an error return it may
//                     already hold the text produced so far
// status              only handed on to the nested format() call made for
//                     ChoiceFormat results
// recursionProtection passed through unchanged to that nested call; it is
//                     not otherwise used here (see the TODO below)
// success             set to U_ILLEGAL_ARGUMENT_ERROR for a bad
//                     arguments/cnt combination or for an argument type
//                     that has no default handling
// Returns appendTo.
UnicodeString&
MessageFormat::format(const Formattable* arguments,
int32_t cnt,
UnicodeString& appendTo,
FieldPosition& status,
int32_t recursionProtection,
UErrorCode& success) const
{
// Allow NULL array only if cnt == 0
if (cnt < 0 || (cnt && arguments == NULL)) {
success = U_ILLEGAL_ARGUMENT_ERROR;
return appendTo;
}
int32_t lastOffset = 0;
for (int32_t i=0; i<subformatCount; ++i) {
// Append the prefix of current format element.
appendTo.append(fPattern, lastOffset, subformats[i].offset - lastOffset);
lastOffset = subformats[i].offset;
int32_t argumentNumber = subformats[i].arg;
// Checks the scope of the argument number.
// An argument that was not supplied is written back as "{n}".
// NOTE(review): only the upper bound is checked here.
if (argumentNumber >= cnt) {
appendTo += LEFT_CURLY_BRACE;
itos(argumentNumber, appendTo);
appendTo += RIGHT_CURLY_BRACE;
continue;
}
const Formattable *obj = arguments + argumentNumber;
Formattable::Type type = obj->getType();
// Recursively calling the format process only if the current
// format argument refers to a ChoiceFormat object.
Format* fmt = subformats[i].format;
if (fmt != NULL) {
// An explicit subformat exists: format into a scratch string first.
UnicodeString arg;
fmt->format(*obj, arg, success);
// Needs to reprocess the ChoiceFormat option by using the
// MessageFormat pattern application.
if (fmt->getDynamicClassID() == ChoiceFormat::getStaticClassID() &&
arg.indexOf(LEFT_CURLY_BRACE) >= 0) {
// The chosen text contains '{': treat it as a pattern of its own
// and format the same argument array with it.
MessageFormat temp(arg, fLocale, success);
// TODO: Implement recursion protection
temp.format(arguments, cnt, appendTo, status, recursionProtection, success);
if (U_FAILURE(success)) {
return appendTo;
}
}
else {
appendTo += arg;
}
}
// No explicit subformat: pick a default based on the argument's type.
// If the obj data type is a number, use a NumberFormat instance.
else if ((type == Formattable::kDouble) ||
(type == Formattable::kLong) ||
(type == Formattable::kInt64)) {
const NumberFormat* nf = getDefaultNumberFormat(success);
if (nf == NULL) {
return appendTo;
}
if (type == Formattable::kDouble) {
nf->format(obj->getDouble(), appendTo);
} else if (type == Formattable::kLong) {
nf->format(obj->getLong(), appendTo);
} else {
nf->format(obj->getInt64(), appendTo);
}
}
// If the obj data type is a Date instance, use a DateFormat instance.
else if (type == Formattable::kDate) {
const DateFormat* df = getDefaultDateFormat(success);
if (df == NULL) {
return appendTo;
}
df->format(obj->getDate(), appendTo);
}
else if (type == Formattable::kString) {
appendTo += obj->getString();
}
else {
// Any other argument type cannot be formatted without a subformat.
success = U_ILLEGAL_ARGUMENT_ERROR;
return appendTo;
}
}
// Appends the rest of the pattern characters after the real last offset.
appendTo.append(fPattern, lastOffset, 0x7fffffff);
return appendTo;
}
// -------------------------------------
// Parses the source pattern and returns the Formattable objects array,
// the array count and the ending parse position. The caller of this method
// owns the array.
//
// Parsing starts at pos.getIndex(). The literal text of fPattern must match
// the source exactly; each subformat in between consumes source text either
// through its Format object's parseObject() or, for string arguments, up to
// the next occurrence of the literal text that follows in the pattern.
//
// source the text to parse
// pos    in: start index. On success the index is set to the end of the
//        matched text. On failure the error index is set and the index is
//        left unchanged.
// count  out: one more than the highest argument number that was assigned
//        a value, or 0 on failure
// Returns a new[]-allocated array (at least one element), or NULL on failure.
Formattable*
MessageFormat::parse(const UnicodeString& source,
ParsePosition& pos,
int32_t& count) const
{
// Allocate at least one element. Allocating an array of length
// zero causes problems on some platforms (e.g. Win32).
Formattable *resultArray = new Formattable[argTypeCount ? argTypeCount : 1];
// patternOffset walks fPattern, sourceOffset walks the source text.
int32_t patternOffset = 0;
int32_t sourceOffset = pos.getIndex();
ParsePosition tempPos(0);
count = 0; // {sfb} reset to zero
int32_t len;
for (int32_t i = 0; i < subformatCount; ++i) {
// match up to format
// The literal text between the previous subformat and this one must
// appear verbatim in the source.
len = subformats[i].offset - patternOffset;
if (len == 0 ||
fPattern.compare(patternOffset, len, source, sourceOffset, len) == 0) {
sourceOffset += len;
patternOffset += len;
}
else {
goto PARSE_ERROR;
}
// now use format
Format* fmt = subformats[i].format;
int32_t arg = subformats[i].arg;
if (fmt == NULL) { // string format
// if at end, use longest possible match
// otherwise uses first match to intervening string
// does NOT recursively try all possibilities
// tempLength is the pattern position where the following literal
// text ends: the next subformat's offset, or the end of fPattern.
int32_t tempLength = (i+1<subformatCount) ?
subformats[i+1].offset : fPattern.length();
int32_t next;
if (patternOffset >= tempLength) {
// No literal text follows: take the rest of the source.
next = source.length();
}
else {
UnicodeString buffer;
fPattern.extract(patternOffset,tempLength - patternOffset, buffer);
next = source.indexOf(buffer, sourceOffset);
}
if (next < 0) {
goto PARSE_ERROR;
}
else {
UnicodeString buffer;
source.extract(sourceOffset,next - sourceOffset, buffer);
UnicodeString strValue = buffer;
UnicodeString temp(LEFT_CURLY_BRACE);
// {sfb} check this later
itos(arg, temp);
temp += RIGHT_CURLY_BRACE;
// A source value of exactly "{n}" is what format() writes for an
// argument that was not supplied; it is not stored as a value.
if (strValue != temp) {
source.extract(sourceOffset,next - sourceOffset, buffer);
resultArray[arg].setString(buffer);
// {sfb} not sure about this
if ((arg + 1) > count) {
count = arg + 1;
}
}
sourceOffset = next;
}
}
else {
// Let the subformat parse; no progress means failure.
tempPos.setIndex(sourceOffset);
fmt->parseObject(source, resultArray[arg], tempPos);
if (tempPos.getIndex() == sourceOffset) {
goto PARSE_ERROR;
}
if ((arg + 1) > count) {
count = arg + 1;
}
sourceOffset = tempPos.getIndex(); // update
}
}
// The literal text after the last subformat must match as well.
len = fPattern.length() - patternOffset;
if (len == 0 ||
fPattern.compare(patternOffset, len, source, sourceOffset, len) == 0) {
pos.setIndex(sourceOffset + len);
return resultArray;
}
// else fall through...
PARSE_ERROR:
pos.setErrorIndex(sourceOffset);
delete [] resultArray;
count = 0;
return NULL; // leave index as is to signal error
}
// -------------------------------------
/**
 * Parse the source string from its beginning and return the array of
 * Formattable objects together with its count. The caller owns the
 * returned array.
 *
 * A parse that ends at index 0 is treated as a failure: success is set to
 * U_MESSAGE_PARSE_ERROR, any array is deleted and NULL is returned.
 *
 * @param source  the text to parse
 * @param cnt     receives the number of parsed arguments
 * @param success error code
 * @return the parsed arguments, or NULL on failure
 */
Formattable*
MessageFormat::parse(const UnicodeString& source,
                     int32_t& cnt,
                     UErrorCode& success) const
{
    // Start from offset zero of the source text.
    ParsePosition where(0);
    Formattable* parsed = parse(source, where, cnt);
    if (where.getIndex() != 0) {
        return parsed;
    }
    success = U_MESSAGE_PARSE_ERROR;
    delete[] parsed;
    return NULL;
}
// -------------------------------------
// Parses the source text and copy into the result buffer.
void
MessageFormat::parseObject( const UnicodeString& source,
Formattable& result,
ParsePosition& status) const
{
int32_t cnt = 0;
Formattable* tmpResult = parse(source, status, cnt);
if (tmpResult != NULL)
result.adoptArray(tmpResult, cnt);
}
// -------------------------------------
/**
 * Converts one parsed pattern element, held in the segments[] array
 * (see applyPattern()), into an entry of the subformats[] array and
 * records the type of the corresponding argument in argTypes[].
 * segments[1] supplies the argument number, segments[2] the format
 * type keyword, and segments[3] the style keyword or sub-pattern.
 * The length of segments[0] is stored as the subformat's offset.
 *
 * @param formatNumber index into subformats[] for this format
 * @param segments array of strings with the parsed pattern segments
 * @param parseError parse error data (output param)
 * @param ec error code; if it already indicates failure on entry, or
 *           any step fails, nothing is recorded
 */
void
MessageFormat::makeFormat(int32_t formatNumber,
                          UnicodeString* segments,
                          UParseError& parseError,
                          UErrorCode& ec) {
    if (U_FAILURE(ec)) {
        return;
    }

    // The argument number is always unlocalized.
    int32_t argumentNumber = stou(segments[1]);
    if (argumentNumber < 0) {
        ec = U_INVALID_FORMAT_ERROR;
        return;
    }

    // Determine the argument type and build the matching Format
    // object.  String arguments do not get a Format object, so fmt
    // stays NULL for them.
    Formattable::Type argType = Formattable::kString;
    Format *fmt = NULL;
    const int32_t typeID = findKeyword(segments[2], TYPE_IDS);

    if (typeID == 0) {
        // string: the defaults above already describe it
    } else if (typeID == 1) {
        // number
        const int32_t numberStyle = findKeyword(segments[3], NUMBER_STYLE_IDS);
        argType = (numberStyle == 3) ? Formattable::kLong : Formattable::kDouble;
        if (numberStyle == 0) {
            // default
            fmt = NumberFormat::createInstance(fLocale, ec);
        } else if (numberStyle == 1) {
            // currency
            fmt = NumberFormat::createCurrencyInstance(fLocale, ec);
        } else if (numberStyle == 2) {
            // percent
            fmt = NumberFormat::createPercentInstance(fLocale, ec);
        } else if (numberStyle == 3) {
            // integer
            fmt = createIntegerFormat(fLocale, ec);
        } else {
            // Not a known style keyword: treat segments[3] as a
            // pattern, which can only be applied to a DecimalFormat.
            fmt = NumberFormat::createInstance(fLocale, ec);
            if (fmt &&
                fmt->getDynamicClassID() == DecimalFormat::getStaticClassID()) {
                ((DecimalFormat*)fmt)->applyPattern(segments[3],parseError,ec);
            }
        }
    } else if (typeID == 2 || typeID == 3) {
        // date (2) or time (3)
        argType = Formattable::kDate;
        const int32_t styleID = findKeyword(segments[3], DATE_STYLE_IDS);
        const DateFormat::EStyle style =
            (styleID < 0) ? DateFormat::kDefault : DATE_STYLES[styleID];
        if (typeID == 2) {
            fmt = DateFormat::createDateInstance(style, fLocale);
        } else {
            fmt = DateFormat::createTimeInstance(style, fLocale);
        }
        // Not a known style keyword: treat segments[3] as a pattern,
        // which can only be applied to a SimpleDateFormat.
        if (styleID < 0 &&
            fmt != NULL &&
            fmt->getDynamicClassID() == SimpleDateFormat::getStaticClassID()) {
            ((SimpleDateFormat*)fmt)->applyPattern(segments[3]);
        }
    } else if (typeID == 4) {
        // choice
        argType = Formattable::kDouble;
        fmt = new ChoiceFormat(segments[3], parseError, ec);
    } else {
        // unrecognized format type keyword
        ec = U_ILLEGAL_ARGUMENT_ERROR;
    }

    // Every non-string argument needs a Format object; not having one
    // without an error already reported is treated as an allocation failure.
    if (U_SUCCESS(ec) && argType != Formattable::kString && fmt == NULL) {
        ec = U_MEMORY_ALLOCATION_ERROR;
    }

    // Make sure both arrays can hold the entries written below.
    if (!allocateSubformats(formatNumber+1) ||
        !allocateArgTypes(argumentNumber+1)) {
        ec = U_MEMORY_ALLOCATION_ERROR;
    }

    if (U_FAILURE(ec)) {
        delete fmt;
        return;
    }

    // Everything succeeded; record the results in our arrays.
    subformats[formatNumber].format = fmt;
    subformats[formatNumber].offset = segments[0].length();
    subformats[formatNumber].arg = argumentNumber;
    subformatCount = formatNumber+1;

    // Argument numbers may arrive out of sequence, e.g.,
    // "There was {2} on {0,date} (see {1,number}).", so the count
    // only ever grows to cover the highest argument number seen.
    argTypes[argumentNumber] = argType;
    if (argTypeCount < argumentNumber+1) {
        argTypeCount = argumentNumber+1;
    }
}
// -------------------------------------
// Finds the string, s, in the string array, list.
//
// s    - the keyword to look up; it is trimmed and lower-cased
//        before being compared
// list - array of ID_LIST_LENGTH keyword pointers; NULL entries
//        are skipped
//
// Returns 0 when s is empty, otherwise the index of the first entry
// equal to the trimmed, lower-cased s, or -1 when no entry matches.
int32_t MessageFormat::findKeyword(const UnicodeString& s,
                                   const UChar * const *list)
{
    if (s.length() == 0)
        return 0;

    UnicodeString buffer = s;
    // Trims the space characters and turns all characters
    // in s to lower case.  The keywords are fixed ASCII identifiers,
    // so the case mapping must not depend on the default locale:
    // with the argument-less toLower() and a Turkish default locale,
    // 'I' maps to dotless i and "INTEGER" would not match "integer".
    buffer.trim().toLower(Locale(""));
    for (int32_t i = 0; i < ID_LIST_LENGTH; ++i) {
        if (list[i] && !buffer.compare(list[i], u_strlen(list[i])))
            return i;
    }
    return -1;
}
// -------------------------------------
// Checks the range of the source text to quote the special
// characters, { and ' and copy to target buffer.
void
MessageFormat::copyAndFixQuotes(const UnicodeString& source,
int32_t start,
int32_t end,
UnicodeString& appendTo)
{
UBool gotLB = FALSE;
for (int32_t i = start; i < end; ++i) {
UChar ch = source[i];
if (ch == LEFT_CURLY_BRACE) {
appendTo += SINGLE_QUOTE;
appendTo += LEFT_CURLY_BRACE;
appendTo += SINGLE_QUOTE;
gotLB = TRUE;
}
else if (ch == RIGHT_CURLY_BRACE) {
if(gotLB) {
appendTo += RIGHT_CURLY_BRACE;
gotLB = FALSE;
}
else {
// orig code.
appendTo += SINGLE_QUOTE;
appendTo += RIGHT_CURLY_BRACE;
appendTo += SINGLE_QUOTE;
}
}
else if (ch == SINGLE_QUOTE) {
appendTo += SINGLE_QUOTE;
appendTo += SINGLE_QUOTE;
}
else {
appendTo += ch;
}
}
}
/**
 * Convenience method that ought to be in NumberFormat.
 *
 * Creates a number format for the given locale and, when that format
 * is a DecimalFormat, sets it up for integers: no fraction digits, no
 * forced decimal separator, and integer-only parsing.
 *
 * @param locale the locale to create the format for
 * @param status error code, passed on to NumberFormat::createInstance()
 * @return the new format, or NULL when NumberFormat::createInstance()
 *         returned NULL
 */
NumberFormat*
MessageFormat::createIntegerFormat(const Locale& locale, UErrorCode& status) const {
    NumberFormat *temp = NumberFormat::createInstance(locale, status);
    // createInstance() can return NULL (other callers in this file
    // test for that), so it must not be dereferenced unchecked.
    if (temp != NULL &&
        temp->getDynamicClassID() == DecimalFormat::getStaticClassID()) {
        DecimalFormat *temp2 = (DecimalFormat*) temp;
        temp2->setMaximumFractionDigits(0);
        temp2->setDecimalSeparatorAlwaysShown(FALSE);
        temp2->setParseIntegerOnly(TRUE);
    }
    return temp;
}
/**
 * Return the default number format.  Used to format a numeric
 * argument when subformats[i].format is NULL.  The format is created
 * for fLocale the first time it is requested and kept in
 * defaultNumberFormat afterwards.  Returns NULL on failure, in which
 * case ec holds a failure code.
 *
 * Semantically const but may modify *this.
 */
const NumberFormat* MessageFormat::getDefaultNumberFormat(UErrorCode& ec) const {
    if (defaultNumberFormat != NULL) {
        // already created by an earlier call
        return defaultNumberFormat;
    }

    // Cast away const so the member can be filled in.
    MessageFormat* self = (MessageFormat*) this;
    self->defaultNumberFormat = NumberFormat::createInstance(fLocale, ec);
    if (U_FAILURE(ec)) {
        // Do not keep whatever a failed creation handed back.
        delete self->defaultNumberFormat;
        self->defaultNumberFormat = NULL;
    } else if (self->defaultNumberFormat == NULL) {
        ec = U_MEMORY_ALLOCATION_ERROR;
    }
    return defaultNumberFormat;
}
/**
 * Return the default date format.  Used to format a date
 * argument when subformats[i].format is NULL.  The format is a
 * date-time format with kShort date and time styles for fLocale,
 * created the first time it is requested and kept in
 * defaultDateFormat afterwards.  Returns NULL on failure, in which
 * case ec is set to U_MEMORY_ALLOCATION_ERROR.
 *
 * Semantically const but may modify *this.
 */
const DateFormat* MessageFormat::getDefaultDateFormat(UErrorCode& ec) const {
    if (defaultDateFormat != NULL) {
        // already created by an earlier call
        return defaultDateFormat;
    }

    // Cast away const so the member can be filled in.
    MessageFormat* self = (MessageFormat*) this;
    self->defaultDateFormat =
        DateFormat::createDateTimeInstance(DateFormat::kShort, DateFormat::kShort, fLocale);
    if (self->defaultDateFormat == NULL) {
        ec = U_MEMORY_ALLOCATION_ERROR;
    }
    return defaultDateFormat;
}
/**
 * Returns the locale of this format.  fLocale is returned for both
 * ULOC_VALID_LOCALE and ULOC_ACTUAL_LOCALE.  For any other type,
 * status is set to U_UNSUPPORTED_ERROR and Locale("") is returned.
 */
Locale
MessageFormat::getLocale(ULocDataLocaleType type, UErrorCode& status) const
{
    if (type == ULOC_VALID_LOCALE || type == ULOC_ACTUAL_LOCALE) {
        return fLocale;
    }
    status = U_UNSUPPORTED_ERROR;
    return Locale("");
}
/**
 * Returns the name of this format's locale.  fLocale.getName() is
 * returned for both ULOC_VALID_LOCALE and ULOC_ACTUAL_LOCALE.  For
 * any other type, status is set to U_UNSUPPORTED_ERROR and NULL is
 * returned.
 */
const char*
MessageFormat::getLocaleInternal(ULocDataLocaleType type, UErrorCode &status) const
{
    if (type == ULOC_VALID_LOCALE || type == ULOC_ACTUAL_LOCALE) {
        return fLocale.getName();
    }
    status = U_UNSUPPORTED_ERROR;
    return NULL;
}
U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_FORMATTING */
//eof