/*
*******************************************************************************
* Copyright (C) 1996-2001, International Business Machines
* Corporation and others.  All Rights Reserved.
*******************************************************************************
*/

#include "unicode/umsg.h"
#include "mutex.h"
#include "unicode/uloc.h"
#include "unicode/ustring.h"
#include "unicode/fmtable.h"
#include "cpputils.h"
#include "unicode/msgfmt.h"
#include "unicode/unistr.h"
#include "unicode/numfmt.h"

// MessageFormat Type List  Number, Date, Time or Choice
// The keywords sit at the even indices 2, 4, 6 and 8; every other slot is an
// empty string. matchType() relies on these positions in its switch.
const UnicodeString fgTypeList[] = {
  UnicodeString(), UnicodeString(), UNICODE_STRING("number", 6), UnicodeString(),
  UNICODE_STRING("date", 4), UnicodeString(), UNICODE_STRING("time", 4), UnicodeString(),
  UNICODE_STRING("choice", 6)
};

// NumberFormat modifier list, default, currency, percent or integer
const UnicodeString fgModifierList[] = {
  UnicodeString(), UnicodeString(), UNICODE_STRING("currency", 8), UnicodeString(),
  UNICODE_STRING("percent", 7), UnicodeString(), UNICODE_STRING("integer", 7), UnicodeString(),
  UnicodeString()
};

// DateFormat modifier list, default, short, medium, long or full
const UnicodeString fgDateModifierList[] = {
  UnicodeString(), UnicodeString(), UNICODE_STRING("short", 5), UnicodeString(),
  UNICODE_STRING("medium", 6), UnicodeString(), UNICODE_STRING("long", 4), UnicodeString(),
  UNICODE_STRING("full", 4)
};

// Number of items in the lists
const int32_t fgListLength = 9;

// Determine if a keyword belongs to a list of keywords.
//
// s      the candidate text; it is copied, trimmed and lower-cased, and
//        anything from the first ',' onwards is dropped before comparing
// list   an array of fgListLength keywords
// kwLen  receives the length of the matched keyword, or 0 if none matched
//
// Returns the index of the first entry of list equal to the normalized text,
// or -1 if there is none. An empty text matches the empty entry at index 0.
int32_t
findKeyword(const UnicodeString& s,
            const UnicodeString *list,
            int32_t& kwLen)
{
  UnicodeString buffer = s;

  // Trims the space characters and turns all characters
  // in s to lower case.
  buffer.trim().toLower();

  // Determine if there is a ','
  // If so, the string contains a modifier, and we only want to
  // parse the type. This does not depend on the list entry, so it is
  // done once, before the search.
  int32_t commaPos = buffer.indexOf((UChar)0x002C);
  if(commaPos != -1)
    buffer.truncate(commaPos);

  for(int32_t i = 0; i < fgListLength; ++i) {
    if(buffer == list[i]) {
      kwLen = list[i].length();
      return i;
    }
  }

  kwLen = 0;
  return -1;
}

// Match the type of argument in a message format pattern
// The type consists of a type indicator and an optional modifier
// Possible types : number, date, time, choice
// Possible modifiers : currency, percent, integer, full, long, short
// We only worry about parsing the types and the "integer" modifier
//
// pat         the start of the whole pattern
// openBrace   index in pat of the '{' that opens the argument
// closeBrace  index in pat of the matching '}'
//
// The offsets used below assume a one-character argument number that is
// immediately followed by ',' and the type keyword.
Formattable::Type
matchType(const UChar *pat,
          int32_t openBrace,
          int32_t closeBrace)
{
  // number of characters between the braces
  int32_t len = (closeBrace - openBrace) - 1;

  Formattable::Type result = Formattable::kString;

  // Strings like "{0}" are strings. Anything shorter than three characters
  // cannot hold "<digit>,<type>" either, so it is a string too; returning
  // here also keeps the lengths computed below from going negative.
  if(len < 3) {
    return result;
  }

  // Assume the input is well-formed:
  // skip the argument digit and the comma that follows it
  UnicodeString type((UChar*)pat + openBrace + 1 + 2, len - 2, len - 2);
  int32_t matchLen, kw;

  kw = findKeyword(type, fgTypeList, matchLen);

  // there is a modifier if type contains a ','
  UBool hasModifier = (type.indexOf((UChar)0x002C) != -1);

  switch(kw) {

    // number
  case 1: case 2:
    result = Formattable::kDouble;
    if(hasModifier) {
      // the modifier starts after the type keyword and its trailing comma
      UnicodeString modifier((UChar*)pat + openBrace + 1 + 1 + 2 + matchLen,
                             len - 2 - matchLen - 1,
                             len - 2 - matchLen - 1);
      switch(findKeyword(modifier, fgModifierList, matchLen)) {
        // integer
      case 5: case 6:
        result = Formattable::kLong;
        break;
      }
    }
    break;

    // date
  case 3: case 4:
    // time
  case 5: case 6:
    result = Formattable::kDate;
    break;

    // choice
  case 7: case 8:
    result = Formattable::kDouble;
    break;
  }

  return result;
}

// ==========

// This code section is entirely bogus.  I just need an easy way to
// convert from string to an int, and I can't use the standard library

// Single cached formatter; 0 while it is checked out or not yet created.
static NumberFormat *fgNumberFormat = 0;

// Hand out a NumberFormat for exclusive use by the caller.
// Takes the cached instance when there is one, otherwise creates a new
// Locale::US formatter set to parse integers only. Returns 0 and leaves the
// failure in status if creation fails. The caller gives the formatter back
// through umsg_releaseNumberFormat().
NumberFormat*
umsg_getNumberFormat(UErrorCode& status)
{
  NumberFormat *theFormat = 0;

  if(fgNumberFormat != 0) {
    // NOTE(review): the test above reads fgNumberFormat without the lock;
    // only the re-check below runs with the Mutex object in scope.
    Mutex lock;

    if(fgNumberFormat != 0) {
      theFormat = fgNumberFormat;
      fgNumberFormat = 0; // We have exclusive right to this formatter.
    }
  }

  if(theFormat == 0) {
    theFormat = NumberFormat::createInstance(Locale::US, status);
    if(U_FAILURE(status))
      return 0;
    theFormat->setParseIntegerOnly(TRUE);
  }

  return theFormat;
}

// Give back a formatter obtained from umsg_getNumberFormat().
// It is stored in the cache if the cache slot is empty; otherwise it is
// deleted. Either way the caller must not use adopt afterwards.
void
umsg_releaseNumberFormat(NumberFormat *adopt)
{
  if(fgNumberFormat == 0) {
    Mutex lock;

    if(fgNumberFormat == 0) {
      fgNumberFormat = adopt;
      adopt = 0;
    }
  }

  // adopt is 0 here if the cache took it; deleting 0 is a no-op
  delete adopt;
}

// Parse an integer from string.
// Returns -1 if no formatter could be obtained. Otherwise returns the parsed
// value when the parse succeeds and yields a kLong, and 0 in every other
// case; a parse failure is reported through status.
int32_t
umsg_stoi(const UnicodeString& string,
          UErrorCode& status)
{
  NumberFormat *myFormat = umsg_getNumberFormat(status);

  if(U_FAILURE(status))
    return -1; // OK?

  Formattable result;
  // Uses the global number formatter to parse the string.
  // Note: We assume here that parse() is thread-safe.
  myFormat->parse(string, result, status);
  umsg_releaseNumberFormat(myFormat);

  int32_t value = 0;
  if(U_SUCCESS(status) && result.getType() == Formattable::kLong)
    value = result.getLong();

  return value;
}

// Format i into string using the shared number formatter and return string.
// If no formatter can be obtained, string is set to "" and returned.
UnicodeString&
umsg_itos(int32_t i,
          UnicodeString& string)
{
  UErrorCode status = U_ZERO_ERROR;
  NumberFormat *myFormat = umsg_getNumberFormat(status);

  if(U_FAILURE(status))
    return (string = "");

  myFormat->format(i, string);
  umsg_releaseNumberFormat(myFormat);

  return string;
}

// ==========

#define MAX_ARGS 10

// Eventually, message format should be rewritten natively in C.
// For now, this is a hack that should work:
//  1. Parse the pattern, determining the argument types
//  2. Create a Formattable array with the varargs
//  3. Call through to the existing C++ code
//
// Right now this imposes the same limit as MessageFormat in C++
// Namely, only MAX_ARGS arguments are supported

// Variadic front end: collects the trailing arguments into a va_list and
// forwards everything to u_vformatMessage(). Returns -1 without touching the
// arguments if *status already holds a failure.
U_CAPI int32_t
u_formatMessage(const char  *locale,
                const UChar *pattern,
                int32_t     patternLength,
                UChar       *result,
                int32_t     resultLength,
                UErrorCode  *status,
                ...)
{
  va_list ap;
  int32_t actLen;

  if(U_FAILURE(*status))
    return -1;

  // start vararg processing
  va_start(ap, status);

  actLen = u_vformatMessage(locale, pattern, patternLength,
                            result, resultLength, ap, status);

  // end vararg processing
  va_end(ap);

  return actLen;
}

// Format a message from a pattern and a va_list of arguments.
//
// locale         locale name handed to the MessageFormat
// pattern        the pattern text
// patternLength  length of pattern, or -1 if it is NUL-terminated
// result         output buffer
// resultLength   capacity of result
// ap             one argument per argument number used in the pattern, in
//                numeric order: UDate for date/time, double for number and
//                choice, int32_t for number with the integer modifier,
//                UChar* for plain arguments
// status         error code, in/out
//
// Returns the value of uprv_fillOutputString(), or -1 if *status was already
// a failure, if an argument number cannot be parsed or lies outside
// 0..MAX_ARGS-1, or if the pattern is rejected as having unmatched braces.
U_CAPI int32_t
u_vformatMessage(const char  *locale,
                 const UChar *pattern,
                 int32_t     patternLength,
                 UChar       *result,
                 int32_t     resultLength,
                 va_list     ap,
                 UErrorCode  *status)
{
  if(U_FAILURE(*status))
    return -1;

  int32_t patLen = (patternLength == -1 ? u_strlen(pattern) : patternLength);

  // ========================================
  // Begin pseudo-parser

  // This is a simplified version of the C++ pattern parser
  // All it does is look for an unquoted '{' and read the type

  int32_t part = 0;          // 0 outside an argument, 1..3 inside one
  UBool inQuote = FALSE;
  int32_t braceStack = 0;    // depth of nested '{' inside an argument
  const UChar *pat = pattern;
  const UChar *patLimit = pattern + patLen;
  int32_t bracePos = 0;      // index of the '{' of the current argument
  int32_t count = 0;         // highest argument number seen, plus one
  Formattable args [ MAX_ARGS ];
  Formattable::Type argTypes [ MAX_ARGS ];

  // set the types to a bogus value initially (no such type as kArray from C)
  for(int32_t j = 0; j < MAX_ARGS; ++j)
    argTypes[j] = Formattable::kArray;

  // pseudo-parse the pattern
  while(pat < patLimit) {
    if(part == 0) {
      if(*pat == 0x0027 /*'\''*/) {
        // handle double quotes
        if( (pat + 1) < patLimit && *(pat + 1) == 0x0027 /*'\''*/)
          pat++;
        else
          inQuote = ! inQuote;
      }
      else if(*pat == 0x007B /*'{'*/ && ! inQuote) {
        part = 1;
        bracePos = (int32_t)(pat - pattern);
      }
    }
    else if(inQuote) {
      // just copy quotes in parts
      if(*pat == 0x0027 /*'\''*/)
        inQuote = FALSE;
    }
    else {
      switch (*pat) {

      case 0x002C /*','*/:
        if(part < 3)
          part += 1;
        break;

      case 0x007B /*'{'*/:
        ++braceStack;
        break;

      case 0x007D /*'}'*/:
        if(braceStack == 0) {
          part = 0;
          int32_t closePos = (int32_t)(pat - pattern);

          // found a close brace, determine the argument type enclosed
          // and the numeric ID of the argument
          Formattable::Type type = matchType(pattern, bracePos, closePos);

          // the numeric ID is important, because if the pattern has a
          // section like "{0} {0} {0}" we only want to get one argument
          // from the variable argument list, despite the fact that
          // it is in the pattern three times
          //
          // Only the text between the braces is handed to the parser, so
          // a pattern that is not NUL-terminated is never read past
          // patLimit.
          UnicodeString argText(pattern + bracePos + 1, closePos - bracePos - 1);
          int32_t argNum = umsg_stoi(argText, *status);

          // no usable number: do not touch the table
          if(U_FAILURE(*status))
            return -1;

          // a negative number would index in front of argTypes,
          // a large one behind it
          if(argNum < 0 || argNum >= MAX_ARGS) {
            *status = U_INTERNAL_PROGRAM_ERROR;
            return -1;
          }

          // register the type of this argument in our list
          argTypes[argNum] = type;

          // adjust argument count
          count = ( (argNum + 1) > count ? (argNum + 1) : count);
        }
        else
          --braceStack;
        break;

      case 0x0027 /*'\''*/:
        inQuote = TRUE;
        break;
      }
    }

    // increment position in pattern
    pat++;
  }

  // detect any unmatched braces in the pattern
  // (only an argument left open at nesting depth 0 is flagged here)
  if(braceStack == 0 && part != 0) {
    *status = U_INVALID_FORMAT_ERROR;
    return -1;
  }

  // iterate through the vararg list, and get the arguments out
  for(int32_t i = 0; i < count; ++i) {

    UChar *stringVal;

    switch(argTypes[i]) {
    case Formattable::kDate:
      args[i].setDate(va_arg(ap, UDate));
      break;

    case Formattable::kDouble:
      args[i].setDouble(va_arg(ap, double));
      break;

    case Formattable::kLong:
      args[i].setLong(va_arg(ap, int32_t));
      break;

    case Formattable::kString:
      // For some reason, a temporary is needed
      stringVal = va_arg(ap, UChar*);
      args[i].setString(stringVal);
      break;

    case Formattable::kArray:
      // throw away this argument
      // this is highly platform-dependent, and probably won't work
      // so, if you try to skip arguments in the list (and not use them)
      // you'll probably crash
      va_arg(ap, int);
      break;
    }
  }

  // End pseudo-parser
  // ========================================

  // just call through to the C++ implementation
  UnicodeString patString((UChar*)pattern, patLen, patLen);
  MessageFormat fmt(patString, Locale(locale), *status);
  UnicodeString res(result, 0, resultLength);
  FieldPosition fp;
  fmt.format(args, count, res, fp, *status);

  return uprv_fillOutputString(res, result, resultLength, status);
}

// For parse, do the reverse of format:
//  1. Call through to the C++ APIs
//  2. Just assume the user passed in enough arguments.
//  3. Iterate through each formattable returned, and assign to the arguments

// Variadic front end: collects the trailing output pointers into a va_list
// and forwards everything to u_vparseMessage(). Does nothing if *status
// already holds a failure.
U_CAPI void
u_parseMessage(const char  *locale,
               const UChar *pattern,
               int32_t     patternLength,
               const UChar *source,
               int32_t     sourceLength,
               UErrorCode  *status,
               ...)
{
  va_list ap;

  if(U_FAILURE(*status))
    return;

  // start vararg processing
  va_start(ap, status);

  u_vparseMessage(locale, pattern, patternLength, source, sourceLength, ap, status);

  // end vararg processing
  va_end(ap);
}

// Parse source against pattern and store each parsed value through the next
// pointer taken from ap.
//
// patternLength / sourceLength may be -1 for NUL-terminated text.
// ap supplies one pointer per parsed value: UDate* for dates, double* for
// both kDouble and kLong values, and UChar* for strings. A string is copied
// with a terminating NUL; the destination size is not known here, so the
// caller has to provide a buffer that is large enough.
U_CAPI void
u_vparseMessage(const char  *locale,
                const UChar *pattern,
                int32_t     patternLength,
                const UChar *source,
                int32_t     sourceLength,
                va_list     ap,
                UErrorCode  *status)
{
  if(U_FAILURE(*status))
    return;

  int32_t patLen = (patternLength == -1 ? u_strlen(pattern) : patternLength);
  int32_t srcLen = (sourceLength == -1 ? u_strlen(source) : sourceLength);

  UnicodeString patString((UChar*)pattern, patLen, patLen);
  MessageFormat fmt(patString, Locale(locale), *status);
  UnicodeString srcString((UChar*)source, srcLen, srcLen);
  int32_t count = 0;
  Formattable *args = fmt.parse(srcString, count, *status);

  UDate *aDate;
  double *aDouble;
  UChar *aString;
  UnicodeString temp;
  int32_t len;

  // assign formattables to varargs
  for(int32_t i = 0; i < count; i++) {

    switch(args[i].getType()) {

    case Formattable::kDate:
      aDate = va_arg(ap, UDate*);
      *aDate = args[i].getDate();
      break;

    case Formattable::kDouble:
      aDouble = va_arg(ap, double*);
      *aDouble = args[i].getDouble();
      break;

    case Formattable::kLong:
      // always assume doubles for parsing
      aDouble = va_arg(ap, double*);
      *aDouble = (double) args[i].getLong();
      break;

    case Formattable::kString:
      aString = va_arg(ap, UChar*);
      args[i].getString(temp);
      // Copy exactly length() code units and terminate explicitly, so the
      // copy does not depend on the string's internal buffer carrying a
      // NUL of its own.
      len = temp.length();
      temp.extract(0, len, aString);
      aString[len] = 0;
      break;

      // better not happen!
    case Formattable::kArray:
      // DIE
      break;
    }
  }

  // clean up
  delete [] args;
}