scuffed-code/icu4c/source/common/simplepatternformatter.cpp
2014-12-05 20:52:28 +00:00

521 lines
16 KiB
C++

/*
******************************************************************************
* Copyright (C) 2014, International Business Machines
* Corporation and others. All Rights Reserved.
******************************************************************************
* simplepatternformatter.cpp
*/
#include "simplepatternformatter.h"
#include "cstring.h"
#include "uassert.h"
U_NAMESPACE_BEGIN
// Reports whether (array, size) is an unusable array description:
// a negative size, or a positive size paired with a NULL pointer.
// A NULL pointer together with size 0 is accepted.
static UBool isInvalidArray(const void *array, int32_t size) {
    if (size < 0) {
        return TRUE;
    }
    return (size != 0 && array == NULL);
}
// Parser states used by SimplePatternFormatter::compile().
enum SimplePatternFormatterCompileState {
    INIT = 0,         // Scanning ordinary pattern text.
    APOSTROPHE = 1,   // The previous character was an apostrophe.
    PLACEHOLDER = 2   // Inside "{digits", waiting for the closing brace.
};
// Accumulates the decimal digits of a placeholder ID, e.g. {4} or {35},
// while the pattern parser scans the text after an opening curly brace.
class SimplePatternFormatterIdBuilder {
public:
    SimplePatternFormatterIdBuilder() : id(0), idLen(0) { }
    ~SimplePatternFormatterIdBuilder() { }

    // Forgets every digit seen so far.
    void reset() {
        id = 0;
        idLen = 0;
    }

    // Numeric value of the digits added since the last reset().
    int32_t getId() const {
        return id;
    }

    // Writes the digits seen so far into buffer, starting at index *len,
    // and advances *len past them. The parser uses this to restore the
    // literal text when no closing curly brace follows the digits.
    void appendTo(UChar *buffer, int32_t *len) const;

    // TRUE once at least one digit has been added.
    UBool isValid() const {
        return idLen > 0;
    }

    // Folds one digit character into the ID. The parser calls this for
    // each digit it meets after an opening curly brace.
    void add(UChar ch);

private:
    int32_t id;      // Value of the digits added so far.
    int32_t idLen;   // Number of digits added so far.

    // Copying is disabled: copy constructor and assignment are private.
    SimplePatternFormatterIdBuilder(
            const SimplePatternFormatterIdBuilder &other);
    SimplePatternFormatterIdBuilder &operator=(
            const SimplePatternFormatterIdBuilder &other);
};
// Writes exactly idLen decimal digits of id into buffer[*len .. *len+idLen),
// least significant digit last, then advances *len by idLen. Because the
// digit count comes from idLen rather than from the value, leading zeros
// typed in the pattern are reproduced. No bounds checking is done here.
void SimplePatternFormatterIdBuilder::appendTo(
        UChar *buffer, int32_t *len) const {
    const int32_t start = *len;
    const int32_t end = start + idLen;
    int32_t remaining = id;
    // Fill from the rightmost position backwards, peeling off one digit
    // per step.
    for (int32_t pos = end; pos > start; ) {
        --pos;
        buffer[pos] = (remaining % 10) + 0x30;
        remaining /= 10;
    }
    *len = end;
}
// Appends one decimal digit (ch is expected to be in 0x30..0x39) to the ID
// and counts it.
// NOTE(review): there is no guard against int32_t overflow for very long
// digit runs.
void SimplePatternFormatterIdBuilder::add(UChar ch) {
    const int32_t digit = ch - 0x30;
    id = 10 * id + digit;
    ++idLen;
}
// Represents placeholder values.
// Wraps a caller-supplied array of UnicodeString pointers. After
// snapshotAppendTo() has been called, get() returns a saved copy for any
// entry whose pointer equals the snapshotted string instead of the live
// string itself.
class SimplePatternFormatterPlaceholderValues : public UMemory {
public:
// Stores the array pointer and count; no strings are copied here.
SimplePatternFormatterPlaceholderValues(
const UnicodeString * const *values,
int32_t valuesCount);
// Returns TRUE if appendTo value is at any index besides exceptIndex.
// Entries are compared by address, not by content. Passing an index that
// is never visited (e.g. -1) checks every entry.
UBool isAppendToInAnyIndexExcept(
const UnicodeString &appendTo, int32_t exceptIndex) const;
// For each appendTo value, stores the snapshot of it in its place.
// One copy of appendTo is taken; get() substitutes it for every entry
// that points at appendTo.
void snapshotAppendTo(const UnicodeString &appendTo);
// Returns the placeholder value at index. No range checking performed.
// Returned reference is valid for as long as this object exists.
const UnicodeString &get(int32_t index) const;
private:
// Caller-supplied array of value pointers and its length.
const UnicodeString * const *fValues;
int32_t fValuesCount;
// Address of the string passed to snapshotAppendTo(); NULL until then.
const UnicodeString *fAppendTo;
// Contents of that string as copied by snapshotAppendTo().
UnicodeString fAppendToCopy;
// Copy constructor and assignment are private, so objects of this class
// cannot be copied from outside it.
SimplePatternFormatterPlaceholderValues(
const SimplePatternFormatterPlaceholderValues &);
SimplePatternFormatterPlaceholderValues &operator=(
const SimplePatternFormatterPlaceholderValues &);
};
// Records the value array and its length. No snapshot exists yet, so
// fAppendTo starts out NULL and fAppendToCopy starts out empty.
SimplePatternFormatterPlaceholderValues::SimplePatternFormatterPlaceholderValues(
        const UnicodeString * const *values,
        int32_t valuesCount)
        : fValues(values),
          fValuesCount(valuesCount),
          fAppendTo(NULL),
          fAppendToCopy() {
}
// Scans the value array for an entry whose address is that of appendTo,
// ignoring the entry at exceptIndex. Returns TRUE on the first match and
// FALSE if there is none.
UBool SimplePatternFormatterPlaceholderValues::isAppendToInAnyIndexExcept(
        const UnicodeString &appendTo, int32_t exceptIndex) const {
    const UnicodeString *target = &appendTo;
    for (int32_t idx = 0; idx < fValuesCount; ++idx) {
        if (idx == exceptIndex) {
            continue;
        }
        if (fValues[idx] == target) {
            return TRUE;
        }
    }
    return FALSE;
}
// Saves the current contents of appendTo and remembers its address, so
// that get() can hand out the saved contents for entries pointing at it.
void SimplePatternFormatterPlaceholderValues::snapshotAppendTo(
        const UnicodeString &appendTo) {
    fAppendToCopy = appendTo;
    fAppendTo = &appendTo;
}
// Returns the value stored at index. If a snapshot has been taken and the
// entry at index is the snapshotted string, the saved copy is returned
// instead. index is not range checked.
const UnicodeString &SimplePatternFormatterPlaceholderValues::get(
        int32_t index) const {
    const UnicodeString *value = fValues[index];
    if (fAppendTo != NULL && fAppendTo == value) {
        return fAppendToCopy;
    }
    return *value;
}
// Creates a formatter with an empty pattern and no placeholders.
SimplePatternFormatter::SimplePatternFormatter()
        : noPlaceholders(),
          placeholders(),
          placeholderSize(0),
          placeholderCount(0),
          firstPlaceholderReused(FALSE) {
}
// Creates a formatter and compiles pattern into it. The status produced by
// compile() is local to this constructor and is not inspected, so a
// compilation failure is not reported to the caller.
SimplePatternFormatter::SimplePatternFormatter(const UnicodeString &pattern)
        : noPlaceholders(),
          placeholders(),
          placeholderSize(0),
          placeholderCount(0),
          firstPlaceholderReused(FALSE) {
    UErrorCode status = U_ZERO_ERROR;
    compile(pattern, status);
}
// Copy constructor: duplicates the pattern text, the counters and the
// placeholder table of other.
SimplePatternFormatter::SimplePatternFormatter(
        const SimplePatternFormatter &other)
        : noPlaceholders(other.noPlaceholders),
          placeholders(),
          placeholderSize(0),
          placeholderCount(other.placeholderCount),
          firstPlaceholderReused(other.firstPlaceholderReused) {
    // placeholderSize is still 0 while ensureCapacity() runs; it only takes
    // its final value from the capacity that call reports back.
    placeholderSize = ensureCapacity(other.placeholderSize);
    const PlaceholderInfo *source = other.placeholders.getAlias();
    const size_t byteCount = placeholderSize * sizeof(PlaceholderInfo);
    uprv_memcpy(placeholders.getAlias(), source, byteCount);
}
// Assignment: makes this formatter a copy of other. Self-assignment is a
// no-op.
SimplePatternFormatter &SimplePatternFormatter::operator=(
        const SimplePatternFormatter& other) {
    if (this != &other) {
        noPlaceholders = other.noPlaceholders;
        // ensureCapacity() reports how many entries can actually be held;
        // that many entries are then copied over from other.
        placeholderSize = ensureCapacity(other.placeholderSize);
        placeholderCount = other.placeholderCount;
        firstPlaceholderReused = other.firstPlaceholderReused;
        const PlaceholderInfo *source = other.placeholders.getAlias();
        const size_t byteCount = placeholderSize * sizeof(PlaceholderInfo);
        uprv_memcpy(placeholders.getAlias(), source, byteCount);
    }
    return *this;
}
// Nothing to do explicitly; the body is empty.
SimplePatternFormatter::~SimplePatternFormatter() {}
// Compiles pattern and stores the result in this object, replacing whatever
// pattern was compiled before.
//
// Syntax handled by the parser below:
//   {N}  (N = one or more ASCII digits) is recorded as a placeholder with
//        ID N and removed from the stored text.
//   ''   becomes a single apostrophe.
//   '{   becomes a literal opening curly brace.
//   An apostrophe followed by any other character, or ending the pattern,
//   is kept as is.
//   A '{' that is not completed by digits and a closing '}' is copied
//   literally, together with any digits and the character that ended it.
//
// Returns TRUE on success. Returns FALSE without touching this object if
// status already indicates failure on entry. Returns FALSE and sets status
// to U_MEMORY_ALLOCATION_ERROR if the text buffer or the placeholder
// storage cannot be obtained; in that case no placeholders remain recorded.
UBool SimplePatternFormatter::compile(
        const UnicodeString &pattern, UErrorCode &status) {
    if (U_FAILURE(status)) {
        return FALSE;
    }
    // Discard all bookkeeping from a previously compiled pattern. This
    // includes firstPlaceholderReused, which addPlaceholder() only ever
    // sets to TRUE and would otherwise stay stale across recompilation.
    placeholderSize = 0;
    placeholderCount = 0;
    firstPlaceholderReused = FALSE;

    const UChar *patternBuffer = pattern.getBuffer();
    int32_t patternLength = pattern.length();
    // Every branch below writes at most as many UChars as the parser has
    // consumed, so a capacity of patternLength is sufficient.
    UChar *buffer = noPlaceholders.getBuffer(patternLength);
    if (buffer == NULL) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return FALSE;
    }
    int32_t len = 0;
    SimplePatternFormatterCompileState state = INIT;
    SimplePatternFormatterIdBuilder idBuilder;
    for (int32_t i = 0; i < patternLength; ++i) {
        UChar ch = patternBuffer[i];
        switch (state) {
        case INIT:
            if (ch == 0x27) {           // apostrophe
                state = APOSTROPHE;
            } else if (ch == 0x7B) {    // '{'
                state = PLACEHOLDER;
                idBuilder.reset();
            } else {
                buffer[len++] = ch;
            }
            break;
        case APOSTROPHE:
            if (ch == 0x27) {
                // '' -> '
                buffer[len++] = 0x27;
            } else if (ch == 0x7B) {
                // '{ -> {
                buffer[len++] = 0x7B;
            } else {
                // Not an escape: keep the apostrophe and the character.
                buffer[len++] = 0x27;
                buffer[len++] = ch;
            }
            state = INIT;
            break;
        case PLACEHOLDER:
            if (ch >= 0x30 && ch <= 0x39) {
                idBuilder.add(ch);
            } else if (ch == 0x7D && idBuilder.isValid()) {    // '}'
                if (!addPlaceholder(idBuilder.getId(), len)) {
                    // Close the buffer opened by getBuffer() above and drop
                    // the partially built placeholder table so the object
                    // is left in a consistent, empty state.
                    noPlaceholders.releaseBuffer(0);
                    placeholderSize = 0;
                    placeholderCount = 0;
                    firstPlaceholderReused = FALSE;
                    status = U_MEMORY_ALLOCATION_ERROR;
                    return FALSE;
                }
                state = INIT;
            } else {
                // Not a well-formed placeholder: emit what was consumed.
                buffer[len++] = 0x7B;
                idBuilder.appendTo(buffer, &len);
                buffer[len++] = ch;
                state = INIT;
            }
            break;
        default:
            U_ASSERT(FALSE);
            break;
        }
    }
    // Flush whatever a pattern ending in mid-construct left pending.
    switch (state) {
    case INIT:
        break;
    case APOSTROPHE:
        buffer[len++] = 0x27;
        break;
    case PLACEHOLDER:
        buffer[len++] = 0x7B;
        idBuilder.appendTo(buffer, &len);
        break;
    default:
        U_ASSERT(FALSE);
        break;
    }
    noPlaceholders.releaseBuffer(len);
    return TRUE;
}
// Formats with a single placeholder value and appends the result to
// appendTo. Delegates to formatAndAppend() without an offset array.
UnicodeString& SimplePatternFormatter::format(
        const UnicodeString &arg0,
        UnicodeString &appendTo,
        UErrorCode &status) const {
    const UnicodeString *values[] = { &arg0 };
    const int32_t valueCount = UPRV_LENGTHOF(values);
    return formatAndAppend(values, valueCount, appendTo, NULL, 0, status);
}
// Formats with two placeholder values and appends the result to appendTo.
// Delegates to formatAndAppend() without an offset array.
UnicodeString& SimplePatternFormatter::format(
        const UnicodeString &arg0,
        const UnicodeString &arg1,
        UnicodeString &appendTo,
        UErrorCode &status) const {
    const UnicodeString *values[] = { &arg0, &arg1 };
    const int32_t valueCount = UPRV_LENGTHOF(values);
    return formatAndAppend(values, valueCount, appendTo, NULL, 0, status);
}
// Formats with three placeholder values and appends the result to appendTo.
// Delegates to formatAndAppend() without an offset array.
UnicodeString& SimplePatternFormatter::format(
        const UnicodeString &arg0,
        const UnicodeString &arg1,
        const UnicodeString &arg2,
        UnicodeString &appendTo,
        UErrorCode &status) const {
    const UnicodeString *values[] = { &arg0, &arg1, &arg2 };
    const int32_t valueCount = UPRV_LENGTHOF(values);
    return formatAndAppend(values, valueCount, appendTo, NULL, 0, status);
}
// Stores placeholderOffset in offsetArray[placeholderId], provided that
// placeholderId lies below offsetArrayLength. IDs at or beyond the array
// length are silently ignored.
static void updatePlaceholderOffset(
        int32_t placeholderId,
        int32_t placeholderOffset,
        int32_t *offsetArray,
        int32_t offsetArrayLength) {
    if (placeholderId >= offsetArrayLength) {
        return;
    }
    offsetArray[placeholderId] = placeholderOffset;
}
// Appends src[start, end) to dest.
static void appendRange(
        const UnicodeString &src,
        int32_t start,
        int32_t end,
        UnicodeString &dest) {
    const int32_t count = end - start;
    // Skipping the call for an empty range improves performance
    // significantly.
    if (count != 0) {
        dest.append(src, start, count);
    }
}
// Formats the compiled pattern with placeholderValues and appends the
// result to appendTo, which is also the return value.
//
// If offsetArray is supplied, each slot receives the offset at which the
// value of the placeholder with that ID was placed in appendTo, or -1 if
// the placeholder does not occur in the pattern.
//
// Does nothing if status already indicates failure. Sets status to
// U_ILLEGAL_ARGUMENT_ERROR and leaves appendTo untouched when either array
// is malformed, when fewer values than placeholderCount are supplied, or
// when appendTo itself is one of the placeholder values.
UnicodeString& SimplePatternFormatter::formatAndAppend(
        const UnicodeString * const *placeholderValues,
        int32_t placeholderValueCount,
        UnicodeString &appendTo,
        int32_t *offsetArray,
        int32_t offsetArrayLength,
        UErrorCode &status) const {
    if (U_FAILURE(status)) {
        return appendTo;
    }
    const UBool badArguments =
            isInvalidArray(placeholderValues, placeholderValueCount)
            || isInvalidArray(offsetArray, offsetArrayLength)
            || placeholderValueCount < placeholderCount;
    if (badArguments) {
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return appendTo;
    }
    // appendTo is disallowed as a value anywhere in the array, so wrap all
    // placeholderValueCount entries rather than only the first
    // placeholderCount of them.
    SimplePatternFormatterPlaceholderValues values(
            placeholderValues, placeholderValueCount);
    if (values.isAppendToInAnyIndexExcept(appendTo, -1)) {
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return appendTo;
    }
    return formatAndAppend(values, appendTo, offsetArray, offsetArrayLength);
}
// Formats the compiled pattern with placeholderValues and replaces the
// contents of result with the outcome; result is also the return value.
// Unlike formatAndAppend(), result may itself appear among the values.
//
// If offsetArray is supplied, each slot receives the offset at which the
// value of the placeholder with that ID was placed in result, or -1 if the
// placeholder does not occur in the pattern.
//
// Does nothing if status already indicates failure. Sets status to
// U_ILLEGAL_ARGUMENT_ERROR and leaves result untouched when either array is
// malformed or fewer values than placeholderCount are supplied.
UnicodeString& SimplePatternFormatter::formatAndReplace(
        const UnicodeString * const *placeholderValues,
        int32_t placeholderValueCount,
        UnicodeString &result,
        int32_t *offsetArray,
        int32_t offsetArrayLength,
        UErrorCode &status) const {
    if (U_FAILURE(status)) {
        return result;
    }
    const UBool badArguments =
            isInvalidArray(placeholderValues, placeholderValueCount)
            || isInvalidArray(offsetArray, offsetArrayLength)
            || placeholderValueCount < placeholderCount;
    if (badArguments) {
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return result;
    }
    // Only the first placeholderCount values are wrapped here.
    SimplePatternFormatterPlaceholderValues values(
            placeholderValues, placeholderCount);
    const int32_t startId = getUniquePlaceholderAtStart();
    // When the pattern begins with a placeholder that occurs only once and
    // its value is result, result already holds the right prefix.
    const UBool resultLeadsPattern =
            startId >= 0 && placeholderValues[startId] == &result;

    UBool resultIsAlsoInput;
    if (resultLeadsPattern) {
        if (!values.isAppendToInAnyIndexExcept(result, startId)) {
            // Fast path: keep result as is and append the remainder.
            formatAndAppend(values, result, offsetArray, offsetArrayLength);
            // The recorded offset for the leading placeholder would be the
            // old length of result; its value really starts at 0.
            if (offsetArrayLength > startId) {
                offsetArray[startId] = 0;
            }
            return result;
        }
        // result is the value of further placeholders as well, so the
        // fast path is off and its old contents must be preserved.
        resultIsAlsoInput = TRUE;
    } else {
        resultIsAlsoInput = values.isAppendToInAnyIndexExcept(result, -1);
    }
    if (resultIsAlsoInput) {
        values.snapshotAppendTo(result);
    }
    result.remove();
    return formatAndAppend(values, result, offsetArray, offsetArrayLength);
}
// Core formatting routine. Appends the pattern text to appendTo, inserting
// the value of each placeholder at its recorded offset. A value that is
// appendTo itself is not appended. Every slot of offsetArray is first set
// to -1 and then, for IDs that fit in the array, overwritten with the
// length of appendTo at the point where that placeholder is reached.
// Performs no argument validation.
UnicodeString& SimplePatternFormatter::formatAndAppend(
        const SimplePatternFormatterPlaceholderValues &values,
        UnicodeString &appendTo,
        int32_t *offsetArray,
        int32_t offsetArrayLength) const {
    for (int32_t slot = 0; slot < offsetArrayLength; ++slot) {
        offsetArray[slot] = -1;
    }
    if (placeholderSize == 0) {
        appendTo.append(noPlaceholders);
        return appendTo;
    }
    // literalStart tracks where the next run of literal text begins inside
    // noPlaceholders.
    int32_t literalStart = 0;
    for (int32_t i = 0; i < placeholderSize; ++i) {
        const int32_t literalEnd = placeholders[i].offset;
        appendRange(noPlaceholders, literalStart, literalEnd, appendTo);
        updatePlaceholderOffset(
                placeholders[i].id,
                appendTo.length(),
                offsetArray,
                offsetArrayLength);
        const UnicodeString &value = values.get(placeholders[i].id);
        if (&value != &appendTo) {
            appendTo.append(value);
        }
        literalStart = literalEnd;
    }
    // Literal text after the last placeholder.
    appendRange(
            noPlaceholders, literalStart, noPlaceholders.length(), appendTo);
    return appendTo;
}
int32_t SimplePatternFormatter::getUniquePlaceholderAtStart() const {
if (placeholderSize == 0
|| firstPlaceholderReused || placeholders[0].offset != 0) {
return -1;
}
return placeholders[0].id;
}
// Makes sure the placeholder table can hold desiredCapacity entries. When
// it has to grow, it asks for the larger of allocationSize and
// desiredCapacity, passing the current placeholderSize to resize().
// Returns desiredCapacity on success. If growing fails, returns the
// capacity the table has after the failed attempt.
int32_t SimplePatternFormatter::ensureCapacity(
        int32_t desiredCapacity, int32_t allocationSize) {
    if (placeholders.getCapacity() >= desiredCapacity) {
        return desiredCapacity;
    }
    const int32_t newCapacity =
            allocationSize < desiredCapacity ? desiredCapacity : allocationSize;
    if (placeholders.resize(newCapacity, placeholderSize) != NULL) {
        return desiredCapacity;
    }
    return placeholders.getCapacity();
}
// Appends a placeholder with the given id and text offset to the table.
// Raises placeholderCount to id + 1 if id is not already covered, and sets
// firstPlaceholderReused when a later placeholder has the same ID as the
// first one. Returns FALSE, changing nothing, if the table cannot grow.
UBool SimplePatternFormatter::addPlaceholder(int32_t id, int32_t offset) {
    const int32_t needed = placeholderSize + 1;
    // Request twice the current size so repeated additions do not
    // reallocate on every call.
    if (ensureCapacity(needed, 2 * placeholderSize) < needed) {
        return FALSE;
    }
    PlaceholderInfo &slot = placeholders[placeholderSize];
    slot.offset = offset;
    slot.id = id;
    ++placeholderSize;
    if (placeholderCount <= id) {
        placeholderCount = id + 1;
    }
    if (placeholderSize > 1 && placeholders[0].id == id) {
        firstPlaceholderReused = TRUE;
    }
    return TRUE;
}
U_NAMESPACE_END