ICU-21060 Fix behaviour of -per-, -and-, and dimensionless units.
This commit is contained in:
parent
b9d1ba87f5
commit
e03fa70541
@ -537,9 +537,9 @@ static const char * const gSubTypes[] = {
|
||||
"solar-mass",
|
||||
"stone",
|
||||
"ton",
|
||||
"one",
|
||||
"percent",
|
||||
"permille",
|
||||
"", // TODO(ICU-21076): manual edit of what should have been generated by Java.
|
||||
"percent", // TODO(ICU-21076): regenerate, deal with duplication.
|
||||
"permille", // TODO(ICU-21076): regenerate, deal with duplication.
|
||||
"gigawatt",
|
||||
"horsepower",
|
||||
"kilowatt",
|
||||
|
@ -34,17 +34,32 @@ namespace {
|
||||
// TODO: Propose a new error code for this?
|
||||
constexpr UErrorCode kUnitIdentifierSyntaxError = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
|
||||
// This is to ensure we only insert positive integers into the trie
|
||||
// Trie value offset for SI Prefixes. This is big enough to ensure we only
|
||||
// insert positive integers into the trie.
|
||||
constexpr int32_t kSIPrefixOffset = 64;
|
||||
|
||||
// Trie value offset for compound parts, e.g. "-per-", "-", "-and-".
|
||||
constexpr int32_t kCompoundPartOffset = 128;
|
||||
|
||||
enum CompoundPart {
|
||||
// Represents "-per-"
|
||||
COMPOUND_PART_PER = kCompoundPartOffset,
|
||||
// Represents "-"
|
||||
COMPOUND_PART_TIMES,
|
||||
// Represents "-and-"
|
||||
COMPOUND_PART_AND,
|
||||
};
|
||||
|
||||
// Trie value offset for "per-".
|
||||
constexpr int32_t kInitialCompoundPartOffset = 192;
|
||||
|
||||
enum InitialCompoundPart {
|
||||
// Represents "per-", the only compound part that can appear at the start of
|
||||
// an identifier.
|
||||
INITIAL_COMPOUND_PART_PER = kInitialCompoundPartOffset,
|
||||
};
|
||||
|
||||
// Trie value offset for powers like "square-", "cubic-", "p2-" etc.
|
||||
constexpr int32_t kPowerPartOffset = 256;
|
||||
|
||||
enum PowerPart {
|
||||
@ -64,6 +79,8 @@ enum PowerPart {
|
||||
POWER_PART_P15,
|
||||
};
|
||||
|
||||
// Trie value offset for simple units, e.g. "gram", "nautical-mile",
|
||||
// "fluid-ounce-imperial".
|
||||
constexpr int32_t kSimpleUnitOffset = 512;
|
||||
|
||||
const struct SIPrefixStrings {
|
||||
@ -94,7 +111,6 @@ const struct SIPrefixStrings {
|
||||
|
||||
// TODO(ICU-21059): Get this list from data
|
||||
const char16_t* const gSimpleUnits[] = {
|
||||
u"one", // note: expected to be index 0
|
||||
u"candela",
|
||||
u"carat",
|
||||
u"gram",
|
||||
@ -227,6 +243,7 @@ void U_CALLCONV initUnitExtras(UErrorCode& status) {
|
||||
b.add(u"-per-", COMPOUND_PART_PER, status);
|
||||
b.add(u"-", COMPOUND_PART_TIMES, status);
|
||||
b.add(u"-and-", COMPOUND_PART_AND, status);
|
||||
b.add(u"per-", INITIAL_COMPOUND_PART_PER, status);
|
||||
b.add(u"square-", POWER_PART_P2, status);
|
||||
b.add(u"cubic-", POWER_PART_P3, status);
|
||||
b.add(u"p2-", POWER_PART_P2, status);
|
||||
@ -270,28 +287,30 @@ public:
|
||||
enum Type {
|
||||
TYPE_UNDEFINED,
|
||||
TYPE_SI_PREFIX,
|
||||
// Token type for "-per-", "-", and "-and-".
|
||||
TYPE_COMPOUND_PART,
|
||||
// Token type for "per-".
|
||||
TYPE_INITIAL_COMPOUND_PART,
|
||||
TYPE_POWER_PART,
|
||||
TYPE_ONE,
|
||||
TYPE_SIMPLE_UNIT,
|
||||
};
|
||||
|
||||
// Calling getType() is invalid, resulting in an assertion failure, if Token
|
||||
// value isn't positive.
|
||||
Type getType() const {
|
||||
if (fMatch <= 0) {
|
||||
UPRV_UNREACHABLE;
|
||||
}
|
||||
U_ASSERT(fMatch > 0);
|
||||
if (fMatch < kCompoundPartOffset) {
|
||||
return TYPE_SI_PREFIX;
|
||||
}
|
||||
if (fMatch < kPowerPartOffset) {
|
||||
if (fMatch < kInitialCompoundPartOffset) {
|
||||
return TYPE_COMPOUND_PART;
|
||||
}
|
||||
if (fMatch < kPowerPartOffset) {
|
||||
return TYPE_INITIAL_COMPOUND_PART;
|
||||
}
|
||||
if (fMatch < kSimpleUnitOffset) {
|
||||
return TYPE_POWER_PART;
|
||||
}
|
||||
if (fMatch == kSimpleUnitOffset) {
|
||||
return TYPE_ONE;
|
||||
}
|
||||
return TYPE_SIMPLE_UNIT;
|
||||
}
|
||||
|
||||
@ -300,11 +319,22 @@ public:
|
||||
return static_cast<UMeasureSIPrefix>(fMatch - kSIPrefixOffset);
|
||||
}
|
||||
|
||||
// Valid only for tokens with type TYPE_COMPOUND_PART.
|
||||
int32_t getMatch() const {
|
||||
U_ASSERT(getType() == TYPE_COMPOUND_PART);
|
||||
return fMatch;
|
||||
}
|
||||
|
||||
int32_t getInitialCompoundPart() const {
|
||||
// Even if there is only one InitialCompoundPart value, we have this
|
||||
// function for the simplicity of code consistency.
|
||||
U_ASSERT(getType() == TYPE_INITIAL_COMPOUND_PART);
|
||||
// Defensive: if this assert fails, code using this function also needs
|
||||
// to change.
|
||||
U_ASSERT(fMatch == INITIAL_COMPOUND_PART_PER);
|
||||
return fMatch;
|
||||
}
|
||||
|
||||
int8_t getPower() const {
|
||||
U_ASSERT(getType() == TYPE_POWER_PART);
|
||||
return static_cast<int8_t>(fMatch - kPowerPartOffset);
|
||||
@ -347,6 +377,7 @@ public:
|
||||
}
|
||||
|
||||
private:
|
||||
// Tracks parser progress: the offset into fSource.
|
||||
int32_t fIndex = 0;
|
||||
|
||||
// Since we're not owning this memory, whatever is passed to the constructor
|
||||
@ -355,6 +386,9 @@ private:
|
||||
StringPiece fSource;
|
||||
UCharsTrie fTrie;
|
||||
|
||||
// Set to true when we've seen a "-per-" or a "per-", after which all units
|
||||
// are in the denominator. Until we find an "-and-", at which point the
|
||||
// identifier is invalid pending TODO(CLDR-13700).
|
||||
bool fAfterPer = false;
|
||||
|
||||
Parser() : fSource(""), fTrie(u"") {}
|
||||
@ -366,11 +400,17 @@ private:
|
||||
return fIndex < fSource.length();
|
||||
}
|
||||
|
||||
// Returns the next Token parsed from fSource, advancing fIndex to the end
|
||||
// of that token in fSource. In case of U_FAILURE(status), the token
|
||||
// returned will cause an abort if getType() is called on it.
|
||||
Token nextToken(UErrorCode& status) {
|
||||
fTrie.reset();
|
||||
int32_t match = -1;
|
||||
// Saves the position in the fSource string for the end of the most
|
||||
// recent matching token.
|
||||
int32_t previ = -1;
|
||||
do {
|
||||
// Find the longest token that matches a value in the trie:
|
||||
while (fIndex < fSource.length()) {
|
||||
auto result = fTrie.next(fSource.data()[fIndex++]);
|
||||
if (result == USTRINGTRIE_NO_MATCH) {
|
||||
break;
|
||||
@ -385,7 +425,7 @@ private:
|
||||
}
|
||||
U_ASSERT(result == USTRINGTRIE_INTERMEDIATE_VALUE);
|
||||
// continue;
|
||||
} while (fIndex < fSource.length());
|
||||
}
|
||||
|
||||
if (match < 0) {
|
||||
status = kUnitIdentifierSyntaxError;
|
||||
@ -395,63 +435,88 @@ private:
|
||||
return Token(match);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the next "single unit" via result.
|
||||
*
|
||||
* If a "-per-" was parsed, the result will have appropriate negative
|
||||
* dimensionality.
|
||||
*
|
||||
* Returns an error if we parse both compound units and "-and-", since mixed
|
||||
* compound units are not yet supported - TODO(CLDR-13700).
|
||||
*
|
||||
* @param result Will be overwritten by the result, if status shows success.
|
||||
* @param sawAnd If an "-and-" was parsed prior to finding the "single
|
||||
* unit", sawAnd is set to true. If not, it is left as is.
|
||||
* @param status ICU error code.
|
||||
*/
|
||||
void nextSingleUnit(SingleUnitImpl& result, bool& sawAnd, UErrorCode& status) {
|
||||
sawAnd = false;
|
||||
if (U_FAILURE(status)) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (!hasNext()) {
|
||||
// probably "one"
|
||||
return;
|
||||
}
|
||||
|
||||
// state:
|
||||
// 0 = no tokens seen yet (will accept power, SI prefix, or simple unit)
|
||||
// 1 = power token seen (will not accept another power token)
|
||||
// 2 = SI prefix token seen (will not accept a power or SI prefix token)
|
||||
int32_t state = 0;
|
||||
|
||||
// Maybe read a compound part
|
||||
if (fIndex != 0) {
|
||||
Token token = nextToken(status);
|
||||
if (U_FAILURE(status)) {
|
||||
return;
|
||||
bool atStart = fIndex == 0;
|
||||
Token token = nextToken(status);
|
||||
if (U_FAILURE(status)) { return; }
|
||||
|
||||
if (atStart) {
|
||||
// Identifiers optionally start with "per-".
|
||||
if (token.getType() == Token::TYPE_INITIAL_COMPOUND_PART) {
|
||||
U_ASSERT(token.getInitialCompoundPart() == INITIAL_COMPOUND_PART_PER);
|
||||
fAfterPer = true;
|
||||
result.dimensionality = -1;
|
||||
|
||||
token = nextToken(status);
|
||||
if (U_FAILURE(status)) { return; }
|
||||
}
|
||||
} else {
|
||||
// All other SingleUnit's are separated from previous SingleUnit's
|
||||
// via a compound part:
|
||||
if (token.getType() != Token::TYPE_COMPOUND_PART) {
|
||||
status = kUnitIdentifierSyntaxError;
|
||||
return;
|
||||
}
|
||||
|
||||
switch (token.getMatch()) {
|
||||
case COMPOUND_PART_PER:
|
||||
if (fAfterPer) {
|
||||
status = kUnitIdentifierSyntaxError;
|
||||
return;
|
||||
}
|
||||
fAfterPer = true;
|
||||
case COMPOUND_PART_PER:
|
||||
if (sawAnd) {
|
||||
// Mixed compound units not yet supported,
|
||||
// TODO(CLDR-13700).
|
||||
status = kUnitIdentifierSyntaxError;
|
||||
return;
|
||||
}
|
||||
fAfterPer = true;
|
||||
result.dimensionality = -1;
|
||||
break;
|
||||
|
||||
case COMPOUND_PART_TIMES:
|
||||
if (fAfterPer) {
|
||||
result.dimensionality = -1;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
case COMPOUND_PART_TIMES:
|
||||
if (fAfterPer) {
|
||||
result.dimensionality = -1;
|
||||
}
|
||||
break;
|
||||
|
||||
case COMPOUND_PART_AND:
|
||||
sawAnd = true;
|
||||
fAfterPer = false;
|
||||
break;
|
||||
case COMPOUND_PART_AND:
|
||||
if (fAfterPer) {
|
||||
// Can't start with "-and-", and mixed compound units
|
||||
// not yet supported, TODO(CLDR-13700).
|
||||
status = kUnitIdentifierSyntaxError;
|
||||
return;
|
||||
}
|
||||
sawAnd = true;
|
||||
break;
|
||||
}
|
||||
|
||||
token = nextToken(status);
|
||||
if (U_FAILURE(status)) { return; }
|
||||
}
|
||||
|
||||
// Read a unit
|
||||
while (hasNext()) {
|
||||
Token token = nextToken(status);
|
||||
if (U_FAILURE(status)) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Read tokens until we have a complete SingleUnit or we reach the end.
|
||||
while (true) {
|
||||
switch (token.getType()) {
|
||||
case Token::TYPE_POWER_PART:
|
||||
if (state > 0) {
|
||||
@ -471,10 +536,6 @@ private:
|
||||
state = 2;
|
||||
break;
|
||||
|
||||
case Token::TYPE_ONE:
|
||||
// Skip "one" and go to the next unit
|
||||
return nextSingleUnit(result, sawAnd, status);
|
||||
|
||||
case Token::TYPE_SIMPLE_UNIT:
|
||||
result.index = token.getSimpleUnitIndex();
|
||||
return;
|
||||
@ -483,27 +544,38 @@ private:
|
||||
status = kUnitIdentifierSyntaxError;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// We ran out of tokens before finding a complete single unit.
|
||||
status = kUnitIdentifierSyntaxError;
|
||||
if (!hasNext()) {
|
||||
// We ran out of tokens before finding a complete single unit.
|
||||
status = kUnitIdentifierSyntaxError;
|
||||
return;
|
||||
}
|
||||
token = nextToken(status);
|
||||
if (U_FAILURE(status)) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// @param result is modified, not overridden. Caller must pass in a
|
||||
/// default-constructed (empty) MeasureUnitImpl instance.
|
||||
void parseImpl(MeasureUnitImpl& result, UErrorCode& status) {
|
||||
if (U_FAILURE(status)) {
|
||||
return;
|
||||
}
|
||||
if (fSource.empty()) {
|
||||
// The dimenionless unit: nothing to parse. leave result as is.
|
||||
return;
|
||||
}
|
||||
int32_t unitNum = 0;
|
||||
while (hasNext()) {
|
||||
bool sawAnd;
|
||||
bool sawAnd = false;
|
||||
SingleUnitImpl singleUnit;
|
||||
nextSingleUnit(singleUnit, sawAnd, status);
|
||||
if (U_FAILURE(status)) {
|
||||
return;
|
||||
}
|
||||
if (singleUnit.index == 0) {
|
||||
continue;
|
||||
}
|
||||
U_ASSERT(!singleUnit.isDimensionless());
|
||||
bool added = result.append(singleUnit, status);
|
||||
if (sawAnd && !added) {
|
||||
// Two similar units are not allowed in a mixed unit
|
||||
@ -511,9 +583,12 @@ private:
|
||||
return;
|
||||
}
|
||||
if ((++unitNum) >= 2) {
|
||||
UMeasureUnitComplexity complexity = sawAnd
|
||||
? UMEASURE_UNIT_MIXED
|
||||
: UMEASURE_UNIT_COMPOUND;
|
||||
// nextSingleUnit fails appropriately for "per" and "and" in the
|
||||
// same identifier. It doesn't fail for other compound units
|
||||
// (COMPOUND_PART_TIMES). Consequently we take care of that
|
||||
// here.
|
||||
UMeasureUnitComplexity complexity =
|
||||
sawAnd ? UMEASURE_UNIT_MIXED : UMEASURE_UNIT_COMPOUND;
|
||||
if (unitNum == 2) {
|
||||
U_ASSERT(result.complexity == UMEASURE_UNIT_SINGLE);
|
||||
result.complexity = complexity;
|
||||
@ -536,15 +611,22 @@ compareSingleUnits(const void* /*context*/, const void* left, const void* right)
|
||||
|
||||
/**
|
||||
* Generate the identifier string for a single unit in place.
|
||||
*
|
||||
* Does not support the dimensionless SingleUnitImpl: calling serializeSingle
|
||||
* with the dimensionless unit results in an U_INTERNAL_PROGRAM_ERROR.
|
||||
*
|
||||
* @param first If singleUnit is part of a compound unit, and not its first
|
||||
* single unit, set this to false. Otherwise: set to true.
|
||||
*/
|
||||
void serializeSingle(const SingleUnitImpl& singleUnit, bool first, CharString& output, UErrorCode& status) {
|
||||
if (first && singleUnit.dimensionality < 0) {
|
||||
output.append("one-per-", status);
|
||||
// Essentially the "unary per". For compound units with a numerator, the
|
||||
// caller takes care of the "binary per".
|
||||
output.append("per-", status);
|
||||
}
|
||||
|
||||
if (singleUnit.index == 0) {
|
||||
// Don't propagate SI prefixes and powers on one
|
||||
output.append("one", status);
|
||||
if (singleUnit.isDimensionless()) {
|
||||
status = U_INTERNAL_PROGRAM_ERROR;
|
||||
return;
|
||||
}
|
||||
int8_t posPower = std::abs(singleUnit.dimensionality);
|
||||
@ -595,7 +677,8 @@ void serialize(MeasureUnitImpl& impl, UErrorCode& status) {
|
||||
}
|
||||
U_ASSERT(impl.identifier.isEmpty());
|
||||
if (impl.units.length() == 0) {
|
||||
impl.identifier.append("one", status);
|
||||
// Dimensionless, constructed by the default constructor: no appending
|
||||
// to impl.identifier, we wish it to contain the zero-length string.
|
||||
return;
|
||||
}
|
||||
if (impl.complexity == UMEASURE_UNIT_COMPOUND) {
|
||||
@ -634,8 +717,17 @@ void serialize(MeasureUnitImpl& impl, UErrorCode& status) {
|
||||
|
||||
}
|
||||
|
||||
/** @return true if a new item was added */
|
||||
/**
|
||||
* Appends a SingleUnitImpl to a MeasureUnitImpl.
|
||||
*
|
||||
* @return true if a new item was added. If unit is the dimensionless unit, it
|
||||
* is never added: the return value will always be false.
|
||||
*/
|
||||
bool appendImpl(MeasureUnitImpl& impl, const SingleUnitImpl& unit, UErrorCode& status) {
|
||||
if (unit.isDimensionless()) {
|
||||
// We don't append dimensionless units.
|
||||
return false;
|
||||
}
|
||||
// Find a similar unit that already exists, to attempt to coalesce
|
||||
SingleUnitImpl* oldUnit = nullptr;
|
||||
for (int32_t i = 0; i < impl.units.length(); i++) {
|
||||
@ -645,6 +737,8 @@ bool appendImpl(MeasureUnitImpl& impl, const SingleUnitImpl& unit, UErrorCode& s
|
||||
}
|
||||
}
|
||||
if (oldUnit) {
|
||||
// Both dimensionalities will be positive, or both will be negative, by
|
||||
// virtue of isCompatibleWith().
|
||||
oldUnit->dimensionality += unit.dimensionality;
|
||||
} else {
|
||||
SingleUnitImpl* destination = impl.units.emplaceBack();
|
||||
@ -744,7 +838,12 @@ MeasureUnit MeasureUnit::withSIPrefix(UMeasureSIPrefix prefix, UErrorCode& statu
|
||||
}
|
||||
|
||||
int32_t MeasureUnit::getDimensionality(UErrorCode& status) const {
|
||||
return SingleUnitImpl::forMeasureUnit(*this, status).dimensionality;
|
||||
SingleUnitImpl singleUnit = SingleUnitImpl::forMeasureUnit(*this, status);
|
||||
if (U_FAILURE(status)) { return 0; }
|
||||
if (singleUnit.isDimensionless()) {
|
||||
return 0;
|
||||
}
|
||||
return singleUnit.dimensionality;
|
||||
}
|
||||
|
||||
MeasureUnit MeasureUnit::withDimensionality(int32_t dimensionality, UErrorCode& status) const {
|
||||
|
@ -25,14 +25,25 @@ static const char kDefaultCurrency8[] = "XXX";
|
||||
struct SingleUnitImpl : public UMemory {
|
||||
/**
|
||||
* Gets a single unit from the MeasureUnit. If there are multiple single units, sets an error
|
||||
* code and return the base dimensionless unit. Parses if necessary.
|
||||
* code and returns the base dimensionless unit. Parses if necessary.
|
||||
*/
|
||||
static SingleUnitImpl forMeasureUnit(const MeasureUnit& measureUnit, UErrorCode& status);
|
||||
|
||||
/** Transform this SingleUnitImpl into a MeasureUnit, simplifying if possible. */
|
||||
MeasureUnit build(UErrorCode& status) const;
|
||||
|
||||
/** Compare this SingleUnitImpl to another SingleUnitImpl. */
|
||||
/**
|
||||
* Compare this SingleUnitImpl to another SingleUnitImpl for the sake of
|
||||
* sorting and coalescing.
|
||||
*
|
||||
* Takes the sign of dimensionality into account, but not the absolute
|
||||
* value: per-meter is not considered the same as meter, but meter is
|
||||
* considered the same as square-meter.
|
||||
*
|
||||
* The dimensionless unit generally does not get compared, but if it did, it
|
||||
* would sort before other units by virtue of index being < 0 and
|
||||
* dimensionality not being negative.
|
||||
*/
|
||||
int32_t compareTo(const SingleUnitImpl& other) const {
|
||||
if (dimensionality < 0 && other.dimensionality > 0) {
|
||||
// Positive dimensions first
|
||||
@ -66,13 +77,36 @@ struct SingleUnitImpl : public UMemory {
|
||||
return (compareTo(other) == 0);
|
||||
}
|
||||
|
||||
/** Simple unit index, unique for every simple unit. */
|
||||
int32_t index = 0;
|
||||
/**
|
||||
* Returns true if this unit is the "dimensionless base unit", as produced
|
||||
* by the MeasureUnit() default constructor. (This does not include the
|
||||
* likes of concentrations or angles.)
|
||||
*/
|
||||
bool isDimensionless() const {
|
||||
return index == -1;
|
||||
}
|
||||
|
||||
/** SI prefix. **/
|
||||
/**
|
||||
* Simple unit index, unique for every simple unit, -1 for the dimensionless
|
||||
* unit. This is an index into a string list in measunit_extra.cpp.
|
||||
*
|
||||
* The default value is -1, meaning the dimensionless unit:
|
||||
* isDimensionless() will return true, until index is changed.
|
||||
*/
|
||||
int32_t index = -1;
|
||||
|
||||
/**
|
||||
* SI prefix.
|
||||
*
|
||||
* This is ignored for the dimensionless unit.
|
||||
*/
|
||||
UMeasureSIPrefix siPrefix = UMEASURE_SI_PREFIX_ONE;
|
||||
|
||||
/** Dimensionality. **/
|
||||
/**
|
||||
* Dimensionality.
|
||||
*
|
||||
* This is meaningless for the dimensionless unit.
|
||||
*/
|
||||
int32_t dimensionality = 1;
|
||||
};
|
||||
|
||||
@ -92,7 +126,8 @@ struct MeasureUnitImpl : public UMemory {
|
||||
*
|
||||
* @param identifier The unit identifier string.
|
||||
* @param status Set if the identifier string is not valid.
|
||||
* @return A newly parsed value object.
|
||||
* @return A newly parsed value object. Behaviour of this unit is
|
||||
* unspecified if an error is returned via status.
|
||||
*/
|
||||
static MeasureUnitImpl forIdentifier(StringPiece identifier, UErrorCode& status);
|
||||
|
||||
@ -145,15 +180,23 @@ struct MeasureUnitImpl : public UMemory {
|
||||
/** Mutates this MeasureUnitImpl to take the reciprocal. */
|
||||
void takeReciprocal(UErrorCode& status);
|
||||
|
||||
/** Mutates this MeasureUnitImpl to append a single unit. */
|
||||
/**
|
||||
* Mutates this MeasureUnitImpl to append a single unit.
|
||||
*
|
||||
* @return true if a new item was added. If unit is the dimensionless unit,
|
||||
* it is never added: the return value will always be false.
|
||||
*/
|
||||
bool append(const SingleUnitImpl& singleUnit, UErrorCode& status);
|
||||
|
||||
/** The complexity, either SINGLE, COMPOUND, or MIXED. */
|
||||
UMeasureUnitComplexity complexity = UMEASURE_UNIT_SINGLE;
|
||||
|
||||
/**
|
||||
* The list of simple units. These may be summed or multiplied, based on the value of the
|
||||
* complexity field.
|
||||
* The list of simple units. These may be summed or multiplied, based on the
|
||||
* value of the complexity field.
|
||||
*
|
||||
* The "dimensionless" unit (SingleUnitImpl default constructor) must not be
|
||||
* added to this list.
|
||||
*/
|
||||
MaybeStackVector<SingleUnitImpl> units;
|
||||
|
||||
|
@ -11,7 +11,7 @@ U_NAMESPACE_BEGIN
|
||||
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(NoUnit)
|
||||
|
||||
NoUnit U_EXPORT2 NoUnit::base() {
|
||||
return NoUnit("one");
|
||||
return NoUnit("");
|
||||
}
|
||||
|
||||
NoUnit U_EXPORT2 NoUnit::percent() {
|
||||
|
@ -37,7 +37,7 @@ struct MeasureUnitImpl;
|
||||
* Enumeration for unit complexity. There are three levels:
|
||||
*
|
||||
* - SINGLE: A single unit, optionally with a power and/or SI prefix. Examples: hectare,
|
||||
* square-kilometer, kilojoule, one-per-second.
|
||||
* square-kilometer, kilojoule, per-second.
|
||||
* - COMPOUND: A unit composed of the product of multiple single units. Examples:
|
||||
* meter-per-second, kilowatt-hour, kilogram-meter-per-square-second.
|
||||
* - MIXED: A unit composed of the sum of multiple single units. Examples: foot+inch,
|
||||
@ -387,6 +387,8 @@ class U_I18N_API MeasureUnit: public UObject {
|
||||
* NOTE: Only works on SINGLE units. If this is a COMPOUND or MIXED unit, an error will
|
||||
* occur. For more information, see UMeasureUnitComplexity.
|
||||
*
|
||||
* For the base dimensionless unit, withDimensionality does nothing.
|
||||
*
|
||||
* @param dimensionality The dimensionality (power).
|
||||
* @param status Set if this is not a SINGLE unit or if another error occurs.
|
||||
* @return A new SINGLE unit.
|
||||
@ -401,6 +403,8 @@ class U_I18N_API MeasureUnit: public UObject {
|
||||
* NOTE: Only works on SINGLE units. If this is a COMPOUND or MIXED unit, an error will
|
||||
* occur. For more information, see UMeasureUnitComplexity.
|
||||
*
|
||||
* For the base dimensionless unit, getDimensionality returns 0.
|
||||
*
|
||||
* @param status Set if this is not a SINGLE unit or if another error occurs.
|
||||
* @return The dimensionality (power) of this simple unit.
|
||||
* @draft ICU 67
|
||||
@ -447,7 +451,7 @@ class U_I18N_API MeasureUnit: public UObject {
|
||||
*
|
||||
* Examples:
|
||||
* - Given "meter-kilogram-per-second", three units will be returned: "meter",
|
||||
* "kilogram", and "one-per-second".
|
||||
* "kilogram", and "per-second".
|
||||
* - Given "hour+minute+second", three units will be returned: "hour", "minute",
|
||||
* and "second".
|
||||
*
|
||||
@ -3375,11 +3379,15 @@ class U_I18N_API MeasureUnit: public UObject {
|
||||
|
||||
private:
|
||||
|
||||
// If non-null, fImpl is owned by the MeasureUnit.
|
||||
// Used by new draft APIs in ICU 67. If non-null, fImpl is owned by the
|
||||
// MeasureUnit.
|
||||
MeasureUnitImpl* fImpl;
|
||||
|
||||
// These two ints are indices into static string lists in measunit.cpp
|
||||
// An index into a static string list in measunit.cpp. If set to -1, fImpl
|
||||
// is in use instead of fTypeId and fSubTypeId.
|
||||
int16_t fSubTypeId;
|
||||
// An index into a static string list in measunit.cpp. If set to -1, fImpl
|
||||
// is in use instead of fTypeId and fSubTypeId.
|
||||
int8_t fTypeId;
|
||||
|
||||
MeasureUnit(int32_t typeId, int32_t subTypeId);
|
||||
@ -3389,7 +3397,11 @@ private:
|
||||
static MeasureUnit *create(int typeId, int subTypeId, UErrorCode &status);
|
||||
|
||||
/**
|
||||
* @return Whether subType is known to ICU.
|
||||
* Sets output's typeId and subTypeId according to subType, if subType is a
|
||||
* valid/known identifier.
|
||||
*
|
||||
* @return Whether subType is known to ICU. If false, output was not
|
||||
* modified.
|
||||
*/
|
||||
static bool findBySubType(StringPiece subType, MeasureUnit* output);
|
||||
|
||||
|
@ -79,9 +79,10 @@ private:
|
||||
void Test20332_PersonUnits();
|
||||
void TestNumericTime();
|
||||
void TestNumericTimeSomeSpecialFormats();
|
||||
void TestIdentifiers();
|
||||
void TestInvalidIdentifiers();
|
||||
void TestCompoundUnitOperations();
|
||||
void TestIdentifiers();
|
||||
void TestDimensionlessBehaviour();
|
||||
void Test21060_AddressSanitizerProblem();
|
||||
|
||||
void verifyFormat(
|
||||
@ -202,9 +203,10 @@ void MeasureFormatTest::runIndexedTest(
|
||||
TESTCASE_AUTO(Test20332_PersonUnits);
|
||||
TESTCASE_AUTO(TestNumericTime);
|
||||
TESTCASE_AUTO(TestNumericTimeSomeSpecialFormats);
|
||||
TESTCASE_AUTO(TestIdentifiers);
|
||||
TESTCASE_AUTO(TestInvalidIdentifiers);
|
||||
TESTCASE_AUTO(TestCompoundUnitOperations);
|
||||
TESTCASE_AUTO(TestIdentifiers);
|
||||
TESTCASE_AUTO(TestDimensionlessBehaviour);
|
||||
TESTCASE_AUTO(Test21060_AddressSanitizerProblem);
|
||||
TESTCASE_AUTO_END;
|
||||
}
|
||||
@ -3239,10 +3241,43 @@ void MeasureFormatTest::TestNumericTimeSomeSpecialFormats() {
|
||||
verifyFormat("Danish fhoursFminutes", fmtDa, fhoursFminutes, 2, "2.03,877");
|
||||
}
|
||||
|
||||
void MeasureFormatTest::TestIdentifiers() {
|
||||
IcuTestErrorCode status(*this, "TestIdentifiers");
|
||||
struct TestCase {
|
||||
const char* id;
|
||||
const char* normalized;
|
||||
} cases[] = {
|
||||
// Correctly normalized identifiers should not change
|
||||
{"", ""},
|
||||
{"square-meter-per-square-meter", "square-meter-per-square-meter"},
|
||||
{"kilogram-meter-per-square-meter-square-second",
|
||||
"kilogram-meter-per-square-meter-square-second"},
|
||||
{"square-mile-and-square-foot", "square-mile-and-square-foot"},
|
||||
{"square-foot-and-square-mile", "square-foot-and-square-mile"},
|
||||
{"per-cubic-centimeter", "per-cubic-centimeter"},
|
||||
{"per-kilometer", "per-kilometer"},
|
||||
|
||||
// Normalization of power and per
|
||||
{"p2-foot-and-p2-mile", "square-foot-and-square-mile"},
|
||||
{"gram-square-gram-per-dekagram", "cubic-gram-per-dekagram"},
|
||||
{"kilogram-per-meter-per-second", "kilogram-per-meter-second"},
|
||||
|
||||
// TODO(ICU-20920): Add more test cases once the proper ranking is available.
|
||||
};
|
||||
for (const auto &cas : cases) {
|
||||
status.setScope(cas.id);
|
||||
MeasureUnit unit = MeasureUnit::forIdentifier(cas.id, status);
|
||||
status.errIfFailureAndReset();
|
||||
const char* actual = unit.getIdentifier();
|
||||
assertEquals(cas.id, cas.normalized, actual);
|
||||
status.errIfFailureAndReset();
|
||||
}
|
||||
}
|
||||
|
||||
void MeasureFormatTest::TestInvalidIdentifiers() {
|
||||
IcuTestErrorCode status(*this, "TestInvalidIdentifiers");
|
||||
|
||||
const char* const inputs[] = {
|
||||
const char *const inputs[] = {
|
||||
"kilo",
|
||||
"kilokilo",
|
||||
"onekilo",
|
||||
@ -3258,7 +3293,23 @@ void MeasureFormatTest::TestInvalidIdentifiers() {
|
||||
"-p2-meter",
|
||||
"+p2-meter",
|
||||
"+",
|
||||
"-"
|
||||
"-",
|
||||
"-mile",
|
||||
"-and-mile",
|
||||
"-per-mile",
|
||||
"one",
|
||||
"one-one",
|
||||
"one-per-mile",
|
||||
"one-per-cubic-centimeter",
|
||||
"square--per-meter",
|
||||
"metersecond", // Must have compound part in between single units
|
||||
|
||||
// Negative powers not supported in mixed units yet. TODO(CLDR-13701).
|
||||
"per-hour-and-hertz",
|
||||
"hertz-and-per-hour",
|
||||
|
||||
// Compound units not supported in mixed units yet. TODO(CLDR-13700).
|
||||
"kilonewton-meter-and-newton-meter",
|
||||
};
|
||||
|
||||
for (const auto& input : inputs) {
|
||||
@ -3295,9 +3346,9 @@ void MeasureFormatTest::TestCompoundUnitOperations() {
|
||||
MeasureUnit overQuarticKilometer1 = kilometer.withDimensionality(-4, status);
|
||||
|
||||
verifySingleUnit(squareMeter, UMEASURE_SI_PREFIX_ONE, 2, "square-meter");
|
||||
verifySingleUnit(overCubicCentimeter, UMEASURE_SI_PREFIX_CENTI, -3, "one-per-cubic-centimeter");
|
||||
verifySingleUnit(overCubicCentimeter, UMEASURE_SI_PREFIX_CENTI, -3, "per-cubic-centimeter");
|
||||
verifySingleUnit(quarticKilometer, UMEASURE_SI_PREFIX_KILO, 4, "p4-kilometer");
|
||||
verifySingleUnit(overQuarticKilometer1, UMEASURE_SI_PREFIX_KILO, -4, "one-per-p4-kilometer");
|
||||
verifySingleUnit(overQuarticKilometer1, UMEASURE_SI_PREFIX_KILO, -4, "per-p4-kilometer");
|
||||
|
||||
assertTrue("power inequality", quarticKilometer != overQuarticKilometer1);
|
||||
|
||||
@ -3310,9 +3361,9 @@ void MeasureFormatTest::TestCompoundUnitOperations() {
|
||||
.reciprocal(status)
|
||||
.withSIPrefix(UMEASURE_SI_PREFIX_KILO, status);
|
||||
|
||||
verifySingleUnit(overQuarticKilometer2, UMEASURE_SI_PREFIX_KILO, -4, "one-per-p4-kilometer");
|
||||
verifySingleUnit(overQuarticKilometer3, UMEASURE_SI_PREFIX_KILO, -4, "one-per-p4-kilometer");
|
||||
verifySingleUnit(overQuarticKilometer4, UMEASURE_SI_PREFIX_KILO, -4, "one-per-p4-kilometer");
|
||||
verifySingleUnit(overQuarticKilometer2, UMEASURE_SI_PREFIX_KILO, -4, "per-p4-kilometer");
|
||||
verifySingleUnit(overQuarticKilometer3, UMEASURE_SI_PREFIX_KILO, -4, "per-p4-kilometer");
|
||||
verifySingleUnit(overQuarticKilometer4, UMEASURE_SI_PREFIX_KILO, -4, "per-p4-kilometer");
|
||||
|
||||
assertTrue("reciprocal equality", overQuarticKilometer1 == overQuarticKilometer2);
|
||||
assertTrue("reciprocal equality", overQuarticKilometer1 == overQuarticKilometer3);
|
||||
@ -3343,7 +3394,7 @@ void MeasureFormatTest::TestCompoundUnitOperations() {
|
||||
const char* secondCentimeterSub[] = {"centimeter", "square-kilosecond"};
|
||||
verifyCompoundUnit(secondCentimeter, "centimeter-square-kilosecond",
|
||||
secondCentimeterSub, UPRV_LENGTHOF(secondCentimeterSub));
|
||||
const char* secondCentimeterPerKilometerSub[] = {"centimeter", "square-kilosecond", "one-per-kilometer"};
|
||||
const char* secondCentimeterPerKilometerSub[] = {"centimeter", "square-kilosecond", "per-kilometer"};
|
||||
verifyCompoundUnit(secondCentimeterPerKilometer, "centimeter-square-kilosecond-per-kilometer",
|
||||
secondCentimeterPerKilometerSub, UPRV_LENGTHOF(secondCentimeterPerKilometerSub));
|
||||
|
||||
@ -3378,31 +3429,16 @@ void MeasureFormatTest::TestCompoundUnitOperations() {
|
||||
|
||||
assertTrue("order matters inequality", footInch != inchFoot);
|
||||
|
||||
MeasureUnit one1;
|
||||
MeasureUnit one2 = MeasureUnit::forIdentifier("one", status);
|
||||
MeasureUnit one3 = MeasureUnit::forIdentifier("", status);
|
||||
MeasureUnit squareOne = one2.withDimensionality(2, status);
|
||||
MeasureUnit onePerOne = one2.reciprocal(status);
|
||||
MeasureUnit squareKiloOne = squareOne.withSIPrefix(UMEASURE_SI_PREFIX_KILO, status);
|
||||
MeasureUnit onePerSquareKiloOne = squareKiloOne.reciprocal(status);
|
||||
MeasureUnit oneOne = MeasureUnit::forIdentifier("one-one", status);
|
||||
MeasureUnit onePlusOne = MeasureUnit::forIdentifier("one-and-one", status);
|
||||
MeasureUnit kilometer2 = one2.product(kilometer, status);
|
||||
MeasureUnit dimensionless;
|
||||
MeasureUnit dimensionless2 = MeasureUnit::forIdentifier("", status);
|
||||
status.errIfFailureAndReset("Dimensionless MeasureUnit.");
|
||||
assertTrue("dimensionless equality", dimensionless == dimensionless2);
|
||||
|
||||
verifySingleUnit(one1, UMEASURE_SI_PREFIX_ONE, 1, "one");
|
||||
verifySingleUnit(one2, UMEASURE_SI_PREFIX_ONE, 1, "one");
|
||||
verifySingleUnit(one3, UMEASURE_SI_PREFIX_ONE, 1, "one");
|
||||
verifySingleUnit(squareOne, UMEASURE_SI_PREFIX_ONE, 1, "one");
|
||||
verifySingleUnit(onePerOne, UMEASURE_SI_PREFIX_ONE, 1, "one");
|
||||
verifySingleUnit(squareKiloOne, UMEASURE_SI_PREFIX_ONE, 1, "one");
|
||||
verifySingleUnit(onePerSquareKiloOne, UMEASURE_SI_PREFIX_ONE, 1, "one");
|
||||
verifySingleUnit(oneOne, UMEASURE_SI_PREFIX_ONE, 1, "one");
|
||||
verifySingleUnit(onePlusOne, UMEASURE_SI_PREFIX_ONE, 1, "one");
|
||||
// We support starting from an "identity" MeasureUnit and then combining it
|
||||
// with others via product:
|
||||
MeasureUnit kilometer2 = dimensionless.product(kilometer, status);
|
||||
status.errIfFailureAndReset("dimensionless.product(kilometer, status)");
|
||||
verifySingleUnit(kilometer2, UMEASURE_SI_PREFIX_KILO, 1, "kilometer");
|
||||
|
||||
assertTrue("one equality", one1 == one2);
|
||||
assertTrue("one equality", one2 == one3);
|
||||
assertTrue("one-per-one equality", onePerOne == onePerSquareKiloOne);
|
||||
assertTrue("kilometer equality", kilometer == kilometer2);
|
||||
|
||||
// Test out-of-range powers
|
||||
@ -3413,49 +3449,81 @@ void MeasureFormatTest::TestCompoundUnitOperations() {
|
||||
status.expectErrorAndReset(U_ILLEGAL_ARGUMENT_ERROR);
|
||||
MeasureUnit power16b = power15.product(kilometer, status);
|
||||
status.expectErrorAndReset(U_ILLEGAL_ARGUMENT_ERROR);
|
||||
MeasureUnit powerN15 = MeasureUnit::forIdentifier("one-per-p15-kilometer", status);
|
||||
verifySingleUnit(powerN15, UMEASURE_SI_PREFIX_KILO, -15, "one-per-p15-kilometer");
|
||||
MeasureUnit powerN15 = MeasureUnit::forIdentifier("per-p15-kilometer", status);
|
||||
verifySingleUnit(powerN15, UMEASURE_SI_PREFIX_KILO, -15, "per-p15-kilometer");
|
||||
status.errIfFailureAndReset();
|
||||
MeasureUnit powerN16a = MeasureUnit::forIdentifier("one-per-p16-kilometer", status);
|
||||
MeasureUnit powerN16a = MeasureUnit::forIdentifier("per-p16-kilometer", status);
|
||||
status.expectErrorAndReset(U_ILLEGAL_ARGUMENT_ERROR);
|
||||
MeasureUnit powerN16b = powerN15.product(overQuarticKilometer1, status);
|
||||
status.expectErrorAndReset(U_ILLEGAL_ARGUMENT_ERROR);
|
||||
}
|
||||
|
||||
void MeasureFormatTest::TestIdentifiers() {
|
||||
IcuTestErrorCode status(*this, "TestIdentifiers");
|
||||
struct TestCase {
|
||||
bool valid;
|
||||
const char* id;
|
||||
const char* normalized;
|
||||
} cases[] = {
|
||||
{true, "square-meter-per-square-meter", "square-meter-per-square-meter"},
|
||||
{true, "kilogram-meter-per-square-meter-square-second",
|
||||
"kilogram-meter-per-square-meter-square-second"},
|
||||
// TODO(ICU-20920): Add more test cases once the proper ranking is available.
|
||||
};
|
||||
for (const auto& cas : cases) {
|
||||
status.setScope(cas.id);
|
||||
MeasureUnit unit = MeasureUnit::forIdentifier(cas.id, status);
|
||||
if (!cas.valid) {
|
||||
status.expectErrorAndReset(U_ILLEGAL_ARGUMENT_ERROR);
|
||||
continue;
|
||||
}
|
||||
const char* actual = unit.getIdentifier();
|
||||
assertEquals(cas.id, cas.normalized, actual);
|
||||
status.errIfFailureAndReset();
|
||||
}
|
||||
void MeasureFormatTest::TestDimensionlessBehaviour() {
|
||||
IcuTestErrorCode status(*this, "TestDimensionlessBehaviour");
|
||||
MeasureUnit dimensionless;
|
||||
MeasureUnit modified;
|
||||
|
||||
// At the time of writing, each of the seven groups below caused
|
||||
// Parser::from("") to be called:
|
||||
|
||||
// splitToSingleUnits
|
||||
int32_t count;
|
||||
LocalArray<MeasureUnit> singles = dimensionless.splitToSingleUnits(count, status);
|
||||
status.errIfFailureAndReset("dimensionless.splitToSingleUnits(...)");
|
||||
assertEquals("no singles in dimensionless", 0, count);
|
||||
|
||||
// product(dimensionless)
|
||||
MeasureUnit mile = MeasureUnit::getMile();
|
||||
mile = mile.product(dimensionless, status);
|
||||
status.errIfFailureAndReset("mile.product(dimensionless, ...)");
|
||||
verifySingleUnit(mile, UMEASURE_SI_PREFIX_ONE, 1, "mile");
|
||||
|
||||
// dimensionless.getSIPrefix()
|
||||
UMeasureSIPrefix siPrefix = dimensionless.getSIPrefix(status);
|
||||
status.errIfFailureAndReset("dimensionless.getSIPrefix(...)");
|
||||
assertEquals("dimensionless SIPrefix", UMEASURE_SI_PREFIX_ONE, siPrefix);
|
||||
|
||||
// dimensionless.withSIPrefix()
|
||||
modified = dimensionless.withSIPrefix(UMEASURE_SI_PREFIX_KILO, status);
|
||||
status.errIfFailureAndReset("dimensionless.withSIPrefix(...)");
|
||||
singles = modified.splitToSingleUnits(count, status);
|
||||
assertEquals("no singles in modified", 0, count);
|
||||
siPrefix = modified.getSIPrefix(status);
|
||||
status.errIfFailureAndReset("modified.getSIPrefix(...)");
|
||||
assertEquals("modified SIPrefix", UMEASURE_SI_PREFIX_ONE, siPrefix);
|
||||
|
||||
// dimensionless.getComplexity()
|
||||
UMeasureUnitComplexity complexity = dimensionless.getComplexity(status);
|
||||
status.errIfFailureAndReset("dimensionless.getComplexity(...)");
|
||||
assertEquals("dimensionless complexity", UMEASURE_UNIT_SINGLE, complexity);
|
||||
|
||||
// Dimensionality is mostly meaningless for dimensionless units, but it's
|
||||
// still considered a SINGLE unit, so this code doesn't throw errors:
|
||||
|
||||
// dimensionless.getDimensionality()
|
||||
int32_t dimensionality = dimensionless.getDimensionality(status);
|
||||
status.errIfFailureAndReset("dimensionless.getDimensionality(...)");
|
||||
assertEquals("dimensionless dimensionality", 0, dimensionality);
|
||||
|
||||
// dimensionless.withDimensionality()
|
||||
dimensionless.withDimensionality(-1, status);
|
||||
status.errIfFailureAndReset("dimensionless.withDimensionality(...)");
|
||||
dimensionality = dimensionless.getDimensionality(status);
|
||||
status.errIfFailureAndReset("dimensionless.getDimensionality(...)");
|
||||
assertEquals("dimensionless dimensionality", 0, dimensionality);
|
||||
}
|
||||
|
||||
// ICU-21060
|
||||
void MeasureFormatTest::Test21060_AddressSanitizerProblem() {
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
MeasureUnit first = MeasureUnit::forIdentifier("one", status);
|
||||
IcuTestErrorCode status(*this, "Test21060_AddressSanitizerProblem");
|
||||
|
||||
MeasureUnit first = MeasureUnit::forIdentifier("", status);
|
||||
status.errIfFailureAndReset();
|
||||
|
||||
// Experimentally, a compound unit like "kilogram-meter" failed. A single
|
||||
// unit like "kilogram" or "meter" did not fail, did not trigger the
|
||||
// problem.
|
||||
MeasureUnit crux = MeasureUnit::forIdentifier("one-per-meter", status);
|
||||
MeasureUnit crux = MeasureUnit::forIdentifier("per-meter", status);
|
||||
|
||||
// Heap allocation of a new CharString for first.identifier happens here:
|
||||
first = first.product(crux, status);
|
||||
@ -3469,12 +3537,14 @@ void MeasureFormatTest::Test21060_AddressSanitizerProblem() {
|
||||
first = first.product(crux, status);
|
||||
|
||||
// Proving we've had no failure yet:
|
||||
if (U_FAILURE(status)) return;
|
||||
status.errIfFailureAndReset();
|
||||
|
||||
// heap-use-after-free failure happened here, since a SingleUnitImpl had
|
||||
// held onto a StringPiece pointing at a substring of an identifier that was
|
||||
// freed above:
|
||||
second = second.product(crux, status);
|
||||
|
||||
status.errIfFailureAndReset();
|
||||
}
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user