ICU-21060 Fix behaviour of -per-, -and-, and dimensionless units.

This commit is contained in:
Hugo van der Merwe 2020-04-18 01:24:20 +02:00 committed by Shane F. Carr
parent b9d1ba87f5
commit e03fa70541
6 changed files with 375 additions and 151 deletions

View File

@ -537,9 +537,9 @@ static const char * const gSubTypes[] = {
"solar-mass",
"stone",
"ton",
"one",
"percent",
"permille",
"", // TODO(ICU-21076): manual edit of what should have been generated by Java.
"percent", // TODO(ICU-21076): regenerate, deal with duplication.
"permille", // TODO(ICU-21076): regenerate, deal with duplication.
"gigawatt",
"horsepower",
"kilowatt",

View File

@ -34,17 +34,32 @@ namespace {
// TODO: Propose a new error code for this?
constexpr UErrorCode kUnitIdentifierSyntaxError = U_ILLEGAL_ARGUMENT_ERROR;
// This is to ensure we only insert positive integers into the trie
// Trie value offset for SI Prefixes. This is big enough to ensure we only
// insert positive integers into the trie.
constexpr int32_t kSIPrefixOffset = 64;
// Trie value offset for compound parts, e.g. "-per-", "-", "-and-".
constexpr int32_t kCompoundPartOffset = 128;
enum CompoundPart {
// Represents "-per-"
COMPOUND_PART_PER = kCompoundPartOffset,
// Represents "-"
COMPOUND_PART_TIMES,
// Represents "-and-"
COMPOUND_PART_AND,
};
// Trie value offset for "per-".
constexpr int32_t kInitialCompoundPartOffset = 192;
enum InitialCompoundPart {
// Represents "per-", the only compound part that can appear at the start of
// an identifier.
INITIAL_COMPOUND_PART_PER = kInitialCompoundPartOffset,
};
// Trie value offset for powers like "square-", "cubic-", "p2-" etc.
constexpr int32_t kPowerPartOffset = 256;
enum PowerPart {
@ -64,6 +79,8 @@ enum PowerPart {
POWER_PART_P15,
};
// Trie value offset for simple units, e.g. "gram", "nautical-mile",
// "fluid-ounce-imperial".
constexpr int32_t kSimpleUnitOffset = 512;
const struct SIPrefixStrings {
@ -94,7 +111,6 @@ const struct SIPrefixStrings {
// TODO(ICU-21059): Get this list from data
const char16_t* const gSimpleUnits[] = {
u"one", // note: expected to be index 0
u"candela",
u"carat",
u"gram",
@ -227,6 +243,7 @@ void U_CALLCONV initUnitExtras(UErrorCode& status) {
b.add(u"-per-", COMPOUND_PART_PER, status);
b.add(u"-", COMPOUND_PART_TIMES, status);
b.add(u"-and-", COMPOUND_PART_AND, status);
b.add(u"per-", INITIAL_COMPOUND_PART_PER, status);
b.add(u"square-", POWER_PART_P2, status);
b.add(u"cubic-", POWER_PART_P3, status);
b.add(u"p2-", POWER_PART_P2, status);
@ -270,28 +287,30 @@ public:
enum Type {
TYPE_UNDEFINED,
TYPE_SI_PREFIX,
// Token type for "-per-", "-", and "-and-".
TYPE_COMPOUND_PART,
// Token type for "per-".
TYPE_INITIAL_COMPOUND_PART,
TYPE_POWER_PART,
TYPE_ONE,
TYPE_SIMPLE_UNIT,
};
// Calling getType() is invalid, resulting in an assertion failure, if Token
// value isn't positive.
Type getType() const {
if (fMatch <= 0) {
UPRV_UNREACHABLE;
}
U_ASSERT(fMatch > 0);
if (fMatch < kCompoundPartOffset) {
return TYPE_SI_PREFIX;
}
if (fMatch < kPowerPartOffset) {
if (fMatch < kInitialCompoundPartOffset) {
return TYPE_COMPOUND_PART;
}
if (fMatch < kPowerPartOffset) {
return TYPE_INITIAL_COMPOUND_PART;
}
if (fMatch < kSimpleUnitOffset) {
return TYPE_POWER_PART;
}
if (fMatch == kSimpleUnitOffset) {
return TYPE_ONE;
}
return TYPE_SIMPLE_UNIT;
}
@ -300,11 +319,22 @@ public:
return static_cast<UMeasureSIPrefix>(fMatch - kSIPrefixOffset);
}
// Valid only for tokens with type TYPE_COMPOUND_PART.
int32_t getMatch() const {
U_ASSERT(getType() == TYPE_COMPOUND_PART);
return fMatch;
}
int32_t getInitialCompoundPart() const {
// Even if there is only one InitialCompoundPart value, we have this
// function for the simplicity of code consistency.
U_ASSERT(getType() == TYPE_INITIAL_COMPOUND_PART);
// Defensive: if this assert fails, code using this function also needs
// to change.
U_ASSERT(fMatch == INITIAL_COMPOUND_PART_PER);
return fMatch;
}
int8_t getPower() const {
U_ASSERT(getType() == TYPE_POWER_PART);
return static_cast<int8_t>(fMatch - kPowerPartOffset);
@ -347,6 +377,7 @@ public:
}
private:
// Tracks parser progress: the offset into fSource.
int32_t fIndex = 0;
// Since we're not owning this memory, whatever is passed to the constructor
@ -355,6 +386,9 @@ private:
StringPiece fSource;
UCharsTrie fTrie;
// Set to true when we've seen a "-per-" or a "per-", after which all units
// are in the denominator. Until we find an "-and-", at which point the
// identifier is invalid pending TODO(CLDR-13700).
bool fAfterPer = false;
Parser() : fSource(""), fTrie(u"") {}
@ -366,11 +400,17 @@ private:
return fIndex < fSource.length();
}
// Returns the next Token parsed from fSource, advancing fIndex to the end
// of that token in fSource. In case of U_FAILURE(status), the token
// returned will cause an abort if getType() is called on it.
Token nextToken(UErrorCode& status) {
fTrie.reset();
int32_t match = -1;
// Saves the position in the fSource string for the end of the most
// recent matching token.
int32_t previ = -1;
do {
// Find the longest token that matches a value in the trie:
while (fIndex < fSource.length()) {
auto result = fTrie.next(fSource.data()[fIndex++]);
if (result == USTRINGTRIE_NO_MATCH) {
break;
@ -385,7 +425,7 @@ private:
}
U_ASSERT(result == USTRINGTRIE_INTERMEDIATE_VALUE);
// continue;
} while (fIndex < fSource.length());
}
if (match < 0) {
status = kUnitIdentifierSyntaxError;
@ -395,63 +435,88 @@ private:
return Token(match);
}
/**
* Returns the next "single unit" via result.
*
* If a "-per-" was parsed, the result will have appropriate negative
* dimensionality.
*
* Returns an error if we parse both compound units and "-and-", since mixed
* compound units are not yet supported - TODO(CLDR-13700).
*
* @param result Will be overwritten by the result, if status shows success.
* @param sawAnd If an "-and-" was parsed prior to finding the "single
* unit", sawAnd is set to true. If not, it is left as is.
* @param status ICU error code.
*/
void nextSingleUnit(SingleUnitImpl& result, bool& sawAnd, UErrorCode& status) {
sawAnd = false;
if (U_FAILURE(status)) {
return;
}
if (!hasNext()) {
// probably "one"
return;
}
// state:
// 0 = no tokens seen yet (will accept power, SI prefix, or simple unit)
// 1 = power token seen (will not accept another power token)
// 2 = SI prefix token seen (will not accept a power or SI prefix token)
int32_t state = 0;
// Maybe read a compound part
if (fIndex != 0) {
Token token = nextToken(status);
if (U_FAILURE(status)) {
return;
bool atStart = fIndex == 0;
Token token = nextToken(status);
if (U_FAILURE(status)) { return; }
if (atStart) {
// Identifiers optionally start with "per-".
if (token.getType() == Token::TYPE_INITIAL_COMPOUND_PART) {
U_ASSERT(token.getInitialCompoundPart() == INITIAL_COMPOUND_PART_PER);
fAfterPer = true;
result.dimensionality = -1;
token = nextToken(status);
if (U_FAILURE(status)) { return; }
}
} else {
// All other SingleUnit's are separated from previous SingleUnit's
// via a compound part:
if (token.getType() != Token::TYPE_COMPOUND_PART) {
status = kUnitIdentifierSyntaxError;
return;
}
switch (token.getMatch()) {
case COMPOUND_PART_PER:
if (fAfterPer) {
status = kUnitIdentifierSyntaxError;
return;
}
fAfterPer = true;
case COMPOUND_PART_PER:
if (sawAnd) {
// Mixed compound units not yet supported,
// TODO(CLDR-13700).
status = kUnitIdentifierSyntaxError;
return;
}
fAfterPer = true;
result.dimensionality = -1;
break;
case COMPOUND_PART_TIMES:
if (fAfterPer) {
result.dimensionality = -1;
break;
}
break;
case COMPOUND_PART_TIMES:
if (fAfterPer) {
result.dimensionality = -1;
}
break;
case COMPOUND_PART_AND:
sawAnd = true;
fAfterPer = false;
break;
case COMPOUND_PART_AND:
if (fAfterPer) {
// Can't start with "-and-", and mixed compound units
// not yet supported, TODO(CLDR-13700).
status = kUnitIdentifierSyntaxError;
return;
}
sawAnd = true;
break;
}
token = nextToken(status);
if (U_FAILURE(status)) { return; }
}
// Read a unit
while (hasNext()) {
Token token = nextToken(status);
if (U_FAILURE(status)) {
return;
}
// Read tokens until we have a complete SingleUnit or we reach the end.
while (true) {
switch (token.getType()) {
case Token::TYPE_POWER_PART:
if (state > 0) {
@ -471,10 +536,6 @@ private:
state = 2;
break;
case Token::TYPE_ONE:
// Skip "one" and go to the next unit
return nextSingleUnit(result, sawAnd, status);
case Token::TYPE_SIMPLE_UNIT:
result.index = token.getSimpleUnitIndex();
return;
@ -483,27 +544,38 @@ private:
status = kUnitIdentifierSyntaxError;
return;
}
}
// We ran out of tokens before finding a complete single unit.
status = kUnitIdentifierSyntaxError;
if (!hasNext()) {
// We ran out of tokens before finding a complete single unit.
status = kUnitIdentifierSyntaxError;
return;
}
token = nextToken(status);
if (U_FAILURE(status)) {
return;
}
}
}
/// @param result is modified, not overridden. Caller must pass in a
/// default-constructed (empty) MeasureUnitImpl instance.
void parseImpl(MeasureUnitImpl& result, UErrorCode& status) {
if (U_FAILURE(status)) {
return;
}
if (fSource.empty()) {
// The dimenionless unit: nothing to parse. leave result as is.
return;
}
int32_t unitNum = 0;
while (hasNext()) {
bool sawAnd;
bool sawAnd = false;
SingleUnitImpl singleUnit;
nextSingleUnit(singleUnit, sawAnd, status);
if (U_FAILURE(status)) {
return;
}
if (singleUnit.index == 0) {
continue;
}
U_ASSERT(!singleUnit.isDimensionless());
bool added = result.append(singleUnit, status);
if (sawAnd && !added) {
// Two similar units are not allowed in a mixed unit
@ -511,9 +583,12 @@ private:
return;
}
if ((++unitNum) >= 2) {
UMeasureUnitComplexity complexity = sawAnd
? UMEASURE_UNIT_MIXED
: UMEASURE_UNIT_COMPOUND;
// nextSingleUnit fails appropriately for "per" and "and" in the
// same identifier. It doesn't fail for other compound units
// (COMPOUND_PART_TIMES). Consequently we take care of that
// here.
UMeasureUnitComplexity complexity =
sawAnd ? UMEASURE_UNIT_MIXED : UMEASURE_UNIT_COMPOUND;
if (unitNum == 2) {
U_ASSERT(result.complexity == UMEASURE_UNIT_SINGLE);
result.complexity = complexity;
@ -536,15 +611,22 @@ compareSingleUnits(const void* /*context*/, const void* left, const void* right)
/**
* Generate the identifier string for a single unit in place.
*
* Does not support the dimensionless SingleUnitImpl: calling serializeSingle
* with the dimensionless unit results in an U_INTERNAL_PROGRAM_ERROR.
*
* @param first If singleUnit is part of a compound unit, and not its first
* single unit, set this to false. Otherwise: set to true.
*/
void serializeSingle(const SingleUnitImpl& singleUnit, bool first, CharString& output, UErrorCode& status) {
if (first && singleUnit.dimensionality < 0) {
output.append("one-per-", status);
// Essentially the "unary per". For compound units with a numerator, the
// caller takes care of the "binary per".
output.append("per-", status);
}
if (singleUnit.index == 0) {
// Don't propagate SI prefixes and powers on one
output.append("one", status);
if (singleUnit.isDimensionless()) {
status = U_INTERNAL_PROGRAM_ERROR;
return;
}
int8_t posPower = std::abs(singleUnit.dimensionality);
@ -595,7 +677,8 @@ void serialize(MeasureUnitImpl& impl, UErrorCode& status) {
}
U_ASSERT(impl.identifier.isEmpty());
if (impl.units.length() == 0) {
impl.identifier.append("one", status);
// Dimensionless, constructed by the default constructor: no appending
// to impl.identifier, we wish it to contain the zero-length string.
return;
}
if (impl.complexity == UMEASURE_UNIT_COMPOUND) {
@ -634,8 +717,17 @@ void serialize(MeasureUnitImpl& impl, UErrorCode& status) {
}
/** @return true if a new item was added */
/**
* Appends a SingleUnitImpl to a MeasureUnitImpl.
*
* @return true if a new item was added. If unit is the dimensionless unit, it
* is never added: the return value will always be false.
*/
bool appendImpl(MeasureUnitImpl& impl, const SingleUnitImpl& unit, UErrorCode& status) {
if (unit.isDimensionless()) {
// We don't append dimensionless units.
return false;
}
// Find a similar unit that already exists, to attempt to coalesce
SingleUnitImpl* oldUnit = nullptr;
for (int32_t i = 0; i < impl.units.length(); i++) {
@ -645,6 +737,8 @@ bool appendImpl(MeasureUnitImpl& impl, const SingleUnitImpl& unit, UErrorCode& s
}
}
if (oldUnit) {
// Both dimensionalities will be positive, or both will be negative, by
// virtue of isCompatibleWith().
oldUnit->dimensionality += unit.dimensionality;
} else {
SingleUnitImpl* destination = impl.units.emplaceBack();
@ -744,7 +838,12 @@ MeasureUnit MeasureUnit::withSIPrefix(UMeasureSIPrefix prefix, UErrorCode& statu
}
int32_t MeasureUnit::getDimensionality(UErrorCode& status) const {
return SingleUnitImpl::forMeasureUnit(*this, status).dimensionality;
SingleUnitImpl singleUnit = SingleUnitImpl::forMeasureUnit(*this, status);
if (U_FAILURE(status)) { return 0; }
if (singleUnit.isDimensionless()) {
return 0;
}
return singleUnit.dimensionality;
}
MeasureUnit MeasureUnit::withDimensionality(int32_t dimensionality, UErrorCode& status) const {

View File

@ -25,14 +25,25 @@ static const char kDefaultCurrency8[] = "XXX";
struct SingleUnitImpl : public UMemory {
/**
* Gets a single unit from the MeasureUnit. If there are multiple single units, sets an error
* code and return the base dimensionless unit. Parses if necessary.
* code and returns the base dimensionless unit. Parses if necessary.
*/
static SingleUnitImpl forMeasureUnit(const MeasureUnit& measureUnit, UErrorCode& status);
/** Transform this SingleUnitImpl into a MeasureUnit, simplifying if possible. */
MeasureUnit build(UErrorCode& status) const;
/** Compare this SingleUnitImpl to another SingleUnitImpl. */
/**
* Compare this SingleUnitImpl to another SingleUnitImpl for the sake of
* sorting and coalescing.
*
* Takes the sign of dimensionality into account, but not the absolute
* value: per-meter is not considered the same as meter, but meter is
* considered the same as square-meter.
*
* The dimensionless unit generally does not get compared, but if it did, it
* would sort before other units by virtue of index being < 0 and
* dimensionality not being negative.
*/
int32_t compareTo(const SingleUnitImpl& other) const {
if (dimensionality < 0 && other.dimensionality > 0) {
// Positive dimensions first
@ -66,13 +77,36 @@ struct SingleUnitImpl : public UMemory {
return (compareTo(other) == 0);
}
/** Simple unit index, unique for every simple unit. */
int32_t index = 0;
/**
* Returns true if this unit is the "dimensionless base unit", as produced
* by the MeasureUnit() default constructor. (This does not include the
* likes of concentrations or angles.)
*/
bool isDimensionless() const {
return index == -1;
}
/** SI prefix. **/
/**
* Simple unit index, unique for every simple unit, -1 for the dimensionless
* unit. This is an index into a string list in measunit_extra.cpp.
*
* The default value is -1, meaning the dimensionless unit:
* isDimensionless() will return true, until index is changed.
*/
int32_t index = -1;
/**
* SI prefix.
*
* This is ignored for the dimensionless unit.
*/
UMeasureSIPrefix siPrefix = UMEASURE_SI_PREFIX_ONE;
/** Dimensionality. **/
/**
* Dimensionality.
*
* This is meaningless for the dimensionless unit.
*/
int32_t dimensionality = 1;
};
@ -92,7 +126,8 @@ struct MeasureUnitImpl : public UMemory {
*
* @param identifier The unit identifier string.
* @param status Set if the identifier string is not valid.
* @return A newly parsed value object.
* @return A newly parsed value object. Behaviour of this unit is
* unspecified if an error is returned via status.
*/
static MeasureUnitImpl forIdentifier(StringPiece identifier, UErrorCode& status);
@ -145,15 +180,23 @@ struct MeasureUnitImpl : public UMemory {
/** Mutates this MeasureUnitImpl to take the reciprocal. */
void takeReciprocal(UErrorCode& status);
/** Mutates this MeasureUnitImpl to append a single unit. */
/**
* Mutates this MeasureUnitImpl to append a single unit.
*
* @return true if a new item was added. If unit is the dimensionless unit,
* it is never added: the return value will always be false.
*/
bool append(const SingleUnitImpl& singleUnit, UErrorCode& status);
/** The complexity, either SINGLE, COMPOUND, or MIXED. */
UMeasureUnitComplexity complexity = UMEASURE_UNIT_SINGLE;
/**
* The list of simple units. These may be summed or multiplied, based on the value of the
* complexity field.
* The list of simple units. These may be summed or multiplied, based on the
* value of the complexity field.
*
* The "dimensionless" unit (SingleUnitImpl default constructor) must not be
* added to this list.
*/
MaybeStackVector<SingleUnitImpl> units;

View File

@ -11,7 +11,7 @@ U_NAMESPACE_BEGIN
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(NoUnit)
NoUnit U_EXPORT2 NoUnit::base() {
return NoUnit("one");
return NoUnit("");
}
NoUnit U_EXPORT2 NoUnit::percent() {

View File

@ -37,7 +37,7 @@ struct MeasureUnitImpl;
* Enumeration for unit complexity. There are three levels:
*
* - SINGLE: A single unit, optionally with a power and/or SI prefix. Examples: hectare,
* square-kilometer, kilojoule, one-per-second.
* square-kilometer, kilojoule, per-second.
* - COMPOUND: A unit composed of the product of multiple single units. Examples:
* meter-per-second, kilowatt-hour, kilogram-meter-per-square-second.
* - MIXED: A unit composed of the sum of multiple single units. Examples: foot+inch,
@ -387,6 +387,8 @@ class U_I18N_API MeasureUnit: public UObject {
* NOTE: Only works on SINGLE units. If this is a COMPOUND or MIXED unit, an error will
* occur. For more information, see UMeasureUnitComplexity.
*
* For the base dimensionless unit, withDimensionality does nothing.
*
* @param dimensionality The dimensionality (power).
* @param status Set if this is not a SINGLE unit or if another error occurs.
* @return A new SINGLE unit.
@ -401,6 +403,8 @@ class U_I18N_API MeasureUnit: public UObject {
* NOTE: Only works on SINGLE units. If this is a COMPOUND or MIXED unit, an error will
* occur. For more information, see UMeasureUnitComplexity.
*
* For the base dimensionless unit, getDimensionality returns 0.
*
* @param status Set if this is not a SINGLE unit or if another error occurs.
* @return The dimensionality (power) of this simple unit.
* @draft ICU 67
@ -447,7 +451,7 @@ class U_I18N_API MeasureUnit: public UObject {
*
* Examples:
* - Given "meter-kilogram-per-second", three units will be returned: "meter",
* "kilogram", and "one-per-second".
* "kilogram", and "per-second".
* - Given "hour+minute+second", three units will be returned: "hour", "minute",
* and "second".
*
@ -3375,11 +3379,15 @@ class U_I18N_API MeasureUnit: public UObject {
private:
// If non-null, fImpl is owned by the MeasureUnit.
// Used by new draft APIs in ICU 67. If non-null, fImpl is owned by the
// MeasureUnit.
MeasureUnitImpl* fImpl;
// These two ints are indices into static string lists in measunit.cpp
// An index into a static string list in measunit.cpp. If set to -1, fImpl
// is in use instead of fTypeId and fSubTypeId.
int16_t fSubTypeId;
// An index into a static string list in measunit.cpp. If set to -1, fImpl
// is in use instead of fTypeId and fSubTypeId.
int8_t fTypeId;
MeasureUnit(int32_t typeId, int32_t subTypeId);
@ -3389,7 +3397,11 @@ private:
static MeasureUnit *create(int typeId, int subTypeId, UErrorCode &status);
/**
* @return Whether subType is known to ICU.
* Sets output's typeId and subTypeId according to subType, if subType is a
* valid/known identifier.
*
* @return Whether subType is known to ICU. If false, output was not
* modified.
*/
static bool findBySubType(StringPiece subType, MeasureUnit* output);

View File

@ -79,9 +79,10 @@ private:
void Test20332_PersonUnits();
void TestNumericTime();
void TestNumericTimeSomeSpecialFormats();
void TestIdentifiers();
void TestInvalidIdentifiers();
void TestCompoundUnitOperations();
void TestIdentifiers();
void TestDimensionlessBehaviour();
void Test21060_AddressSanitizerProblem();
void verifyFormat(
@ -202,9 +203,10 @@ void MeasureFormatTest::runIndexedTest(
TESTCASE_AUTO(Test20332_PersonUnits);
TESTCASE_AUTO(TestNumericTime);
TESTCASE_AUTO(TestNumericTimeSomeSpecialFormats);
TESTCASE_AUTO(TestIdentifiers);
TESTCASE_AUTO(TestInvalidIdentifiers);
TESTCASE_AUTO(TestCompoundUnitOperations);
TESTCASE_AUTO(TestIdentifiers);
TESTCASE_AUTO(TestDimensionlessBehaviour);
TESTCASE_AUTO(Test21060_AddressSanitizerProblem);
TESTCASE_AUTO_END;
}
@ -3239,10 +3241,43 @@ void MeasureFormatTest::TestNumericTimeSomeSpecialFormats() {
verifyFormat("Danish fhoursFminutes", fmtDa, fhoursFminutes, 2, "2.03,877");
}
void MeasureFormatTest::TestIdentifiers() {
IcuTestErrorCode status(*this, "TestIdentifiers");
struct TestCase {
const char* id;
const char* normalized;
} cases[] = {
// Correctly normalized identifiers should not change
{"", ""},
{"square-meter-per-square-meter", "square-meter-per-square-meter"},
{"kilogram-meter-per-square-meter-square-second",
"kilogram-meter-per-square-meter-square-second"},
{"square-mile-and-square-foot", "square-mile-and-square-foot"},
{"square-foot-and-square-mile", "square-foot-and-square-mile"},
{"per-cubic-centimeter", "per-cubic-centimeter"},
{"per-kilometer", "per-kilometer"},
// Normalization of power and per
{"p2-foot-and-p2-mile", "square-foot-and-square-mile"},
{"gram-square-gram-per-dekagram", "cubic-gram-per-dekagram"},
{"kilogram-per-meter-per-second", "kilogram-per-meter-second"},
// TODO(ICU-20920): Add more test cases once the proper ranking is available.
};
for (const auto &cas : cases) {
status.setScope(cas.id);
MeasureUnit unit = MeasureUnit::forIdentifier(cas.id, status);
status.errIfFailureAndReset();
const char* actual = unit.getIdentifier();
assertEquals(cas.id, cas.normalized, actual);
status.errIfFailureAndReset();
}
}
void MeasureFormatTest::TestInvalidIdentifiers() {
IcuTestErrorCode status(*this, "TestInvalidIdentifiers");
const char* const inputs[] = {
const char *const inputs[] = {
"kilo",
"kilokilo",
"onekilo",
@ -3258,7 +3293,23 @@ void MeasureFormatTest::TestInvalidIdentifiers() {
"-p2-meter",
"+p2-meter",
"+",
"-"
"-",
"-mile",
"-and-mile",
"-per-mile",
"one",
"one-one",
"one-per-mile",
"one-per-cubic-centimeter",
"square--per-meter",
"metersecond", // Must have compound part in between single units
// Negative powers not supported in mixed units yet. TODO(CLDR-13701).
"per-hour-and-hertz",
"hertz-and-per-hour",
// Compound units not supported in mixed units yet. TODO(CLDR-13700).
"kilonewton-meter-and-newton-meter",
};
for (const auto& input : inputs) {
@ -3295,9 +3346,9 @@ void MeasureFormatTest::TestCompoundUnitOperations() {
MeasureUnit overQuarticKilometer1 = kilometer.withDimensionality(-4, status);
verifySingleUnit(squareMeter, UMEASURE_SI_PREFIX_ONE, 2, "square-meter");
verifySingleUnit(overCubicCentimeter, UMEASURE_SI_PREFIX_CENTI, -3, "one-per-cubic-centimeter");
verifySingleUnit(overCubicCentimeter, UMEASURE_SI_PREFIX_CENTI, -3, "per-cubic-centimeter");
verifySingleUnit(quarticKilometer, UMEASURE_SI_PREFIX_KILO, 4, "p4-kilometer");
verifySingleUnit(overQuarticKilometer1, UMEASURE_SI_PREFIX_KILO, -4, "one-per-p4-kilometer");
verifySingleUnit(overQuarticKilometer1, UMEASURE_SI_PREFIX_KILO, -4, "per-p4-kilometer");
assertTrue("power inequality", quarticKilometer != overQuarticKilometer1);
@ -3310,9 +3361,9 @@ void MeasureFormatTest::TestCompoundUnitOperations() {
.reciprocal(status)
.withSIPrefix(UMEASURE_SI_PREFIX_KILO, status);
verifySingleUnit(overQuarticKilometer2, UMEASURE_SI_PREFIX_KILO, -4, "one-per-p4-kilometer");
verifySingleUnit(overQuarticKilometer3, UMEASURE_SI_PREFIX_KILO, -4, "one-per-p4-kilometer");
verifySingleUnit(overQuarticKilometer4, UMEASURE_SI_PREFIX_KILO, -4, "one-per-p4-kilometer");
verifySingleUnit(overQuarticKilometer2, UMEASURE_SI_PREFIX_KILO, -4, "per-p4-kilometer");
verifySingleUnit(overQuarticKilometer3, UMEASURE_SI_PREFIX_KILO, -4, "per-p4-kilometer");
verifySingleUnit(overQuarticKilometer4, UMEASURE_SI_PREFIX_KILO, -4, "per-p4-kilometer");
assertTrue("reciprocal equality", overQuarticKilometer1 == overQuarticKilometer2);
assertTrue("reciprocal equality", overQuarticKilometer1 == overQuarticKilometer3);
@ -3343,7 +3394,7 @@ void MeasureFormatTest::TestCompoundUnitOperations() {
const char* secondCentimeterSub[] = {"centimeter", "square-kilosecond"};
verifyCompoundUnit(secondCentimeter, "centimeter-square-kilosecond",
secondCentimeterSub, UPRV_LENGTHOF(secondCentimeterSub));
const char* secondCentimeterPerKilometerSub[] = {"centimeter", "square-kilosecond", "one-per-kilometer"};
const char* secondCentimeterPerKilometerSub[] = {"centimeter", "square-kilosecond", "per-kilometer"};
verifyCompoundUnit(secondCentimeterPerKilometer, "centimeter-square-kilosecond-per-kilometer",
secondCentimeterPerKilometerSub, UPRV_LENGTHOF(secondCentimeterPerKilometerSub));
@ -3378,31 +3429,16 @@ void MeasureFormatTest::TestCompoundUnitOperations() {
assertTrue("order matters inequality", footInch != inchFoot);
MeasureUnit one1;
MeasureUnit one2 = MeasureUnit::forIdentifier("one", status);
MeasureUnit one3 = MeasureUnit::forIdentifier("", status);
MeasureUnit squareOne = one2.withDimensionality(2, status);
MeasureUnit onePerOne = one2.reciprocal(status);
MeasureUnit squareKiloOne = squareOne.withSIPrefix(UMEASURE_SI_PREFIX_KILO, status);
MeasureUnit onePerSquareKiloOne = squareKiloOne.reciprocal(status);
MeasureUnit oneOne = MeasureUnit::forIdentifier("one-one", status);
MeasureUnit onePlusOne = MeasureUnit::forIdentifier("one-and-one", status);
MeasureUnit kilometer2 = one2.product(kilometer, status);
MeasureUnit dimensionless;
MeasureUnit dimensionless2 = MeasureUnit::forIdentifier("", status);
status.errIfFailureAndReset("Dimensionless MeasureUnit.");
assertTrue("dimensionless equality", dimensionless == dimensionless2);
verifySingleUnit(one1, UMEASURE_SI_PREFIX_ONE, 1, "one");
verifySingleUnit(one2, UMEASURE_SI_PREFIX_ONE, 1, "one");
verifySingleUnit(one3, UMEASURE_SI_PREFIX_ONE, 1, "one");
verifySingleUnit(squareOne, UMEASURE_SI_PREFIX_ONE, 1, "one");
verifySingleUnit(onePerOne, UMEASURE_SI_PREFIX_ONE, 1, "one");
verifySingleUnit(squareKiloOne, UMEASURE_SI_PREFIX_ONE, 1, "one");
verifySingleUnit(onePerSquareKiloOne, UMEASURE_SI_PREFIX_ONE, 1, "one");
verifySingleUnit(oneOne, UMEASURE_SI_PREFIX_ONE, 1, "one");
verifySingleUnit(onePlusOne, UMEASURE_SI_PREFIX_ONE, 1, "one");
// We support starting from an "identity" MeasureUnit and then combining it
// with others via product:
MeasureUnit kilometer2 = dimensionless.product(kilometer, status);
status.errIfFailureAndReset("dimensionless.product(kilometer, status)");
verifySingleUnit(kilometer2, UMEASURE_SI_PREFIX_KILO, 1, "kilometer");
assertTrue("one equality", one1 == one2);
assertTrue("one equality", one2 == one3);
assertTrue("one-per-one equality", onePerOne == onePerSquareKiloOne);
assertTrue("kilometer equality", kilometer == kilometer2);
// Test out-of-range powers
@ -3413,49 +3449,81 @@ void MeasureFormatTest::TestCompoundUnitOperations() {
status.expectErrorAndReset(U_ILLEGAL_ARGUMENT_ERROR);
MeasureUnit power16b = power15.product(kilometer, status);
status.expectErrorAndReset(U_ILLEGAL_ARGUMENT_ERROR);
MeasureUnit powerN15 = MeasureUnit::forIdentifier("one-per-p15-kilometer", status);
verifySingleUnit(powerN15, UMEASURE_SI_PREFIX_KILO, -15, "one-per-p15-kilometer");
MeasureUnit powerN15 = MeasureUnit::forIdentifier("per-p15-kilometer", status);
verifySingleUnit(powerN15, UMEASURE_SI_PREFIX_KILO, -15, "per-p15-kilometer");
status.errIfFailureAndReset();
MeasureUnit powerN16a = MeasureUnit::forIdentifier("one-per-p16-kilometer", status);
MeasureUnit powerN16a = MeasureUnit::forIdentifier("per-p16-kilometer", status);
status.expectErrorAndReset(U_ILLEGAL_ARGUMENT_ERROR);
MeasureUnit powerN16b = powerN15.product(overQuarticKilometer1, status);
status.expectErrorAndReset(U_ILLEGAL_ARGUMENT_ERROR);
}
void MeasureFormatTest::TestIdentifiers() {
IcuTestErrorCode status(*this, "TestIdentifiers");
struct TestCase {
bool valid;
const char* id;
const char* normalized;
} cases[] = {
{true, "square-meter-per-square-meter", "square-meter-per-square-meter"},
{true, "kilogram-meter-per-square-meter-square-second",
"kilogram-meter-per-square-meter-square-second"},
// TODO(ICU-20920): Add more test cases once the proper ranking is available.
};
for (const auto& cas : cases) {
status.setScope(cas.id);
MeasureUnit unit = MeasureUnit::forIdentifier(cas.id, status);
if (!cas.valid) {
status.expectErrorAndReset(U_ILLEGAL_ARGUMENT_ERROR);
continue;
}
const char* actual = unit.getIdentifier();
assertEquals(cas.id, cas.normalized, actual);
status.errIfFailureAndReset();
}
void MeasureFormatTest::TestDimensionlessBehaviour() {
IcuTestErrorCode status(*this, "TestDimensionlessBehaviour");
MeasureUnit dimensionless;
MeasureUnit modified;
// At the time of writing, each of the seven groups below caused
// Parser::from("") to be called:
// splitToSingleUnits
int32_t count;
LocalArray<MeasureUnit> singles = dimensionless.splitToSingleUnits(count, status);
status.errIfFailureAndReset("dimensionless.splitToSingleUnits(...)");
assertEquals("no singles in dimensionless", 0, count);
// product(dimensionless)
MeasureUnit mile = MeasureUnit::getMile();
mile = mile.product(dimensionless, status);
status.errIfFailureAndReset("mile.product(dimensionless, ...)");
verifySingleUnit(mile, UMEASURE_SI_PREFIX_ONE, 1, "mile");
// dimensionless.getSIPrefix()
UMeasureSIPrefix siPrefix = dimensionless.getSIPrefix(status);
status.errIfFailureAndReset("dimensionless.getSIPrefix(...)");
assertEquals("dimensionless SIPrefix", UMEASURE_SI_PREFIX_ONE, siPrefix);
// dimensionless.withSIPrefix()
modified = dimensionless.withSIPrefix(UMEASURE_SI_PREFIX_KILO, status);
status.errIfFailureAndReset("dimensionless.withSIPrefix(...)");
singles = modified.splitToSingleUnits(count, status);
assertEquals("no singles in modified", 0, count);
siPrefix = modified.getSIPrefix(status);
status.errIfFailureAndReset("modified.getSIPrefix(...)");
assertEquals("modified SIPrefix", UMEASURE_SI_PREFIX_ONE, siPrefix);
// dimensionless.getComplexity()
UMeasureUnitComplexity complexity = dimensionless.getComplexity(status);
status.errIfFailureAndReset("dimensionless.getComplexity(...)");
assertEquals("dimensionless complexity", UMEASURE_UNIT_SINGLE, complexity);
// Dimensionality is mostly meaningless for dimensionless units, but it's
// still considered a SINGLE unit, so this code doesn't throw errors:
// dimensionless.getDimensionality()
int32_t dimensionality = dimensionless.getDimensionality(status);
status.errIfFailureAndReset("dimensionless.getDimensionality(...)");
assertEquals("dimensionless dimensionality", 0, dimensionality);
// dimensionless.withDimensionality()
dimensionless.withDimensionality(-1, status);
status.errIfFailureAndReset("dimensionless.withDimensionality(...)");
dimensionality = dimensionless.getDimensionality(status);
status.errIfFailureAndReset("dimensionless.getDimensionality(...)");
assertEquals("dimensionless dimensionality", 0, dimensionality);
}
// ICU-21060
void MeasureFormatTest::Test21060_AddressSanitizerProblem() {
UErrorCode status = U_ZERO_ERROR;
MeasureUnit first = MeasureUnit::forIdentifier("one", status);
IcuTestErrorCode status(*this, "Test21060_AddressSanitizerProblem");
MeasureUnit first = MeasureUnit::forIdentifier("", status);
status.errIfFailureAndReset();
// Experimentally, a compound unit like "kilogram-meter" failed. A single
// unit like "kilogram" or "meter" did not fail, did not trigger the
// problem.
MeasureUnit crux = MeasureUnit::forIdentifier("one-per-meter", status);
MeasureUnit crux = MeasureUnit::forIdentifier("per-meter", status);
// Heap allocation of a new CharString for first.identifier happens here:
first = first.product(crux, status);
@ -3469,12 +3537,14 @@ void MeasureFormatTest::Test21060_AddressSanitizerProblem() {
first = first.product(crux, status);
// Proving we've had no failure yet:
if (U_FAILURE(status)) return;
status.errIfFailureAndReset();
// heap-use-after-free failure happened here, since a SingleUnitImpl had
// held onto a StringPiece pointing at a substring of an identifier that was
// freed above:
second = second.product(crux, status);
status.errIfFailureAndReset();
}