ICU-21169 Add SingleUnitImpl::getSimpleUnitID().

Also:
- Use BytesTrie not UCharsTrie.
- Add a nullptr check for a uprv_malloc.
This commit is contained in:
Hugo van der Merwe 2020-06-16 16:12:15 +02:00 committed by Hugo
parent 982c4799bf
commit 6a1df9e16c
5 changed files with 164 additions and 144 deletions

View File

@ -101,9 +101,10 @@ public:
* Multiple calls to buildStringPiece() return StringPieces referring to the
* builder's same byte array, without rebuilding.
* If buildStringPiece() is called after build(), the trie will be
* re-serialized into a new array.
* If build() is called after buildStringPiece(), the trie object will become
* the owner of the previously returned array.
* re-serialized into a new array (because build() passes on ownership).
* If build() is called after buildStringPiece(), the trie object returned
* by build() will become the owner of the underlying string for the
* previously returned StringPiece.
* After clear() has been called, a new array will be used as well.
* @param buildOption Build option, see UStringTrieBuildOption.
* @param errorCode Standard ICU error code. Its input value must

View File

@ -101,9 +101,10 @@ public:
* Multiple calls to buildUnicodeString() set the UnicodeStrings to the
* builder's same char16_t array, without rebuilding.
* If buildUnicodeString() is called after build(), the trie will be
* re-serialized into a new array.
* If build() is called after buildUnicodeString(), the trie object will become
* the owner of the previously returned array.
* re-serialized into a new array (because build() passes on ownership).
* If build() is called after buildUnicodeString(), the trie object returned
* by build() will become the owner of the underlying data for the
* previously returned UnicodeString.
* After clear() has been called, a new array will be used as well.
* @param buildOption Build option, see UStringTrieBuildOption.
* @param result A UnicodeString which will be set to the char16_t-serialized

View File

@ -12,18 +12,18 @@
// Helpful in toString methods and elsewhere.
#define UNISTR_FROM_STRING_EXPLICIT
#include <cstdlib>
#include "cstring.h"
#include "measunit_impl.h"
#include "uarrsort.h"
#include "uassert.h"
#include "ucln_in.h"
#include "umutex.h"
#include "unicode/bytestrie.h"
#include "unicode/bytestriebuilder.h"
#include "unicode/errorcode.h"
#include "unicode/localpointer.h"
#include "unicode/measunit.h"
#include "unicode/ucharstrie.h"
#include "unicode/ucharstriebuilder.h"
#include <cstdlib>
#include "cstr.h"
@ -111,114 +111,117 @@ const struct SIPrefixStrings {
};
// TODO(ICU-21059): Get this list from data
const char16_t* const gSimpleUnits[] = {
u"candela",
u"carat",
u"gram",
u"ounce",
u"ounce-troy",
u"pound",
u"kilogram",
u"stone",
u"ton",
u"metric-ton",
u"earth-mass",
u"solar-mass",
u"point",
u"inch",
u"foot",
u"yard",
u"meter",
u"fathom",
u"furlong",
u"mile",
u"nautical-mile",
u"mile-scandinavian",
u"100-kilometer",
u"earth-radius",
u"solar-radius",
u"astronomical-unit",
u"light-year",
u"parsec",
u"second",
u"minute",
u"hour",
u"day",
u"day-person",
u"week",
u"week-person",
u"month",
u"month-person",
u"year",
u"year-person",
u"decade",
u"century",
u"ampere",
u"fahrenheit",
u"kelvin",
u"celsius",
u"arc-second",
u"arc-minute",
u"degree",
u"radian",
u"revolution",
u"item",
u"mole",
u"permillion",
u"permyriad",
u"permille",
u"percent",
u"karat",
u"portion",
u"bit",
u"byte",
u"dot",
u"pixel",
u"em",
u"hertz",
u"newton",
u"pound-force",
u"pascal",
u"bar",
u"atmosphere",
u"ofhg",
u"electronvolt",
u"dalton",
u"joule",
u"calorie",
u"british-thermal-unit",
u"foodcalorie",
u"therm-us",
u"watt",
u"horsepower",
u"solar-luminosity",
u"volt",
u"ohm",
u"dunam",
u"acre",
u"hectare",
u"teaspoon",
u"tablespoon",
u"fluid-ounce-imperial",
u"fluid-ounce",
u"cup",
u"cup-metric",
u"pint",
u"pint-metric",
u"quart",
u"liter",
u"gallon",
u"gallon-imperial",
u"bushel",
u"barrel",
u"knot",
u"g-force",
u"lux",
//
// NB: SingleUnitImpl::getSimpleUnitID() returns char* pointers to these
// strings; take appropriate care when refactoring and updating documentation.
const char *const gSimpleUnits[] = {
"candela",
"carat",
"gram",
"ounce",
"ounce-troy",
"pound",
"kilogram",
"stone",
"ton",
"metric-ton",
"earth-mass",
"solar-mass",
"point",
"inch",
"foot",
"yard",
"meter",
"fathom",
"furlong",
"mile",
"nautical-mile",
"mile-scandinavian",
"100-kilometer",
"earth-radius",
"solar-radius",
"astronomical-unit",
"light-year",
"parsec",
"second",
"minute",
"hour",
"day",
"day-person",
"week",
"week-person",
"month",
"month-person",
"year",
"year-person",
"decade",
"century",
"ampere",
"fahrenheit",
"kelvin",
"celsius",
"arc-second",
"arc-minute",
"degree",
"radian",
"revolution",
"item",
"mole",
"permillion",
"permyriad",
"permille",
"percent",
"karat",
"portion",
"bit",
"byte",
"dot",
"pixel",
"em",
"hertz",
"newton",
"pound-force",
"pascal",
"bar",
"atmosphere",
"ofhg",
"electronvolt",
"dalton",
"joule",
"calorie",
"british-thermal-unit",
"foodcalorie",
"therm-us",
"watt",
"horsepower",
"solar-luminosity",
"volt",
"ohm",
"dunam",
"acre",
"hectare",
"teaspoon",
"tablespoon",
"fluid-ounce-imperial",
"fluid-ounce",
"cup",
"cup-metric",
"pint",
"pint-metric",
"quart",
"liter",
"gallon",
"gallon-imperial",
"bushel",
"barrel",
"knot",
"g-force",
"lux",
};
icu::UInitOnce gUnitExtrasInitOnce = U_INITONCE_INITIALIZER;
char16_t* kSerializedUnitExtrasStemTrie = nullptr;
char *kSerializedUnitExtrasStemTrie = nullptr;
UBool U_CALLCONV cleanupUnitExtras() {
uprv_free(kSerializedUnitExtrasStemTrie);
@ -230,37 +233,36 @@ UBool U_CALLCONV cleanupUnitExtras() {
void U_CALLCONV initUnitExtras(UErrorCode& status) {
ucln_i18n_registerCleanup(UCLN_I18N_UNIT_EXTRAS, cleanupUnitExtras);
UCharsTrieBuilder b(status);
BytesTrieBuilder b(status);
if (U_FAILURE(status)) { return; }
// Add SI prefixes
for (const auto& siPrefixInfo : gSIPrefixStrings) {
UnicodeString uSIPrefix(siPrefixInfo.string, -1, US_INV);
b.add(uSIPrefix, siPrefixInfo.value + kSIPrefixOffset, status);
b.add(siPrefixInfo.string, siPrefixInfo.value + kSIPrefixOffset, status);
}
if (U_FAILURE(status)) { return; }
// Add syntax parts (compound, power prefixes)
b.add(u"-per-", COMPOUND_PART_PER, status);
b.add(u"-", COMPOUND_PART_TIMES, status);
b.add(u"-and-", COMPOUND_PART_AND, status);
b.add(u"per-", INITIAL_COMPOUND_PART_PER, status);
b.add(u"square-", POWER_PART_P2, status);
b.add(u"cubic-", POWER_PART_P3, status);
b.add(u"p2-", POWER_PART_P2, status);
b.add(u"p3-", POWER_PART_P3, status);
b.add(u"p4-", POWER_PART_P4, status);
b.add(u"p5-", POWER_PART_P5, status);
b.add(u"p6-", POWER_PART_P6, status);
b.add(u"p7-", POWER_PART_P7, status);
b.add(u"p8-", POWER_PART_P8, status);
b.add(u"p9-", POWER_PART_P9, status);
b.add(u"p10-", POWER_PART_P10, status);
b.add(u"p11-", POWER_PART_P11, status);
b.add(u"p12-", POWER_PART_P12, status);
b.add(u"p13-", POWER_PART_P13, status);
b.add(u"p14-", POWER_PART_P14, status);
b.add(u"p15-", POWER_PART_P15, status);
b.add("-per-", COMPOUND_PART_PER, status);
b.add("-", COMPOUND_PART_TIMES, status);
b.add("-and-", COMPOUND_PART_AND, status);
b.add("per-", INITIAL_COMPOUND_PART_PER, status);
b.add("square-", POWER_PART_P2, status);
b.add("cubic-", POWER_PART_P3, status);
b.add("p2-", POWER_PART_P2, status);
b.add("p3-", POWER_PART_P3, status);
b.add("p4-", POWER_PART_P4, status);
b.add("p5-", POWER_PART_P5, status);
b.add("p6-", POWER_PART_P6, status);
b.add("p7-", POWER_PART_P7, status);
b.add("p8-", POWER_PART_P8, status);
b.add("p9-", POWER_PART_P9, status);
b.add("p10-", POWER_PART_P10, status);
b.add("p11-", POWER_PART_P11, status);
b.add("p12-", POWER_PART_P12, status);
b.add("p13-", POWER_PART_P13, status);
b.add("p14-", POWER_PART_P14, status);
b.add("p15-", POWER_PART_P15, status);
if (U_FAILURE(status)) { return; }
// Add sanctioned simple units by offset
@ -271,14 +273,17 @@ void U_CALLCONV initUnitExtras(UErrorCode& status) {
// Build the BytesTrie
// TODO: Use SLOW or FAST here?
UnicodeString result;
b.buildUnicodeString(USTRINGTRIE_BUILD_FAST, result, status);
StringPiece result = b.buildStringPiece(USTRINGTRIE_BUILD_FAST, status);
if (U_FAILURE(status)) { return; }
// Copy the result into the global constant pointer
size_t numBytes = result.length() * sizeof(char16_t);
kSerializedUnitExtrasStemTrie = static_cast<char16_t*>(uprv_malloc(numBytes));
uprv_memcpy(kSerializedUnitExtrasStemTrie, result.getBuffer(), numBytes);
size_t numBytes = result.length();
kSerializedUnitExtrasStemTrie = static_cast<char *>(uprv_malloc(numBytes));
if (kSerializedUnitExtrasStemTrie == nullptr) {
status = U_MEMORY_ALLOCATION_ERROR;
return;
}
uprv_memcpy(kSerializedUnitExtrasStemTrie, result.data(), numBytes);
}
class Token {
@ -385,7 +390,7 @@ private:
// should live longer than this Parser - and the parser shouldn't return any
// references to that string.
StringPiece fSource;
UCharsTrie fTrie;
BytesTrie fTrie;
// Set to true when we've seen a "-per-" or a "per-", after which all units
// are in the denominator. Until we find an "-and-", at which point the
@ -666,7 +671,7 @@ void serializeSingle(const SingleUnitImpl& singleUnit, bool first, CharString& o
return;
}
output.appendInvariantChars(gSimpleUnits[singleUnit.index], status);
output.append(gSimpleUnits[singleUnit.index], status);
}
/**
@ -777,6 +782,9 @@ MeasureUnit SingleUnitImpl::build(UErrorCode& status) const {
return std::move(temp).build(status);
}
const char *SingleUnitImpl::getSimpleUnitID() const {
return gSimpleUnits[index];
}
MeasureUnitImpl MeasureUnitImpl::forIdentifier(StringPiece identifier, UErrorCode& status) {
return Parser::from(identifier, status).parse(status);

View File

@ -32,6 +32,16 @@ struct SingleUnitImpl : public UMemory {
/** Transform this SingleUnitImpl into a MeasureUnit, simplifying if possible. */
MeasureUnit build(UErrorCode& status) const;
/**
* Returns the "simple unit ID", without SI or dimensionality prefix: this
* instance may represent a square-kilometer, but only "meter" will be
* returned.
*
* The returned pointer points at static memory and does not need to be
* cleaned up.
*/
const char *getSimpleUnitID() const;
/**
* Compare this SingleUnitImpl to another SingleUnitImpl for the sake of
* sorting and coalescing.

View File

@ -1065,7 +1065,7 @@ group: sharedbreakiterator
group: units_extra
measunit_extra.o
deps
units ucharstriebuilder ucharstrie uclean_i18n
units bytestriebuilder bytestrie uclean_i18n
group: units
measunit.o currunit.o nounit.o