From 08aafba6642d8719b662d2b0f9fdd38376c6fb99 Mon Sep 17 00:00:00 2001 From: Frank Tang Date: Tue, 1 Sep 2020 18:17:44 -0700 Subject: [PATCH] ICU-21236 Refactor UniqueCharStrings / CharStringMap Move them UniqueCharStrings and CharStringMap from loclikelysubtags.{h,cpp} to separate header files so so we can reuse them to implement https://github.com/unicode-org/icu/pull/1254 --- icu4c/source/common/charstrmap.h | 55 +++++++++++++++ icu4c/source/common/common.vcxproj | 2 + icu4c/source/common/common.vcxproj.filters | 6 ++ icu4c/source/common/common_uwp.vcxproj | 4 +- icu4c/source/common/loclikelysubtags.cpp | 66 +---------------- icu4c/source/common/loclikelysubtags.h | 40 +---------- icu4c/source/common/uniquecharstr.h | 82 ++++++++++++++++++++++ 7 files changed, 151 insertions(+), 104 deletions(-) create mode 100644 icu4c/source/common/charstrmap.h create mode 100644 icu4c/source/common/uniquecharstr.h diff --git a/icu4c/source/common/charstrmap.h b/icu4c/source/common/charstrmap.h new file mode 100644 index 0000000000..3320a46208 --- /dev/null +++ b/icu4c/source/common/charstrmap.h @@ -0,0 +1,55 @@ +// © 2020 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +// charstrmap.h +// created: 2020sep01 Frank Yung-Fong Tang + +#ifndef __CHARSTRMAP_H__ +#define __CHARSTRMAP_H__ + +#include +#include "unicode/utypes.h" +#include "unicode/uobject.h" +#include "uhash.h" + +U_NAMESPACE_BEGIN + +/** + * Map of const char * keys & values. + * Stores pointers as is: Does not own/copy/adopt/release strings. + */ +class CharStringMap final : public UMemory { +public: + /** Constructs an unusable non-map. */ + CharStringMap() : map(nullptr) {} + CharStringMap(int32_t size, UErrorCode &errorCode) { + map = uhash_openSize(uhash_hashChars, uhash_compareChars, uhash_compareChars, + size, &errorCode); + } + CharStringMap(CharStringMap &&other) U_NOEXCEPT : map(other.map) { + other.map = nullptr; + } + CharStringMap(const CharStringMap &other) = delete; + ~CharStringMap() { + uhash_close(map); + } + + CharStringMap &operator=(CharStringMap &&other) U_NOEXCEPT { + map = other.map; + other.map = nullptr; + return *this; + } + CharStringMap &operator=(const CharStringMap &other) = delete; + + const char *get(const char *key) const { return static_cast(uhash_get(map, key)); } + void put(const char *key, const char *value, UErrorCode &errorCode) { + uhash_put(map, const_cast(key), const_cast(value), &errorCode); + } + +private: + UHashtable *map; +}; + +U_NAMESPACE_END + +#endif // __CHARSTRMAP_H__ diff --git a/icu4c/source/common/common.vcxproj b/icu4c/source/common/common.vcxproj index c9e4f98568..d7f668ea9d 100644 --- a/icu4c/source/common/common.vcxproj +++ b/icu4c/source/common/common.vcxproj @@ -341,7 +341,9 @@ + + diff --git a/icu4c/source/common/common.vcxproj.filters b/icu4c/source/common/common.vcxproj.filters index 5fabc6f461..f1ba90109c 100644 --- a/icu4c/source/common/common.vcxproj.filters +++ b/icu4c/source/common/common.vcxproj.filters @@ -840,6 +840,12 @@ locales & resources + + locales & resources + + + locales & resources + locales & resources diff --git a/icu4c/source/common/common_uwp.vcxproj b/icu4c/source/common/common_uwp.vcxproj index 65672e1825..a57917292a 100644 --- a/icu4c/source/common/common_uwp.vcxproj +++ b/icu4c/source/common/common_uwp.vcxproj @@ -476,6 +476,8 @@ + + @@ -528,4 +530,4 @@ - \ No newline at end of file + diff --git a/icu4c/source/common/loclikelysubtags.cpp b/icu4c/source/common/loclikelysubtags.cpp index 1fbf1a1463..03ab858274 100644 --- a/icu4c/source/common/loclikelysubtags.cpp +++ b/icu4c/source/common/loclikelysubtags.cpp @@ -20,6 +20,7 @@ #include "uhash.h" #include "uinvchar.h" #include "umutex.h" +#include "uniquecharstr.h" #include "uresdata.h" #include "uresimp.h" @@ -31,71 +32,6 @@ constexpr char PSEUDO_ACCENTS_PREFIX = '\''; // -XA, -PSACCENT constexpr char PSEUDO_BIDI_PREFIX = '+'; // -XB, -PSBIDI constexpr char PSEUDO_CRACKED_PREFIX = ','; // -XC, -PSCRACK -/** - * Stores NUL-terminated strings with duplicate elimination. - * Checks for unique UTF-16 string pointers and converts to invariant characters. - */ -class UniqueCharStrings { -public: - UniqueCharStrings(UErrorCode &errorCode) : strings(nullptr) { - uhash_init(&map, uhash_hashUChars, uhash_compareUChars, uhash_compareLong, &errorCode); - if (U_FAILURE(errorCode)) { return; } - strings = new CharString(); - if (strings == nullptr) { - errorCode = U_MEMORY_ALLOCATION_ERROR; - } - } - ~UniqueCharStrings() { - uhash_close(&map); - delete strings; - } - - /** Returns/orphans the CharString that contains all strings. */ - CharString *orphanCharStrings() { - CharString *result = strings; - strings = nullptr; - return result; - } - - /** Adds a string and returns a unique number for it. */ - int32_t add(const UnicodeString &s, UErrorCode &errorCode) { - if (U_FAILURE(errorCode)) { return 0; } - if (isFrozen) { - errorCode = U_NO_WRITE_PERMISSION; - return 0; - } - // The string points into the resource bundle. - const char16_t *p = s.getBuffer(); - int32_t oldIndex = uhash_geti(&map, p); - if (oldIndex != 0) { // found duplicate - return oldIndex; - } - // Explicit NUL terminator for the previous string. - // The strings object is also terminated with one implicit NUL. - strings->append(0, errorCode); - int32_t newIndex = strings->length(); - strings->appendInvariantChars(s, errorCode); - uhash_puti(&map, const_cast(p), newIndex, &errorCode); - return newIndex; - } - - void freeze() { isFrozen = true; } - - /** - * Returns a string pointer for its unique number, if this object is frozen. - * Otherwise nullptr. - */ - const char *get(int32_t i) const { - U_ASSERT(isFrozen); - return isFrozen && i > 0 ? strings->data() + i : nullptr; - } - -private: - UHashtable map; - CharString *strings; - bool isFrozen = false; -}; - } // namespace LocaleDistanceData::LocaleDistanceData(LocaleDistanceData &&data) : diff --git a/icu4c/source/common/loclikelysubtags.h b/icu4c/source/common/loclikelysubtags.h index 90ddfffaca..14a01a5eac 100644 --- a/icu4c/source/common/loclikelysubtags.h +++ b/icu4c/source/common/loclikelysubtags.h @@ -1,5 +1,5 @@ // © 2019 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html#License +// License & terms of use: http://www.unicode.org/copyright.html // loclikelysubtags.h // created: 2019may08 Markus W. Scherer @@ -13,49 +13,13 @@ #include "unicode/locid.h" #include "unicode/uobject.h" #include "unicode/ures.h" +#include "charstrmap.h" #include "lsr.h" -#include "uhash.h" U_NAMESPACE_BEGIN struct XLikelySubtagsData; -/** - * Map of const char * keys & values. - * Stores pointers as is: Does not own/copy/adopt/release strings. - */ -class CharStringMap final : public UMemory { -public: - /** Constructs an unusable non-map. */ - CharStringMap() : map(nullptr) {} - CharStringMap(int32_t size, UErrorCode &errorCode) { - map = uhash_openSize(uhash_hashChars, uhash_compareChars, uhash_compareChars, - size, &errorCode); - } - CharStringMap(CharStringMap &&other) U_NOEXCEPT : map(other.map) { - other.map = nullptr; - } - CharStringMap(const CharStringMap &other) = delete; - ~CharStringMap() { - uhash_close(map); - } - - CharStringMap &operator=(CharStringMap &&other) U_NOEXCEPT { - map = other.map; - other.map = nullptr; - return *this; - } - CharStringMap &operator=(const CharStringMap &other) = delete; - - const char *get(const char *key) const { return static_cast(uhash_get(map, key)); } - void put(const char *key, const char *value, UErrorCode &errorCode) { - uhash_put(map, const_cast(key), const_cast(value), &errorCode); - } - -private: - UHashtable *map; -}; - struct LocaleDistanceData { LocaleDistanceData() = default; LocaleDistanceData(LocaleDistanceData &&data); diff --git a/icu4c/source/common/uniquecharstr.h b/icu4c/source/common/uniquecharstr.h new file mode 100644 index 0000000000..7e871dc1d7 --- /dev/null +++ b/icu4c/source/common/uniquecharstr.h @@ -0,0 +1,82 @@ +// © 2020 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +// uniquecharstr.h +// created: 2020sep01 Frank Yung-Fong Tang + +#ifndef __UNIQUECHARSTR_H__ +#define __UNIQUECHARSTR_H__ + +#include "charstr.h" +#include "uhash.h" + +U_NAMESPACE_BEGIN + +/** + * Stores NUL-terminated strings with duplicate elimination. + * Checks for unique UTF-16 string pointers and converts to invariant characters. + */ +class UniqueCharStrings { +public: + UniqueCharStrings(UErrorCode &errorCode) : strings(nullptr) { + uhash_init(&map, uhash_hashUChars, uhash_compareUChars, uhash_compareLong, &errorCode); + if (U_FAILURE(errorCode)) { return; } + strings = new CharString(); + if (strings == nullptr) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + } + } + ~UniqueCharStrings() { + uhash_close(&map); + delete strings; + } + + /** Returns/orphans the CharString that contains all strings. */ + CharString *orphanCharStrings() { + CharString *result = strings; + strings = nullptr; + return result; + } + + /** Adds a string and returns a unique number for it. */ + int32_t add(const UnicodeString &s, UErrorCode &errorCode) { + if (U_FAILURE(errorCode)) { return 0; } + if (isFrozen) { + errorCode = U_NO_WRITE_PERMISSION; + return 0; + } + // The string points into the resource bundle. + const char16_t *p = s.getBuffer(); + int32_t oldIndex = uhash_geti(&map, p); + if (oldIndex != 0) { // found duplicate + return oldIndex; + } + // Explicit NUL terminator for the previous string. + // The strings object is also terminated with one implicit NUL. + strings->append(0, errorCode); + int32_t newIndex = strings->length(); + strings->appendInvariantChars(s, errorCode); + uhash_puti(&map, const_cast(p), newIndex, &errorCode); + return newIndex; + } + + void freeze() { isFrozen = true; } + + /** + * Returns a string pointer for its unique number, if this object is frozen. + * Otherwise nullptr. + */ + const char *get(int32_t i) const { + U_ASSERT(isFrozen); + return isFrozen && i > 0 ? strings->data() + i : nullptr; + } + +private: + UHashtable map; + CharString *strings; + bool isFrozen = false; +}; + +U_NAMESPACE_END + +#endif // __UNIQUECHARSTR_H__