ICU-21236 Refactor UniqueCharStrings / CharStringMap
Move UniqueCharStrings and CharStringMap from loclikelysubtags.{h,cpp} to separate header files so we can reuse them to implement https://github.com/unicode-org/icu/pull/1254
This commit is contained in:
parent
341608f7b6
commit
08aafba664
55
icu4c/source/common/charstrmap.h
Normal file
55
icu4c/source/common/charstrmap.h
Normal file
@ -0,0 +1,55 @@
|
|||||||
|
// © 2020 and later: Unicode, Inc. and others.
|
||||||
|
// License & terms of use: http://www.unicode.org/copyright.html
|
||||||
|
|
||||||
|
// charstrmap.h
|
||||||
|
// created: 2020sep01 Frank Yung-Fong Tang
|
||||||
|
|
||||||
|
#ifndef __CHARSTRMAP_H__
|
||||||
|
#define __CHARSTRMAP_H__
|
||||||
|
|
||||||
|
#include <utility>
|
||||||
|
#include "unicode/utypes.h"
|
||||||
|
#include "unicode/uobject.h"
|
||||||
|
#include "uhash.h"
|
||||||
|
|
||||||
|
U_NAMESPACE_BEGIN
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Map of const char * keys & values.
|
||||||
|
* Stores pointers as is: Does not own/copy/adopt/release strings.
|
||||||
|
*/
|
||||||
|
class CharStringMap final : public UMemory {
|
||||||
|
public:
|
||||||
|
/** Constructs an unusable non-map. */
|
||||||
|
CharStringMap() : map(nullptr) {}
|
||||||
|
CharStringMap(int32_t size, UErrorCode &errorCode) {
|
||||||
|
map = uhash_openSize(uhash_hashChars, uhash_compareChars, uhash_compareChars,
|
||||||
|
size, &errorCode);
|
||||||
|
}
|
||||||
|
CharStringMap(CharStringMap &&other) U_NOEXCEPT : map(other.map) {
|
||||||
|
other.map = nullptr;
|
||||||
|
}
|
||||||
|
CharStringMap(const CharStringMap &other) = delete;
|
||||||
|
~CharStringMap() {
|
||||||
|
uhash_close(map);
|
||||||
|
}
|
||||||
|
|
||||||
|
CharStringMap &operator=(CharStringMap &&other) U_NOEXCEPT {
|
||||||
|
map = other.map;
|
||||||
|
other.map = nullptr;
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
CharStringMap &operator=(const CharStringMap &other) = delete;
|
||||||
|
|
||||||
|
const char *get(const char *key) const { return static_cast<const char *>(uhash_get(map, key)); }
|
||||||
|
void put(const char *key, const char *value, UErrorCode &errorCode) {
|
||||||
|
uhash_put(map, const_cast<char *>(key), const_cast<char *>(value), &errorCode);
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
UHashtable *map;
|
||||||
|
};
|
||||||
|
|
||||||
|
U_NAMESPACE_END
|
||||||
|
|
||||||
|
#endif // __CHARSTRMAP_H__
|
@ -341,7 +341,9 @@
|
|||||||
<ClInclude Include="localeprioritylist.h" />
|
<ClInclude Include="localeprioritylist.h" />
|
||||||
<ClInclude Include="locbased.h" />
|
<ClInclude Include="locbased.h" />
|
||||||
<ClInclude Include="locdistance.h" />
|
<ClInclude Include="locdistance.h" />
|
||||||
|
<ClInclude Include="charstrmap.h" />
|
||||||
<ClInclude Include="loclikelysubtags.h" />
|
<ClInclude Include="loclikelysubtags.h" />
|
||||||
|
<ClInclude Include="uniquecharstr.h" />
|
||||||
<ClInclude Include="locutil.h" />
|
<ClInclude Include="locutil.h" />
|
||||||
<ClInclude Include="lsr.h" />
|
<ClInclude Include="lsr.h" />
|
||||||
<ClInclude Include="sharedobject.h" />
|
<ClInclude Include="sharedobject.h" />
|
||||||
|
@ -840,6 +840,12 @@
|
|||||||
<ClInclude Include="locdistance.h">
|
<ClInclude Include="locdistance.h">
|
||||||
<Filter>locales & resources</Filter>
|
<Filter>locales & resources</Filter>
|
||||||
</ClInclude>
|
</ClInclude>
|
||||||
|
<ClInclude Include="charstrmap.h">
|
||||||
|
<Filter>locales & resources</Filter>
|
||||||
|
</ClInclude>
|
||||||
|
<ClInclude Include="uniquecharstr.h">
|
||||||
|
<Filter>locales & resources</Filter>
|
||||||
|
</ClInclude>
|
||||||
<ClInclude Include="loclikelysubtags.h">
|
<ClInclude Include="loclikelysubtags.h">
|
||||||
<Filter>locales & resources</Filter>
|
<Filter>locales & resources</Filter>
|
||||||
</ClInclude>
|
</ClInclude>
|
||||||
|
@ -476,6 +476,8 @@
|
|||||||
<ClInclude Include="localeprioritylist.h" />
|
<ClInclude Include="localeprioritylist.h" />
|
||||||
<ClInclude Include="locbased.h" />
|
<ClInclude Include="locbased.h" />
|
||||||
<ClInclude Include="locdistance.h" />
|
<ClInclude Include="locdistance.h" />
|
||||||
|
<ClInclude Include="charstrmap.h" />
|
||||||
|
<ClInclude Include="uniquecharstr.h" />
|
||||||
<ClInclude Include="loclikelysubtags.h" />
|
<ClInclude Include="loclikelysubtags.h" />
|
||||||
<ClInclude Include="locutil.h" />
|
<ClInclude Include="locutil.h" />
|
||||||
<ClInclude Include="lsr.h" />
|
<ClInclude Include="lsr.h" />
|
||||||
@ -528,4 +530,4 @@
|
|||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" Condition="'$(SkipUWP)'!='true'" />
|
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" Condition="'$(SkipUWP)'!='true'" />
|
||||||
<ImportGroup Label="ExtensionTargets">
|
<ImportGroup Label="ExtensionTargets">
|
||||||
</ImportGroup>
|
</ImportGroup>
|
||||||
</Project>
|
</Project>
|
||||||
|
@ -20,6 +20,7 @@
|
|||||||
#include "uhash.h"
|
#include "uhash.h"
|
||||||
#include "uinvchar.h"
|
#include "uinvchar.h"
|
||||||
#include "umutex.h"
|
#include "umutex.h"
|
||||||
|
#include "uniquecharstr.h"
|
||||||
#include "uresdata.h"
|
#include "uresdata.h"
|
||||||
#include "uresimp.h"
|
#include "uresimp.h"
|
||||||
|
|
||||||
@ -31,71 +32,6 @@ constexpr char PSEUDO_ACCENTS_PREFIX = '\''; // -XA, -PSACCENT
|
|||||||
constexpr char PSEUDO_BIDI_PREFIX = '+'; // -XB, -PSBIDI
|
constexpr char PSEUDO_BIDI_PREFIX = '+'; // -XB, -PSBIDI
|
||||||
constexpr char PSEUDO_CRACKED_PREFIX = ','; // -XC, -PSCRACK
|
constexpr char PSEUDO_CRACKED_PREFIX = ','; // -XC, -PSCRACK
|
||||||
|
|
||||||
/**
|
|
||||||
* Stores NUL-terminated strings with duplicate elimination.
|
|
||||||
* Checks for unique UTF-16 string pointers and converts to invariant characters.
|
|
||||||
*/
|
|
||||||
class UniqueCharStrings {
|
|
||||||
public:
|
|
||||||
UniqueCharStrings(UErrorCode &errorCode) : strings(nullptr) {
|
|
||||||
uhash_init(&map, uhash_hashUChars, uhash_compareUChars, uhash_compareLong, &errorCode);
|
|
||||||
if (U_FAILURE(errorCode)) { return; }
|
|
||||||
strings = new CharString();
|
|
||||||
if (strings == nullptr) {
|
|
||||||
errorCode = U_MEMORY_ALLOCATION_ERROR;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
~UniqueCharStrings() {
|
|
||||||
uhash_close(&map);
|
|
||||||
delete strings;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Returns/orphans the CharString that contains all strings. */
|
|
||||||
CharString *orphanCharStrings() {
|
|
||||||
CharString *result = strings;
|
|
||||||
strings = nullptr;
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Adds a string and returns a unique number for it. */
|
|
||||||
int32_t add(const UnicodeString &s, UErrorCode &errorCode) {
|
|
||||||
if (U_FAILURE(errorCode)) { return 0; }
|
|
||||||
if (isFrozen) {
|
|
||||||
errorCode = U_NO_WRITE_PERMISSION;
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
// The string points into the resource bundle.
|
|
||||||
const char16_t *p = s.getBuffer();
|
|
||||||
int32_t oldIndex = uhash_geti(&map, p);
|
|
||||||
if (oldIndex != 0) { // found duplicate
|
|
||||||
return oldIndex;
|
|
||||||
}
|
|
||||||
// Explicit NUL terminator for the previous string.
|
|
||||||
// The strings object is also terminated with one implicit NUL.
|
|
||||||
strings->append(0, errorCode);
|
|
||||||
int32_t newIndex = strings->length();
|
|
||||||
strings->appendInvariantChars(s, errorCode);
|
|
||||||
uhash_puti(&map, const_cast<char16_t *>(p), newIndex, &errorCode);
|
|
||||||
return newIndex;
|
|
||||||
}
|
|
||||||
|
|
||||||
void freeze() { isFrozen = true; }
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns a string pointer for its unique number, if this object is frozen.
|
|
||||||
* Otherwise nullptr.
|
|
||||||
*/
|
|
||||||
const char *get(int32_t i) const {
|
|
||||||
U_ASSERT(isFrozen);
|
|
||||||
return isFrozen && i > 0 ? strings->data() + i : nullptr;
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
|
||||||
UHashtable map;
|
|
||||||
CharString *strings;
|
|
||||||
bool isFrozen = false;
|
|
||||||
};
|
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
LocaleDistanceData::LocaleDistanceData(LocaleDistanceData &&data) :
|
LocaleDistanceData::LocaleDistanceData(LocaleDistanceData &&data) :
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
// © 2019 and later: Unicode, Inc. and others.
|
// © 2019 and later: Unicode, Inc. and others.
|
||||||
// License & terms of use: http://www.unicode.org/copyright.html#License
|
// License & terms of use: http://www.unicode.org/copyright.html
|
||||||
|
|
||||||
// loclikelysubtags.h
|
// loclikelysubtags.h
|
||||||
// created: 2019may08 Markus W. Scherer
|
// created: 2019may08 Markus W. Scherer
|
||||||
@ -13,49 +13,13 @@
|
|||||||
#include "unicode/locid.h"
|
#include "unicode/locid.h"
|
||||||
#include "unicode/uobject.h"
|
#include "unicode/uobject.h"
|
||||||
#include "unicode/ures.h"
|
#include "unicode/ures.h"
|
||||||
|
#include "charstrmap.h"
|
||||||
#include "lsr.h"
|
#include "lsr.h"
|
||||||
#include "uhash.h"
|
|
||||||
|
|
||||||
U_NAMESPACE_BEGIN
|
U_NAMESPACE_BEGIN
|
||||||
|
|
||||||
struct XLikelySubtagsData;
|
struct XLikelySubtagsData;
|
||||||
|
|
||||||
/**
|
|
||||||
* Map of const char * keys & values.
|
|
||||||
* Stores pointers as is: Does not own/copy/adopt/release strings.
|
|
||||||
*/
|
|
||||||
class CharStringMap final : public UMemory {
|
|
||||||
public:
|
|
||||||
/** Constructs an unusable non-map. */
|
|
||||||
CharStringMap() : map(nullptr) {}
|
|
||||||
CharStringMap(int32_t size, UErrorCode &errorCode) {
|
|
||||||
map = uhash_openSize(uhash_hashChars, uhash_compareChars, uhash_compareChars,
|
|
||||||
size, &errorCode);
|
|
||||||
}
|
|
||||||
CharStringMap(CharStringMap &&other) U_NOEXCEPT : map(other.map) {
|
|
||||||
other.map = nullptr;
|
|
||||||
}
|
|
||||||
CharStringMap(const CharStringMap &other) = delete;
|
|
||||||
~CharStringMap() {
|
|
||||||
uhash_close(map);
|
|
||||||
}
|
|
||||||
|
|
||||||
CharStringMap &operator=(CharStringMap &&other) U_NOEXCEPT {
|
|
||||||
map = other.map;
|
|
||||||
other.map = nullptr;
|
|
||||||
return *this;
|
|
||||||
}
|
|
||||||
CharStringMap &operator=(const CharStringMap &other) = delete;
|
|
||||||
|
|
||||||
const char *get(const char *key) const { return static_cast<const char *>(uhash_get(map, key)); }
|
|
||||||
void put(const char *key, const char *value, UErrorCode &errorCode) {
|
|
||||||
uhash_put(map, const_cast<char *>(key), const_cast<char *>(value), &errorCode);
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
|
||||||
UHashtable *map;
|
|
||||||
};
|
|
||||||
|
|
||||||
struct LocaleDistanceData {
|
struct LocaleDistanceData {
|
||||||
LocaleDistanceData() = default;
|
LocaleDistanceData() = default;
|
||||||
LocaleDistanceData(LocaleDistanceData &&data);
|
LocaleDistanceData(LocaleDistanceData &&data);
|
||||||
|
82
icu4c/source/common/uniquecharstr.h
Normal file
82
icu4c/source/common/uniquecharstr.h
Normal file
@ -0,0 +1,82 @@
|
|||||||
|
// © 2020 and later: Unicode, Inc. and others.
|
||||||
|
// License & terms of use: http://www.unicode.org/copyright.html
|
||||||
|
|
||||||
|
// uniquecharstr.h
|
||||||
|
// created: 2020sep01 Frank Yung-Fong Tang
|
||||||
|
|
||||||
|
#ifndef __UNIQUECHARSTR_H__
|
||||||
|
#define __UNIQUECHARSTR_H__
|
||||||
|
|
||||||
|
#include "charstr.h"
|
||||||
|
#include "uhash.h"
|
||||||
|
|
||||||
|
U_NAMESPACE_BEGIN
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Stores NUL-terminated strings with duplicate elimination.
|
||||||
|
* Checks for unique UTF-16 string pointers and converts to invariant characters.
|
||||||
|
*/
|
||||||
|
class UniqueCharStrings {
|
||||||
|
public:
|
||||||
|
UniqueCharStrings(UErrorCode &errorCode) : strings(nullptr) {
|
||||||
|
uhash_init(&map, uhash_hashUChars, uhash_compareUChars, uhash_compareLong, &errorCode);
|
||||||
|
if (U_FAILURE(errorCode)) { return; }
|
||||||
|
strings = new CharString();
|
||||||
|
if (strings == nullptr) {
|
||||||
|
errorCode = U_MEMORY_ALLOCATION_ERROR;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
~UniqueCharStrings() {
|
||||||
|
uhash_close(&map);
|
||||||
|
delete strings;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Returns/orphans the CharString that contains all strings. */
|
||||||
|
CharString *orphanCharStrings() {
|
||||||
|
CharString *result = strings;
|
||||||
|
strings = nullptr;
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Adds a string and returns a unique number for it. */
|
||||||
|
int32_t add(const UnicodeString &s, UErrorCode &errorCode) {
|
||||||
|
if (U_FAILURE(errorCode)) { return 0; }
|
||||||
|
if (isFrozen) {
|
||||||
|
errorCode = U_NO_WRITE_PERMISSION;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
// The string points into the resource bundle.
|
||||||
|
const char16_t *p = s.getBuffer();
|
||||||
|
int32_t oldIndex = uhash_geti(&map, p);
|
||||||
|
if (oldIndex != 0) { // found duplicate
|
||||||
|
return oldIndex;
|
||||||
|
}
|
||||||
|
// Explicit NUL terminator for the previous string.
|
||||||
|
// The strings object is also terminated with one implicit NUL.
|
||||||
|
strings->append(0, errorCode);
|
||||||
|
int32_t newIndex = strings->length();
|
||||||
|
strings->appendInvariantChars(s, errorCode);
|
||||||
|
uhash_puti(&map, const_cast<char16_t *>(p), newIndex, &errorCode);
|
||||||
|
return newIndex;
|
||||||
|
}
|
||||||
|
|
||||||
|
void freeze() { isFrozen = true; }
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns a string pointer for its unique number, if this object is frozen.
|
||||||
|
* Otherwise nullptr.
|
||||||
|
*/
|
||||||
|
const char *get(int32_t i) const {
|
||||||
|
U_ASSERT(isFrozen);
|
||||||
|
return isFrozen && i > 0 ? strings->data() + i : nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
UHashtable map;
|
||||||
|
CharString *strings;
|
||||||
|
bool isFrozen = false;
|
||||||
|
};
|
||||||
|
|
||||||
|
U_NAMESPACE_END
|
||||||
|
|
||||||
|
#endif // __UNIQUECHARSTR_H__
|
Loading…
Reference in New Issue
Block a user