[+] AuROString "support"

[+] AuCodepointsTransformASCIIOp
[+] AuCodepointsCount
[+] AuCodepointsNextLength
[+] AuCodepointsDecode
[+] AuCodepointsEncodeInto
[+] AuCodepointsTransform
This commit is contained in:
Reece Wilson 2024-04-19 06:24:40 +01:00
parent 5f69571450
commit b1d6eb0d80
4 changed files with 602 additions and 66 deletions

View File

@ -134,15 +134,41 @@ inline bool AuExists(const Range &a, const Key &key)
// Reports whether `key` is present in `map`. Per its enable-when condition this overload
// is selected for non-pointer types for which __audetail::AuHasfind_v is true.
// NOTE(review): this listing appears to interleave pre-change and post-change diff lines
// (a find()/end() check shows up both at the top and inside the trailing else, and there are
// stray `return true;` / `return false;` statements) - confirm against the real file.
template <class Map, class Key, AU_TEMPLATE_ENABLE_WHEN(__audetail::AuHasfind_v<Map> && !AuIsPointer_v<Map>)>
inline bool AuExists(const Map &map, const Key &key)
{
auto itr = map.find(key);
if (itr != map.end())
// Linear-scan path: compiled in when Key is AuROString, kCurrentLanguage is not eCpp20
// and its integer value is below 20.
if constexpr (AuIsSame_v<Key, AuROString> &&
Aurora::Build::kCurrentLanguage != Aurora::Build::ELanguage::eCpp20 &&
(int)Aurora::Build::kCurrentLanguage < 20)
{
return true;
for (auto itr = map.begin();
itr != map.end();
)
{
// Hash maps / BSTs are compared through the entry's key (`first`) ...
if constexpr (AuIsHashMap_v<Map> || AuIsBST_v<Map>)
{
if (itr->first == key)
{
return true;
}
}
// ... everything else compares the element itself.
else
{
if (*itr == key)
{
return true;
}
}
itr++;
}
}
// Otherwise defer to the container's own find().
else
{
return false;
auto itr = map.find(key);
if (itr != map.end())
{
return true;
}
}
return false;
}
template <class Map, class Key>
@ -161,31 +187,89 @@ inline bool AuTryClear(Container &container)
// Erases the first element of `list` that compares equal to `key`.
// Returns true when an element was erased, false when nothing matched.
// Per its enable-when condition this overload is selected for non-pointer types for which
// __audetail::AuHasfind_v is false.
// NOTE(review): this listing appears to interleave pre-change and post-change diff lines
// (std::find + erase shows up both at the top and inside the trailing else) - confirm
// against the real file.
template <class Range, class Key, AU_TEMPLATE_ENABLE_WHEN(!__audetail::AuHasfind_v<Range> && !AuIsPointer_v<Range>)>
inline bool AuTryRemove(Range &list, const Key &key)
{
auto itr = std::find(list.begin(), list.end(), key);
if (itr != list.end())
// Manual scan: compiled in when Key is AuROString, kCurrentLanguage is not eCpp20
// and its integer value is below 20.
if constexpr (AuIsSame_v<Key, AuROString> &&
Aurora::Build::kCurrentLanguage != Aurora::Build::ELanguage::eCpp20 &&
(int)Aurora::Build::kCurrentLanguage < 20)
{
list.erase(itr);
return true;
for (auto itr = list.begin();
itr != list.end();
)
{
// Keyed containers are matched on the entry's `first` member.
if constexpr (AuIsHashMap_v<Range> || AuIsBST_v<Range>)
{
if (itr->first == key)
{
// Returning right after erase means the invalidated iterator is never reused.
list.erase(itr);
return true;
}
}
else
{
if (*itr == key)
{
list.erase(itr);
return true;
}
}
itr++;
}
}
// Otherwise use std::find over the whole range.
else
{
return false;
auto itr = std::find(list.begin(), list.end(), key);
if (itr != list.end())
{
list.erase(itr);
return true;
}
}
return false;
}
// Erases the entry of `map` that matches `key`.
// Returns true when an entry was erased, false when nothing matched.
// Per its enable-when condition this overload is selected for non-pointer types for which
// __audetail::AuHasfind_v is true.
// NOTE(review): this listing appears to interleave pre-change and post-change diff lines
// (find() + erase shows up both at the top and inside the trailing else) - confirm
// against the real file.
template <class Map, class Key, AU_TEMPLATE_ENABLE_WHEN(__audetail::AuHasfind_v<Map> && !AuIsPointer_v<Map>)>
inline bool AuTryRemove(Map &map, const Key &key)
{
auto itr = map.find(key);
if (itr != map.end())
// Manual scan: compiled in when Key is AuROString, kCurrentLanguage is not eCpp20
// and its integer value is below 20.
if constexpr (AuIsSame_v<Key, AuROString> &&
Aurora::Build::kCurrentLanguage != Aurora::Build::ELanguage::eCpp20 &&
(int)Aurora::Build::kCurrentLanguage < 20)
{
map.erase(itr);
return true;
for (auto itr = map.begin();
itr != map.end();
)
{
// Hash maps / BSTs are matched on the entry's `first` member.
if constexpr (AuIsHashMap_v<Map> || AuIsBST_v<Map>)
{
if (itr->first == key)
{
// Returning right after erase means the invalidated iterator is never reused.
map.erase(itr);
return true;
}
}
else
{
if (*itr == key)
{
map.erase(itr);
return true;
}
}
itr++;
}
}
// Otherwise defer to the container's own find().
else
{
return false;
auto itr = map.find(key);
if (itr != map.end())
{
map.erase(itr);
return true;
}
}
return false;
}
template <class Map, class Key>

View File

@ -245,6 +245,17 @@ namespace AuHash
return lhs == rhs;
}
};
// Equality functor specialised for std::string keys.
// Both operands are taken as std::string_view and `is_transparent` is declared, so any
// argument convertible to std::string_view can be compared without building a std::string.
template <>
struct equal<std::string>
{
    using is_transparent = void;

    bool operator()(std::string_view lhs, std::string_view rhs) const
    {
        // compare() yields 0 exactly when both views have the same length and contents.
        return lhs.compare(rhs) == 0;
    }
};
}
template <class T, AU_TEMPLATE_ENABLE_WHEN(AuHasHashCode_v<T>)>
@ -286,6 +297,71 @@ namespace AuHash
return uHashCode;
}
};
// container bug in msvc?
#if 0
template <>
struct hash<std::string_view>
{
size_t operator()(std::string_view txt) const
{
return AuFnv1aRuntime(txt.data(), txt.size());
}
};
#endif
// Hash functor specialised for std::string keys.
// Accepts a std::string_view and forwards to hash<std::string_view>, so a std::string and a
// view over the same characters produce the same hash code. `is_transparent` and
// `transparent_key_equal` are declared alongside, pairing this hasher with equal<std::string>.
template <>
struct hash<std::string>
{
    using is_transparent = void;
    using transparent_key_equal = equal<std::string>;

    size_t operator()(std::string_view txt) const
    {
        hash<std::string_view> viewHasher {};
        return viewHasher(txt);
    }
};
template <>
struct less<std::string>
{
using is_transparent = void;
bool operator()(std::string_view lhs, std::string_view rhs) const
{
#if 0
return AuFnv1aRuntime(lhs.data(), lhs.size()) < AuFnv1aRuntime(rhs.data(), rhs.size());
#else
return hash<std::string_view>{}(lhs) < hash<std::string_view>{}(rhs);
#endif
}
};
template <>
struct less<std::string_view>
{
bool operator()(std::string_view lhs, std::string_view rhs) const
{
#if 0
return AuFnv1aRuntime(lhs.data(), lhs.size()) < AuFnv1aRuntime(rhs.data(), rhs.size());
#else
return hash<std::string_view>{}(lhs) < hash<std::string_view>{}(rhs);
#endif
}
};
template <>
struct less<const char *>
{
bool operator()(const char *lhs, const char *rhs) const
{
#if 0
return AuFnv1aRuntime(lhs, strlen(lhs)) < AuFnv1aRuntime(rhs, strlen(rhs));
#else
return hash<std::string_view>{}(lhs) < hash<std::string_view>{}(rhs);
#endif
}
};
}
template <class T>

View File

@ -10,18 +10,18 @@
***/
#pragma once
// Returns true when `subpattern` occurs anywhere inside `value` (find() != npos).
// NOTE(review): the signature and the return statement each appear twice (AuString and
// AuROString flavours); this looks like removed and added diff lines shown together.
static auline bool AuStringContains(const AuString &value, const AuString &subpattern)
static auline bool AuStringContains(const AuROString &value, const AuROString &subpattern)
{
return value.find(subpattern) != AuString::npos;
return value.find(subpattern) != AuROString::npos;
}
// Returns true when `value` ends with `ending`.
// NOTE(review): the signature appears twice (AuString and AuROString flavours); this looks
// like removed and added diff lines shown together.
static auline bool AuEndsWith(AuString const &value, AuString const &ending)
static auline bool AuEndsWith(AuROString const &value, AuROString const &ending)
{
// A suffix longer than the string can never match; this also keeps std::equal in bounds.
if (ending.size() > value.size()) return false;
// Walk both strings back-to-front, comparing ending.size() characters.
return std::equal(ending.rbegin(), ending.rend(), value.rbegin());
}
static auline bool AuStartsWith(AuString const &value, AuString const &starting)
static auline bool AuStartsWith(AuROString const &value, AuROString const &starting)
{
#if defined(AU_STRING_IS_TINYUTF_EXPERIMENT)
return value.starts_with(starting);
@ -30,67 +30,436 @@ static auline bool AuStartsWith(AuString const &value, AuString const &starting)
#endif
}
#if defined(AU_STRING_IS_TINYUTF_EXPERIMENT)
// Returns a copy of `in` in which every element cpy[i] has been replaced by out(cpy[i]).
// `in` itself is left untouched.
static AuString AuStringTransform(const AuString &in, const AuSupplierConsumer<AuUInt32, AuUInt32> &out)
{
AuString cpy = in;
// NOTE(review): `i` is a signed int compared against cpy.length() - confirm the string
// type's length cannot exceed INT_MAX here.
for (int i = 0; i < cpy.length(); i++)
{
cpy[i] = out(cpy[i]);
}
return cpy;
}
// Returns a copy of `in` with std::toupper applied to every element via AuStringTransform.
static auline AuString AuToUpper(const AuString &in)
{
return AuStringTransform(in, std::toupper);
}
// Returns a copy of `in` with std::tolower applied to every element via AuStringTransform.
static auline AuString AuToLower(const AuString &in)
{
    // Must be tolower: passing toupper here made AuToLower behave exactly like AuToUpper.
    return AuStringTransform(in, std::tolower);
}
#else
// Walks `in` one UTF-8 sequence at a time and builds a same-sized result: single-byte
// (ASCII) sequences are passed through `op`, multi-byte sequences are copied unchanged.
// NOTE(review): this listing appears to interleave pre-change and post-change diff lines
// (two signatures, an orphaned std::transform lambda) - confirm against the real file.
template <class T>
static auline AuString AuToStringASCIIOp(T op, const AuString &in)
static auline AuString AuCodepointsTransformASCIIOp(T op, const AuROString &in)
{
AuString ret;
ret.resize(in.size());
std::transform(in.begin(), in.end(), ret.begin(), [=](const char &c)
auto uLength = in.length();
// The result is sized up-front; bytes are then written in place at uCounter.
ret.resize(uLength);
const char *pItr = in.data();
const char *pEnd = pItr + uLength;
// uCounter is the byte offset into both `in` and `ret`; it advances in step with pItr.
AuUInt32 uCounter {};
while (pItr != pEnd)
{
return op(c);
});
// nby: number of bytes in the sequence that starts at the current lead byte.
AuUInt32 nby {};
auto ch = *pItr;
unsigned int result = (ch & 0xF0);
if ((ch & 0x80) == 0)
{
nby = 1;
}
else if ((ch & 0xE0) == 0xC0)
{
nby = 2;
}
else if (result == 0xE0)
{
nby = 3;
}
else if (result == 0xF0)
{
if ((ch & 0x08) == 0x08)
{
nby = 5;
}
// NOTE(review): unreachable - any byte with (ch & 0x0c) == 0x0c also satisfies the
// (ch & 0x08) == 0x08 test above, so nby is never set to 6.
else if ((ch & 0x0c) == 0x0c)
{
nby = 6;
}
else
{
nby = 4;
}
}
else
{
// Not a recognised lead byte (high nibble 0x8-0xB): stop processing.
break;
}
// Sequence would run past the end of the input: stop processing.
if (pItr + nby > pEnd)
{
break;
}
if (nby == 1)
{
ret[uCounter] = op(in[uCounter]);
}
else
{
AuMemcpy(&ret[uCounter], &in[uCounter], nby);
}
uCounter += nby;
pItr += nby;
}
// NOTE(review): after an early break `ret` still has the full resized length, so the
// unprocessed tail keeps whatever resize() filled it with - confirm this is intended.
return ret;
}
static auline AuString AuToLower(const AuString &in)
// Counts the UTF-8 sequences in `in` by reading each lead byte and skipping the number of
// bytes it announces. Counting stops at the first unrecognised lead byte or at a sequence
// that would run past the end of the input; the count reached so far is returned.
// NOTE(review): this listing appears to interleave pre-change and post-change diff lines
// (bodies of the old AuToLower/AuToUpper/AuReplaceAll and an #endif are mixed in) -
// confirm against the real file.
static auline AuUInt AuCodepointsCount(const AuROString &in)
{
return AuToStringASCIIOp<int(*)(int)>(std::tolower, in);
}
AuUInt uCounter {};
auto uLength = in.length();
static auline AuString AuToUpper(const AuString &in)
{
return AuToStringASCIIOp<int(*)(int)>(std::toupper, in);
}
#endif
const char *pItr = in.data();
const char *pEnd = pItr + uLength;
static auline AuString &AuReplaceAll(AuString &str, const AuString &from, const AuString &to)
{
size_t start_pos = 0;
while ((start_pos = str.find(from, start_pos)) != std::string::npos)
while (pItr != pEnd)
{
str.replace(start_pos, from.length(), to);
start_pos += to.length(); // Handles case where 'to' is a substring of 'from'
// nby: number of bytes in the sequence that starts at the current lead byte.
AuUInt32 nby {};
auto ch = *pItr;
unsigned int result = (ch & 0xF0);
if ((ch & 0x80) == 0)
{
nby = 1;
}
else if ((ch & 0xE0) == 0xC0)
{
nby = 2;
}
else if (result == 0xE0)
{
nby = 3;
}
else if (result == 0xF0)
{
if ((ch & 0x08) == 0x08)
{
nby = 5;
}
// NOTE(review): unreachable - any byte with (ch & 0x0c) == 0x0c also satisfies the
// (ch & 0x08) == 0x08 test above, so nby is never set to 6.
else if ((ch & 0x0c) == 0x0c)
{
// Special/Historic UTF8
nby = 6;
}
else
{
nby = 4;
}
}
else
{
// Not a recognised lead byte (high nibble 0x8-0xB): stop counting.
break;
}
// Sequence would run past the end of the input: stop counting.
if (pItr + nby > pEnd)
{
break;
}
uCounter++;
pItr += nby;
}
return uCounter;
}
// Returns the number of bytes announced by the UTF-8 lead byte at the start of `in`.
//
//   0xxxxxxx -> 1      1110xxxx -> 3      111110xx -> 5 (historic UTF-8)
//   110xxxxx -> 2      11110xxx -> 4      1111110x -> 6 (historic UTF-8)
//
// Returns 0 when `in` is empty or when the first byte cannot start a sequence
// (a continuation byte 10xxxxxx, or 0xFE / 0xFF). Only the first byte is inspected; the
// caller is responsible for checking that the announced number of bytes is available.
static auline AuUInt AuCodepointsNextLength(const AuROString &in)
{
    if (in.empty())
    {
        return 0;
    }

    // Work on an unsigned byte so the masks below do not depend on the signedness of char.
    const auto uLead = static_cast<unsigned char>(in[0]);

    if ((uLead & 0x80) == 0x00)
    {
        return 1;
    }

    if ((uLead & 0xE0) == 0xC0)
    {
        return 2;
    }

    if ((uLead & 0xF0) == 0xE0)
    {
        return 3;
    }

    if ((uLead & 0xF8) == 0xF0)
    {
        return 4;
    }

    // Each prefix is matched with its own full mask. Testing bit 0x08 before bits 0x0c
    // (as was done previously) made the 6-byte case unreachable and reported 5 instead.
    if ((uLead & 0xFC) == 0xF8)
    {
        return 5;
    }

    if ((uLead & 0xFE) == 0xFC)
    {
        // Special/Historic UTF8
        return 6;
    }

    return 0;
}
// ASCII-only, locale-independent lower-casing of a single character.
// 'A'..'Z' map to 'a'..'z'; every other value (digits, punctuation, control characters,
// NUL, non-ASCII bytes) is returned unchanged.
static auline char AuToLower(char c)
{
    // Setting bit 0x20 is only a case conversion for upper-case letters; applied blindly it
    // corrupts other characters (e.g. '\n' becomes '*', '@' becomes '`').
    return (c >= 'A' && c <= 'Z') ? static_cast<char>(c | 0x20) : c;
}
// ASCII-only, locale-independent upper-casing of a single character.
// 'a'..'z' map to 'A'..'Z'; every other value (digits, punctuation, control characters,
// NUL, non-ASCII bytes) is returned unchanged.
static auline char AuToUpper(char c)
{
    // Clearing bit 0x20 is only a case conversion for lower-case letters; applied blindly it
    // corrupts other characters (e.g. ' ' becomes NUL, '1' becomes 0x11).
    return (c >= 'a' && c <= 'z') ? static_cast<char>(c & ~0x20) : c;
}
// Returns a copy of `in` produced by AuCodepointsTransformASCIIOp with the single-character
// AuToLower as the per-byte operation.
static auline AuString AuCodepointsToLower(const AuROString &in)
{
    // The typed pointer selects the char -> char overload of AuToLower.
    char (*pfnLower)(char) = &AuToLower;
    return AuCodepointsTransformASCIIOp(pfnLower, in);
}
// Returns a copy of `in` produced by AuCodepointsTransformASCIIOp with the single-character
// AuToUpper as the per-byte operation.
static auline AuString AuCodepointsToUpper(const AuROString &in)
{
    // The typed pointer selects the char -> char overload of AuToUpper.
    char (*pfnUpper)(char) = &AuToUpper;
    return AuCodepointsTransformASCIIOp(pfnUpper, in);
}
// String overload of AuToLower: forwards to AuCodepointsToLower and returns its result.
static auline AuString AuToLower(const AuROString &in)
{
    AuString lowered = AuCodepointsToLower(in);
    return lowered;
}
// String overload of AuToUpper: forwards to AuCodepointsToUpper and returns its result.
static auline AuString AuToUpper(const AuROString &in)
{
    AuString raised = AuCodepointsToUpper(in);
    return raised;
}
// Decodes the UTF-8 bytes of `in` into a list of 32-bit code points.
//
// Sequences of up to six bytes are accepted. An empty list is returned when `in` is empty
// and also when the input is malformed: a stray continuation byte, a lead byte announcing
// more than six bytes, a sequence cut short by the end of the input, or a trailing byte
// that is not of the form 10xxxxxx. No partial result is returned in the malformed case.
static AuList<AuUInt32> AuCodepointsDecode(const AuROString &in)
{
    AuList<AuUInt32> codepoints;

    const auto uByteCount = in.length();
    if (uByteCount == 0)
    {
        return codepoints;
    }

    // One code point per byte is the upper bound (pure ASCII input).
    codepoints.reserve(uByteCount);

    const char *pCursor = in.data();
    const char *const pLimit = pCursor + uByteCount;

    while (pCursor < pLimit)
    {
        const AuUInt8 uLead = static_cast<AuUInt8>(*pCursor);

        // Plain ASCII: the byte is the code point.
        if (uLead <= 0x7FU)
        {
            codepoints.push_back(uLead);
            ++pCursor;
            continue;
        }

        // A multi-byte sequence must start with 11xxxxxx.
        if ((uLead & 0xC0U) != 0xC0U)
        {
            return {};
        }

        // The number of leading one-bits in the lead byte is the sequence length.
        AuUInt32 uSequenceLength {};
        for (AuUInt8 uProbe = uLead; (uProbe & 0x80U) != 0; uProbe <<= 1)
        {
            ++uSequenceLength;
        }

        if (uSequenceLength > 6)
        {
            return {};
        }

        if (AuUInt(pLimit - pCursor) < AuUInt(uSequenceLength))
        {
            return {};
        }

        // Payload bits of the lead byte: everything below the length prefix.
        AuUInt32 uCodepoint = uLead & (AuUInt8(0xFFU) >> (uSequenceLength + 1));

        for (AuUInt32 uIndex = 1; uIndex < uSequenceLength; ++uIndex)
        {
            const AuUInt8 uTrail = static_cast<AuUInt8>(pCursor[uIndex]);

            if ((uTrail & 0xC0U) != 0x80U)
            {
                return {};
            }

            // Each trailing byte contributes six payload bits.
            uCodepoint = (uCodepoint << 6) | (uTrail & 0x3FU);
        }

        pCursor += uSequenceLength;
        codepoints.push_back(uCodepoint);
    }

    return codepoints;
}
// Appends the UTF-8 encoding of `uCodepoint` to `out`.
//
//   < 0x80        -> 1 byte       < 0x200000    -> 4 bytes
//   < 0x800       -> 2 bytes      < 0x4000000   -> 5 bytes (historic UTF-8)
//   < 0x10000     -> 3 bytes      < 0x80000000  -> 6 bytes (historic UTF-8)
//
// Values of 0x80000000 and above are not encodable and leave `out` untouched.
static void AuCodepointsEncodeInto(AuUInt32 uCodepoint, AuString &out)
{
    AuUInt32 uSequenceLength {};
    AuUInt32 uLeadMarker {};

    if (uCodepoint < 0x80)
    {
        uSequenceLength = 1;
        uLeadMarker     = 0x00;
    }
    else if (uCodepoint < 0x800)
    {
        uSequenceLength = 2;
        uLeadMarker     = 0xc0;
    }
    else if (uCodepoint < 0x10000)
    {
        uSequenceLength = 3;
        uLeadMarker     = 0xe0;
    }
    else if (uCodepoint < 0x200000)
    {
        uSequenceLength = 4;
        uLeadMarker     = 0xf0;
    }
    else if (uCodepoint < 0x4000000)
    {
        uSequenceLength = 5;
        uLeadMarker     = 0xf8;
    }
    else if (uCodepoint < 0x80000000)
    {
        uSequenceLength = 6;
        uLeadMarker     = 0xfc;
    }
    else
    {
        return;
    }

    const auto uBase = out.size();
    out.resize(uBase + uSequenceLength);

    // The lead byte carries the topmost payload bits plus the length prefix ...
    AuUInt32 uShift = 6 * (uSequenceLength - 1);
    out[uBase] = static_cast<AuUInt8>((uCodepoint >> uShift) | uLeadMarker);

    // ... and every following byte carries the next six bits, tagged 10xxxxxx.
    for (AuUInt32 uIndex = 1; uIndex < uSequenceLength; ++uIndex)
    {
        uShift -= 6;
        out[uBase + uIndex] = static_cast<AuUInt8>(((uCodepoint >> uShift) & 0x3f) | 0x80);
    }
}
// Decodes `in` code point by code point, passes each one through `op`, and re-encodes the
// value `op` returns (via AuCodepointsEncodeInto) into the result string.
//
// Sequences of up to six bytes are accepted. An empty string is returned when `in` is
// empty and also when the input is malformed: a stray continuation byte, a lead byte
// announcing more than six bytes, a sequence cut short by the end of the input, or a
// trailing byte that is not of the form 10xxxxxx. No partial result is returned in the
// malformed case.
template <class T>
static AuString AuCodepointsTransform(T op, const AuROString &in)
{
    AuString transformed;

    const auto uByteCount = in.length();
    if (uByteCount == 0)
    {
        return transformed;
    }

    transformed.reserve(uByteCount);

    const char *pCursor = in.data();
    const char *const pLimit = pCursor + uByteCount;

    while (pCursor < pLimit)
    {
        const AuUInt8 uLead = static_cast<AuUInt8>(*pCursor);
        AuUInt32 uCodepoint = uLead;

        if (uLead <= 0x7FU)
        {
            // Plain ASCII: the byte is the code point.
            ++pCursor;
        }
        else
        {
            // A multi-byte sequence must start with 11xxxxxx.
            if ((uLead & 0xC0U) != 0xC0U)
            {
                return {};
            }

            // The number of leading one-bits in the lead byte is the sequence length.
            AuUInt32 uSequenceLength {};
            for (AuUInt8 uProbe = uLead; (uProbe & 0x80U) != 0; uProbe <<= 1)
            {
                ++uSequenceLength;
            }

            if (uSequenceLength > 6)
            {
                return {};
            }

            if (AuUInt(pLimit - pCursor) < AuUInt(uSequenceLength))
            {
                return {};
            }

            // Payload bits of the lead byte: everything below the length prefix.
            uCodepoint = uLead & (AuUInt8(0xFFU) >> (uSequenceLength + 1));

            for (AuUInt32 uIndex = 1; uIndex < uSequenceLength; ++uIndex)
            {
                const AuUInt8 uTrail = static_cast<AuUInt8>(pCursor[uIndex]);

                if ((uTrail & 0xC0U) != 0x80U)
                {
                    return {};
                }

                // Each trailing byte contributes six payload bits.
                uCodepoint = (uCodepoint << 6) | (uTrail & 0x3FU);
            }

            pCursor += uSequenceLength;
        }

        uCodepoint = op(uCodepoint);
        AuCodepointsEncodeInto(uCodepoint, transformed);
    }

    return transformed;
}
// Replaces every non-overlapping occurrence of `from` in `str` with `to`, in place,
// scanning left to right. Returns `str` to allow chaining.
//
// An empty `from` is treated as "nothing to replace" and leaves `str` unchanged.
static AuString &AuReplaceAll(AuString &str, const AuROString &from, const AuROString &to)
{
    // An empty pattern matches at every position, so the loop below would never terminate
    // (and would keep growing the string whenever `to` is non-empty).
    if (from.empty())
    {
        return str;
    }

    AuUInt uStartPosition {};
    while ((uStartPosition = str.find(from, uStartPosition)) != AuROString::npos)
    {
        str.replace(uStartPosition, from.length(), to);
        // Resume after the inserted text so a `to` that contains `from` is not re-matched.
        uStartPosition += to.length();
    }
    return str;
}
// i told myself not to copy this, required a split function twice, now here we are :D
static auline AuList<AuString> AuSplitString(const AuString &str, const AuString &delim, bool ignoreEmpty = true)
// Splits `str` at every occurrence of `delim` and returns the pieces in order.
//
// The returned elements are views into the characters `str` refers to; they are only valid
// for as long as that underlying buffer is alive and unmodified.
//
//  - ignoreEmpty == true  : empty pieces (leading / trailing / adjacent delimiters) are
//                           dropped.
//  - ignoreEmpty == false : empty pieces are kept, so N delimiters always yield N + 1
//                           pieces.
//  - An empty `delim` never matches; the whole of `str` is treated as a single piece.
static AuList<AuROString> AuSplitString(const AuROString &str, const AuROString &delim, bool ignoreEmpty = true)
{
    AuList<AuROString> tokens;
    tokens.reserve(str.size() / 16);

    // Keeps a piece unless it is empty and the caller asked for empty pieces to be dropped.
    // (The previous `!empty && ignoreEmpty` test discarded every piece when ignoreEmpty
    // was false.)
    const auto fnEmit = [&](const AuROString &token)
    {
        if (!token.empty() || !ignoreEmpty)
        {
            tokens.push_back(token);
        }
    };

    // find() with an empty needle succeeds without consuming anything, which would make
    // the scan below loop forever.
    if (delim.empty())
    {
        fnEmit(str);
        return tokens;
    }

    AuUInt uPrev {};
    while (true)
    {
        const auto uPos = str.find(delim, uPrev);
        if (uPos == AuROString::npos)
        {
            // No further delimiter: the remainder (possibly empty) is the last piece.
            fnEmit(str.substr(uPrev));
            break;
        }

        fnEmit(str.substr(uPrev, uPos - uPrev));
        uPrev = uPos + delim.length();
    }

    return tokens;
}
static AuList<AuString> AuSplitStringLegacy(const AuROString &str, const AuROString &delim, bool ignoreEmpty = true)
{
AuList<AuString> tokens;
AuUInt prev = 0, pos = 0;
@ -98,9 +467,15 @@ static auline AuList<AuString> AuSplitString(const AuString &str, const AuString
do
{
pos = str.find(delim, prev);
if (pos == AuString::npos) pos = str.length();
if (pos == AuROString::npos)
{
pos = str.length();
}
auto token = str.substr(prev, pos - prev);
if ((!token.empty()) && ignoreEmpty) tokens.push_back(token);
if ((!token.empty()) && ignoreEmpty)
{
tokens.push_back(AuString(token));
}
prev = pos + delim.length();
}
while (pos < str.length() && prev < str.length());

View File

@ -2,6 +2,7 @@
// TODO:
using AuUTF8StringView = std::string_view;
using AuROString = std::string_view;
using AuUTF8StringView = AuROString;
using AuU8View = AuUTF8StringView;