[*] Regression: the AuROXTL string view interprets remove prefix/suffix differently (the result of RemoveSuffix must be assigned back). Fix SplitNewlines accordingly.

[*] Harden SplitString & SplitStringDelm: do not split UTF-8 sequences across chunk boundaries
This commit is contained in:
Reece Wilson 2024-08-22 19:13:28 +01:00
parent 0f6c020c26
commit d99c1032da

View File

@ -9,64 +9,71 @@
namespace Aurora::Parse namespace Aurora::Parse
{ {
/**
 * Splits `in` on '\n' and passes each line to `lineCallback` in order of appearance.
 * A single trailing '\r' is dropped from every newline-terminated line, so CRLF input
 * yields the same lines as LF input. The unterminated tail (the text after the last
 * '\n') is forwarded or returned as-is; it is not checked for a trailing '\r'.
 *
 * @param in               Text to split; only read through view operations.
 * @param lineCallback     Invoked once per newline-terminated line, and once more with
 *                         the tail when bReturnRemaining is false (the tail may be empty).
 * @param bReturnRemaining When true, the tail is returned instead of being passed to
 *                         lineCallback. Defaults to false, which covers what the former
 *                         two-argument overload did.
 * @return The tail when bReturnRemaining is true; otherwise an empty view.
 */
static AuROString SplitNewlines(const AuROString &in,
                                AuConsumer<const AuROString &> lineCallback,
                                bool bReturnRemaining = false)
{
    AuMach index = 0, startIdx = 0;

    while ((index = in.Find("\n", startIdx)) != AuString::npos)
    {
        auto line = in.Substr(startIdx, index - startIdx);
        startIdx = index + 1;

        // An empty line ("\n\n" or a leading '\n') has no last character; indexing
        // Size() - 1 on it would wrap around and read out of bounds. Only look for
        // the carriage return when there is at least one character.
        if (line.Size() && line[line.Size() - 1] == '\r')
        {
            // The shortened view is taken from RemoveSuffix's result rather than
            // relying on in-place mutation of `line`.
            line = line.RemoveSuffix(1);
        }

        lineCallback(line);

        // Input ended exactly on a '\n': nothing left to scan.
        if (startIdx >= in.Size())
        {
            break;
        }
    }

    if (bReturnRemaining)
    {
        return in.Substr(startIdx);
    }
    else
    {
        lineCallback(in.Substr(startIdx));
        return {};
    }
}
/**
 * Cuts `in` into consecutive views of about `characters` bytes each. Every chunk end
 * is re-aligned with the AuCodepoints* helpers so that a chunk does not stop in the
 * middle of a multi-byte UTF-8 sequence; chunks may therefore differ slightly from
 * `characters` bytes in length.
 *
 * @param in         Text to cut up; the returned views refer to its storage.
 * @param characters Requested chunk size, in bytes.
 * @return The chunks in input order. Empty when `in` is empty.
 */
static AuList<AuROString> SplitString(const AuROString &in, AuUInt16 characters)
{
    AuList<AuROString> ret;

    for (AuUInt i = 0u; i < in.Size(); )
    {
        const AuUInt start = i;
        const AuUInt uRawEnd = AuMin(AuUInt(in.Size()), AuUInt(start + characters));
        AuUInt end = uRawEnd;

        // NOTE(review): this condition is true for every value of `end` except 1, so a
        // chunk ending at byte offset 1 is never re-aligned - confirm that is intended.
        if (end - 1)
        {
            // Locate the code point around the raw cut and extend the chunk to that
            // code point's end. Presumably the first helper yields the starting byte
            // offset of that code point and the second its encoded length - verify
            // against the AuCodepoints* declarations.
            auto uLastCodepointIndex = AuCodepointsFindPreviousValidByteOffsetFromByteOffset(in, end);
            auto uLastByteOffset = AuCodepointsNextLength(in.Substr(uLastCodepointIndex));
            end = uLastCodepointIndex + uLastByteOffset;
        }

        // Never let the re-aligned end run past the input.
        if (end > AuUInt(in.Size()))
        {
            end = AuUInt(in.Size());
        }

        // The loop advances by the chunk length, so a zero-length (or backwards) chunk
        // would never terminate. That can happen when `characters` is 0 or when the
        // helpers report a degenerate offset; fall back to the raw cut, or to a single
        // byte, so that every iteration makes progress.
        if (end <= start)
        {
            end = (uRawEnd > start) ? uRawEnd : start + 1;
        }

        const AuUInt len = end - start;
        i += len;
        ret.push_back(in.Substr(start, len));
    }

    return ret;
}
static AuString SplitStringDelm(const AuROString &in, const AuString &delm, AuUInt16 characters) static AuString SplitStringDelm(const AuROString &in, const AuROString &delm, AuUInt16 characters)
{ {
AuString ret; AuString ret;
ret.reserve(in.size()); ret.reserve(in.Size());
for (auto i = 0u; i < in.size(); i += characters) for (AuUInt i = 0u; i < in.Size(); )
{ {
AuUInt32 start, end, len; AuUInt start, end, len;
if (i != 0) if (i != 0)
{ {
@ -74,10 +81,17 @@ namespace Aurora::Parse
} }
start = i; start = i;
end = AuMin(AuUInt32(in.size()), AuUInt32(i + characters)); end = AuMin(AuUInt(in.Size()), AuUInt(i + characters));
if (end - 1)
{
auto uLastCodepointIndex = AuCodepointsFindPreviousValidByteOffsetFromByteOffset(in, end);
auto uLastByteOffset = AuCodepointsNextLength(in.Substr(uLastCodepointIndex));
end = uLastCodepointIndex + uLastByteOffset;
}
len = end - start; len = end - start;
i += len;
ret.insert(ret.size(), in.substr(start, len)); ret.insert(ret.size(), in.Substr(start, len));
} }
return ret; return ret;
} }