From d99c1032da71df2c0f2a6e61573244217275496a Mon Sep 17 00:00:00 2001 From: Jamie Reece Wilson Date: Thu, 22 Aug 2024 19:13:28 +0100 Subject: [PATCH] [*] Regression: AuROXTL string view has a different interpretation of what remove prefix/suffix should be. Fix SplitNewlines [*] Harden SplitString & SplitStringDelm, do not splice UTF8 sequences --- Include/Aurora/Parse/LineParser.hpp | 64 ++++++++++++++++++----------- 1 file changed, 39 insertions(+), 25 deletions(-) diff --git a/Include/Aurora/Parse/LineParser.hpp b/Include/Aurora/Parse/LineParser.hpp index 049a1800..36a959e7 100644 --- a/Include/Aurora/Parse/LineParser.hpp +++ b/Include/Aurora/Parse/LineParser.hpp @@ -9,64 +9,71 @@ namespace Aurora::Parse { - static AuROString SplitNewlines(const AuROString &in, AuFunction lineCallback, bool returnRemaining) + static AuROString SplitNewlines(const AuROString &in, + AuConsumer lineCallback, + bool bReturnRemaining = false) { AuMach index = 0, startIdx = 0; - while ((index = in.find("\n", startIdx)) != AuString::npos) + while ((index = in.Find("\n", startIdx)) != AuString::npos) { - auto line = in.substr(startIdx, index - startIdx); + auto line = in.Substr(startIdx, index - startIdx); startIdx = index + 1; - if (line[line.size() - 1] == '\r') + if (line[line.Size() - 1] == '\r') { - line.remove_suffix(1); + line = line.RemoveSuffix(1); } lineCallback(line); - if (startIdx >= in.size()) + if (startIdx >= in.Size()) { break; } } - if (returnRemaining) + if (bReturnRemaining) { - return in.substr(startIdx); + return in.Substr(startIdx); } else { - lineCallback(in.substr(startIdx)); + lineCallback(in.Substr(startIdx)); return {}; } } - static void SplitNewlines(const AuROString &in, AuFunction lineCallback) - { - SplitNewlines(in, lineCallback, false); - } - static AuList SplitString(const AuROString &in, AuUInt16 characters) { AuList ret; - for (auto i = 0u; i < in.size(); i += characters) + for (AuUInt i = 0u; i < in.Size(); ) { - auto start = i; - auto end = AuMin(AuUInt32(in.size()), AuUInt32(i + characters)); - auto len = end - start; - ret.push_back(in.substr(start, len)); + AuUInt start, end, len; + + start = i; + end = AuMin(AuUInt(in.size()), AuUInt(i + characters)); + if (end - 1) + { + auto uLastCodepointIndex = AuCodepointsFindPreviousValidByteOffsetFromByteOffset(in, end); + auto uLastByteOffset = AuCodepointsNextLength(in.Substr(uLastCodepointIndex)); + end = uLastCodepointIndex + uLastByteOffset; + } + len = end - start; + i += len; + + ret.push_back(in.Substr(start, len)); } return ret; } - static AuString SplitStringDelm(const AuROString &in, const AuString &delm, AuUInt16 characters) + static AuString SplitStringDelm(const AuROString &in, const AuROString &delm, AuUInt16 characters) { AuString ret; - ret.reserve(in.size()); - for (auto i = 0u; i < in.size(); i += characters) + ret.reserve(in.Size()); + for (AuUInt i = 0u; i < in.Size(); ) { - AuUInt32 start, end, len; + AuUInt start, end, len; if (i != 0) { @@ -74,10 +81,17 @@ namespace Aurora::Parse } start = i; - end = AuMin(AuUInt32(in.size()), AuUInt32(i + characters)); + end = AuMin(AuUInt(in.Size()), AuUInt(i + characters)); + if (end - 1) + { + auto uLastCodepointIndex = AuCodepointsFindPreviousValidByteOffsetFromByteOffset(in, end); + auto uLastByteOffset = AuCodepointsNextLength(in.Substr(uLastCodepointIndex)); + end = uLastCodepointIndex + uLastByteOffset; + } len = end - start; + i += len; - ret.insert(ret.size(), in.substr(start, len)); + ret.insert(ret.size(), in.Substr(start, len)); } return ret; }