AuroraRuntime/Source/Parse/Parser.cpp
J Reece Wilson 21902a5d5b [+] AuParse::[Stringify/Parse][U/S]Int[16] class of parse APIs
[*] Optimize UNIX IPC ABI: Handle String encodes an array of U16s to optimize space. Could still be better.
2022-08-04 14:08:12 +01:00

978 lines
27 KiB
C++

/***
Copyright (C) 2021 J Reece Wilson (a/k/a "Reece"). All rights reserved.
File: Parser.cpp
Date: 2021-6-12
Author: Reece
Note: Horrible gen 1 parser.
I'm not removing or significantly improving this.
Just build around what works, could probably wrangle a command list parser on top.
***/
#include <Source/RuntimeInternal.hpp>
#include "Parser.hpp"

#include <limits>
namespace Aurora::Parse
{
// Exported stub: the body is empty, so nothing is checked and `object` is never read.
AUKN_SYM void VaildateStructure(const ParseObject &object)
{
}
// Builds a predicate that reports whether a byte equals any of the Z entries of `arry`.
// The array is captured by value ([=]), so the returned functor carries its own copy.
template<size_t Z>
static AuFunction<bool(AuUInt8)> ContainedHerein(const AuUInt8(&arry)[Z])
{
    return [=](AuUInt8 c) -> bool
    {
        for (const AuUInt8 candidate : arry)
        {
            if (candidate == c)
            {
                return true;
            }
        }

        return false;
    };
}
// Builds a predicate that reports whether a byte terminates the current token.
//
// A byte terminates when it is one of the Z built-in entries of `arry`, or when it matches
// one of the first state.countOfTokens entries of state.additionalTokens. In the latter
// case the matching index is written to state.lastTokenAdditional as a side effect.
//
// The functor captures `state` and `arry` by reference ([&]) and must not outlive them.
template<size_t Z>
static AuFunction<bool(AuUInt8)> IsTerminating(ParseState &state, const AuUInt8(&arry)[Z])
{
    return [&](AuUInt8 c) -> bool
    {
        // Built-in terminators first; these never touch the parse state.
        for (const AuUInt8 builtin : arry)
        {
            if (builtin == c)
            {
                return true;
            }
        }

        // Then the caller-supplied terminators, remembering which one matched.
        int idx = 0;
        while (idx < state.countOfTokens)
        {
            if (state.additionalTokens[idx] == c)
            {
                state.lastTokenAdditional = idx;
                return true;
            }
            ++idx;
        }

        return false;
    };
}
// oh god just dont think about this tokenizer too much
//
// Pulls one textual token out of state.stream and appends its characters to `out`.
//
// Behaviour, as implemented below:
//  - '\r' is dropped unconditionally.
//  - '\n', '\0' and the additional terminators registered in `state` end the token unless
//    we are inside a quoted string or the terminator was escaped in a vararg string. When
//    a terminator ends the token, state.hasLastToken and state.lastTokenCharacter are set.
//  - For kParseString, a leading '"' opens a quoted string that may contain spaces; it is
//    closed by a '"' that is followed by a space, a terminator or the end of the stream.
//  - For kParseString, a backslash before '"', ' ', '\n' or '\r' escapes that character.
//  - For kParseStringVararg, spaces do not end the token, and a backslash directly before
//    a terminator keeps the terminator as part of the token.
//  - For all other tags an unquoted space ends the token.
//
// Returns false when the token is empty or when a quoted string was never closed
// (a syntax error is pushed in the latter case); true otherwise.
bool ConsumeStringifedToken(ParseState &state, ParsableTag type, AuString &out)
{
    static unsigned char terminatingChars[] = { '\n', '\00' };
    static unsigned char whiteChars[] = { ' ' };
    static unsigned char illegalCharacters[] = { '\r' }; //reeedows

    auto isString = type == ParsableTag::kParseString;
    auto isvararg = type == ParsableTag::kParseStringVararg;

    bool stringLevel = false;
    bool escapedNewLine = false;
    bool escapeNextLegal = false;

    auto isTerminating = IsTerminating(state, terminatingChars);
    static auto isSpace = ContainedHerein(whiteChars);
    static auto isIgnore = ContainedHerein(illegalCharacters);

    while (true)
    {
        // Value-initialised so that a failed Next/PeekNext can never leave us reading
        // an indeterminate byte.
        AuUInt8 cur {};
        AuUInt8 peek {};

        // consume character from the stream
        if (!state.stream->Next(cur))
        {
            break;
        }

        // some characters should be considered universally illegal such as nasty unicode spaces,
        // carriage returns, and any other nasty crash exploit dependee characters
        if (isIgnore(cur))
        {
            continue;
        }

        // Is end of line/end of string
        if (isTerminating(cur))
        {
            // Make sure we aren't encapsulated by quotation marks
            if ((!stringLevel) && (!AuExchange(escapedNewLine, false)))
            {
                state.hasLastToken = true;
                state.lastTokenCharacter = cur;
                break;
            }
        }

        // if the string starts with a quotation mark, set stringLevel to true
        // TODO: I dont remember the parse rules. Check the old tests. Should this be AuStartsWith?
        if ((cur == '"') && (isString) && (out.empty()))
        {
            stringLevel = true;
            continue;
        }

        auto peekStatus = state.stream->PeekNext(peek);

        // Only classify the look-ahead byte when there actually is one. isTerminating()
        // writes state.lastTokenAdditional on a match, so it must not be fed a byte that
        // was never read. Every use of isPeekterminating below is already guarded by
        // peekStatus, so the token boundaries are unchanged.
        bool isPeekterminating = peekStatus && isTerminating(peek);

        // prepare escape characters
        if ((peekStatus) && (cur == '\\'))
        {
            // check \ \n pair + is var arg
            if ((isPeekterminating) && (isvararg))
            {
                escapedNewLine = true;
                continue;
            }

            // escape character for "'s in strings
            if (isString && (peek == '\"' || peek == ' ' || peek == '\n' || peek == '\r'))
            {
                escapeNextLegal = true;
                continue;
            }
        }

        // see above
        if (AuExchange(escapeNextLegal, false))
        {
            out += cur;
            continue;
        }

        // match the ending '"' character in the token to terminate strings containing spaces
        // do not make this more ambiguous by removing the **else if**.
        if (
            (cur == '"') && (isString) && (stringLevel) //
            // can we expect to fail next iteration?
            && (((peekStatus && (isPeekterminating || isSpace(peek)) /*Is ending character?*/) ||
                 (!peekStatus)))) // Is EoS?
        {
            stringLevel = false;
            continue;
        }

        // if we hit a space in a string, assume we're at the end of the token, unless we're parsing
        // a string in quotation marks or are consuming the entire line (ParsableTag::kParseStringVararg)
        if ((isSpace(cur)) && (!stringLevel) && (!isvararg))
        {
            break;
        }

        // otherwise emit
        out += cur;
    }

    //SysAssert(!stringLevel, "Parsed tag of string type must end with \", got {}", out);
    if (stringLevel)
    {
        SysPushErrorSyntaxError("Parsed tag of string type must end with \", got {}", out);
        return false;
    }

    //AuLogDbg("returned {} {}", out, out.size() != 0);
    return out.size() != 0;
}
// Invokes func(in.c_str(), nullptr) and stores the result in `out`.
// Any exception thrown by the call is swallowed and reported as a `false` return;
// `out` is only assigned when the call completes.
template<typename Func, typename Res>
static bool ScrewExceptions_2(Func func, const AuString &in, Res &out)
{
    bool bSucceeded = true;

    try
    {
        out = func(in.c_str(), nullptr);
    }
    catch (...)
    {
        bSucceeded = false;
    }

    return bSucceeded;
}
// Three-way sign of `val`: 1 when val is greater than zero, -1 when it is less than zero,
// and 0 otherwise.
template <typename T> int SignBit(T val)
{
    const T zero = T(0);

    if (zero < val)
    {
        return 1;
    }

    if (val < zero)
    {
        return -1;
    }

    return 0;
}
// Parses a base-10 integer from the character range [begin, end).
//
// Template parameters:
//   T        - integral result type. A single leading '-' is accepted when T is signed.
//   Iterator - iterator over characters.
//
// Parameters:
//   begin - first character to read.
//   end   - in: one past the last character that may be read.
//           out: on success, and on overflow, the position at which parsing stopped.
//           Left unchanged when an invalid character is hit.
//   out   - receives the parsed value; set to 0 on every failure path.
//
// Parsing stops at `end` or at the first '\0', whichever comes first. Any character other
// than a decimal digit fails the parse. An input without any digits (empty, or just "-")
// succeeds with a value of 0.
//
// Overflow is detected exactly, before each multiplication, so neither unsigned wrap-around
// nor signed overflow can occur. Negative numbers are accumulated as negatives, which allows
// the most negative value of T to be parsed. On overflow a syntax error is pushed and false
// is returned.
template <typename T, typename Iterator>
bool ParseInt(Iterator begin, Iterator &end, T &out)
{
    using Limits = std::numeric_limits<T>;

    out = 0;
    auto itr = begin;

    [[maybe_unused]] bool bNegative = false;
    if constexpr (Limits::is_signed)
    {
        if ((itr != end) && (*itr == '-'))
        {
            ++itr;
            bNegative = true;
        }
    }

    T res = 0;
    for (; (itr != end) && (*itr != '\0'); ++itr)
    {
        const auto c = *itr;
        if ((c < '0') || (c > '9'))
        {
            return false;
        }

        const T digit = static_cast<T>(c - '0');

        if constexpr (Limits::is_signed)
        {
            // res * 10 - digit >= min  <=>  res >= (min + digit) / 10   (division truncates towards zero)
            // res * 10 + digit <= max  <=>  res <= (max - digit) / 10
            const bool bOverflow = bNegative ? (res < (Limits::min() + digit) / 10)
                                             : (res > (Limits::max() - digit) / 10);
            if (bOverflow)
            {
                SysPushErrorSyntaxError("Signed integer overflow: {}", AuString(begin, end));
                end = itr;
                return false;
            }

            res = static_cast<T>(bNegative ? (res * 10 - digit) : (res * 10 + digit));
        }
        else
        {
            if (res > (Limits::max() - digit) / 10)
            {
                SysPushErrorSyntaxError("Unsigned integer overflow: {}", AuString(begin, end));
                end = itr;
                return false;
            }

            res = static_cast<T>(res * 10 + digit);
        }
    }

    end = itr;
    out = res;
    return true;
}
// Parses a base-16 integer from the character range [begin, end).
//
// Accepted form: [-] [0x] hex-digits [h|H]
//   - the leading '-' is only accepted when T is signed;
//   - the "0x" prefix is optional (lower-case 'x' only);
//   - hex digits may be upper or lower case;
//   - a single trailing 'h' or 'H' (the last character of the range) is an optional suffix.
//
// Template parameters:
//   T        - integral result type.
//   Iterator - random-access iterator over characters.
//
// Parameters:
//   begin - first character to read.
//   end   - in: one past the last character that may be read.
//           out: on success, the position at which parsing stopped. When at least one digit
//           was read and the digits ran right up to the suffix, the suffix counts as consumed,
//           so a fully consumed "FFh" reports the original `end`. On overflow, the position
//           of the offending digit. Left unchanged when an invalid character is hit.
//   out   - receives the parsed value; set to 0 on every failure path.
//
// Parsing stops at the end of the digit range or at the first '\0'. An input without any
// digits succeeds with a value of 0. Overflow is detected exactly, before each multiplication;
// on overflow a syntax error is pushed and false is returned.
template <typename T, typename Iterator>
bool ParseInt16(Iterator begin, Iterator &end, T &out)
{
    using Limits = std::numeric_limits<T>;

    out = 0;
    auto itr = begin;

    [[maybe_unused]] bool bNegative = false;
    if constexpr (Limits::is_signed)
    {
        if ((itr != end) && (*itr == '-'))
        {
            ++itr;
            bNegative = true;
        }
    }

    // Optional "0x" prefix. The second character is only inspected when it is inside the range.
    if (((end - itr) >= 2) && (*itr == '0') && (itr[1] == 'x'))
    {
        itr += 2;
    }

    // Optional 'h' / 'H' suffix. The digit range is narrowed locally; the caller's `end`
    // is only written once the outcome is known.
    auto digitsEnd = end;
    bool bHasSuffix = false;
    if (itr != end)
    {
        const auto endChar = *(end - 1);
        if (endChar == 'h' || endChar == 'H')
        {
            --digitsEnd;
            bHasSuffix = true;
        }
    }

    T res = 0;
    bool bAnyDigit = false;
    for (; (itr != digitsEnd) && (*itr != '\0'); ++itr)
    {
        const auto c = *itr;
        int delta {};

        if ((c >= 'A') && (c <= 'F'))
        {
            delta = 10 + (c - 'A');
        }
        else if ((c >= 'a') && (c <= 'f'))
        {
            delta = 10 + (c - 'a');
        }
        else if ((c >= '0') && (c <= '9'))
        {
            delta = c - '0';
        }
        else
        {
            return false;
        }

        const T digit = static_cast<T>(delta);

        if constexpr (Limits::is_signed)
        {
            // res * 16 - digit >= min  <=>  res >= (min + digit) / 16   (division truncates towards zero)
            // res * 16 + digit <= max  <=>  res <= (max - digit) / 16
            const bool bOverflow = bNegative ? (res < (Limits::min() + digit) / 16)
                                             : (res > (Limits::max() - digit) / 16);
            if (bOverflow)
            {
                SysPushErrorSyntaxError("Signed integer overflow: {}", AuString(begin, end));
                end = itr;
                return false;
            }

            res = static_cast<T>(bNegative ? (res * 16 - digit) : (res * 16 + digit));
        }
        else
        {
            if (res > (Limits::max() - digit) / 16)
            {
                SysPushErrorSyntaxError("Unsigned integer overflow: {}", AuString(begin, end));
                end = itr;
                return false;
            }

            res = static_cast<T>(res * 16 + digit);
        }

        bAnyDigit = true;
    }

    // Step over the suffix so that callers comparing `end` against the end of their input
    // see a fully consumed string. A lone suffix without digits is not treated as consumed.
    if (bHasSuffix && bAnyDigit && (itr == digitsEnd))
    {
        ++itr;
    }

    end = itr;
    out = res;
    return true;
}
// Reads one token from `state` and converts it according to `type`, storing the result
// in the matching member of `out`.
//
// Handled tags: kParseUInt, kParseSInt, kParseNumber, kParseString, kParseStringVararg,
// kParseUUID and kParseBoolean. Any other tag panics.
//
// Returns false when no token could be read or the text is not valid for the tag:
//  - integers must consume the whole token;
//  - booleans accept "0"/"false"/"no" and "1"/"true"/"yes" (words compared case-insensitively);
//  - UUIDs must be accepted by uuids::uuid::from_string.
//
// NOTE(review): kParseNumber goes through std::stod without a position output, so trailing
// characters after a valid number are presumably not rejected - confirm whether that is intended.
static bool ConsumeTokenPrimitiveish(ParseState &state, ParsableTag type, ParseValue &out)
{
    AuString str;

    if (!ConsumeStringifedToken(state, type, str))
    {
        return false;
    }

    if ((type != ParsableTag::kParseString) && (str.empty()))
    {
        return false;
    }

    switch (type)
    {
    case ParsableTag::kParseUInt:
    {
        auto end = str.end();
        return ParseInt<AuUInt64>(str.begin(), end, out.primitive.uint) && end == str.end();
    }
    case ParsableTag::kParseSInt:
    {
        auto end = str.end();
        return ParseInt<AuInt64>(str.begin(), end, out.primitive.sint) && end == str.end();
    }
    case ParsableTag::kParseNumber:
    {
        return ScrewExceptions_2(static_cast<double(&)(const std::string &, std::size_t *)>(std::stod), str, out.primitive.number);
    }
    case ParsableTag::kParseString:
    case ParsableTag::kParseStringVararg:
    {
        out.string = AuMove(str);
        break;
    }
    case ParsableTag::kParseUUID:
    {
        AuOptional<uuids::uuid> uuid;
        uuid = uuids::uuid::from_string(str);
        if (!uuid.has_value())
        {
            SysPushErrorSyntaxError("Parse Error: invalid UUID {}", str);
            return false;
        }
        out.UUID = uuid.value();
        break;
    }
    case ParsableTag::kParseBoolean:
    {
        if ((str == "0") ||
            (stricmp(str.c_str(), "false") == 0) ||
            (stricmp(str.c_str(), "no") == 0))
        {
            out.primitive.boolean = false;
        }
        else if ((str == "1") ||
                 (stricmp(str.c_str(), "true") == 0) ||
                 (stricmp(str.c_str(), "yes") == 0))
        {
            out.primitive.boolean = true;
        }
        else
        {
            SysPushErrorSyntaxError("Parsed tag of boolean type wasn't parsable given the English string {}", str);
            return false;
        }
        break;
    }
    default:
        SysPanic("Invalid consume tag {}", type);
    }

    return true;
}
// Reads one value of the given tag from `state` into `out`.
//
// kParseVec3 and kParseVec4 are read as three or four consecutive kParseNumber tokens
// stored into out.primitive.vec3 / out.primitive.vec4; reading stops with `false` at the
// first component that fails, leaving earlier components written. Every other tag is
// forwarded to ConsumeTokenPrimitiveish.
AUKN_SYM bool ConsumeToken(ParseState &state, ParsableTag type, ParseValue &out)
{
    ParseValue temp;

    // Pulls the next number token into temp.primitive.number.
    const auto nextNumber = [&]() -> bool
    {
        return ConsumeTokenPrimitiveish(state, ParsableTag::kParseNumber, temp);
    };

    if (type == ParsableTag::kParseVec3)
    {
        for (int component = 0; component < 3; component++)
        {
            if (!nextNumber())
            {
                return false;
            }
            out.primitive.vec3[component] = temp.primitive.number;
        }
        return true;
    }

    if (type == ParsableTag::kParseVec4)
    {
        for (int component = 0; component < 4; component++)
        {
            if (!nextNumber())
            {
                return false;
            }
            out.primitive.vec4[component] = temp.primitive.number;
        }
        return true;
    }

    return ConsumeTokenPrimitiveish(state, type, out);
}
// Convenience overload: builds a temporary ParseState from the character provider and reads
// a single value of the given tag into `out`. The ParseState is discarded on return.
AUKN_SYM bool ConsumeToken(ParsableTag type, const AuSPtr<Aurora::IO::Character::ICharacterProvider> &getc, ParseValue &out)
{
ParseState state(getc);
return ConsumeToken(state, type, out);
}
// Convenience overload: builds a temporary ParseState from the buffered character consumer and
// reads a single value of the given tag into `out`. The ParseState is discarded on return.
AUKN_SYM bool ConsumeToken(ParsableTag type, const AuSPtr<Aurora::IO::Character::IBufferedCharacterConsumer> &getc, ParseValue &out)
{
ParseState state(getc);
return ConsumeToken(state, type, out);
}
// Reads values of the given tag from the buffered character consumer until one fails to
// parse, and returns everything that was read up to that point (possibly nothing).
AUKN_SYM AuList<AuParse::ParseValueEx> ConsumeTokens(ParsableTag type, const AuSPtr<Aurora::IO::Character::IBufferedCharacterConsumer> &getc)
{
    AuList<AuParse::ParseValueEx> tokens;
    AuParse::ParseValueEx current;
    AuParse::ParseState state(getc);

    for (;;)
    {
        if (!AuParse::ConsumeToken(state, type, current))
        {
            break;
        }

        tokens.push_back(current);
    }

    return tokens;
}
// Reads values of the given tag from the character provider until one fails to parse,
// and returns everything that was read up to that point (possibly nothing).
AUKN_SYM AuList<AuParse::ParseValueEx> ConsumeTokens(ParsableTag type, const AuSPtr<Aurora::IO::Character::ICharacterProvider> &getc)
{
    AuList<AuParse::ParseValueEx> tokens;
    AuParse::ParseValueEx current;
    AuParse::ParseState state(getc);

    for (;;)
    {
        if (!AuParse::ConsumeToken(state, type, current))
        {
            break;
        }

        tokens.push_back(current);
    }

    return tokens;
}
// Parses the token stream in `state` according to `structure`, appending one ParsedBit per
// successfully read entry to result.result.
//
// For every entry of `structure`:
//  - when `array` is set, an unsigned element count is read first;
//  - elements are then read until the count is reached, or - when `vararg` is set - until
//    an element fails to parse;
//  - kParseObject entries recurse into Parse with the entry's objectParse description;
//  - a failed element ends the entry quietly when it is `optional` or `vararg`, otherwise a
//    syntax error is pushed and false is returned;
//  - entries that produced no element are not added to the result.
//
// Returns true when every entry was processed; result.syntaxError and result.debugTree are
// then set to the placeholder "No-Debug".
AUKN_SYM bool Parse(ParseState &state, const ParseObject &structure, ParseResult &result)
{
for (auto &parseBit : structure)
{
ParsedBit parsed = {};
bool ok;
parsed.tag = parseBit.tag;
// Non-array entries are read as exactly one element.
AuMach arrayLength = 1;
if (parseBit.array)
{
// Arrays are prefixed with their element count.
ParseValue arrayLengthBit = {};
if (!ConsumeToken(state, ParsableTag::kParseUInt, arrayLengthBit))
{
SysPushErrorSyntaxError("Couldn't consume array length, label: {}, tag {}", parseBit.label, parseBit.tag);
return false;
}
arrayLength = arrayLengthBit.primitive.uint;
}
parsed.count = 0;
// vararg entries ignore the count and keep going until an element fails (see the break below).
for (int i = 0; ((i < arrayLength) || (parseBit.vararg)); i++)
{
ParseValueEx parsedSingle = {};
ParseResult nestedresult = {};
switch (parseBit.tag)
{
case ParsableTag::kParseUInt:
case ParsableTag::kParseSInt:
case ParsableTag::kParseString:
case ParsableTag::kParseStringVararg:
case ParsableTag::kParseNumber:
case ParsableTag::kParseBoolean:
case ParsableTag::kParseUUID:
case ParsableTag::kParseVec3:
case ParsableTag::kParseVec4:
{
ok = ConsumeToken(state, parseBit.tag, parsedSingle);
break;
}
case ParsableTag::kParseObject:
{
// TODO: Although this should never result in a stack overflow, i'd rather return a request to parse than recursively call the same function
// Bah
ok = Parse(state, parseBit.objectParse, nestedresult);
// TODO: debug info
parsedSingle.object = nestedresult.result;
break;
}
default:
SysPanic("Invalid consume tag {} for {}", parseBit.tag, parseBit.label);
}
if (!ok)
{
// Optional and vararg entries simply stop here; anything already read is kept.
if ((parseBit.optional) || (parseBit.vararg))
{
break;
}
SysPushErrorSyntaxError("Syntax error around: label: {}, tag {}", parseBit.label, parseBit.tag);
return false;
}
if (parseBit.vararg || parseBit.array)
{
// Multi-element entries accumulate into value.array.
parsed.count++;
if (!AuTryInsert(parsed.value.array, parsedSingle))
{
SysPushErrorMem();
return false;
}
parsed.isArray = true;
}
else
{
// Single-element entries live in value.single.
parsed.isArray = false;
parsed.count = 1;
parsed.value.single = parsedSingle;
}
}
// do not add if an optional bit was not serialized
if (parsed.count != 0)
{
result.result.push_back(parsed);
}
}
result.syntaxError = "No-Debug";
result.debugTree = "No-Debug";
return true;
}
// Appends the textual form of a single value of the given tag to `str`.
//
//  - kParseUInt / kParseSInt / kParseNumber: AuToString of the value.
//  - kParseVec3 / kParseVec4: the components separated by single spaces.
//  - kParseString: wrapped in double quotes, with '\' and '"' prefixed by a backslash.
//  - kParseStringVararg: emitted raw, with every newline prefixed by a backslash.
//  - kParseUUID: uuids::to_string of the value.
//  - kParseBoolean: "true" or "false".
//
// Any other tag panics. The string member of `value` is only copied for the two string
// tags, which are the only ones that need a scratch buffer for escaping.
AUKN_SYM void SerializeToken(ParsableTag type, const ParseValue &value, AuString &str)
{
    switch (type)
    {
    case ParsableTag::kParseUInt:
    {
        str += AuToString(value.primitive.uint);
        break;
    }
    case ParsableTag::kParseSInt:
    {
        str += AuToString(value.primitive.sint);
        break;
    }
    case ParsableTag::kParseNumber:
    {
        str += AuToString(value.primitive.number);
        break;
    }
    case ParsableTag::kParseVec3:
    {
        str += AuToString(value.primitive.vec3[0]) + " ";
        str += AuToString(value.primitive.vec3[1]) + " ";
        str += AuToString(value.primitive.vec3[2]);
        break;
    }
    case ParsableTag::kParseVec4:
    {
        str += AuToString(value.primitive.vec4[0]) + " ";
        str += AuToString(value.primitive.vec4[1]) + " ";
        str += AuToString(value.primitive.vec4[2]) + " ";
        str += AuToString(value.primitive.vec4[3]);
        break;
    }
    case ParsableTag::kParseString:
    {
        AuString temp = value.string;
        // Backslashes first, so the backslashes added for the quotes are not doubled.
        AuReplaceAll(temp, "\\", "\\\\");
        AuReplaceAll(temp, "\"", "\\\"");
        str += "\"" + temp + "\"";
        break;
    }
    case ParsableTag::kParseStringVararg:
    {
        AuString temp = value.string;
        AuReplaceAll(temp, "\n", "\\\n");
        str += temp;
        break;
    }
    case ParsableTag::kParseUUID:
    {
        str += uuids::to_string(value.UUID);
        break;
    }
    case ParsableTag::kParseBoolean:
    {
        if (value.primitive.boolean)
        {
            str += "true";
        }
        else
        {
            str += "false";
        }
        break;
    }
    default:
        SysPanic("Invalid consume tag {}", type);
    }
}
// Appends the textual form of a parsed object to `ret`.
//
// Entries are written in order and separated by single spaces; no separator is written
// while `ret` is still empty. Entries flagged isArray are prefixed with their element count.
// Nested objects (kParseObject) are written by recursing into Serialize.
//
// The separator in front of a nested object is left to the recursive call, which emits one
// before its own first token. Emitting one here as well would produce two consecutive
// spaces, and the tokenizer in this file treats the second space as an empty token.
//
// Panics on a tag that cannot be emitted.
AUKN_SYM void Serialize(const ParsedObject &structure, AuString &ret)
{
    for (auto &parsed : structure)
    {
        if (parsed.isArray)
        {
            if (ret.size())
            {
                ret += " ";
            }
            ret += AuToString(parsed.count);
        }

        // Values live in value.array for multi-element entries and in value.single otherwise.
        bool isArray = parsed.count > 1 || parsed.isArray;

        for (int i = 0; i < parsed.count; i++)
        {
            switch (parsed.tag)
            {
            case ParsableTag::kParseUInt:
            case ParsableTag::kParseSInt:
            case ParsableTag::kParseString:
            case ParsableTag::kParseStringVararg:
            case ParsableTag::kParseNumber:
            case ParsableTag::kParseBoolean:
            case ParsableTag::kParseUUID:
            case ParsableTag::kParseVec3:
            case ParsableTag::kParseVec4:
            {
                if (ret.size())
                {
                    ret += " ";
                }
                SerializeToken(parsed.tag, !isArray ? parsed.value.single : parsed.value.array[i], ret);
                break;
            }
            case ParsableTag::kParseObject:
            {
                Serialize(!isArray ? parsed.value.single.object : parsed.value.array[i].object, ret);
                break;
            }
            default:
                SysPanic("Invalid emit tag {}", parsed.tag);
            }
        }
    }
}
// Parses an unsigned base-10 integer from [begin, end) via ParseInt<AuUInt>.
// `end` is passed through to ParseInt by reference and may be updated by it.
// Returns an empty result when parsing fails.
AUKN_SYM AuResult<AuUInt> ParseUInt(const char *begin, const char *&end)
{
    AuUInt value {};

    if (ParseInt<AuUInt>(begin, end, value))
    {
        return value;
    }

    return {};
}
// Parses a signed base-10 integer from [begin, end) via ParseInt<AuSInt>.
// `end` is passed through to ParseInt by reference and may be updated by it.
// Returns an empty result when parsing fails.
AUKN_SYM AuResult<AuSInt> ParseSInt(const char *begin, const char *&end)
{
    AuSInt value {};

    if (ParseInt<AuSInt>(begin, end, value))
    {
        return value;
    }

    return {};
}
// Parses an unsigned base-16 integer from [begin, end) via ParseInt16<AuUInt>.
// `end` is passed through to ParseInt16 by reference and may be updated by it.
// Returns an empty result when parsing fails.
AUKN_SYM AuResult<AuUInt> ParseUInt16(const char *begin, const char *&end)
{
    AuUInt value {};

    if (ParseInt16<AuUInt>(begin, end, value))
    {
        return value;
    }

    return {};
}
// Parses a signed base-16 integer from [begin, end) via ParseInt16<AuSInt>.
// `end` is passed through to ParseInt16 by reference and may be updated by it.
// Returns an empty result when parsing fails.
AUKN_SYM AuResult<AuSInt> ParseSInt16(const char *begin, const char *&end)
{
    AuSInt value {};

    if (ParseInt16<AuSInt>(begin, end, value))
    {
        return value;
    }

    return {};
}
// Formats a signed integer as upper-case hexadecimal.
// With bZeroX set, the "{0:#X}" alternate form is used and the 'X' of the prefix, found at
// index 1 or index 2 of the formatted text, is rewritten to a lower-case 'x'.
// Without bZeroX the plain "{:X}" form is returned.
AUKN_SYM AuString StringifySInt16(AuInt64 in, bool bZeroX)
{
    if (!bZeroX)
    {
        return fmt::format("{:X}", in);
    }

    auto formatted = fmt::format("{0:#X}", in);
    const auto length = formatted.size();

    if (length > 2)
    {
        const bool bAtOne = formatted[1] == 'X';
        const bool bAtTwo = (!bAtOne) && (length > 3) && (formatted[2] == 'X');

        if (bAtOne || bAtTwo)
        {
            formatted[bAtOne ? 1 : 2] = 'x';
        }
    }

    return formatted;
}
// Formats an unsigned integer as upper-case hexadecimal.
// With bZeroX set, the "{0:#X}" alternate form is used and the 'X' of the prefix, found at
// index 1 or index 2 of the formatted text, is rewritten to a lower-case 'x'.
// Without bZeroX the plain "{:X}" form is returned.
AUKN_SYM AuString StringifyUInt16(AuUInt64 in, bool bZeroX)
{
    if (!bZeroX)
    {
        return fmt::format("{:X}", in);
    }

    auto formatted = fmt::format("{0:#X}", in);
    const auto length = formatted.size();

    if (length > 2)
    {
        const bool bAtOne = formatted[1] == 'X';
        const bool bAtTwo = (!bAtOne) && (length > 3) && (formatted[2] == 'X');

        if (bAtOne || bAtTwo)
        {
            formatted[bAtOne ? 1 : 2] = 'x';
        }
    }

    return formatted;
}
// Parses a whole NUL-terminated string as a signed base-16 integer.
// Returns an empty result when parsing fails or when the range-based parser did not
// report the end of the string as its stopping position.
AUKN_SYM AuResult<AuSInt> ParseSInt16(const char *begin)
{
    const char *const pTerminator = begin + ::strlen(begin);
    const char *pCursor = pTerminator;

    auto res = ParseSInt16(begin, pCursor);
    if ((!res.has_value()) || (pCursor != pTerminator))
    {
        return {};
    }

    return res;
}
// Parses a whole string as a signed base-16 integer.
// Returns an empty result when parsing fails or when the range-based parser did not
// report the end of the string as its stopping position.
AUKN_SYM AuResult<AuSInt> ParseSInt16(const AuString &str)
{
    const char *const pTerminator = &str[str.size()];
    const char *pCursor = pTerminator;

    auto res = ParseSInt16(str.c_str(), pCursor);
    if ((!res.has_value()) || (pCursor != pTerminator))
    {
        return {};
    }

    return res;
}
// Parses a whole NUL-terminated string as an unsigned base-16 integer.
// Returns an empty result when parsing fails or when the range-based parser did not
// report the end of the string as its stopping position.
AUKN_SYM AuResult<AuUInt> ParseUInt16(const char *begin)
{
    const char *const pTerminator = begin + ::strlen(begin);
    const char *pCursor = pTerminator;

    auto res = ParseUInt16(begin, pCursor);
    if ((!res.has_value()) || (pCursor != pTerminator))
    {
        return {};
    }

    return res;
}
// Parses a whole string as an unsigned base-16 integer.
// Returns an empty result when parsing fails or when the range-based parser did not
// report the end of the string as its stopping position.
AUKN_SYM AuResult<AuUInt> ParseUInt16(const AuString &str)
{
    const char *const pTerminator = &str[str.size()];
    const char *pCursor = pTerminator;

    auto res = ParseUInt16(str.c_str(), pCursor);
    if ((!res.has_value()) || (pCursor != pTerminator))
    {
        return {};
    }

    return res;
}
// Parses a whole string as a signed base-10 integer.
// Returns an empty result when parsing fails or when the range-based parser did not
// report the end of the string as its stopping position.
AUKN_SYM AuResult<AuSInt> ParseSInt(const AuString &str)
{
    const char *const pTerminator = &str[str.size()];
    const char *pCursor = pTerminator;

    auto res = ParseSInt(str.c_str(), pCursor);
    if ((!res.has_value()) || (pCursor != pTerminator))
    {
        return {};
    }

    return res;
}
// Parses a whole NUL-terminated string as a signed base-10 integer.
// Returns an empty result when parsing fails or when the range-based parser did not
// report the end of the string as its stopping position.
AUKN_SYM AuResult<AuSInt> ParseSInt(const char *begin)
{
    const char *const pTerminator = begin + ::strlen(begin);
    const char *pCursor = pTerminator;

    auto res = ParseSInt(begin, pCursor);
    if ((!res.has_value()) || (pCursor != pTerminator))
    {
        return {};
    }

    return res;
}
// Parses a whole string as an unsigned base-10 integer.
// Returns an empty result when parsing fails or when the range-based parser did not
// report the end of the string as its stopping position.
AUKN_SYM AuResult<AuUInt> ParseUInt(const AuString &str)
{
    const char *const pTerminator = &str[str.size()];
    const char *pCursor = pTerminator;

    auto res = ParseUInt(str.c_str(), pCursor);
    if ((!res.has_value()) || (pCursor != pTerminator))
    {
        return {};
    }

    return res;
}
// Parses a whole NUL-terminated string as an unsigned base-10 integer.
// Returns an empty result when parsing fails or when the range-based parser did not
// report the end of the string as its stopping position.
AUKN_SYM AuResult<AuUInt> ParseUInt(const char *begin)
{
    const char *const pTerminator = begin + ::strlen(begin);
    const char *pCursor = pTerminator;

    auto res = ParseUInt(begin, pCursor);
    if ((!res.has_value()) || (pCursor != pTerminator))
    {
        return {};
    }

    return res;
}
}