AuroraRuntime/Source/Parse/Parser.cpp

660 lines
19 KiB
C++

/***
Copyright (C) 2021 J Reece Wilson (a/k/a "Reece"). All rights reserved.
File: Parser.cpp
Date: 2021-6-12
Author: Reece
***/
#include <Source/RuntimeInternal.hpp>
#include "Parser.hpp"
namespace Aurora::Parse
{
// Exported validation hook for a parse structure description.
// The body is currently empty: `object` is not inspected and nothing is reported.
AUKN_SYM void VaildateStructure(const ParseObject &object)
{
}
// Byte reader with a single byte of lookahead on top of a ConsumeStream_cb source.
//
// The context stores a *reference* to the callback, so the callback object must
// outlive the context.
struct ParseContext
{
    AuUInt8 _next;                  // buffered lookahead byte; only meaningful while _hasNext is true
    bool _hasNext;                  // true when _next holds a byte that Next() has not handed out yet
    const ConsumeStream_cb &_getc;  // underlying byte source

    // Initializers are listed in member declaration order, which is the order in
    // which they actually run.
    explicit ParseContext(const ConsumeStream_cb &getc) : _next(0), _hasNext(false), _getc(getc)
    {
    }

    // Consumes one byte into `c`.
    // A byte buffered by a previous Peak() is returned first; otherwise the
    // underlying source is read. Returns false when no byte could be obtained.
    bool Next(AuUInt8 &c)
    {
        if (std::exchange(_hasNext, false))
        {
            c = _next;
            return true;
        }

        return _getc(c);
    }

    // Looks at the next byte without consuming it.
    // Returns false when the source has no byte to offer; `c` is still assigned
    // from the lookahead slot in that case and must not be relied upon.
    bool Peak(AuUInt8 &c)
    {
        if (!_hasNext)
        {
            _hasNext = _getc(_next);
        }

        c = _next;
        return _hasNext;
    }
};
template<size_t Z>
static std::function<bool(AuUInt8)> ContainedHerein(const AuUInt8(&arry)[Z])
{
return [=](AuUInt8 c) -> bool
{
for (int i = 0; i < Z; i++)
{
if (arry[i] == c)
{
return true;
}
}
return false;
};
}
// Builds a predicate that reports whether a byte terminates the current token.
//
// A byte terminates when it is listed in `arry`, or when it equals one of the
// first `state.countOfTokens` entries of `state.additionalTokens`. In the latter
// case the index of the matching entry is recorded in `state.lastTokenAdditional`.
//
// The predicate holds references to both `state` and `arry`; it must not be
// used after either of them has gone away.
template<size_t Z>
static std::function<bool(AuUInt8)> IsTerminating(ParseState &state, const AuUInt8(&arry)[Z])
{
    return [&state, &arry](AuUInt8 c) -> bool
    {
        // fixed terminators first; these never touch the state
        for (const AuUInt8 terminator : arry)
        {
            if (terminator == c)
            {
                return true;
            }
        }

        // caller supplied terminators; remember which one matched
        int idx = 0;
        while (idx < state.countOfTokens)
        {
            if (state.additionalTokens[idx] == c)
            {
                state.lastTokenAdditional = idx;
                return true;
            }
            idx++;
        }

        return false;
    };
}
// oh god just dont think about this tokenizer too much
// Reads a single token from `context` and appends its text to `out`.
//
// state   - receives hasLastToken/lastTokenCharacter when an unquoted, unescaped terminator ends the
//           token; its additional terminator list is consulted through IsTerminating
// context - byte source with one byte of lookahead
// type    - kParseString enables "quoted strings" and the \" escape; kParseStringVararg consumes up to
//           the terminator (spaces included) and lets a backslash escape the terminator
// out     - token text is appended here; the caller is expected to pass it in empty
//
// Returns false when a quoted string was opened but never closed, or when no text ended up in `out`.
// The order of the checks inside the loop is significant; do not reorder them.
bool ConsumeStringifedToken(ParseState &state, ParseContext &context, ParsableTag type, AuString &out)
{
// bytes that always end a token: newline and NUL
static unsigned char terminatingChars[] = { '\n', '\00' };
// bytes that separate tokens when not quoted and not in vararg mode
static unsigned char whiteChars[] = { ' ' };
// bytes that are silently dropped from the input (carriage return, for Windows line endings)
static unsigned char illegalCharacters[] = { '\r' };
auto isString = type == ParsableTag::kParseString;
auto isvararg = type == ParsableTag::kParseStringVararg;
bool stringLevel = false; // true while inside an opening/closing quotation mark pair
bool escapedNewLine = false; // true when the previous byte was a backslash escaping a terminator (vararg only)
bool escapedQuote = false; // true when the previous byte was a backslash escaping a quotation mark
// built per call because it refers to `state`; the two below do not and are built once
auto isTerminating = IsTerminating(state, terminatingChars);
static auto isSpace = ContainedHerein(whiteChars);
static auto isIgnore = ContainedHerein(illegalCharacters);
while (true)
{
AuUInt8 next;
AuUInt8 peak;
// consume character from the stream
if (!context.Next(next))
{
break;
}
// Some characters are considered universally illegal (currently only carriage returns) and are
// skipped before any other rule is applied.
// Do not use this list to strip engine tags, color escapes, or similar markup.
if (isIgnore(next))
{
continue;
}
// Is end of line/end of string
if (isTerminating(next))
{
// Make sure we aren't encapsulated by quotation marks, and that the terminator was not escaped.
// std::exchange also clears the escape flag, so it only protects a single terminator.
if ((!stringLevel) && (!std::exchange(escapedNewLine, false)))
{
state.hasLastToken = true;
state.lastTokenCharacter = next;
break;
}
}
// if we hit a space in a string, assume we're at the end of the token, unless we're parsing
// a string in quotation marks or are consuming the entire line (ParsableTag::kParseStringVararg)
if ((isSpace(next)) && (!stringLevel) && (!isvararg))
{
break;
}
// if the string starts with a quotation mark, set stringLevel to true
// NOTE(review): this is checked before the escapedQuote handling below, so a token that begins
// with "\" followed by a quotation mark appears to take this branch for the escaped quote - confirm
if ((next == '"') && (isString) && (out.empty()))
{
stringLevel = true;
continue;
}
auto peakStatus = context.Peak(peak);
// NOTE(review): evaluated even when peakStatus is false, and isTerminating may write
// state.lastTokenAdditional as a side effect; the result is only used together with peakStatus
bool isPeakterminating = isTerminating(peak);
// check \ \n pair + is var arg: drop the backslash and let the following terminator through
if ((peakStatus) && (isPeakterminating) && (next == '\\') && (isvararg))
{
escapedNewLine = true;
continue;
}
// escape character for "'s in strings: drop the backslash, the quote is emitted on the next pass
if ((peakStatus) && (isString) && (next == '\\') && (stringLevel) && (peak == '\"'))
{
escapedQuote = true;
continue;
}
// see above
if ((next == '"') && (isString) && (std::exchange(escapedQuote, false)))
{
out += "\"";
continue;
}
// match the ending '"' character in the token to terminate strings containing spaces
// do not make this more ambiguous by removing the **else if**.
// this is mutually exclusive with the above block and must be kept in this order
// a quotation mark only closes the string when it is followed by a terminator, a space, or the
// end of the stream; any other quotation mark falls through and is emitted verbatim
else if (
(next == '"') && (isString) && (stringLevel) //
&& (((peakStatus && (isPeakterminating || isSpace(peak))) || (!peakStatus)))) // expected to fail
{
stringLevel = false;
continue;
}
// otherwise emit
out += next;
}
//SysAssert(!stringLevel, "Parsed tag of string type must end with \", got {}", out);
// the stream ended (or the loop was left) while still inside quotation marks
if (stringLevel)
{
LogWarn("Parsed tag of string type must end with \", got {}", out);
return false;
}
//Aurora::Console::Logging::LogDbg("returned {} {}", out, count != 0);
// an empty token counts as a failure, including an empty quoted string
return out.size() != 0;
}
// Adapts a throwing two-argument conversion routine (such as std::stod) to a
// boolean success/failure interface.
//
// `func` is invoked with the C string of `in` and a null second argument; its
// result is assigned to `out`. Any exception thrown along the way is swallowed
// and reported as `false`, in which case `out` is left as it was.
template<typename Func, typename Res>
static bool ScrewExceptions_2(Func func, const std::string &in, Res &out)
{
    bool converted = true;

    try
    {
        out = func(in.c_str(), nullptr);
    }
    catch (...)
    {
        converted = false;
    }

    return converted;
}
// Signum of `val`: -1 for values below zero, 1 for values above zero, 0 otherwise.
// (Despite the name this is not the raw sign bit.)
template <typename T> int SignBit(T val)
{
    const T zero = T(0);

    if (val < zero)
    {
        return -1;
    }

    if (zero < val)
    {
        return 1;
    }

    return 0;
}
// Parses a base-10 integer from `in` into `out`.
//
// Accepted syntax: an optional leading '-' (signed T only) followed by one or
// more ASCII digits. Parsing stops at the end of the string or at an embedded
// NUL character. A leading '+', whitespace, or any other character is rejected.
//
// in  - text to parse
// out - receives the parsed value; set to 0 on every failure path
//
// Returns false when the text contains no digits, contains a non-digit, or
// does not fit in T. Overflow additionally pushes a syntax error.
//
// The value is accumulated as an unsigned magnitude and bounds-checked *before*
// each multiply, so neither signed overflow nor unsigned wrap-around can occur,
// and the most negative value of T is representable.
template <typename T>
bool ParseInt(const AuString &in, T &out)
{
    using Magnitude = typename std::make_unsigned<T>::type;

    out = 0;

    auto itr = in.begin();
    const auto end = in.end();

    bool negative = false;
    if constexpr (std::is_signed<T>::value)
    {
        if ((itr != end) && (*itr == '-'))
        {
            negative = true;
            ++itr;
        }
    }

    // Largest magnitude that still fits: max() for non-negative values, max() + 1 for negative ones
    const Magnitude positiveLimit = static_cast<Magnitude>(std::numeric_limits<T>::max());
    const Magnitude limit = negative ? static_cast<Magnitude>(positiveLimit + 1) : positiveLimit;

    Magnitude magnitude = 0;
    bool sawDigit = false;

    for (; (itr != end) && (*itr != '\0'); ++itr)
    {
        const auto c = *itr;
        if ((c < '0') || (c > '9'))
        {
            return false;
        }

        const Magnitude digit = static_cast<Magnitude>(c - '0');

        // magnitude * 10 + digit <= limit  <=>  magnitude <= (limit - digit) / 10
        if (magnitude > static_cast<Magnitude>((limit - digit) / 10))
        {
            if constexpr (std::is_signed<T>::value)
            {
                SysPushErrorSyntaxError("Signed integer overflow: {}", in);
            }
            else
            {
                SysPushErrorSyntaxError("Unsigned integer overflow: {}", in);
            }
            return false;
        }

        magnitude = static_cast<Magnitude>((magnitude * 10) + digit);
        sawDigit = true;
    }

    // "" and "-" are not numbers
    if (!sawDigit)
    {
        return false;
    }

    if constexpr (std::is_signed<T>::value)
    {
        if (negative && (magnitude != 0))
        {
            // -(m - 1) - 1 stays in range even when m is the magnitude of min()
            out = static_cast<T>(-static_cast<T>(magnitude - 1) - 1);
            return true;
        }
    }

    out = static_cast<T>(magnitude);
    return true;
}
template <typename T>
bool ParseUInt(const AuString &in, T &out)
{
auto temp = AuUInt{};
out = 0;
if (!ParseInt<AuUInt>(in, temp))
{
return false;
}
if (temp > std::numeric_limits<T>::max())
{
return false;
}
out = temp;
return true;
}
template <typename T>
bool ParseSInt(const AuString &in, T &out)
{
auto temp = AuSInt{};
out = 0;
if (!ParseInt<AuSInt>(in, temp))
{
return false;
}
if (temp > std::numeric_limits<T>::max())
{
return false;
}
if (temp < std::numeric_limits<T>::min())
{
return false;
}
out = temp;
return true;
}
// Reads one token from the stream and converts it according to `type`.
//
// Handles the scalar tags only (unsigned/signed integer, number, string,
// vararg string, UUID, boolean); the converted value is stored in the
// matching member of `out`. Any other tag panics.
//
// Returns false when no token could be read or when the token text is not a
// valid representation of the requested type.
static bool ConsumeTokenPrimitiveish(ParseState &state, ParseContext &context, ParsableTag type, ParseValue &out)
{
    AuString token;

    if (!ConsumeStringifedToken(state, context, type, token))
    {
        return false;
    }

    if ((token.empty()) && (type != ParsableTag::kParseString))
    {
        return false;
    }

    // case-insensitive comparison of the token against a keyword
    const auto matchesKeyword = [&token](const char *keyword) -> bool
    {
        return stricmp(token.c_str(), keyword) == 0;
    };

    switch (type)
    {
    case ParsableTag::kParseUInt:
        return ParseUInt(token, out.primitive.uint);

    case ParsableTag::kParseSInt:
        return ParseSInt(token, out.primitive.sint);

    case ParsableTag::kParseNumber:
        // std::stod throws on malformed input; the wrapper turns that into `false`
        return ScrewExceptions_2(static_cast<double(&)(const std::string &, std::size_t *)>(std::stod), token, out.primitive.number);

    case ParsableTag::kParseString:
    case ParsableTag::kParseStringVararg:
        out.string = token;
        return true;

    case ParsableTag::kParseUUID:
    {
        AuOptional<uuids::uuid> parsedUuid;
        parsedUuid = uuids::uuid::from_string(token);
        if (!parsedUuid.has_value())
        {
            LogWarn("Parse Error: invalid UUID {}", token);
            return false;
        }
        out.UUID = parsedUuid.value();
        return true;
    }

    case ParsableTag::kParseBoolean:
    {
        // "0"/"false"/"no" and "1"/"true"/"yes"; the words are matched case-insensitively
        if ((token == "0") || matchesKeyword("false") || matchesKeyword("no"))
        {
            out.primitive.boolean = false;
            return true;
        }

        if ((token == "1") || matchesKeyword("true") || matchesKeyword("yes"))
        {
            out.primitive.boolean = true;
            return true;
        }

        Aurora::Console::Logging::LogWarn("Parsed tag of boolean type wasn't parsable given the English string {}", token);
        return false;
    }

    default:
        SysPanic("Invalid consume tag {}", type);
    }

    return true;
}
// Reads one value of the given tag from the stream.
//
// Vec3 and Vec4 are read as three / four consecutive number tokens and stored
// component by component; reading stops at the first component that fails, so
// `out` may be partially written when false is returned. Every other tag is
// forwarded to ConsumeTokenPrimitiveish.
static bool ConsumeToken(ParseState &state, ParseContext &context, ParsableTag type, ParseValue &out)
{
    ParseValue component;

    // reads the next number token into `component`
    const auto readNumber = [&]() -> bool
    {
        return ConsumeTokenPrimitiveish(state, context, ParsableTag::kParseNumber, component);
    };

    if (type == ParsableTag::kParseVec3)
    {
        for (int idx = 0; idx < 3; idx++)
        {
            if (!readNumber())
            {
                return false;
            }
            out.primitive.vec3[idx] = component.primitive.number;
        }
        return true;
    }

    if (type == ParsableTag::kParseVec4)
    {
        for (int idx = 0; idx < 4; idx++)
        {
            if (!readNumber())
            {
                return false;
            }
            out.primitive.vec4[idx] = component.primitive.number;
        }
        return true;
    }

    return ConsumeTokenPrimitiveish(state, context, type, out);
}
// Exported convenience entry point: reads a single value of tag `type` from the
// byte source `getc` into `out`.
// A temporary ParseState is built around the callback for the duration of the
// call, so any state it records is discarded on return.
AUKN_SYM bool ConsumeToken(ParsableTag type, ConsumeStream_cb getc, ParseValue &out)
{
    ParseState scratchState(getc);
    ParseContext reader(scratchState.stringstream);

    const bool consumed = ConsumeToken(scratchState, reader, type, out);
    return consumed;
}
// Parses the stream according to `structure`, appending one ParsedBit per
// successfully parsed entry to `result.result`.
//
// For each entry of the structure:
//  - array entries are prefixed in the stream by an unsigned element count
//  - vararg entries are read until the first element fails to parse
//  - optional entries (and varargs) stop quietly on a failed element; any other
//    failure is a syntax error and aborts the whole parse with `false`
//  - nested objects are parsed recursively
// Entries that produced no element at all are not added to the result.
//
// Note that a failed optional element is not rewound: whatever it consumed from
// the stream stays consumed.
static bool Parse(ParseState &state, ParseContext &context, const ParseObject &structure, ParseResult &result)
{
    for (const auto &parseBit : structure)
    {
        ParsedBit parsed = {};
        parsed.tag = parseBit.tag;
        parsed.count = 0;

        AuMach arrayLength = 1;

        if (parseBit.array)
        {
            ParseValue arrayLengthBit = {};
            if (!ConsumeToken(state, context, ParsableTag::kParseUInt, arrayLengthBit))
            {
                Aurora::Console::Logging::LogWarn("Couldn't consume array length, label: {}, tag {}", parseBit.label, parseBit.tag);
                return false;
            }
            arrayLength = arrayLengthBit.primitive.uint;
        }

        // The index shares the type of arrayLength: the length comes straight from the
        // input and vararg loops are unbounded, so a plain int could overflow.
        for (AuMach i = 0; ((i < arrayLength) || (parseBit.vararg)); i++)
        {
            ParseValueEx parsedSingle = {};
            ParseResult nestedresult = {};
            bool ok = false; // stays false should the panic below ever return

            switch (parseBit.tag)
            {
            case ParsableTag::kParseUInt:
            case ParsableTag::kParseSInt:
            case ParsableTag::kParseString:
            case ParsableTag::kParseStringVararg:
            case ParsableTag::kParseNumber:
            case ParsableTag::kParseBoolean:
            case ParsableTag::kParseUUID:
            case ParsableTag::kParseVec3:
            case ParsableTag::kParseVec4:
            {
                ok = ConsumeToken(state, context, parseBit.tag, parsedSingle);
                break;
            }
            case ParsableTag::kParseObject:
            {
                // TODO: Although this should never result in a stack overflow, i'd rather return a request to parse than recursively call the same function
                ok = Parse(state, context, parseBit.objectParse, nestedresult);
                // TODO: debug info
                parsedSingle.object = nestedresult.result;
                break;
            }
            default:
                SysPanic("Invalid consume tag {} for {}", parseBit.tag, parseBit.label);
            }

            if (!ok)
            {
                // optional and vararg entries simply end at the first element that does not parse
                if ((parseBit.optional) || (parseBit.vararg))
                {
                    break;
                }

                Aurora::Console::Logging::LogWarn("Syntax error around: label: {}, tag {}", parseBit.label, parseBit.tag);
                return false;
            }

            if (parseBit.vararg || parseBit.array)
            {
                parsed.count++;
                parsed.value.array.push_back(parsedSingle);
                parsed.isArray = true;
            }
            else
            {
                parsed.isArray = false;
                parsed.count = 1;
                parsed.value.single = parsedSingle;
            }
        }

        // do not add if an optional bit was not serialized
        if (parsed.count != 0)
        {
            result.result.push_back(parsed);
        }
    }

    result.syntaxError = "No-Debug";
    result.debugTree = "No-Debug";
    return true;
}
// Exported entry point: parses the stream held by `state` according to
// `structure` and stores the outcome in `result`.
// A fresh lookahead context is created over `state.stringstream` for each call.
AUKN_SYM bool Parse(ParseState &state, const ParseObject &structure, ParseResult &result)
{
    ParseContext reader(state.stringstream);

    const bool parsedOk = Parse(state, reader, structure, result);
    return parsedOk;
}
// Appends the textual form of a single value to `str`.
//
// type  - selects which member of `value` is read
// value - the value to serialize
// str   - output; text is appended, existing content is kept
//
// Numbers go through std::to_string, vectors are written as space separated
// components, booleans as "true"/"false". Quoted strings are wrapped in
// quotation marks with embedded quotation marks escaped as \"; vararg strings
// have each newline prefixed with a backslash. Any other tag panics.
//
// NOTE(review): backslashes inside quoted strings are not escaped, so a string
// ending in a backslash presumably will not round-trip through the tokenizer -
// confirm before relying on it.
AUKN_SYM void SerializeToken(ParsableTag type, const ParseValue &value, AuString &str)
{
    switch (type)
    {
    case ParsableTag::kParseUInt:
    {
        str += std::to_string(value.primitive.uint);
        break;
    }
    case ParsableTag::kParseSInt:
    {
        str += std::to_string(value.primitive.sint);
        break;
    }
    case ParsableTag::kParseNumber:
    {
        str += std::to_string(value.primitive.number);
        break;
    }
    case ParsableTag::kParseVec3:
    {
        str += std::to_string(value.primitive.vec3[0]) + " ";
        str += std::to_string(value.primitive.vec3[1]) + " ";
        str += std::to_string(value.primitive.vec3[2]);
        break;
    }
    case ParsableTag::kParseVec4:
    {
        str += std::to_string(value.primitive.vec4[0]) + " ";
        str += std::to_string(value.primitive.vec4[1]) + " ";
        str += std::to_string(value.primitive.vec4[2]) + " ";
        str += std::to_string(value.primitive.vec4[3]);
        break;
    }
    case ParsableTag::kParseString:
    {
        // the working copy is only made for the tags that actually need one
        AuString escaped = value.string;
        AuReplaceAll(escaped, "\"", "\\\"");
        str += "\"" + escaped + "\"";
        break;
    }
    case ParsableTag::kParseStringVararg:
    {
        AuString escaped = value.string;
        AuReplaceAll(escaped, "\n", "\\\n");
        str += escaped;
        break;
    }
    case ParsableTag::kParseUUID:
    {
        str += uuids::to_string(value.UUID);
        break;
    }
    case ParsableTag::kParseBoolean:
    {
        str += value.primitive.boolean ? "true" : "false";
        break;
    }
    default:
        SysPanic("Invalid consume tag {}", type);
    }
}
// Appends the textual form of a parsed object to `ret`.
//
// structure - parsed entries to serialize, in order
// ret       - output; the serialized text is appended, existing content is kept
//
// Values are separated by single spaces. Array entries are preceded by their
// element count. Nested objects are serialized recursively in place.
//
// The text is assembled in a local buffer in which every item is preceded by a
// separator; the first separator is dropped once, right before the buffer is
// appended. Trimming the shared output instead would remove one character of
// the already written text for every nested object, and would throw when
// nothing at all was emitted into an empty output.
AUKN_SYM void Serialize(const ParsedObject &structure, AuString &ret)
{
    AuString buffer;

    for (const auto &parsed : structure)
    {
        if (parsed.isArray)
        {
            buffer += " ";
            buffer += std::to_string(parsed.count);
        }

        // entries holding more than one element always live in the array member
        const bool isArray = parsed.count > 1 || parsed.isArray;

        for (decltype(parsed.count) i = 0; i < parsed.count; i++)
        {
            buffer += " ";

            switch (parsed.tag)
            {
            case ParsableTag::kParseUInt:
            case ParsableTag::kParseSInt:
            case ParsableTag::kParseString:
            case ParsableTag::kParseStringVararg:
            case ParsableTag::kParseNumber:
            case ParsableTag::kParseBoolean:
            case ParsableTag::kParseUUID:
            case ParsableTag::kParseVec3:
            case ParsableTag::kParseVec4:
            {
                SerializeToken(parsed.tag, !isArray ? parsed.value.single : parsed.value.array[i], buffer);
                break;
            }
            case ParsableTag::kParseObject:
            {
                // the separator has already been written; the nested call appends its items without one
                Serialize(!isArray ? parsed.value.single.object : parsed.value.array[i].object, buffer);
                break;
            }
            default:
                SysPanic("Invalid emit tag {}", parsed.tag);
            }
        }
    }

    // every emitted item starts with a separator, so a non-empty buffer begins with ' '
    if (!buffer.empty())
    {
        ret += buffer.substr(1);
    }
}
}