660 lines
19 KiB
C++
660 lines
19 KiB
C++
/***
|
|
Copyright (C) 2021 J Reece Wilson (a/k/a "Reece"). All rights reserved.
|
|
|
|
File: Parser.cpp
|
|
Date: 2021-6-12
|
|
Author: Reece
|
|
***/
|
|
#include <Source/RuntimeInternal.hpp>
|
|
#include "Parser.hpp"
|
|
|
|
namespace Aurora::Parse
|
|
{
|
|
// Exported validation hook for a parse structure description.
// Currently a no-op: the body performs no checks on `object`.
AUKN_SYM void VaildateStructure(const ParseObject &object)
{
    // nothing is validated here
}
|
|
|
|
struct ParseContext
|
|
{
|
|
AuUInt8 _next;
|
|
bool _hasNext;
|
|
const ConsumeStream_cb &_getc;
|
|
|
|
ParseContext(const ConsumeStream_cb &getc) : _getc(getc), _next(0), _hasNext(false)
|
|
{
|
|
}
|
|
|
|
bool Next(AuUInt8 &c)
|
|
{
|
|
if (std::exchange(_hasNext, false))
|
|
{
|
|
c = _next;
|
|
return true;
|
|
}
|
|
|
|
return _getc(c);
|
|
}
|
|
|
|
bool Peak(AuUInt8 &c)
|
|
{
|
|
if (_hasNext)
|
|
{
|
|
c = _next;
|
|
return true;
|
|
}
|
|
|
|
_hasNext = _getc(_next);
|
|
c = _next;
|
|
return _hasNext;
|
|
}
|
|
};
|
|
|
|
template<size_t Z>
|
|
static std::function<bool(AuUInt8)> ContainedHerein(const AuUInt8(&arry)[Z])
|
|
{
|
|
return [=](AuUInt8 c) -> bool
|
|
{
|
|
for (int i = 0; i < Z; i++)
|
|
{
|
|
if (arry[i] == c)
|
|
{
|
|
return true;
|
|
}
|
|
}
|
|
return false;
|
|
};
|
|
}
|
|
|
|
|
|
template<size_t Z>
|
|
static std::function<bool(AuUInt8)> IsTerminating(ParseState &state, const AuUInt8(&arry)[Z])
|
|
{
|
|
return [&](AuUInt8 c) -> bool
|
|
{
|
|
for (int i = 0; i < Z; i++)
|
|
{
|
|
if (arry[i] == c)
|
|
{
|
|
return true;
|
|
}
|
|
}
|
|
|
|
for (int i = 0; i < state.countOfTokens; i++)
|
|
{
|
|
if (state.additionalTokens[i] == c)
|
|
{
|
|
state.lastTokenAdditional = i;
|
|
return true;
|
|
}
|
|
}
|
|
return false;
|
|
};
|
|
}
|
|
|
|
// oh god just dont think about this tokenizer too much
|
|
// Reads one token from `context` and appends its characters to `out`.
//
// Rules, in the order they are applied to each consumed character:
//  - '\r' is dropped.
//  - A terminating character ('\n', NUL, or one of state.additionalTokens)
//    ends the token unless we are inside quotation marks or the character was
//    escaped with a backslash (kParseStringVararg only). When it ends the
//    token, it is recorded in state.hasLastToken / state.lastTokenCharacter.
//  - A space ends the token unless we are inside quotation marks or parsing
//    a kParseStringVararg.
//  - For kParseString, a leading '"' opens a quoted string, \" inside it emits
//    a literal quote, and a '"' followed by a space, a terminator or the end
//    of input closes it.
//
// Returns false when nothing was collected or when a quoted string was never
// closed (a warning is logged in that case); true otherwise.
//
// The order of the checks inside the loop is significant; do not reorder them.
bool ConsumeStringifedToken(ParseState &state, ParseContext &context, ParsableTag type, AuString &out)
{
    static unsigned char terminatingChars[] = { '\n', '\00' };
    static unsigned char whiteChars[] = { ' ' };
    static unsigned char illegalCharacters[] = { '\r' }; // carriage returns from CRLF input

    const bool isString = type == ParsableTag::kParseString;
    const bool isvararg = type == ParsableTag::kParseStringVararg;
    bool stringLevel = false;    // true while inside "..."
    bool escapedNewLine = false; // previous character was a backslash escaping a terminator
    bool escapedQuote = false;   // previous character was a backslash escaping a '"'

    auto isTerminating = IsTerminating(state, terminatingChars);
    static auto isSpace = ContainedHerein(whiteChars);
    static auto isIgnore = ContainedHerein(illegalCharacters);

    while (true)
    {
        AuUInt8 next;
        AuUInt8 peak;

        // consume character from the stream
        if (!context.Next(next))
        {
            break;
        }

        // some characters are considered universally illegal (currently only
        // carriage returns) and are silently skipped.
        // Do not use this list to escape engine tags, colour escapes, etc.
        if (isIgnore(next))
        {
            continue;
        }

        // Is end of line/end of string
        if (isTerminating(next))
        {
            // Make sure we aren't encapsulated by quotation marks
            if ((!stringLevel) && (!std::exchange(escapedNewLine, false)))
            {
                state.hasLastToken = true;
                state.lastTokenCharacter = next;
                break;
            }
        }

        // if we hit a space in a string, assume we're at the end of the token, unless we're parsing
        // a string in quotation marks or are consuming the entire line (ParsableTag::kParseStringVararg)
        if ((isSpace(next)) && (!stringLevel) && (!isvararg))
        {
            break;
        }

        // if the string starts with a quotation mark, set stringLevel to true.
        // The !stringLevel guard matters: without it, a second '"' seen while
        // `out` is still empty (the closing quote of "" or an escaped quote at
        // the very start of the string) would be swallowed as another opening
        // quote instead of reaching the escape/closing logic below.
        if ((next == '"') && (isString) && (!stringLevel) && (out.empty()))
        {
            stringLevel = true;
            continue;
        }

        // Peak() always writes `peak`, even when it reports failure
        auto peakStatus = context.Peak(peak);

        bool isPeakterminating = isTerminating(peak);

        // check \ \n pair + is var arg
        if ((peakStatus) && (isPeakterminating) && (next == '\\') && (isvararg))
        {
            escapedNewLine = true;
            continue;
        }

        // escape character for "'s in strings
        if ((peakStatus) && (isString) && (next == '\\') && (stringLevel) && (peak == '\"'))
        {
            escapedQuote = true;
            continue;
        }

        // see above
        if ((next == '"') && (isString) && (std::exchange(escapedQuote, false)))
        {
            out += "\"";
            continue;
        }
        // match the ending '"' character in the token to terminate strings containing spaces
        // do not make this more ambiguous by removing the **else if**.
        // this is mutually exclusive with the above block and must be kept in this order
        else if (
            (next == '"') && (isString) && (stringLevel) //
            && (((peakStatus && (isPeakterminating || isSpace(peak))) || (!peakStatus))))
        {
            stringLevel = false;
            continue;
        }

        // otherwise emit
        out += next;
    }

    // input ended (or the loop was left) while still inside quotation marks
    if (stringLevel)
    {
        LogWarn("Parsed tag of string type must end with \", got {}", out);
        return false;
    }

    return out.size() != 0;
}
|
|
|
|
// Calls func(in.c_str(), nullptr), stores the result in `out` and converts
// any thrown exception into a `false` return value.
// `out` is left untouched when the call throws.
template<typename Func, typename Res>
static bool ScrewExceptions_2(Func func, const std::string &in, Res &out)
{
    bool succeeded = false;

    try
    {
        out = func(in.c_str(), nullptr);
        succeeded = true;
    }
    catch (...)
    {
        // every failure mode is reported the same way: by returning false
    }

    return succeeded;
}
|
|
|
|
// Returns the sign of `val`: 1 when positive, -1 when negative, 0 otherwise.
template <typename T> int SignBit(T val)
{
    const T zero = T(0);
    const int isPositive = (zero < val) ? 1 : 0;
    const int isNegative = (val < zero) ? 1 : 0;
    return isPositive - isNegative;
}
|
|
|
|
template <typename T>
|
|
bool ParseInt(const AuString &in, T &out)
|
|
{
|
|
T res = 0;
|
|
T sign = 1;
|
|
out = 0;
|
|
|
|
auto itr = in.begin();
|
|
|
|
if constexpr (std::is_same<T, AuSInt>::value)
|
|
{
|
|
if (itr != in.end())
|
|
{
|
|
if (*itr == '-')
|
|
{
|
|
itr++;
|
|
sign = -1;
|
|
}
|
|
}
|
|
}
|
|
|
|
for (;
|
|
(itr != in.end()) &&
|
|
(*itr != '\0');
|
|
itr++)
|
|
{
|
|
auto c = *itr;
|
|
|
|
if ((c < '0') || (c > '9'))
|
|
{
|
|
return false;
|
|
}
|
|
|
|
auto old = res;
|
|
|
|
res *= 10;
|
|
res += static_cast<AuUInt>(*itr) - static_cast<AuUInt>('0');
|
|
|
|
if constexpr (std::is_same<T, AuUInt>::value)
|
|
{
|
|
if (old > res)
|
|
{
|
|
SysPushErrorSyntaxError("Unsigned integer overflow: {}", in);
|
|
return false;
|
|
}
|
|
}
|
|
else if constexpr (std::is_same<T, AuSInt>::value)
|
|
{
|
|
if (SignBit(old) != SignBit(res))
|
|
{
|
|
SysPushErrorSyntaxError("Signed integer overflow: {}", in);
|
|
return false;
|
|
}
|
|
}
|
|
}
|
|
|
|
out = res * sign;
|
|
return true;
|
|
}
|
|
|
|
template <typename T>
|
|
bool ParseUInt(const AuString &in, T &out)
|
|
{
|
|
auto temp = AuUInt{};
|
|
out = 0;
|
|
|
|
if (!ParseInt<AuUInt>(in, temp))
|
|
{
|
|
return false;
|
|
}
|
|
|
|
if (temp > std::numeric_limits<T>::max())
|
|
{
|
|
return false;
|
|
}
|
|
|
|
out = temp;
|
|
return true;
|
|
}
|
|
|
|
template <typename T>
|
|
bool ParseSInt(const AuString &in, T &out)
|
|
{
|
|
auto temp = AuSInt{};
|
|
out = 0;
|
|
|
|
if (!ParseInt<AuSInt>(in, temp))
|
|
{
|
|
return false;
|
|
}
|
|
|
|
if (temp > std::numeric_limits<T>::max())
|
|
{
|
|
return false;
|
|
}
|
|
|
|
if (temp < std::numeric_limits<T>::min())
|
|
{
|
|
return false;
|
|
}
|
|
|
|
out = temp;
|
|
return true;
|
|
}
|
|
|
|
// Consumes one textual token and converts it according to `type`, writing the
// result into the matching member of `out`.
//
// Handles kParseUInt, kParseSInt, kParseNumber, kParseString,
// kParseStringVararg, kParseUUID and kParseBoolean; any other tag panics.
// Returns false when no token could be read or the text could not be
// converted.
static bool ConsumeTokenPrimitiveish(ParseState &state, ParseContext &context, ParsableTag type, ParseValue &out)
{
    AuString token;

    if (!ConsumeStringifedToken(state, context, type, token))
    {
        return false;
    }

    if (token.empty() && (type != ParsableTag::kParseString))
    {
        return false;
    }

    // case-insensitive comparison of the token against a keyword
    const auto equalsNoCase = [&token](const char *word) -> bool
    {
        return stricmp(token.c_str(), word) == 0;
    };

    switch (type)
    {
        case ParsableTag::kParseUInt:
            return ParseUInt(token, out.primitive.uint);

        case ParsableTag::kParseSInt:
            return ParseSInt(token, out.primitive.sint);

        case ParsableTag::kParseNumber:
            // NOTE(review): ScrewExceptions_2 passes nullptr as std::stod's
            // position argument, so trailing characters after a valid number
            // are not rejected here - confirm that this is intended.
            return ScrewExceptions_2(static_cast<double(&)(const std::string &, std::size_t *)>(std::stod), token, out.primitive.number);

        case ParsableTag::kParseString:
        case ParsableTag::kParseStringVararg:
            out.string = token;
            return true;

        case ParsableTag::kParseUUID:
        {
            AuOptional<uuids::uuid> parsedId;
            parsedId = uuids::uuid::from_string(token);

            if (!parsedId.has_value())
            {
                LogWarn("Parse Error: invalid UUID {}", token);
                return false;
            }

            out.UUID = parsedId.value();
            return true;
        }

        case ParsableTag::kParseBoolean:
        {
            const bool saysFalse = (token == "0") || equalsNoCase("false") || equalsNoCase("no");
            const bool saysTrue = (!saysFalse) && ((token == "1") || equalsNoCase("true") || equalsNoCase("yes"));

            if ((!saysFalse) && (!saysTrue))
            {
                Aurora::Console::Logging::LogWarn("Parsed tag of boolean type wasn't parsable given the English string {}", token);
                return false;
            }

            out.primitive.boolean = saysTrue;
            return true;
        }

        default:
            SysPanic("Invalid consume tag {}", type);
    }

    return true;
}
|
|
|
|
// Consumes a value of the given tag from the stream.
//
// kParseVec3 / kParseVec4 are read as three / four consecutive kParseNumber
// tokens, stored component by component; reading stops at the first component
// that fails. Every other tag is forwarded to ConsumeTokenPrimitiveish.
static bool ConsumeToken(ParseState &state, ParseContext &context, ParsableTag type, ParseValue &out)
{
    ParseValue scratch;

    // reads one number token and stores it in `component`
    const auto readComponent = [&](auto &component) -> bool
    {
        if (!ConsumeTokenPrimitiveish(state, context, ParsableTag::kParseNumber, scratch))
        {
            return false;
        }

        component = scratch.primitive.number;
        return true;
    };

    if (type == ParsableTag::kParseVec3)
    {
        return readComponent(out.primitive.vec3[0]) &&
               readComponent(out.primitive.vec3[1]) &&
               readComponent(out.primitive.vec3[2]);
    }

    if (type == ParsableTag::kParseVec4)
    {
        return readComponent(out.primitive.vec4[0]) &&
               readComponent(out.primitive.vec4[1]) &&
               readComponent(out.primitive.vec4[2]) &&
               readComponent(out.primitive.vec4[3]);
    }

    return ConsumeTokenPrimitiveish(state, context, type, out);
}
|
|
|
|
// Exported entry point: consumes a single value of the given tag from the
// character callback `getc`, using a fresh ParseState and lookahead context.
// Returns the result of the internal ConsumeToken overload.
AUKN_SYM bool ConsumeToken(ParsableTag type, ConsumeStream_cb getc, ParseValue &out)
{
    ParseState freshState(getc);
    ParseContext lookahead(freshState.stringstream);

    const bool consumed = ConsumeToken(freshState, lookahead, type, out);
    return consumed;
}
|
|
|
|
// Parses the stream according to `structure`, appending one ParsedBit per
// successfully parsed field to result.result.
//
// For each field:
//  - when parseBit.array is set, an unsigned element count is read first;
//  - elements are then read until the count is reached, or - for vararg
//    fields - until an element fails to parse;
//  - kParseObject fields recurse into parseBit.objectParse.
//
// A failing element aborts the whole parse with a warning unless the field is
// optional or vararg, in which case reading of that field simply stops.
// Fields that produced no element are not added to the result.
static bool Parse(ParseState &state, ParseContext &context, const ParseObject &structure, ParseResult &result)
{
    for (auto &parseBit : structure)
    {
        ParsedBit parsed = {};
        parsed.tag = parseBit.tag;

        AuMach arrayLength = 1;
        if (parseBit.array)
        {
            ParseValue arrayLengthBit = {};

            if (!ConsumeToken(state, context, ParsableTag::kParseUInt, arrayLengthBit))
            {
                Aurora::Console::Logging::LogWarn("Couldn't consume array length, label: {}, tag {}", parseBit.label, parseBit.tag);
                return false;
            }

            arrayLength = arrayLengthBit.primitive.uint;
        }

        parsed.count = 0;

        // The counter has the same type as arrayLength so that the comparison
        // is not a signed/unsigned mix and cannot overflow an int.
        for (AuMach i = 0; ((i < arrayLength) || (parseBit.vararg)); i++)
        {
            ParseValueEx parsedSingle = {};
            bool ok = false;

            switch (parseBit.tag)
            {
                case ParsableTag::kParseUInt:
                case ParsableTag::kParseSInt:
                case ParsableTag::kParseString:
                case ParsableTag::kParseStringVararg:
                case ParsableTag::kParseNumber:
                case ParsableTag::kParseBoolean:
                case ParsableTag::kParseUUID:
                case ParsableTag::kParseVec3:
                case ParsableTag::kParseVec4:
                {
                    ok = ConsumeToken(state, context, parseBit.tag, parsedSingle);
                    break;
                }
                case ParsableTag::kParseObject:
                {
                    // TODO: although this should never result in a stack overflow, it would be
                    // preferable to return a request to parse than to call this function recursively
                    ParseResult nestedresult = {};
                    ok = Parse(state, context, parseBit.objectParse, nestedresult);
                    // TODO: debug info
                    parsedSingle.object = nestedresult.result;
                    break;
                }
                default:
                    SysPanic("Invalid consume tag {} for {}", parseBit.tag, parseBit.label);
            }

            if (!ok)
            {
                if ((parseBit.optional) || (parseBit.vararg))
                {
                    break;
                }

                Aurora::Console::Logging::LogWarn("Syntax error around: label: {}, tag {}", parseBit.label, parseBit.tag);
                return false;
            }

            if (parseBit.vararg || parseBit.array)
            {
                parsed.count++;
                parsed.value.array.push_back(parsedSingle);
                parsed.isArray = true;
            }
            else
            {
                parsed.isArray = false;
                parsed.count = 1;
                parsed.value.single = parsedSingle;
            }
        }

        // do not add if an optional bit was not serialized
        if (parsed.count != 0)
        {
            result.result.push_back(parsed);
        }
    }

    result.syntaxError = "No-Debug";
    result.debugTree = "No-Debug";
    return true;
}
|
|
|
|
// Exported entry point: parses state.stringstream according to `structure`
// into `result`, using a fresh lookahead context for this call.
AUKN_SYM bool Parse(ParseState &state, const ParseObject &structure, ParseResult &result)
{
    ParseContext lookahead(state.stringstream);

    const bool parsedOk = Parse(state, lookahead, structure, result);
    return parsedOk;
}
|
|
|
|
// Appends the textual form of `value`, interpreted according to `type`,
// to `str`.
//
//  - integers and numbers use std::to_string;
//  - vectors are written as space separated components;
//  - kParseString is wrapped in quotation marks with inner '"' written as \";
//  - kParseStringVararg is written raw with each '\n' preceded by a backslash;
//  - booleans are written as "true" / "false".
// Any other tag panics.
//
// The string member is copied only in the two string cases, which are the
// only ones that need a mutable copy for escaping.
AUKN_SYM void SerializeToken(ParsableTag type, const ParseValue &value, AuString &str)
{
    switch (type)
    {
        case ParsableTag::kParseUInt:
        {
            str += std::to_string(value.primitive.uint);
            break;
        }
        case ParsableTag::kParseSInt:
        {
            str += std::to_string(value.primitive.sint);
            break;
        }
        case ParsableTag::kParseNumber:
        {
            str += std::to_string(value.primitive.number);
            break;
        }
        case ParsableTag::kParseVec3:
        {
            str += std::to_string(value.primitive.vec3[0]);
            str += " ";
            str += std::to_string(value.primitive.vec3[1]);
            str += " ";
            str += std::to_string(value.primitive.vec3[2]);
            break;
        }
        case ParsableTag::kParseVec4:
        {
            str += std::to_string(value.primitive.vec4[0]);
            str += " ";
            str += std::to_string(value.primitive.vec4[1]);
            str += " ";
            str += std::to_string(value.primitive.vec4[2]);
            str += " ";
            str += std::to_string(value.primitive.vec4[3]);
            break;
        }
        case ParsableTag::kParseString:
        {
            AuString escaped = value.string;
            AuReplaceAll(escaped, "\"", "\\\"");
            str += "\"";
            str += escaped;
            str += "\"";
            break;
        }
        case ParsableTag::kParseStringVararg:
        {
            AuString escaped = value.string;
            AuReplaceAll(escaped, "\n", "\\\n");
            str += escaped;
            break;
        }
        case ParsableTag::kParseUUID:
        {
            str += uuids::to_string(value.UUID);
            break;
        }
        case ParsableTag::kParseBoolean:
        {
            str += value.primitive.boolean ? "true" : "false";
            break;
        }
        default:
            SysPanic("Invalid consume tag {}", type);
    }
}
|
|
|
|
AUKN_SYM void Serialize(const ParsedObject &structure, AuString &ret)
|
|
{
|
|
for (auto &parsed : structure)
|
|
{
|
|
if (parsed.isArray)
|
|
{
|
|
ret += " ";
|
|
ret += std::to_string(parsed.count);
|
|
}
|
|
|
|
bool isArray = parsed.count > 1 || parsed.isArray;
|
|
|
|
for (int i = 0; ((i < parsed.count)); i++)
|
|
{
|
|
ret += " ";
|
|
|
|
ParseValueEx parsedSingle = {};
|
|
ParseResult nestedresult = {};
|
|
switch (parsed.tag)
|
|
{
|
|
case ParsableTag::kParseUInt:
|
|
case ParsableTag::kParseSInt:
|
|
case ParsableTag::kParseString:
|
|
case ParsableTag::kParseStringVararg:
|
|
case ParsableTag::kParseNumber:
|
|
case ParsableTag::kParseBoolean:
|
|
case ParsableTag::kParseUUID:
|
|
case ParsableTag::kParseVec3:
|
|
case ParsableTag::kParseVec4:
|
|
{
|
|
SerializeToken(parsed.tag, !isArray ? parsed.value.single : parsed.value.array[i], ret);
|
|
break;
|
|
}
|
|
case ParsableTag::kParseObject:
|
|
{
|
|
Serialize(!isArray ? parsed.value.single.object : parsed.value.array[i].object, ret);
|
|
parsedSingle.object = nestedresult.result;
|
|
break;
|
|
}
|
|
default:
|
|
SysPanic("Invalid emit tag {}", parsed.tag);
|
|
}
|
|
}
|
|
}
|
|
|
|
ret = ret.substr(1);
|
|
}
|
|
} |