655 lines
20 KiB
C++
655 lines
20 KiB
C++
/***
    Copyright (C) 2021 J Reece Wilson (a/k/a "Reece"). All rights reserved.

    File: Parser.cpp
    Date: 2021-6-12
    Author: Reece
    Note: Horrible gen 1 parser.
          I'm not removing or significantly improving this.
          Just build around what works; could probably wrangle a command list parser on top.
***/
|
|
#include <Source/RuntimeInternal.hpp>
#include "Parser.hpp"

#include <limits>
|
|
|
|
namespace Aurora::Parse
|
|
{
|
|
// Exported validation entry point for a parse structure description.
// No validation is implemented here; the argument is ignored and the call has
// no effect.
AUKN_SYM void VaildateStructure(const ParseObject &object)
{
    static_cast<void>(object);
}
|
|
|
|
struct ParseContext
|
|
{
|
|
IO::Character::BufferConsumerFromProviderUnique_t buffer;
|
|
|
|
ParseContext(const AuSPtr<IO::Character::ICharacterProvider> &getc)
|
|
{
|
|
buffer = IO::Character::BufferConsumerFromProviderUnique(getc);
|
|
}
|
|
|
|
bool Next(AuUInt8 &c)
|
|
{
|
|
return buffer->Next(c);
|
|
}
|
|
|
|
bool Peek(AuUInt8 &c)
|
|
{
|
|
return buffer->PeekNext(c);
|
|
}
|
|
};
|
|
|
|
template<size_t Z>
|
|
static AuFunction<bool(AuUInt8)> ContainedHerein(const AuUInt8(&arry)[Z])
|
|
{
|
|
return [=](AuUInt8 c) -> bool
|
|
{
|
|
for (int i = 0; i < Z; i++)
|
|
{
|
|
if (arry[i] == c)
|
|
{
|
|
return true;
|
|
}
|
|
}
|
|
return false;
|
|
};
|
|
}
|
|
|
|
template<size_t Z>
|
|
static AuFunction<bool(AuUInt8)> IsTerminating(ParseState &state, const AuUInt8(&arry)[Z])
|
|
{
|
|
return [&](AuUInt8 c) -> bool
|
|
{
|
|
for (int i = 0; i < Z; i++)
|
|
{
|
|
if (arry[i] == c)
|
|
{
|
|
return true;
|
|
}
|
|
}
|
|
|
|
for (int i = 0; i < state.countOfTokens; i++)
|
|
{
|
|
if (state.additionalTokens[i] == c)
|
|
{
|
|
state.lastTokenAdditional = i;
|
|
return true;
|
|
}
|
|
}
|
|
return false;
|
|
};
|
|
}
|
|
|
|
// oh god just dont think about this tokenizer too much
|
|
// Pulls bytes from `context` and accumulates one textual token into `out`.
//
// Rules, as implemented below:
//  - '\r' is dropped wherever it appears.
//  - '\n', '\0' and any of the additional tokens registered in `state` end the
//    token, unless we are inside a quoted string or the line break was escaped
//    (vararg strings only). On such a termination `state.hasLastToken` is set
//    and the terminating byte is stored in `state.lastTokenCharacter`.
//  - A space ends the token unless we are inside quotes or `type` is
//    kParseStringVararg (which consumes the rest of the line).
//  - For kParseString, a '"' seen while `out` is still empty opens a quoted
//    string; inside it a backslash makes the next byte literal, and a '"'
//    followed by a terminator, a space, or end of input closes it.
//
// Returns true when a non-empty token was produced. Returns false when the
// token is empty, or (after pushing a syntax error) when a quoted string was
// never closed.
//
// NOTE(review): a second '"' arriving while `out` is still empty re-enters the
// "open quote" branch, so an empty quoted string ("") is never closed here.
// Left untouched; confirm against the old tests before changing.
bool ConsumeStringifedToken(ParseState &state, ParseContext &context, ParsableTag type, AuString &out)
{
    static unsigned char terminatingChars[] = { '\n', '\00' };
    static unsigned char whiteChars[] = { ' ' };
    static unsigned char illegalCharacters[] = { '\r' }; // windows line endings

    auto isString = type == ParsableTag::kParseString;
    auto isvararg = type == ParsableTag::kParseStringVararg;
    bool stringLevel = false;    // currently inside "..."
    bool escapedNewLine = false; // previous byte was a backslash in front of a terminator (vararg)
    bool escapedQuote = false;   // previous byte was a backslash inside a quoted string

    // not static: this predicate is bound to (and mutates) the caller's state
    auto isTerminating = IsTerminating(state, terminatingChars);
    static auto isSpace = ContainedHerein(whiteChars);
    static auto isIgnore = ContainedHerein(illegalCharacters);

    while (true)
    {
        AuUInt8 cur {};
        AuUInt8 peek {};

        // consume character from the stream
        if (!context.Next(cur))
        {
            break;
        }

        // some characters should be considered universally illegal such as nasty unicode spaces,
        // carriage returns, and any other nasty crash exploit depending characters
        // DO NOT USE THIS TO ESCAPE ENGINE TAGS/COLOR ESCAPES/ETC
        if (isIgnore(cur))
        {
            continue;
        }

        // Is end of line/end of string
        if (isTerminating(cur))
        {
            // Make sure we aren't encapsulated by quotation marks
            // (AuExchange also clears a pending escaped-newline flag)
            if ((!stringLevel) && (!AuExchange(escapedNewLine, false)))
            {
                state.hasLastToken = true;
                state.lastTokenCharacter = cur;
                break;
            }
        }

        // if we hit a space in a string, assume we're at the end of the token, unless we're parsing
        // a string in quotation marks or are consuming the entire line (ParsableTag::kParseStringVararg)
        if ((isSpace(cur)) && (!stringLevel) && (!isvararg))
        {
            break;
        }

        // if the string starts with a quotation mark, set stringLevel to true
        // TODO: I dont remember the parse rules. Check the old tests. Should this be AuStartsWith?
        if ((cur == '"') && (isString) && (out.empty()))
        {
            stringLevel = true;
            continue;
        }

        auto peekStatus = context.Peek(peek);

        // Only classify the look-ahead byte when there actually is one. The
        // predicate records matches in `state`, so calling it on a byte that was
        // never read could corrupt state.lastTokenAdditional. Every use of
        // isPeekterminating below is already conditioned on peekStatus.
        bool isPeekterminating = peekStatus && isTerminating(peek);

        // prepare escape characters
        if ((peekStatus) && (cur == '\\'))
        {
            // check \ \n pair + is var arg
            if ((isPeekterminating) && (isvararg))
            {
                escapedNewLine = true;
                continue;
            }

            // escape character for "'s in strings
            if ((isString) && (stringLevel)) // && (peek == '\"'))
            {
                escapedQuote = true;
                continue;
            }
        }

        // see above: the byte following a backslash inside quotes is emitted verbatim
        if ((isString) && (AuExchange(escapedQuote, false)))
        {
            out += cur;
            continue;
        }
        // match the ending '"' character in the token to terminate strings containing spaces
        // do not make this more ambiguous by removing the **else if**.
        // this is mutally exclusive with the above block and must be kept in this order
        else if (
            (cur == '"') && (isString) && (stringLevel) //
            && (((peekStatus && (isPeekterminating || isSpace(peek))) || (!peekStatus)))) // expected to fail
        {
            stringLevel = false;
            continue;
        }
        else
        {
            // otherwise emit
            out += cur;
        }
    }

    //SysAssert(!stringLevel, "Parsed tag of string type must end with \", got {}", out);
    if (stringLevel)
    {
        SysPushErrorSyntaxError("Parsed tag of string type must end with \", got {}", out);
        return false;
    }

    //AuLogDbg("returned {} {}", out, out.size() != 0);
    return out.size() != 0;
}
|
|
|
|
template<typename Func, typename Res>
|
|
static bool ScrewExceptions_2(Func func, const AuString &in, Res &out)
|
|
{
|
|
try
|
|
{
|
|
out = func(in.c_str(), nullptr);
|
|
return true;
|
|
}
|
|
catch (...)
|
|
{
|
|
return false;
|
|
}
|
|
}
|
|
|
|
// Signum: yields -1 for values below zero, +1 for values above zero and 0
// otherwise (including values that compare neither below nor above zero).
template <typename T> int SignBit(T val)
{
    const T zero(0);

    if (val < zero)
    {
        return -1;
    }

    if (zero < val)
    {
        return 1;
    }

    return 0;
}
|
|
|
|
// Parses a base-10 integer from [begin, end) into `out`.
//
// T        - integral result type; a leading '-' is accepted for signed types only.
// begin    - first character to examine.
// end      - in: one past the last character to examine.
//            out: on success, the position parsing stopped at (the original end,
//            or the first '\0' encountered); on overflow, the position of the
//            digit that would have overflowed. Left untouched when a non-digit
//            character is rejected.
// out      - receives the parsed value; always reset to 0 first and only
//            written again on success.
//
// Returns false on a non-digit character, on a '-' that is not followed by any
// digit, or when the value does not fit in T (a syntax error is pushed in the
// overflow case). An empty range still succeeds with 0, as before.
//
// Overflow is detected *before* each multiply/add by comparing against the
// type's limits, so no wrapped or undefined intermediate value is ever formed.
// Negative numbers are accumulated downwards so the most negative value of T
// is representable.
template <typename T, typename Iterator>
bool ParseInt(Iterator begin, Iterator &end, T &out)
{
    using Limits = std::numeric_limits<T>;

    // parenthesised to stay immune to function-like min/max macros
    constexpr T kLowest  = (Limits::min)();
    constexpr T kHighest = (Limits::max)();

    out = 0;

    auto itr = begin;
    bool negative = false;

    if constexpr (Limits::is_signed)
    {
        if ((itr != end) && (*itr == '-'))
        {
            negative = true;
            ++itr;
        }
    }

    const auto firstDigit = itr;
    T res = 0;

    for (; (itr != end) && (*itr != '\0'); ++itr)
    {
        const auto c = *itr;

        if ((c < '0') || (c > '9'))
        {
            return false;
        }

        const T digit = static_cast<T>(c - '0');

        // res * 10 - digit >= kLowest   <=>  res >= (kLowest + digit) / 10   (division truncates towards zero)
        // res * 10 + digit <= kHighest  <=>  res <= (kHighest - digit) / 10
        const bool overflows = negative ? (res < static_cast<T>((kLowest + digit) / 10))
                                        : (res > static_cast<T>((kHighest - digit) / 10));
        if (overflows)
        {
            if constexpr (Limits::is_signed)
            {
                SysPushErrorSyntaxError("Signed integer overflow: {}", AuString(begin, end));
            }
            else
            {
                SysPushErrorSyntaxError("Unsigned integer overflow: {}", AuString(begin, end));
            }

            end = itr;
            return false;
        }

        res = negative ? static_cast<T>(res * 10 - digit)
                       : static_cast<T>(res * 10 + digit);
    }

    // a bare "-" is not a number
    if (negative && (itr == firstDigit))
    {
        return false;
    }

    end = itr;
    out = res;
    return true;
}
|
|
|
|
// Exported wrapper: parses an unsigned decimal integer from [begin, end).
// `end` is updated by ParseInt. Yields the parsed value on success and a
// default-constructed (empty) result when ParseInt reports failure.
AUKN_SYM AuResult<AuUInt> ParseUInt(const char *begin, const char *&end)
{
    AuUInt parsed {};

    const bool ok = ParseInt<AuUInt>(begin, end, parsed);
    if (ok)
    {
        return parsed;
    }

    return {};
}
|
|
|
|
// Exported wrapper: parses a signed decimal integer from [begin, end).
// `end` is updated by ParseInt. Yields the parsed value on success and a
// default-constructed (empty) result when ParseInt reports failure.
AUKN_SYM AuResult<AuSInt> ParseSInt(const char *begin, const char *&end)
{
    AuSInt parsed {};

    const bool ok = ParseInt<AuSInt>(begin, end, parsed);
    if (ok)
    {
        return parsed;
    }

    return {};
}
|
|
|
|
// Reads one textual token from the stream and converts it according to `type`,
// storing the result in the matching member of `out`:
//   kParseUInt / kParseSInt         -> out.primitive.uint / sint (whole token must be digits)
//   kParseNumber                    -> out.primitive.number (via std::stod)
//   kParseString / kParseStringVararg -> out.string
//   kParseUUID                      -> out.UUID
//   kParseBoolean                   -> out.primitive.boolean ("0"/"false"/"no", "1"/"true"/"yes";
//                                      words compared case-insensitively)
// Returns false when no token could be read or the conversion fails; UUID and
// boolean failures additionally push a syntax error. Any other tag panics.
static bool ConsumeTokenPrimitiveish(ParseState &state, ParseContext &context, ParsableTag type, ParseValue &out)
{
    AuString str;

    if (!ConsumeStringifedToken(state, context, type, str))
    {
        return false;
    }

    if ((type != ParsableTag::kParseString) && (str.empty()))
    {
        return false;
    }

    switch (type)
    {
    case ParsableTag::kParseUInt:
    {
        // ParseInt moves `end` to where it stopped; anything short of the real
        // end means trailing garbage (or an embedded NUL)
        auto end = str.end();
        return ParseInt<AuUInt64>(str.begin(), end, out.primitive.uint) && end == str.end();
    }
    case ParsableTag::kParseSInt:
    {
        auto end = str.end();
        return ParseInt<AuInt64>(str.begin(), end, out.primitive.sint) && end == str.end();
    }
    case ParsableTag::kParseNumber:
    {
        // NOTE(review): std::stod is called without a position out-parameter, so
        // a valid numeric prefix followed by other characters (e.g. "1.5abc") is
        // accepted, unlike the integer cases above. Confirm whether intended.
        return ScrewExceptions_2(static_cast<double(&)(const std::string &, std::size_t *)>(std::stod), str, out.primitive.number);
    }
    case ParsableTag::kParseString:
    case ParsableTag::kParseStringVararg:
    {
        out.string = AuMove(str);
        break;
    }
    case ParsableTag::kParseUUID:
    {
        AuOptional<uuids::uuid> uuid = uuids::uuid::from_string(str);
        if (!uuid.has_value())
        {
            SysPushErrorSyntaxError("Parse Error: invalid UUID {}", str);
            return false;
        }
        out.UUID = uuid.value();
        break;
    }
    case ParsableTag::kParseBoolean:
    {
        if ((str == "0") ||
            (stricmp(str.c_str(), "false") == 0) ||
            (stricmp(str.c_str(), "no") == 0))
        {
            out.primitive.boolean = false;
        }
        else if ((str == "1") ||
                 (stricmp(str.c_str(), "true") == 0) ||
                 (stricmp(str.c_str(), "yes") == 0))
        {
            out.primitive.boolean = true;
        }
        else
        {
            SysPushErrorSyntaxError("Parsed tag of boolean type wasn't parsable given the English string {}", str);
            return false;
        }
        break;
    }
    default:
        SysPanic("Invalid consume tag {}", type);
    }

    return true;
}
|
|
|
|
// Consumes one value of the given tag from the stream into `out`.
//
// Vector tags are read as a run of kParseNumber tokens (three for kParseVec3,
// four for kParseVec4), each stored into the matching component of
// out.primitive.vec3 / vec4; the first component that fails to parse aborts
// with false, leaving the components read so far in place. Every other tag is
// forwarded to ConsumeTokenPrimitiveish unchanged.
static bool ConsumeToken(ParseState &state, ParseContext &context, ParsableTag type, ParseValue &out)
{
    ParseValue scratch;

    // reads the next numeric token into scratch.primitive.number
    const auto nextNumber = [&]() -> bool
    {
        return ConsumeTokenPrimitiveish(state, context, ParsableTag::kParseNumber, scratch);
    };

    switch (type)
    {
    case ParsableTag::kParseVec3:
    {
        for (int axis = 0; axis < 3; axis++)
        {
            if (!nextNumber())
            {
                return false;
            }
            out.primitive.vec3[axis] = scratch.primitive.number;
        }
        return true;
    }
    case ParsableTag::kParseVec4:
    {
        for (int axis = 0; axis < 4; axis++)
        {
            if (!nextNumber())
            {
                return false;
            }
            out.primitive.vec4[axis] = scratch.primitive.number;
        }
        return true;
    }
    default:
    {
        return ConsumeTokenPrimitiveish(state, context, type, out);
    }
    }
}
|
|
|
|
// Exported entry point: consumes a single value of the given tag from a
// character provider. A fresh ParseState is built around the provider and a
// ParseContext around that state's stream, then the internal ConsumeToken
// overload does the work. Returns its result.
AUKN_SYM bool ConsumeToken(ParsableTag type, const AuSPtr<Aurora::IO::Character::ICharacterProvider> &getc, ParseValue &out)
{
    ParseState state(getc);
    ParseContext context(state.stream);

    const bool consumed = ConsumeToken(state, context, type, out);
    return consumed;
}
|
|
|
|
// Parses the stream according to `structure`, appending one ParsedBit per
// structure entry that produced at least one value to `result.result`.
//
// For each entry:
//  - if `array` is set, an unsigned element count is read first;
//  - values are then consumed `count` times (once when not an array), or
//    until a value fails to parse when `vararg` is set;
//  - nested kParseObject entries recurse into this function;
//  - a failed value ends the entry quietly when it is `optional` or `vararg`,
//    otherwise a syntax error is pushed and parsing stops with false.
// Entries flagged `vararg` or `array` are stored in value.array (isArray = true);
// all others in value.single.
//
// Returns true when the whole structure was processed; false on a syntax
// error or when inserting into an array fails.
static bool Parse(ParseState &state, ParseContext &context, const ParseObject &structure, ParseResult &result)
{
    for (auto &parseBit : structure)
    {
        ParsedBit parsed = {};
        parsed.tag = parseBit.tag;
        parsed.count = 0;

        AuMach arrayLength = 1;
        if (parseBit.array)
        {
            ParseValue arrayLengthBit = {};

            if (!ConsumeToken(state, context, ParsableTag::kParseUInt, arrayLengthBit))
            {
                SysPushErrorSyntaxError("Couldn't consume array length, label: {}, tag {}", parseBit.label, parseBit.tag);
                return false;
            }

            arrayLength = arrayLengthBit.primitive.uint;
        }

        // The index shares arrayLength's type: the length comes straight from the
        // input, and a plain int index would be compared across signedness and
        // could overflow on very large counts.
        for (AuMach i = 0; ((i < arrayLength) || (parseBit.vararg)); i++)
        {
            ParseValueEx parsedSingle = {};
            bool ok = false;

            switch (parseBit.tag)
            {
            case ParsableTag::kParseUInt:
            case ParsableTag::kParseSInt:
            case ParsableTag::kParseString:
            case ParsableTag::kParseStringVararg:
            case ParsableTag::kParseNumber:
            case ParsableTag::kParseBoolean:
            case ParsableTag::kParseUUID:
            case ParsableTag::kParseVec3:
            case ParsableTag::kParseVec4:
            {
                ok = ConsumeToken(state, context, parseBit.tag, parsedSingle);
                break;
            }
            case ParsableTag::kParseObject:
            {
                // TODO: Although this should never result in a stack overflow, i'd rather return a request to parse than recursively call the same function
                ParseResult nestedresult = {};
                ok = Parse(state, context, parseBit.objectParse, nestedresult);
                // TODO: debug info
                // nestedresult is not used again, so hand its contents over instead of copying
                parsedSingle.object = AuMove(nestedresult.result);
                break;
            }
            default:
                SysPanic("Invalid consume tag {} for {}", parseBit.tag, parseBit.label);
            }

            if (!ok)
            {
                // optional / vararg entries simply end at the first value that doesn't parse
                if ((parseBit.optional) || (parseBit.vararg))
                {
                    break;
                }

                SysPushErrorSyntaxError("Syntax error around: label: {}, tag {}", parseBit.label, parseBit.tag);
                return false;
            }

            if (parseBit.vararg || parseBit.array)
            {
                parsed.count++;
                if (!AuTryInsert(parsed.value.array, parsedSingle))
                {
                    SysPushErrorMem();
                    return false;
                }
                parsed.isArray = true;
            }
            else
            {
                parsed.isArray = false;
                parsed.count = 1;
                parsed.value.single = parsedSingle;
            }
        }

        // do not add if an optional bit was not serialized
        if (parsed.count != 0)
        {
            result.result.push_back(AuMove(parsed));
        }
    }

    result.syntaxError = "No-Debug";
    result.debugTree = "No-Debug";
    return true;
}
|
|
|
|
// Exported entry point: wraps the state's stream in a ParseContext and runs
// the internal structure parser over it. Returns that parser's result.
AUKN_SYM bool Parse(ParseState &state, const ParseObject &structure, ParseResult &result)
{
    ParseContext context(state.stream);

    const bool parsedOk = Parse(state, context, structure, result);
    return parsedOk;
}
|
|
|
|
// Appends the textual form of a single value to `str`.
//
// type  - selects which member of `value` is read.
// value - the value to emit.
// str   - output; text is appended, nothing already present is modified.
//
// Integers, numbers and vector components go through AuToString (vector
// components separated by single spaces). kParseString is emitted inside
// double quotes with backslashes and double quotes backslash-escaped.
// kParseStringVararg is emitted raw with every line feed preceded by a
// backslash. Booleans are emitted as "true"/"false". Any other tag panics.
//
// The string payload is copied only for the two string tags, which are the
// only cases that need a mutable copy for escaping.
AUKN_SYM void SerializeToken(ParsableTag type, const ParseValue &value, AuString &str)
{
    switch (type)
    {
    case ParsableTag::kParseUInt:
    {
        str += AuToString(value.primitive.uint);
        break;
    }
    case ParsableTag::kParseSInt:
    {
        str += AuToString(value.primitive.sint);
        break;
    }
    case ParsableTag::kParseNumber:
    {
        str += AuToString(value.primitive.number);
        break;
    }
    case ParsableTag::kParseVec3:
    {
        str += AuToString(value.primitive.vec3[0]) + " ";
        str += AuToString(value.primitive.vec3[1]) + " ";
        str += AuToString(value.primitive.vec3[2]);
        break;
    }
    case ParsableTag::kParseVec4:
    {
        str += AuToString(value.primitive.vec4[0]) + " ";
        str += AuToString(value.primitive.vec4[1]) + " ";
        str += AuToString(value.primitive.vec4[2]) + " ";
        str += AuToString(value.primitive.vec4[3]);
        break;
    }
    case ParsableTag::kParseString:
    {
        AuString temp = value.string;
        // backslashes first, so the backslashes added for quotes aren't doubled
        AuReplaceAll(temp, "\\", "\\\\");
        AuReplaceAll(temp, "\"", "\\\"");
        str += "\"" + temp + "\"";
        break;
    }
    case ParsableTag::kParseStringVararg:
    {
        AuString temp = value.string;
        AuReplaceAll(temp, "\n", "\\\n");
        str += temp;
        break;
    }
    case ParsableTag::kParseUUID:
    {
        str += uuids::to_string(value.UUID);
        break;
    }
    case ParsableTag::kParseBoolean:
    {
        if (value.primitive.boolean)
        {
            str += "true";
        }
        else
        {
            str += "false";
        }
        break;
    }
    default:
        SysPanic("Invalid consume tag {}", type);
    }
}
|
|
|
|
// Appends the textual form of a parsed object to `ret`.
//
// Every emitted item (an array's element count, each value) is preceded by a
// single space when `ret` is not empty at that point. Bits flagged isArray get
// their element count written before their values. Values are read from
// value.array when the bit is flagged isArray or holds more than one value,
// and from value.single otherwise. Nested kParseObject values recurse into
// this function; any unknown tag panics.
//
// NOTE(review): the parser in this file sets isArray for vararg bits as well,
// but only reads a leading count for bits declared as arrays, so the count
// written here for a vararg bit may not round-trip. Confirm against callers.
AUKN_SYM void Serialize(const ParsedObject &structure, AuString &ret)
{
    for (auto &parsed : structure)
    {
        if (parsed.isArray)
        {
            if (ret.size())
            {
                ret += " ";
            }
            ret += AuToString(parsed.count);
        }

        bool isArray = parsed.count > 1 || parsed.isArray;

        // index typed after the counter it is compared with
        for (decltype(parsed.count) i = 0; i < parsed.count; i++)
        {
            if (ret.size())
            {
                ret += " ";
            }

            switch (parsed.tag)
            {
            case ParsableTag::kParseUInt:
            case ParsableTag::kParseSInt:
            case ParsableTag::kParseString:
            case ParsableTag::kParseStringVararg:
            case ParsableTag::kParseNumber:
            case ParsableTag::kParseBoolean:
            case ParsableTag::kParseUUID:
            case ParsableTag::kParseVec3:
            case ParsableTag::kParseVec4:
            {
                SerializeToken(parsed.tag, !isArray ? parsed.value.single : parsed.value.array[i], ret);
                break;
            }
            case ParsableTag::kParseObject:
            {
                Serialize(!isArray ? parsed.value.single.object : parsed.value.array[i].object, ret);
                break;
            }
            default:
                SysPanic("Invalid emit tag {}", parsed.tag);
            }
        }
    }
}
|
|
} |