/*** Copyright (C) 2021 J Reece Wilson (a/k/a "Reece"). All rights reserved. File: Parser.cpp Date: 2021-6-12 Author: Reece ***/ #include #include "Parser.hpp" namespace Aurora::Parse { AUKN_SYM void VaildateStructure(const ParseObject &object) { } struct ParseContext { AuUInt8 _next; bool _hasNext; const ConsumeStream_cb &_getc; ParseContext(const ConsumeStream_cb &getc) : _getc(getc), _next(0), _hasNext(false) { } bool Next(AuUInt8 &c) { if (AuExchange(_hasNext, false)) { c = _next; return true; } return _getc(c); } bool Peek(AuUInt8 &c) { if (_hasNext) { c = _next; return true; } _hasNext = _getc(_next); c = _next; return _hasNext; } }; template static AuFunction ContainedHerein(const AuUInt8(&arry)[Z]) { return [=](AuUInt8 c) -> bool { for (int i = 0; i < Z; i++) { if (arry[i] == c) { return true; } } return false; }; } template static AuFunction IsTerminating(ParseState &state, const AuUInt8(&arry)[Z]) { return [&](AuUInt8 c) -> bool { for (int i = 0; i < Z; i++) { if (arry[i] == c) { return true; } } for (int i = 0; i < state.countOfTokens; i++) { if (state.additionalTokens[i] == c) { state.lastTokenAdditional = i; return true; } } return false; }; } // oh god just dont think about this tokenizer too much bool ConsumeStringifedToken(ParseState &state, ParseContext &context, ParsableTag type, AuString &out) { static unsigned char terminatingChars[] = { '\n', '\00' }; static unsigned char whiteChars[] = { ' ' }; static unsigned char illegalCharacters[] = { '\r' }; //reeedows auto isString = type == ParsableTag::kParseString; auto isvararg = type == ParsableTag::kParseStringVararg; bool stringLevel = false; bool escapedNewLine = false; bool escapedQuote = false; auto isTerminating = IsTerminating(state, terminatingChars); static auto isSpace = ContainedHerein(whiteChars); static auto isIgnore = ContainedHerein(illegalCharacters); while (true) { AuUInt8 next; AuUInt8 peek; // consume character from the stream if (!context.Next(next)) { break; } // some characters should be considered universally illegal such as nasty unicode spaces, // carriage returns, and any other nasty crash exploit depending characters // DO NOT USE THIS TO ESCAPE ENGINE TAGS/COLOR ESCAPES/WHATEVER THE FUCK if (isIgnore(next)) { continue; } // Is end of line/end of string if (isTerminating(next)) { // Make sure we aren't encapsulated by quotation marks if ((!stringLevel) && (!AuExchange(escapedNewLine, false))) { state.hasLastToken = true; state.lastTokenCharacter = next; break; } } // if we hit a space in a string, assume we're at the end of the token, unless we're parsing // a string in quotation marks or are consuming the entire line (ParsableTag::kParseStringVararg) if ((isSpace(next)) && (!stringLevel) && (!isvararg)) { break; } // if the string starts with a quotation mark, set stringLevel to true if ((next == '"') && (isString) && (out.empty())) { stringLevel = true; continue; } auto peekStatus = context.Peek(peek); bool isPeekterminating = isTerminating(peek); // check \ \n pair + is var arg if ((peekStatus) && (isPeekterminating) && (next == '\\') && (isvararg)) { escapedNewLine = true; continue; } // escape character for "'s in strings if ((peekStatus) && (isString) && (next == '\\') && (stringLevel) && (peek == '\"')) { escapedQuote = true; continue; } // see above if ((next == '"') && (isString) && (AuExchange(escapedQuote, false))) { out += "\""; continue; } // match the ending '"' character in the token to terminate strings containing spaces // do not make this more ambiguous by removing the **else if**. // this is mutally exclusive with the above block and must be kept in this order else if ( (next == '"') && (isString) && (stringLevel) // && (((peekStatus && (isPeekterminating || isSpace(peek))) || (!peekStatus)))) // expected to fail { stringLevel = false; continue; } // otherwise emit out += next; } //SysAssert(!stringLevel, "Parsed tag of string type must end with \", got {}", out); if (stringLevel) { LogWarn("Parsed tag of string type must end with \", got {}", out); return false; } //Aurora::Console::Logging::LogDbg("returned {} {}", out, count != 0); return out.size() != 0; } template static bool ScrewExceptions_2(Func func, const AuString &in, Res &out) { try { out = func(in.c_str(), nullptr); return true; } catch (...) { return false; } } template int SignBit(T val) { return (T(0) < val) - (val < T(0)); } template bool ParseInt(const AuString &in, T &out) { T res = 0; T sign = 1; out = 0; auto itr = in.begin(); if constexpr (AuIsSame_v) { if (itr != in.end()) { if (*itr == '-') { itr++; sign = -1; } } } for (; (itr != in.end()) && (*itr != '\0'); itr++) { auto c = *itr; if ((c < '0') || (c > '9')) { return false; } auto old = res; res *= 10; res += static_cast(*itr) - static_cast('0'); if constexpr (AuIsSame_v) { if (old > res) { SysPushErrorSyntaxError("Unsigned integer overflow: {}", in); return false; } } else if constexpr (AuIsSame_v) { if (SignBit(old) != SignBit(res)) { SysPushErrorSyntaxError("Signed integer overflow: {}", in); return false; } } } out = res * sign; return true; } template bool ParseUInt(const AuString &in, T &out) { auto temp = AuUInt{}; out = 0; if (!ParseInt(in, temp)) { return false; } if (temp > std::numeric_limits::max()) { return false; } out = temp; return true; } template bool ParseSInt(const AuString &in, T &out) { auto temp = AuSInt{}; out = 0; if (!ParseInt(in, temp)) { return false; } if (temp > std::numeric_limits::max()) { return false; } if (temp < std::numeric_limits::min()) { return false; } out = temp; return true; } static bool ConsumeTokenPrimitiveish(ParseState &state, ParseContext &context, ParsableTag type, ParseValue &out) { AuString str; if (!ConsumeStringifedToken(state, context, type, str)) { return false; } if ((type != ParsableTag::kParseString) && (str.empty())) { return false; } AuOptional uuid; switch (type) { case ParsableTag::kParseUInt: { return ParseUInt(str, out.primitive.uint); } case ParsableTag::kParseSInt: { return ParseSInt(str, out.primitive.sint); } case ParsableTag::kParseNumber: { return ScrewExceptions_2(static_cast(std::stod), str, out.primitive.number); } case ParsableTag::kParseString: case ParsableTag::kParseStringVararg: { out.string = str; break; } case ParsableTag::kParseUUID: { uuid = uuids::uuid::from_string(str); if (!uuid.has_value()) { LogWarn("Parse Error: invalid UUID {}", str); return false; } out.UUID = uuid.value(); break; } case ParsableTag::kParseBoolean: { if ((str == "0") || (stricmp(str.c_str(), "false") == 0) || (stricmp(str.c_str(), "no") == 0)) { out.primitive.boolean = false; } else if ((str == "1") || (stricmp(str.c_str(), "true") == 0) || (stricmp(str.c_str(), "yes") == 0)) { out.primitive.boolean = true; } else { Aurora::Console::Logging::LogWarn("Parsed tag of boolean type wasn't parsable given the English string {}", str); return false; } break; } default: SysPanic("Invalid consume tag {}", type); } return true; } static bool ConsumeToken(ParseState &state, ParseContext &context, ParsableTag type, ParseValue &out) { ParseValue temp; #define ADD_VECTOR_VAL(idx, memberType) \ if (!ConsumeTokenPrimitiveish(state, context, ParsableTag::kParseNumber, temp)) \ return false; \ out.primitive.memberType[idx] = temp.primitive.number; switch (type) { case ParsableTag::kParseVec3: { ADD_VECTOR_VAL(0, vec3); ADD_VECTOR_VAL(1, vec3); ADD_VECTOR_VAL(2, vec3); return true; } case ParsableTag::kParseVec4: { ADD_VECTOR_VAL(0, vec4); ADD_VECTOR_VAL(1, vec4); ADD_VECTOR_VAL(2, vec4); ADD_VECTOR_VAL(3, vec4); return true; } default: { return ConsumeTokenPrimitiveish(state, context, type, out); } } #undef ADD_VECTOR_VAL } AUKN_SYM bool ConsumeToken(ParsableTag type, ConsumeStream_cb getc, ParseValue &out) { ParseState state(getc); ParseContext context(state.stringstream); return ConsumeToken(state, context, type, out); } static bool Parse(ParseState &state, ParseContext &context, const ParseObject &structure, ParseResult &result) { for (auto &parseBit : structure) { ParsedBit parsed = {}; bool ok; parsed.tag = parseBit.tag; AuMach arrayLength = 1; if (parseBit.array) { ParseValue arrayLengthBit = {}; if (!ConsumeToken(state, context, ParsableTag::kParseUInt, arrayLengthBit)) { Aurora::Console::Logging::LogWarn("Couldn't consume array length, label: {}, tag {}", parseBit.label, parseBit.tag); return false; } arrayLength = arrayLengthBit.primitive.uint; } parsed.count = 0; for (int i = 0; ((i < arrayLength) || (parseBit.vararg)); i++) { ParseValueEx parsedSingle = {}; ParseResult nestedresult = {}; switch (parseBit.tag) { case ParsableTag::kParseUInt: case ParsableTag::kParseSInt: case ParsableTag::kParseString: case ParsableTag::kParseStringVararg: case ParsableTag::kParseNumber: case ParsableTag::kParseBoolean: case ParsableTag::kParseUUID: case ParsableTag::kParseVec3: case ParsableTag::kParseVec4: { ok = ConsumeToken(state, context, parseBit.tag, parsedSingle); break; } case ParsableTag::kParseObject: { // TODO: Although this should never result in a stack overflow, i'd rather return a request to prase than recursively call the same function // Bah ok = Parse(state, context, parseBit.objectParse, nestedresult); // TODO: debug info parsedSingle.object = nestedresult.result; break; } default: SysPanic("Invalid consume tag {} for {}", parseBit.tag, parseBit.label); } if (!ok) { if ((parseBit.optional) || (parseBit.vararg)) { break; } Aurora::Console::Logging::LogWarn("Syntax error around: label: {}, tag {}", parseBit.label, parseBit.tag); return false; } if (parseBit.vararg || parseBit.array) { parsed.count++; parsed.value.array.push_back(parsedSingle); parsed.isArray = true; } else { parsed.isArray = false; parsed.count = 1; parsed.value.single = parsedSingle; } } // do not add if an optional bit was not serialized if (parsed.count != 0) { result.result.push_back(parsed); } } result.syntaxError = "No-Debug"; result.debugTree = "No-Debug"; return true; } AUKN_SYM bool Parse(ParseState &state, const ParseObject &structure, ParseResult &result) { ParseContext context(state.stringstream); return Parse(state, context, structure, result); } AUKN_SYM void SerializeToken(ParsableTag type, const ParseValue &value, AuString &str) { AuString temp = value.string; switch (type) { case ParsableTag::kParseUInt: { str += AuToString(value.primitive.uint); break; } case ParsableTag::kParseSInt: { str += AuToString(value.primitive.sint); break; } case ParsableTag::kParseNumber: { str += AuToString(value.primitive.number); break; } case ParsableTag::kParseVec3: { str += AuToString(value.primitive.vec3[0]) + " "; str += AuToString(value.primitive.vec3[1]) + " "; str += AuToString(value.primitive.vec3[2]); break; } case ParsableTag::kParseVec4: { str += AuToString(value.primitive.vec4[0]) + " "; str += AuToString(value.primitive.vec4[1]) + " "; str += AuToString(value.primitive.vec4[2]) + " "; str += AuToString(value.primitive.vec4[3]); break; } case ParsableTag::kParseString: { AuReplaceAll(temp, "\"", "\\\""); str += "\"" + temp + "\""; break; } case ParsableTag::kParseStringVararg: { AuReplaceAll(temp, "\n", "\\\n"); str += temp; break; } case ParsableTag::kParseUUID: { str += uuids::to_string(value.UUID); break; } case ParsableTag::kParseBoolean: { if (value.primitive.boolean) { str += "true"; } else { str += "false"; } break; } default: SysPanic("Invalid consume tag {}", type); } } AUKN_SYM void Serialize(const ParsedObject &structure, AuString &ret) { for (auto &parsed : structure) { if (parsed.isArray) { ret += " "; ret += AuToString(parsed.count); } bool isArray = parsed.count > 1 || parsed.isArray; for (int i = 0; ((i < parsed.count)); i++) { ret += " "; ParseValueEx parsedSingle = {}; ParseResult nestedresult = {}; switch (parsed.tag) { case ParsableTag::kParseUInt: case ParsableTag::kParseSInt: case ParsableTag::kParseString: case ParsableTag::kParseStringVararg: case ParsableTag::kParseNumber: case ParsableTag::kParseBoolean: case ParsableTag::kParseUUID: case ParsableTag::kParseVec3: case ParsableTag::kParseVec4: { SerializeToken(parsed.tag, !isArray ? parsed.value.single : parsed.value.array[i], ret); break; } case ParsableTag::kParseObject: { Serialize(!isArray ? parsed.value.single.object : parsed.value.array[i].object, ret); parsedSingle.object = nestedresult.result; break; } default: SysPanic("Invalid emit tag {}", parsed.tag); } } } ret = ret.substr(1); } }