/*** Copyright (C) 2021 J Reece Wilson (a/k/a "Reece"). All rights reserved. File: Parser.cpp Date: 2021-6-12 Author: Reece ***/ #include #include "Parser.hpp" namespace Aurora::Parse { AUKN_SYM void VaildateStructure(const ParseObject &object) { } struct ParseContext { ConsumeStream_cb getc; AuUInt8 _next; bool _hasNext; ParseContext(ConsumeStream_cb _getc) : getc(_getc), _next(0), _hasNext(false) { } bool Next(AuUInt8 &c) { if (std::exchange(_hasNext, false)) { c = _next; return true; } return getc(c); } bool Peak(AuUInt8 &c) { if (_hasNext) { c = _next; return true; } _hasNext = getc(_next); c = _next; return _hasNext; } }; template static std::function ContainedHerein(const AuUInt8(&arrayKanker)[Z]) { return [=](AuUInt8 c) -> bool { for (int i = 0; i < Z; i++) { if (arrayKanker[i] == c) { return true; } } return false; }; } // oh god just dont think about this tokenizer too much bool ConsumeStringifedToken(ParsableTag type, ParseContext &context, AuString &out) { static unsigned char terminatingChars[] = { '\n', '\00' }; static unsigned char whiteChars[] = { ' ' }; static unsigned char illegalCharacters[] = { '\r' }; //reeedows auto isString = type == ParsableTag::kParseString; auto isvararg = type == ParsableTag::kParseStringVararg; bool stringLevel = false; bool escapedNewLine = false; bool escapedQuote = false; auto isTerminating = ContainedHerein(terminatingChars); auto isSpace = ContainedHerein(whiteChars); auto isIgnore = ContainedHerein(illegalCharacters); while (true) { AuUInt8 next; AuUInt8 peak; // consume character from the stream if (!context.Next(next)) { break; } // some characters should be considered universally illegal such as nasty unicode spaces, // carriage returns, and any other nasty crash exploit depending characters // DO NOT USE THIS TO ESCAPE ENGINE TAGS/COLOR ESCAPES/WHATEVER THE FUCK if (isIgnore(next)) { continue; } // Is end of line/end of string if (isTerminating(next)) { // Make sure we aren't encapsulated by quotation marks if ((!stringLevel) && (!std::exchange(escapedNewLine, false))) { break; } } // if we hit a space in a string, assume we're at the end of the token, unless we're parsing // a string in quotation marks or are consuming the entire line (ParsableTag::kParseStringVararg) if ((isSpace(next)) && (!stringLevel) && (!isvararg)) { break; } // if the string starts with a quotation mark, set stringLevel to true if ((next == '"') && (isString) && (out.empty())) { stringLevel = true; continue; } auto peakStatus = context.Peak(peak); // check \ \n pair + is var arg if ((peakStatus) && (isTerminating(peak)) && (next == '\\') && (isvararg)) { escapedNewLine = true; continue; } // escape character for "'s in strings if ((peakStatus) && (isString) && (next == '\\') && (stringLevel) && (peak == '\"')) { escapedQuote = true; continue; } // see above if ((next == '"') && (isString) && (std::exchange(escapedQuote, false))) { out += "\""; continue; } // match the ending '"' character in the token to terminate strings containing spaces // do not make this more ambiguous by removing the **else if**. // this is mutally exclusive with the above block and must be kept in this order else if ( (next == '"') && (isString) && (stringLevel) // && (((peakStatus && (isTerminating(peak) || isSpace(peak))) || (!peakStatus)))) // expected to fail { stringLevel = false; continue; } // otherwise emit out += next; } //SysAssert(!stringLevel, "Parsed tag of string type must end with \", got {}", out); if (stringLevel) { LogWarn("Parsed tag of string type must end with \", got {}", out); return false; } //Aurora::Console::Logging::LogDbg("returned {} {}", out, count != 0); return out.size() != 0; } template static bool ScrewExceptions_2(Func func, const std::string &in, Res &out) { try { out = func(in.c_str(), nullptr); return true; } catch (...) { return false; } } template int SignBit(T val) { return (T(0) < val) - (val < T(0)); } template bool ParseInt(const AuString &in, T &out) { T res = 0; T sign = 1; out = 0; auto itr = in.begin(); if constexpr (std::is_same::value) { if (itr != in.end()) { if (*itr == '-') { itr++; sign = -1; } } } for (; (itr != in.end()) && (*itr != '\0'); itr++) { auto c = *itr; if ((c < '0') || (c > '9')) return false; auto old = res; res *= 10; res += static_cast(*itr) - static_cast('0'); if constexpr (std::is_same::value) { if (old > res) { LogDbg("Unsigned integer overflow"); return false; } } else if constexpr (std::is_same::value) { if (SignBit(old) != SignBit(res)) { LogDbg("Signed integer overflow"); return false; } } } out = res * sign; return true; } template bool ParseUInt(const AuString &in, T &out) { auto temp = AuUInt{}; out = 0; if (!ParseInt(in, temp)) return false; if (temp > std::numeric_limits::max()) return false; out = temp; return true; } template bool ParseSInt(const AuString &in, T &out) { auto temp = AuSInt{}; out = 0; if (!ParseInt(in, temp)) return false; if (temp > std::numeric_limits::max()) return false; if (temp < std::numeric_limits::min()) return false; out = temp; return true; } static bool ConsumeTokenPrimitiveish(ParsableTag type, ParseContext &context, ParseValue &out) { AuString str; if (!ConsumeStringifedToken(type, context, str)) { return false; } if ((type != ParsableTag::kParseString) && (str.empty())) { return false; } std::optional uuid; switch (type) { case ParsableTag::kParseUInt: { return ParseUInt(str, out.primitive.uint); } case ParsableTag::kParseSInt: { return ParseSInt(str, out.primitive.sint); } case ParsableTag::kParseNumber: { return ScrewExceptions_2(static_cast(std::stod), str, out.primitive.number); } case ParsableTag::kParseString: case ParsableTag::kParseStringVararg: { out.string = str; break; } case ParsableTag::kParseUUID: { uuid = uuids::uuid::from_string(str); if (!uuid.has_value()) { LogWarn("Parse Error: invalid UUID {}", str); return false; } out.UUID = uuid.value(); break; } case ParsableTag::kParseBoolean: { if (str == "0" || (stricmp(str.c_str(), "false") == 0) || (stricmp(str.c_str(), "no") == 0)) { out.primitive.boolean = false; } else if (str == "1" || (stricmp(str.c_str(), "true") == 0) || (stricmp(str.c_str(), "yes") == 0)) { out.primitive.boolean = true; } else { Aurora::Console::Logging::LogWarn("Parsed tag of boolean type wasn't parsable given the English string {}", str); return false; } break; } default: SysPanic("Invalid consume tag {}", type); } return true; } static bool ConsumeToken(ParsableTag type, ParseContext &context, ParseValue &out) { ParseValue temp; #define ADD_VECTOR_VAL(idx, memberType) \ if (!ConsumeTokenPrimitiveish(ParsableTag::kParseNumber, context, temp)) \ return false; \ out.primitive.memberType[idx] = temp.primitive.number; switch (type) { case ParsableTag::kParseVec3: { ADD_VECTOR_VAL(0, vec3); ADD_VECTOR_VAL(1, vec3); ADD_VECTOR_VAL(2, vec3); return true; } case ParsableTag::kParseVec4: { ADD_VECTOR_VAL(0, vec4); ADD_VECTOR_VAL(1, vec4); ADD_VECTOR_VAL(2, vec4); ADD_VECTOR_VAL(3, vec4); return true; } default: { return ConsumeTokenPrimitiveish(type, context, out); } } #undef ADD_VECTOR_VAL } AUKN_SYM bool ConsumeToken(ParsableTag type, ConsumeStream_cb getc, ParseValue &out) { ParseContext context(getc); return ConsumeToken(type, context, out); } static bool Parse(ParseResult &result, const ParseObject &structure, ParseContext &context) { for (auto &parseBit : structure) { ParsedBit parsed = {}; bool ok; parsed.tag = parseBit.tag; AuMach arrayLength = 1; if (parseBit.array) { ParseValue arrayLengthBit = {}; if (!ConsumeToken(ParsableTag::kParseUInt, context, arrayLengthBit)) { Aurora::Console::Logging::LogWarn("Couldn't consume array length, label: {}, tag {}", parseBit.label, parseBit.tag); return false; } arrayLength = arrayLengthBit.primitive.uint; } parsed.count = 0; for (int i = 0; ((i < arrayLength) || (parseBit.vararg)); i++) { ParseValueEx parsedSingle = {}; ParseResult nestedresult = {}; switch (parseBit.tag) { case ParsableTag::kParseUInt: case ParsableTag::kParseSInt: case ParsableTag::kParseString: case ParsableTag::kParseStringVararg: case ParsableTag::kParseNumber: case ParsableTag::kParseBoolean: case ParsableTag::kParseUUID: case ParsableTag::kParseVec3: case ParsableTag::kParseVec4: { ok = ConsumeToken(parseBit.tag, context, parsedSingle); break; } case ParsableTag::kParseObject: { // TODO: Although this should never result in a stack overflow, i'd rather return a request to prase than recursively call the same function // Bah ok = Parse(nestedresult, parseBit.objectParse, context); // TODO: debug info parsedSingle.Object = nestedresult.result; break; } default: SysPanic("Invalid consume tag {} for {}", parseBit.tag, parseBit.label); } if (!ok) { if ((parseBit.optional) || (parseBit.vararg)) { break; } Aurora::Console::Logging::LogWarn("Syntax error around: label: {}, tag {}", parseBit.label, parseBit.tag); return false; } if (parseBit.vararg || parseBit.array) { parsed.count++; parsed.value.array.push_back(parsedSingle); parsed.isArray = true; } else { parsed.isArray = false; parsed.count = 1; parsed.value.single = parsedSingle; } } // do not add if an optional bit was not serialized if (parsed.count != 0) { result.result.push_back(parsed); } } result.SyntaxError = "No-Debug"; result.DebugTree = "No-Debug"; return true; } AUKN_SYM bool Parse(ParseResult &result, const ParseObject &structure, ConsumeStream_cb getc) { ParseContext context(getc); return Parse(result, structure, context); } AUKN_SYM void SerializeToken(ParsableTag type, const ParseValue &out, AuString &str) { AuString temp = out.string; switch (type) { case ParsableTag::kParseUInt: { str += std::to_string(out.primitive.uint); break; } case ParsableTag::kParseSInt: { str += std::to_string(out.primitive.sint); break; } case ParsableTag::kParseNumber: { str += std::to_string(out.primitive.number); break; } case ParsableTag::kParseVec3: { str += std::to_string(out.primitive.vec3.x) + " "; str += std::to_string(out.primitive.vec3.y) + " "; str += std::to_string(out.primitive.vec3.z); break; } case ParsableTag::kParseVec4: { str += std::to_string(out.primitive.vec4.x) + " "; str += std::to_string(out.primitive.vec4.y) + " "; str += std::to_string(out.primitive.vec4.z) + " "; str += std::to_string(out.primitive.vec4.w); break; } case ParsableTag::kParseString: { ReplaceAll(temp, "\"", "\\\""); str += "\"" + temp + "\""; break; } case ParsableTag::kParseStringVararg: { ReplaceAll(temp, "\n", "\\\n"); str += temp; break; } case ParsableTag::kParseUUID: { str += uuids::to_string(out.UUID); break; } case ParsableTag::kParseBoolean: { if (out.primitive.boolean) { str += "true"; } else { str += "false"; } break; } default: SysPanic("Invalid consume tag {}", type); } } AUKN_SYM void Serialize(const ParsedObject &structure, AuString &out) { AuString ret; for (auto &parsed : structure) { if (parsed.isArray) { ret += " "; ret += std::to_string(parsed.count); } bool isArray = parsed.count > 1 || parsed.isArray; for (int i = 0; ((i < parsed.count)); i++) { ret += " "; ParseValueEx parsedSingle = {}; ParseResult nestedresult = {}; switch (parsed.tag) { case ParsableTag::kParseUInt: case ParsableTag::kParseSInt: case ParsableTag::kParseString: case ParsableTag::kParseStringVararg: case ParsableTag::kParseNumber: case ParsableTag::kParseBoolean: case ParsableTag::kParseUUID: case ParsableTag::kParseVec3: case ParsableTag::kParseVec4: { SerializeToken(parsed.tag, !isArray ? parsed.value.single : parsed.value.array[i], ret); break; } case ParsableTag::kParseObject: { Serialize(!isArray ? parsed.value.single.Object : parsed.value.array[i].Object, ret); parsedSingle.Object = nestedresult.result; break; } default: SysPanic("Invalid emit tag {}", parsed.tag); } } } out = ret.substr(1); } }