/*** Copyright (C) 2021 J Reece Wilson (a/k/a "Reece"). All rights reserved. File: AuParser.cpp Date: 2021-6-12 Author: Reece Note: Horrible gen 1 parser. I'm not removing or significantly improving this. Just build around what works, could probably wrangle a command list parser on top. ***/ #include #include "AuParser.hpp" namespace Aurora::Parse { AUKN_SYM void VaildateStructure(const ParseObject &object) { } template static AuFunction ContainedHerein(const AuUInt8(&arry)[Z]) { return [=](AuUInt8 c) -> bool { for (int i = 0; i < Z; i++) { if (arry[i] == c) { return true; } } return false; }; } template static AuFunction IsTerminating(ParseState &state, const AuUInt8(&arry)[Z]) { return [&](AuUInt8 c) -> bool { for (int i = 0; i < Z; i++) { if (arry[i] == c) { return true; } } for (int i = 0; i < state.countOfTokens; i++) { if (state.additionalTokens[i] == c) { state.lastTokenAdditional = i; return true; } } return false; }; } // oh god just dont think about this tokenizer too much bool ConsumeStringifedToken(ParseState &state, ParsableTag type, AuString &out) { static unsigned char terminatingChars[] = { '\n', '\00' }; static unsigned char whiteChars[] = { ' ' }; static unsigned char illegalCharacters[] = { '\r' }; //reeedows auto isString = type == ParsableTag::kParseString; auto isvararg = type == ParsableTag::kParseStringVararg; bool stringLevel = false; bool escapedNewLine = false; bool escapeNextLegal = false; auto isTerminating = IsTerminating(state, terminatingChars); static auto isSpace = ContainedHerein(whiteChars); static auto isIgnore = ContainedHerein(illegalCharacters); while (true) { AuUInt8 cur; AuUInt8 peek; // consume character from the stream if (!state.stream->Next(cur)) { break; } // some characters should be considered universally illegal such as nasty unicode spaces, // carriage returns, and any other nasty crash exploit dependee characters if (isIgnore(cur)) { continue; } // Is end of line/end of string if (isTerminating(cur)) { // Make sure we aren't encapsulated by quotation marks if ((!stringLevel) && (!AuExchange(escapedNewLine, false))) { state.hasLastToken = true; state.lastTokenCharacter = cur; break; } } // if the string starts with a quotation mark, set stringLevel to true // TODO: I dont remember the parse rules. Check the old tests. Should this be AuStartsWith? if ((cur == '"') && (isString) && (out.empty())) { stringLevel = true; continue; } auto peekStatus = state.stream->PeekNext(peek); bool isPeekterminating = isTerminating(peek); // prepare escape characters if ((peekStatus) && (cur == '\\')) { // check \ \n pair + is var arg if ((isPeekterminating) && (isvararg)) { escapedNewLine = true; continue; } // escape character for "'s in strings if (isString && (peek == '\"' || peek == ' ' || peek == '\n' || peek == '\r')) { escapeNextLegal = true; continue; } } // see above if (AuExchange(escapeNextLegal, false)) { out += cur; continue; } // match the ending '"' character in the token to terminate strings containing spaces // do not make this more ambiguous by removing the **else if**. if ( (cur == '"') && (isString) && (stringLevel) // // can we expect to fail next iteration? && (((peekStatus && (isPeekterminating || isSpace(peek)) /*Is ending character?*/) || (!peekStatus)))) // Is EoS? { stringLevel = false; continue; } // if we hit a space in a string, assume we're at the end of the token, unless we're parsing // a string in quotation marks or are consuming the entire line (ParsableTag::kParseStringVararg) if ((isSpace(cur)) && (!stringLevel) && (!isvararg)) { break; } // otherwise emit out += cur; } //SysAssert(!stringLevel, "Parsed tag of string type must end with \", got {}", out); if (stringLevel) { SysPushErrorSyntaxError("Parsed tag of string type must end with \", got {}", out); return false; } //AuLogDbg("returned {} {}", out, out.size() != 0); return out.size() != 0; } template static bool ScrewExceptions_2(Func func, const AuString &in, Res &out) { try { out = func(in.c_str(), nullptr); return true; } catch (...) { return false; } } template int SignBit(T val) { return (T(0) < val) - (val < T(0)); } template bool ParseInt(Iterator begin, Iterator &end, T &out) { T res = 0; T sign = 1; out = 0; auto itr = begin; if (begin == end) { return false; } if constexpr (AuIsSame_v) { if (itr != end) { if (*itr == '-') { itr++; sign = -1; } } } int perf {}; for (; (itr != end) && (*itr != '\0'); itr++) { auto c = *itr; if ((c < '0') || (c > '9')) { return false; } auto old = res; res *= 10; res += static_cast(*itr) - static_cast('0'); if ((perf++) >= 5) { if constexpr (AuIsSame_v) { if (old > res) { SysPushErrorSyntaxError("Unsigned integer overflow: {}", AuString(begin, end)); end = itr; return false; } } else if constexpr (AuIsSame_v) { if (SignBit(old) != SignBit(res)) { SysPushErrorSyntaxError("Signed integer overflow: {}", AuString(begin, end)); end = itr; return false; } } } } end = itr; out = res * sign; return true; } template bool ParseInt16(Iterator begin, Iterator &end, T &out) { T res = 0; T sign = 1; out = 0; auto itr = begin; if (begin == end) { return false; } if constexpr (AuIsSame_v) { if (itr != end) { if (*itr == '-') { itr++; sign = -1; } } } if (itr != end) { if ((*itr == '0') && (itr[1] == 'x')) // TODO (Reece): EVIL... but it's only a byte { itr++; itr++; } } if (begin != end) { auto endChar = *(end - 1); if (endChar == 'h' || endChar == 'H') { end--; } } int perf {}; for (; (itr != end) && (*itr != '\0'); itr++) { auto c = *itr; int delta {}; if ((c >= 'A') && (c <= 'F')) { delta = 10 + (c - 'A'); } else if ((c >= 'a') && (c <= 'f')) { delta = 10 + (c - 'a'); } else if ((c >= '0') && (c <= '9')) { delta = c - '0'; } else { return false; } auto old = res; res *= T(16); res += T(delta); if ((perf++) >= 5) { if constexpr (AuIsSame_v) { if (old > res) { SysPushErrorSyntaxError("Unsigned integer overflow: {}", AuString(begin, end)); end = itr; return false; } } else if constexpr (AuIsSame_v) { if (SignBit(old) != SignBit(res)) { SysPushErrorSyntaxError("Signed integer overflow: {}", AuString(begin, end)); end = itr; return false; } } } } end = itr; out = res * sign; return true; } static bool ConsumeTokenPrimitiveish(ParseState &state, ParsableTag type, ParseValue &out) { AuString str; if (!ConsumeStringifedToken(state, type, str)) { return false; } if ((type != ParsableTag::kParseString) && (str.empty())) { return false; } AuOptional uuid; auto end = str.end(); switch (type) { case ParsableTag::kParseUInt: { return ParseInt(str.begin(), end, out.primitive.uint) && end == str.end(); } case ParsableTag::kParseSInt: { return ParseInt(str.begin(), end, out.primitive.sint) && end == str.end(); } case ParsableTag::kParseNumber: { return ScrewExceptions_2(static_cast(std::stod), str, out.primitive.number); } case ParsableTag::kParseString: case ParsableTag::kParseStringVararg: { out.string = AuMove(str); break; } case ParsableTag::kParseUUID: { if (auto uuid2 = uuids::uuid::from_string(str)) { uuid = *uuid2; } if (!uuid.has_value()) { SysPushErrorSyntaxError("Parse Error: invalid UUID {}", str); return false; } out.UUID = uuid.value(); break; } case ParsableTag::kParseBoolean: { if ((str == "0") || (stricmp(str.c_str(), "false") == 0) || (stricmp(str.c_str(), "no") == 0)) { out.primitive.boolean = false; } else if ((str == "1") || (stricmp(str.c_str(), "true") == 0) || (stricmp(str.c_str(), "yes") == 0)) { out.primitive.boolean = true; } else { SysPushErrorSyntaxError("Parsed tag of boolean type wasn't parsable given the English string {}", str); return false; } break; } default: SysPanic("Invalid consume tag {}", AuUInt(type)); } return true; } AUKN_SYM bool ConsumeToken(ParseState &state, ParsableTag type, ParseValue &out) { ParseValue temp; #define ADD_VECTOR_VAL(idx, memberType) \ if (!ConsumeTokenPrimitiveish(state, ParsableTag::kParseNumber, temp)) \ return false; \ out.primitive.memberType[idx] = temp.primitive.number; switch (type) { case ParsableTag::kParseVec3: { ADD_VECTOR_VAL(0, vec3); ADD_VECTOR_VAL(1, vec3); ADD_VECTOR_VAL(2, vec3); return true; } case ParsableTag::kParseVec4: { ADD_VECTOR_VAL(0, vec4); ADD_VECTOR_VAL(1, vec4); ADD_VECTOR_VAL(2, vec4); ADD_VECTOR_VAL(3, vec4); return true; } default: { return ConsumeTokenPrimitiveish(state, type, out); } } #undef ADD_VECTOR_VAL } AUKN_SYM bool ConsumeToken(ParsableTag type, const AuSPtr &getc, ParseValue &out) { ParseState state(getc); return ConsumeToken(state, type, out); } AUKN_SYM bool ConsumeToken(ParsableTag type, const AuSPtr &getc, ParseValue &out) { ParseState state(getc); return ConsumeToken(state, type, out); } AUKN_SYM AuList ConsumeTokens(ParsableTag type, const AuSPtr &getc) { AuList ret; AuParse::ParseValueEx res; AuParse::ParseState state(getc); while (AuParse::ConsumeToken(state, type, res)) { ret.push_back(res); } return ret; } AUKN_SYM AuList ConsumeTokens(ParsableTag type, const AuSPtr &getc) { AuList ret; AuParse::ParseValueEx res; AuParse::ParseState state(getc); while (AuParse::ConsumeToken(state, type, res)) { ret.push_back(res); } return ret; } AUKN_SYM bool Parse(ParseState &state, const ParseObject &structure, ParseResult &result) { for (auto &parseBit : structure) { ParsedBit parsed = {}; bool ok; parsed.tag = parseBit.tag; AuMach arrayLength = 1; if (parseBit.array) { ParseValue arrayLengthBit = {}; if (!ConsumeToken(state, ParsableTag::kParseUInt, arrayLengthBit)) { SysPushErrorSyntaxError("Couldn't consume array length, label: {}, tag {}", parseBit.label, (AuUInt)parseBit.tag); return false; } arrayLength = arrayLengthBit.primitive.uint; } parsed.count = 0; for (int i = 0; ((i < arrayLength) || (parseBit.vararg)); i++) { ParseValueEx parsedSingle = {}; ParseResult nestedresult = {}; switch (parseBit.tag) { case ParsableTag::kParseUInt: case ParsableTag::kParseSInt: case ParsableTag::kParseString: case ParsableTag::kParseStringVararg: case ParsableTag::kParseNumber: case ParsableTag::kParseBoolean: case ParsableTag::kParseUUID: case ParsableTag::kParseVec3: case ParsableTag::kParseVec4: { ok = ConsumeToken(state, parseBit.tag, parsedSingle); break; } case ParsableTag::kParseObject: { // TODO: Although this should never result in a stack overflow, i'd rather return a request to prase than recursively call the same function // Bah ok = Parse(state, parseBit.objectParse, nestedresult); // TODO: debug info parsedSingle.object = nestedresult.result; break; } default: SysPanic("Invalid consume tag {} for {}", (AuUInt)parseBit.tag, parseBit.label); } if (!ok) { if ((parseBit.optional) || (parseBit.vararg)) { break; } SysPushErrorSyntaxError("Syntax error around: label: {}, tag {}", parseBit.label, (AuUInt)parseBit.tag); return false; } if (parseBit.vararg || parseBit.array) { parsed.count++; if (!AuTryInsert(parsed.value.array, parsedSingle)) { SysPushErrorMem(); return false; } parsed.isArray = true; } else { parsed.isArray = false; parsed.count = 1; parsed.value.single = parsedSingle; } } // do not add if an optional bit was not serialized if (parsed.count != 0) { result.result.push_back(parsed); } } result.syntaxError = "No-Debug"; result.debugTree = "No-Debug"; return true; } AUKN_SYM void SerializeToken(ParsableTag type, const ParseValue &value, AuString &str) { AuString temp = value.string; switch (type) { case ParsableTag::kParseUInt: { str += AuToString(value.primitive.uint); break; } case ParsableTag::kParseSInt: { str += AuToString(value.primitive.sint); break; } case ParsableTag::kParseNumber: { str += AuToString(value.primitive.number); break; } case ParsableTag::kParseVec3: { str += AuToString(value.primitive.vec3[0]) + " "; str += AuToString(value.primitive.vec3[1]) + " "; str += AuToString(value.primitive.vec3[2]); break; } case ParsableTag::kParseVec4: { str += AuToString(value.primitive.vec4[0]) + " "; str += AuToString(value.primitive.vec4[1]) + " "; str += AuToString(value.primitive.vec4[2]) + " "; str += AuToString(value.primitive.vec4[3]); break; } case ParsableTag::kParseString: { AuReplaceAll(temp, "\\", "\\\\"); AuReplaceAll(temp, "\"", "\\\""); str += "\"" + temp + "\""; break; } case ParsableTag::kParseStringVararg: { AuReplaceAll(temp, "\n", "\\\n"); str += temp; break; } case ParsableTag::kParseUUID: { str += uuids::to_string(value.UUID); break; } case ParsableTag::kParseBoolean: { if (value.primitive.boolean) { str += "true"; } else { str += "false"; } break; } default: SysPanic("Invalid consume tag {}", (AuUInt)type); } } AUKN_SYM void Serialize(const ParsedObject &structure, AuString &ret) { for (auto &parsed : structure) { if (parsed.isArray) { if (ret.size()) { ret += " "; } ret += AuToString(parsed.count); } bool isArray = parsed.count > 1 || parsed.isArray; for (int i = 0; ((i < parsed.count)); i++) { if (ret.size()) { ret += " "; } ParseValueEx parsedSingle = {}; ParseResult nestedresult = {}; switch (parsed.tag) { case ParsableTag::kParseUInt: case ParsableTag::kParseSInt: case ParsableTag::kParseString: case ParsableTag::kParseStringVararg: case ParsableTag::kParseNumber: case ParsableTag::kParseBoolean: case ParsableTag::kParseUUID: case ParsableTag::kParseVec3: case ParsableTag::kParseVec4: { SerializeToken(parsed.tag, !isArray ? parsed.value.single : parsed.value.array[i], ret); break; } case ParsableTag::kParseObject: { Serialize(!isArray ? parsed.value.single.object : parsed.value.array[i].object, ret); parsedSingle.object = nestedresult.result; break; } default: SysPanic("Invalid emit tag {}", (AuUInt)parsed.tag); } } } } AUKN_SYM AuResult ParseUInt(const char *begin, const char *&end) { AuUInt temp{}; if (!ParseInt(begin, end, temp)) { return {}; } return temp; } AUKN_SYM AuResult ParseSInt(const char *begin, const char *&end) { AuSInt temp{}; if (!ParseInt(begin, end, temp)) { return {}; } return temp; } AUKN_SYM AuResult ParseUInt16(const char *begin, const char *&end) { AuUInt temp{}; if (!ParseInt16(begin, end, temp)) { return {}; } return temp; } AUKN_SYM AuResult ParseSInt16(const char *begin, const char *&end) { AuSInt temp{}; if (!ParseInt16(begin, end, temp)) { return {}; } return temp; } AUKN_SYM AuString StringifySInt16(AuInt64 in, bool bZeroX) { if (bZeroX) { auto str = AuString(fmt::format("{0:#X}", in)); if (str.size() > 2) { if (str[1] == 'X') { str[1] = 'x'; } else if (str.size() > 3 && str[2] == 'X') { str[2] = 'x'; } } return str; } else { return AuString(fmt::format("{:X}", in)); } } AUKN_SYM AuString StringifyUInt16(AuUInt64 in, bool bZeroX) { if (bZeroX) { auto str = AuString(fmt::format("{0:#X}", in)); if (str.size() > 2) { if (str[1] == 'X') { str[1] = 'x'; } else if (str.size() > 3 && str[2] == 'X') { str[2] = 'x'; } } return str; } else { return AuString(fmt::format("{:X}", in)); } } AUKN_SYM AuResult ParseSInt16(const char *begin) { const auto end = begin + ::strlen(begin); const char *didntFuckingAskTYVM {end }; auto res = ParseSInt16(begin, didntFuckingAskTYVM); if (!res.has_value()) { return {}; } if (didntFuckingAskTYVM != end) { return {}; } return res; } AUKN_SYM AuResult ParseSInt16(const AuROString &str) { const char *didntFuckingAskTYVM { &str[str.size()] }; auto res = ParseSInt16(str.data(), didntFuckingAskTYVM); if (!res.has_value()) { return {}; } if (didntFuckingAskTYVM != &str[str.size()]) { return {}; } return res; } AUKN_SYM AuResult ParseUInt16(const char *begin) { const auto end = begin + ::strlen(begin); const char *didntFuckingAskTYVM { end }; auto res = ParseUInt16(begin, didntFuckingAskTYVM); if (!res.has_value()) { return {}; } if (didntFuckingAskTYVM != end) { return {}; } return res; } AUKN_SYM AuResult ParseUInt16(const AuROString &str) { const char *didntFuckingAskTYVM { &str[str.size()] }; auto res = ParseUInt16(str.data(), didntFuckingAskTYVM); if (!res.has_value()) { return {}; } if (didntFuckingAskTYVM != &str[str.size()]) { return {}; } return res; } AUKN_SYM AuResult ParseSInt(const AuROString &str) { const char *didntFuckingAskTYVM { &str[str.size()] }; auto res = ParseSInt(str.data(), didntFuckingAskTYVM); if (!res.has_value()) { return {}; } if (didntFuckingAskTYVM != &str[str.size()]) { return {}; } return res; } AUKN_SYM AuResult ParseSInt(const char *begin) { const auto end = begin + ::strlen(begin); const char *didntFuckingAskTYVM { end }; auto res = ParseSInt(begin, didntFuckingAskTYVM); if (!res.has_value()) { return {}; } if (didntFuckingAskTYVM != end) { return {}; } return res; } AUKN_SYM AuResult ParseUInt(const AuROString &str) { const char *didntFuckingAskTYVM { &str[str.size()] }; auto res = ParseUInt(str.data(), didntFuckingAskTYVM); if (!res.has_value()) { return {}; } if (didntFuckingAskTYVM != &str[str.size()]) { return {}; } return res; } AUKN_SYM AuResult ParseUInt(const char *begin) { const auto end = begin + ::strlen(begin); const char *didntFuckingAskTYVM { end }; auto res = ParseUInt(begin, didntFuckingAskTYVM); if (!res.has_value()) { return {}; } if (didntFuckingAskTYVM != end) { return {}; } return res; } }