AuroraRuntime/Source/Parse/Parser.cpp
Reece cf70f0d45c [*/+/-] MEGA COMMIT. ~2 weeks compressed.
The intention is to quickly improve and add util APIs, enhance functionality given current demands, go back to the build pipeline, finish that, publish runtime tests, and then use what we have to go back to Linux support with a more stable API.

[+] AuMakeSharedArray
[+] Technet ArgvQuote
[+] Grug subsystem (UNIX signal thread async safe ipc + telemetry flusher + log flusher.)
[+] auEndianness -> Endian swap utils
[+] AuGet<N>(...)
[*] AUE_DEFINE conversion for
        ECompresionType, EAnsiColor, EHashType, EStreamError, EHexDump
[+] ConsoleMessage ByteBuffer serialization
[+] CmdLine subsystem for parsing command line arguments and simple switch/flag checks
[*] Split logger from console subsystem
[+] StartupParameters -> A part of a clean up effort under Process
[*] Refactor SysErrors header + get caller hack
[+] Atomic APIs
[+] popcnt
[+] Ring Buffer sink
[+] Added more standard errors
        Catch,
        Submission,
        LockError,
        NoAccess,
        ResourceMissing,
        ResourceLocked,
        MalformedData,
        InSandboxContext,
        ParseError

[+] Added ErrorCategorySet, ErrorCategoryClear, GetStackTrace
[+] IExitSubscriber, ETriggerLevel
[*] Write-bias the high performance RWLockImpl read-lock operation
[+] ExitHandlerAdd/ExitHandlerRemove (exit subsystem)
[*] Updated API style
        Digests
[+] CpuId::CpuBitCount
[+] GetUserProgramsFolder
[+] GetPackagePath
[*] Split IStreamReader with an inl file
[*] BlobWriter/BlobReader/BlobArbitraryReader can now take shared pointers to bytebuffers. default constructor allocates a new scalable bytebuffer
[+] ICharacterProvider
[+] ICharacterProviderEx
[+] IBufferedCharacterConsumer
[+] ProviderFromSharedString
[+] ProviderFromString
[+] BufferConsumerFromProvider
[*] Parse Subsystem uses character io bufferer
[*] Rewritten NT's high perf semaphore to use userland SRW/ConVars [like mutex, based on generic semaphore]
[+] ByteBuffer::ResetReadPointer
[*] Bug fix bytebuffer base not reset on free and some scaling issues
[+] ProcessMap -> Added kSectionNameStack, kSectionNameFile, kSectionNameHeap for Section
[*] ProcessMap -> Refactor Segment to Section. I was stupid for keeping a type conflict hack API facing
[+] Added 64 *byte* fast RNG seeds
[+] File Advisories/File Lock Awareness
[+] Added extended IAuroraThread from OS identifier caches for debug purposes
[*] Tweaked how memory is reported on Windows. Better consistency of what values mean across functions.
[*] Broke AuroraUtils/Typedefs out into a separate library
[*] Update build script
[+] Put some more effort into adding detail to the readme before rewriting it, plus, added some media
[*] Improved public API documentation
[*] Bug fix `SetConsoleCtrlHandler`
[+] Locale TimeDateToFileNameISO8601
[+] Console config stdOutShortTime
[*] Begin using internal UTF8/16 decoders when platform support isnt available (instead of stl)
[*] Bug fixes in decoders
[*] Major bug fix, AuMax
[+] RateLimiter
[+] Binary file sink
[+] Log directory sink
[*] Data header usability (more operators)
[+] AuRemoveRange
[+] AuRemove
[+] AuTryRemove
[+] AuTryRemoveRange
[+] auCastUtils
[+] Finish NewLSWin32Source
[+] AuTryFindByTupleN, AuTryRemoveByTupleN
[+] Separated AuRead/Write types, now in auTypeUtils
[+] Added GetPosition/SetPosition to FileWriter
[*] Fix stupid AuMin in place of AuMax in SpawnThread.Unix.Cpp
[*] Refactored Arbitrary readers to SeekingReaders (as in, they could be atomic and/or parallelized, and accept an arbitrary position as a work parameter -> not Seekable, as in, you can simply set the position)
[*] Hack back in the sched deinit
[+] File AIO loop source interop
[+] Begin to prototype a LoopQueue object I had in mind for NT, untested btw
[+] Stub code for networking
[+] Compression BaseStream/IngestableStreamBase
[*] Major: read/write locks now support write-entrant read routines.
[*] Compression subsystem now uses the MemoryView concept
[*] Rewrite the base stream compressions, made them less broken
[*] Update hashing api
[*] WriterTryGoForward and ReaderTryGoForward now revert to the previous relative index instead of panicing
[+] Added new AuByteBuffer apis
    Trim, Pad, WriteFrom, WriteString, [TODO: ReadString]
[+] Added ByteBufferPushReadState
[+] Added ByteBufferPushWriteState
[*] Move from UCS-2 to full UTF-16. Win32 can handle full UTF-16.
[*] ELogLevel is now an Aurora enum
[+] Raised arbitrary limit in header to 255, the max filter buffer
[+] Explicit GZip support
[+] Explicit Zip support
[+] Added [some] compressors

et al
2022-02-17 00:11:40 +00:00

654 lines
19 KiB
C++

/***
Copyright (C) 2021 J Reece Wilson (a/k/a "Reece"). All rights reserved.
File: Parser.cpp
Date: 2021-6-12
Author: Reece
***/
#include <Source/RuntimeInternal.hpp>
#include "Parser.hpp"
namespace Aurora::Parse
{
// Exported hook for validating a parse structure definition.
// The body is currently empty: `object` is not inspected and nothing is reported.
// NOTE(review): the name is spelled "Vaildate"; it is part of the exported symbol, so it is left as-is.
AUKN_SYM void VaildateStructure(const ParseObject &object)
{
}
struct ParseContext
{
IO::Character::BufferConsumerFromProviderUnique_t buffer;
ParseContext(const AuSPtr<IO::Character::ICharacterProvider> &getc)
{
buffer = IO::Character::BufferConsumerFromProviderUnique(getc);
}
bool Next(AuUInt8 &c)
{
return buffer->Next(c);
}
bool Peek(AuUInt8 &c)
{
return buffer->PeekNext(c);
}
};
// Builds a predicate that reports whether a character is one of the entries of `arry`.
// The array is captured by copy, so the returned predicate carries its own set of characters.
template<size_t Z>
static AuFunction<bool(AuUInt8)> ContainedHerein(const AuUInt8(&arry)[Z])
{
    return [=](AuUInt8 c) -> bool
    {
        for (const AuUInt8 candidate : arry)
        {
            if (candidate == c)
            {
                return true;
            }
        }

        return false;
    };
}
// Builds a predicate that reports whether a character terminates the current token.
// A character terminates when it is listed in `arry`, or when it equals one of the first
// state.countOfTokens entries of state.additionalTokens. In the latter case the index of the
// matching entry is recorded in state.lastTokenAdditional as a side effect.
// Both `state` and `arry` are captured by reference and must outlive the returned predicate.
template<size_t Z>
static AuFunction<bool(AuUInt8)> IsTerminating(ParseState &state, const AuUInt8(&arry)[Z])
{
    return [&](AuUInt8 c) -> bool
    {
        // built-in terminators first; these never touch the parse state
        for (const AuUInt8 terminator : arry)
        {
            if (terminator == c)
            {
                return true;
            }
        }

        // then the caller supplied terminators
        for (int idx = 0; idx < state.countOfTokens; idx++)
        {
            if (state.additionalTokens[idx] != c)
            {
                continue;
            }

            state.lastTokenAdditional = idx;
            return true;
        }

        return false;
    };
}
// Tokenizer core: reads characters from `context` and appends the text of one token to `out`.
//
// Rules implemented below, in the order they are checked for each character:
//  * '\r' is dropped unconditionally.
//  * '\n', '\0' and any of state.additionalTokens terminate the token, unless we are inside a
//    quoted string or the terminator was escaped with a backslash (vararg strings only).
//    On termination state.hasLastToken / state.lastTokenCharacter record the terminator.
//  * A space terminates the token, unless we are inside quotes or parsing kParseStringVararg.
//  * For kParseString, a '"' seen while `out` is still empty opens a quoted string.
//  * Inside a quoted string, a backslash makes the following character literal.
//  * A '"' closes a quoted string only when followed by a terminator, a space, or end of input.
//
// Returns false when a quoted string was never closed (a warning is logged), otherwise returns
// whether `out` is non-empty. `out` is appended to, never cleared.
//
// The statement order is load bearing; be careful when touching this function.
bool ConsumeStringifedToken(ParseState &state, ParseContext &context, ParsableTag type, AuString &out)
{
    static unsigned char terminatingChars[] = { '\n', '\00' };
    static unsigned char whiteChars[] = { ' ' };
    static unsigned char illegalCharacters[] = { '\r' }; // carriage returns from Windows line endings

    auto isString = type == ParsableTag::kParseString;
    auto isvararg = type == ParsableTag::kParseStringVararg;

    bool stringLevel = false;     // currently inside a "quoted string"
    bool escapedNewLine = false;  // previous character was a backslash escaping a terminator (vararg)
    bool escapedQuote = false;    // previous character was a backslash inside a quoted string

    // not static: the predicate holds a reference to this call's `state`
    auto isTerminating = IsTerminating(state, terminatingChars);
    static auto isSpace = ContainedHerein(whiteChars);
    static auto isIgnore = ContainedHerein(illegalCharacters);

    while (true)
    {
        AuUInt8 cur {};
        // zero-initialised so that a failed Peek() can never leave an indeterminate value behind
        AuUInt8 peek {};

        // consume character from the stream
        if (!context.Next(cur))
        {
            break;
        }

        // some characters should be considered universally illegal such as nasty unicode spaces,
        // carriage returns, and any other crash-exploit prone characters.
        // Do not use this list to filter engine tags, color escapes, or similar markup.
        if (isIgnore(cur))
        {
            continue;
        }

        // Is end of line/end of string
        if (isTerminating(cur))
        {
            // Make sure we aren't encapsulated by quotation marks
            if ((!stringLevel) && (!AuExchange(escapedNewLine, false)))
            {
                state.hasLastToken = true;
                state.lastTokenCharacter = cur;
                break;
            }
        }

        // if we hit a space in a string, assume we're at the end of the token, unless we're parsing
        // a string in quotation marks or are consuming the entire line (ParsableTag::kParseStringVararg)
        if ((isSpace(cur)) && (!stringLevel) && (!isvararg))
        {
            break;
        }

        // if the string starts with a quotation mark, set stringLevel to true
        // TODO: I dont remember the parse rules. Check the old tests. Should this be AuStartsWith?
        if ((cur == '"') && (isString) && (out.empty()))
        {
            stringLevel = true;
            continue;
        }

        auto peekStatus = context.Peek(peek);
        // Only classify the look-ahead character when there actually is one. isTerminating() may
        // write state.lastTokenAdditional, so it must not be fed a value Peek() did not produce.
        bool isPeekterminating = peekStatus && isTerminating(peek);

        // prepare escape characters
        if ((peekStatus) && (cur == '\\'))
        {
            // check \ \n pair + is var arg
            if ((isPeekterminating) && (isvararg))
            {
                escapedNewLine = true;
                continue;
            }

            // escape character for "'s in strings
            if ((isString) && (stringLevel)) // && (peek == '\"'))
            {
                escapedQuote = true;
                continue;
            }
        }

        // see above
        if ((isString) && (AuExchange(escapedQuote, false)))
        {
            out += cur;
            continue;
        }
        // match the ending '"' character in the token to terminate strings containing spaces
        // do not make this more ambiguous by removing the **else if**.
        // this is mutually exclusive with the above block and must be kept in this order
        else if (
            (cur == '"') && (isString) && (stringLevel) //
            && (((peekStatus && (isPeekterminating || isSpace(peek))) || (!peekStatus)))) // expected to fail
        {
            stringLevel = false;
            continue;
        }
        else
        {
            // otherwise emit
            out += cur;
        }
    }

    //SysAssert(!stringLevel, "Parsed tag of string type must end with \", got {}", out);
    if (stringLevel)
    {
        AuLogWarn("Parsed tag of string type must end with \", got {}", out);
        return false;
    }

    //AuLogDbg("returned {} {}", out, count != 0);
    return out.size() != 0;
}
// Calls func(in.c_str(), nullptr) and stores the result in `out`.
// Any exception thrown by the call is swallowed and reported as a `false` return;
// `out` is only assigned when the call returned normally.
template<typename Func, typename Res>
static bool ScrewExceptions_2(Func func, const AuString &in, Res &out)
{
    bool converted = true;

    try
    {
        out = func(in.c_str(), nullptr);
    }
    catch (...)
    {
        converted = false;
    }

    return converted;
}
// Returns the sign of `val` as an integer: 1 when val is greater than zero,
// -1 when it is less than zero, and 0 otherwise.
template <typename T> int SignBit(T val)
{
    const T zero = T(0);
    const int isPositive = (zero < val) ? 1 : 0;
    const int isNegative = (val < zero) ? 1 : 0;
    return isPositive - isNegative;
}
// Parses a base-10 integer from `in` into `out`.
//
// Accepted input: an optional leading '-' (signed T only) followed by one or more ASCII digits.
// Parsing stops at the end of the string or at an embedded '\0'.
//
// Returns false, leaving `out` as 0, when:
//  * a non-digit character is encountered,
//  * there are no digits at all (empty string or a lone "-"),
//  * the value does not fit in T; a syntax error is pushed in this case.
//
// The range check is performed *before* each multiply/add so that no overflow ever takes place.
// Negative values are accumulated downwards so that the minimum value of T can be represented.
template <typename T>
bool ParseInt(const AuString &in, T &out)
{
    constexpr bool kSigned = std::numeric_limits<T>::is_signed;
    constexpr T kMax = std::numeric_limits<T>::max();

    T res = 0;
    [[maybe_unused]] bool negative = false;
    bool sawDigit = false;

    out = 0;

    auto itr = in.begin();

    if constexpr (kSigned)
    {
        if ((itr != in.end()) && (*itr == '-'))
        {
            ++itr;
            negative = true;
        }
    }

    for (;
         (itr != in.end()) &&
         (*itr != '\0');
         ++itr)
    {
        const auto c = *itr;

        if ((c < '0') || (c > '9'))
        {
            return false;
        }

        const T digit = static_cast<T>(c - '0');
        sawDigit = true;

        if constexpr (kSigned)
        {
            if (negative)
            {
                // res * 10 - digit >= min  <=>  res >= (min + digit) / 10 (division truncates towards zero)
                if (res < (std::numeric_limits<T>::min() + digit) / 10)
                {
                    SysPushErrorSyntaxError("Signed integer overflow: {}", in);
                    return false;
                }

                res = static_cast<T>((res * 10) - digit);
                continue;
            }
        }

        // res * 10 + digit <= max  <=>  res <= (max - digit) / 10
        if (res > (kMax - digit) / 10)
        {
            if constexpr (kSigned)
            {
                SysPushErrorSyntaxError("Signed integer overflow: {}", in);
            }
            else
            {
                SysPushErrorSyntaxError("Unsigned integer overflow: {}", in);
            }
            return false;
        }

        res = static_cast<T>((res * 10) + digit);
    }

    // "" and "-" are not numbers
    if (!sawDigit)
    {
        return false;
    }

    out = res;
    return true;
}
template <typename T>
bool ParseUInt(const AuString &in, T &out)
{
auto temp = AuUInt{};
out = 0;
if (!ParseInt<AuUInt>(in, temp))
{
return false;
}
if (temp > std::numeric_limits<T>::max())
{
return false;
}
out = temp;
return true;
}
template <typename T>
bool ParseSInt(const AuString &in, T &out)
{
auto temp = AuSInt{};
out = 0;
if (!ParseInt<AuSInt>(in, temp))
{
return false;
}
if (temp > std::numeric_limits<T>::max())
{
return false;
}
if (temp < std::numeric_limits<T>::min())
{
return false;
}
out = temp;
return true;
}
// Consumes one textual token from the stream and converts it according to `type`,
// writing the converted value into the matching member of `out`.
// Handles the scalar tags only (uint, sint, number, string, vararg string, UUID, boolean);
// any other tag hits SysPanic. Returns false when no token could be read or when the
// token text could not be converted; warnings are logged for bad UUIDs and booleans.
static bool ConsumeTokenPrimitiveish(ParseState &state, ParseContext &context, ParsableTag type, ParseValue &out)
{
    AuString text;

    if (!ConsumeStringifedToken(state, context, type, text))
    {
        return false;
    }

    if ((text.empty()) && (type != ParsableTag::kParseString))
    {
        return false;
    }

    // case insensitive comparison of the token against an English keyword
    const auto equalsNoCase = [&text](const char *word) -> bool
    {
        return stricmp(text.c_str(), word) == 0;
    };

    switch (type)
    {
        case ParsableTag::kParseUInt:
            return ParseUInt(text, out.primitive.uint);

        case ParsableTag::kParseSInt:
            return ParseSInt(text, out.primitive.sint);

        case ParsableTag::kParseNumber:
            // std::stod throws on garbage; the wrapper turns that into a false return
            return ScrewExceptions_2(static_cast<double(&)(const std::string &, std::size_t *)>(std::stod), text, out.primitive.number);

        case ParsableTag::kParseString:
        case ParsableTag::kParseStringVararg:
            out.string = text;
            return true;

        case ParsableTag::kParseUUID:
        {
            AuOptional<uuids::uuid> parsedUuid;
            parsedUuid = uuids::uuid::from_string(text);

            if (!parsedUuid.has_value())
            {
                AuLogWarn("Parse Error: invalid UUID {}", text);
                return false;
            }

            out.UUID = parsedUuid.value();
            return true;
        }

        case ParsableTag::kParseBoolean:
        {
            if ((text == "0") || equalsNoCase("false") || equalsNoCase("no"))
            {
                out.primitive.boolean = false;
                return true;
            }

            if ((text == "1") || equalsNoCase("true") || equalsNoCase("yes"))
            {
                out.primitive.boolean = true;
                return true;
            }

            AuLogWarn("Parsed tag of boolean type wasn't parsable given the English string {}", text);
            return false;
        }

        default:
            SysPanic("Invalid consume tag {}", type);
    }

    return true;
}
// Consumes one value of the given tag from the stream into `out`.
// kParseVec3 / kParseVec4 are read as three / four consecutive kParseNumber tokens that are
// stored into out.primitive.vec3 / vec4 component by component; reading stops with `false`
// at the first component that fails, leaving earlier components written.
// Every other tag is forwarded to ConsumeTokenPrimitiveish.
static bool ConsumeToken(ParseState &state, ParseContext &context, ParsableTag type, ParseValue &out)
{
    ParseValue scratch;

    // reads the next number token into scratch.primitive.number
    const auto readNumber = [&]() -> bool
    {
        return ConsumeTokenPrimitiveish(state, context, ParsableTag::kParseNumber, scratch);
    };

    switch (type)
    {
        case ParsableTag::kParseVec3:
        {
            for (int idx = 0; idx < 3; idx++)
            {
                if (!readNumber())
                {
                    return false;
                }
                out.primitive.vec3[idx] = scratch.primitive.number;
            }
            return true;
        }
        case ParsableTag::kParseVec4:
        {
            for (int idx = 0; idx < 4; idx++)
            {
                if (!readNumber())
                {
                    return false;
                }
                out.primitive.vec4[idx] = scratch.primitive.number;
            }
            return true;
        }
        default:
        {
            return ConsumeTokenPrimitiveish(state, context, type, out);
        }
    }
}
// Public single-token entry point: builds a fresh ParseState from `getc`, wraps the state's
// stream in a ParseContext, and consumes one value of `type` into `out`.
AUKN_SYM bool ConsumeToken(ParsableTag type, const AuSPtr<Aurora::IO::Character::ICharacterProvider> &getc, ParseValue &out)
{
    ParseState freshState(getc);
    ParseContext freshContext(freshState.stream);

    const bool consumed = ConsumeToken(freshState, freshContext, type, out);
    return consumed;
}
// Parses the stream according to `structure`, appending one ParsedBit per successfully parsed
// entry to result.result.
//
// For each entry of `structure`:
//  * if `array` is set, a kParseUInt element count is consumed first;
//  * elements are then consumed until the count is reached, or, for `vararg`, until one fails;
//  * a failed element ends the entry quietly when it is `optional` or `vararg`, otherwise a
//    warning is logged and the whole parse returns false;
//  * entries that produced no element are not added to the result.
//
// On success result.syntaxError and result.debugTree are set to the "No-Debug" placeholder.
static bool Parse(ParseState &state, ParseContext &context, const ParseObject &structure, ParseResult &result)
{
    for (auto &parseBit : structure)
    {
        ParsedBit parsed = {};
        parsed.tag = parseBit.tag;
        parsed.count = 0;

        AuMach arrayLength = 1;
        if (parseBit.array)
        {
            ParseValue arrayLengthBit = {};
            if (!ConsumeToken(state, context, ParsableTag::kParseUInt, arrayLengthBit))
            {
                AuLogWarn("Couldn't consume array length, label: {}, tag {}", parseBit.label, parseBit.tag);
                return false;
            }
            arrayLength = arrayLengthBit.primitive.uint;
        }

        // The index shares arrayLength's type: the length comes straight from the input, and a
        // vararg run is unbounded, so a plain int index could be compared incorrectly or overflow.
        for (AuMach i = 0; ((i < arrayLength) || (parseBit.vararg)); i++)
        {
            ParseValueEx parsedSingle = {};
            bool ok = false;

            switch (parseBit.tag)
            {
                case ParsableTag::kParseUInt:
                case ParsableTag::kParseSInt:
                case ParsableTag::kParseString:
                case ParsableTag::kParseStringVararg:
                case ParsableTag::kParseNumber:
                case ParsableTag::kParseBoolean:
                case ParsableTag::kParseUUID:
                case ParsableTag::kParseVec3:
                case ParsableTag::kParseVec4:
                {
                    ok = ConsumeToken(state, context, parseBit.tag, parsedSingle);
                    break;
                }
                case ParsableTag::kParseObject:
                {
                    // TODO: Although this should never result in a stack overflow, i'd rather return a request to parse than recursively call the same function
                    ParseResult nestedresult = {};
                    ok = Parse(state, context, parseBit.objectParse, nestedresult);
                    // TODO: debug info
                    parsedSingle.object = nestedresult.result;
                    break;
                }
                default:
                    SysPanic("Invalid consume tag {} for {}", parseBit.tag, parseBit.label);
            }

            if (!ok)
            {
                // optional and vararg entries simply end at the first element that fails
                if ((parseBit.optional) || (parseBit.vararg))
                {
                    break;
                }

                AuLogWarn("Syntax error around: label: {}, tag {}", parseBit.label, parseBit.tag);
                return false;
            }

            if (parseBit.vararg || parseBit.array)
            {
                parsed.count++;
                parsed.value.array.push_back(parsedSingle);
                parsed.isArray = true;
            }
            else
            {
                parsed.isArray = false;
                parsed.count = 1;
                parsed.value.single = parsedSingle;
            }
        }

        // do not add if an optional bit was not serialized
        if (parsed.count != 0)
        {
            result.result.push_back(parsed);
        }
    }

    result.syntaxError = "No-Debug";
    result.debugTree = "No-Debug";
    return true;
}
// Public parse entry point: wraps state.stream in a new ParseContext and runs the
// structure-driven parser with it.
AUKN_SYM bool Parse(ParseState &state, const ParseObject &structure, ParseResult &result)
{
    ParseContext streamContext(state.stream);

    const bool parsedOk = Parse(state, streamContext, structure, result);
    return parsedOk;
}
// Appends the textual form of a single value of the given tag to `str`.
//  * numbers, vectors, UUIDs and booleans are appended as plain text; vector components
//    are separated by a single space and booleans are written as "true"/"false";
//  * kParseString is wrapped in double quotes with backslashes and quotes backslash-escaped;
//  * kParseStringVararg is appended raw except that each newline gets a backslash in front.
// Any other tag hits SysPanic.
AUKN_SYM void SerializeToken(ParsableTag type, const ParseValue &value, AuString &str)
{
    switch (type)
    {
        case ParsableTag::kParseUInt:
            str += AuToString(value.primitive.uint);
            break;

        case ParsableTag::kParseSInt:
            str += AuToString(value.primitive.sint);
            break;

        case ParsableTag::kParseNumber:
            str += AuToString(value.primitive.number);
            break;

        case ParsableTag::kParseVec3:
        {
            for (int idx = 0; idx < 3; idx++)
            {
                if (idx != 0)
                {
                    str += " ";
                }
                str += AuToString(value.primitive.vec3[idx]);
            }
            break;
        }

        case ParsableTag::kParseVec4:
        {
            for (int idx = 0; idx < 4; idx++)
            {
                if (idx != 0)
                {
                    str += " ";
                }
                str += AuToString(value.primitive.vec4[idx]);
            }
            break;
        }

        case ParsableTag::kParseString:
        {
            // escape backslashes first so the backslashes added for quotes are not doubled
            AuString escaped = value.string;
            AuReplaceAll(escaped, "\\", "\\\\");
            AuReplaceAll(escaped, "\"", "\\\"");
            str += "\"" + escaped + "\"";
            break;
        }

        case ParsableTag::kParseStringVararg:
        {
            AuString escaped = value.string;
            AuReplaceAll(escaped, "\n", "\\\n");
            str += escaped;
            break;
        }

        case ParsableTag::kParseUUID:
            str += uuids::to_string(value.UUID);
            break;

        case ParsableTag::kParseBoolean:
            str += value.primitive.boolean ? "true" : "false";
            break;

        default:
            SysPanic("Invalid consume tag {}", type);
    }
}
// Appends the textual form of `structure` to `ret`.
//
// Every emitted item (an array's element count, and each element) is preceded by a single
// space; the one separator written first by this call is removed again at the end, so the
// appended text does not start with a space. Nested objects are serialized recursively into
// the same string.
//
// Only the text appended by *this* call is trimmed. Whatever `ret` held on entry is left
// untouched, which keeps recursive calls from eating characters of the enclosing output and
// makes an empty `structure` a no-op instead of an out-of-range substr().
AUKN_SYM void Serialize(const ParsedObject &structure, AuString &ret)
{
    // offset at which this call starts writing
    const auto start = ret.size();

    for (const auto &parsed : structure)
    {
        if (parsed.isArray)
        {
            ret += " ";
            ret += AuToString(parsed.count);
        }

        const bool isArray = parsed.count > 1 || parsed.isArray;

        for (decltype(parsed.count) i = 0; i < parsed.count; i++)
        {
            ret += " ";

            switch (parsed.tag)
            {
                case ParsableTag::kParseUInt:
                case ParsableTag::kParseSInt:
                case ParsableTag::kParseString:
                case ParsableTag::kParseStringVararg:
                case ParsableTag::kParseNumber:
                case ParsableTag::kParseBoolean:
                case ParsableTag::kParseUUID:
                case ParsableTag::kParseVec3:
                case ParsableTag::kParseVec4:
                {
                    SerializeToken(parsed.tag, !isArray ? parsed.value.single : parsed.value.array[i], ret);
                    break;
                }
                case ParsableTag::kParseObject:
                {
                    Serialize(!isArray ? parsed.value.single.object : parsed.value.array[i].object, ret);
                    break;
                }
                default:
                    SysPanic("Invalid emit tag {}", parsed.tag);
            }
        }
    }

    // drop the leading separator, but only if this call actually wrote something
    if (ret.size() > start)
    {
        ret.erase(start, 1);
    }
}
}