AuroraRuntime/Include/Aurora/Parse/Parser.hpp
Jamie Reece Wilson 83f34b0c47 [*] I was right. String views are [mostly] pointless (*)
03:28:55:638  17>2 of 53388 functions (<0.1%) were compiled, the rest were copied from previous compilation.
03:28:55:638  17>  0 functions were new in current compilation
03:28:55:638  17>  65 functions had inline decision re-evaluated but remain unchanged
03:28:56:749  17>Finished generating code

The header of `const AuString &` is the same as `std::string_view`, therefore nothing changes. In fact, we still need to allocate strings a bunch of times for a zero-terminated string. Worse, pre-C++20 always allocates each time we want to access a hashmap with O(1) lookup, making small hashmaps kind of pointless when we always have to alloc+copy (thx std).

perhaps this will help some language binders
2024-04-19 05:58:08 +01:00

265 lines
8.9 KiB
C++

/***
Copyright (C) 2021 J Reece Wilson (a/k/a "Reece"). All rights reserved.
File: Parser.hpp
Date: 2021-6-9
Author: Reece
***/
#pragma once
#include <utility>
#include <Aurora/Data/Data.hpp>
#include <Aurora/IO/Character/ICharacterProvider.hpp>
#include <Aurora/IO/Character/ICharacterProviderEx.hpp>
#include <Aurora/IO/Character/IBufferedCharacterConsumer.hpp>
#include <Aurora/IO/Character/Providers.hpp>
namespace Aurora::Parse
{
/**
 * @brief Kinds of token the parser can be asked to consume.
 *
 * The first eight enumerators take their numeric value from the matching
 * Data::EDataType enumerator. The last two have no explicit value, so by the
 * normal enumerator numbering rule they are kParseVec4 + 1 and kParseVec4 + 2.
 * NOTE(review): this assumes no other Data::EDataType value collides with
 * kTypeVec4 + 1 / + 2 - confirm against Data.hpp
 */
enum class ParsableTag
{
    // mirrored from Data::EDataType
    kParseUInt    = static_cast<int>(Data::EDataType::kTypeUInt),
    kParseSInt    = static_cast<int>(Data::EDataType::kTypeSInt),
    kParseNumber  = static_cast<int>(Data::EDataType::kTypeNumber),
    kParseString  = static_cast<int>(Data::EDataType::kTypeString),
    kParseBoolean = static_cast<int>(Data::EDataType::kTypeBoolean),
    kParseUUID    = static_cast<int>(Data::EDataType::kTypeUUID),
    kParseVec3    = static_cast<int>(Data::EDataType::kTypeVec3),
    kParseVec4    = static_cast<int>(Data::EDataType::kTypeVec4),

    // parser-only tags
    kParseObject,
    kParseStringVararg
};
// Forward declarations; the list aliases below need these names before the structs are defined
struct ParseBit;
struct ParsedBit;
// A parse definition: a list of ParseBit entries
using ParseObject = AuList<ParseBit>;
// A parse output: a list of ParsedBit entries
using ParsedObject = AuList<ParsedBit>;
// Aliases onto the Aurora::Data value types
// NOTE(review): "Primative" is a misspelling of "Primitive", but the alias name is public API
using ParsePrimativeValue = Data::PrimitiveValue;
using ParseValue = Data::Value;
// A ParseValue (Data::Value) extended with a list of parsed bits
struct ParseValueEx : ParseValue
{
// NOTE(review): presumably filled in when the value came from a kParseObject bit - confirm
ParsedObject object;
};
/**
 * @brief One entry of a parse definition (ParseObject).
 *
 * An entry is either a single tagged token (tag != kParseObject) or a nested
 * parse definition (tag == kParseObject, described by objectParse). The
 * optional / array / vararg flags modify how the entry is consumed; see the
 * member comments and the syntax summary at the bottom of the struct.
 */
struct ParseBit
{
    // All copy/move operations are defaulted explicitly. Declaring only the
    // copy constructor suppresses the implicit move constructor and move
    // assignment, which turns every move of a ParseBit (including list
    // growth of a ParseObject) into a deep copy of label and objectParse.
    ParseBit(const ParseBit &in) = default;
    ParseBit(ParseBit &&in) = default;
    ParseBit &operator=(const ParseBit &in) = default;
    ParseBit &operator=(ParseBit &&in) = default;

    // Single labelled token
    ParseBit(ParsableTag tag, const AuString &label) :
        tag(tag), label(label)
    {}

    // Single labelled token with explicit modifiers.
    // Mem-initializers are listed in member declaration order.
    ParseBit(ParsableTag tag, const AuString &label, bool array, bool optional, bool _vararg) :
        optional(optional), array(array), vararg(_vararg), tag(tag), label(label)
    {}

    // Single unlabelled token
    ParseBit(ParsableTag tag) :
        tag(tag)
    {}

    // Single unlabelled token with explicit modifiers
    ParseBit(ParsableTag tag, bool array, bool optional, bool _vararg) :
        optional(optional), array(array), vararg(_vararg), tag(tag)
    {}

    // Nested parse definition. The argument is taken by value, so it is moved
    // (not copied a second time) into the member.
    ParseBit(ParseObject _ObjectParse) :
        tag(ParsableTag::kParseObject), objectParse(std::move(_ObjectParse))
    {}

    // Nested, labelled parse definition
    ParseBit(ParseObject _ObjectParse, const AuString &label) :
        tag(ParsableTag::kParseObject), objectParse(std::move(_ObjectParse)), label(label)
    {}

    // marks this "parse bit" and all following parse bits optional
    bool optional = false;

    // if true, the lexer yoinks a length prefix token before iterating over array length
    bool array = false;

    // if true, the lexer parses tag or object parse until EOS
    bool vararg = false;

    // parse definition
    ParsableTag tag;
    ParseObject objectParse;

    // optional:
    AuString label = "";

    // parse object
    // {...}
    // single parse bit:
    // <tag-label: value>
    // optional:
    // <max-number: value> <min-number: value> (<min-number: value>)
    // var arg:
    // <max-number: value> <min-number: value> (<min-number: value> ...)
    // arrays
    // <max-number: value> <min-number: value> [<array count> <min-number: value>]
};
/**
 * @brief One entry of a parse result (ParsedObject).
 *
 * Holds the tag that was parsed plus either a single value or a list of
 * values, with isArray saying which of the two is meaningful.
 */
struct ParsedBit
{
    // Every member carries a default initializer, so a default-constructed
    // ParsedBit has no indeterminate fields (tag used to be left uninitialized).
    ParsedBit()
    {}

    ParsedBit(ParsableTag tag) :
        tag(tag)
    {}

    // NOTE(review): presumably the number of elements in value.array - confirm
    AuMach count {};

    // Value-initialized (numeric 0) unless the tag constructor is used.
    // NOTE(review): 0 may or may not name a valid ParsableTag - confirm against Data::EDataType
    ParsableTag tag {};

    bool isArray {};

    struct
    {
        ParseValueEx single;
        AuList<ParseValueEx> array;
    } value;
};
// Output of Parse(): the parsed entries plus two diagnostic strings
struct ParseResult
{
// NOTE(review): presumably a human readable message set when parsing fails - confirm
AuString syntaxError;
// NOTE(review): presumably a textual dump of the parse for debugging - confirm
AuString debugTree;
// The parsed entries
ParsedObject result;
};
/**
 * @brief State handed to ConsumeToken(ParseState &, ...) and Parse(ParseState &, ...).
 *
 * Wraps the buffered character stream being read, plus token bookkeeping.
 * NOTE(review): the meaning of the token fields is not established by this
 * header; presumably additionalTokens/countOfTokens describe extra delimiter
 * characters and the lastToken* fields record the most recent delimiter -
 * confirm against the implementation.
 */
struct ParseState
{
    // Use an existing buffered consumer directly
    ParseState(const AuSPtr<Aurora::IO::Character::IBufferedCharacterConsumer> &stream) :
        stream(stream)
    {}

    // Wrap a character provider in a buffered consumer
    ParseState(const AuSPtr<Aurora::IO::Character::ICharacterProvider> &stream) :
        stream(Aurora::IO::Character::BufferConsumerFromProviderShared(stream))
    {}

    // Wrap an extended character provider in a buffered consumer
    ParseState(const AuSPtr<Aurora::IO::Character::ICharacterProviderEx> &stream) :
        stream(Aurora::IO::Character::BufferConsumerFromProviderShared(stream))
    {}

    // The stream tokens are read from
    AuSPtr<Aurora::IO::Character::IBufferedCharacterConsumer> stream;

    // Raw pointer; ownership is not expressed by this struct
    AuUInt8 *additionalTokens {};
    AuUInt16 countOfTokens {};

    bool hasLastToken {};
    AuUInt8 lastTokenCharacter {};
    AuMach lastTokenAdditional {};
};
// Checks a parse definition.
// NOTE(review): the name is misspelled ("Vaildate") but is an exported symbol; it returns void,
// so how an invalid structure is reported is not visible from this header - confirm
AUKN_SYM void VaildateStructure(const ParseObject &object);
// Consume one token of the given type from a buffered consumer; the parsed value is written to `out`
AUKN_SYM bool ConsumeToken(ParsableTag type, const AuSPtr<Aurora::IO::Character::IBufferedCharacterConsumer> &getc, ParseValue &out);
// Same as above, reading from a plain character provider
AUKN_SYM bool ConsumeToken(ParsableTag type, const AuSPtr<Aurora::IO::Character::ICharacterProvider> &getc, ParseValue &out);
// Same as above, reading from (and presumably updating - confirm) an existing ParseState
AUKN_SYM bool ConsumeToken(ParseState &state, ParsableTag type, ParseValue &out);
/**
 * @brief Consume one token of the given type from a string, starting at `index`
 * @param type  kind of token to consume
 * @param str   input string
 * @param index in: offset handed to the string provider; out (success only):
 *              the provider position minus HasBufferedNext() of the consumer
 * @param out   receives the parsed value
 * @return false if a stream could not be created or the token could not be consumed;
 *         `index` is left untouched in that case
 */
static bool ConsumeToken(ParsableTag type, const AuROString &str, AuMach &index, ParseValueEx &out)
{
    auto pProvider = IO::Character::ProviderFromStringUnique(str, index);
    if (pProvider)
    {
        // The consumer borrows the provider; pProvider is declared first so it outlives pConsumer
        auto pConsumer = IO::Character::BufferConsumerFromProviderShared(AuUnsafeRaiiToShared(pProvider));
        if (pConsumer && ConsumeToken(type, pConsumer, out))
        {
            // Discount whatever the consumer reports as buffered but not yet handed out
            index = pProvider->GetPosition() - pConsumer->HasBufferedNext();
            return true;
        }
    }

    return false;
}
/**
 * @brief Consume one token of the given type from the start of a string
 * @param type kind of token to consume
 * @param str  input string
 * @param out  receives the parsed value
 * @return false if a stream could not be created or the token could not be consumed
 */
static bool ConsumeToken(ParsableTag type, const AuROString &str, ParseValueEx &out)
{
    auto pProvider = IO::Character::ProviderFromStringUnique(str);
    if (pProvider)
    {
        // The consumer borrows the provider and is destroyed before it
        if (auto pConsumer = IO::Character::BufferConsumerFromProviderShared(AuUnsafeRaiiToShared(pProvider)))
        {
            return ConsumeToken(type, pConsumer, out);
        }
    }

    return false;
}
// Parse the stream held by `state` according to `structure`, writing the outcome into `result`.
// Returns a success flag; the string overloads of Parse() treat false as failure.
AUKN_SYM bool Parse(ParseState &state, const ParseObject &structure, ParseResult &result);
/**
 * @brief Parse a string according to `structure` and report where parsing stopped
 * @param result    receives the parse outcome
 * @param structure parse definition
 * @param str       input string
 * @param index     out (success only): the provider position minus
 *                  HasBufferedNext() of the consumer
 * @return false if a stream could not be created or parsing failed;
 *         `index` is left untouched in that case
 *
 * NOTE(review): unlike ConsumeToken(type, str, index, out), the incoming value
 * of `index` is not passed to the string provider, so parsing always starts at
 * the beginning of `str`. Confirm whether that asymmetry is intended.
 */
static bool Parse(ParseResult &result, const ParseObject &structure, const AuROString &str, AuMach &index)
{
    auto pProvider = IO::Character::ProviderFromStringUnique(str);
    if (!pProvider)
    {
        return false;
    }

    // The consumer borrows the provider; pProvider is declared first so it outlives pConsumer
    auto pConsumer = IO::Character::BufferConsumerFromProviderShared(AuUnsafeRaiiToShared(pProvider));
    if (!pConsumer)
    {
        return false;
    }

    ParseState parseState(pConsumer);
    const bool bParsed = Parse(parseState, structure, result);
    if (bParsed)
    {
        // Discount whatever the consumer reports as buffered but not yet handed out
        index = pProvider->GetPosition() - parseState.stream->HasBufferedNext();
    }

    return bParsed;
}
/**
 * @brief Parse a string according to `structure`
 * @param result    receives the parse outcome
 * @param structure parse definition
 * @param str       input string
 * @return false if a stream could not be created or parsing failed
 */
static bool Parse(ParseResult &result, const ParseObject &structure, const AuROString &str)
{
    auto pProvider = IO::Character::ProviderFromStringUnique(str);
    if (pProvider)
    {
        // The consumer borrows the provider and is destroyed before it
        if (auto pConsumer = IO::Character::BufferConsumerFromProviderShared(AuUnsafeRaiiToShared(pProvider)))
        {
            ParseState parseState(pConsumer);
            return Parse(parseState, structure, result);
        }
    }

    return false;
}
// Consume tokens of the given type from a character provider and return them as a list
// NOTE(review): presumably reads until the stream is exhausted - confirm
AUKN_SYM AuList<ParseValueEx> ConsumeTokens(ParsableTag type, const AuSPtr<Aurora::IO::Character::ICharacterProvider> &getc);
// Same as above, reading from a buffered consumer
AUKN_SYM AuList<ParseValueEx> ConsumeTokens(ParsableTag type, const AuSPtr<Aurora::IO::Character::IBufferedCharacterConsumer> &getc);
// Write the textual form of a single value of the given type into `str`
// NOTE(review): whether `str` is appended to or overwritten is not visible here - confirm
AUKN_SYM void SerializeToken(ParsableTag type, const ParseValue &value, AuString &str);
// Write the textual form of a parsed object into `ret`
AUKN_SYM void Serialize(const ParsedObject &structure, AuString &ret);
// NOTE(review): the "16" suffix presumably means base 16 (as in ParseUInt16/ParseSInt16 below),
// and bZeroX presumably toggles a "0x" prefix - confirm
AUKN_SYM AuString StringifyUInt16(AuUInt64 in, bool bZeroX = true);
AUKN_SYM AuString StringifySInt16(AuInt64 in, bool bZeroX = true);
/**
* @brief Parse base10 signed integer from a character range
* NOTE(review): `end` is a reference to a pointer; presumably it bounds the input and/or
* is updated to where parsing stopped - confirm against the implementation
*/
AUKN_SYM AuResult<AuSInt> ParseSInt(const char *begin, const char *&end);
/**
* @brief Parse base10 unsigned integer from a character range
* NOTE(review): see ParseSInt(begin, end) regarding the role of `end`
*/
AUKN_SYM AuResult<AuUInt> ParseUInt(const char *begin, const char *&end);
/**
* @brief Parse base16 string with optional 0x prefix and 'h' ending
* NOTE(review): earlier wording said "0x suffix"; "0x" is conventionally a prefix - confirm
* NOTE(review): see ParseSInt(begin, end) regarding the role of `end`
*/
AUKN_SYM AuResult<AuSInt> ParseSInt16(const char *begin, const char *&end);
/**
* @brief Parse base16 string with optional 0x prefix and 'h' ending
* NOTE(review): see ParseSInt(begin, end) regarding the role of `end`
*/
AUKN_SYM AuResult<AuUInt> ParseUInt16(const char *begin, const char *&end);
/**
* @brief Parse base16 null terminated string with optional 0x prefix and 'h' ending
*/
AUKN_SYM AuResult<AuSInt> ParseSInt16(const char *begin);
/**
* @brief Parse base16 abi terminated string with optional 0x prefix and 'h' ending
*/
AUKN_SYM AuResult<AuSInt> ParseSInt16(const AuROString &str);
/**
* @brief Parse base16 null terminated string with optional 0x prefix and 'h' ending
*/
AUKN_SYM AuResult<AuUInt> ParseUInt16(const char *begin);
/**
* @brief Parse base16 abi terminated string with optional 0x prefix and 'h' ending
*/
AUKN_SYM AuResult<AuUInt> ParseUInt16(const AuROString &str);
/**
* @brief Parse base10 abi terminated string
*/
AUKN_SYM AuResult<AuSInt> ParseSInt(const AuROString &str);
/**
* @brief Parse base10 null terminated string
*/
AUKN_SYM AuResult<AuSInt> ParseSInt(const char *begin);
/**
* @brief Parse base10 abi terminated string
*/
AUKN_SYM AuResult<AuUInt> ParseUInt(const AuROString &str);
/**
* @brief Parse base10 null terminated string
*/
AUKN_SYM AuResult<AuUInt> ParseUInt(const char *begin);
}