mirror of
https://github.com/ToruNiina/toml11.git
synced 2024-11-09 22:30:07 +00:00
91966a6917
In the early stages of development, I thought it would be useful for the lexer-combinators to generate error messages, because then the parser would not need to generate them. But it has turned out that, to show an appropriate error message, the parser needs to generate it according to the context, and almost all the messages from the lexer are discarded. So I added another parameter to the lexer-combinators to suppress error message generation. In the future, we may want to remove messages from the lexers completely, but for now I will keep them. Removing this unused message generation makes the parsing process faster.
393 lines
11 KiB
C++
393 lines
11 KiB
C++
// Copyright Toru Niina 2017.
|
|
// Distributed under the MIT License.
|
|
#ifndef TOML11_COMBINATOR_HPP
|
|
#define TOML11_COMBINATOR_HPP
|
|
#include "traits.hpp"
|
|
#include "result.hpp"
|
|
#include "utility.hpp"
|
|
#include "region.hpp"
|
|
#include <type_traits>
|
|
#include <iterator>
|
|
#include <limits>
|
|
#include <array>
|
|
#include <iomanip>
|
|
#include <cstdio>
|
|
#include <cassert>
|
|
#include <cctype>
|
|
|
|
// These combinators scan characters and return a region if the input matches the condition.
|
|
// When they fail, they do not change the location.
|
|
// They are used in lexer.hpp.
|
|
|
|
namespace toml
|
|
{
|
|
namespace detail
|
|
{
|
|
|
|
// Renders a character for use in an error message.
// Graphic characters (as classified by std::isgraph) are returned as-is;
// everything else is rendered as a two-digit hexadecimal escape, e.g. "0x0a".
inline std::string show_char(const char c)
{
    // std::isgraph requires an argument that is representable as unsigned char
    // (or EOF). When `char` is signed, bytes of COMPLETELY VALID UTF-8
    // sequences are negative, and passing them directly is undefined behaviour
    // (it triggers an assertion in Debug mode of MSVC++ on Windows).
    // Converting the value to unsigned char first suppresses that error.
    const unsigned char uc = static_cast<unsigned char>(c);
    if(std::isgraph(uc))
    {
        return std::string(1, c);
    }

    std::array<char, 5> buf; // "0x" + 2 hex digits + terminating '\0'
    buf.fill('\0');
    // "%x" expects an unsigned int argument.
    const int r = std::snprintf(buf.data(), buf.size(), "0x%02x",
                                static_cast<unsigned int>(uc));
    // Compare as `int` to avoid a signed/unsigned comparison, and mark `r` as
    // used so that NDEBUG builds (where assert expands to nothing) do not warn
    // about an unused variable.
    assert(r == static_cast<int>(buf.size()) - 1);
    (void)r;
    return std::string(buf.data());
}
|
|
|
|
template<char C>
|
|
struct character
|
|
{
|
|
static constexpr char target = C;
|
|
|
|
template<typename Cont>
|
|
static result<region<Cont>, std::string>
|
|
invoke(location<Cont>& loc, const bool msg = false)
|
|
{
|
|
static_assert(std::is_same<char, typename Cont::value_type>::value,
|
|
"internal error: container::value_type should be `char`.");
|
|
|
|
if(loc.iter() == loc.end()) {return err("not sufficient characters");}
|
|
const auto first = loc.iter();
|
|
|
|
const char c = *(loc.iter());
|
|
if(c != target)
|
|
{
|
|
if(msg)
|
|
{
|
|
return err(concat_to_string("expected '", show_char(target),
|
|
"' but got '", show_char(c), "'."));
|
|
}
|
|
return err("");
|
|
}
|
|
loc.advance(); // update location
|
|
|
|
return ok(region<Cont>(loc, first, loc.iter()));
|
|
}
|
|
|
|
static std::string pattern() {return show_char(target);}
|
|
};
|
|
template<char C>
|
|
constexpr char character<C>::target;
|
|
|
|
// closed interval [Low, Up]. both Low and Up are included.
|
|
template<char Low, char Up>
|
|
struct in_range
|
|
{
|
|
// assuming ascii part of UTF-8...
|
|
static_assert(Low <= Up, "lower bound should be less than upper bound.");
|
|
|
|
static constexpr char upper = Up;
|
|
static constexpr char lower = Low;
|
|
|
|
template<typename Cont>
|
|
static result<region<Cont>, std::string>
|
|
invoke(location<Cont>& loc, const bool msg = false)
|
|
{
|
|
static_assert(std::is_same<char, typename Cont::value_type>::value,
|
|
"internal error: container::value_type should be `char`.");
|
|
|
|
if(loc.iter() == loc.end()) {return err("not sufficient characters");}
|
|
const auto first = loc.iter();
|
|
|
|
const char c = *(loc.iter());
|
|
if(c < lower || upper < c)
|
|
{
|
|
if(msg)
|
|
{
|
|
return err(concat_to_string("expected character in range "
|
|
"[", show_char(lower), ", ", show_char(upper), "] but got ",
|
|
"'", show_char(c), "'."));
|
|
}
|
|
return err("");
|
|
}
|
|
|
|
loc.advance();
|
|
return ok(region<Cont>(loc, first, loc.iter()));
|
|
}
|
|
|
|
static std::string pattern()
|
|
{
|
|
return concat_to_string("[",show_char(lower),"-",show_char(upper),"]");
|
|
}
|
|
};
|
|
template<char L, char U> constexpr char in_range<L, U>::upper;
|
|
template<char L, char U> constexpr char in_range<L, U>::lower;
|
|
|
|
// keep iterator if `Combinator` matches. otherwise, increment `iter` by 1 char.
|
|
// for detecting invalid characters, like control sequences in toml string.
|
|
template<typename Combinator>
|
|
struct exclude
|
|
{
|
|
template<typename Cont>
|
|
static result<region<Cont>, std::string>
|
|
invoke(location<Cont>& loc, const bool msg = false)
|
|
{
|
|
static_assert(std::is_same<char, typename Cont::value_type>::value,
|
|
"internal error: container::value_type should be `char`.");
|
|
|
|
if(loc.iter() == loc.end()) {return err("not sufficient characters");}
|
|
auto first = loc.iter();
|
|
|
|
auto rslt = Combinator::invoke(loc, msg);
|
|
if(rslt.is_ok())
|
|
{
|
|
loc.reset(first);
|
|
if(msg)
|
|
{
|
|
return err(concat_to_string("invalid pattern (",
|
|
Combinator::pattern(), ") appeared ", rslt.unwrap().str()));
|
|
}
|
|
return err("");
|
|
}
|
|
loc.reset(std::next(first)); // XXX maybe loc.advance() is okay but...
|
|
return ok(region<Cont>(loc, first, loc.iter()));
|
|
}
|
|
|
|
static std::string pattern()
|
|
{
|
|
return concat_to_string("^(", Combinator::pattern(), ')');
|
|
}
|
|
};
|
|
|
|
// increment `iter`, if matches. otherwise, just return empty string.
|
|
template<typename Combinator>
|
|
struct maybe
|
|
{
|
|
template<typename Cont>
|
|
static result<region<Cont>, std::string>
|
|
invoke(location<Cont>& loc, const bool msg = false)
|
|
{
|
|
static_assert(std::is_same<char, typename Cont::value_type>::value,
|
|
"internal error: container::value_type should be `char`.");
|
|
|
|
const auto rslt = Combinator::invoke(loc, msg);
|
|
if(rslt.is_ok())
|
|
{
|
|
return rslt;
|
|
}
|
|
return ok(region<Cont>(loc));
|
|
}
|
|
|
|
static std::string pattern()
|
|
{
|
|
return concat_to_string('(', Combinator::pattern(), ")?");
|
|
}
|
|
};
|
|
|
|
template<typename ... Ts>
|
|
struct sequence;
|
|
|
|
template<typename Head, typename ... Tail>
|
|
struct sequence<Head, Tail...>
|
|
{
|
|
template<typename Cont>
|
|
static result<region<Cont>, std::string>
|
|
invoke(location<Cont>& loc, const bool msg = false)
|
|
{
|
|
static_assert(std::is_same<char, typename Cont::value_type>::value,
|
|
"internal error: container::value_type should be `char`.");
|
|
|
|
const auto first = loc.iter();
|
|
const auto rslt = Head::invoke(loc, msg);
|
|
if(rslt.is_err())
|
|
{
|
|
loc.reset(first);
|
|
return err(rslt.unwrap_err());
|
|
}
|
|
return sequence<Tail...>::invoke(loc, std::move(rslt.unwrap()), first, msg);
|
|
}
|
|
|
|
// called from the above function only, recursively.
|
|
template<typename Cont, typename Iterator>
|
|
static result<region<Cont>, std::string>
|
|
invoke(location<Cont>& loc, region<Cont> reg, Iterator first,
|
|
const bool msg = false)
|
|
{
|
|
const auto rslt = Head::invoke(loc, msg);
|
|
if(rslt.is_err())
|
|
{
|
|
loc.reset(first);
|
|
return err(rslt.unwrap_err());
|
|
}
|
|
reg += rslt.unwrap(); // concat regions
|
|
return sequence<Tail...>::invoke(loc, std::move(reg), first, msg);
|
|
}
|
|
|
|
static std::string pattern()
|
|
{
|
|
return concat_to_string(Head::pattern(), sequence<Tail...>::pattern());
|
|
}
|
|
};
|
|
|
|
template<typename Head>
|
|
struct sequence<Head>
|
|
{
|
|
// would be called from sequence<T ...>::invoke only.
|
|
template<typename Cont, typename Iterator>
|
|
static result<region<Cont>, std::string>
|
|
invoke(location<Cont>& loc, region<Cont> reg, Iterator first,
|
|
const bool msg = false)
|
|
{
|
|
const auto rslt = Head::invoke(loc, msg);
|
|
if(rslt.is_err())
|
|
{
|
|
loc.reset(first);
|
|
return err(rslt.unwrap_err());
|
|
}
|
|
reg += rslt.unwrap(); // concat regions
|
|
return ok(reg);
|
|
}
|
|
static std::string pattern() {return Head::pattern();}
|
|
};
|
|
|
|
template<typename ... Ts>
|
|
struct either;
|
|
|
|
template<typename Head, typename ... Tail>
|
|
struct either<Head, Tail...>
|
|
{
|
|
template<typename Cont>
|
|
static result<region<Cont>, std::string>
|
|
invoke(location<Cont>& loc, const bool msg = false)
|
|
{
|
|
static_assert(std::is_same<char, typename Cont::value_type>::value,
|
|
"internal error: container::value_type should be `char`.");
|
|
|
|
const auto rslt = Head::invoke(loc, msg);
|
|
if(rslt.is_ok()) {return rslt;}
|
|
return either<Tail...>::invoke(loc, msg);
|
|
}
|
|
|
|
static std::string pattern()
|
|
{
|
|
return concat_to_string('(', Head::pattern(), ")|", either<Tail...>::pattern());
|
|
}
|
|
};
|
|
template<typename Head>
|
|
struct either<Head>
|
|
{
|
|
template<typename Cont>
|
|
static result<region<Cont>, std::string>
|
|
invoke(location<Cont>& loc, const bool msg = false)
|
|
{
|
|
static_assert(std::is_same<char, typename Cont::value_type>::value,
|
|
"internal error: container::value_type should be `char`.");
|
|
return Head::invoke(loc, msg);
|
|
}
|
|
static std::string pattern()
|
|
{
|
|
return concat_to_string('(', Head::pattern(), ')');
|
|
}
|
|
};
|
|
|
|
template<typename T, typename N>
|
|
struct repeat;
|
|
|
|
template<std::size_t N> struct exactly{};
|
|
template<std::size_t N> struct at_least{};
|
|
struct unlimited{};
|
|
|
|
template<typename T, std::size_t N>
|
|
struct repeat<T, exactly<N>>
|
|
{
|
|
template<typename Cont>
|
|
static result<region<Cont>, std::string>
|
|
invoke(location<Cont>& loc, const bool msg = false)
|
|
{
|
|
region<Cont> retval(loc);
|
|
const auto first = loc.iter();
|
|
for(std::size_t i=0; i<N; ++i)
|
|
{
|
|
auto rslt = T::invoke(loc, msg);
|
|
if(rslt.is_err())
|
|
{
|
|
loc.reset(first);
|
|
return err(rslt.unwrap_err());
|
|
}
|
|
retval += rslt.unwrap();
|
|
}
|
|
return ok(std::move(retval));
|
|
}
|
|
static std::string pattern()
|
|
{
|
|
return concat_to_string('(', T::pattern(), "){", N, '}');
|
|
}
|
|
};
|
|
|
|
template<typename T, std::size_t N>
|
|
struct repeat<T, at_least<N>>
|
|
{
|
|
template<typename Cont>
|
|
static result<region<Cont>, std::string>
|
|
invoke(location<Cont>& loc, const bool msg = false)
|
|
{
|
|
region<Cont> retval(loc);
|
|
|
|
const auto first = loc.iter();
|
|
for(std::size_t i=0; i<N; ++i)
|
|
{
|
|
auto rslt = T::invoke(loc, msg);
|
|
if(rslt.is_err())
|
|
{
|
|
loc.reset(first);
|
|
return err(rslt.unwrap_err());
|
|
}
|
|
retval += rslt.unwrap();
|
|
}
|
|
while(true)
|
|
{
|
|
auto rslt = T::invoke(loc, msg);
|
|
if(rslt.is_err())
|
|
{
|
|
return ok(std::move(retval));
|
|
}
|
|
retval += rslt.unwrap();
|
|
}
|
|
}
|
|
static std::string pattern()
|
|
{
|
|
return concat_to_string('(',T::pattern(), "){", N, ",}");
|
|
}
|
|
};
|
|
|
|
template<typename T>
|
|
struct repeat<T, unlimited>
|
|
{
|
|
template<typename Cont>
|
|
static result<region<Cont>, std::string>
|
|
invoke(location<Cont>& loc, const bool msg = false)
|
|
{
|
|
region<Cont> retval(loc);
|
|
while(true)
|
|
{
|
|
auto rslt = T::invoke(loc, msg);
|
|
if(rslt.is_err())
|
|
{
|
|
return ok(std::move(retval));
|
|
}
|
|
retval += rslt.unwrap();
|
|
}
|
|
}
|
|
static std::string pattern() {return concat_to_string('(', T::pattern(), ")*");}
|
|
};
|
|
|
|
} // detail
|
|
} // toml
|
|
#endif// TOML11_COMBINATOR_HPP
|