mirror of
https://github.com/ToruNiina/toml11.git
synced 2024-11-25 13:50:05 +00:00
307 lines
7.4 KiB
C++
307 lines
7.4 KiB
C++
// Copyright Toru Niina 2017.
|
|
// Distributed under the MIT License.
|
|
#ifndef TOML11_COMBINATOR_HPP
|
|
#define TOML11_COMBINATOR_HPP
|
|
#include <cassert>
|
|
#include <cctype>
|
|
#include <cstdio>
|
|
|
|
#include <array>
|
|
#include <iomanip>
|
|
#include <iterator>
|
|
#include <limits>
|
|
#include <type_traits>
|
|
|
|
#include "region.hpp"
|
|
#include "result.hpp"
|
|
#include "traits.hpp"
|
|
#include "utility.hpp"
|
|
|
|
// they scan characters and return a region if the input matches the condition.
|
|
// when they fail, they do not change the location.
|
|
// in lexer.hpp, these are used.
|
|
|
|
namespace toml
|
|
{
|
|
namespace detail
|
|
{
|
|
|
|
// Renders a single character for use in an error message.
//
// Returns the character itself when `std::isgraph` classifies it as a graphic
// character. Otherwise returns its byte value formatted as "0x" followed by
// two lowercase hexadecimal digits (e.g. "0x0a" for a line feed).
inline std::string show_char(const char c)
{
    // The <cctype> classifiers require an argument that is representable as
    // unsigned char (or EOF). Plain char may be signed, so bytes belonging to
    // a perfectly valid multi-byte UTF-8 sequence can be negative; passing
    // them through unchanged trips a range check in Debug builds of MSVC++.
    // Converting the value to unsigned char first keeps the call well-defined
    // and needs neither a pointer cast nor std::addressof.
    if(std::isgraph(static_cast<unsigned char>(c)))
    {
        return std::string(1, c);
    }

    // room for "0x", two hex digits and the terminating NUL.
    std::array<char, 5> buf;
    buf.fill('\0');
    const auto r = std::snprintf(
            buf.data(), buf.size(), "0x%02x", static_cast<int>(c) & 0xFF);
    (void) r; // suppresses the unused-variable warning when assert is disabled
    assert(r == static_cast<int>(buf.size()) - 1);
    return std::string(buf.data());
}
|
|
|
|
template<char C>
|
|
struct character
|
|
{
|
|
static constexpr char target = C;
|
|
|
|
static result<region, none_t>
|
|
invoke(location& loc)
|
|
{
|
|
if(loc.iter() == loc.end()) {return none();}
|
|
const auto first = loc.iter();
|
|
|
|
const char c = *(loc.iter());
|
|
if(c != target)
|
|
{
|
|
return none();
|
|
}
|
|
loc.advance(); // update location
|
|
|
|
return ok(region(loc, first, loc.iter()));
|
|
}
|
|
};
|
|
template<char C>
|
|
constexpr char character<C>::target;
|
|
|
|
// closed interval [Low, Up]. both Low and Up are included.
|
|
template<char Low, char Up>
|
|
struct in_range
|
|
{
|
|
// assuming ascii part of UTF-8...
|
|
static_assert(Low <= Up, "lower bound should be less than upper bound.");
|
|
|
|
static constexpr char upper = Up;
|
|
static constexpr char lower = Low;
|
|
|
|
static result<region, none_t>
|
|
invoke(location& loc)
|
|
{
|
|
if(loc.iter() == loc.end()) {return none();}
|
|
const auto first = loc.iter();
|
|
|
|
const char c = *(loc.iter());
|
|
if(c < lower || upper < c)
|
|
{
|
|
return none();
|
|
}
|
|
|
|
loc.advance();
|
|
return ok(region(loc, first, loc.iter()));
|
|
}
|
|
};
|
|
template<char L, char U> constexpr char in_range<L, U>::upper;
|
|
template<char L, char U> constexpr char in_range<L, U>::lower;
|
|
|
|
// keep iterator if `Combinator` matches. otherwise, increment `iter` by 1 char.
|
|
// for detecting invalid characters, like control sequences in toml string.
|
|
template<typename Combinator>
|
|
struct exclude
|
|
{
|
|
static result<region, none_t>
|
|
invoke(location& loc)
|
|
{
|
|
if(loc.iter() == loc.end()) {return none();}
|
|
auto first = loc.iter();
|
|
|
|
auto rslt = Combinator::invoke(loc);
|
|
if(rslt.is_ok())
|
|
{
|
|
loc.reset(first);
|
|
return none();
|
|
}
|
|
loc.reset(std::next(first)); // XXX maybe loc.advance() is okay but...
|
|
return ok(region(loc, first, loc.iter()));
|
|
}
|
|
};
|
|
|
|
// increment `iter`, if matches. otherwise, just return empty string.
|
|
template<typename Combinator>
|
|
struct maybe
|
|
{
|
|
static result<region, none_t>
|
|
invoke(location& loc)
|
|
{
|
|
const auto rslt = Combinator::invoke(loc);
|
|
if(rslt.is_ok())
|
|
{
|
|
return rslt;
|
|
}
|
|
return ok(region(loc));
|
|
}
|
|
};
|
|
|
|
template<typename ... Ts>
|
|
struct sequence;
|
|
|
|
template<typename Head, typename ... Tail>
|
|
struct sequence<Head, Tail...>
|
|
{
|
|
static result<region, none_t>
|
|
invoke(location& loc)
|
|
{
|
|
const auto first = loc.iter();
|
|
const auto rslt = Head::invoke(loc);
|
|
if(rslt.is_err())
|
|
{
|
|
loc.reset(first);
|
|
return none();
|
|
}
|
|
return sequence<Tail...>::invoke(loc, std::move(rslt.unwrap()), first);
|
|
}
|
|
|
|
// called from the above function only, recursively.
|
|
template<typename Iterator>
|
|
static result<region, none_t>
|
|
invoke(location& loc, region reg, Iterator first)
|
|
{
|
|
const auto rslt = Head::invoke(loc);
|
|
if(rslt.is_err())
|
|
{
|
|
loc.reset(first);
|
|
return none();
|
|
}
|
|
reg += rslt.unwrap(); // concat regions
|
|
return sequence<Tail...>::invoke(loc, std::move(reg), first);
|
|
}
|
|
};
|
|
|
|
template<typename Head>
|
|
struct sequence<Head>
|
|
{
|
|
// would be called from sequence<T ...>::invoke only.
|
|
template<typename Iterator>
|
|
static result<region, none_t>
|
|
invoke(location& loc, region reg, Iterator first)
|
|
{
|
|
const auto rslt = Head::invoke(loc);
|
|
if(rslt.is_err())
|
|
{
|
|
loc.reset(first);
|
|
return none();
|
|
}
|
|
reg += rslt.unwrap(); // concat regions
|
|
return ok(reg);
|
|
}
|
|
};
|
|
|
|
template<typename ... Ts>
|
|
struct either;
|
|
|
|
template<typename Head, typename ... Tail>
|
|
struct either<Head, Tail...>
|
|
{
|
|
static result<region, none_t>
|
|
invoke(location& loc)
|
|
{
|
|
const auto rslt = Head::invoke(loc);
|
|
if(rslt.is_ok()) {return rslt;}
|
|
return either<Tail...>::invoke(loc);
|
|
}
|
|
};
|
|
template<typename Head>
|
|
struct either<Head>
|
|
{
|
|
static result<region, none_t>
|
|
invoke(location& loc)
|
|
{
|
|
return Head::invoke(loc);
|
|
}
|
|
};
|
|
|
|
// Repetition combinator. Only declared here; the second template argument is
// one of the tag types below and selects the specialization to use.
template<typename Combinator, typename Count>
struct repeat;

// Tag: repeat exactly `Times` times.
template<std::size_t Times>
struct exactly {};

// Tag: repeat `Minimum` times or more.
template<std::size_t Minimum>
struct at_least {};

// Tag: repeat any number of times, including zero.
struct unlimited {};
|
|
|
|
template<typename T, std::size_t N>
|
|
struct repeat<T, exactly<N>>
|
|
{
|
|
static result<region, none_t>
|
|
invoke(location& loc)
|
|
{
|
|
region retval(loc);
|
|
const auto first = loc.iter();
|
|
for(std::size_t i=0; i<N; ++i)
|
|
{
|
|
auto rslt = T::invoke(loc);
|
|
if(rslt.is_err())
|
|
{
|
|
loc.reset(first);
|
|
return none();
|
|
}
|
|
retval += rslt.unwrap();
|
|
}
|
|
return ok(std::move(retval));
|
|
}
|
|
};
|
|
|
|
template<typename T, std::size_t N>
|
|
struct repeat<T, at_least<N>>
|
|
{
|
|
static result<region, none_t>
|
|
invoke(location& loc)
|
|
{
|
|
region retval(loc);
|
|
|
|
const auto first = loc.iter();
|
|
for(std::size_t i=0; i<N; ++i)
|
|
{
|
|
auto rslt = T::invoke(loc);
|
|
if(rslt.is_err())
|
|
{
|
|
loc.reset(first);
|
|
return none();
|
|
}
|
|
retval += rslt.unwrap();
|
|
}
|
|
while(true)
|
|
{
|
|
auto rslt = T::invoke(loc);
|
|
if(rslt.is_err())
|
|
{
|
|
return ok(std::move(retval));
|
|
}
|
|
retval += rslt.unwrap();
|
|
}
|
|
}
|
|
};
|
|
|
|
template<typename T>
|
|
struct repeat<T, unlimited>
|
|
{
|
|
static result<region, none_t>
|
|
invoke(location& loc)
|
|
{
|
|
region retval(loc);
|
|
while(true)
|
|
{
|
|
auto rslt = T::invoke(loc);
|
|
if(rslt.is_err())
|
|
{
|
|
return ok(std::move(retval));
|
|
}
|
|
retval += rslt.unwrap();
|
|
}
|
|
}
|
|
};
|
|
|
|
} // detail
|
|
} // toml
|
|
#endif// TOML11_COMBINATOR_HPP
|