// Copyright Toru Niina 2017. // Distributed under the MIT License. #ifndef TOML11_COMBINATOR_HPP #define TOML11_COMBINATOR_HPP #include "traits.hpp" #include "result.hpp" #include "utility.hpp" #include "region.hpp" #include #include #include #include #include // they scans characters and returns region if it matches to the condition. // when they fail, it does not change the location. // in lexer.hpp, these are used. namespace toml { namespace detail { // to output character as an error message. inline std::string show_char(const char c) { // It supress an error that occurs only in Debug mode of MSVC++ on Windows. // I'm not completely sure but they check the value of char to be in the // range [0, 256) and some of the COMPLETELY VALID utf-8 character sometimes // has negative value (if char has sign). So here it re-interprets c as // unsigned char through pointer. In general, converting pointer to a // pointer that has different type cause UB, but `(signed|unsigned)?char` // are one of the exceptions. Converting pointer only to char and std::byte // (c++17) are valid. if(std::isgraph(*reinterpret_cast(std::addressof(c)))) { return std::string(1, c); } else { std::ostringstream oss; oss << "0x" << std::hex << std::setfill('0') << std::setw(2) << static_cast(c); return oss.str(); } } template struct character { static constexpr char target = C; template static result, std::string> invoke(location& loc) { static_assert(std::is_same::value, "internal error: container::value_type should be `char`."); if(loc.iter() == loc.end()) {return err("not sufficient characters");} const auto first = loc.iter(); const char c = *(loc.iter()); if(c != target) { return err(concat_to_string("expected '", show_char(target), "' but got '", show_char(c), "'.")); } ++(loc.iter()); // update location return ok(region(loc, first, loc.iter())); } static std::string pattern() {return show_char(target);} }; template constexpr char character::target; // closed interval [Low, Up]. both Low and Up are included. template struct in_range { // assuming ascii part of UTF-8... static_assert(Low <= Up, "lower bound should be less than upper bound."); static constexpr char upper = Up; static constexpr char lower = Low; template static result, std::string> invoke(location& loc) { static_assert(std::is_same::value, "internal error: container::value_type should be `char`."); if(loc.iter() == loc.end()) {return err("not sufficient characters");} const auto first = loc.iter(); const char c = *(loc.iter()); if(c < lower || upper < c) { return err(concat_to_string("expected character in range " "[", show_char(lower), ", ", show_char(upper), "] but got ", "'", show_char(c), "'.")); } ++(loc.iter()); return ok(region(loc, first, loc.iter())); } static std::string pattern() { return concat_to_string("[",show_char(lower),"-",show_char(upper),"]"); } }; template constexpr char in_range::upper; template constexpr char in_range::lower; // keep iterator if `Combinator` matches. otherwise, increment `iter` by 1 char. // for detecting invalid characters, like control sequences in toml string. template struct exclude { template static result, std::string> invoke(location& loc) { static_assert(std::is_same::value, "internal error: container::value_type should be `char`."); if(loc.iter() == loc.end()) {return err("not sufficient characters");} auto first = loc.iter(); auto rslt = Combinator::invoke(loc); if(rslt.is_ok()) { loc.iter() = first; // rollback return err(concat_to_string( "invalid pattern (", Combinator::pattern(), ") appeared ", rslt.unwrap().str())); } loc.iter() = std::next(first); return ok(region(loc, first, loc.iter())); } static std::string pattern() { return concat_to_string("^(", Combinator::pattern(), ')'); } }; // increment `iter`, if matches. otherwise, just return empty string. template struct maybe { template static result, std::string> invoke(location& loc) { static_assert(std::is_same::value, "internal error: container::value_type should be `char`."); const auto rslt = Combinator::invoke(loc); if(rslt.is_ok()) { return rslt; } return ok(region(loc)); } static std::string pattern() { return concat_to_string('(', Combinator::pattern(), ")?"); } }; template struct sequence; template struct sequence { template static result, std::string> invoke(location& loc) { static_assert(std::is_same::value, "internal error: container::value_type should be `char`."); const auto first = loc.iter(); const auto rslt = Head::invoke(loc); if(rslt.is_err()) { loc.iter() = first; return err(rslt.unwrap_err()); } return sequence::invoke(loc, std::move(rslt.unwrap()), first); } // called from the above function only, recursively. template static result, std::string> invoke(location& loc, region reg, Iterator first) { const auto rslt = Head::invoke(loc); if(rslt.is_err()) { loc.iter() = first; return err(rslt.unwrap_err()); } reg += rslt.unwrap(); // concat regions return sequence::invoke(loc, std::move(reg), first); } static std::string pattern() { return concat_to_string(Head::pattern(), sequence::pattern()); } }; template struct sequence { // would be called from sequence::invoke only. template static result, std::string> invoke(location& loc, region reg, Iterator first) { const auto rslt = Head::invoke(loc); if(rslt.is_err()) { loc.iter() = first; return err(rslt.unwrap_err()); } reg += rslt.unwrap(); // concat regions return ok(reg); } static std::string pattern() {return Head::pattern();} }; template struct either; template struct either { template static result, std::string> invoke(location& loc) { static_assert(std::is_same::value, "internal error: container::value_type should be `char`."); const auto rslt = Head::invoke(loc); if(rslt.is_ok()) {return rslt;} return either::invoke(loc); } static std::string pattern() { return concat_to_string('(', Head::pattern(), ")|", either::pattern()); } }; template struct either { template static result, std::string> invoke(location& loc) { static_assert(std::is_same::value, "internal error: container::value_type should be `char`."); return Head::invoke(loc); } static std::string pattern() { return concat_to_string('(', Head::pattern(), ')'); } }; template struct repeat; template struct exactly{}; template struct at_least{}; struct unlimited{}; template struct repeat> { template static result, std::string> invoke(location& loc) { region retval(loc); const auto first = loc.iter(); for(std::size_t i=0; i struct repeat> { template static result, std::string> invoke(location& loc) { region retval(loc); const auto first = loc.iter(); for(std::size_t i=0; i struct repeat { template static result, std::string> invoke(location& loc) { region retval(loc); while(true) { auto rslt = T::invoke(loc); if(rslt.is_err()) { return ok(std::move(retval)); } retval += rslt.unwrap(); } } static std::string pattern() {return concat_to_string('(', T::pattern(), ")*");} }; } // detail } // toml #endif// TOML11_COMBINATOR_HPP