// Copyright Toru Niina 2017. // Distributed under the MIT License. #ifndef TOML11_COMBINATOR_HPP #define TOML11_COMBINATOR_HPP #include "traits.hpp" #include "result.hpp" #include "utility.hpp" #include "region.hpp" #include #include #include #include #include #include #include #include // they scans characters and returns region if it matches to the condition. // when they fail, it does not change the location. // in lexer.hpp, these are used. namespace toml { namespace detail { // to output character as an error message. inline std::string show_char(const char c) { // It supress an error that occurs only in Debug mode of MSVC++ on Windows. // I'm not completely sure but they check the value of char to be in the // range [0, 256) and some of the COMPLETELY VALID utf-8 character sometimes // has negative value (if char has sign). So here it re-interprets c as // unsigned char through pointer. In general, converting pointer to a // pointer that has different type cause UB, but `(signed|unsigned)?char` // are one of the exceptions. Converting pointer only to char and std::byte // (c++17) are valid. if(std::isgraph(*reinterpret_cast(std::addressof(c)))) { return std::string(1, c); } else { std::array buf; buf.fill('\0'); const auto r = std::snprintf( buf.data(), buf.size(), "0x%02x", static_cast(c) & 0xFF); (void) r; // Unused variable warning assert(r == static_cast(buf.size()) - 1); return std::string(buf.data()); } } template struct character { static constexpr char target = C; template static result, none_t> invoke(location& loc) { static_assert(std::is_same::value, "internal error: container::value_type should be `char`."); if(loc.iter() == loc.end()) {return none();} const auto first = loc.iter(); const char c = *(loc.iter()); if(c != target) { return none(); } loc.advance(); // update location return ok(region(loc, first, loc.iter())); } }; template constexpr char character::target; // closed interval [Low, Up]. both Low and Up are included. template struct in_range { // assuming ascii part of UTF-8... static_assert(Low <= Up, "lower bound should be less than upper bound."); static constexpr char upper = Up; static constexpr char lower = Low; template static result, none_t> invoke(location& loc) { static_assert(std::is_same::value, "internal error: container::value_type should be `char`."); if(loc.iter() == loc.end()) {return none();} const auto first = loc.iter(); const char c = *(loc.iter()); if(c < lower || upper < c) { return none(); } loc.advance(); return ok(region(loc, first, loc.iter())); } }; template constexpr char in_range::upper; template constexpr char in_range::lower; // keep iterator if `Combinator` matches. otherwise, increment `iter` by 1 char. // for detecting invalid characters, like control sequences in toml string. template struct exclude { template static result, none_t> invoke(location& loc) { static_assert(std::is_same::value, "internal error: container::value_type should be `char`."); if(loc.iter() == loc.end()) {return none();} auto first = loc.iter(); auto rslt = Combinator::invoke(loc); if(rslt.is_ok()) { loc.reset(first); return none(); } loc.reset(std::next(first)); // XXX maybe loc.advance() is okay but... return ok(region(loc, first, loc.iter())); } }; // increment `iter`, if matches. otherwise, just return empty string. template struct maybe { template static result, none_t> invoke(location& loc) { static_assert(std::is_same::value, "internal error: container::value_type should be `char`."); const auto rslt = Combinator::invoke(loc); if(rslt.is_ok()) { return rslt; } return ok(region(loc)); } }; template struct sequence; template struct sequence { template static result, none_t> invoke(location& loc) { static_assert(std::is_same::value, "internal error: container::value_type should be `char`."); const auto first = loc.iter(); const auto rslt = Head::invoke(loc); if(rslt.is_err()) { loc.reset(first); return none(); } return sequence::invoke(loc, std::move(rslt.unwrap()), first); } // called from the above function only, recursively. template static result, none_t> invoke(location& loc, region reg, Iterator first) { const auto rslt = Head::invoke(loc); if(rslt.is_err()) { loc.reset(first); return none(); } reg += rslt.unwrap(); // concat regions return sequence::invoke(loc, std::move(reg), first); } }; template struct sequence { // would be called from sequence::invoke only. template static result, none_t> invoke(location& loc, region reg, Iterator first) { const auto rslt = Head::invoke(loc); if(rslt.is_err()) { loc.reset(first); return none(); } reg += rslt.unwrap(); // concat regions return ok(reg); } }; template struct either; template struct either { template static result, none_t> invoke(location& loc) { static_assert(std::is_same::value, "internal error: container::value_type should be `char`."); const auto rslt = Head::invoke(loc); if(rslt.is_ok()) {return rslt;} return either::invoke(loc); } }; template struct either { template static result, none_t> invoke(location& loc) { static_assert(std::is_same::value, "internal error: container::value_type should be `char`."); return Head::invoke(loc); } }; template struct repeat; template struct exactly{}; template struct at_least{}; struct unlimited{}; template struct repeat> { template static result, none_t> invoke(location& loc) { region retval(loc); const auto first = loc.iter(); for(std::size_t i=0; i struct repeat> { template static result, none_t> invoke(location& loc) { region retval(loc); const auto first = loc.iter(); for(std::size_t i=0; i struct repeat { template static result, none_t> invoke(location& loc) { region retval(loc); while(true) { auto rslt = T::invoke(loc); if(rslt.is_err()) { return ok(std::move(retval)); } retval += rslt.unwrap(); } } }; } // detail } // toml #endif// TOML11_COMBINATOR_HPP