Merge pull request #56 from ToruNiina/optimization

Optimization
This commit is contained in:
Toru Niina 2019-04-19 01:30:29 +09:00 committed by GitHub
commit 072dccd05d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 203 additions and 83 deletions

View File

@ -9,7 +9,10 @@
#include <type_traits>
#include <iterator>
#include <limits>
#include <array>
#include <iomanip>
#include <cstdio>
#include <cassert>
#include <cctype>
// they scan characters and return a region if the input matches the condition.
@ -38,10 +41,12 @@ inline std::string show_char(const char c)
}
else
{
std::ostringstream oss;
oss << "0x" << std::hex << std::setfill('0') << std::setw(2)
<< static_cast<int>(c);
return oss.str();
std::array<char, 5> buf;
buf.fill('\0');
const auto r = std::snprintf(
buf.data(), buf.size(), "0x%02x", static_cast<int>(c) & 0xFF);
assert(r == buf.size() - 1);
return std::string(buf.data());
}
}
@ -51,7 +56,8 @@ struct character
static constexpr char target = C;
template<typename Cont>
static result<region<Cont>, std::string> invoke(location<Cont>& loc)
static result<region<Cont>, std::string>
invoke(location<Cont>& loc, const bool msg = false)
{
static_assert(std::is_same<char, typename Cont::value_type>::value,
"internal error: container::value_type should be `char`.");
@ -62,8 +68,12 @@ struct character
const char c = *(loc.iter());
if(c != target)
{
return err(concat_to_string("expected '", show_char(target),
"' but got '", show_char(c), "'."));
if(msg)
{
return err(concat_to_string("expected '", show_char(target),
"' but got '", show_char(c), "'."));
}
return err("");
}
loc.advance(); // update location
@ -86,7 +96,8 @@ struct in_range
static constexpr char lower = Low;
template<typename Cont>
static result<region<Cont>, std::string> invoke(location<Cont>& loc)
static result<region<Cont>, std::string>
invoke(location<Cont>& loc, const bool msg = false)
{
static_assert(std::is_same<char, typename Cont::value_type>::value,
"internal error: container::value_type should be `char`.");
@ -97,9 +108,13 @@ struct in_range
const char c = *(loc.iter());
if(c < lower || upper < c)
{
return err(concat_to_string("expected character in range "
"[", show_char(lower), ", ", show_char(upper), "] but got ",
"'", show_char(c), "'."));
if(msg)
{
return err(concat_to_string("expected character in range "
"[", show_char(lower), ", ", show_char(upper), "] but got ",
"'", show_char(c), "'."));
}
return err("");
}
loc.advance();
@ -120,7 +135,8 @@ template<typename Combinator>
struct exclude
{
template<typename Cont>
static result<region<Cont>, std::string> invoke(location<Cont>& loc)
static result<region<Cont>, std::string>
invoke(location<Cont>& loc, const bool msg = false)
{
static_assert(std::is_same<char, typename Cont::value_type>::value,
"internal error: container::value_type should be `char`.");
@ -128,13 +144,16 @@ struct exclude
if(loc.iter() == loc.end()) {return err("not sufficient characters");}
auto first = loc.iter();
auto rslt = Combinator::invoke(loc);
auto rslt = Combinator::invoke(loc, msg);
if(rslt.is_ok())
{
loc.reset(first);
return err(concat_to_string(
"invalid pattern (", Combinator::pattern(), ") appeared ",
rslt.unwrap().str()));
if(msg)
{
return err(concat_to_string("invalid pattern (",
Combinator::pattern(), ") appeared ", rslt.unwrap().str()));
}
return err("");
}
loc.reset(std::next(first)); // XXX maybe loc.advance() is okay but...
return ok(region<Cont>(loc, first, loc.iter()));
@ -151,12 +170,13 @@ template<typename Combinator>
struct maybe
{
template<typename Cont>
static result<region<Cont>, std::string> invoke(location<Cont>& loc)
static result<region<Cont>, std::string>
invoke(location<Cont>& loc, const bool msg = false)
{
static_assert(std::is_same<char, typename Cont::value_type>::value,
"internal error: container::value_type should be `char`.");
const auto rslt = Combinator::invoke(loc);
const auto rslt = Combinator::invoke(loc, msg);
if(rslt.is_ok())
{
return rslt;
@ -177,34 +197,36 @@ template<typename Head, typename ... Tail>
struct sequence<Head, Tail...>
{
template<typename Cont>
static result<region<Cont>, std::string> invoke(location<Cont>& loc)
static result<region<Cont>, std::string>
invoke(location<Cont>& loc, const bool msg = false)
{
static_assert(std::is_same<char, typename Cont::value_type>::value,
"internal error: container::value_type should be `char`.");
const auto first = loc.iter();
const auto rslt = Head::invoke(loc);
const auto rslt = Head::invoke(loc, msg);
if(rslt.is_err())
{
loc.reset(first);
return err(rslt.unwrap_err());
}
return sequence<Tail...>::invoke(loc, std::move(rslt.unwrap()), first);
return sequence<Tail...>::invoke(loc, std::move(rslt.unwrap()), first, msg);
}
// called from the above function only, recursively.
template<typename Cont, typename Iterator>
static result<region<Cont>, std::string>
invoke(location<Cont>& loc, region<Cont> reg, Iterator first)
invoke(location<Cont>& loc, region<Cont> reg, Iterator first,
const bool msg = false)
{
const auto rslt = Head::invoke(loc);
const auto rslt = Head::invoke(loc, msg);
if(rslt.is_err())
{
loc.reset(first);
return err(rslt.unwrap_err());
}
reg += rslt.unwrap(); // concat regions
return sequence<Tail...>::invoke(loc, std::move(reg), first);
return sequence<Tail...>::invoke(loc, std::move(reg), first, msg);
}
static std::string pattern()
@ -219,9 +241,10 @@ struct sequence<Head>
// would be called from sequence<T ...>::invoke only.
template<typename Cont, typename Iterator>
static result<region<Cont>, std::string>
invoke(location<Cont>& loc, region<Cont> reg, Iterator first)
invoke(location<Cont>& loc, region<Cont> reg, Iterator first,
const bool msg = false)
{
const auto rslt = Head::invoke(loc);
const auto rslt = Head::invoke(loc, msg);
if(rslt.is_err())
{
loc.reset(first);
@ -240,14 +263,15 @@ template<typename Head, typename ... Tail>
struct either<Head, Tail...>
{
template<typename Cont>
static result<region<Cont>, std::string> invoke(location<Cont>& loc)
static result<region<Cont>, std::string>
invoke(location<Cont>& loc, const bool msg = false)
{
static_assert(std::is_same<char, typename Cont::value_type>::value,
"internal error: container::value_type should be `char`.");
const auto rslt = Head::invoke(loc);
const auto rslt = Head::invoke(loc, msg);
if(rslt.is_ok()) {return rslt;}
return either<Tail...>::invoke(loc);
return either<Tail...>::invoke(loc, msg);
}
static std::string pattern()
@ -259,11 +283,12 @@ template<typename Head>
struct either<Head>
{
template<typename Cont>
static result<region<Cont>, std::string> invoke(location<Cont>& loc)
static result<region<Cont>, std::string>
invoke(location<Cont>& loc, const bool msg = false)
{
static_assert(std::is_same<char, typename Cont::value_type>::value,
"internal error: container::value_type should be `char`.");
return Head::invoke(loc);
return Head::invoke(loc, msg);
}
static std::string pattern()
{
@ -282,13 +307,14 @@ template<typename T, std::size_t N>
struct repeat<T, exactly<N>>
{
template<typename Cont>
static result<region<Cont>, std::string> invoke(location<Cont>& loc)
static result<region<Cont>, std::string>
invoke(location<Cont>& loc, const bool msg = false)
{
region<Cont> retval(loc);
const auto first = loc.iter();
for(std::size_t i=0; i<N; ++i)
{
auto rslt = T::invoke(loc);
auto rslt = T::invoke(loc, msg);
if(rslt.is_err())
{
loc.reset(first);
@ -308,14 +334,15 @@ template<typename T, std::size_t N>
struct repeat<T, at_least<N>>
{
template<typename Cont>
static result<region<Cont>, std::string> invoke(location<Cont>& loc)
static result<region<Cont>, std::string>
invoke(location<Cont>& loc, const bool msg = false)
{
region<Cont> retval(loc);
const auto first = loc.iter();
for(std::size_t i=0; i<N; ++i)
{
auto rslt = T::invoke(loc);
auto rslt = T::invoke(loc, msg);
if(rslt.is_err())
{
loc.reset(first);
@ -325,7 +352,7 @@ struct repeat<T, at_least<N>>
}
while(true)
{
auto rslt = T::invoke(loc);
auto rslt = T::invoke(loc, msg);
if(rslt.is_err())
{
return ok(std::move(retval));
@ -343,12 +370,13 @@ template<typename T>
struct repeat<T, unlimited>
{
template<typename Cont>
static result<region<Cont>, std::string> invoke(location<Cont>& loc)
static result<region<Cont>, std::string>
invoke(location<Cont>& loc, const bool msg = false)
{
region<Cont> retval(loc);
while(true)
{
auto rslt = T::invoke(loc);
auto rslt = T::invoke(loc, msg);
if(rslt.is_err())
{
return ok(std::move(retval));

View File

@ -116,10 +116,28 @@ parse_integer(location<Container>& loc)
const auto first = loc.iter();
if(first != loc.end() && *first == '0')
{
if(const auto bin = parse_binary_integer (loc)) {return bin;}
if(const auto oct = parse_octal_integer (loc)) {return oct;}
if(const auto hex = parse_hexadecimal_integer(loc)) {return hex;}
// else, maybe just zero.
const auto second = std::next(first);
if(second == loc.end()) // the token is just zero.
{
return ok(std::make_pair(0, region<Container>(loc, first, second)));
}
if(*second == 'b') {return parse_binary_integer (loc);} // 0b1100
if(*second == 'o') {return parse_octal_integer (loc);} // 0o775
if(*second == 'x') {return parse_hexadecimal_integer(loc);} // 0xC0FFEE
if(std::isdigit(*second))
{
return err(format_underline("[error] toml::parse_integer: "
"leading zero in an Integer is not allowed.",
{{std::addressof(loc), "leading zero"}}));
}
else if(std::isalpha(*second))
{
return err(format_underline("[error] toml::parse_integer: "
"unknown integer prefix appeared.",
{{std::addressof(loc), "none of 0x, 0o, 0b"}}));
}
}
if(const auto token = lex_dec_int::invoke(loc))
@ -308,7 +326,7 @@ result<std::string, std::string> parse_escape_sequence(location<Container>& loc)
{
return err(format_underline("[error] parse_escape_sequence: "
"invalid token found in UTF-8 codepoint uXXXX.",
{{std::addressof(loc), token.unwrap_err()}}));
{{std::addressof(loc), "here"}}));
}
}
case 'U':
@ -321,7 +339,7 @@ result<std::string, std::string> parse_escape_sequence(location<Container>& loc)
{
return err(format_underline("[error] parse_escape_sequence: "
"invalid token found in UTF-8 codepoint Uxxxxxxxx",
{{std::addressof(loc), token.unwrap_err()}}));
{{std::addressof(loc), "here"}}));
}
}
}
@ -388,7 +406,9 @@ parse_ml_basic_string(location<Container>& loc)
else
{
loc.reset(first);
return err(token.unwrap_err());
return err(format_underline("[error] toml::parse_ml_basic_string: "
"the next token is not a multiline string",
{{std::addressof(loc), "here"}}));
}
}
@ -437,7 +457,9 @@ parse_basic_string(location<Container>& loc)
else
{
loc.reset(first); // rollback
return err(token.unwrap_err());
return err(format_underline("[error] toml::parse_basic_string: "
"the next token is not a string",
{{std::addressof(loc), "here"}}));
}
}
@ -476,7 +498,9 @@ parse_ml_literal_string(location<Container>& loc)
else
{
loc.reset(first); // rollback
return err(token.unwrap_err());
return err(format_underline("[error] toml::parse_ml_literal_string: "
"the next token is not a multiline literal string",
{{std::addressof(loc), "here"}}));
}
}
@ -513,7 +537,9 @@ parse_literal_string(location<Container>& loc)
else
{
loc.reset(first); // rollback
return err(token.unwrap_err());
return err(format_underline("[error] toml::parse_literal_string: "
"the next token is not a literal string",
{{std::addressof(loc), "here"}}));
}
}
@ -521,10 +547,30 @@ template<typename Container>
result<std::pair<toml::string, region<Container>>, std::string>
parse_string(location<Container>& loc)
{
if(const auto rslt = parse_ml_basic_string(loc)) {return rslt;}
if(const auto rslt = parse_ml_literal_string(loc)) {return rslt;}
if(const auto rslt = parse_basic_string(loc)) {return rslt;}
if(const auto rslt = parse_literal_string(loc)) {return rslt;}
if(loc.iter() != loc.end() && *(loc.iter()) == '"')
{
if(loc.iter() + 1 != loc.end() && *(loc.iter() + 1) == '"' &&
loc.iter() + 2 != loc.end() && *(loc.iter() + 2) == '"')
{
return parse_ml_basic_string(loc);
}
else
{
return parse_basic_string(loc);
}
}
else if(loc.iter() != loc.end() && *(loc.iter()) == '\'')
{
if(loc.iter() + 1 != loc.end() && *(loc.iter() + 1) == '\'' &&
loc.iter() + 2 != loc.end() && *(loc.iter() + 2) == '\'')
{
return parse_ml_literal_string(loc);
}
else
{
return parse_literal_string(loc);
}
}
return err(format_underline("[error] toml::parse_string: ",
{{std::addressof(loc), "the next token is not a string"}}));
}
@ -1374,6 +1420,46 @@ parse_inline_table(location<Container>& loc)
{{std::addressof(loc), "should be closed"}}));
}
// Decides which number-like type starts at `l` by trial-lexing.
//
// The lexers are tried in a fixed order: offset date-time, local date-time,
// local date, local time, and float. The first lexer that succeeds determines
// the returned type. If none of them succeeds, value_t::Integer is returned.
//
// Every attempt runs on a private copy of `l` that is rewound to `l.iter()`
// after a failed attempt, so the location passed by the caller is not moved.
template<typename Container>
value_t guess_number_type(const location<Container>& l)
{
    // the position every attempt starts from.
    const auto origin = l.iter();

    // scratch location consumed by the lexers instead of `l` itself.
    location<Container> probe(l);

    if(lex_offset_date_time::invoke(probe))
    {
        return value_t::OffsetDatetime;
    }
    probe.reset(origin);

    if(lex_local_date_time::invoke(probe))
    {
        return value_t::LocalDatetime;
    }
    probe.reset(origin);

    if(lex_local_date::invoke(probe))
    {
        return value_t::LocalDate;
    }
    probe.reset(origin);

    if(lex_local_time::invoke(probe))
    {
        return value_t::LocalTime;
    }
    probe.reset(origin);

    if(lex_float::invoke(probe))
    {
        return value_t::Float;
    }
    probe.reset(origin);

    // nothing above matched; treat the token as an integer.
    return value_t::Integer;
}
// Guesses the type of the value that begins at `loc` by looking at its first
// character only.
//
//   '"' or '\''  -> String
//   't' or 'f'   -> Boolean
//   '['          -> Array
//   '{'          -> Table
//   'i' or 'n'   -> Float (inf / nan)
//   anything else is forwarded to guess_number_type().
//
// NOTE: `loc.iter()` is dereferenced here without being compared against
// `loc.end()`, so the caller has to pass a location that is not exhausted.
template<typename Container>
value_t guess_value_type(const location<Container>& loc)
{
    const auto head = *loc.iter();
    switch(head)
    {
        case '"' :
        case '\'':
        {
            return value_t::String;
        }
        case 't' :
        case 'f' :
        {
            return value_t::Boolean;
        }
        case '[' :
        {
            return value_t::Array;
        }
        case '{' :
        {
            return value_t::Table;
        }
        case 'i' : // inf.
        case 'n' : // nan.
        {
            return value_t::Float;
        }
        default  :
        {
            return guess_number_type(loc);
        }
    }
}
template<typename Container>
result<value, std::string> parse_value(location<Container>& loc)
{
@ -1383,31 +1469,27 @@ result<value, std::string> parse_value(location<Container>& loc)
return err(format_underline("[error] toml::parse_value: input is empty",
{{std::addressof(loc), ""}}));
}
if(auto r = parse_string (loc))
{return ok(value(std::move(r.unwrap().first), std::move(r.unwrap().second)));}
if(auto r = parse_array (loc))
{return ok(value(std::move(r.unwrap().first), std::move(r.unwrap().second)));}
if(auto r = parse_inline_table (loc))
{return ok(value(std::move(r.unwrap().first), std::move(r.unwrap().second)));}
if(auto r = parse_boolean (loc))
{return ok(value(std::move(r.unwrap().first), std::move(r.unwrap().second)));}
if(auto r = parse_offset_datetime(loc))
{return ok(value(std::move(r.unwrap().first), std::move(r.unwrap().second)));}
if(auto r = parse_local_datetime (loc))
{return ok(value(std::move(r.unwrap().first), std::move(r.unwrap().second)));}
if(auto r = parse_local_date (loc))
{return ok(value(std::move(r.unwrap().first), std::move(r.unwrap().second)));}
if(auto r = parse_local_time (loc))
{return ok(value(std::move(r.unwrap().first), std::move(r.unwrap().second)));}
if(auto r = parse_floating (loc))
{return ok(value(std::move(r.unwrap().first), std::move(r.unwrap().second)));}
if(auto r = parse_integer (loc))
{return ok(value(std::move(r.unwrap().first), std::move(r.unwrap().second)));}
const auto msg = format_underline("[error] toml::parse_value: "
"unknown token appeared", {{std::addressof(loc), "unknown"}});
loc.reset(first);
return err(msg);
switch(guess_value_type(loc))
{
case value_t::Boolean : {return parse_boolean(loc); }
case value_t::Integer : {return parse_integer(loc); }
case value_t::Float : {return parse_floating(loc); }
case value_t::String : {return parse_string(loc); }
case value_t::OffsetDatetime : {return parse_offset_datetime(loc);}
case value_t::LocalDatetime : {return parse_local_datetime(loc); }
case value_t::LocalDate : {return parse_local_date(loc); }
case value_t::LocalTime : {return parse_local_time(loc); }
case value_t::Array : {return parse_array(loc); }
case value_t::Table : {return parse_inline_table(loc); }
default:
{
const auto msg = format_underline("[error] toml::parse_value: "
"unknown token appeared", {{std::addressof(loc), "unknown"}});
loc.reset(first);
return err(msg);
}
}
}
template<typename Container>
@ -1463,7 +1545,8 @@ parse_table_key(location<Container>& loc)
}
else
{
return err(token.unwrap_err());
return err(format_underline("[error] toml::parse_table_key: "
"not a valid table key", {{std::addressof(loc), "here"}}));
}
}
@ -1471,7 +1554,7 @@ template<typename Container>
result<std::pair<std::vector<key>, region<Container>>, std::string>
parse_array_table_key(location<Container>& loc)
{
if(auto token = lex_array_table::invoke(loc))
if(auto token = lex_array_table::invoke(loc, true))
{
location<std::string> inner_loc(loc.name(), token.unwrap().str());
@ -1516,7 +1599,8 @@ parse_array_table_key(location<Container>& loc)
}
else
{
return err(token.unwrap_err());
return err(format_underline("[error] toml::parse_array_table_key: "
"not a valid table key", {{std::addressof(loc), "here"}}));
}
}

View File

@ -71,7 +71,7 @@ struct location final : public region_base
"container should be randomly accessible");
location(std::string name, Container cont)
: source_(std::make_shared<Container>(std::move(cont))), line_number_(0),
: source_(std::make_shared<Container>(std::move(cont))), line_number_(1),
source_name_(std::move(name)), iter_(source_->cbegin())
{}
location(const location&) = default;
@ -88,7 +88,7 @@ struct location final : public region_base
const_iterator begin() const noexcept {return source_->cbegin();}
const_iterator end() const noexcept {return source_->cend();}
// XXX At first, `location::line_num()` is implemented using `std::count` to
// XXX `location::line_num()` used to be implemented using `std::count` to
// count the number of '\n'. But with a long toml file (typically, 10k lines),
// it becomes intolerably slow because each time it generates error messages,
// it counts '\n' from thousands of characters. To work around it, I decided
@ -110,8 +110,8 @@ struct location final : public region_base
}
void reset(const_iterator rollback) noexcept
{
// since c++11, std::distance works in both ways and returns a negative
// value if `first` is ahead from `last`.
// since c++11, std::distance works in both ways for random-access
// iterators and returns a negative value if `first > last`.
if(0 <= std::distance(rollback, this->iter_)) // rollback < iter
{
this->line_number_ -= std::count(rollback, this->iter_, '\n');

View File

@ -572,6 +572,14 @@ class value
return *this;
}
// for internal use ------------------------------------------------------
// Constructs a value from a (T, region) pair by delegating to the two-argument
// constructor; both members are moved out of the by-value parameter.
//
// This overload takes part in overload resolution only when
// detail::is_exact_toml_type<T>::value is true.
//
// NOTE(review): presumably this exists so that the pairs produced by the
// parse_* functions can be converted into a value directly -- confirm against
// the converting constructors of `result`.
template<typename T, typename Container, typename std::enable_if<
detail::is_exact_toml_type<T>::value, std::nullptr_t>::type = nullptr>
value(std::pair<T, detail::region<Container>> parse_result)
: value(std::move(parse_result.first), std::move(parse_result.second))
{}
// type checking and casting ============================================
template<typename T>