diff --git a/toml/combinator.hpp b/toml/combinator.hpp index e925b99..ac8dd14 100644 --- a/toml/combinator.hpp +++ b/toml/combinator.hpp @@ -65,7 +65,7 @@ struct character return err(concat_to_string("expected '", show_char(target), "' but got '", show_char(c), "'.")); } - ++(loc.iter()); // update location + loc.advance(); // update location return ok(region(loc, first, loc.iter())); } @@ -102,7 +102,7 @@ struct in_range "'", show_char(c), "'.")); } - ++(loc.iter()); + loc.advance(); return ok(region(loc, first, loc.iter())); } @@ -131,12 +131,12 @@ struct exclude auto rslt = Combinator::invoke(loc); if(rslt.is_ok()) { - loc.iter() = first; // rollback + loc.reset(first); return err(concat_to_string( "invalid pattern (", Combinator::pattern(), ") appeared ", rslt.unwrap().str())); } - loc.iter() = std::next(first); + loc.reset(std::next(first)); // XXX maybe loc.advance() is okay but... return ok(region(loc, first, loc.iter())); } @@ -186,7 +186,7 @@ struct sequence const auto rslt = Head::invoke(loc); if(rslt.is_err()) { - loc.iter() = first; + loc.reset(first); return err(rslt.unwrap_err()); } return sequence::invoke(loc, std::move(rslt.unwrap()), first); @@ -200,7 +200,7 @@ struct sequence const auto rslt = Head::invoke(loc); if(rslt.is_err()) { - loc.iter() = first; + loc.reset(first); return err(rslt.unwrap_err()); } reg += rslt.unwrap(); // concat regions @@ -224,7 +224,7 @@ struct sequence const auto rslt = Head::invoke(loc); if(rslt.is_err()) { - loc.iter() = first; + loc.reset(first); return err(rslt.unwrap_err()); } reg += rslt.unwrap(); // concat regions @@ -291,7 +291,7 @@ struct repeat> auto rslt = T::invoke(loc); if(rslt.is_err()) { - loc.iter() = first; + loc.reset(first); return err(rslt.unwrap_err()); } retval += rslt.unwrap(); @@ -318,7 +318,7 @@ struct repeat> auto rslt = T::invoke(loc); if(rslt.is_err()) { - loc.iter() = first; + loc.reset(first); return err(rslt.unwrap_err()); } retval += rslt.unwrap(); diff --git a/toml/literal.hpp 
b/toml/literal.hpp index 3642626..08d7b5e 100644 --- a/toml/literal.hpp +++ b/toml/literal.hpp @@ -39,7 +39,7 @@ inline ::toml::value operator""_toml(const char* str, std::size_t len) // literal is a TOML file (i.e. multiline table). if(auto data = ::toml::detail::parse_toml_file(loc)) { - loc.iter() = loc.begin(); // rollback to the top of the literal + loc.reset(loc.begin()); // rollback to the top of the literal return ::toml::value(std::move(data.unwrap()), ::toml::detail::region>(std::move(loc))); } diff --git a/toml/parser.hpp b/toml/parser.hpp index 765d8b5..c1b98f7 100644 --- a/toml/parser.hpp +++ b/toml/parser.hpp @@ -33,7 +33,7 @@ parse_boolean(location& loc) {{std::addressof(reg), "invalid token"}})); } } - loc.iter() = first; //rollback + loc.reset(first); //rollback return err(format_underline("[error] toml::parse_boolean: ", {{std::addressof(loc), "the next token is not a boolean"}})); } @@ -62,7 +62,7 @@ parse_binary_integer(location& loc) } return ok(std::make_pair(retval, token.unwrap())); } - loc.iter() = first; + loc.reset(first); return err(format_underline("[error] toml::parse_binary_integer:", {{std::addressof(loc), "the next token is not an integer"}})); } @@ -83,7 +83,7 @@ parse_octal_integer(location& loc) iss >> std::oct >> retval; return ok(std::make_pair(retval, token.unwrap())); } - loc.iter() = first; + loc.reset(first); return err(format_underline("[error] toml::parse_octal_integer:", {{std::addressof(loc), "the next token is not an integer"}})); } @@ -104,7 +104,7 @@ parse_hexadecimal_integer(location& loc) iss >> std::hex >> retval; return ok(std::make_pair(retval, token.unwrap())); } - loc.iter() = first; + loc.reset(first); return err(format_underline("[error] toml::parse_hexadecimal_integer", {{std::addressof(loc), "the next token is not an integer"}})); } @@ -132,7 +132,7 @@ parse_integer(location& loc) iss >> retval; return ok(std::make_pair(retval, token.unwrap())); } - loc.iter() = first; + loc.reset(first); return 
err(format_underline("[error] toml::parse_integer: ", {{std::addressof(loc), "the next token is not an integer"}})); } @@ -221,7 +221,7 @@ parse_floating(location& loc) iss >> v; return ok(std::make_pair(v, token.unwrap())); } - loc.iter() = first; + loc.reset(first); return err(format_underline("[error] toml::parse_floating: ", {{std::addressof(loc), "the next token is not a float"}})); } @@ -288,16 +288,16 @@ result parse_escape_sequence(location& loc) return err(format_underline("[error]: toml::parse_escape_sequence: ", {{ std::addressof(loc), "the next token is not a backslash \"\\\""}})); } - ++loc.iter(); + loc.advance(); switch(*loc.iter()) { - case '\\':{++loc.iter(); return ok(std::string("\\"));} - case '"' :{++loc.iter(); return ok(std::string("\""));} - case 'b' :{++loc.iter(); return ok(std::string("\b"));} - case 't' :{++loc.iter(); return ok(std::string("\t"));} - case 'n' :{++loc.iter(); return ok(std::string("\n"));} - case 'f' :{++loc.iter(); return ok(std::string("\f"));} - case 'r' :{++loc.iter(); return ok(std::string("\r"));} + case '\\':{loc.advance(); return ok(std::string("\\"));} + case '"' :{loc.advance(); return ok(std::string("\""));} + case 'b' :{loc.advance(); return ok(std::string("\b"));} + case 't' :{loc.advance(); return ok(std::string("\t"));} + case 'n' :{loc.advance(); return ok(std::string("\n"));} + case 'f' :{loc.advance(); return ok(std::string("\f"));} + case 'r' :{loc.advance(); return ok(std::string("\r"));} case 'u' : { if(const auto token = lex_escape_unicode_short::invoke(loc)) @@ -331,7 +331,7 @@ result parse_escape_sequence(location& loc) "escape sequence is one of \\, \", b, t, n, f, r, uxxxx, Uxxxxxxxx"}}, /* Hints = */{"if you want to write backslash as just one backslash, " "use literal string like: regex = '<\\i\\c*\\s*>'"}); - loc.iter() = first; + loc.reset(first); return err(msg); } @@ -343,7 +343,7 @@ parse_ml_basic_string(location& loc) if(const auto token = lex_ml_basic_string::invoke(loc)) { auto 
inner_loc = loc; - inner_loc.iter() = first; + inner_loc.reset(first); std::string retval; retval.reserve(token.unwrap().size()); @@ -387,7 +387,7 @@ parse_ml_basic_string(location& loc) } else { - loc.iter() = first; + loc.reset(first); return err(token.unwrap_err()); } } @@ -400,7 +400,7 @@ parse_basic_string(location& loc) if(const auto token = lex_basic_string::invoke(loc)) { auto inner_loc = loc; - inner_loc.iter() = first; + inner_loc.reset(first); auto quot = lex_quotation_mark::invoke(inner_loc); if(!quot) @@ -436,7 +436,7 @@ parse_basic_string(location& loc) } else { - loc.iter() = first; // rollback + loc.reset(first); // rollback return err(token.unwrap_err()); } } @@ -475,7 +475,7 @@ parse_ml_literal_string(location& loc) } else { - loc.iter() = first; // rollback + loc.reset(first); // rollback return err(token.unwrap_err()); } } @@ -512,7 +512,7 @@ parse_literal_string(location& loc) } else { - loc.iter() = first; // rollback + loc.reset(first); // rollback return err(token.unwrap_err()); } } @@ -548,7 +548,7 @@ parse_local_date(location& loc) "toml::parse_inner_local_date: invalid year format", {{std::addressof(inner_loc), msg}})); } - ++inner_loc.iter(); + inner_loc.advance(); const auto m = lex_date_month::invoke(inner_loc); if(!m || inner_loc.iter() == inner_loc.end() || *inner_loc.iter() != '-') { @@ -559,7 +559,7 @@ parse_local_date(location& loc) "toml::parse_local_date: invalid month format", {{std::addressof(inner_loc), msg}})); } - ++inner_loc.iter(); + inner_loc.advance(); const auto d = lex_date_mday::invoke(inner_loc); if(!d) { @@ -576,7 +576,7 @@ parse_local_date(location& loc) } else { - loc.iter() = first; + loc.reset(first); return err(format_underline("[error]: toml::parse_local_date: ", {{std::addressof(loc), "the next token is not a local_date"}})); } @@ -601,7 +601,7 @@ parse_local_time(location& loc) "toml::parse_local_time: invalid year format", {{std::addressof(inner_loc), msg}})); } - ++inner_loc.iter(); + inner_loc.advance(); 
const auto m = lex_time_minute::invoke(inner_loc); if(!m || inner_loc.iter() == inner_loc.end() || *inner_loc.iter() != ':') { @@ -612,7 +612,7 @@ parse_local_time(location& loc) "toml::parse_local_time: invalid month format", {{std::addressof(inner_loc), msg}})); } - ++inner_loc.iter(); + inner_loc.advance(); const auto s = lex_time_second::invoke(inner_loc); if(!s) { @@ -661,7 +661,7 @@ parse_local_time(location& loc) } else { - loc.iter() = first; + loc.reset(first); return err(format_underline("[error]: toml::parse_local_time: ", {{std::addressof(loc), "the next token is not a local_time"}})); } @@ -685,13 +685,14 @@ parse_local_datetime(location& loc) "toml::parse_local_datetime: invalid datetime format", {{std::addressof(inner_loc), msg}})); } - const char delim = *(inner_loc.iter()++); + const char delim = *(inner_loc.iter()); if(delim != 'T' && delim != 't' && delim != ' ') { throw internal_error(format_underline("[error]: " "toml::parse_local_datetime: invalid datetime format", {{std::addressof(inner_loc), "should be `T` or ` ` (space)"}})); } + inner_loc.advance(); const auto time = parse_local_time(inner_loc); if(!time) { @@ -705,7 +706,7 @@ parse_local_datetime(location& loc) } else { - loc.iter() = first; + loc.reset(first); return err(format_underline("[error]: toml::parse_local_datetime: ", {{std::addressof(loc), "the next token is not a local_datetime"}})); } @@ -755,7 +756,7 @@ parse_offset_datetime(location& loc) } else { - loc.iter() = first; + loc.reset(first); return err(format_underline("[error]: toml::parse_offset_datetime: ", {{std::addressof(loc), "the next token is not a local_datetime"}})); } @@ -816,7 +817,7 @@ parse_key(location& loc) } else if(*inner_loc.iter() == '.') { - ++inner_loc.iter(); // to skip `.` + inner_loc.advance(); // to skip `.` } else { @@ -827,7 +828,7 @@ parse_key(location& loc) } return ok(std::make_pair(keys, reg)); } - loc.iter() = first; + loc.reset(first); // simple key -> foo if(const auto smpl = 
parse_simple_key(loc)) @@ -856,7 +857,7 @@ parse_array(location& loc) { return err("[error] toml::parse_array: token is not an array"); } - ++loc.iter(); + loc.advance(); using lex_ws_comment_newline = repeat< either, unlimited>; @@ -868,7 +869,7 @@ parse_array(location& loc) if(loc.iter() != loc.end() && *loc.iter() == ']') { - ++loc.iter(); // skip ']' + loc.advance(); // skip ']' return ok(std::make_pair(retval, region(loc, first, loc.iter()))); } @@ -878,7 +879,7 @@ parse_array(location& loc) if(!retval.empty() && retval.front().type() != val.as_ok().type()) { auto array_start_loc = loc; - array_start_loc.iter() = first; + array_start_loc.reset(first); throw syntax_error(format_underline("[error] toml::parse_array: " "type of elements should be the same each other.", { @@ -898,7 +899,7 @@ parse_array(location& loc) else { auto array_start_loc = loc; - array_start_loc.iter() = first; + array_start_loc.reset(first); throw syntax_error(format_underline("[error] toml::parse_array: " "value having invalid format appeared in an array", { @@ -914,14 +915,14 @@ parse_array(location& loc) lex_ws_comment_newline::invoke(loc); if(loc.iter() != loc.end() && *loc.iter() == ']') { - ++loc.iter(); // skip ']' + loc.advance(); // skip ']' return ok(std::make_pair(retval, region(loc, first, loc.iter()))); } else { auto array_start_loc = loc; - array_start_loc.iter() = first; + array_start_loc.reset(first); throw syntax_error(format_underline("[error] toml::parse_array:" " missing array separator `,` after a value", { @@ -931,7 +932,7 @@ parse_array(location& loc) } } } - loc.iter() = first; + loc.reset(first); throw syntax_error(format_underline("[error] toml::parse_array: " "array did not closed by `]`", {{std::addressof(loc), "should be closed"}})); @@ -950,7 +951,7 @@ parse_key_value_pair(location& loc) // key. then we need to show error as "empty key is not allowed". 
if(const auto keyval_sep = lex_keyval_sep::invoke(loc)) { - loc.iter() = first; + loc.reset(first); msg = format_underline("[error] toml::parse_key_value_pair: " "empty key is not allowed.", {{std::addressof(loc), "key expected before '='"}}); @@ -979,7 +980,7 @@ parse_key_value_pair(location& loc) "missing key-value separator `=`", {{std::addressof(loc), "should be `=`"}}); } - loc.iter() = first; + loc.reset(first); return err(std::move(msg)); } @@ -988,11 +989,11 @@ parse_key_value_pair(location& loc) if(!val) { std::string msg; - loc.iter() = after_kvsp; + loc.reset(after_kvsp); // check there is something not a comment/whitespace after `=` if(sequence, maybe, lex_newline>::invoke(loc)) { - loc.iter() = after_kvsp; + loc.reset(after_kvsp); msg = format_underline("[error] toml::parse_key_value_pair: " "missing value after key-value separator '='", {{std::addressof(loc), "expected value, but got nothing"}}); @@ -1001,7 +1002,7 @@ parse_key_value_pair(location& loc) { msg = std::move(val.unwrap_err()); } - loc.iter() = first; + loc.reset(first); return err(msg); } return ok(std::make_pair(std::move(key_reg.unwrap()), @@ -1028,6 +1029,7 @@ std::string format_dotted_keys(InputIterator first, const InputIterator last) template result, region>, std::string> parse_table_key(location& loc); + // The following toml file is allowed. // ```toml // [a.b.c] # here, table `a` has element `b`. @@ -1318,14 +1320,14 @@ parse_inline_table(location& loc) return err(format_underline("[error] toml::parse_inline_table: ", {{std::addressof(loc), "the next token is not an inline table"}})); } - ++loc.iter(); + loc.advance(); // it starts from "{". 
it should be formatted as inline-table while(loc.iter() != loc.end()) { maybe::invoke(loc); if(loc.iter() != loc.end() && *loc.iter() == '}') { - ++loc.iter(); // skip `}` + loc.advance(); // skip `}` return ok(std::make_pair( retval, region(loc, first, loc.iter()))); } @@ -1354,7 +1356,7 @@ parse_inline_table(location& loc) maybe::invoke(loc); if(loc.iter() != loc.end() && *loc.iter() == '}') { - ++loc.iter(); // skip `}` + loc.advance(); // skip `}` return ok(std::make_pair( retval, region(loc, first, loc.iter()))); } @@ -1366,7 +1368,7 @@ parse_inline_table(location& loc) } } } - loc.iter() = first; + loc.reset(first); throw syntax_error(format_underline("[error] toml::parse_inline_table: " "inline table did not closed by `}`", {{std::addressof(loc), "should be closed"}})); @@ -1404,7 +1406,7 @@ result parse_value(location& loc) const auto msg = format_underline("[error] toml::parse_value: " "unknown token appeared", {{std::addressof(loc), "unknown"}}); - loc.iter() = first; + loc.reset(first); return err(msg); } @@ -1540,12 +1542,12 @@ result parse_ml_table(location& loc) const auto before = loc.iter(); if(const auto tmp = parse_array_table_key(loc)) // next table found { - loc.iter() = before; + loc.reset(before); return ok(tab); } if(const auto tmp = parse_table_key(loc)) // next table found { - loc.iter() = before; + loc.reset(before); return ok(tab); } @@ -1585,7 +1587,7 @@ result parse_ml_table(location& loc) const auto msg = format_underline("[error] toml::parse_table: " "invalid line format", {{std::addressof(loc), concat_to_string( "expected newline, but got '", show_char(*loc.iter()), "'.")}}); - loc.iter() = before; + loc.reset(before); return err(msg); } @@ -1688,7 +1690,7 @@ inline table parse(std::istream& is, std::string fname = "unknown file") std::memcpy(BOM.data(), loc.source()->data(), 3); if(BOM[0] == 0xEF && BOM[1] == 0xBB && BOM[2] == 0xBF) { - loc.iter() += 3; // BOM found. skip. + loc.advance(3); // BOM found. skip. 
} } diff --git a/toml/region.hpp b/toml/region.hpp index 7f3f194..57c1ab3 100644 --- a/toml/region.hpp +++ b/toml/region.hpp @@ -71,7 +71,7 @@ struct location final : public region_base "container should be randomly accessible"); location(std::string name, Container cont) - : source_(std::make_shared<Container>(std::move(cont))), + : source_(std::make_shared<Container>(std::move(cont))), line_number_(1), source_name_(std::move(name)), iter_(source_->cbegin()) {} location(const location&) = default; @@ -82,18 +82,54 @@ struct location final : public region_base bool is_ok() const noexcept override {return static_cast<bool>(source_);} - const_iterator& iter() noexcept {return iter_;} - const_iterator iter() const noexcept {return iter_;} + // this const prohibits codes like `++(loc.iter())`. + const const_iterator iter() const noexcept {return iter_;} - const_iterator begin() const noexcept {return source_->cbegin();} - const_iterator end() const noexcept {return source_->cend();} + const_iterator begin() const noexcept {return source_->cbegin();} + const_iterator end() const noexcept {return source_->cend();} + + // XXX At first, `location::line_num()` was implemented using `std::count` to + // count the number of '\n'. But with a long toml file (typically, 10k lines), + // it becomes intolerably slow because each time it generates error messages, + // it counts '\n' over thousands of characters. To work around it, I decided + // to introduce the `location::line_number_` member variable (1-origin) and + // keep it synchronized with every move of the iterator. So the overload of + // `iter()` that returned a mutable reference is removed, and `advance()`, + // `retrace()` and `reset()` are added. 
+ void advance(std::size_t n = 1) noexcept + { + this->line_number_ += std::count(this->iter_, this->iter_ + n, '\n'); + this->iter_ += n; + return; + } + void retrace(std::size_t n = 1) noexcept + { + this->line_number_ -= std::count(this->iter_ - n, this->iter_, '\n'); + this->iter_ -= n; + return; + } + void reset(const_iterator rollback) noexcept + { + // std::distance on random-access iterators is signed: it is negative + // when `rollback` lies ahead of the current position `iter_`. + if(0 <= std::distance(rollback, this->iter_)) // rollback <= iter + { + this->line_number_ -= std::count(rollback, this->iter_, '\n'); + } + else // iter < rollback [[unlikely]] + { + this->line_number_ += std::count(this->iter_, rollback, '\n'); + } + this->iter_ = rollback; + return; + } std::string str() const override {return make_string(1, *this->iter());} std::string name() const override {return source_name_;} std::string line_num() const override { - return std::to_string(1+std::count(this->begin(), this->iter(), '\n')); + return std::to_string(this->line_number_); } std::string line() const override @@ -132,6 +168,7 @@ struct location final : public region_base private: source_ptr source_; + std::size_t line_number_; std::string source_name_; const_iterator iter_; };