Merge branch 'master' into allow-deeper-table-before

This commit is contained in:
ToruNiina 2019-03-05 23:27:11 +09:00
commit d90ffb63c6
3 changed files with 267 additions and 107 deletions

View File

@ -124,9 +124,9 @@ using lex_escape_unicode_short = sequence<character<'u'>,
using lex_escape_unicode_long = sequence<character<'U'>,
repeat<lex_hex_dig, exactly<8>>>;
using lex_escape_seq_char = either<character<'"'>, character<'\\'>,
character<'/'>, character<'b'>,
character<'f'>, character<'n'>,
character<'r'>, character<'t'>,
character<'b'>, character<'f'>,
character<'n'>, character<'r'>,
character<'t'>,
lex_escape_unicode_short,
lex_escape_unicode_long
>;

View File

@ -226,8 +226,9 @@ parse_floating(location<Container>& loc)
"the next token is not a float"));
}
template<typename Container>
std::string read_utf8_codepoint(const region<Container>& reg)
template<typename Container, typename Container2>
std::string read_utf8_codepoint(const region<Container>& reg,
/* for err msg */ const location<Container2>& loc)
{
const auto str = reg.str().substr(1);
std::uint_least32_t codepoint;
@ -247,20 +248,27 @@ std::string read_utf8_codepoint(const region<Container>& reg)
}
else if(codepoint < 0x10000) // U+0800...U+FFFF
{
if(0xD800 <= codepoint && codepoint <= 0xDFFF)
{
std::cerr << format_underline("[warning] "
"toml::read_utf8_codepoint: codepoints in the range "
"[0xD800, 0xDFFF] are not valid UTF-8.",
loc, "not a valid UTF-8 codepoint") << std::endl;
}
assert(codepoint < 0xD800 || 0xDFFF < codepoint);
// 1110yyyy 10yxxxxx 10xxxxxx
character += static_cast<unsigned char>(0xE0| codepoint >> 12);
character += static_cast<unsigned char>(0x80|(codepoint >> 6 & 0x3F));
character += static_cast<unsigned char>(0x80|(codepoint & 0x3F));
}
else if(codepoint < 0x200000) // U+10000 ... U+1FFFFF
else if(codepoint < 0x200000) // U+010000 ... U+1FFFFF
{
if(0x10FFFF < codepoint) // out of Unicode region
{
std::cerr << format_underline(concat_to_string("[warning] "
"input codepoint (", str, ") is too large to decode as "
"a unicode character. The result may not be able to render "
"to your screen."), reg, "should be in [0x00..0x10FFFF]")
<< std::endl;
std::cerr << format_underline("[error] "
"toml::read_utf8_codepoint: input codepoint is too large to "
"decode as a unicode character.", loc,
"should be in [0x00..0x10FFFF]") << std::endl;
}
// 11110yyy 10yyxxxx 10xxxxxx 10xxxxxx
character += static_cast<unsigned char>(0xF0| codepoint >> 18);
@ -300,7 +308,7 @@ result<std::string, std::string> parse_escape_sequence(location<Container>& loc)
{
if(const auto token = lex_escape_unicode_short::invoke(loc))
{
return ok(read_utf8_codepoint(token.unwrap()));
return ok(read_utf8_codepoint(token.unwrap(), loc));
}
else
{
@ -313,7 +321,7 @@ result<std::string, std::string> parse_escape_sequence(location<Container>& loc)
{
if(const auto token = lex_escape_unicode_long::invoke(loc))
{
return ok(read_utf8_codepoint(token.unwrap()));
return ok(read_utf8_codepoint(token.unwrap(), loc));
}
else
{
@ -868,16 +876,39 @@ parse_array(location<Container>& loc)
{
if(!retval.empty() && retval.front().type() != val.as_ok().type())
{
throw syntax_error(format_underline(
"[error] toml::parse_array: type of elements should be the "
"same each other.", region<Container>(loc, first, loc.iter()),
"inhomogeneous types"));
auto array_start_loc = loc;
array_start_loc.iter() = first;
throw syntax_error(format_underline("[error] toml::parse_array: "
"type of elements should be the same each other.",
std::vector<std::pair<region_base const*, std::string>>{
std::make_pair(
std::addressof(array_start_loc),
std::string("array starts here")
),
std::make_pair(
std::addressof(get_region(retval.front())),
std::string("value has type ") +
stringize(retval.front().type())
),
std::make_pair(
std::addressof(get_region(val.unwrap())),
std::string("value has different type, ") +
stringize(val.unwrap().type())
)
}));
}
retval.push_back(std::move(val.unwrap()));
}
else
{
return err(val.unwrap_err());
auto array_start_loc = loc;
array_start_loc.iter() = first;
throw syntax_error(format_underline("[error] toml::parse_array: "
"value having invalid format appeared in an array",
array_start_loc, "array starts here",
loc, "it is not a valid value."));
}
using lex_array_separator = sequence<maybe<lex_ws>, character<','>>;
@ -893,8 +924,12 @@ parse_array(location<Container>& loc)
}
else
{
auto array_start_loc = loc;
array_start_loc.iter() = first;
throw syntax_error(format_underline("[error] toml::parse_array:"
" missing array separator `,`", loc, "should be `,`"));
" missing array separator `,` after a value",
array_start_loc, "array starts here", loc, "should be `,`"));
}
}
}
@ -952,6 +987,7 @@ parse_key_value_pair(location<Container>& loc)
{
std::string msg;
loc.iter() = after_kvsp;
// check there is something not a comment/whitespace after `=`
if(sequence<maybe<lex_ws>, maybe<lex_comment>, lex_newline>::invoke(loc))
{
loc.iter() = after_kvsp;
@ -959,10 +995,9 @@ parse_key_value_pair(location<Container>& loc)
"missing value after key-value separator '='", loc,
"expected value, but got nothing");
}
else
else // there is something not a comment/whitespace, so invalid format.
{
msg = format_underline("[error] toml::parse_key_value_pair: "
"invalid value format", loc, val.unwrap_err());
msg = std::move(val.unwrap_err());
}
loc.iter() = first;
return err(msg);
@ -1193,7 +1228,7 @@ insert_nested_key(table& root, const toml::value& v,
"[error] toml::insert_value: value (\"",
format_dotted_keys(first, last), "\") already exists."),
get_region(tab->at(k)), "value already exists here",
get_region(v), "value inserted twice"));
get_region(v), "value defined twice"));
}
}
tab->insert(std::make_pair(k, v));
@ -1376,6 +1411,20 @@ parse_table_key(location<Container>& loc)
throw internal_error(format_underline("[error] "
"toml::parse_table_key: no `]`", inner_loc, "should be `]`"));
}
// after [table.key], a newline or EOF (empty table) is required.
if(loc.iter() != loc.end())
{
using lex_newline_after_table_key =
sequence<maybe<lex_ws>, maybe<lex_comment>, lex_newline>;
const auto nl = lex_newline_after_table_key::invoke(loc);
if(!nl)
{
throw syntax_error(format_underline("[error] "
"toml::parse_table_key: newline required after [table.key]",
loc, "expected newline"));
}
}
return ok(std::make_pair(keys.unwrap().first, token.unwrap()));
}
else
@ -1414,6 +1463,20 @@ parse_array_table_key(location<Container>& loc)
throw internal_error(format_underline("[error] "
"toml::parse_table_key: no `]]`", inner_loc, "should be `]]`"));
}
// after [[table.key]], a newline or EOF (empty table) is required.
if(loc.iter() != loc.end())
{
using lex_newline_after_table_key =
sequence<maybe<lex_ws>, maybe<lex_comment>, lex_newline>;
const auto nl = lex_newline_after_table_key::invoke(loc);
if(!nl)
{
throw syntax_error(format_underline("[error] "
"toml::parse_array_table_key: newline required after "
"[[table.key]]", loc, "expected newline"));
}
}
return ok(std::make_pair(keys.unwrap().first, token.unwrap()));
}
else
@ -1429,7 +1492,7 @@ result<table, std::string> parse_ml_table(location<Container>& loc)
const auto first = loc.iter();
if(first == loc.end())
{
return err(std::string("toml::parse_ml_table: input is empty"));
return ok(toml::table{});
}
// XXX at least one newline is needed.
@ -1508,11 +1571,11 @@ result<table, std::string> parse_toml_file(location<Container>& loc)
const auto first = loc.iter();
if(first == loc.end())
{
return err(std::string("toml::detail::parse_toml_file: input is empty"));
return ok(toml::table{});
}
table data;
/* root object is also table, but without [tablename] */
// root object is also a table, but without [tablename]
if(auto tab = parse_ml_table(loc))
{
data = std::move(tab.unwrap());

View File

@ -28,44 +28,6 @@ inline std::string make_string(std::size_t len, char c)
return std::string(len, c);
}
// location in a container, normally in a file content.
// shared_ptr points to the resource that the iter points into.
// it can be used not only for resource handling, but also for error messages.
template<typename Container>
struct location
{
    // Only containers whose element type is exactly `char` are accepted.
    static_assert(std::is_same<char, typename Container::value_type>::value,"");

    using const_iterator = typename Container::const_iterator;
    using source_ptr     = std::shared_ptr<const Container>;

    // Moves `cont` into shared storage and places the cursor on its first
    // element; `name` is stored as the name reported by name().
    location(std::string name, Container cont)
        : source_(std::make_shared<Container>(std::move(cont))),
          source_name_(std::move(name)),
          iter_(source_->cbegin())
    {}

    // Copies share the same underlying container through the shared_ptr.
    location(const location&)            = default;
    location(location&&)                 = default;
    location& operator=(const location&) = default;
    location& operator=(location&&)      = default;
    ~location()                          = default;

    // Cursor access. The non-const overload returns a reference, so callers
    // can advance or reset the position in place.
    const_iterator& iter()       noexcept {return this->iter_;}
    const_iterator  iter() const noexcept {return this->iter_;}

    // Bounds of the whole underlying container.
    const_iterator begin() const noexcept {return this->source_->cbegin();}
    const_iterator end()   const noexcept {return this->source_->cend();}

    // Shared handle to the container: lvalues expose it by const reference,
    // rvalues give it up by move.
    source_ptr const& source() const& noexcept {return this->source_;}
    source_ptr&&      source() &&     noexcept {return std::move(this->source_);}

    // Name given at construction.
    std::string const& name() const noexcept {return this->source_name_;}

  private:

    source_ptr     source_;
    std::string    source_name_;
    const_iterator iter_;
};
// region in a container, normally in a file content.
// shared_ptr points to the resource that the iter points into.
// combinators return this.
@ -86,12 +48,89 @@ struct region_base
virtual std::string line() const {return std::string("unknown line");}
virtual std::string line_num() const {return std::string("?");}
virtual std::size_t before() const noexcept {return 0;}
virtual std::size_t size() const noexcept {return 0;}
virtual std::size_t after() const noexcept {return 0;}
};
// location in a container, normally in a file content.
// shared_ptr points to the resource that the iter points into.
// it can be used not only for resource handling, but also for error messages.
//
// it can be considered as a region that contains only one character.
template<typename Container>
struct location final : public region_base
{
    // Only containers whose element type is exactly `char` are accepted.
    static_assert(std::is_same<char, typename Container::value_type>::value,"");

    using const_iterator = typename Container::const_iterator;
    using source_ptr     = std::shared_ptr<const Container>;

    // Moves `cont` into shared storage and places the cursor on its first
    // element; `name` is stored as the name reported by name().
    location(std::string name, Container cont)
        : source_(std::make_shared<Container>(std::move(cont))),
          source_name_(std::move(name)),
          iter_(source_->cbegin())
    {}

    // Copies share the same underlying container through the shared_ptr.
    location(const location&)            = default;
    location(location&&)                 = default;
    location& operator=(const location&) = default;
    location& operator=(location&&)      = default;
    ~location()                          = default;

    // True while this object still holds a source container.
    bool is_ok() const noexcept override
    {
        return static_cast<bool>(this->source_);
    }

    // Cursor access. The non-const overload returns a reference, so callers
    // can advance or reset the position in place.
    const_iterator& iter()       noexcept {return this->iter_;}
    const_iterator  iter() const noexcept {return this->iter_;}

    // Bounds of the whole underlying container.
    const_iterator begin() const noexcept {return this->source_->cbegin();}
    const_iterator end()   const noexcept {return this->source_->cend();}

    // The single character under the cursor, as a string.
    // NOTE(review): this dereferences iter() unconditionally; presumably
    // callers never ask for str() when the cursor is at end() -- confirm.
    std::string str() const override
    {
        return make_string(1, *this->iter());
    }

    // Name given at construction.
    std::string name() const override
    {
        return this->source_name_;
    }

    // 1-based line number: one more than the count of '\n' before the cursor.
    std::string line_num() const override
    {
        const auto newlines_before =
            std::count(this->begin(), this->iter(), '\n');
        return std::to_string(1 + newlines_before);
    }

    // Text of the line containing the cursor, from line_begin() to line_end().
    std::string line() const override
    {
        const const_iterator first = this->line_begin();
        const const_iterator last  = this->line_end();
        return make_string(first, last);
    }

    // Searches backwards from the cursor for '\n'. The result is the position
    // just after that newline, or begin() when none is found.
    const_iterator line_begin() const noexcept
    {
        using reverse_iterator = std::reverse_iterator<const_iterator>;
        const reverse_iterator rfirst(this->iter());
        const reverse_iterator rlast (this->begin());
        return std::find(rfirst, rlast, '\n').base();
    }

    // Searches forwards from the cursor for '\n'. The result is the position
    // of that newline, or end() when none is found.
    const_iterator line_end() const noexcept
    {
        return std::find(this->iter(), this->end(), '\n');
    }

    // A location always points at a single character, so its size is 1.
    std::size_t size() const noexcept override
    {
        return 1u;
    }

    // Number of characters between the start of the line and the cursor.
    std::size_t before() const noexcept override
    {
        return std::distance(this->line_begin(), this->iter());
    }

    // Number of characters between the cursor and the end of the line.
    std::size_t after() const noexcept override
    {
        return std::distance(this->iter(), this->line_end());
    }

    // Shared handle to the container: lvalues expose it by const reference,
    // rvalues give it up by move.
    source_ptr const& source() const& noexcept {return this->source_;}
    source_ptr&&      source() &&     noexcept {return std::move(this->source_);}

  private:

    source_ptr     source_;
    std::string    source_name_;
    const_iterator iter_;
};
template<typename Container>
struct region final : public region_base
{
@ -225,7 +264,19 @@ inline std::string format_underline(const std::string& message,
retval += make_string(line_number.size() + 1, ' ');
retval += " | ";
retval += make_string(reg.before(), ' ');
retval += make_string(reg.size(), '~');
if(reg.size() == 1)
{
// invalid
// ^------
retval += '^';
retval += make_string(reg.after(), '-');
}
else
{
// invalid
// ~~~~~~~
retval += make_string(reg.size(), '~');
}
retval += ' ';
retval += comment_for_underline;
if(helps.size() != 0)
@ -270,7 +321,19 @@ inline std::string format_underline(const std::string& message,
retval << make_string(line_num_width + 1, ' ');
retval << " | ";
retval << make_string(reg1.before(), ' ');
retval << make_string(reg1.size(), '~');
if(reg1.size() == 1)
{
// invalid
// ^------
retval << '^';
retval << make_string(reg1.after(), '-');
}
else
{
// invalid
// ~~~~~~~
retval << make_string(reg1.size(), '~');
}
retval << ' ';
retval << comment_for_underline1 << newline;
// ---------------------------------------
@ -287,7 +350,19 @@ inline std::string format_underline(const std::string& message,
retval << make_string(line_num_width + 1, ' ');
retval << " | ";
retval << make_string(reg2.before(), ' ');
retval << make_string(reg2.size(), '~');
if(reg2.size() == 1)
{
// invalid
// ^------
retval << '^';
retval << make_string(reg2.after(), '-');
}
else
{
// invalid
// ~~~~~~~
retval << make_string(reg2.size(), '~');
}
retval << ' ';
retval << comment_for_underline2;
if(helps.size() != 0)
@ -305,62 +380,84 @@ inline std::string format_underline(const std::string& message,
return retval.str();
}
// to show a better error message.
template<typename Container>
std::string
format_underline(const std::string& message, const location<Container>& loc,
const std::string& comment_for_underline,
std::vector<std::string> helps = {})
inline std::string format_underline(const std::string& message,
std::vector<std::pair<region_base const*, std::string>> reg_com,
std::vector<std::string> helps = {})
{
assert(!reg_com.empty());
#ifdef _WIN32
const auto newline = "\r\n";
#else
const char newline = '\n';
#endif
using const_iterator = typename location<Container>::const_iterator;
using reverse_iterator = std::reverse_iterator<const_iterator>;
const auto line_begin = std::find(reverse_iterator(loc.iter()),
reverse_iterator(loc.begin()),
'\n').base();
const auto line_end = std::find(loc.iter(), loc.end(), '\n');
const auto line_number = std::to_string(
1 + std::count(loc.begin(), loc.iter(), '\n'));
const auto line_num_width = std::max_element(reg_com.begin(), reg_com.end(),
[](std::pair<region_base const*, std::string> const& lhs,
std::pair<region_base const*, std::string> const& rhs)
{
return lhs.first->line_num().size() < rhs.first->line_num().size();
}
)->first->line_num().size();
std::ostringstream retval;
retval << message << newline;
for(std::size_t i=0; i<reg_com.size(); ++i)
{
if(i!=0 && reg_com.at(i-1).first->name() == reg_com.at(i).first->name())
{
retval << " ..." << newline;
}
else
{
retval << " --> " << reg_com.at(i).first->name() << newline;
}
const region_base* const reg = reg_com.at(i).first;
const std::string& comment = reg_com.at(i).second;
retval << ' ' << std::setw(line_num_width) << reg->line_num();
retval << " | " << reg->line() << newline;
retval << make_string(line_num_width + 1, ' ');
retval << " | " << make_string(reg->before(), ' ');
if(reg->size() == 1)
{
// invalid
// ^------
retval << '^';
retval << make_string(reg->after(), '-');
}
else
{
// invalid
// ~~~~~~~
retval << make_string(reg->size(), '~');
}
retval << ' ';
retval << comment << newline;
}
std::string retval;
retval += message;
retval += newline;
retval += " --> ";
retval += loc.name();
retval += newline;
retval += ' ';
retval += line_number;
retval += " | ";
retval += make_string(line_begin, line_end);
retval += newline;
retval += make_string(line_number.size() + 1, ' ');
retval += " | ";
retval += make_string(std::distance(line_begin, loc.iter()),' ');
retval += '^';
retval += make_string(std::distance(loc.iter(), line_end), '-');
retval += ' ';
retval += comment_for_underline;
if(helps.size() != 0)
{
retval += newline;
retval += make_string(line_number.size() + 1, ' ');
retval += " | ";
retval << newline;
retval << make_string(line_num_width + 1, ' ');
retval << " | ";
for(const auto help : helps)
{
retval += newline;
retval += "Hint: ";
retval += help;
retval << newline;
retval << "Hint: ";
retval << help;
}
}
return retval;
return retval.str();
}
} // detail
} // toml
#endif// TOML11_REGION_H