mirror of
https://github.com/ToruNiina/toml11.git
synced 2024-11-09 22:30:07 +00:00
Merge branch 'master' into allow-deeper-table-before
This commit is contained in:
commit
d90ffb63c6
@ -124,9 +124,9 @@ using lex_escape_unicode_short = sequence<character<'u'>,
|
||||
using lex_escape_unicode_long = sequence<character<'U'>,
|
||||
repeat<lex_hex_dig, exactly<8>>>;
|
||||
using lex_escape_seq_char = either<character<'"'>, character<'\\'>,
|
||||
character<'/'>, character<'b'>,
|
||||
character<'f'>, character<'n'>,
|
||||
character<'r'>, character<'t'>,
|
||||
character<'b'>, character<'f'>,
|
||||
character<'n'>, character<'r'>,
|
||||
character<'t'>,
|
||||
lex_escape_unicode_short,
|
||||
lex_escape_unicode_long
|
||||
>;
|
||||
|
109
toml/parser.hpp
109
toml/parser.hpp
@ -226,8 +226,9 @@ parse_floating(location<Container>& loc)
|
||||
"the next token is not a float"));
|
||||
}
|
||||
|
||||
template<typename Container>
|
||||
std::string read_utf8_codepoint(const region<Container>& reg)
|
||||
template<typename Container, typename Container2>
|
||||
std::string read_utf8_codepoint(const region<Container>& reg,
|
||||
/* for err msg */ const location<Container2>& loc)
|
||||
{
|
||||
const auto str = reg.str().substr(1);
|
||||
std::uint_least32_t codepoint;
|
||||
@ -247,20 +248,27 @@ std::string read_utf8_codepoint(const region<Container>& reg)
|
||||
}
|
||||
else if(codepoint < 0x10000) // U+0800...U+FFFF
|
||||
{
|
||||
if(0xD800 <= codepoint && codepoint <= 0xDFFF)
|
||||
{
|
||||
std::cerr << format_underline("[warning] "
|
||||
"toml::read_utf8_codepoint: codepoints in the range "
|
||||
"[0xD800, 0xDFFF] are not valid UTF-8.",
|
||||
loc, "not a valid UTF-8 codepoint") << std::endl;
|
||||
}
|
||||
assert(codepoint < 0xD800 || 0xDFFF < codepoint);
|
||||
// 1110yyyy 10yxxxxx 10xxxxxx
|
||||
character += static_cast<unsigned char>(0xE0| codepoint >> 12);
|
||||
character += static_cast<unsigned char>(0x80|(codepoint >> 6 & 0x3F));
|
||||
character += static_cast<unsigned char>(0x80|(codepoint & 0x3F));
|
||||
}
|
||||
else if(codepoint < 0x200000) // U+10000 ... U+1FFFFF
|
||||
else if(codepoint < 0x200000) // U+010000 ... U+1FFFFF
|
||||
{
|
||||
if(0x10FFFF < codepoint) // out of Unicode region
|
||||
{
|
||||
std::cerr << format_underline(concat_to_string("[warning] "
|
||||
"input codepoint (", str, ") is too large to decode as "
|
||||
"a unicode character. The result may not be able to render "
|
||||
"to your screen."), reg, "should be in [0x00..0x10FFFF]")
|
||||
<< std::endl;
|
||||
std::cerr << format_underline("[error] "
|
||||
"toml::read_utf8_codepoint: input codepoint is too large to "
|
||||
"decode as a unicode character.", loc,
|
||||
"should be in [0x00..0x10FFFF]") << std::endl;
|
||||
}
|
||||
// 11110yyy 10yyxxxx 10xxxxxx 10xxxxxx
|
||||
character += static_cast<unsigned char>(0xF0| codepoint >> 18);
|
||||
@ -300,7 +308,7 @@ result<std::string, std::string> parse_escape_sequence(location<Container>& loc)
|
||||
{
|
||||
if(const auto token = lex_escape_unicode_short::invoke(loc))
|
||||
{
|
||||
return ok(read_utf8_codepoint(token.unwrap()));
|
||||
return ok(read_utf8_codepoint(token.unwrap(), loc));
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -313,7 +321,7 @@ result<std::string, std::string> parse_escape_sequence(location<Container>& loc)
|
||||
{
|
||||
if(const auto token = lex_escape_unicode_long::invoke(loc))
|
||||
{
|
||||
return ok(read_utf8_codepoint(token.unwrap()));
|
||||
return ok(read_utf8_codepoint(token.unwrap(), loc));
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -868,16 +876,39 @@ parse_array(location<Container>& loc)
|
||||
{
|
||||
if(!retval.empty() && retval.front().type() != val.as_ok().type())
|
||||
{
|
||||
throw syntax_error(format_underline(
|
||||
"[error] toml::parse_array: type of elements should be the "
|
||||
"same each other.", region<Container>(loc, first, loc.iter()),
|
||||
"inhomogeneous types"));
|
||||
auto array_start_loc = loc;
|
||||
array_start_loc.iter() = first;
|
||||
|
||||
throw syntax_error(format_underline("[error] toml::parse_array: "
|
||||
"type of elements should be the same each other.",
|
||||
std::vector<std::pair<region_base const*, std::string>>{
|
||||
std::make_pair(
|
||||
std::addressof(array_start_loc),
|
||||
std::string("array starts here")
|
||||
),
|
||||
std::make_pair(
|
||||
std::addressof(get_region(retval.front())),
|
||||
std::string("value has type ") +
|
||||
stringize(retval.front().type())
|
||||
),
|
||||
std::make_pair(
|
||||
std::addressof(get_region(val.unwrap())),
|
||||
std::string("value has different type, ") +
|
||||
stringize(val.unwrap().type())
|
||||
)
|
||||
}));
|
||||
}
|
||||
retval.push_back(std::move(val.unwrap()));
|
||||
}
|
||||
else
|
||||
{
|
||||
return err(val.unwrap_err());
|
||||
auto array_start_loc = loc;
|
||||
array_start_loc.iter() = first;
|
||||
|
||||
throw syntax_error(format_underline("[error] toml::parse_array: "
|
||||
"value having invalid format appeared in an array",
|
||||
array_start_loc, "array starts here",
|
||||
loc, "it is not a valid value."));
|
||||
}
|
||||
|
||||
using lex_array_separator = sequence<maybe<lex_ws>, character<','>>;
|
||||
@ -893,8 +924,12 @@ parse_array(location<Container>& loc)
|
||||
}
|
||||
else
|
||||
{
|
||||
auto array_start_loc = loc;
|
||||
array_start_loc.iter() = first;
|
||||
|
||||
throw syntax_error(format_underline("[error] toml::parse_array:"
|
||||
" missing array separator `,`", loc, "should be `,`"));
|
||||
" missing array separator `,` after a value",
|
||||
array_start_loc, "array starts here", loc, "should be `,`"));
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -952,6 +987,7 @@ parse_key_value_pair(location<Container>& loc)
|
||||
{
|
||||
std::string msg;
|
||||
loc.iter() = after_kvsp;
|
||||
// check there is something not a comment/whitespace after `=`
|
||||
if(sequence<maybe<lex_ws>, maybe<lex_comment>, lex_newline>::invoke(loc))
|
||||
{
|
||||
loc.iter() = after_kvsp;
|
||||
@ -959,10 +995,9 @@ parse_key_value_pair(location<Container>& loc)
|
||||
"missing value after key-value separator '='", loc,
|
||||
"expected value, but got nothing");
|
||||
}
|
||||
else
|
||||
else // there is something not a comment/whitespace, so invalid format.
|
||||
{
|
||||
msg = format_underline("[error] toml::parse_key_value_pair: "
|
||||
"invalid value format", loc, val.unwrap_err());
|
||||
msg = std::move(val.unwrap_err());
|
||||
}
|
||||
loc.iter() = first;
|
||||
return err(msg);
|
||||
@ -1193,7 +1228,7 @@ insert_nested_key(table& root, const toml::value& v,
|
||||
"[error] toml::insert_value: value (\"",
|
||||
format_dotted_keys(first, last), "\") already exists."),
|
||||
get_region(tab->at(k)), "value already exists here",
|
||||
get_region(v), "value inserted twice"));
|
||||
get_region(v), "value defined twice"));
|
||||
}
|
||||
}
|
||||
tab->insert(std::make_pair(k, v));
|
||||
@ -1376,6 +1411,20 @@ parse_table_key(location<Container>& loc)
|
||||
throw internal_error(format_underline("[error] "
|
||||
"toml::parse_table_key: no `]`", inner_loc, "should be `]`"));
|
||||
}
|
||||
|
||||
// after [table.key], a newline or EOF (empty table) is required.
|
||||
if(loc.iter() != loc.end())
|
||||
{
|
||||
using lex_newline_after_table_key =
|
||||
sequence<maybe<lex_ws>, maybe<lex_comment>, lex_newline>;
|
||||
const auto nl = lex_newline_after_table_key::invoke(loc);
|
||||
if(!nl)
|
||||
{
|
||||
throw syntax_error(format_underline("[error] "
|
||||
"toml::parse_table_key: newline required after [table.key]",
|
||||
loc, "expected newline"));
|
||||
}
|
||||
}
|
||||
return ok(std::make_pair(keys.unwrap().first, token.unwrap()));
|
||||
}
|
||||
else
|
||||
@ -1414,6 +1463,20 @@ parse_array_table_key(location<Container>& loc)
|
||||
throw internal_error(format_underline("[error] "
|
||||
"toml::parse_table_key: no `]]`", inner_loc, "should be `]]`"));
|
||||
}
|
||||
|
||||
// after [[table.key]], a newline or EOF (empty table) is required.
|
||||
if(loc.iter() != loc.end())
|
||||
{
|
||||
using lex_newline_after_table_key =
|
||||
sequence<maybe<lex_ws>, maybe<lex_comment>, lex_newline>;
|
||||
const auto nl = lex_newline_after_table_key::invoke(loc);
|
||||
if(!nl)
|
||||
{
|
||||
throw syntax_error(format_underline("[error] "
|
||||
"toml::parse_array_table_key: newline required after "
|
||||
"[[table.key]]", loc, "expected newline"));
|
||||
}
|
||||
}
|
||||
return ok(std::make_pair(keys.unwrap().first, token.unwrap()));
|
||||
}
|
||||
else
|
||||
@ -1429,7 +1492,7 @@ result<table, std::string> parse_ml_table(location<Container>& loc)
|
||||
const auto first = loc.iter();
|
||||
if(first == loc.end())
|
||||
{
|
||||
return err(std::string("toml::parse_ml_table: input is empty"));
|
||||
return ok(toml::table{});
|
||||
}
|
||||
|
||||
// XXX at least one newline is needed.
|
||||
@ -1508,11 +1571,11 @@ result<table, std::string> parse_toml_file(location<Container>& loc)
|
||||
const auto first = loc.iter();
|
||||
if(first == loc.end())
|
||||
{
|
||||
return err(std::string("toml::detail::parse_toml_file: input is empty"));
|
||||
return ok(toml::table{});
|
||||
}
|
||||
|
||||
table data;
|
||||
/* root object is also table, but without [tablename] */
|
||||
// root object is also a table, but without [tablename]
|
||||
if(auto tab = parse_ml_table(loc))
|
||||
{
|
||||
data = std::move(tab.unwrap());
|
||||
|
259
toml/region.hpp
259
toml/region.hpp
@ -28,44 +28,6 @@ inline std::string make_string(std::size_t len, char c)
|
||||
return std::string(len, c);
|
||||
}
|
||||
|
||||
// location in a container, normally in a file content.
|
||||
// shared_ptr points the resource that the iter points.
|
||||
// it can be used not only for resource handling, but also error message.
|
||||
template<typename Container>
|
||||
struct location
|
||||
{
|
||||
static_assert(std::is_same<char, typename Container::value_type>::value,"");
|
||||
using const_iterator = typename Container::const_iterator;
|
||||
using source_ptr = std::shared_ptr<const Container>;
|
||||
|
||||
location(std::string name, Container cont)
|
||||
: source_(std::make_shared<Container>(std::move(cont))),
|
||||
source_name_(std::move(name)), iter_(source_->cbegin())
|
||||
{}
|
||||
location(const location&) = default;
|
||||
location(location&&) = default;
|
||||
location& operator=(const location&) = default;
|
||||
location& operator=(location&&) = default;
|
||||
~location() = default;
|
||||
|
||||
const_iterator& iter() noexcept {return iter_;}
|
||||
const_iterator iter() const noexcept {return iter_;}
|
||||
|
||||
const_iterator begin() const noexcept {return source_->cbegin();}
|
||||
const_iterator end() const noexcept {return source_->cend();}
|
||||
|
||||
source_ptr const& source() const& noexcept {return source_;}
|
||||
source_ptr&& source() && noexcept {return std::move(source_);}
|
||||
|
||||
std::string const& name() const noexcept {return source_name_;}
|
||||
|
||||
private:
|
||||
|
||||
source_ptr source_;
|
||||
std::string source_name_;
|
||||
const_iterator iter_;
|
||||
};
|
||||
|
||||
// region in a container, normally in a file content.
|
||||
// shared_ptr points the resource that the iter points.
|
||||
// combinators returns this.
|
||||
@ -86,12 +48,89 @@ struct region_base
|
||||
virtual std::string line() const {return std::string("unknown line");}
|
||||
virtual std::string line_num() const {return std::string("?");}
|
||||
|
||||
|
||||
virtual std::size_t before() const noexcept {return 0;}
|
||||
virtual std::size_t size() const noexcept {return 0;}
|
||||
virtual std::size_t after() const noexcept {return 0;}
|
||||
};
|
||||
|
||||
// location in a container, normally in a file content.
|
||||
// shared_ptr points the resource that the iter points.
|
||||
// it can be used not only for resource handling, but also error message.
|
||||
//
|
||||
// it can be considered as a region that contains only one character.
|
||||
template<typename Container>
|
||||
struct location final : public region_base
|
||||
{
|
||||
static_assert(std::is_same<char, typename Container::value_type>::value,"");
|
||||
using const_iterator = typename Container::const_iterator;
|
||||
using source_ptr = std::shared_ptr<const Container>;
|
||||
|
||||
location(std::string name, Container cont)
|
||||
: source_(std::make_shared<Container>(std::move(cont))),
|
||||
source_name_(std::move(name)), iter_(source_->cbegin())
|
||||
{}
|
||||
location(const location&) = default;
|
||||
location(location&&) = default;
|
||||
location& operator=(const location&) = default;
|
||||
location& operator=(location&&) = default;
|
||||
~location() = default;
|
||||
|
||||
bool is_ok() const noexcept override {return static_cast<bool>(source_);}
|
||||
|
||||
const_iterator& iter() noexcept {return iter_;}
|
||||
const_iterator iter() const noexcept {return iter_;}
|
||||
|
||||
const_iterator begin() const noexcept {return source_->cbegin();}
|
||||
const_iterator end() const noexcept {return source_->cend();}
|
||||
|
||||
std::string str() const override {return make_string(1, *this->iter());}
|
||||
std::string name() const override {return source_name_;}
|
||||
|
||||
std::string line_num() const override
|
||||
{
|
||||
return std::to_string(1+std::count(this->begin(), this->iter(), '\n'));
|
||||
}
|
||||
|
||||
std::string line() const override
|
||||
{
|
||||
return make_string(this->line_begin(), this->line_end());
|
||||
}
|
||||
|
||||
const_iterator line_begin() const noexcept
|
||||
{
|
||||
using reverse_iterator = std::reverse_iterator<const_iterator>;
|
||||
return std::find(reverse_iterator(this->iter()),
|
||||
reverse_iterator(this->begin()), '\n').base();
|
||||
}
|
||||
const_iterator line_end() const noexcept
|
||||
{
|
||||
return std::find(this->iter(), this->end(), '\n');
|
||||
}
|
||||
|
||||
// location always points to a single character, so the size is 1.
|
||||
std::size_t size() const noexcept override
|
||||
{
|
||||
return 1u;
|
||||
}
|
||||
std::size_t before() const noexcept override
|
||||
{
|
||||
return std::distance(this->line_begin(), this->iter());
|
||||
}
|
||||
std::size_t after() const noexcept override
|
||||
{
|
||||
return std::distance(this->iter(), this->line_end());
|
||||
}
|
||||
|
||||
source_ptr const& source() const& noexcept {return source_;}
|
||||
source_ptr&& source() && noexcept {return std::move(source_);}
|
||||
|
||||
private:
|
||||
|
||||
source_ptr source_;
|
||||
std::string source_name_;
|
||||
const_iterator iter_;
|
||||
};
|
||||
|
||||
template<typename Container>
|
||||
struct region final : public region_base
|
||||
{
|
||||
@ -225,7 +264,19 @@ inline std::string format_underline(const std::string& message,
|
||||
retval += make_string(line_number.size() + 1, ' ');
|
||||
retval += " | ";
|
||||
retval += make_string(reg.before(), ' ');
|
||||
retval += make_string(reg.size(), '~');
|
||||
if(reg.size() == 1)
|
||||
{
|
||||
// invalid
|
||||
// ^------
|
||||
retval += '^';
|
||||
retval += make_string(reg.after(), '-');
|
||||
}
|
||||
else
|
||||
{
|
||||
// invalid
|
||||
// ~~~~~~~
|
||||
retval += make_string(reg.size(), '~');
|
||||
}
|
||||
retval += ' ';
|
||||
retval += comment_for_underline;
|
||||
if(helps.size() != 0)
|
||||
@ -270,7 +321,19 @@ inline std::string format_underline(const std::string& message,
|
||||
retval << make_string(line_num_width + 1, ' ');
|
||||
retval << " | ";
|
||||
retval << make_string(reg1.before(), ' ');
|
||||
retval << make_string(reg1.size(), '~');
|
||||
if(reg1.size() == 1)
|
||||
{
|
||||
// invalid
|
||||
// ^------
|
||||
retval << '^';
|
||||
retval << make_string(reg1.after(), '-');
|
||||
}
|
||||
else
|
||||
{
|
||||
// invalid
|
||||
// ~~~~~~~
|
||||
retval << make_string(reg1.size(), '~');
|
||||
}
|
||||
retval << ' ';
|
||||
retval << comment_for_underline1 << newline;
|
||||
// ---------------------------------------
|
||||
@ -287,7 +350,19 @@ inline std::string format_underline(const std::string& message,
|
||||
retval << make_string(line_num_width + 1, ' ');
|
||||
retval << " | ";
|
||||
retval << make_string(reg2.before(), ' ');
|
||||
retval << make_string(reg2.size(), '~');
|
||||
if(reg2.size() == 1)
|
||||
{
|
||||
// invalid
|
||||
// ^------
|
||||
retval << '^';
|
||||
retval << make_string(reg2.after(), '-');
|
||||
}
|
||||
else
|
||||
{
|
||||
// invalid
|
||||
// ~~~~~~~
|
||||
retval << make_string(reg2.size(), '~');
|
||||
}
|
||||
retval << ' ';
|
||||
retval << comment_for_underline2;
|
||||
if(helps.size() != 0)
|
||||
@ -305,62 +380,84 @@ inline std::string format_underline(const std::string& message,
|
||||
return retval.str();
|
||||
}
|
||||
|
||||
|
||||
// to show a better error message.
|
||||
template<typename Container>
|
||||
std::string
|
||||
format_underline(const std::string& message, const location<Container>& loc,
|
||||
const std::string& comment_for_underline,
|
||||
std::vector<std::string> helps = {})
|
||||
inline std::string format_underline(const std::string& message,
|
||||
std::vector<std::pair<region_base const*, std::string>> reg_com,
|
||||
std::vector<std::string> helps = {})
|
||||
{
|
||||
assert(!reg_com.empty());
|
||||
|
||||
#ifdef _WIN32
|
||||
const auto newline = "\r\n";
|
||||
#else
|
||||
const char newline = '\n';
|
||||
#endif
|
||||
using const_iterator = typename location<Container>::const_iterator;
|
||||
using reverse_iterator = std::reverse_iterator<const_iterator>;
|
||||
const auto line_begin = std::find(reverse_iterator(loc.iter()),
|
||||
reverse_iterator(loc.begin()),
|
||||
'\n').base();
|
||||
const auto line_end = std::find(loc.iter(), loc.end(), '\n');
|
||||
|
||||
const auto line_number = std::to_string(
|
||||
1 + std::count(loc.begin(), loc.iter(), '\n'));
|
||||
const auto line_num_width = std::max_element(reg_com.begin(), reg_com.end(),
|
||||
[](std::pair<region_base const*, std::string> const& lhs,
|
||||
std::pair<region_base const*, std::string> const& rhs)
|
||||
{
|
||||
return lhs.first->line_num().size() < rhs.first->line_num().size();
|
||||
}
|
||||
)->first->line_num().size();
|
||||
|
||||
std::ostringstream retval;
|
||||
retval << message << newline;
|
||||
|
||||
for(std::size_t i=0; i<reg_com.size(); ++i)
|
||||
{
|
||||
if(i!=0 && reg_com.at(i-1).first->name() == reg_com.at(i).first->name())
|
||||
{
|
||||
retval << " ..." << newline;
|
||||
}
|
||||
else
|
||||
{
|
||||
retval << " --> " << reg_com.at(i).first->name() << newline;
|
||||
}
|
||||
|
||||
const region_base* const reg = reg_com.at(i).first;
|
||||
const std::string& comment = reg_com.at(i).second;
|
||||
|
||||
|
||||
retval << ' ' << std::setw(line_num_width) << reg->line_num();
|
||||
retval << " | " << reg->line() << newline;
|
||||
retval << make_string(line_num_width + 1, ' ');
|
||||
retval << " | " << make_string(reg->before(), ' ');
|
||||
|
||||
if(reg->size() == 1)
|
||||
{
|
||||
// invalid
|
||||
// ^------
|
||||
retval << '^';
|
||||
retval << make_string(reg->after(), '-');
|
||||
}
|
||||
else
|
||||
{
|
||||
// invalid
|
||||
// ~~~~~~~
|
||||
retval << make_string(reg->size(), '~');
|
||||
}
|
||||
|
||||
retval << ' ';
|
||||
retval << comment << newline;
|
||||
}
|
||||
|
||||
std::string retval;
|
||||
retval += message;
|
||||
retval += newline;
|
||||
retval += " --> ";
|
||||
retval += loc.name();
|
||||
retval += newline;
|
||||
retval += ' ';
|
||||
retval += line_number;
|
||||
retval += " | ";
|
||||
retval += make_string(line_begin, line_end);
|
||||
retval += newline;
|
||||
retval += make_string(line_number.size() + 1, ' ');
|
||||
retval += " | ";
|
||||
retval += make_string(std::distance(line_begin, loc.iter()),' ');
|
||||
retval += '^';
|
||||
retval += make_string(std::distance(loc.iter(), line_end), '-');
|
||||
retval += ' ';
|
||||
retval += comment_for_underline;
|
||||
if(helps.size() != 0)
|
||||
{
|
||||
retval += newline;
|
||||
retval += make_string(line_number.size() + 1, ' ');
|
||||
retval += " | ";
|
||||
retval << newline;
|
||||
retval << make_string(line_num_width + 1, ' ');
|
||||
retval << " | ";
|
||||
for(const auto help : helps)
|
||||
{
|
||||
retval += newline;
|
||||
retval += "Hint: ";
|
||||
retval += help;
|
||||
retval << newline;
|
||||
retval << "Hint: ";
|
||||
retval << help;
|
||||
}
|
||||
}
|
||||
return retval;
|
||||
return retval.str();
|
||||
}
|
||||
|
||||
|
||||
} // detail
|
||||
} // toml
|
||||
#endif// TOML11_REGION_H
|
||||
|
Loading…
Reference in New Issue
Block a user