mirror of
https://github.com/ToruNiina/toml11.git
synced 2025-01-10 09:20:11 +00:00
🔀 Merge branch 'guess-type-error'
This commit is contained in:
commit
fd980a8c5d
18
README.md
18
README.md
@ -144,6 +144,24 @@ terminate called after throwing an instance of 'toml::syntax_error'
|
||||
| ~~~~~~~ table defined twice
|
||||
```
|
||||
|
||||
When toml11 encounters a malformed value, it tries to detect what type it is.
|
||||
Then it shows hints to fix the format. An error message produced while reading one of
|
||||
the malformed files in [the language agnostic test suite](https://github.com/BurntSushi/toml-test)
|
||||
is shown below.
|
||||
|
||||
```console
|
||||
what(): [error] bad time: should be HH:MM:SS.subsec
|
||||
--> ./datetime-malformed-no-secs.toml
|
||||
1 | no-secs = 1987-07-05T17:45Z
|
||||
| ^------- HH:MM:SS.subsec
|
||||
|
|
||||
Hint: OK: 1979-05-27T07:32:00, 1979-05-27 07:32:00.999999
|
||||
Hint: NG: 1979-05-27T7:32:00, 1979-05-27 7:32
|
||||
```
|
||||
|
||||
You can find other examples in a job named `output_result` on
|
||||
[CircleCI](https://circleci.com/gh/ToruNiina/toml11).
|
||||
|
||||
Since generating error messages is generally a difficult task, the current
|
||||
status is not ideal. If you encounter a weird error message, please let us know
|
||||
and contribute to improving the quality!
|
||||
|
154
toml/parser.hpp
154
toml/parser.hpp
@ -1410,41 +1410,156 @@ parse_inline_table(location<Container>& loc)
|
||||
}
|
||||
|
||||
template<typename Container>
|
||||
value_t guess_number_type(const location<Container>& l)
|
||||
result<value_t, std::string> guess_number_type(const location<Container>& l)
|
||||
{
|
||||
// This function tries to find some (common) mistakes by checking characters
|
||||
// that follow the last character of a value. But it is often difficult
|
||||
// because some non-newline characters can appear after a value. E.g.
|
||||
// spaces, tabs, commas (in an array or inline table), closing brackets
|
||||
// (of an array or inline table), comment-sign (#). Since this function
|
||||
// does not parse further, those characters are always allowed to be there.
|
||||
location<Container> loc = l;
|
||||
|
||||
if(lex_offset_date_time::invoke(loc)) {return value_t::OffsetDatetime;}
|
||||
if(lex_offset_date_time::invoke(loc)) {return ok(value_t::OffsetDatetime);}
|
||||
loc.reset(l.iter());
|
||||
|
||||
if(lex_local_date_time::invoke(loc)) {return value_t::LocalDatetime;}
|
||||
if(lex_local_date_time::invoke(loc))
|
||||
{
|
||||
// bad offset may appear after this.
|
||||
if(loc.iter() != loc.end() && (*loc.iter() == '+' || *loc.iter() == '-'
|
||||
|| *loc.iter() == 'Z' || *loc.iter() == 'z'))
|
||||
{
|
||||
return err(format_underline("[error] bad offset: should be [+-]HH:MM or Z",
|
||||
{{std::addressof(loc), "[+-]HH:MM or Z"}},
|
||||
{"OK: +09:00, -05:30", "NG: +9:00, -5:30"}));
|
||||
}
|
||||
return ok(value_t::LocalDatetime);
|
||||
}
|
||||
loc.reset(l.iter());
|
||||
|
||||
if(lex_local_date::invoke(loc)) {return value_t::LocalDate;}
|
||||
if(lex_local_date::invoke(loc))
|
||||
{
|
||||
// bad time may appear after this.
|
||||
// A space is allowed as a delimiter between local time. But there are
|
||||
// both cases in which a space becomes valid or invalid.
|
||||
// - invalid: 2019-06-16 7:00:00
|
||||
// - valid : 2019-06-16 07:00:00
|
||||
if(loc.iter() != loc.end())
|
||||
{
|
||||
const auto c = *loc.iter();
|
||||
if(c == 'T' || c == 't')
|
||||
{
|
||||
return err(format_underline("[error] bad time: should be HH:MM:SS.subsec",
|
||||
{{std::addressof(loc), "HH:MM:SS.subsec"}},
|
||||
{"OK: 1979-05-27T07:32:00, 1979-05-27 07:32:00.999999",
|
||||
"NG: 1979-05-27T7:32:00, 1979-05-27 7:32"}));
|
||||
}
|
||||
if('0' <= c && c <= '9')
|
||||
{
|
||||
return err(format_underline("[error] bad time: missing T",
|
||||
{{std::addressof(loc), "T or space required here"}},
|
||||
{"OK: 1979-05-27T07:32:00, 1979-05-27 07:32:00.999999",
|
||||
"NG: 1979-05-27T7:32:00, 1979-05-27 7:32"}));
|
||||
}
|
||||
if(c == ' ' && std::next(loc.iter()) != loc.end() &&
|
||||
('0' <= *std::next(loc.iter()) && *std::next(loc.iter())<= '9'))
|
||||
{
|
||||
loc.advance();
|
||||
return err(format_underline("[error] bad time: should be HH:MM:SS.subsec",
|
||||
{{std::addressof(loc), "HH:MM:SS.subsec"}},
|
||||
{"OK: 1979-05-27T07:32:00, 1979-05-27 07:32:00.999999",
|
||||
"NG: 1979-05-27T7:32:00, 1979-05-27 7:32"}));
|
||||
}
|
||||
}
|
||||
return ok(value_t::LocalDate);
|
||||
}
|
||||
loc.reset(l.iter());
|
||||
|
||||
if(lex_local_time::invoke(loc)) {return value_t::LocalTime;}
|
||||
if(lex_local_time::invoke(loc)) {return ok(value_t::LocalTime);}
|
||||
loc.reset(l.iter());
|
||||
|
||||
if(lex_float::invoke(loc)) {return value_t::Float;}
|
||||
if(lex_float::invoke(loc))
|
||||
{
|
||||
if(loc.iter() != loc.end() && *loc.iter() == '_')
|
||||
{
|
||||
return err(format_underline("[error] bad float: `_` should be surrounded by digits",
|
||||
{{std::addressof(loc), "here"}},
|
||||
{"OK: +1.0, -2e-2, 3.141_592_653_589, inf, nan",
|
||||
"NG: .0, 1., _1.0, 1.0_, 1_.0, 1.0__0"}));
|
||||
}
|
||||
return ok(value_t::Float);
|
||||
}
|
||||
loc.reset(l.iter());
|
||||
|
||||
return value_t::Integer;
|
||||
if(lex_integer::invoke(loc))
|
||||
{
|
||||
if(loc.iter() != loc.end())
|
||||
{
|
||||
const auto c = *loc.iter();
|
||||
if(c == '_')
|
||||
{
|
||||
return err(format_underline("[error] bad integer: `_` should be surrounded by digits",
|
||||
{{std::addressof(loc), "here"}},
|
||||
{"OK: -42, 1_000, 1_2_3_4_5, 0xC0FFEE, 0b0010, 0o755",
|
||||
"NG: 1__000, 0123"}));
|
||||
}
|
||||
if('0' <= c && c <= '9')
|
||||
{
|
||||
// leading zero. point '0'
|
||||
loc.retrace();
|
||||
return err(format_underline("[error] bad integer: leading zero",
|
||||
{{std::addressof(loc), "here"}},
|
||||
{"OK: -42, 1_000, 1_2_3_4_5, 0xC0FFEE, 0b0010, 0o755",
|
||||
"NG: 1__000, 0123"}));
|
||||
}
|
||||
if(c == ':' || c == '-')
|
||||
{
|
||||
return err(format_underline("[error] bad datetime: invalid format",
|
||||
{{std::addressof(loc), "here"}},
|
||||
{"OK: 1979-05-27T07:32:00-07:00, 1979-05-27 07:32:00.999999Z",
|
||||
"NG: 1979-05-27T7:32:00-7:00, 1979-05-27 7:32-00:30"}));
|
||||
}
|
||||
if(c == '.' || c == 'e' || c == 'E')
|
||||
{
|
||||
return err(format_underline("[error] bad float: invalid format",
|
||||
{{std::addressof(loc), "here"}},
|
||||
{"OK: +1.0, -2e-2, 3.141_592_653_589, inf, nan",
|
||||
"NG: .0, 1., _1.0, 1.0_, 1_.0, 1.0__0"}));
|
||||
}
|
||||
}
|
||||
return ok(value_t::Integer);
|
||||
}
|
||||
if(loc.iter() != loc.end() && *loc.iter() == '.')
|
||||
{
|
||||
return err(format_underline("[error] bad float: invalid format",
|
||||
{{std::addressof(loc), "integer part required before this"}},
|
||||
{"OK: +1.0, -2e-2, 3.141_592_653_589, inf, nan",
|
||||
"NG: .0, 1., _1.0, 1.0_, 1_.0, 1.0__0"}));
|
||||
}
|
||||
if(loc.iter() != loc.end() && *loc.iter() == '_')
|
||||
{
|
||||
return err(format_underline("[error] bad number: `_` should be surrounded by digits",
|
||||
{{std::addressof(loc), "`_` is not surrounded by digits"}},
|
||||
{"OK: -42, 1_000, 1_2_3_4_5, 0xC0FFEE, 0b0010, 0o755",
|
||||
"NG: 1__000, 0123"}));
|
||||
}
|
||||
return err(format_underline("[error] bad format: unknown value appeared",
|
||||
{{std::addressof(loc), "here"}}));
|
||||
}
|
||||
|
||||
template<typename Container>
|
||||
value_t guess_value_type(const location<Container>& loc)
|
||||
result<value_t, std::string> guess_value_type(const location<Container>& loc)
|
||||
{
|
||||
switch(*loc.iter())
|
||||
{
|
||||
case '"' : {return value_t::String; }
|
||||
case '\'': {return value_t::String; }
|
||||
case 't' : {return value_t::Boolean;}
|
||||
case 'f' : {return value_t::Boolean;}
|
||||
case '[' : {return value_t::Array; }
|
||||
case '{' : {return value_t::Table; }
|
||||
case 'i' : {return value_t::Float; } // inf.
|
||||
case 'n' : {return value_t::Float; } // nan.
|
||||
case '"' : {return ok(value_t::String); }
|
||||
case '\'': {return ok(value_t::String); }
|
||||
case 't' : {return ok(value_t::Boolean);}
|
||||
case 'f' : {return ok(value_t::Boolean);}
|
||||
case '[' : {return ok(value_t::Array); }
|
||||
case '{' : {return ok(value_t::Table); }
|
||||
case 'i' : {return ok(value_t::Float); } // inf.
|
||||
case 'n' : {return ok(value_t::Float); } // nan.
|
||||
default : {return guess_number_type(loc);}
|
||||
}
|
||||
}
|
||||
@ -1459,7 +1574,12 @@ result<value, std::string> parse_value(location<Container>& loc)
|
||||
{{std::addressof(loc), ""}}));
|
||||
}
|
||||
|
||||
switch(guess_value_type(loc))
|
||||
const auto type = guess_value_type(loc);
|
||||
if(!type)
|
||||
{
|
||||
return err(type.unwrap_err());
|
||||
}
|
||||
switch(type.unwrap())
|
||||
{
|
||||
case value_t::Boolean : {return parse_boolean(loc); }
|
||||
case value_t::Integer : {return parse_integer(loc); }
|
||||
|
Loading…
Reference in New Issue
Block a user