add parser of fundamental types

This commit is contained in:
ToruNiina 2017-05-06 18:44:19 +09:00
parent bbb9388d3a
commit 7dde4f3039
3 changed files with 673 additions and 0 deletions

View File

@ -7,6 +7,7 @@ set(TEST_NAMES
test_value_operator
test_datetime
test_acceptor
test_parser
)
add_definitions("-Wall -Wpedantic")

198
tests/test_parser.cpp Normal file
View File

@ -0,0 +1,198 @@
// Boost.Test module name for this translation unit (the parser tests).
#define BOOST_TEST_MODULE "test_parser"
#ifdef UNITTEST_FRAMEWORK_LIBRARY_EXIST
#include <boost/test/unit_test.hpp>
#else
#define BOOST_TEST_NO_LIB
#include <boost/test/included/unit_test.hpp>
#endif
#include <toml/acceptor.hpp>
#include <toml/parser.hpp>
#include <iostream>
// parse_barekey must hand back exactly the text delimited by the acceptor.
BOOST_AUTO_TEST_CASE(test_parse_barekey)
{
    typedef toml::parse_barekey<char> parser;
    typedef toml::is_barekey<char>    acceptor;

    // Every sample is expected to round-trip through the parser verbatim.
    const char* const samples[] = {"hoge", "bare-key", "bare_key", "42"};
    for(const char* const sample : samples)
    {
        const std::string source(sample);
        const std::string result = parser::invoke(
                source.cbegin(), acceptor::invoke(source.cbegin()));
        BOOST_CHECK_EQUAL(source, result);
    }
}
// Basic (double-quoted) single-line strings: quotes are stripped and escape
// sequences are replaced by the characters they stand for.
BOOST_AUTO_TEST_CASE(test_parse_basic_inline_string)
{
    typedef toml::parse_basic_inline_string<char> parser;
    typedef toml::is_basic_inline_string<char>    acceptor;

    // The acceptor supplies the end of the token; the parser consumes that range.
    const auto parse = [](const std::string& text) -> std::string
    {
        return parser::invoke(text.cbegin(), acceptor::invoke(text.cbegin()));
    };

    {
        // no escapes at all
        const std::string expected("simple");
        BOOST_CHECK_EQUAL(parse("\"simple\""), expected);
    }
    {
        // escaped quotes, tabs, a newline and a \u escape
        const std::string expected("I'm a string. \"You can quote me\". Name\tJosé\nLocation\tSF.");
        BOOST_CHECK_EQUAL(
            parse("\"I'm a string. \\\"You can quote me\\\". Name\\tJos\\u00E9\\nLocation\\tSF.\""),
            expected);
    }
}
// Basic multi-line (triple-quoted) strings: the newline right after the
// opening delimiter is dropped and a line-ending backslash swallows the
// whitespace/newlines that follow it.
BOOST_AUTO_TEST_CASE(test_parse_basic_multiline_string)
{
    typedef toml::parse_basic_multiline_string<char> parser;
    typedef toml::is_basic_multiline_string<char>    acceptor;

    // The acceptor supplies the end of the token; the parser consumes that range.
    const auto parse = [](const std::string& text) -> std::string
    {
        return parser::invoke(text.cbegin(), acceptor::invoke(text.cbegin()));
    };

    {
        // XXX: may need an #ifdef for the Windows platform -- TODO confirm
        const std::string expected("Roses are red\nViolets are blue");
        BOOST_CHECK_EQUAL(
            parse("\"\"\"\nRoses are red\nViolets are blue\"\"\""), expected);
    }

    // Both spellings below must collapse to the same single-line sentence.
    const std::string sentence("The quick brown fox jumps over the lazy dog.");
    {
        BOOST_CHECK_EQUAL(
            parse("\"\"\"\nThe quick brown \\\n\n fox jumps over \\\n the lazy dog.\"\"\""),
            sentence);
    }
    {
        BOOST_CHECK_EQUAL(
            parse("\"\"\"\nThe quick brown \\\n fox jumps over \\\n the lazy dog.\\\n \"\"\""),
            sentence);
    }
}
// Local times, with and without a fractional-second part.
BOOST_AUTO_TEST_CASE(test_parse_local_time)
{
    typedef toml::parse_local_time<char> parser;
    typedef toml::is_local_time<char>    acceptor;

    // The acceptor supplies the end of the token; the parser consumes that range.
    const auto parse = [](const std::string& text) -> toml::Datetime
    {
        return parser::invoke(text.cbegin(), acceptor::invoke(text.cbegin()));
    };

    {
        // no fraction: milli- and microseconds are zero
        const toml::Datetime expected(12, 34, 56, 0, 0);
        BOOST_CHECK_EQUAL(parse("12:34:56"), expected);
    }
    {
        // ".7" is read as 700 milliseconds
        const toml::Datetime expected(12, 34, 56, 700, 0);
        BOOST_CHECK_EQUAL(parse("12:34:56.7"), expected);
    }
    {
        // ".7891" is read as 789 milliseconds plus 100 microseconds
        const toml::Datetime expected(12, 34, 56, 789, 100);
        BOOST_CHECK_EQUAL(parse("12:34:56.7891"), expected);
    }
}
// A plain calendar date (year-month-day).
BOOST_AUTO_TEST_CASE(test_parse_local_date)
{
    typedef toml::parse_local_date<char> parser;
    typedef toml::is_local_date<char>    acceptor;

    const std::string    text("1979-09-27");
    const toml::Datetime expected(1979, 9, 27);

    // The acceptor supplies the end of the token; the parser consumes that range.
    const toml::Datetime parsed =
        parser::invoke(text.cbegin(), acceptor::invoke(text.cbegin()));
    BOOST_CHECK_EQUAL(parsed, expected);
}
// Date and time joined by 'T', without any UTC offset.
BOOST_AUTO_TEST_CASE(test_parse_local_date_time)
{
    typedef toml::parse_local_date_time<char> parser;
    typedef toml::is_local_date_time<char>    acceptor;

    // The acceptor supplies the end of the token; the parser consumes that range.
    const auto parse = [](const std::string& text) -> toml::Datetime
    {
        return parser::invoke(text.cbegin(), acceptor::invoke(text.cbegin()));
    };

    {
        // whole seconds only
        const toml::Datetime expected(1979, 9, 27, 12, 34, 56, 0, 0);
        BOOST_CHECK_EQUAL(parse("1979-09-27T12:34:56"), expected);
    }
    {
        // six fractional digits: 789 ms and 0 us
        const toml::Datetime expected(1979, 9, 27, 12, 34, 56, 789, 0);
        BOOST_CHECK_EQUAL(parse("1979-09-27T12:34:56.789000"), expected);
    }
}
// Date-times carrying an offset: 'Z', '+hh:mm' or '-hh:mm', each with and
// without fractional seconds.
BOOST_AUTO_TEST_CASE(test_parse_offset_date_time)
{
    typedef toml::parse_offset_date_time<char> parser;
    typedef toml::is_offset_date_time<char>    acceptor;

    // The acceptor supplies the end of the token; the parser consumes that range.
    const auto parse = [](const std::string& text) -> toml::Datetime
    {
        return parser::invoke(text.cbegin(), acceptor::invoke(text.cbegin()));
    };

    // --- 'Z' gives a zero offset ---
    {
        const toml::Datetime expected(1979, 9, 27, 12, 34, 56, 0, 0, 0, 0);
        BOOST_CHECK_EQUAL(parse("1979-09-27T12:34:56Z"), expected);
    }
    {
        const toml::Datetime expected(1979, 9, 27, 12, 34, 56, 789, 0, 0, 0);
        BOOST_CHECK_EQUAL(parse("1979-09-27T12:34:56.789000Z"), expected);
    }

    // --- positive offset ---
    {
        const toml::Datetime expected(1979, 9, 27, 12, 34, 56, 0, 0, 7, 30);
        BOOST_CHECK_EQUAL(parse("1979-09-27T12:34:56+07:30"), expected);
    }
    {
        const toml::Datetime expected(1979, 9, 27, 12, 34, 56, 789, 0, 7, 30);
        BOOST_CHECK_EQUAL(parse("1979-09-27T12:34:56.789000+07:30"), expected);
    }

    // --- negative offset: both hour and minute come out negative ---
    {
        const toml::Datetime expected(1979, 9, 27, 12, 34, 56, 0, 0, -7, -30);
        BOOST_CHECK_EQUAL(parse("1979-09-27T12:34:56-07:30"), expected);
    }
    {
        const toml::Datetime expected(1979, 9, 27, 12, 34, 56, 789, 0, -7, -30);
        BOOST_CHECK_EQUAL(parse("1979-09-27T12:34:56.789000-07:30"), expected);
    }
}

474
toml/parser.hpp Normal file
View File

@ -0,0 +1,474 @@
#ifndef TOML11_PARSER
#define TOML11_PARSER
#include "value.hpp"
#include "acceptor.hpp"
#include <algorithm>
namespace toml
{
/// Turns an already-delimited bare key into a toml::key.
template<typename charT>
struct parse_barekey
{
    using value_type  = charT;
    using result_type = toml::key;

    static_assert(std::is_same<value_type, result_type::value_type>::value,
                  "char type is different from default key type");

    /// Copies the characters of [iter, end) into a key.
    /// No validation happens here; the range is used exactly as given.
    template<typename Iterator, class = typename std::enable_if<
        std::is_same<typename std::iterator_traits<Iterator>::value_type,
                     value_type>::value>::type>
    static result_type invoke(Iterator iter, Iterator end)
    {
        result_type key(iter, end);
        return key;
    }
};
/// Decodes one backslash escape sequence and returns the text it stands for
/// (UTF-8 encoded for the \u / \U forms).
template<typename charT>
struct parse_escape_sequence
{
    typedef charT value_type;
    typedef toml::String string_type;
    typedef string_type result_type;
    static_assert(std::is_same<value_type, result_type::value_type
            >::value, "char type is different from default String type");

    /// Decodes the escape sequence that starts at `iter`.
    ///
    /// `iter` must point at the backslash (checked with assert).
    /// XXX: `iter` is taken by reference and is advanced past the whole
    /// sequence, so the caller continues right after it.
    /// Throws syntax_error when the character after the backslash is not one
    /// of the handled escapes. The \u and \U forms read 4 resp. 8 characters
    /// without checking that they exist; the caller must guarantee that.
    template<typename Iterator, class = typename std::enable_if<
        std::is_same<typename std::iterator_traits<Iterator>::value_type,
                     value_type>::value>::type>
    static result_type invoke(Iterator& iter)
    {
        assert(*iter == '\\');
        ++iter; // now at the character that selects the escape
        switch(*iter)
        {
            case '\\': ++iter; return "\\";
            case '"' : ++iter; return "\"";
            case 'b' : ++iter; return "\b";
            case 't' : ++iter; return "\t";
            case 'n' : ++iter; return "\n";
            case 'f' : ++iter; return "\f";
            case 'r' : ++iter; return "\r";
            case 'u' :
            {
                // \uXXXX: skip the 'u', then take four hex digits
                const Iterator digits = std::next(iter);
                std::advance(iter, 5);
                return utf8_to_char(make_codepoint(string_type(digits, iter)));
            }
            case 'U':
            {
                // \UXXXXXXXX: skip the 'U', then take eight hex digits
                const Iterator digits = std::next(iter);
                std::advance(iter, 9);
                return utf8_to_char(make_codepoint(string_type(digits, iter)));
            }
            default: throw syntax_error("unknown escape sequence");
        }
    }

    /// Interprets `str` as a hexadecimal number and returns its value.
    /// Returns 0 when nothing could be extracted.
    static unsigned int make_codepoint(string_type str)
    {
        unsigned int codepoint = 0;
        std::basic_istringstream<charT> iss(str);
        iss >> std::hex >> codepoint;
        return codepoint;
    }

    /// Encodes `codepoint` as a UTF-8 byte sequence of one to four bytes.
    /// NOTE(review): values above 0x10FFFF and surrogate code points are not
    /// rejected here; they are encoded as-is.
    static result_type utf8_to_char(const unsigned int codepoint)
    {
        result_type charactor;
        if(codepoint < 0x80)
        {
            // single byte: 0xxxxxxx
            charactor += static_cast<unsigned char>(codepoint);
        }
        else if(codepoint < 0x800)
        {
            // two bytes: 110xxxxx 10xxxxxx
            charactor += static_cast<unsigned char>(0xC0 | (codepoint >> 6));
            charactor += static_cast<unsigned char>(0x80 | (codepoint & 0x3F));
        }
        else if(codepoint < 0x10000)
        {
            // three bytes: 1110xxxx 10xxxxxx 10xxxxxx
            charactor += static_cast<unsigned char>(0xE0 | (codepoint >> 12));
            charactor += static_cast<unsigned char>(0x80 | ((codepoint >> 6) & 0x3F));
            charactor += static_cast<unsigned char>(0x80 | (codepoint & 0x3F));
        }
        else
        {
            // four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
            charactor += static_cast<unsigned char>(0xF0 | (codepoint >> 18));
            charactor += static_cast<unsigned char>(0x80 | ((codepoint >> 12) & 0x3F));
            charactor += static_cast<unsigned char>(0x80 | ((codepoint >> 6) & 0x3F));
            charactor += static_cast<unsigned char>(0x80 | (codepoint & 0x3F));
        }
        return charactor;
    }
};
/// Converts a double-quoted single-line string token into its content.
template<typename charT>
struct parse_basic_inline_string
{
    using value_type  = charT;
    using result_type = toml::String;

    static_assert(std::is_same<value_type, result_type::value_type>::value,
                  "char type is different from default String type");

    /// Parses [iter, end), which must include both surrounding quote
    /// characters. Escape sequences are decoded by parse_escape_sequence.
    /// Throws internal_error when the range is shorter than two characters.
    template<typename Iterator, class = typename std::enable_if<
        std::is_same<typename std::iterator_traits<Iterator>::value_type,
                     value_type>::value>::type>
    static result_type invoke(Iterator iter, Iterator end)
    {
        const auto length = std::distance(iter, end);
        if(length < 2)
        {
            throw internal_error("no basic inline string here");
        }

        result_type content;
        content.reserve(length - 2);

        // step over the opening quote and stop in front of the closing one
        Iterator cursor = iter;
        ++cursor;
        Iterator last = end;
        --last;

        while(cursor != last)
        {
            if(*cursor != '\\')
            {
                content.push_back(*cursor);
                ++cursor;
                continue;
            }
            // the escape parser moves `cursor` past the sequence itself
            content += parse_escape_sequence<charT>::invoke(cursor);
        }
        return content;
    }
};
/// Converts a triple-double-quoted multi-line string token into its content.
template<typename charT>
struct parse_basic_multiline_string
{
    using value_type  = charT;
    using result_type = toml::String;

    static_assert(std::is_same<value_type, result_type::value_type>::value,
                  "char type is different from default String type");

    // acceptor for a backslash followed by a newline
    typedef is_chain_of<is_charactor<charT, '\\'>, is_newline<charT>>
            is_line_ending_backslash;
    // acceptor for a run of whitespace / newline characters
    typedef is_repeat_of<is_one_of<is_whitespace<charT>, is_newline<charT>>,
            repeat_infinite()> ws_nl_after_backslash_remover;

    /// Parses [iter, end), which must include the three-character delimiters
    /// on both sides. Throws internal_error when the range is shorter than
    /// six characters.
    template<typename Iterator, class = typename std::enable_if<
        std::is_same<typename std::iterator_traits<Iterator>::value_type,
                     value_type>::value>::type>
    static result_type invoke(Iterator iter, Iterator end)
    {
        const auto length = std::distance(iter, end);
        if(length < 6)
        {
            throw internal_error("no basic multiline string here");
        }

        result_type content;
        content.reserve(length - 6);

        // strip the delimiters on both sides
        std::advance(iter, 3);
        std::advance(end, -3);
        // a newline directly after the opening delimiter is trimmed
        iter = is_newline<charT>::invoke(iter);

        while(iter != end)
        {
            if(*iter != '\\')
            {
                content.push_back(*iter);
                ++iter;
            }
            else if(is_line_ending_backslash::invoke(iter) == iter)
            {
                // the acceptor did not advance: this is an ordinary escape
                content += parse_escape_sequence<charT>::invoke(iter);
            }
            else
            {
                // line-ending backslash: drop it together with the
                // whitespace/newlines that follow
                iter = ws_nl_after_backslash_remover::invoke(std::next(iter));
            }
        }
        return content;
    }
};
/// Converts a single-line literal string token into its content.
/// Nothing inside the delimiters is interpreted; the characters are copied
/// verbatim.
template<typename charT>
struct parse_literal_inline_string
{
    typedef charT value_type;
    typedef toml::String result_type;
    static_assert(std::is_same<value_type, result_type::value_type
            >::value, "char type is different from default String type");

    /// Parses [iter, end), which must include the one-character delimiter on
    /// both sides. Throws internal_error when the range is shorter than two
    /// characters.
    template<typename Iterator, class = typename std::enable_if<
        std::is_same<typename std::iterator_traits<Iterator>::value_type,
                     value_type>::value>::type>
    static result_type invoke(Iterator iter, Iterator end)
    {
        if(std::distance(iter, end) < 2)
            throw internal_error("no literal inline string here");
        // drop the delimiters; everything in between is the value
        std::advance(iter, 1);
        std::advance(end, -1);
        return result_type(iter, end);
    }
};
/// Converts a multi-line literal string token into its content.
/// The characters between the delimiters are copied without interpretation.
template<typename charT>
struct parse_literal_multiline_string
{
    using value_type  = charT;
    using result_type = toml::String;

    static_assert(std::is_same<value_type, result_type::value_type>::value,
                  "char type is different from default String type");

    // Not used by invoke() below; kept as part of the public member types.
    typedef is_chain_of<is_charactor<charT, '\\'>, is_newline<charT>>
            is_line_ending_backslash;
    typedef is_repeat_of<is_one_of<is_whitespace<charT>, is_newline<charT>>,
            repeat_infinite()> ws_nl_after_backslash_remover;

    /// Parses [iter, end), which must include the three-character delimiters
    /// on both sides. Throws internal_error when the range is shorter than
    /// six characters.
    template<typename Iterator, class = typename std::enable_if<
        std::is_same<typename std::iterator_traits<Iterator>::value_type,
                     value_type>::value>::type>
    static result_type invoke(Iterator iter, Iterator end)
    {
        const auto length = std::distance(iter, end);
        if(length < 6)
        {
            throw internal_error("no literal multiline string here");
        }

        result_type content;
        content.reserve(length - 6);

        // strip the delimiters on both sides
        std::advance(iter, 3);
        std::advance(end, -3);
        // a newline directly after the opening delimiter is trimmed
        iter = is_newline<charT>::invoke(iter);

        for(; iter != end; ++iter)
        {
            content.push_back(*iter);
        }
        return content;
    }
};
/// Converts an integer token (optionally containing '_' separators) into a
/// toml::Integer.
template<typename charT>
struct parse_integer
{
    typedef charT value_type;
    typedef std::basic_string<value_type> string_type;
    typedef toml::Integer result_type;

    /// Parses [iter, end). Underscores are removed before conversion.
    /// The conversion uses std::stoll, so std::invalid_argument /
    /// std::out_of_range propagate when the text is not a number or does not
    /// fit into long long.
    template<typename Iterator, class = typename std::enable_if<
        std::is_same<typename std::iterator_traits<Iterator>::value_type,
                     value_type>::value>::type>
    static result_type invoke(Iterator iter, Iterator end)
    {
        string_type digits;
        digits.resize(std::distance(iter, end));
        // copy_if returns the end of what was actually written; erase the
        // unused tail so no padding characters remain in the buffer
        digits.erase(std::copy_if(iter, end, digits.begin(),
                                  [](charT c){return c != '_';}),
                     digits.end());
        // TOML integers are 64-bit; std::stoi would reject anything that
        // does not fit into int.
        return static_cast<result_type>(std::stoll(digits));
    }
};
/// Converts a floating-point token (optionally containing '_' separators)
/// into a toml::Float.
template<typename charT>
struct parse_float
{
    using value_type  = charT;
    using string_type = std::basic_string<value_type>;
    using result_type = toml::Float;

    /// Parses [iter, end). Underscores are skipped; the remaining text is
    /// converted with std::stod, whose exceptions propagate to the caller.
    template<typename Iterator, class = typename std::enable_if<
        std::is_same<typename std::iterator_traits<Iterator>::value_type,
                     value_type>::value>::type>
    static result_type invoke(Iterator iter, Iterator end)
    {
        string_type text;
        text.reserve(std::distance(iter, end));
        for(; iter != end; ++iter)
        {
            if(*iter != '_')
            {
                text.push_back(*iter);
            }
        }
        return std::stod(text);
    }
};
/// Converts a boolean token into a toml::Boolean.
template<typename charT>
struct parse_boolean
{
    using value_type  = charT;
    using result_type = toml::Boolean;

    /// Decides purely by length: a four-character range yields true, any
    /// other length yields false. The characters themselves are not
    /// inspected -- presumably the range was already accepted as `true` or
    /// `false` by the caller; verify against the acceptor.
    template<typename Iterator, class = typename std::enable_if<
        std::is_same<typename std::iterator_traits<Iterator>::value_type,
                     value_type>::value>::type>
    static result_type invoke(Iterator iter, Iterator end)
    {
        const auto length = std::distance(iter, end);
        const bool four_chars = (length == 4);
        return four_chars;
    }
};
/// Converts a local time token "hh:mm:ss[.fraction]" into a toml::Datetime
/// whose date fields are `undef` and whose offset fields are `nooffset`.
template<typename charT>
struct parse_local_time
{
    typedef charT value_type;
    typedef parse_local_time<charT> this_type;
    typedef std::basic_string<value_type> string_type;
    typedef toml::Datetime result_type;
    typedef typename result_type::number_type number_type;
    typedef is_repeat_of<is_number<charT>, 2> nums;   // two digits
    typedef is_charactor<charT, ':'> delim;           // field separator
    typedef is_charactor<charT, '.'> fract;           // starts the fraction

    /// Parses [iter, end).
    ///
    /// The first three fraction digits become milliseconds, the next three
    /// microseconds; missing digits count as zero and digits beyond the
    /// sixth are ignored. std::stoi exceptions propagate when a field is not
    /// numeric.
    template<typename Iterator, class = typename std::enable_if<
        std::is_same<typename std::iterator_traits<Iterator>::value_type,
                     value_type>::value>::type>
    static result_type invoke(Iterator iter, Iterator end)
    {
        result_type result;

        // hour
        Iterator field_end = nums::invoke(iter);
        result.hour = std::stoi(string_type(iter, field_end));
        iter = delim::invoke(field_end);

        // minute
        field_end = nums::invoke(iter);
        result.minute = std::stoi(string_type(iter, field_end));
        iter = delim::invoke(field_end);

        // second
        field_end = nums::invoke(iter);
        result.second = std::stoi(string_type(iter, field_end));

        // The acceptors only receive a single iterator and therefore have to
        // look at the element it points to; only probe for '.' when the range
        // has not ended yet.
        iter = field_end;
        if(iter != end)
        {
            iter = fract::invoke(iter);
        }

        if(iter == end)
        {
            // no fractional part
            result.millisecond = 0;
            result.microsecond = 0;
        }
        else if(std::distance(iter, end) <= 3)
        {
            // up to three digits: milliseconds only
            result.millisecond = this_type::parse_number(iter, end);
            result.microsecond = 0;
        }
        else
        {
            // first three digits are milliseconds, the rest microseconds
            result.millisecond = this_type::parse_number(iter, iter + 3);
            result.microsecond = this_type::parse_number(iter + 3, end);
        }

        result.offset_hour   = result_type::nooffset;
        result.offset_minute = result_type::nooffset;
        result.year  = result_type::undef;
        result.month = result_type::undef;
        result.day   = result_type::undef;
        return result;
    }

    /// Reads at most three digits from [iter, end) and right-pads them with
    /// '0' to exactly three digits before converting, so "7" yields 700 and
    /// "78" yields 780.
    template<typename Iterator, class = typename std::enable_if<
        std::is_same<typename std::iterator_traits<Iterator>::value_type,
                     value_type>::value>::type>
    static number_type parse_number(Iterator iter, Iterator end)
    {
        if(std::distance(iter, end) > 3) end = iter + 3;
        string_type str(iter, end);
        while(str.size() < 3){str += '0';}
        return std::stoi(str);
    }
};
/// Converts a local date token "YYYY-MM-DD" into a toml::Datetime whose time
/// fields are `undef` and whose offset fields are `nooffset`.
template<typename charT>
struct parse_local_date
{
    using value_type  = charT;
    using string_type = std::basic_string<value_type>;
    using result_type = toml::Datetime;

    // N consecutive digits
    template<std::size_t N>
    using nums = is_repeat_of<is_number<charT>, N>;
    // separator between year, month and day
    typedef is_charactor<charT, '-'> delim;

    /// Parses the date starting at `iter`. `end` is not consulted; the field
    /// boundaries come from the digit acceptors. std::stoi exceptions
    /// propagate when a field is not numeric.
    template<typename Iterator, class = typename std::enable_if<
        std::is_same<typename std::iterator_traits<Iterator>::value_type,
                     value_type>::value>::type>
    static result_type invoke(Iterator iter, Iterator end)
    {
        result_type date;

        // four-digit year
        const Iterator year_end = nums<4>::invoke(iter);
        date.year = std::stoi(string_type(iter, year_end));
        iter = delim::invoke(year_end);

        // two-digit month
        const Iterator month_end = nums<2>::invoke(iter);
        date.month = std::stoi(string_type(iter, month_end));
        iter = delim::invoke(month_end);

        // two-digit day
        const Iterator day_end = nums<2>::invoke(iter);
        date.day = std::stoi(string_type(iter, day_end));

        // a bare date carries neither an offset nor a time of day
        date.offset_hour   = result_type::nooffset;
        date.offset_minute = result_type::nooffset;
        date.hour        = result_type::undef;
        date.minute      = result_type::undef;
        date.second      = result_type::undef;
        date.millisecond = result_type::undef;
        date.microsecond = result_type::undef;
        return date;
    }
};
/// Converts a local date-time token "<date>T<time>" into a toml::Datetime
/// whose offset fields are `nooffset`.
template<typename charT>
struct parse_local_date_time
{
    using value_type  = charT;
    using string_type = std::basic_string<value_type>;
    using result_type = toml::Datetime;

    template<std::size_t N>
    using nums = is_repeat_of<is_number<charT>, N>;
    // separator between the date and the time part
    typedef is_charactor<charT, 'T'> delim;

    /// Parses [iter, end) by delegating to parse_local_date for the leading
    /// date and to parse_local_time for everything after the 'T'.
    template<typename Iterator, class = typename std::enable_if<
        std::is_same<typename std::iterator_traits<Iterator>::value_type,
                     value_type>::value>::type>
    static result_type invoke(Iterator iter, Iterator end)
    {
        // date part: [iter, date_end)
        const Iterator date_end = is_local_date<charT>::invoke(iter);
        result_type datetime = parse_local_date<charT>::invoke(iter, date_end);

        // time part: everything behind the 'T'
        const Iterator time_begin = delim::invoke(date_end);
        const result_type clock =
            parse_local_time<charT>::invoke(time_begin, end);

        // merge the time-of-day fields into the date
        datetime.hour        = clock.hour;
        datetime.minute      = clock.minute;
        datetime.second      = clock.second;
        datetime.millisecond = clock.millisecond;
        datetime.microsecond = clock.microsecond;

        datetime.offset_hour   = result_type::nooffset;
        datetime.offset_minute = result_type::nooffset;
        return datetime;
    }
};
/// Converts an offset date-time token "<date>T<time>(Z|+hh:mm|-hh:mm)" into
/// a toml::Datetime with the offset fields filled in.
template<typename charT>
struct parse_offset_date_time
{
    typedef charT value_type;
    typedef std::basic_string<value_type> string_type;
    typedef toml::Datetime result_type;
    template<std::size_t N>
    using nums = is_repeat_of<is_number<charT>, N>;  // N consecutive digits
    typedef is_charactor<charT, ':'> delim;          // between offset hh and mm

    /// Parses [iter, end).
    ///
    /// 'Z' yields an offset of 0:00. For '+hh:mm' / '-hh:mm' the sign is
    /// applied to BOTH offset_hour and offset_minute (e.g. "-07:30" gives
    /// -7 and -30).
    /// Throws syntax_error when no offset designator follows the local
    /// date-time part.
    template<typename Iterator, class = typename std::enable_if<
        std::is_same<typename std::iterator_traits<Iterator>::value_type,
                     value_type>::value>::type>
    static result_type invoke(Iterator iter, Iterator end)
    {
        // leading local date-time: [iter, datetime)
        const Iterator datetime = is_local_date_time<charT>::invoke(iter);
        result_type result = parse_local_date_time<charT>::invoke(iter, datetime);
        iter = datetime;

        // The offset designator is mandatory; do not read past the range
        // when it is missing.
        if(iter == end)
            throw syntax_error("invalid offset-datetime");

        if(*iter == 'Z')
        {
            result.offset_hour   = 0;
            result.offset_minute = 0;
        }
        else
        {
            if(*iter != '+' && *iter != '-')
                throw syntax_error("invalid offset-datetime");
            const int sign = (*iter == '-') ? -1 : 1;
            ++iter;

            // offset hours
            const Iterator hour_end = nums<2>::invoke(iter);
            result.offset_hour = sign * std::stoi(string_type(iter, hour_end));
            iter = delim::invoke(hour_end);

            // offset minutes
            const Iterator minute_end = nums<2>::invoke(iter);
            result.offset_minute = sign * std::stoi(string_type(iter, minute_end));
        }
        return result;
    }
};
// template<typename Iterator>
// toml::key parse_key(Iterator iter, Iterator end)
// {
// const auto bare = accept_barekey_letter<Iterator>();
//
// }
}// toml
#endif// TOML11_PARSER