// Copyright Toru Niina 2017. // Distributed under the MIT License. #ifndef TOML11_LEXER_HPP #define TOML11_LEXER_HPP #include "combinator.hpp" #include #include #include #include namespace toml { namespace detail { // these scans contents from current location in a container of char // and extract a region that matches their own pattern. // to see the implementation of each component, see combinator.hpp. using lex_wschar = either, character<'\t'>>; using lex_ws = repeat>; using lex_newline = either, sequence, character<'\n'>>>; using lex_lower = in_range<'a', 'z'>; using lex_upper = in_range<'A', 'Z'>; using lex_alpha = either; using lex_digit = in_range<'0', '9'>; using lex_nonzero = in_range<'1', '9'>; using lex_oct_dig = in_range<'0', '7'>; using lex_bin_dig = in_range<'0', '1'>; using lex_hex_dig = either, in_range<'a', 'f'>>; using lex_hex_prefix = sequence, character<'x'>>; using lex_oct_prefix = sequence, character<'o'>>; using lex_bin_prefix = sequence, character<'b'>>; using lex_underscore = character<'_'>; using lex_plus = character<'+'>; using lex_minus = character<'-'>; using lex_sign = either; // digit | nonzero 1*(digit | _ digit) using lex_unsigned_dec_int = either>, at_least<1>>>, lex_digit>; // (+|-)? unsigned_dec_int using lex_dec_int = sequence, lex_unsigned_dec_int>; // hex_prefix hex_dig *(hex_dig | _ hex_dig) using lex_hex_int = sequence>, unlimited>>>; // oct_prefix oct_dig *(oct_dig | _ oct_dig) using lex_oct_int = sequence>, unlimited>>>; // bin_prefix bin_dig *(bin_dig | _ bin_dig) using lex_bin_int = sequence>, unlimited>>>; // (dec_int | hex_int | oct_int | bin_int) using lex_integer = either; // =========================================================================== using lex_inf = sequence, character<'n'>, character<'f'>>; using lex_nan = sequence, character<'a'>, character<'n'>>; using lex_special_float = sequence, either>; using lex_exponent_part = sequence, character<'E'>>, lex_dec_int>; using lex_zero_prefixable_int = sequence>, unlimited>>; using lex_fractional_part = sequence, lex_zero_prefixable_int>; using lex_float = either>>>>; // =========================================================================== using lex_true = sequence, character<'r'>, character<'u'>, character<'e'>>; using lex_false = sequence, character<'a'>, character<'l'>, character<'s'>, character<'e'>>; using lex_boolean = either; // =========================================================================== using lex_date_fullyear = repeat>; using lex_date_month = repeat>; using lex_date_mday = repeat>; using lex_time_delim = either, character<'t'>, character<' '>>; using lex_time_hour = repeat>; using lex_time_minute = repeat>; using lex_time_second = repeat>; using lex_time_secfrac = sequence, repeat>>; using lex_time_numoffset = sequence, character<'-'>>, sequence, lex_time_minute>>; using lex_time_offset = either, character<'z'>, lex_time_numoffset>; using lex_partial_time = sequence, lex_time_minute, character<':'>, lex_time_second, maybe>; using lex_full_date = sequence, lex_date_month, character<'-'>, lex_date_mday>; using lex_full_time = sequence; using lex_offset_date_time = sequence; using lex_local_date_time = sequence; using lex_local_date = lex_full_date; using lex_local_time = lex_partial_time; // =========================================================================== using lex_quotation_mark = character<'"'>; using lex_basic_unescaped = exclude, character<0x22>, character<0x5C>, character<0x7F>>>; using lex_escape = character<'\\'>; using lex_escape_unicode_short = sequence, repeat>>; using lex_escape_unicode_long = sequence, repeat>>; using lex_escape_seq_char = either, character<'\\'>, character<'b'>, character<'f'>, character<'n'>, character<'r'>, character<'t'>, lex_escape_unicode_short, lex_escape_unicode_long >; using lex_escaped = sequence; using lex_basic_char = either; using lex_basic_string = sequence, lex_quotation_mark>; using lex_ml_basic_string_delim = repeat>; using lex_ml_basic_unescaped = exclude, character<0x5C>, character<0x7F>, lex_ml_basic_string_delim>>; using lex_ml_basic_escaped_newline = sequence< lex_escape, maybe, lex_newline, repeat, unlimited>>; using lex_ml_basic_char = either; using lex_ml_basic_body = repeat, unlimited>; using lex_ml_basic_string = sequence; using lex_literal_char = exclude, in_range<0x10, 0x19>, character<0x27>>>; using lex_apostrophe = character<'\''>; using lex_literal_string = sequence, lex_apostrophe>; using lex_ml_literal_string_delim = repeat>; using lex_ml_literal_char = exclude, in_range<0x10, 0x1F>, character<0x7F>, lex_ml_literal_string_delim>>; using lex_ml_literal_body = repeat, unlimited>; using lex_ml_literal_string = sequence; using lex_string = either; // =========================================================================== using lex_comment_start_symbol = character<'#'>; using lex_non_eol = either, exclude>>; using lex_comment = sequence>; using lex_dot_sep = sequence, character<'.'>, maybe>; using lex_unquoted_key = repeat, character<'_'>>, at_least<1>>; using lex_quoted_key = either; using lex_simple_key = either; using lex_dotted_key = sequence, at_least<1> > >; using lex_key = either; using lex_keyval_sep = sequence, character<'='>, maybe>; using lex_std_table_open = character<'['>; using lex_std_table_close = character<']'>; using lex_std_table = sequence, lex_key, maybe, lex_std_table_close>; using lex_array_table_open = sequence; using lex_array_table_close = sequence; using lex_array_table = sequence, lex_key, maybe, lex_array_table_close>; } // detail } // toml #endif // TOML_LEXER_HPP