af637f7e89
Now matches toml/pull/891.
198 lines
5.5 KiB
C++
198 lines
5.5 KiB
C++
//# This file is a part of toml++ and is subject to the the terms of the MIT license.
|
|
//# Copyright (c) Mark Gillard <mark.gillard@outlook.com.au>
|
|
//# See https://github.com/marzer/tomlplusplus/blob/master/LICENSE for the full license text.
|
|
// SPDX-License-Identifier: MIT
|
|
#pragma once
|
|
|
|
#include "unicode_autogenerated.h"
|
|
#include "header_start.h"
|
|
/// \cond
|
|
|
|
TOML_IMPL_NAMESPACE_START
|
|
{
|
|
TOML_CONST_GETTER
|
|
constexpr bool is_string_delimiter(char32_t c) noexcept
|
|
{
|
|
return c == U'"' || c == U'\'';
|
|
}
|
|
|
|
TOML_CONST_GETTER
|
|
constexpr bool is_ascii_letter(char32_t c) noexcept
|
|
{
|
|
return (c >= U'a' && c <= U'z') || (c >= U'A' && c <= U'Z');
|
|
}
|
|
|
|
TOML_CONST_GETTER
|
|
constexpr bool is_binary_digit(char32_t c) noexcept
|
|
{
|
|
return c == U'0' || c == U'1';
|
|
}
|
|
|
|
TOML_CONST_GETTER
|
|
constexpr bool is_octal_digit(char32_t c) noexcept
|
|
{
|
|
return (c >= U'0' && c <= U'7');
|
|
}
|
|
|
|
TOML_CONST_GETTER
|
|
constexpr bool is_decimal_digit(char32_t c) noexcept
|
|
{
|
|
return (c >= U'0' && c <= U'9');
|
|
}
|
|
|
|
TOML_CONST_GETTER
|
|
constexpr bool is_hexadecimal_digit(char32_t c) noexcept
|
|
{
|
|
return U'0' <= c && c <= U'f' && (1ull << (static_cast<uint_least64_t>(c) - 0x30u)) & 0x7E0000007E03FFull;
|
|
}
|
|
|
|
template <typename T>
|
|
TOML_CONST_GETTER
|
|
constexpr uint_least32_t hex_to_dec(const T c) noexcept
|
|
{
|
|
if constexpr (std::is_same_v<remove_cvref<T>, uint_least32_t>)
|
|
return c >= 0x41u // >= 'A'
|
|
? 10u + (c | 0x20u) - 0x61u // - 'a'
|
|
: c - 0x30u // - '0'
|
|
;
|
|
else
|
|
return hex_to_dec(static_cast<uint_least32_t>(c));
|
|
}
|
|
|
|
TOML_CONST_GETTER
|
|
constexpr bool is_horizontal_whitespace(char32_t c) noexcept
|
|
{
|
|
return is_ascii_horizontal_whitespace(c) || is_non_ascii_horizontal_whitespace(c);
|
|
}
|
|
|
|
TOML_CONST_GETTER
|
|
constexpr bool is_vertical_whitespace(char32_t c) noexcept
|
|
{
|
|
return is_ascii_vertical_whitespace(c) || is_non_ascii_vertical_whitespace(c);
|
|
}
|
|
|
|
TOML_CONST_GETTER
|
|
constexpr bool is_whitespace(char32_t c) noexcept
|
|
{
|
|
return is_horizontal_whitespace(c) || is_vertical_whitespace(c);
|
|
}
|
|
|
|
TOML_CONST_GETTER
|
|
constexpr bool is_bare_key_character(char32_t c) noexcept
|
|
{
|
|
return is_ascii_bare_key_character(c)
|
|
#if TOML_LANG_UNRELEASED // toml/pull/891 (unicode bare keys)
|
|
|| is_non_ascii_bare_key_character(c)
|
|
#endif
|
|
;
|
|
}
|
|
|
|
TOML_CONST_GETTER
|
|
constexpr bool is_value_terminator(char32_t c) noexcept
|
|
{
|
|
return is_whitespace(c) || c == U']' || c == U'}' || c == U',' || c == U'#';
|
|
}
|
|
|
|
TOML_CONST_GETTER
|
|
constexpr bool is_control_character(char c) noexcept
|
|
{
|
|
return c <= '\u001F' || c == '\u007F';
|
|
}
|
|
|
|
TOML_CONST_GETTER
|
|
constexpr bool is_control_character(char32_t c) noexcept
|
|
{
|
|
return c <= U'\u001F' || c == U'\u007F';
|
|
}
|
|
|
|
TOML_CONST_GETTER
|
|
constexpr bool is_nontab_control_character(char32_t c) noexcept
|
|
{
|
|
return c <= U'\u0008' || (c >= U'\u000A' && c <= U'\u001F') || c == U'\u007F';
|
|
}
|
|
|
|
TOML_CONST_GETTER
|
|
constexpr bool is_unicode_surrogate(char32_t c) noexcept
|
|
{
|
|
return c >= 0xD800u && c <= 0xDFFF;
|
|
}
|
|
|
|
struct utf8_decoder
|
|
{
|
|
// utf8_decoder based on this: https://bjoern.hoehrmann.de/utf-8/decoder/dfa/
|
|
// Copyright (c) 2008-2009 Bjoern Hoehrmann <bjoern@hoehrmann.de>
|
|
|
|
uint_least32_t state{};
|
|
char32_t codepoint{};
|
|
|
|
static constexpr uint8_t state_table[]{
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1,
|
|
1, 1, 1, 1, 1, 1, 1, 1, 1, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 7, 7,
|
|
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
|
|
7, 7, 7, 8, 8, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
|
2, 2, 2, 2, 2, 2, 2, 2, 10, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 3, 3, 11, 6, 6,
|
|
6, 5, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
|
|
|
|
0, 12, 24, 36, 60, 96, 84, 12, 12, 12, 48, 72, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 0, 12,
|
|
12, 12, 12, 12, 0, 12, 0, 12, 12, 12, 24, 12, 12, 12, 12, 12, 24, 12, 24, 12, 12, 12, 12, 12, 12, 12, 12,
|
|
12, 24, 12, 12, 12, 12, 12, 24, 12, 12, 12, 12, 12, 12, 12, 24, 12, 12, 12, 12, 12, 12, 12, 12, 12, 36, 12,
|
|
36, 12, 12, 12, 36, 12, 12, 12, 12, 12, 36, 12, 36, 12, 12, 12, 36, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12
|
|
};
|
|
|
|
TOML_PURE_INLINE_GETTER
|
|
constexpr bool error() const noexcept
|
|
{
|
|
return state == uint_least32_t{ 12u };
|
|
}
|
|
|
|
TOML_PURE_INLINE_GETTER
|
|
constexpr bool has_code_point() const noexcept
|
|
{
|
|
return state == uint_least32_t{};
|
|
}
|
|
|
|
TOML_PURE_INLINE_GETTER
|
|
constexpr bool needs_more_input() const noexcept
|
|
{
|
|
return !has_code_point() && !error();
|
|
}
|
|
|
|
constexpr void operator()(uint8_t byte) noexcept
|
|
{
|
|
TOML_ASSERT_ASSUME(!error());
|
|
|
|
const auto type = state_table[byte];
|
|
|
|
codepoint = static_cast<char32_t>(has_code_point() ? (uint_least32_t{ 255u } >> type) & byte
|
|
: (byte & uint_least32_t{ 63u })
|
|
| (static_cast<uint_least32_t>(codepoint) << 6));
|
|
|
|
state = state_table[state + uint_least32_t{ 256u } + type];
|
|
}
|
|
|
|
TOML_ALWAYS_INLINE
|
|
constexpr void operator()(char c) noexcept
|
|
{
|
|
operator()(static_cast<uint8_t>(c));
|
|
}
|
|
|
|
TOML_ALWAYS_INLINE
|
|
constexpr void reset() noexcept
|
|
{
|
|
state = {};
|
|
}
|
|
};
|
|
|
|
TOML_PURE_GETTER
|
|
TOML_ATTR(nonnull)
|
|
bool is_ascii(const char* str, size_t len) noexcept;
|
|
}
|
|
TOML_IMPL_NAMESPACE_END;
|
|
|
|
/// \endcond
|
|
#include "header_end.h"
|