toml11/toml/serializer.hpp

813 lines
27 KiB
C++
Raw Normal View History

2019-02-13 04:37:33 +00:00
// Copyright Toru Niina 2019.
// Distributed under the MIT License.
#ifndef TOML11_SERIALIZER_HPP
#define TOML11_SERIALIZER_HPP
#include "value.hpp"
#include "lexer.hpp"
#include <limits>
#include <cstdio>
2019-02-13 04:37:33 +00:00
namespace toml
{
// This function serialize a key. It checks a string is a bare key and
// escapes special characters if the string is not compatible to a bare key.
// ```cpp
// std::string k("non.bare.key"); // the key itself includes `.`s.
// std::string formatted = toml::format_key(k);
// assert(formatted == "\"non.bare.key\"");
// ```
//
// This function is exposed to make it easy to write a user-defined serializer.
// Since toml restricts characters available in a bare key, generally a string
// should be escaped. But checking whether a string needs to be surrounded by
// a `"` and escaping some special character is boring.
inline std::string format_key(const toml::key& key)
{
detail::location<toml::key> loc(key, key);
detail::lex_unquoted_key::invoke(loc);
if(loc.iter() == loc.end())
{
return key; // all the tokens are consumed. the key is unquoted-key.
}
std::string token("\"");
for(const char c : key)
{
switch(c)
{
case '\\': {token += "\\\\"; break;}
case '\"': {token += "\\\""; break;}
case '\b': {token += "\\b"; break;}
case '\t': {token += "\\t"; break;}
case '\f': {token += "\\f"; break;}
case '\n': {token += "\\n"; break;}
case '\r': {token += "\\r"; break;}
default : {token += c; break;}
}
}
token += "\"";
return token;
}
template<typename Comment,
template<typename ...> class Table,
template<typename ...> class Array>
2019-02-13 04:37:33 +00:00
struct serializer
{
using value_type = basic_value<Comment, Table, Array>;
using key_type = typename value_type::key_type ;
using comment_type = typename value_type::comment_type ;
using boolean_type = typename value_type::boolean_type ;
using integer_type = typename value_type::integer_type ;
using floating_type = typename value_type::floating_type ;
using string_type = typename value_type::string_type ;
using local_time_type = typename value_type::local_time_type ;
using local_date_type = typename value_type::local_date_type ;
using local_datetime_type = typename value_type::local_datetime_type ;
using offset_datetime_type = typename value_type::offset_datetime_type;
using array_type = typename value_type::array_type ;
using table_type = typename value_type::table_type ;
serializer(const std::size_t w = 80u,
2019-02-13 04:37:33 +00:00
const int float_prec = std::numeric_limits<toml::floating>::max_digits10,
const bool can_be_inlined = false,
const bool no_comment = false,
2019-02-13 04:37:33 +00:00
std::vector<toml::key> ks = {})
: can_be_inlined_(can_be_inlined), no_comment_(no_comment),
float_prec_(float_prec), width_(w), keys_(std::move(ks))
2019-02-13 04:37:33 +00:00
{}
~serializer() = default;
std::string operator()(const boolean_type& b) const
2019-02-13 04:37:33 +00:00
{
return b ? "true" : "false";
}
std::string operator()(const integer_type i) const
2019-02-13 04:37:33 +00:00
{
return std::to_string(i);
}
std::string operator()(const floating_type f) const
2019-02-13 04:37:33 +00:00
{
const auto fmt = "%.*g";
2019-03-12 16:17:27 +00:00
const auto bsz = std::snprintf(nullptr, 0, fmt, this->float_prec_, f);
// +1 for null character(\0)
std::vector<char> buf(static_cast<std::size_t>(bsz + 1), '\0');
2019-03-12 16:17:27 +00:00
std::snprintf(buf.data(), buf.size(), fmt, this->float_prec_, f);
2019-02-13 04:37:33 +00:00
std::string token(buf.begin(), std::prev(buf.end()));
2019-02-13 04:37:33 +00:00
if(token.back() == '.') // 1. => 1.0
{
token += '0';
}
const auto e = std::find_if(
token.cbegin(), token.cend(), [](const char c) noexcept -> bool {
return c == 'e' || c == 'E';
2019-02-13 04:37:33 +00:00
});
const auto has_exponent = (token.cend() != e);
const auto has_fraction = (token.cend() != std::find(
token.cbegin(), token.cend(), '.'));
if(!has_exponent && !has_fraction)
{
// the resulting value does not have any float specific part!
token += ".0";
return token;
}
if(!has_exponent)
2019-02-13 04:37:33 +00:00
{
return token; // there is no exponent part. just return it.
}
#ifdef TOML11_USE_UNRELEASED_TOML_FEATURES
// Although currently it is not released yet as a tagged version,
// TOML will allow zero-prefix in an exponent part, such as `1.234e+01`.
// ```toml
// num1 = 1.234e+1 # OK in TOML v0.5.0
// num2 = 1.234e+01 # error in TOML v0.5.0 but will be allowed soon
// ```
// To avoid `e+01`, the following `else` section removes the zero
// prefixes in the exponent part.
// If the feature is activated, it can be skipped.
return token;
#else
// zero-prefix in an exponent is NOT allowed in TOML v0.5.0.
2019-02-13 04:37:33 +00:00
// remove it if it exists.
bool sign_exists = false;
std::size_t zero_prefix = 0;
for(auto iter = std::next(e), iend = token.cend(); iter != iend; ++iter)
{
if(*iter == '+' || *iter == '-'){sign_exists = true; continue;}
if(*iter == '0'){zero_prefix += 1;}
else {break;}
}
if(zero_prefix != 0)
{
const auto offset = std::distance(token.cbegin(), e) +
(sign_exists ? 2 : 1);
token.erase(static_cast<typename std::string::size_type>(offset),
zero_prefix);
2019-02-13 04:37:33 +00:00
}
return token;
#endif
2019-02-13 04:37:33 +00:00
}
std::string operator()(const string_type& s) const
2019-02-13 04:37:33 +00:00
{
if(s.kind == string_t::basic)
{
if(std::find(s.str.cbegin(), s.str.cend(), '\n') != s.str.cend() ||
std::find(s.str.cbegin(), s.str.cend(), '\"') != s.str.cend())
2019-02-13 04:37:33 +00:00
{
// if linefeed or double-quote is contained,
// make it multiline basic string.
const auto escaped = this->escape_ml_basic_string(s.str);
std::string open("\"\"\"");
std::string close("\"\"\"");
if(escaped.find('\n') != std::string::npos ||
this->width_ < escaped.size() + 6)
{
// if the string body contains newline or is enough long,
// add newlines after and before delimiters.
open += "\n";
close = std::string("\\\n") + close;
}
return open + escaped + close;
2019-02-13 04:37:33 +00:00
}
// no linefeed. try to make it oneline-string.
std::string oneline = this->escape_basic_string(s.str);
if(oneline.size() + 2 < width_ || width_ < 2)
{
const std::string quote("\"");
return quote + oneline + quote;
}
// the line is too long compared to the specified width.
// split it into multiple lines.
std::string token("\"\"\"\n");
while(!oneline.empty())
{
if(oneline.size() < width_)
{
token += oneline;
oneline.clear();
}
else if(oneline.at(width_-2) == '\\')
{
token += oneline.substr(0, width_-2);
token += "\\\n";
oneline.erase(0, width_-2);
}
else
{
token += oneline.substr(0, width_-1);
token += "\\\n";
oneline.erase(0, width_-1);
}
}
return token + std::string("\\\n\"\"\"");
}
else // the string `s` is literal-string.
{
if(std::find(s.str.cbegin(), s.str.cend(), '\n') != s.str.cend() ||
std::find(s.str.cbegin(), s.str.cend(), '\'') != s.str.cend() )
{
std::string open("'''");
if(this->width_ + 6 < s.str.size())
{
open += '\n'; // the first newline is ignored by TOML spec
}
2019-02-13 04:37:33 +00:00
const std::string close("'''");
return open + s.str + close;
}
else
{
const std::string quote("'");
return quote + s.str + quote;
}
}
}
std::string operator()(const local_date_type& d) const
2019-02-13 04:37:33 +00:00
{
std::ostringstream oss;
oss << d;
return oss.str();
}
std::string operator()(const local_time_type& t) const
2019-02-13 04:37:33 +00:00
{
std::ostringstream oss;
oss << t;
return oss.str();
}
std::string operator()(const local_datetime_type& dt) const
2019-02-13 04:37:33 +00:00
{
std::ostringstream oss;
oss << dt;
return oss.str();
}
std::string operator()(const offset_datetime_type& odt) const
2019-02-13 04:37:33 +00:00
{
std::ostringstream oss;
oss << odt;
return oss.str();
}
std::string operator()(const array_type& v) const
2019-02-13 04:37:33 +00:00
{
if(!v.empty() && v.front().is_table())// v is an array of tables
2019-02-13 04:37:33 +00:00
{
// if it's not inlined, we need to add `[[table.key]]`.
// but if it can be inlined,
// ```
// table.key = [
// {...},
// # comment
// {...},
// ]
// ```
2019-02-13 04:37:33 +00:00
if(this->can_be_inlined_)
{
std::string token;
if(!keys_.empty())
{
token += this->serialize_key(keys_.back());
token += " = ";
}
bool failed = false;
2019-02-13 04:37:33 +00:00
token += "[\n";
for(const auto& item : v)
{
// if an element of the table has a comment, the table
// cannot be inlined.
if(this->has_comment_inside(item.as_table()))
{
failed = true;
break;
}
if(!no_comment_)
{
for(const auto& c : item.comments())
{
token += '#';
token += c;
token += '\n';
}
}
const auto t = this->make_inline_table(item.as_table());
2019-02-13 04:37:33 +00:00
if(t.size() + 1 > width_ || // +1 for the last comma {...},
std::find(t.cbegin(), t.cend(), '\n') != t.cend())
{
failed = true;
2019-02-13 04:37:33 +00:00
break;
}
token += t;
token += ",\n";
}
if(!failed)
2019-02-13 04:37:33 +00:00
{
token += "]\n";
return token;
}
// if failed, serialize them as [[array.of.tables]].
2019-02-13 04:37:33 +00:00
}
std::string token;
for(const auto& item : v)
{
if(!no_comment_)
{
for(const auto& c : item.comments())
{
token += '#';
token += c;
token += '\n';
}
}
2019-02-13 04:37:33 +00:00
token += "[[";
token += this->serialize_dotted_key(keys_);
token += "]]\n";
token += this->make_multiline_table(item.as_table());
2019-02-13 04:37:33 +00:00
}
return token;
}
if(v.empty())
{
return std::string("[]");
}
2019-02-13 04:37:33 +00:00
// not an array of tables. normal array.
// first, try to make it inline if none of the elements have a comment.
if(!this->has_comment_inside(v))
2019-02-13 04:37:33 +00:00
{
const auto inl = this->make_inline_array(v);
if(inl.size() < this->width_ &&
std::find(inl.cbegin(), inl.cend(), '\n') == inl.cend())
{
return inl;
}
}
// if the length exceeds this->width_, print multiline array.
// key = [
// # ...
// 42,
// ...
// ]
2019-02-13 04:37:33 +00:00
std::string token;
std::string current_line;
2019-02-13 04:37:33 +00:00
token += "[\n";
for(const auto& item : v)
{
if(!item.comments().empty() && !no_comment_)
{
// if comment exists, the element must be the only element in the line.
// e.g. the following is not allowed.
// ```toml
// array = [
// # comment for what?
// 1, 2, 3, 4, 5
// ]
// ```
if(!current_line.empty())
{
if(current_line.back() != '\n')
{
current_line += '\n';
}
token += current_line;
current_line.clear();
}
for(const auto& c : item.comments())
{
token += '#';
token += c;
token += '\n';
}
token += toml::visit(*this, item);
if(token.back() == '\n') {token.pop_back();}
token += ",\n";
continue;
}
std::string next_elem;
next_elem += toml::visit(*this, item);
2019-02-14 07:17:32 +00:00
// comma before newline.
if(next_elem.back() == '\n') {next_elem.pop_back();}
// if current line does not exceeds the width limit, continue.
if(current_line.size() + next_elem.size() + 1 < this->width_)
{
current_line += next_elem;
current_line += ',';
}
else if(current_line.empty())
{
// if current line was empty, force put the next_elem because
// next_elem is not splittable
token += next_elem;
token += ",\n";
// current_line is kept empty
}
else // reset current_line
{
2019-02-14 07:17:32 +00:00
assert(current_line.back() == ',');
token += current_line;
2019-02-14 07:17:32 +00:00
token += '\n';
current_line = next_elem;
2019-02-14 07:17:32 +00:00
current_line += ',';
}
2019-02-13 04:37:33 +00:00
}
2019-06-17 16:27:52 +00:00
if(!current_line.empty())
{
if(current_line.back() != '\n') {current_line += '\n';}
token += current_line;
}
2019-02-13 04:37:33 +00:00
token += "]\n";
return token;
}
// templatize for any table-like container
std::string operator()(const table_type& v) const
2019-02-13 04:37:33 +00:00
{
// if an element has a comment, then it can't be inlined.
// table = {# how can we write a comment for this? key = "value"}
if(this->can_be_inlined_ && !(this->has_comment_inside(v)))
2019-02-13 04:37:33 +00:00
{
std::string token;
if(!this->keys_.empty())
{
token += this->serialize_key(this->keys_.back());
token += " = ";
}
token += this->make_inline_table(v);
if(token.size() < this->width_ &&
token.end() == std::find(token.begin(), token.end(), '\n'))
2019-02-13 04:37:33 +00:00
{
return token;
}
}
std::string token;
if(!keys_.empty())
{
token += '[';
token += this->serialize_dotted_key(keys_);
token += "]\n";
}
token += this->make_multiline_table(v);
return token;
}
private:
std::string serialize_key(const toml::key& key) const
{
return ::toml::format_key(key);
2019-02-13 04:37:33 +00:00
}
std::string serialize_dotted_key(const std::vector<toml::key>& keys) const
{
std::string token;
if(keys.empty()){return token;}
for(const auto& k : keys)
{
token += this->serialize_key(k);
token += '.';
}
token.erase(token.size() - 1, 1); // remove trailing `.`
return token;
}
std::string escape_basic_string(const std::string& s) const
{
//XXX assuming `s` is a valid utf-8 sequence.
std::string retval;
for(const char c : s)
{
switch(c)
{
case '\\': {retval += "\\\\"; break;}
case '\"': {retval += "\\\""; break;}
case '\b': {retval += "\\b"; break;}
case '\t': {retval += "\\t"; break;}
case '\f': {retval += "\\f"; break;}
case '\n': {retval += "\\n"; break;}
case '\r': {retval += "\\r"; break;}
default : {retval += c; break;}
}
}
return retval;
}
std::string escape_ml_basic_string(const std::string& s) const
{
std::string retval;
for(auto i=s.cbegin(), e=s.cend(); i!=e; ++i)
{
switch(*i)
{
case '\\': {retval += "\\\\"; break;}
// One or two consecutive "s are allowed.
// Later we will check there are no three consecutive "s.
// case '\"': {retval += "\\\""; break;}
2019-02-13 04:37:33 +00:00
case '\b': {retval += "\\b"; break;}
case '\t': {retval += "\\t"; break;}
case '\f': {retval += "\\f"; break;}
case '\n': {retval += "\n"; break;}
case '\r':
{
if(std::next(i) != e && *std::next(i) == '\n')
{
retval += "\r\n";
++i;
}
else
{
retval += "\\r";
}
break;
}
default: {retval += *i; break;}
}
}
// Only 1 or 2 consecutive `"`s are allowed in multiline basic string.
// 3 consecutive `"`s are considered as a closing delimiter.
// We need to check if there are 3 or more consecutive `"`s and insert
// backslash to break them down into several short `"`s like the `str6`
// in the following example.
// ```toml
// str4 = """Here are two quotation marks: "". Simple enough."""
// # str5 = """Here are three quotation marks: """.""" # INVALID
// str5 = """Here are three quotation marks: ""\"."""
// str6 = """Here are fifteen quotation marks: ""\"""\"""\"""\"""\"."""
// ```
auto found_3_quotes = retval.find("\"\"\"");
while(found_3_quotes != std::string::npos)
{
retval.replace(found_3_quotes, 3, "\"\"\\\"");
found_3_quotes = retval.find("\"\"\"");
}
2019-02-13 04:37:33 +00:00
return retval;
}
// if an element of a table or an array has a comment, it cannot be inlined.
bool has_comment_inside(const array_type& a) const noexcept
{
// if no_comment is set, comments would not be written.
if(this->no_comment_) {return false;}
for(const auto& v : a)
{
if(!v.comments().empty()) {return true;}
}
return false;
}
bool has_comment_inside(const table_type& t) const noexcept
{
// if no_comment is set, comments would not be written.
if(this->no_comment_) {return false;}
for(const auto& kv : t)
{
if(!kv.second.comments().empty()) {return true;}
}
return false;
}
std::string make_inline_array(const array_type& v) const
2019-02-13 04:37:33 +00:00
{
assert(!has_comment_inside(v));
2019-02-13 04:37:33 +00:00
std::string token;
token += '[';
bool is_first = true;
for(const auto& item : v)
{
if(is_first) {is_first = false;} else {token += ',';}
token += visit(serializer(std::numeric_limits<std::size_t>::max(),
this->float_prec_, true), item);
}
token += ']';
return token;
}
std::string make_inline_table(const table_type& v) const
2019-02-13 04:37:33 +00:00
{
assert(!has_comment_inside(v));
2019-02-13 04:37:33 +00:00
assert(this->can_be_inlined_);
std::string token;
token += '{';
bool is_first = true;
for(const auto& kv : v)
{
// in inline tables, trailing comma is not allowed (toml-lang #569).
if(is_first) {is_first = false;} else {token += ',';}
token += this->serialize_key(kv.first);
token += '=';
token += visit(serializer(std::numeric_limits<std::size_t>::max(),
this->float_prec_, true), kv.second);
}
token += '}';
return token;
}
std::string make_multiline_table(const table_type& v) const
2019-02-13 04:37:33 +00:00
{
std::string token;
// print non-table stuff first. because after printing [foo.bar], the
// remaining non-table values will be assigned into [foo.bar], not [foo]
2020-02-11 12:13:55 +00:00
for(const auto& kv : v)
2019-02-13 04:37:33 +00:00
{
if(kv.second.is_table() || is_array_of_tables(kv.second))
2019-02-13 04:37:33 +00:00
{
continue;
}
if(!kv.second.comments().empty() && !no_comment_)
{
for(const auto& c : kv.second.comments())
{
token += '#';
token += c;
token += '\n';
}
}
2019-02-13 04:51:08 +00:00
const auto key_and_sep = this->serialize_key(kv.first) + " = ";
2019-02-14 06:49:13 +00:00
const auto residual_width = (this->width_ > key_and_sep.size()) ?
this->width_ - key_and_sep.size() : 0;
2019-02-13 04:37:33 +00:00
token += key_and_sep;
token += visit(serializer(residual_width, this->float_prec_, true),
kv.second);
if(token.back() != '\n')
{
token += '\n';
}
2019-02-13 04:37:33 +00:00
}
// normal tables / array of tables
// after multiline table appeared, the other tables cannot be inline
// because the table would be assigned into the table.
// [foo]
// ...
// bar = {...} # <- bar will be a member of [foo].
bool multiline_table_printed = false;
for(const auto& kv : v)
{
if(!kv.second.is_table() && !is_array_of_tables(kv.second))
2019-02-13 04:37:33 +00:00
{
continue; // other stuff are already serialized. skip them.
}
std::vector<toml::key> ks(this->keys_);
ks.push_back(kv.first);
auto tmp = visit(serializer(this->width_, this->float_prec_,
!multiline_table_printed, this->no_comment_, ks),
2019-02-13 04:37:33 +00:00
kv.second);
if((!multiline_table_printed) &&
std::find(tmp.cbegin(), tmp.cend(), '\n') != tmp.cend())
{
multiline_table_printed = true;
}
else
{
// still inline tables only.
tmp += '\n';
}
if(!kv.second.comments().empty() && !no_comment_)
{
for(const auto& c : kv.second.comments())
{
token += '#';
token += c;
token += '\n';
}
}
2019-02-13 04:37:33 +00:00
token += tmp;
}
return token;
}
bool is_array_of_tables(const value_type& v) const
2019-02-13 04:37:33 +00:00
{
if(!v.is_array()) {return false;}
const auto& a = v.as_array();
return !a.empty() && a.front().is_table();
2019-02-13 04:37:33 +00:00
}
private:
bool can_be_inlined_;
bool no_comment_;
2019-02-13 04:37:33 +00:00
int float_prec_;
std::size_t width_;
2019-02-13 04:37:33 +00:00
std::vector<toml::key> keys_;
};
template<typename C,
template<typename ...> class M, template<typename ...> class V>
std::string
format(const basic_value<C, M, V>& v, std::size_t w = 80u,
int fprec = std::numeric_limits<toml::floating>::max_digits10,
bool no_comment = false, bool force_inline = false)
2019-02-13 04:37:33 +00:00
{
// if value is a table, it is considered to be a root object.
// the root object can't be an inline table.
if(v.is_table())
{
std::ostringstream oss;
if(!v.comments().empty())
{
oss << v.comments();
oss << '\n'; // to split the file comment from the first element
}
oss << visit(serializer<C, M, V>(w, fprec, no_comment, false), v);
return oss.str();
}
return visit(serializer<C, M, V>(w, fprec, force_inline), v);
2019-02-13 04:37:33 +00:00
}
2019-06-28 10:08:48 +00:00
namespace detail
{
template<typename charT, typename traits>
int comment_index(std::basic_ostream<charT, traits>&)
{
static const int index = std::ios_base::xalloc();
return index;
}
} // detail
template<typename charT, typename traits>
std::basic_ostream<charT, traits>&
nocomment(std::basic_ostream<charT, traits>& os)
{
// by default, it is zero. and by defalut, it shows comments.
os.iword(detail::comment_index(os)) = 1;
return os;
}
template<typename charT, typename traits>
std::basic_ostream<charT, traits>&
showcomment(std::basic_ostream<charT, traits>& os)
{
// by default, it is zero. and by defalut, it shows comments.
os.iword(detail::comment_index(os)) = 0;
return os;
}
template<typename charT, typename traits, typename C,
template<typename ...> class M, template<typename ...> class V>
2019-02-13 04:37:33 +00:00
std::basic_ostream<charT, traits>&
operator<<(std::basic_ostream<charT, traits>& os, const basic_value<C, M, V>& v)
{
// get status of std::setw().
const auto w = static_cast<std::size_t>(os.width());
2019-06-20 15:25:21 +00:00
const int fprec = static_cast<int>(os.precision());
os.width(0);
2019-06-28 10:08:48 +00:00
// by defualt, iword is initialized byl 0. And by default, toml11 outputs
// comments. So `0` means showcomment. 1 means nocommnet.
const bool no_comment = (1 == os.iword(detail::comment_index(os)));
2019-06-28 10:08:48 +00:00
if(!no_comment && v.is_table() && !v.comments().empty())
{
os << v.comments();
os << '\n'; // to split the file comment from the first element
}
// the root object can't be an inline table. so pass `false`.
os << visit(serializer<C, M, V>(w, fprec, false, no_comment), v);
// if v is a non-table value, and has only one comment, then
// put a comment just after a value. in the following way.
//
// ```toml
// key = "value" # comment.
// ```
//
// Since the top-level toml object is a table, one who want to put a
// non-table toml value must use this in a following way.
//
// ```cpp
// toml::value v;
// std::cout << "user-defined-key = " << v << std::endl;
// ```
//
// In this case, it is impossible to put comments before key-value pair.
// The only way to preserve comments is to put all of them after a value.
if(!no_comment && !v.is_table() && !v.comments().empty())
{
os << " #";
for(const auto& c : v.comments()) {os << c;}
}
return os;
}
2019-02-13 04:37:33 +00:00
} // toml
#endif// TOML11_SERIALIZER_HPP