mirror of
https://github.com/ToruNiina/toml11.git
synced 2024-11-21 20:10:05 +00:00
feat: reduce memory consumption with long line
source_location stores the whole line. In case of a short range in a long line like: ``` array = [1, 2, 3, ... , 100, 101, ..., 10000] ^^^- the region ``` It saves the whole line as a `std::string`. It consumes a lot of memory and slows down everything. We can omit most of the line because we only need the region, `100` here.
This commit is contained in:
parent
42a2628924
commit
869fdbdf8f
@ -82,11 +82,16 @@ class region
|
||||
const_iterator cend() const noexcept;
|
||||
|
||||
std::string as_string() const;
|
||||
std::vector<std::string> as_lines() const;
|
||||
std::vector<std::pair<std::string, std::size_t>> as_lines() const;
|
||||
|
||||
source_ptr const& source() const noexcept {return this->source_;}
|
||||
std::string const& source_name() const noexcept {return this->source_name_;}
|
||||
|
||||
private:
|
||||
|
||||
std::pair<std::string, std::size_t>
|
||||
take_line(const_iterator begin, const_iterator end) const;
|
||||
|
||||
private:
|
||||
|
||||
source_ptr source_;
|
||||
|
@ -10,7 +10,34 @@
|
||||
namespace toml
|
||||
{
|
||||
|
||||
//
|
||||
// A struct to contain location in a toml file.
|
||||
//
|
||||
// To reduce memory consumption, it omits unrelated parts of long lines, like:
|
||||
//
|
||||
// 1. one long line, short region
|
||||
// ```
|
||||
// |
|
||||
// 1 | ... "foo", "bar", baz, "qux", "foobar", ...
|
||||
// | ^-- unknown value
|
||||
// ```
|
||||
// 2. long region
|
||||
// ```
|
||||
// |
|
||||
// 1 | array = [ "foo", ... "bar" ]
|
||||
// | ^^^^^^^^^^^^^^^^^^^^- in this array
|
||||
// ```
|
||||
// 3. many lines
|
||||
// ```
|
||||
// |
|
||||
// 1 | array = [ "foo",
|
||||
// | ^^^^^^^^
|
||||
// | ...
|
||||
// | ^^^
|
||||
// |
|
||||
// 10 | , "bar"]
|
||||
// | ^^^^^^^^- in this array
|
||||
// ```
|
||||
//
|
||||
struct source_location
|
||||
{
|
||||
public:
|
||||
@ -39,13 +66,19 @@ struct source_location
|
||||
|
||||
std::vector<std::string> const& lines() const noexcept {return line_str_;}
|
||||
|
||||
// for internal use
|
||||
std::size_t first_column_offset() const noexcept {return this->first_offset_;}
|
||||
std::size_t last_column_offset() const noexcept {return this->last_offset_;}
|
||||
|
||||
private:
|
||||
|
||||
bool is_ok_;
|
||||
std::size_t first_line_;
|
||||
std::size_t first_column_;
|
||||
std::size_t first_column_; // column num in the actual file
|
||||
std::size_t first_offset_; // column num in the shown line
|
||||
std::size_t last_line_;
|
||||
std::size_t last_column_;
|
||||
std::size_t last_column_; // column num in the actual file
|
||||
std::size_t last_offset_; // column num in the shown line
|
||||
std::size_t length_;
|
||||
std::string file_name_;
|
||||
std::vector<std::string> line_str_;
|
||||
|
@ -121,27 +121,65 @@ TOML11_INLINE std::string region::as_string() const
|
||||
}
|
||||
}
|
||||
|
||||
TOML11_INLINE std::vector<std::string> region::as_lines() const
|
||||
TOML11_INLINE std::pair<std::string, std::size_t>
|
||||
region::take_line(const_iterator begin, const_iterator end) const
|
||||
{
|
||||
// To omit long line, we cap region by before/after 30 chars
|
||||
const auto dist_before = std::distance(source_->cbegin(), begin);
|
||||
const auto dist_after = std::distance(end, source_->cend());
|
||||
|
||||
const const_iterator capped_begin = (dist_before <= 30) ? source_->cbegin() : std::prev(begin, 30);
|
||||
const const_iterator capped_end = (dist_after <= 30) ? source_->cend() : std::next(end, 30);
|
||||
|
||||
const auto lf = char_type('\n');
|
||||
const auto lf_before = std::find(cxx::make_reverse_iterator(begin),
|
||||
cxx::make_reverse_iterator(capped_begin), lf);
|
||||
const auto lf_after = std::find(end, capped_end, lf);
|
||||
|
||||
auto offset = static_cast<std::size_t>(std::distance(lf_before.base(), begin));
|
||||
|
||||
std::string retval = make_string(lf_before.base(), lf_after);
|
||||
|
||||
if(lf_before.base() != source_->cbegin() && *lf_before != lf)
|
||||
{
|
||||
retval = "... " + retval;
|
||||
offset += 4;
|
||||
}
|
||||
|
||||
if(lf_after != source_->cend() && *lf_after != lf)
|
||||
{
|
||||
retval = retval + " ...";
|
||||
}
|
||||
|
||||
return std::make_pair(retval, offset);
|
||||
}
|
||||
|
||||
TOML11_INLINE std::vector<std::pair<std::string, std::size_t>> region::as_lines() const
|
||||
{
|
||||
assert(this->is_ok());
|
||||
if(this->length_ == 0)
|
||||
{
|
||||
return std::vector<std::string>{""};
|
||||
return std::vector<std::pair<std::string, std::size_t>>{
|
||||
std::make_pair("", std::size_t(0))
|
||||
};
|
||||
}
|
||||
|
||||
// Consider the following toml file
|
||||
// ```
|
||||
// array = [
|
||||
// 1, 2, 3,
|
||||
// ] # comment
|
||||
// ```
|
||||
// and the region represents
|
||||
// ```
|
||||
// [
|
||||
// 1, 2, 3,
|
||||
// ]
|
||||
// ```
|
||||
// but we want to show the following.
|
||||
// ```
|
||||
// array = [
|
||||
// 1, 2, 3,
|
||||
// ] # comment
|
||||
// ```
|
||||
// So we need to find LFs before `begin` and after `end`.
|
||||
@ -162,25 +200,45 @@ TOML11_INLINE std::vector<std::string> region::as_lines() const
|
||||
const auto begin = std::next(this->source_->cbegin(), begin_idx);
|
||||
const auto end = std::next(this->source_->cbegin(), end_idx);
|
||||
|
||||
const auto line_begin = std::find(cxx::make_reverse_iterator(begin), this->source_->crend(), char_type('\n')).base();
|
||||
const auto line_end = std::find(end, this->source_->cend(), char_type('\n'));
|
||||
assert(this->first_line_number() <= this->last_line_number());
|
||||
|
||||
const auto reg_lines = make_string(line_begin, line_end);
|
||||
|
||||
if(reg_lines == "") // the region is an empty line that only contains LF
|
||||
if(this->first_line_number() == this->last_line_number())
|
||||
{
|
||||
return std::vector<std::string>{""};
|
||||
return std::vector<std::pair<std::string, std::size_t>>{
|
||||
this->take_line(begin, end)
|
||||
};
|
||||
}
|
||||
|
||||
std::istringstream iss(reg_lines);
|
||||
// we have multiple lines. `begin` and `end` point to different lines.
|
||||
// that means that there is at least one `LF` between `begin` and `end`.
|
||||
|
||||
std::vector<std::string> lines;
|
||||
std::string line;
|
||||
while(std::getline(iss, line))
|
||||
const auto after_begin = std::distance(begin, this->source_->cend());
|
||||
const auto before_end = std::distance(this->source_->cbegin(), end);
|
||||
|
||||
const_iterator capped_file_end = this->source_->cend();
|
||||
const_iterator capped_file_begin = this->source_->cbegin();
|
||||
if(60 < after_begin) {capped_file_end = std::next(begin, 50);}
|
||||
if(60 < before_end) {capped_file_begin = std::prev(end, 50);}
|
||||
|
||||
const auto lf = char_type('\n');
|
||||
const auto first_line_end = std::find(begin, capped_file_end, lf);
|
||||
const auto last_line_begin = std::find(capped_file_begin, end, lf);
|
||||
|
||||
const auto first_line = this->take_line(begin, first_line_end);
|
||||
const auto last_line = this->take_line(last_line_begin, end);
|
||||
|
||||
if(this->first_line_number() + 1 == this->last_line_number())
|
||||
{
|
||||
lines.push_back(line);
|
||||
return std::vector<std::pair<std::string, std::size_t>>{
|
||||
first_line, last_line
|
||||
};
|
||||
}
|
||||
else
|
||||
{
|
||||
return std::vector<std::pair<std::string, std::size_t>>{
|
||||
first_line, std::make_pair("...", 0), last_line
|
||||
};
|
||||
}
|
||||
return lines;
|
||||
}
|
||||
|
||||
} // namespace detail
|
||||
|
@ -20,8 +20,10 @@ TOML11_INLINE source_location::source_location(const detail::region& r)
|
||||
: is_ok_(false),
|
||||
first_line_(1),
|
||||
first_column_(1),
|
||||
first_offset_(1),
|
||||
last_line_(1),
|
||||
last_column_(1),
|
||||
last_offset_(1),
|
||||
length_(0),
|
||||
file_name_("unknown file")
|
||||
{
|
||||
@ -34,7 +36,17 @@ TOML11_INLINE source_location::source_location(const detail::region& r)
|
||||
this->last_line_ = r.last_line_number();
|
||||
this->last_column_ = r.last_column_number();
|
||||
this->length_ = r.length();
|
||||
this->line_str_ = r.as_lines();
|
||||
|
||||
const auto lines = r.as_lines();
|
||||
assert( ! lines.empty());
|
||||
|
||||
for(const auto& l : lines)
|
||||
{
|
||||
this->line_str_.push_back(l.first);
|
||||
}
|
||||
|
||||
this->first_offset_ = lines.at( 0).second + 1; // to 1-origin
|
||||
this->last_offset_ = lines.at(lines.size()-1).second + 1;
|
||||
}
|
||||
}
|
||||
|
||||
@ -145,36 +157,36 @@ TOML11_INLINE std::string format_location_impl(const std::size_t lnw,
|
||||
{
|
||||
// when column points LF, it exceeds the size of the first line.
|
||||
std::size_t underline_limit = 1;
|
||||
if(loc.first_line().size() < loc.first_column_number())
|
||||
if(loc.first_line().size() < loc.first_column_offset())
|
||||
{
|
||||
underline_limit = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
underline_limit = loc.first_line().size() - loc.first_column_number() + 1;
|
||||
underline_limit = loc.first_line().size() - loc.first_column_offset() + 1;
|
||||
}
|
||||
const auto underline_len = (std::min)(underline_limit, loc.length());
|
||||
|
||||
format_line(oss, lnw, loc.first_line_number(), loc.first_line());
|
||||
format_underline(oss, lnw, loc.first_column_number(), underline_len, msg);
|
||||
format_underline(oss, lnw, loc.first_column_offset(), underline_len, msg);
|
||||
}
|
||||
else if(loc.lines().size() == 2)
|
||||
{
|
||||
const auto first_underline_len =
|
||||
loc.first_line().size() - loc.first_column_number() + 1;
|
||||
loc.first_line().size() - loc.first_column_offset() + 1;
|
||||
format_line(oss, lnw, loc.first_line_number(), loc.first_line());
|
||||
format_underline(oss, lnw, loc.first_column_number(),
|
||||
format_underline(oss, lnw, loc.first_column_offset(),
|
||||
first_underline_len, "");
|
||||
|
||||
format_line(oss, lnw, loc.last_line_number(), loc.last_line());
|
||||
format_underline(oss, lnw, 1, loc.last_column_number(), msg);
|
||||
format_underline(oss, lnw, 1, loc.last_column_offset(), msg);
|
||||
}
|
||||
else if(loc.lines().size() > 2)
|
||||
{
|
||||
const auto first_underline_len =
|
||||
loc.first_line().size() - loc.first_column_number() + 1;
|
||||
loc.first_line().size() - loc.first_column_offset() + 1;
|
||||
format_line(oss, lnw, loc.first_line_number(), loc.first_line());
|
||||
format_underline(oss, lnw, loc.first_column_number(),
|
||||
format_underline(oss, lnw, loc.first_column_offset(),
|
||||
first_underline_len, "and");
|
||||
|
||||
if(loc.lines().size() == 3)
|
||||
@ -188,7 +200,7 @@ TOML11_INLINE std::string format_location_impl(const std::size_t lnw,
|
||||
format_empty_line(oss, lnw);
|
||||
}
|
||||
format_line(oss, lnw, loc.last_line_number(), loc.last_line());
|
||||
format_underline(oss, lnw, 1, loc.last_column_number(), msg);
|
||||
format_underline(oss, lnw, 1, loc.last_column_offset(), msg);
|
||||
}
|
||||
// if loc is empty, do nothing.
|
||||
return oss.str();
|
||||
|
Loading…
Reference in New Issue
Block a user