diff --git a/README.md b/README.md index 7d76aa2c8..5ee79af4b 100644 --- a/README.md +++ b/README.md @@ -498,7 +498,7 @@ I deeply appreciate the help of the following people. - [Vladimir Petrigo](https://github.com/vpetrigo) made a SFINAE hack more readable. - [Denis Andrejew](https://github.com/seeekr) fixed a grammar issue in the README file. - [Pierre-Antoine Lacaze](https://github.com/palacaze) found a subtle bug in the `dump()` function. -- [TurpentineDistillery](https://github.com/TurpentineDistillery) pointed to [`std::locale::classic()`](http://en.cppreference.com/w/cpp/locale/locale/classic) to avoid too much locale joggling. +- [TurpentineDistillery](https://github.com/TurpentineDistillery) pointed to [`std::locale::classic()`](http://en.cppreference.com/w/cpp/locale/locale/classic) to avoid too much locale juggling, found some nice performance improvements in the parser and improved the benchmarking code. - [cgzones](https://github.com/cgzones) had an idea how to fix the Coverity scan. Thanks a lot for helping out! diff --git a/src/json.hpp b/src/json.hpp index 36afd56bc..5d2d660a0 100644 --- a/src/json.hpp +++ b/src/json.hpp @@ -8821,8 +8821,22 @@ basic_json_parser_66: */ void fill_line_buffer(size_t n = 0) { + // if line buffer is used, m_content points to its data + assert(m_line_buffer.empty() + or m_content == reinterpret_cast(m_line_buffer.data())); + + // if line buffer is used, m_limit is set past the end of its data + assert(m_line_buffer.empty() + or m_limit == m_content + m_line_buffer.size()); + + // pointer relationships + assert(m_content <= m_start); + assert(m_start <= m_cursor); + assert(m_cursor <= m_limit); + assert(m_marker == nullptr or m_marker <= m_limit); + // number of processed characters (p) - const auto offset_start = m_start - m_content; + const size_t num_processed_chars = static_cast(m_start - m_content); // offset for m_marker wrt. to m_start const auto offset_marker = (m_marker == nullptr) ? 
0 : m_marker - m_start; // number of unprocessed characters (u) @@ -8831,17 +8845,10 @@ basic_json_parser_66: // no stream is used or end of file is reached if (m_stream == nullptr or m_stream->eof()) { - // skip this part if we are already using the line buffer - if (m_start != reinterpret_cast(m_line_buffer.data())) - { - // copy unprocessed characters to line buffer - m_line_buffer.clear(); - for (m_cursor = m_start; m_cursor != m_limit; ++m_cursor) - { - assert(m_cursor != nullptr); - m_line_buffer.append(1, static_cast(*m_cursor)); - } - } + // m_start may or may not be pointing into m_line_buffer at + // this point. We trust the standard library to do the right + // thing. See http://stackoverflow.com/q/28142011/266378 + m_line_buffer.assign(m_start, m_limit); // append n characters to make sure that there is sufficient // space between m_cursor and m_limit @@ -8854,16 +8861,18 @@ basic_json_parser_66: else { // delete processed characters from line buffer - m_line_buffer.erase(0, static_cast(offset_start)); + m_line_buffer.erase(0, num_processed_chars); // read next line from input stream - std::string line; - std::getline(*m_stream, line, '\n'); + m_line_buffer_tmp.clear(); + std::getline(*m_stream, m_line_buffer_tmp, '\n'); + // add line with newline symbol to the line buffer - m_line_buffer += line + "\n"; + m_line_buffer += m_line_buffer_tmp; + m_line_buffer.push_back('\n'); } // set pointers - m_content = reinterpret_cast(m_line_buffer.c_str()); + m_content = reinterpret_cast(m_line_buffer.data()); assert(m_content != nullptr); m_start = m_content; m_marker = m_start + offset_marker; @@ -8946,9 +8955,20 @@ basic_json_parser_66: // iterate the result between the quotes for (const lexer_char_t* i = m_start + 1; i < m_cursor - 1; ++i) { - // process escaped characters - if (*i == '\\') + // find next escape character + auto e = std::find(i, m_cursor - 1, '\\'); + if (e != i) { + // see https://github.com/nlohmann/json/issues/365#issuecomment-262874705 
for (auto k = i; k < e; k++) + { + result.push_back(static_cast(*k)); + } + i = e - 1; // -1 because of ++i + } + else + { + // processing escaped character // read next character ++i; @@ -9035,12 +9055,6 @@ basic_json_parser_66: } } } - else - { - // all other characters are just copied to the end of the - // string - result.append(1, static_cast(*i)); - } } return result; @@ -9224,6 +9238,8 @@ basic_json_parser_66: std::istream* m_stream = nullptr; /// line buffer buffer for m_stream string_t m_line_buffer {}; + /// used for filling m_line_buffer + string_t m_line_buffer_tmp {}; /// the buffer pointer const lexer_char_t* m_content = nullptr; /// pointer to the beginning of the current symbol diff --git a/src/json.hpp.re2c b/src/json.hpp.re2c index b629dc619..524e5bd28 100644 --- a/src/json.hpp.re2c +++ b/src/json.hpp.re2c @@ -7970,8 +7970,22 @@ class basic_json */ void fill_line_buffer(size_t n = 0) { + // if line buffer is used, m_content points to its data + assert(m_line_buffer.empty() + or m_content == reinterpret_cast(m_line_buffer.data())); + + // if line buffer is used, m_limit is set past the end of its data + assert(m_line_buffer.empty() + or m_limit == m_content + m_line_buffer.size()); + + // pointer relationships + assert(m_content <= m_start); + assert(m_start <= m_cursor); + assert(m_cursor <= m_limit); + assert(m_marker == nullptr or m_marker <= m_limit); + // number of processed characters (p) - const auto offset_start = m_start - m_content; + const size_t num_processed_chars = static_cast(m_start - m_content); // offset for m_marker wrt. to m_start const auto offset_marker = (m_marker == nullptr) ? 
0 : m_marker - m_start; // number of unprocessed characters (u) @@ -7980,17 +7994,10 @@ class basic_json // no stream is used or end of file is reached if (m_stream == nullptr or m_stream->eof()) { - // skip this part if we are already using the line buffer - if (m_start != reinterpret_cast(m_line_buffer.data())) - { - // copy unprocessed characters to line buffer - m_line_buffer.clear(); - for (m_cursor = m_start; m_cursor != m_limit; ++m_cursor) - { - assert(m_cursor != nullptr); - m_line_buffer.append(1, static_cast(*m_cursor)); - } - } + // m_start may or may not be pointing into m_line_buffer at + // this point. We trust the standard library to do the right + // thing. See http://stackoverflow.com/q/28142011/266378 + m_line_buffer.assign(m_start, m_limit); // append n characters to make sure that there is sufficient // space between m_cursor and m_limit @@ -8003,16 +8010,18 @@ class basic_json else { // delete processed characters from line buffer - m_line_buffer.erase(0, static_cast(offset_start)); + m_line_buffer.erase(0, num_processed_chars); // read next line from input stream - std::string line; - std::getline(*m_stream, line, '\n'); + m_line_buffer_tmp.clear(); + std::getline(*m_stream, m_line_buffer_tmp, '\n'); + // add line with newline symbol to the line buffer - m_line_buffer += line + "\n"; + m_line_buffer += m_line_buffer_tmp; + m_line_buffer.push_back('\n'); } // set pointers - m_content = reinterpret_cast(m_line_buffer.c_str()); + m_content = reinterpret_cast(m_line_buffer.data()); assert(m_content != nullptr); m_start = m_content; m_marker = m_start + offset_marker; @@ -8095,9 +8104,20 @@ class basic_json // iterate the result between the quotes for (const lexer_char_t* i = m_start + 1; i < m_cursor - 1; ++i) { - // process escaped characters - if (*i == '\\') + // find next escape character + auto e = std::find(i, m_cursor - 1, '\\'); + if (e != i) { + // see https://github.com/nlohmann/json/issues/365#issuecomment-262874705 + for (auto k = i; 
k < e; k++) + { + result.push_back(static_cast(*k)); + } + i = e - 1; // -1 because of ++i + } + else + { + // processing escaped character // read next character ++i; @@ -8184,12 +8204,6 @@ class basic_json } } } - else - { - // all other characters are just copied to the end of the - // string - result.append(1, static_cast(*i)); - } } return result; @@ -8373,6 +8387,8 @@ class basic_json std::istream* m_stream = nullptr; /// line buffer buffer for m_stream string_t m_line_buffer {}; + /// used for filling m_line_buffer + string_t m_line_buffer_tmp {}; /// the buffer pointer const lexer_char_t* m_content = nullptr; /// pointer to the beginning of the current symbol