⚡ micro-optimization of dump()

A lot of small changes to avoid memory allocations:

- The locale is only queried once rather than with every number serialization.
- The indentation string is recycled between different calls.
- The string escape function avoids a copy if no escaping is necessary.
- The string escape and the space function use a complete switch case instead of cascaded ifs.

Cachegrind measures a performance improvement of some 15%.
2024-11-23 20:40:08 +00:00 · 2017-02-28 16:28:22 +01:00 · 2017-02-28 16:28:22 +01:00 · 224f99070b
commit 224f99070b
parent fc48b8ac2b
3 changed files with 277 additions and 160 deletions
--- a/src/json.hpp
+++ b/src/json.hpp
@ -34,6 +34,7 @@ SOFTWARE.
 #include <cassert> // assert
 #include <cctype> // isdigit
 #include <ciso646> // and, not, or
+#include <clocale> // lconv, localeconv
 #include <cmath> // isfinite, labs, ldexp, signbit
 #include <cstddef> // nullptr_t, ptrdiff_t, size_t
 #include <cstdint> // int64_t, uint64_t
@ -6203,7 +6204,9 @@ class basic_json
    {
      public:
        serializer(std::ostream& s)
-            : o(s)
+            : o(s), loc(std::localeconv()),
+              thousands_sep(!loc->thousands_sep ? '\0' : loc->thousands_sep[0]),
+              decimal_point(!loc->decimal_point ? '\0' : loc->decimal_point[0])
        {}

        /*!
@ -6244,7 +6247,10 @@ class basic_json

                        // variable to hold indentation for recursive calls
                        const auto new_indent = current_indent + indent_step;
-                        string_t indent_string = string_t(new_indent, ' ');
+                        if (indent_string.size() < new_indent)
+                        {
+                            indent_string.resize(new_indent, ' ');
+                        }

                        // first n-1 elements
                        auto i = val.m_value.object->cbegin();
@ -6252,8 +6258,7 @@ class basic_json
                        {
                            o.write(indent_string.c_str(), new_indent);
                            o.put('\"');
-                            const auto s = escape_string(i->first);
-                            o.write(s.c_str(), static_cast<std::streamsize>(s.size()));
+                            dump_escaped(i->first);
                            o.write("\": ", 3);
                            dump(i->second, true, indent_step, new_indent);
                            o.write(",\n", 2);
@ -6263,8 +6268,7 @@ class basic_json
                        assert(i != val.m_value.object->cend());
                        o.write(indent_string.c_str(), new_indent);
                        o.put('\"');
-                        const auto s = escape_string(i->first);
-                        o.write(s.c_str(), static_cast<std::streamsize>(s.size()));
+                        dump_escaped(i->first);
                        o.write("\": ", 3);
                        dump(i->second, true, indent_step, new_indent);

@ -6281,8 +6285,7 @@ class basic_json
                        for (size_t cnt = 0; cnt < val.m_value.object->size() - 1; ++cnt, ++i)
                        {
                            o.put('\"');
-                            const auto s = escape_string(i->first);
-                            o.write(s.c_str(), static_cast<std::streamsize>(s.size()));
+                            dump_escaped(i->first);
                            o.write("\":", 2);
                            dump(i->second, false, indent_step, current_indent);
                            o.put(',');
@ -6291,8 +6294,7 @@ class basic_json
                        // last element
                        assert(i != val.m_value.object->cend());
                        o.put('\"');
-                        const auto s = escape_string(i->first);
-                        o.write(s.c_str(), static_cast<std::streamsize>(s.size()));
+                        dump_escaped(i->first);
                        o.write("\":", 2);
                        dump(i->second, false, indent_step, current_indent);

@ -6316,7 +6318,10 @@ class basic_json

                        // variable to hold indentation for recursive calls
                        const auto new_indent = current_indent + indent_step;
-                        string_t indent_string = string_t(new_indent, ' ');
+                        if (indent_string.size() < new_indent)
+                        {
+                            indent_string.resize(new_indent, ' ');
+                        }

                        // first n-1 elements
                        for (auto i = val.m_value.array->cbegin(); i != val.m_value.array->cend() - 1; ++i)
@ -6359,8 +6364,7 @@ class basic_json
                case value_t::string:
                {
                    o.put('\"');
-                    const auto s = escape_string(*val.m_value.string);
-                    o.write(s.c_str(), static_cast<std::streamsize>(s.size()));
+                    dump_escaped(*val.m_value.string);
                    o.put('\"');
                    return;
                }
@ -6380,19 +6384,19 @@ class basic_json

                case value_t::number_integer:
                {
-                    x_write(val.m_value.number_integer);
+                    dump_integer(val.m_value.number_integer);
                    return;
                }

                case value_t::number_unsigned:
                {
-                    x_write(val.m_value.number_unsigned);
+                    dump_integer(val.m_value.number_unsigned);
                    return;
                }

                case value_t::number_float:
                {
-                    x_write(val.m_value.number_float);
+                    dump_float(val.m_value.number_float);
                    return;
                }

@ -6438,14 +6442,40 @@ class basic_json
                        return res + 1;
                    }

+                    case 0x00:
+                    case 0x01:
+                    case 0x02:
+                    case 0x03:
+                    case 0x04:
+                    case 0x05:
+                    case 0x06:
+                    case 0x07:
+                    case 0x0b:
+                    case 0x0e:
+                    case 0x0f:
+                    case 0x10:
+                    case 0x11:
+                    case 0x12:
+                    case 0x13:
+                    case 0x14:
+                    case 0x15:
+                    case 0x16:
+                    case 0x17:
+                    case 0x18:
+                    case 0x19:
+                    case 0x1a:
+                    case 0x1b:
+                    case 0x1c:
+                    case 0x1d:
+                    case 0x1e:
+                    case 0x1f:
+                    {
+                        // from c (1 byte) to \uxxxx (6 bytes)
+                        return res + 5;
+                    }
+
                    default:
                    {
-                        if (c >= 0x00 and c <= 0x1f)
-                        {
-                            // from c (1 byte) to \uxxxx (6 bytes)
-                            return res + 5;
-                        }
-
                        return res;
                    }
                }
@ -6465,12 +6495,13 @@ class basic_json

        @complexity Linear in the length of string @a s.
        */
-        static string_t escape_string(const string_t& s)
+        void dump_escaped(const string_t& s) const
        {
            const auto space = extra_space(s);
            if (space == 0)
            {
-                return s;
+                o.write(s.c_str(), static_cast<std::streamsize>(s.size()));
+                return;
            }

            // create a result string of necessary size
@ -6537,43 +6568,69 @@ class basic_json
                        break;
                    }

+                    case 0x00:
+                    case 0x01:
+                    case 0x02:
+                    case 0x03:
+                    case 0x04:
+                    case 0x05:
+                    case 0x06:
+                    case 0x07:
+                    case 0x0b:
+                    case 0x0e:
+                    case 0x0f:
+                    case 0x10:
+                    case 0x11:
+                    case 0x12:
+                    case 0x13:
+                    case 0x14:
+                    case 0x15:
+                    case 0x16:
+                    case 0x17:
+                    case 0x18:
+                    case 0x19:
+                    case 0x1a:
+                    case 0x1b:
+                    case 0x1c:
+                    case 0x1d:
+                    case 0x1e:
+                    case 0x1f:
+                    {
+                        // convert a number 0..15 to its hex representation
+                        // (0..f)
+                        static const char hexify[16] =
+                        {
+                            '0', '1', '2', '3', '4', '5', '6', '7',
+                            '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'
+                        };
+
+                        // print character c as \uxxxx
+                        for (const char m :
+                    { 'u', '0', '0', hexify[c >> 4], hexify[c & 0x0f]
+                        })
+                        {
+                            result[++pos] = m;
+                        }
+
+                        ++pos;
+                        break;
+                    }
+
                    default:
                    {
-                        if (c >= 0x00 and c <= 0x1f)
-                        {
-                            // convert a number 0..15 to its hex representation
-                            // (0..f)
-                            static const char hexify[16] =
-                            {
-                                '0', '1', '2', '3', '4', '5', '6', '7',
-                                '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'
-                            };
-
-                            // print character c as \uxxxx
-                            for (const char m :
-                        { 'u', '0', '0', hexify[c >> 4], hexify[c & 0x0f]
-                            })
-                            {
-                                result[++pos] = m;
-                            }
-
-                            ++pos;
-                        }
-                        else
-                        {
-                            // all other characters are added as-is
-                            result[pos++] = c;
-                        }
+                        // all other characters are added as-is
+                        result[pos++] = c;
                        break;
                    }
                }
            }

-            return result;
+            assert(pos == s.size() + space);
+            o.write(result.c_str(), static_cast<std::streamsize>(result.size()));
        }

        template<typename NumberType>
-        void x_write(NumberType x)
+        void dump_integer(NumberType x)
        {
            // special case for "0"
            if (x == 0)
@ -6607,7 +6664,7 @@ class basic_json
            o.write(m_buf.data(), static_cast<std::streamsize>(i));
        }

-        void x_write(number_float_t x)
+        void dump_float(number_float_t x)
        {
            // special case for 0.0 and -0.0
            if (x == 0)
@ -6634,15 +6691,6 @@ class basic_json
            // check if buffer was large enough
            assert(static_cast<size_t>(written_bytes) < m_buf.size());

-            // read information from locale
-            const auto loc = localeconv();
-            assert(loc != nullptr);
-            const char thousands_sep = !loc->thousands_sep ? '\0'
-                                       : loc->thousands_sep[0];
-
-            const char decimal_point = !loc->decimal_point ? '\0'
-                                       : loc->decimal_point[0];
-
            // erase thousands separator
            if (thousands_sep != '\0')
            {
@ -6687,6 +6735,12 @@ class basic_json

        /// a (hopefully) large enough character buffer
        std::array < char, 64 > m_buf{{}};
+
+        const std::lconv* loc = nullptr;
+        const char thousands_sep = '\0';
+        const char decimal_point = '\0';
+
+        string_t indent_string = string_t(512, ' ');
    };

  public:
--- a/src/json.hpp.re2c
+++ b/src/json.hpp.re2c
@ -34,6 +34,7 @@ SOFTWARE.
 #include <cassert> // assert
 #include <cctype> // isdigit
 #include <ciso646> // and, not, or
+#include <clocale> // lconv, localeconv
 #include <cmath> // isfinite, labs, ldexp, signbit
 #include <cstddef> // nullptr_t, ptrdiff_t, size_t
 #include <cstdint> // int64_t, uint64_t
@ -6203,7 +6204,9 @@ class basic_json
    {
      public:
        serializer(std::ostream& s)
-            : o(s)
+            : o(s), loc(std::localeconv()),
+              thousands_sep(!loc->thousands_sep ? '\0' : loc->thousands_sep[0]),
+              decimal_point(!loc->decimal_point ? '\0' : loc->decimal_point[0])
        {}

        /*!
@ -6244,7 +6247,10 @@ class basic_json

                        // variable to hold indentation for recursive calls
                        const auto new_indent = current_indent + indent_step;
-                        string_t indent_string = string_t(new_indent, ' ');
+                        if (indent_string.size() < new_indent)
+                        {
+                            indent_string.resize(new_indent, ' ');
+                        }

                        // first n-1 elements
                        auto i = val.m_value.object->cbegin();
@ -6252,8 +6258,7 @@ class basic_json
                        {
                            o.write(indent_string.c_str(), new_indent);
                            o.put('\"');
-                            const auto s = escape_string(i->first);
-                            o.write(s.c_str(), static_cast<std::streamsize>(s.size()));
+                            dump_escaped(i->first);
                            o.write("\": ", 3);
                            dump(i->second, true, indent_step, new_indent);
                            o.write(",\n", 2);
@ -6263,8 +6268,7 @@ class basic_json
                        assert(i != val.m_value.object->cend());
                        o.write(indent_string.c_str(), new_indent);
                        o.put('\"');
-                        const auto s = escape_string(i->first);
-                        o.write(s.c_str(), static_cast<std::streamsize>(s.size()));
+                        dump_escaped(i->first);
                        o.write("\": ", 3);
                        dump(i->second, true, indent_step, new_indent);

@ -6281,8 +6285,7 @@ class basic_json
                        for (size_t cnt = 0; cnt < val.m_value.object->size() - 1; ++cnt, ++i)
                        {
                            o.put('\"');
-                            const auto s = escape_string(i->first);
-                            o.write(s.c_str(), static_cast<std::streamsize>(s.size()));
+                            dump_escaped(i->first);
                            o.write("\":", 2);
                            dump(i->second, false, indent_step, current_indent);
                            o.put(',');
@ -6291,8 +6294,7 @@ class basic_json
                        // last element
                        assert(i != val.m_value.object->cend());
                        o.put('\"');
-                        const auto s = escape_string(i->first);
-                        o.write(s.c_str(), static_cast<std::streamsize>(s.size()));
+                        dump_escaped(i->first);
                        o.write("\":", 2);
                        dump(i->second, false, indent_step, current_indent);

@ -6316,7 +6318,10 @@ class basic_json

                        // variable to hold indentation for recursive calls
                        const auto new_indent = current_indent + indent_step;
-                        string_t indent_string = string_t(new_indent, ' ');
+                        if (indent_string.size() < new_indent)
+                        {
+                            indent_string.resize(new_indent, ' ');
+                        }

                        // first n-1 elements
                        for (auto i = val.m_value.array->cbegin(); i != val.m_value.array->cend() - 1; ++i)
@ -6359,8 +6364,7 @@ class basic_json
                case value_t::string:
                {
                    o.put('\"');
-                    const auto s = escape_string(*val.m_value.string);
-                    o.write(s.c_str(), static_cast<std::streamsize>(s.size()));
+                    dump_escaped(*val.m_value.string);
                    o.put('\"');
                    return;
                }
@ -6380,19 +6384,19 @@ class basic_json

                case value_t::number_integer:
                {
-                    x_write(val.m_value.number_integer);
+                    dump_integer(val.m_value.number_integer);
                    return;
                }

                case value_t::number_unsigned:
                {
-                    x_write(val.m_value.number_unsigned);
+                    dump_integer(val.m_value.number_unsigned);
                    return;
                }

                case value_t::number_float:
                {
-                    x_write(val.m_value.number_float);
+                    dump_float(val.m_value.number_float);
                    return;
                }

@ -6438,14 +6442,40 @@ class basic_json
                        return res + 1;
                    }

+                    case 0x00:
+                    case 0x01:
+                    case 0x02:
+                    case 0x03:
+                    case 0x04:
+                    case 0x05:
+                    case 0x06:
+                    case 0x07:
+                    case 0x0b:
+                    case 0x0e:
+                    case 0x0f:
+                    case 0x10:
+                    case 0x11:
+                    case 0x12:
+                    case 0x13:
+                    case 0x14:
+                    case 0x15:
+                    case 0x16:
+                    case 0x17:
+                    case 0x18:
+                    case 0x19:
+                    case 0x1a:
+                    case 0x1b:
+                    case 0x1c:
+                    case 0x1d:
+                    case 0x1e:
+                    case 0x1f:
+                    {
+                        // from c (1 byte) to \uxxxx (6 bytes)
+                        return res + 5;
+                    }
+
                    default:
                    {
-                        if (c >= 0x00 and c <= 0x1f)
-                        {
-                            // from c (1 byte) to \uxxxx (6 bytes)
-                            return res + 5;
-                        }
-
                        return res;
                    }
                }
@ -6465,12 +6495,13 @@ class basic_json

        @complexity Linear in the length of string @a s.
        */
-        static string_t escape_string(const string_t& s)
+        void dump_escaped(const string_t& s) const
        {
            const auto space = extra_space(s);
            if (space == 0)
            {
-                return s;
+                o.write(s.c_str(), static_cast<std::streamsize>(s.size()));
+                return;
            }

            // create a result string of necessary size
@ -6537,43 +6568,69 @@ class basic_json
                        break;
                    }

+                    case 0x00:
+                    case 0x01:
+                    case 0x02:
+                    case 0x03:
+                    case 0x04:
+                    case 0x05:
+                    case 0x06:
+                    case 0x07:
+                    case 0x0b:
+                    case 0x0e:
+                    case 0x0f:
+                    case 0x10:
+                    case 0x11:
+                    case 0x12:
+                    case 0x13:
+                    case 0x14:
+                    case 0x15:
+                    case 0x16:
+                    case 0x17:
+                    case 0x18:
+                    case 0x19:
+                    case 0x1a:
+                    case 0x1b:
+                    case 0x1c:
+                    case 0x1d:
+                    case 0x1e:
+                    case 0x1f:
+                    {
+                        // convert a number 0..15 to its hex representation
+                        // (0..f)
+                        static const char hexify[16] =
+                        {
+                            '0', '1', '2', '3', '4', '5', '6', '7',
+                            '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'
+                        };
+
+                        // print character c as \uxxxx
+                        for (const char m :
+                    { 'u', '0', '0', hexify[c >> 4], hexify[c & 0x0f]
+                        })
+                        {
+                            result[++pos] = m;
+                        }
+
+                        ++pos;
+                        break;
+                    }
+
                    default:
                    {
-                        if (c >= 0x00 and c <= 0x1f)
-                        {
-                            // convert a number 0..15 to its hex representation
-                            // (0..f)
-                            static const char hexify[16] =
-                            {
-                                '0', '1', '2', '3', '4', '5', '6', '7',
-                                '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'
-                            };
-
-                            // print character c as \uxxxx
-                            for (const char m :
-                        { 'u', '0', '0', hexify[c >> 4], hexify[c & 0x0f]
-                            })
-                            {
-                                result[++pos] = m;
-                            }
-
-                            ++pos;
-                        }
-                        else
-                        {
-                            // all other characters are added as-is
-                            result[pos++] = c;
-                        }
+                        // all other characters are added as-is
+                        result[pos++] = c;
                        break;
                    }
                }
            }

-            return result;
+            assert(pos == s.size() + space);
+            o.write(result.c_str(), static_cast<std::streamsize>(result.size()));
        }

        template<typename NumberType>
-        void x_write(NumberType x)
+        void dump_integer(NumberType x)
        {
            // special case for "0"
            if (x == 0)
@ -6607,7 +6664,7 @@ class basic_json
            o.write(m_buf.data(), static_cast<std::streamsize>(i));
        }

-        void x_write(number_float_t x)
+        void dump_float(number_float_t x)
        {
            // special case for 0.0 and -0.0
            if (x == 0)
@ -6634,15 +6691,6 @@ class basic_json
            // check if buffer was large enough
            assert(static_cast<size_t>(written_bytes) < m_buf.size());

-            // read information from locale
-            const auto loc = localeconv();
-            assert(loc != nullptr);
-            const char thousands_sep = !loc->thousands_sep ? '\0'
-                                       : loc->thousands_sep[0];
-
-            const char decimal_point = !loc->decimal_point ? '\0'
-                                       : loc->decimal_point[0];
-
            // erase thousands separator
            if (thousands_sep != '\0')
            {
@ -6687,6 +6735,12 @@ class basic_json

        /// a (hopefully) large enough character buffer
        std::array < char, 64 > m_buf{{}};
+
+        const std::lconv* loc = nullptr;
+        const char thousands_sep = '\0';
+        const char decimal_point = '\0';
+
+        string_t indent_string = string_t(512, ' ');
    };

  public:
--- a/test/src/unit-convenience.cpp
+++ b/test/src/unit-convenience.cpp
@ -49,44 +49,53 @@ TEST_CASE("convenience functions")

    SECTION("string escape")
    {
-        CHECK(json::serializer::escape_string("\"") == "\\\"");
-        CHECK(json::serializer::escape_string("\\") == "\\\\");
-        CHECK(json::serializer::escape_string("\b") == "\\b");
-        CHECK(json::serializer::escape_string("\f") == "\\f");
-        CHECK(json::serializer::escape_string("\n") == "\\n");
-        CHECK(json::serializer::escape_string("\r") == "\\r");
-        CHECK(json::serializer::escape_string("\t") == "\\t");
+        const auto check_escaped = [](const char* original,
+                                      const char* escaped)
+        {
+            std::stringstream ss;
+            json::serializer s(ss);
+            s.dump_escaped(original);
+            CHECK(ss.str() == escaped);
+        };

-        CHECK(json::serializer::escape_string("\x01") == "\\u0001");
-        CHECK(json::serializer::escape_string("\x02") == "\\u0002");
-        CHECK(json::serializer::escape_string("\x03") == "\\u0003");
-        CHECK(json::serializer::escape_string("\x04") == "\\u0004");
-        CHECK(json::serializer::escape_string("\x05") == "\\u0005");
-        CHECK(json::serializer::escape_string("\x06") == "\\u0006");
-        CHECK(json::serializer::escape_string("\x07") == "\\u0007");
-        CHECK(json::serializer::escape_string("\x08") == "\\b");
-        CHECK(json::serializer::escape_string("\x09") == "\\t");
-        CHECK(json::serializer::escape_string("\x0a") == "\\n");
-        CHECK(json::serializer::escape_string("\x0b") == "\\u000b");
-        CHECK(json::serializer::escape_string("\x0c") == "\\f");
-        CHECK(json::serializer::escape_string("\x0d") == "\\r");
-        CHECK(json::serializer::escape_string("\x0e") == "\\u000e");
-        CHECK(json::serializer::escape_string("\x0f") == "\\u000f");
-        CHECK(json::serializer::escape_string("\x10") == "\\u0010");
-        CHECK(json::serializer::escape_string("\x11") == "\\u0011");
-        CHECK(json::serializer::escape_string("\x12") == "\\u0012");
-        CHECK(json::serializer::escape_string("\x13") == "\\u0013");
-        CHECK(json::serializer::escape_string("\x14") == "\\u0014");
-        CHECK(json::serializer::escape_string("\x15") == "\\u0015");
-        CHECK(json::serializer::escape_string("\x16") == "\\u0016");
-        CHECK(json::serializer::escape_string("\x17") == "\\u0017");
-        CHECK(json::serializer::escape_string("\x18") == "\\u0018");
-        CHECK(json::serializer::escape_string("\x19") == "\\u0019");
-        CHECK(json::serializer::escape_string("\x1a") == "\\u001a");
-        CHECK(json::serializer::escape_string("\x1b") == "\\u001b");
-        CHECK(json::serializer::escape_string("\x1c") == "\\u001c");
-        CHECK(json::serializer::escape_string("\x1d") == "\\u001d");
-        CHECK(json::serializer::escape_string("\x1e") == "\\u001e");
-        CHECK(json::serializer::escape_string("\x1f") == "\\u001f");
+        check_escaped("\"", "\\\"");
+        check_escaped("\\", "\\\\");
+        check_escaped("\b", "\\b");
+        check_escaped("\f", "\\f");
+        check_escaped("\n", "\\n");
+        check_escaped("\r", "\\r");
+        check_escaped("\t", "\\t");
+
+        check_escaped("\x01", "\\u0001");
+        check_escaped("\x02", "\\u0002");
+        check_escaped("\x03", "\\u0003");
+        check_escaped("\x04", "\\u0004");
+        check_escaped("\x05", "\\u0005");
+        check_escaped("\x06", "\\u0006");
+        check_escaped("\x07", "\\u0007");
+        check_escaped("\x08", "\\b");
+        check_escaped("\x09", "\\t");
+        check_escaped("\x0a", "\\n");
+        check_escaped("\x0b", "\\u000b");
+        check_escaped("\x0c", "\\f");
+        check_escaped("\x0d", "\\r");
+        check_escaped("\x0e", "\\u000e");
+        check_escaped("\x0f", "\\u000f");
+        check_escaped("\x10", "\\u0010");
+        check_escaped("\x11", "\\u0011");
+        check_escaped("\x12", "\\u0012");
+        check_escaped("\x13", "\\u0013");
+        check_escaped("\x14", "\\u0014");
+        check_escaped("\x15", "\\u0015");
+        check_escaped("\x16", "\\u0016");
+        check_escaped("\x17", "\\u0017");
+        check_escaped("\x18", "\\u0018");
+        check_escaped("\x19", "\\u0019");
+        check_escaped("\x1a", "\\u001a");
+        check_escaped("\x1b", "\\u001b");
+        check_escaped("\x1c", "\\u001c");
+        check_escaped("\x1d", "\\u001d");
+        check_escaped("\x1e", "\\u001e");
+        check_escaped("\x1f", "\\u001f");
    }
 }