From c13753a70cc55f3b1c99fb8f8395e78e5f9cae43 Mon Sep 17 00:00:00 2001
From: Victor Zverovich
Date: Sat, 18 Nov 2023 07:54:36 -1000
Subject: [PATCH] Fix handling of invalid Unicode in precision

---
 include/fmt/format.h | 23 +++++++++++++++--------
 test/format-test.cc  |  1 +
 2 files changed, 16 insertions(+), 8 deletions(-)

diff --git a/include/fmt/format.h b/include/fmt/format.h
index 60595b2f..d4d20965 100644
--- a/include/fmt/format.h
+++ b/include/fmt/format.h
@@ -791,12 +791,18 @@ inline auto code_point_index(basic_string_view<Char> s, size_t n) -> size_t {
 
 // Calculates the index of the nth code point in a UTF-8 string.
 inline auto code_point_index(string_view s, size_t n) -> size_t {
-  const char* data = s.data();
-  size_t num_code_points = 0;
-  for (size_t i = 0, size = s.size(); i != size; ++i) {
-    if ((data[i] & 0xc0) != 0x80 && ++num_code_points > n) return i;
-  }
-  return s.size();
+  size_t result = s.size();
+  const char* begin = s.begin();
+  for_each_codepoint(
+      s, [begin, &n, &result](uint32_t, string_view sv) {
+        if (n != 0) {
+          --n;
+          return true;
+        }
+        result = to_unsigned(sv.begin() - begin);
+        return false;
+      });
+  return result;
 }
 
 inline auto code_point_index(basic_string_view<char8_type> s, size_t n)
@@ -1962,8 +1968,9 @@ auto write_escaped_char(OutputIt out, Char v) -> OutputIt {
   *out++ = static_cast<Char>('\'');
   if ((needs_escape(static_cast<uint32_t>(v)) && v != static_cast<Char>('"')) ||
       v == static_cast<Char>('\'')) {
-    out = write_escaped_cp(
-        out, find_escape_result<Char>{v_array, v_array + 1, static_cast<uint32_t>(v)});
+    out = write_escaped_cp(out,
+                           find_escape_result<Char>{v_array, v_array + 1,
+                                                    static_cast<uint32_t>(v)});
   } else {
     *out++ = v;
   }
diff --git a/test/format-test.cc b/test/format-test.cc
index f4e7eeca..08b4e8c1 100644
--- a/test/format-test.cc
+++ b/test/format-test.cc
@@ -1050,6 +1050,7 @@ TEST(format_test, precision) {
 
   EXPECT_EQ("st", fmt::format("{0:.2}", "str"));
   EXPECT_EQ("вожык", fmt::format("{0:.5}", "вожыкі"));
+  EXPECT_EQ("123456", fmt::format("{0:.6}", "123456\xad"));
 }
 
 TEST(format_test, runtime_precision) {