mirror of
https://github.com/fmtlib/fmt.git
synced 2024-12-23 23:10:04 +00:00
Fix decoder on broken utf8 sequences. (#3044)
Signed-off-by: Vladislav Shchapov <vladislav@shchapov.ru> Signed-off-by: Vladislav Shchapov <vladislav@shchapov.ru>
This commit is contained in:
parent
541cd21838
commit
489dabbd31
@ -602,6 +602,7 @@ FMT_CONSTEXPR FMT_NOINLINE auto copy_str_noinline(InputIt begin, InputIt end,
|
||||
*/
|
||||
FMT_CONSTEXPR inline auto utf8_decode(const char* s, uint32_t* c, int* e)
|
||||
-> const char* {
|
||||
constexpr const int prefix_masks[] = {0x00, 0x80, 0xe0, 0xf0, 0xf8};
|
||||
constexpr const int masks[] = {0x00, 0x7f, 0x1f, 0x0f, 0x07};
|
||||
constexpr const uint32_t mins[] = {4194304, 0, 128, 2048, 65536};
|
||||
constexpr const int shiftc[] = {0, 18, 12, 6, 0};
|
||||
@ -628,6 +629,8 @@ FMT_CONSTEXPR inline auto utf8_decode(const char* s, uint32_t* c, int* e)
|
||||
*e |= uchar(s[3]) >> 6;
|
||||
*e ^= 0x2a; // top two bits of each tail byte correct?
|
||||
*e >>= shifte[len];
|
||||
*e |= ((uchar(s[0]) & prefix_masks[len]) !=
|
||||
uchar((prefix_masks[len] << 1) & 0xFF)); // first byte correct?
|
||||
|
||||
return next;
|
||||
}
|
||||
@ -643,8 +646,8 @@ FMT_CONSTEXPR void for_each_codepoint(string_view s, F f) {
|
||||
auto error = 0;
|
||||
auto end = utf8_decode(buf_ptr, &cp, &error);
|
||||
bool result = f(error ? invalid_code_point : cp,
|
||||
string_view(ptr, to_unsigned(end - buf_ptr)));
|
||||
return result ? end : nullptr;
|
||||
string_view(ptr, error ? 1 : to_unsigned(end - buf_ptr)));
|
||||
return result ? (error ? buf_ptr + 1 : end) : nullptr;
|
||||
};
|
||||
auto p = s.data();
|
||||
const size_t block_size = 4; // utf8_decode always reads blocks of 4 chars.
|
||||
|
@ -380,8 +380,15 @@ TEST(ranges_test, escape_string) {
|
||||
EXPECT_EQ(fmt::format("{}", vec{"\xcd\xb8"}), "[\"\\u0378\"]");
|
||||
// Unassigned Unicode code points.
|
||||
EXPECT_EQ(fmt::format("{}", vec{"\xf0\xaa\x9b\x9e"}), "[\"\\U0002a6de\"]");
|
||||
// Broken utf-8.
|
||||
EXPECT_EQ(fmt::format("{}", vec{"\xf4\x8f\xbf\xc0"}),
|
||||
"[\"\\xf4\\x8f\\xbf\\xc0\"]");
|
||||
EXPECT_EQ(fmt::format("{}", vec{"\xf0\x28"}), "[\"\\xf0(\"]");
|
||||
EXPECT_EQ(fmt::format("{}", vec{"\xe1\x28"}), "[\"\\xe1(\"]");
|
||||
EXPECT_EQ(fmt::format("{}", vec{std::string("\xf0\x28\0\0anything", 12)}),
|
||||
"[\"\\xf0(\\x00\\x00anything\"]");
|
||||
|
||||
// Correct utf-8.
|
||||
EXPECT_EQ(fmt::format("{}", vec{"понедельник"}), "[\"понедельник\"]");
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user