Fix UTF-8 truncation
This commit is contained in:
parent
d6eede9e08
commit
0889856d61
@ -436,6 +436,24 @@ inline size_t count_code_points(basic_string_view<char8_t> s) {
|
|||||||
return num_code_points;
|
return num_code_points;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <typename Char>
|
||||||
|
inline size_t code_point_index(basic_string_view<Char> s, size_t n) {
|
||||||
|
size_t size = s.size();
|
||||||
|
return n < size ? n : size;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Calculates the index of the nth code point in a UTF-8 string.
|
||||||
|
inline size_t code_point_index(basic_string_view<char8_t> s, size_t n) {
|
||||||
|
const char8_t* data = s.data();
|
||||||
|
size_t num_code_points = 0;
|
||||||
|
for (size_t i = 0, size = s.size(); i != size; ++i) {
|
||||||
|
if ((data[i] & 0xc0) != 0x80 && ++num_code_points > n) {
|
||||||
|
return i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return s.size();
|
||||||
|
}
|
||||||
|
|
||||||
inline char8_t to_char8_t(char c) { return static_cast<char8_t>(c); }
|
inline char8_t to_char8_t(char c) { return static_cast<char8_t>(c); }
|
||||||
|
|
||||||
template <typename InputIt, typename OutChar>
|
template <typename InputIt, typename OutChar>
|
||||||
@ -1729,7 +1747,8 @@ template <typename Range> class basic_writer {
|
|||||||
const Char* data = s.data();
|
const Char* data = s.data();
|
||||||
std::size_t size = s.size();
|
std::size_t size = s.size();
|
||||||
if (specs.precision >= 0 && internal::to_unsigned(specs.precision) < size)
|
if (specs.precision >= 0 && internal::to_unsigned(specs.precision) < size)
|
||||||
size = internal::to_unsigned(specs.precision);
|
size = internal::code_point_index(s,
|
||||||
|
internal::to_unsigned(specs.precision));
|
||||||
write(data, size, specs);
|
write(data, size, specs);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2621,3 +2621,13 @@ TEST(FormatTest, FormatCustomChar) {
|
|||||||
EXPECT_EQ(result.size(), 1);
|
EXPECT_EQ(result.size(), 1);
|
||||||
EXPECT_EQ(result[0], mychar('x'));
|
EXPECT_EQ(result[0], mychar('x'));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST(FormatTest, FormatUTF8Precision) {
|
||||||
|
using str_type = std::basic_string<char8_t>;
|
||||||
|
str_type format(reinterpret_cast<const char8_t*>(u8"{:.4}"));
|
||||||
|
str_type str(reinterpret_cast<const char8_t*>(u8"caf\u00e9s")); // cafés
|
||||||
|
auto result = fmt::format(format, str);
|
||||||
|
EXPECT_EQ(fmt::internal::count_code_points(result), 4);
|
||||||
|
EXPECT_EQ(result.size(), 5);
|
||||||
|
EXPECT_EQ(result, str.substr(0, 5));
|
||||||
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user