mirror of
https://github.com/fmtlib/fmt.git
synced 2024-11-22 10:30:05 +00:00
More escaping
This commit is contained in:
parent
f69a572538
commit
6397095ca4
@ -2525,8 +2525,8 @@ template <> struct formatter<detail::bigint> {
|
||||
};
|
||||
|
||||
FMT_FUNC detail::utf8_to_utf16::utf8_to_utf16(string_view s) {
|
||||
for_each_codepoint(s, [this](uint32_t cp, int error) {
|
||||
if (error != 0) FMT_THROW(std::runtime_error("invalid utf8"));
|
||||
for_each_codepoint(s, [this](uint32_t cp, string_view) {
|
||||
if (cp == invalid_code_point) FMT_THROW(std::runtime_error("invalid utf8"));
|
||||
if (cp <= 0xFFFF) {
|
||||
buffer_.push_back(static_cast<wchar_t>(cp));
|
||||
} else {
|
||||
@ -2534,6 +2534,7 @@ FMT_FUNC detail::utf8_to_utf16::utf8_to_utf16(string_view s) {
|
||||
buffer_.push_back(static_cast<wchar_t>(0xD800 + (cp >> 10)));
|
||||
buffer_.push_back(static_cast<wchar_t>(0xDC00 + (cp & 0x3FF)));
|
||||
}
|
||||
return true;
|
||||
});
|
||||
buffer_.push_back(0);
|
||||
}
|
||||
|
@ -483,27 +483,38 @@ FMT_CONSTEXPR inline auto utf8_decode(const char* s, uint32_t* c, int* e)
|
||||
return next;
|
||||
}
|
||||
|
||||
// Sentinel passed to for_each_codepoint callbacks in place of a code point
// when decoding fails. It is a typed constant rather than an anonymous
// enumerator so that it can be mixed with uint32_t values in conditional
// expressions and comparisons without enum/integer mismatch diagnostics.
constexpr uint32_t invalid_code_point = ~uint32_t();
|
||||
|
||||
// Invokes f(cp, sv) for every code point cp in s with sv being the string view
|
||||
// corresponding to the code point. cp is invalid_code_point on error.
|
||||
template <typename F>
|
||||
FMT_CONSTEXPR void for_each_codepoint(string_view s, F f) {
|
||||
auto decode = [f](const char* p) {
|
||||
auto decode = [f](const char* buf_ptr, const char* ptr) {
|
||||
auto cp = uint32_t();
|
||||
auto error = 0;
|
||||
p = utf8_decode(p, &cp, &error);
|
||||
f(cp, error);
|
||||
return p;
|
||||
auto end = utf8_decode(buf_ptr, &cp, &error);
|
||||
bool result = f(error ? invalid_code_point : cp,
|
||||
string_view(ptr, to_unsigned(end - buf_ptr)));
|
||||
return result ? end : nullptr;
|
||||
};
|
||||
auto p = s.data();
|
||||
const size_t block_size = 4; // utf8_decode always reads blocks of 4 chars.
|
||||
if (s.size() >= block_size) {
|
||||
for (auto end = p + s.size() - block_size + 1; p < end;) p = decode(p);
|
||||
for (auto end = p + s.size() - block_size + 1; p < end;) {
|
||||
p = decode(p, p);
|
||||
if (!p) return;
|
||||
}
|
||||
}
|
||||
if (auto num_chars_left = s.data() + s.size() - p) {
|
||||
char buf[2 * block_size - 1] = {};
|
||||
copy_str<char>(p, p + num_chars_left, buf);
|
||||
p = buf;
|
||||
const char* buf_ptr = buf;
|
||||
do {
|
||||
p = decode(p);
|
||||
} while (p - buf < num_chars_left);
|
||||
auto end = decode(buf_ptr, p);
|
||||
if (!end) return;
|
||||
p += end - buf_ptr;
|
||||
buf_ptr = end;
|
||||
} while (buf_ptr - buf < num_chars_left);
|
||||
}
|
||||
}
|
||||
|
||||
@ -518,10 +529,10 @@ FMT_CONSTEXPR inline size_t compute_width(string_view s) {
|
||||
// It is not a lambda for compatibility with C++14.
|
||||
struct count_code_points {
|
||||
size_t* count;
|
||||
FMT_CONSTEXPR void operator()(uint32_t cp, int error) const {
|
||||
FMT_CONSTEXPR auto operator()(uint32_t cp, string_view) const -> bool {
|
||||
*count += detail::to_unsigned(
|
||||
1 +
|
||||
(error == 0 && cp >= 0x1100 &&
|
||||
(cp >= 0x1100 &&
|
||||
(cp <= 0x115f || // Hangul Jamo init. consonants
|
||||
cp == 0x2329 || // LEFT-POINTING ANGLE BRACKET
|
||||
cp == 0x232a || // RIGHT-POINTING ANGLE BRACKET
|
||||
@ -539,6 +550,7 @@ FMT_CONSTEXPR inline size_t compute_width(string_view s) {
|
||||
(cp >= 0x1f300 && cp <= 0x1f64f) ||
|
||||
// Supplemental Symbols and Pictographs:
|
||||
(cp >= 0x1f900 && cp <= 0x1f9ff))));
|
||||
return true;
|
||||
}
|
||||
};
|
||||
for_each_codepoint(s, count_code_points{&num_code_points});
|
||||
|
@ -227,17 +227,65 @@ template <typename OutputIt> OutputIt write_delimiter(OutputIt out) {
|
||||
return out;
|
||||
}
|
||||
|
||||
template <typename Char> inline bool is_printable_ascii(Char c) {
|
||||
return c >= 0x20 && c < 0x7e;
|
||||
// Returns true if cp is treated as a printable code point. A value is
// rejected when it falls into one of the supplementary-plane ranges listed
// below (NOTE(review): presumably unassigned/reserved ranges - verify against
// the Unicode data the table was derived from) or when it lies outside the
// Unicode code space, i.e. above U+10FFFF. The latter also covers sentinel
// values such as ~uint32_t() used to signal a decoding error.
inline auto is_printable(uint32_t cp) -> bool {
  if (0x2a6de <= cp && cp < 0x2a700) return false;
  if (0x2b735 <= cp && cp < 0x2b740) return false;
  if (0x2b81e <= cp && cp < 0x2b820) return false;
  if (0x2cea2 <= cp && cp < 0x2ceb0) return false;
  if (0x2ebe1 <= cp && cp < 0x2f800) return false;
  if (0x2fa1e <= cp && cp < 0x30000) return false;
  if (0x3134b <= cp && cp < 0xe0100) return false;
  if (0xe01f0 <= cp && cp < 0x110000) return false;
  // Values past U+10FFFF are not code points at all and must not be reported
  // as printable.
  return cp < 0x110000;
}
|
||||
|
||||
template <
|
||||
typename Char, typename OutputIt, typename T,
|
||||
FMT_ENABLE_IF(is_std_string_like<typename std::decay<T>::type>::value)>
|
||||
OutputIt write_range_entry(OutputIt out, const T& str) {
|
||||
inline auto needs_escape(uint32_t cp) -> bool {
|
||||
return cp < 0x20 || cp == 0x7f || cp == '"' || cp == '\\' ||
|
||||
!is_printable(cp);
|
||||
}
|
||||
|
||||
// Result of a find_escape scan: the first code point that needs escaping, if
// there is one.
template <typename Char>
struct find_escape_result {
  // Start of the code point to escape; everything before it can be copied
  // verbatim. Equals the end of the scanned input when nothing was found.
  const Char* begin;
  // One past the last code unit of the code point to escape, or nullptr when
  // nothing needs escaping.
  const Char* end;
  // The code point to escape; 0 when nothing was found.
  uint32_t cp;
};
|
||||
|
||||
template <typename Char>
|
||||
auto find_escape(const Char* begin, const Char* end)
|
||||
-> find_escape_result<Char> {
|
||||
for (; begin != end; ++begin) {
|
||||
auto cp = static_cast<typename std::make_unsigned<Char>::type>(*begin);
|
||||
if (needs_escape(cp)) return {begin, begin + 1, cp};
|
||||
}
|
||||
return {begin, nullptr, 0};
|
||||
}
|
||||
|
||||
auto find_escape(const char* begin, const char* end)
|
||||
-> find_escape_result<char> {
|
||||
if (!is_utf8()) return find_escape<char>(begin, end);
|
||||
auto result = find_escape_result<char>{end, nullptr, 0};
|
||||
for_each_codepoint(string_view(begin, to_unsigned(end - begin)),
|
||||
[&](uint32_t cp, string_view sv) {
|
||||
if (needs_escape(cp)) {
|
||||
result = {sv.begin(), sv.end(), cp};
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
});
|
||||
return result;
|
||||
}
|
||||
|
||||
template <typename Char, typename OutputIt>
|
||||
auto write_range_entry(OutputIt out, basic_string_view<Char> str) -> OutputIt {
|
||||
*out++ = '"';
|
||||
for (Char c : basic_string_view<Char>(str)) {
|
||||
switch (c) {
|
||||
auto begin = str.begin(), end = str.end();
|
||||
do {
|
||||
auto escape = find_escape(begin, end);
|
||||
out = copy_str<Char>(begin, escape.begin, out);
|
||||
begin = escape.end;
|
||||
if (!begin) break;
|
||||
auto c = static_cast<Char>(escape.cp);
|
||||
switch (escape.cp) {
|
||||
case '\n':
|
||||
*out++ = '\\';
|
||||
c = 'n';
|
||||
@ -256,13 +304,14 @@ OutputIt write_range_entry(OutputIt out, const T& str) {
|
||||
*out++ = '\\';
|
||||
break;
|
||||
default:
|
||||
if (is_printable_ascii(c)) break;
|
||||
if (sizeof(Char) != 1 && c >= 0x80) break;
|
||||
out = format_to(out, "\\x{:02x}", c);
|
||||
for (Char escape_char : basic_string_view<Char>(
|
||||
escape.begin, to_unsigned(escape.end - escape.begin))) {
|
||||
out = format_to(out, "\\x{:02x}", escape_char);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
*out++ = c;
|
||||
}
|
||||
} while (begin != end);
|
||||
*out++ = '"';
|
||||
return out;
|
||||
}
|
||||
|
@ -264,7 +264,8 @@ TEST(ranges_test, join_range) {
|
||||
#endif // FMT_RANGES_TEST_ENABLE_JOIN
|
||||
|
||||
// Strings inside a formatted range are quoted, with control characters, the
// quote, the backslash and DEL written as escape sequences.
TEST(ranges_test, escape_string) {
  using vec = std::vector<std::string>;
  EXPECT_EQ(fmt::format("{}", vec{"\n\r\t\"\\"}), "[\"\\n\\r\\t\\\"\\\\\"]");
  EXPECT_EQ(fmt::format("{}", vec{"\x07"}), "[\"\\x07\"]");
  EXPECT_EQ(fmt::format("{}", vec{"\x7f"}), "[\"\\x7f\"]");
}
|
||||
|
Loading…
Reference in New Issue
Block a user