Add a subset of the text library as an optional component

This commit is contained in:
Victor Zverovich 2019-09-14 17:47:56 -07:00
parent 0fc7bd1573
commit e5ab813ffb
17 changed files with 8609 additions and 0 deletions

View File

@ -33,6 +33,8 @@ if (MASTER_PROJECT AND NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE Release CACHE STRING ${doc})
endif ()
# Opt-in switch (default OFF). When ON, the text library source is added to
# the fmt target and src/text is added to its build-tree include path.
option(FMT_USE_TEXT "Use the text library." OFF)
option(FMT_PEDANTIC "Enable extra warnings and expensive tests." OFF)
option(FMT_WERROR "Halt the compilation with an error on compiler warnings."
OFF)
@ -160,6 +162,10 @@ if (HAVE_OPEN)
set(FMT_SOURCES ${FMT_SOURCES} src/posix.cc)
endif ()
# Compile the text library's grapheme-break implementation into fmt when the
# FMT_USE_TEXT option is enabled.
if (FMT_USE_TEXT)
set(FMT_SOURCES ${FMT_SOURCES} src/text/grapheme_break.cpp)
endif ()
add_library(fmt ${FMT_SOURCES} ${FMT_HEADERS} README.rst ChangeLog.rst)
add_library(fmt::fmt ALIAS fmt)
@ -180,6 +186,11 @@ target_include_directories(fmt PUBLIC
$<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/include>
$<INSTALL_INTERFACE:include>)
# Put src/text on the include path so the bundled <boost/...> headers resolve.
# Only BUILD_INTERFACE is given, so this path applies to the build tree only.
if (FMT_USE_TEXT)
target_include_directories(fmt PUBLIC
$<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/src/text>)
endif ()
set_target_properties(fmt PROPERTIES
VERSION ${FMT_VERSION} SOVERSION ${CPACK_PACKAGE_VERSION_MAJOR}
DEBUG_POSTFIX d)

View File

@ -0,0 +1,4 @@
// Minimal stand-in for Boost.Assert: unless BOOST_ASSERT is already defined,
// map it onto the standard assert() macro.
#ifndef BOOST_ASSERT
#include <assert.h>
# define BOOST_ASSERT(condition) assert(condition)
#endif

View File

@ -0,0 +1,3 @@
// Minimal stand-in for Boost.Config: unless it is already defined,
// BOOST_CXX14_CONSTEXPR expands to nothing.
#ifndef BOOST_CXX14_CONSTEXPR
# define BOOST_CXX14_CONSTEXPR
#endif

View File

@ -0,0 +1,13 @@
#ifndef TEXT_BOOST_CONTAINER_SMALL_VECTOR_HPP
#define TEXT_BOOST_CONTAINER_SMALL_VECTOR_HPP

#include <cstddef>
#include <vector>

namespace boost {
namespace container {

/** Stand-in for `boost::container::small_vector`: a plain alias for
    `std::vector<T>`.  The second (capacity) template argument is accepted
    for source compatibility and otherwise ignored.

    The capacity parameter is spelled `std::size_t` and <cstddef> is included
    explicitly, so the header does not rely on `::size_t` leaking into the
    global namespace through <vector>. */
template <typename T, std::size_t>
using small_vector = std::vector<T>;

}
}

#endif // TEXT_BOOST_CONTAINER_SMALL_VECTOR_HPP

View File

@ -0,0 +1,253 @@
#ifndef BOOST_TEXT_ALGORITHM_HPP
#define BOOST_TEXT_ALGORITHM_HPP
#include <boost/text/detail/sentinel_tag.hpp>
#include <cstddef>
#include <iterator>
namespace boost { namespace text {
namespace detail {
    /** Iterator-pair overload (selected with `non_sentinel_tag`): forwards
        to `std::distance()`. */
    template<typename Iter>
    std::ptrdiff_t distance(Iter first, Iter last, non_sentinel_tag)
    {
        return std::distance(first, last);
    }

    /** Iterator/sentinel overload (selected with `sentinel_tag`): steps
        `first` forward until it compares equal to `last` and returns the
        number of steps taken. */
    template<typename Iter, typename Sentinel>
    std::ptrdiff_t distance(Iter first, Sentinel last, sentinel_tag)
    {
        std::ptrdiff_t steps = 0;
        for (; first != last; ++first)
            ++steps;
        return steps;
    }
}
/** Range-friendly version of `std::distance()`, taking an iterator and a
sentinel. */
template<typename Iter, typename Sentinel>
std::ptrdiff_t distance(Iter first, Sentinel last)
{
return detail::distance(
first,
last,
typename std::conditional<
std::is_same<Iter, Sentinel>::value,
detail::non_sentinel_tag,
detail::sentinel_tag>::type());
}
/** Sentinel-aware counterpart of `std::find()`: returns an iterator to the
    first element of `[first, last)` that compares equal to `x`, or the
    iterator at which `last` was reached if there is none. */
template<typename BidiIter, typename Sentinel, typename T>
BidiIter find(BidiIter first, Sentinel last, T const & x)
{
    for (; first != last; ++first) {
        if (*first == x)
            break;
    }
    return first;
}
/** A sentinel-aware complement to `std::find()`: returns an iterator to the
    first element of `[first, last)` that is not equal to `x`, or the
    iterator at which `last` was reached if every element equals `x`. */
template<typename BidiIter, typename Sentinel, typename T>
BidiIter find_not(BidiIter first, Sentinel last, T const & x)
{
    for (; first != last; ++first) {
        if (*first != x)
            break;
    }
    return first;
}
/** Sentinel-aware counterpart of `std::find_if()`: returns an iterator to
    the first element of `[first, last)` for which `p` is true, or the
    iterator at which `last` was reached if there is none. */
template<typename BidiIter, typename Sentinel, typename Pred>
BidiIter find_if(BidiIter first, Sentinel last, Pred p)
{
    for (; first != last; ++first) {
        if (p(*first))
            break;
    }
    return first;
}
/** Sentinel-aware counterpart of `std::find_if_not()`: returns an iterator
    to the first element of `[first, last)` for which `p` is false, or the
    iterator at which `last` was reached if there is none. */
template<typename BidiIter, typename Sentinel, typename Pred>
BidiIter find_if_not(BidiIter first, Sentinel last, Pred p)
{
    for (; first != last; ++first) {
        if (!p(*first))
            break;
    }
    return first;
}
/** Backward analogue of `std::find()`: returns an iterator to the last
    element of `[first, last)` equal to `x`, or `last` if there is none. */
template<typename BidiIter, typename T>
BidiIter find_backward(BidiIter first, BidiIter last, T const & x)
{
    for (auto cursor = last; cursor != first;) {
        --cursor;
        if (*cursor == x)
            return cursor;
    }
    return last;
}
/** Backward analogue of `find_not()`: returns an iterator to the last
    element of `[first, last)` that is not equal to `x`, or `last` if every
    element equals `x`. */
template<typename BidiIter, typename T>
BidiIter find_not_backward(BidiIter first, BidiIter last, T const & x)
{
    for (auto cursor = last; cursor != first;) {
        --cursor;
        if (*cursor != x)
            return cursor;
    }
    return last;
}
/** Backward analogue of `std::find_if()`: returns an iterator to the last
    element `v` of `[first, last)` for which `p(v)` is true, or `last` if
    there is none. */
template<typename BidiIter, typename Pred>
BidiIter find_if_backward(BidiIter first, BidiIter last, Pred p)
{
    for (auto cursor = last; cursor != first;) {
        --cursor;
        if (p(*cursor))
            return cursor;
    }
    return last;
}
/** Backward analogue of `std::find_if_not()`: returns an iterator to the
    last element `v` of `[first, last)` for which `p(v)` is false, or `last`
    if there is none. */
template<typename BidiIter, typename Pred>
BidiIter find_if_not_backward(BidiIter first, BidiIter last, Pred p)
{
    for (auto cursor = last; cursor != first;) {
        --cursor;
        if (!p(*cursor))
            return cursor;
    }
    return last;
}
/** The range type handed to the callback by the `foreach_subrange*()`
    functions: a stored iterator/sentinel pair exposed through `begin()`
    and `end()`. */
template<typename Iter, typename Sentinel = Iter>
struct foreach_subrange_range
{
    typedef Iter iterator;
    typedef Sentinel sentinel;

    /** Leaves both stored positions default-initialized. */
    foreach_subrange_range() {}

    /** Stores the `[first, last)` pair. */
    foreach_subrange_range(iterator first, sentinel last) :
        begin_(first), end_(last)
    {}

    iterator begin() const noexcept { return begin_; }
    sentinel end() const noexcept { return end_; }

private:
    iterator begin_;
    sentinel end_;
};
/** Invokes `f(sub)` once for every run `sub` in `[first, last)`, where a
    run is a maximal contiguous sequence of elements that each compare equal
    to the run's first element.  The runs passed to `f` do not overlap.
    Returns `f`. */
template<typename FwdIter, typename Sentinel, typename Func>
Func foreach_subrange(FwdIter first, Sentinel last, Func f)
{
    for (; first != last;) {
        auto const & head = *first;
        // End of the run that starts at `first`.
        auto const run_end = find_not(first, last, head);
        if (first != run_end)
            f(foreach_subrange_range<FwdIter, Sentinel>(first, run_end));
        first = run_end;
    }
    return f;
}
/** Invokes `f(sub)` once for every run `sub` in `[first, last)`, where a
    run is a maximal contiguous sequence of elements `e` whose `proj(e)`
    compares equal to `proj()` of the run's first element.  The runs passed
    to `f` do not overlap.  Returns `f`. */
template<typename FwdIter, typename Sentinel, typename Func, typename Proj>
Func foreach_subrange(FwdIter first, Sentinel last, Func f, Proj proj)
{
    using element_type = typename std::iterator_traits<FwdIter>::value_type;
    for (; first != last;) {
        // Projection of the run's first element; every member must match it.
        auto const & key = proj(*first);
        auto const run_end = find_if_not(
            first, last, [&key, proj](const element_type & candidate) {
                return proj(candidate) == key;
            });
        if (first != run_end)
            f(foreach_subrange_range<FwdIter, Sentinel>(first, run_end));
        first = run_end;
    }
    return f;
}
/** Invokes `f(sub)` once for every run `sub` in `[first, last)`, where a
    run is a maximal contiguous sequence of elements each equal to `x`.
    Elements not equal to `x` are skipped.  The runs passed to `f` do not
    overlap.  Returns `f`. */
template<typename FwdIter, typename Sentinel, typename T, typename Func>
Func foreach_subrange_of(FwdIter first, Sentinel last, T const & x, Func f)
{
    for (; first != last;) {
        // Skip ahead to the next element equal to `x`, then measure its run.
        first = find(first, last, x);
        auto const run_end = find_not(first, last, x);
        if (first != run_end)
            f(foreach_subrange_range<FwdIter, Sentinel>(first, run_end));
        first = run_end;
    }
    return f;
}
/** Invokes `f(sub)` once for every run `sub` in `[first, last)`, where a
    run is a maximal contiguous sequence of elements `e` for which `p(e)` is
    true.  Elements failing `p` are skipped.  The runs passed to `f` do not
    overlap.  Returns `f`. */
template<typename FwdIter, typename Sentinel, typename Pred, typename Func>
Func foreach_subrange_if(FwdIter first, Sentinel last, Pred p, Func f)
{
    for (; first != last;) {
        // Skip ahead to the next element satisfying `p`, then measure its
        // run.
        first = boost::text::find_if(first, last, p);
        auto const run_end = boost::text::find_if_not(first, last, p);
        if (first != run_end)
            f(foreach_subrange_range<FwdIter, Sentinel>(first, run_end));
        first = run_end;
    }
    return f;
}
/** Sentinel-aware counterpart of `std::all_of()`: returns false as soon as
    `p` is false for an element of `[first, last)`, and true otherwise
    (including for an empty range). */
template<typename Iter, typename Sentinel, typename Pred>
bool all_of(Iter first, Sentinel last, Pred p)
{
    while (first != last) {
        if (!p(*first))
            return false;
        ++first;
    }
    return true;
}
}}
#endif

104
src/text/boost/text/config.hpp Executable file
View File

@ -0,0 +1,104 @@
// Build-configuration macros for the text library.  Every setting below is
// only given a default when the user has not already defined it.
#ifndef BOOST_TEXT_CONFIG_HPP
#define BOOST_TEXT_CONFIG_HPP
#include <boost/config.hpp>
/** There are ICU-based implementations of many operations, but those are only
defined when BOOST_TEXT_HAS_ICU is nonzero. If you define this, you must
make sure that the ICU headers are in your path, and that your build
properly links in ICU. */
#ifndef BOOST_TEXT_HAS_ICU
# define BOOST_TEXT_HAS_ICU 0
#endif
/** There are ICU-based implementations of many operations, but those are only
used when BOOST_TEXT_HAS_ICU and BOOST_TEXT_USE_ICU are both nonzero. */
#ifndef BOOST_TEXT_USE_ICU
# define BOOST_TEXT_USE_ICU 0
#endif
/** When you insert into a rope, the incoming sequence may be inserted as a
new segment, or if it falls within an existing string-segment, it may be
inserted into the string object used to represent that segment. This only
happens if the incoming sequence will fit within the existing segment's
capacity, or if the segment is smaller than a certain limit.
BOOST_TEXT_STRING_INSERT_MAX is that limit. */
#ifndef BOOST_TEXT_STRING_INSERT_MAX
# define BOOST_TEXT_STRING_INSERT_MAX 4096
#endif
// Everything from here to the matching "#endif // doxygen" is skipped when
// BOOST_TEXT_DOXYGEN is defined.
#ifndef BOOST_TEXT_DOXYGEN
// Nothing before GCC 6 has proper C++14 constexpr support.
// On those compilers (and MSVC up to _MSC_VER 1915) BOOST_TEXT_CXX14_CONSTEXPR
// expands to nothing and BOOST_TEXT_NO_CXX14_CONSTEXPR is defined; otherwise
// both follow the corresponding Boost.Config macros.
#if defined(__GNUC__) && __GNUC__ < 6 && !defined(__clang__)
# define BOOST_TEXT_CXX14_CONSTEXPR
# define BOOST_TEXT_NO_CXX14_CONSTEXPR
#elif defined(_MSC_VER) && _MSC_VER <= 1915
# define BOOST_TEXT_CXX14_CONSTEXPR
# define BOOST_TEXT_NO_CXX14_CONSTEXPR
#else
# define BOOST_TEXT_CXX14_CONSTEXPR BOOST_CXX14_CONSTEXPR
# if defined(BOOST_NO_CXX14_CONSTEXPR)
# define BOOST_TEXT_NO_CXX14_CONSTEXPR
# endif
#endif
// Implements separate compilation features as described in
// http://www.boost.org/more/separate_compilation.html
// normalize macros
// Static linking is the default when no linkage macro has been defined.
#if !defined(BOOST_TEXT_DYN_LINK) && !defined(BOOST_TEXT_STATIC_LINK) && \
!defined(BOOST_ALL_DYN_LINK) && !defined(BOOST_ALL_STATIC_LINK)
# define BOOST_TEXT_STATIC_LINK
#endif
// The Boost-wide BOOST_ALL_* switches imply the library-specific ones.
#if defined(BOOST_ALL_DYN_LINK) && !defined(BOOST_TEXT_DYN_LINK)
# define BOOST_TEXT_DYN_LINK
#elif defined(BOOST_ALL_STATIC_LINK) && !defined(BOOST_TEXT_STATIC_LINK)
# define BOOST_TEXT_STATIC_LINK
#endif
#if defined(BOOST_TEXT_DYN_LINK) && defined(BOOST_TEXT_STATIC_LINK)
# error Must not define both BOOST_TEXT_DYN_LINK and BOOST_TEXT_STATIC_LINK
#endif
// enable dynamic or static linking as requested
// BOOST_TEXT_DECL is the export/import decoration for dynamic builds
// (export when BOOST_TEXT_SOURCE is defined, import otherwise) and is empty
// for static builds.
#if defined(BOOST_ALL_DYN_LINK) || defined(BOOST_TEXT_DYN_LINK)
# if defined(BOOST_TEXT_SOURCE)
# define BOOST_TEXT_DECL BOOST_SYMBOL_EXPORT
# else
# define BOOST_TEXT_DECL BOOST_SYMBOL_IMPORT
# endif
#else
# define BOOST_TEXT_DECL
#endif
#if 0 // TODO: Disabled for now.
// enable automatic library variant selection
#if !defined(BOOST_TEXT_SOURCE) && !defined(BOOST_ALL_NO_LIB) && \
!defined(BOOST_TEXT_NO_LIB)
//
// Set the name of our library, this will get undef'ed by auto_link.hpp
// once it's done with it:
//
#define BOOST_LIB_NAME boost_text
//
// If we're importing code from a dll, then tell auto_link.hpp about it:
//
#if defined(BOOST_ALL_DYN_LINK) || defined(BOOST_TEXT_DYN_LINK)
# define BOOST_DYN_LINK
#endif
//
// And include the header that does the work:
//
#include <boost/config/auto_link.hpp>
#endif // !BOOST_TEXT_SOURCE && !BOOST_ALL_NO_LIB && !BOOST_TEXT_NO_LIB
#endif // #if 0: auto-linking disabled
#endif // doxygen
#endif // BOOST_TEXT_CONFIG_HPP

View File

@ -0,0 +1,580 @@
#ifndef BOOST_TEXT_DETAIL_ALGORITHM_HPP
#define BOOST_TEXT_DETAIL_ALGORITHM_HPP
#include <boost/text/detail/iterator.hpp>
#include <boost/assert.hpp>
#include <numeric>
#include <type_traits>
#include <utility>
// Forward declarations only: they let the range traits below name these two
// types without this header needing their definitions.
namespace boost { namespace text {
struct unencoded_rope;
struct unencoded_rope_view;
}}
namespace boost { namespace text { namespace detail {
/** Maps any list of types to `void` through a nested `type` member. */
template<typename... Ts>
struct void_
{
    typedef void type;
};

/** `void` for any list of well-formed types `Ts...`; used to drive SFINAE
    in partial specializations. */
template<typename... Ts>
using void_t = typename void_<Ts...>::type;
/** Primary template: non-pointer types are passed through unchanged. */
template<typename T>
struct fixup_ptr
{
    typedef T type;
};

/** `T` with any top-level `volatile` removed. */
template<typename T>
using remove_v_t = typename std::remove_volatile<T>::type;

/** Pointer specialization: `Pointee *` becomes a pointer to
    `const`-qualified, non-`volatile` `Pointee`. */
template<typename Pointee>
struct fixup_ptr<Pointee *>
{
    typedef remove_v_t<Pointee> const * type;
};

/** Shorthand for `fixup_ptr<T>::type`. */
template<typename T>
using fixup_ptr_t = typename fixup_ptr<T>::type;
/** `T` with any reference removed, and then any top-level `const` and
    `volatile` removed. */
template<typename T>
using remove_cv_ref_t = typename std::remove_const<
    typename std::remove_volatile<
        typename std::remove_reference<T>::type>::type>::type;
/** Placeholder type that `detected_t` yields when detection fails. */
struct nonesuch
{};
/** Primary template, used when `Template<Args...>` is ill-formed:
`value_t` is `std::false_type` and `type` is `Default`. */
template<
typename Default,
typename AlwaysVoid,
template<typename...> class Template,
typename... Args>
struct detector
{
using value_t = std::false_type;
using type = Default;
};
/** Partial specialization, used when `Template<Args...>` is well-formed (so
that `void_t<Template<Args...>>` names `void`, matching the `AlwaysVoid`
argument supplied by the aliases below): `value_t` is `std::true_type` and
`type` is `Template<Args...>`. */
template<
typename Default,
template<typename...> class Template,
typename... Args>
struct detector<Default, void_t<Template<Args...>>, Template, Args...>
{
using value_t = std::true_type;
using type = Template<Args...>;
};
/** `std::true_type` if `Template<Args...>` is well-formed, otherwise
`std::false_type`. */
template<template<typename...> class Template, typename... Args>
using is_detected =
typename detector<nonesuch, void, Template, Args...>::value_t;
/** `Template<Args...>` if it is well-formed, otherwise `nonesuch`. */
template<template<typename...> class Template, typename... Args>
using detected_t =
typename detector<nonesuch, void, Template, Args...>::type;
/** `Template<Args...>` if it is well-formed, otherwise `Default`. */
template<
typename Default,
template<typename...> class Template,
typename... Args>
using detected_or =
typename detector<Default, void, Template, Args...>::type;
/** Detection archetype: the type of `*std::begin(t)` for a value `t` of
type `T`. */
template<typename T>
using has_begin = decltype(*std::begin(std::declval<T>()));
/** Detection archetype: the type of `*std::end(t)` for a value `t` of type
`T`. */
template<typename T>
using has_end = decltype(*std::end(std::declval<T>()));
/** `T::value_type` with any reference and cv-qualifiers removed. */
template<typename T>
using value_type_ = typename std::remove_cv<
typename std::remove_reference<typename T::value_type>::type>::type;
/** Detection archetype: `T::iterator::iterator_category`. */
template<typename T>
using nonpointer_iterator_category_ =
typename T::iterator::iterator_category;
/** `std::random_access_iterator_tag` when `T::iterator` is a pointer type;
otherwise `T::iterator::iterator_category` if that names a type, or
`nonesuch` if it does not.  Note that `T::iterator` itself is named
unconditionally here. */
template<typename T>
using iterator_category_ = typename std::conditional<
std::is_pointer<typename T::iterator>::value,
std::random_access_iterator_tag,
detected_t<nonpointer_iterator_category_, T>>::type;
/** True when `T` occupies exactly `N` bytes and is convertible to `U`. */
template<typename T, typename U, int N>
struct is_convertible_and_n_bytes
    : std::integral_constant<
          bool,
          sizeof(T) == N && std::is_convertible<T, U>::value>
{};
/** True when `T`, ignoring top-level cv-qualifiers, is `char *` or
`char const *`, or when `T::value_type` (cv/ref-stripped) is convertible to
`char` and is one byte in size. */
template<typename T>
using is_char_iter = std::integral_constant<
bool,
std::is_same<char *, typename std::remove_cv<T>::type>::value ||
std::is_same<char const *, typename std::remove_cv<T>::type>::
value ||
is_convertible_and_n_bytes<detected_t<value_type_, T>, char, 1>::
value>;
/** True when `T` (cv/ref-stripped) is `unencoded_rope_view` or
`unencoded_rope`, or when the types of both `*std::begin(t)` and
`*std::end(t)` (cv/ref-stripped) are convertible to `char` and one byte in
size. */
template<typename T>
using is_char_range = std::integral_constant<
bool,
std::is_same<remove_cv_ref_t<T>, unencoded_rope_view>::value ||
std::is_same<remove_cv_ref_t<T>, unencoded_rope>::value ||
(is_convertible_and_n_bytes<
remove_cv_ref_t<detected_t<has_begin, T>>,
char,
1>::value &&
is_convertible_and_n_bytes<
remove_cv_ref_t<detected_t<has_end, T>>,
char,
1>::value)>;
/** SFINAE helper.  The primary template has no `type` member; the
specialization below provides `type = T` when `R1` satisfies
`is_char_range` and is the same type as neither `Exclude1` nor `Exclude2`. */
template<
typename T,
typename R1,
typename Exclude1,
typename Exclude2,
bool R1IsCharRange = is_char_range<R1>::value &&
!std::is_same<R1, Exclude1>::value &&
!std::is_same<R1, Exclude2>::value>
struct rng_alg_ret
{
};
template<typename T, typename R1, typename Exclude1, typename Exclude2>
struct rng_alg_ret<T, R1, Exclude1, Exclude2, true>
{
using type = T;
};
/** `T` when `R1` is a char range other than `Exclude1`/`Exclude2` (both
default to `void`); otherwise a substitution failure. */
template<
typename T,
typename R1,
typename Exclude1 = void,
typename Exclude2 = void>
using rng_alg_ret_t = typename rng_alg_ret<T, R1, Exclude1, Exclude2>::type;
/** SFINAE helper for two ranges.  The primary template has no `type`
member; the specialization below provides `type = T` when both `R1` and
`R2` satisfy `is_char_range`. */
template<
typename T,
typename R1,
typename R2,
bool R1IsCharRange = is_char_range<R1>::value,
bool R2IsCharRange = is_char_range<R2>::value>
struct rngs_alg_ret
{
};
template<typename T, typename R1, typename R2>
struct rngs_alg_ret<T, R1, R2, true, true>
{
using type = T;
};
/** `T` when both `R1` and `R2` are char ranges; otherwise a substitution
failure. */
template<typename T, typename R1, typename R2>
using rngs_alg_ret_t = typename rngs_alg_ret<T, R1, R2>::type;
/** Detection archetype: the type of `&*std::begin(t)` for a value `t` of
type `T`. */
template<typename T>
using has_contig_begin = decltype(&*std::begin(std::declval<T>()));
/** Detection archetype: the type of `&*std::end(t)` for a value `t` of type
`T`. */
template<typename T>
using has_contig_end = decltype(&*std::end(std::declval<T>()));
/** True when `&*std::begin(t)` and `&*std::end(t)` both have type
`char const *` after `fixup_ptr_t` is applied, `iterator_category_<T>` is
convertible to `std::random_access_iterator_tag`, and `T` is neither
`unencoded_rope` nor `unencoded_rope_view`. */
template<typename T>
using is_contig_char_range = std::integral_constant<
bool,
std::is_same<
fixup_ptr_t<detected_t<has_contig_begin, T>>,
char const *>::value &&
std::is_same<
fixup_ptr_t<detected_t<has_contig_end, T>>,
char const *>::value &&
std::is_convertible<
iterator_category_<T>,
std::random_access_iterator_tag>::value &&
!std::is_same<T, unencoded_rope>::value &&
!std::is_same<T, unencoded_rope_view>::value>;
/** SFINAE helper.  The primary template has no `type` member; the
specialization below provides `type = T` when `R1` satisfies
`is_contig_char_range`. */
template<
typename T,
typename R1,
bool R1IsContigCharRange = is_contig_char_range<R1>::value>
struct contig_rng_alg_ret
{
};
template<typename T, typename R1>
struct contig_rng_alg_ret<T, R1, true>
{
using type = T;
};
/** `T` when `R1` is a contiguous char range; otherwise a substitution
failure. */
template<typename T, typename R1>
using contig_rng_alg_ret_t = typename contig_rng_alg_ret<T, R1>::type;
/** SFINAE helper for two ranges.  The primary template has no `type`
member; the specialization below provides `type = T` when both `R1` and
`R2` satisfy `is_contig_char_range`. */
template<
typename T,
typename R1,
typename R2,
bool R1IsContigCharRange = is_contig_char_range<R1>::value,
bool R2IsContigCharRange = is_contig_char_range<R2>::value>
struct contig_rngs_alg_ret
{
};
template<typename T, typename R1, typename R2>
struct contig_rngs_alg_ret<T, R1, R2, true, true>
{
using type = T;
};
/** `T` when both `R1` and `R2` are contiguous char ranges; otherwise a
substitution failure. */
template<typename T, typename R1, typename R2>
using contig_rngs_alg_ret_t = typename contig_rngs_alg_ret<T, R1, R2>::type;
/** True when the types of both `*std::begin(t)` and `*std::end(t)`
(cv/ref-stripped) are convertible to `uint16_t` and two bytes in size. */
template<typename T>
using is_char16_range = std::integral_constant<
bool,
(is_convertible_and_n_bytes<
remove_cv_ref_t<detected_t<has_begin, T>>,
uint16_t,
2>::value &&
is_convertible_and_n_bytes<
remove_cv_ref_t<detected_t<has_end, T>>,
uint16_t,
2>::value)>;
/** SFINAE helper.  The primary template has no `type` member; the
specialization below provides `type = T` when `R1` satisfies
`is_char16_range`. */
template<
typename T,
typename R1,
bool R1IsChar16Range = is_char16_range<R1>::value>
struct rng16_alg_ret
{
};
template<typename T, typename R1>
struct rng16_alg_ret<T, R1, true>
{
using type = T;
};
/** `T` when `R1` is a range of 16-bit values as described above; otherwise
a substitution failure. */
template<typename T, typename R1>
using rng16_alg_ret_t = typename rng16_alg_ret<T, R1>::type;
/** SFINAE helper.  The primary template has no `type` member; the
specialization below provides `type = T` when `R1` satisfies
`is_char_iter`.  (Despite its name, the defaulted `R1IsCharRange` parameter
holds the result of the iterator trait, not a range trait.) */
template<
typename T,
typename R1,
bool R1IsCharRange = is_char_iter<R1>::value>
struct char_iter_ret
{
};
template<typename T, typename R1>
struct char_iter_ret<T, R1, true>
{
using type = T;
};
/** `T` when `R1` is a char iterator; otherwise a substitution failure. */
template<typename T, typename R1>
using char_iter_ret_t = typename char_iter_ret<T, R1>::type;
/** True when `T` is an unsigned integral type exactly four bytes wide. */
template<typename T>
using is_code_point = std::integral_constant<
    bool,
    (sizeof(T) == 4 && std::is_integral<T>::value &&
     std::is_unsigned<T>::value)>;
/** Detection archetype: well-formed when a `T` value can be dereferenced
with unary `*` and pre-incremented with `++`. */
template<typename T>
using has_deref_and_incr =
std::pair<decltype(*std::declval<T>()), decltype(++std::declval<T>())>;
/** True when `T` is a pointer whose pointee (ignoring cv) satisfies
`is_code_point`, or when `T` supports dereference and pre-increment and
`T::value_type` (cv/ref-stripped) satisfies `is_code_point`. */
template<typename T>
using is_cp_iter = std::integral_constant<
bool,
((std::is_pointer<T>::value &&
is_code_point<typename std::remove_cv<
typename std::remove_pointer<T>::type>::type>::value) ||
(is_detected<has_deref_and_incr, T>::value &&
is_code_point<typename std::remove_cv<
detected_t<value_type_, T>>::type>::value))>;
/** SFINAE helper.  The primary template has no `type` member; the
specialization below provides `type = T` when `R1` satisfies `is_cp_iter`. */
template<typename T, typename R1, bool R1IsCPRange = is_cp_iter<R1>::value>
struct cp_iter_ret
{
};
template<typename T, typename R1>
struct cp_iter_ret<T, R1, true>
{
using type = T;
};
/** `T` when `R1` is a code point iterator; otherwise a substitution
failure. */
template<typename T, typename R1>
using cp_iter_ret_t = typename cp_iter_ret<T, R1>::type;
/** True when `T` is an unsigned integral type exactly two bytes wide. */
template<typename T>
using is_16_code_unit = std::integral_constant<
    bool,
    (sizeof(T) == 2 && std::is_integral<T>::value &&
     std::is_unsigned<T>::value)>;
/** True when `T` is a pointer whose pointee (ignoring cv) satisfies
`is_16_code_unit`, or when `T` supports dereference and pre-increment and
`T::value_type` (cv/ref-stripped) satisfies `is_16_code_unit`. */
template<typename T>
using is_16_iter = std::integral_constant<
bool,
((std::is_pointer<T>::value &&
is_16_code_unit<typename std::remove_cv<
typename std::remove_pointer<T>::type>::type>::value) ||
(is_detected<has_deref_and_incr, T>::value &&
is_16_code_unit<typename std::remove_cv<
detected_t<value_type_, T>>::type>::value))>;
/** SFINAE helper.  The primary template has no `type` member; the
specialization below provides `type = T` when `R1` satisfies `is_16_iter`.
(The defaulted parameter is named `R1IsCPRange` but holds the `is_16_iter`
result.) */
template<typename T, typename R1, bool R1IsCPRange = is_16_iter<R1>::value>
struct _16_iter_ret
{
};
template<typename T, typename R1>
struct _16_iter_ret<T, R1, true>
{
using type = T;
};
/** `T` when `R1` is an iterator over 16-bit code units; otherwise a
substitution failure. */
template<typename T, typename R1>
using _16_iter_ret_t = typename _16_iter_ret<T, R1>::type;
/** True when `T` is an integral type (signed or unsigned) exactly one byte
    wide. */
template<typename T>
using is_8_code_unit =
    std::integral_constant<bool, sizeof(T) == 1 && std::is_integral<T>::value>;
/** True when `T` is a pointer whose pointee (ignoring cv) satisfies
`is_8_code_unit`, or when `T` supports dereference and pre-increment and
`T::value_type` (cv/ref-stripped) satisfies `is_8_code_unit`. */
template<typename T>
using is_8_iter = std::integral_constant<
bool,
((std::is_pointer<T>::value &&
is_8_code_unit<typename std::remove_cv<
typename std::remove_pointer<T>::type>::type>::value) ||
(is_detected<has_deref_and_incr, T>::value &&
is_8_code_unit<typename std::remove_cv<
detected_t<value_type_, T>>::type>::value))>;
/** SFINAE helper.  The primary template has no `type` member; the
specialization below provides `type = T` when `R1` satisfies `is_8_iter`.
(The defaulted parameter is named `R1IsCPRange` but holds the `is_8_iter`
result.) */
template<typename T, typename R1, bool R1IsCPRange = is_8_iter<R1>::value>
struct _8_iter_ret
{
};
template<typename T, typename R1>
struct _8_iter_ret<T, R1, true>
{
using type = T;
};
/** `T` when `R1` is an iterator over 8-bit code units; otherwise a
substitution failure. */
template<typename T, typename R1>
using _8_iter_ret_t = typename _8_iter_ret<T, R1>::type;
/** Detection archetype: the type of `t == u` for values of types `T` and
`U`. */
template<typename T, typename U>
using comparable_ = decltype(std::declval<T>() == std::declval<U>());
/** SFINAE helper.  The primary template has no `type` member; the
specialization below provides `type = T` when `CPIter` satisfies
`is_cp_iter` and a `CPIter` can be compared with a `Sentinel` using `==`.
(The defaulted parameter is named `FIsWordPropFunc` but holds that
iterator/sentinel check.) */
template<
typename T,
typename CPIter,
typename Sentinel,
bool FIsWordPropFunc = is_cp_iter<CPIter>::value &&
is_detected<comparable_, CPIter, Sentinel>::value>
struct cp_iter_sntl_ret
{
};
template<typename T, typename CPIter, typename Sentinel>
struct cp_iter_sntl_ret<T, CPIter, Sentinel, true>
{
using type = T;
};
/** `T` when `CPIter`/`Sentinel` form a code point iterator and a sentinel
comparable with it; otherwise a substitution failure. */
template<typename T, typename CPIter, typename Sentinel>
using cp_iter_sntl_ret_t =
typename cp_iter_sntl_ret<T, CPIter, Sentinel>::type;
/** Range form of `cp_iter_sntl_ret_t`, applied to the types returned by
`R1`'s `begin()` and `end()` member functions. */
template<typename T, typename R1>
using cp_rng_alg_ret_t = cp_iter_sntl_ret_t<
T,
decltype(std::declval<R1>().begin()),
decltype(std::declval<R1>().end())>;
/** True when, for a `const T`, both `begin().base().base()` and
`end().base().base()` have (cv/ref-stripped) types satisfying
`is_char_iter`.  Those member-call chains are named directly rather than
through the detection idiom. */
template<typename T>
using is_grapheme_char_range = std::integral_constant<
bool,
is_char_iter<remove_cv_ref_t<decltype(
std::declval<const T>().begin().base().base())>>::value &&
is_char_iter<remove_cv_ref_t<decltype(
std::declval<const T>().end().base().base())>>::value>;
/** SFINAE helper.  The primary template has no `type` member; the
specialization below provides `type = T` when `R1` satisfies
`is_grapheme_char_range`. */
template<
typename T,
typename R1,
bool R1IsGraphemeCharRange = is_grapheme_char_range<R1>::value>
struct graph_rng_alg_ret
{
};
template<typename T, typename R1>
struct graph_rng_alg_ret<T, R1, true>
{
using type = T;
};
/** `T` when `R1` is a grapheme char range as described above; otherwise a
substitution failure. */
template<typename T, typename R1>
using graph_rng_alg_ret_t = typename graph_rng_alg_ret<T, R1>::type;
/** True when, for a `const T`, `begin().base().base()` has type
`char const *` or `char *`, and `end().base().base()` likewise. */
template<typename T>
using is_contig_grapheme_char_range = std::integral_constant<
bool,
(std::is_same<
decltype(std::declval<const T>().begin().base().base()),
char const *>::value ||
std::is_same<
decltype(std::declval<const T>().begin().base().base()),
char *>::value) &&
(std::is_same<
decltype(std::declval<const T>().end().base().base()),
char const *>::value ||
std::is_same<
decltype(std::declval<const T>().end().base().base()),
char *>::value)>;
/** SFINAE helper.  The primary template has no `type` member; the
specialization below provides `type = T` when `R1` satisfies
`is_contig_grapheme_char_range`. */
template<
typename T,
typename R1,
bool R1IsContigGraphemeCharRange =
is_contig_grapheme_char_range<R1>::value>
struct contig_graph_rng_alg_ret
{
};
template<typename T, typename R1>
struct contig_graph_rng_alg_ret<T, R1, true>
{
using type = T;
};
/** `T` when `R1` is a contiguous grapheme char range as described above;
otherwise a substitution failure. */
template<typename T, typename R1>
using contig_graph_rng_alg_ret_t =
typename contig_graph_rng_alg_ret<T, R1>::type;
/** Wraps `it` in a `detail::reverse_iterator<Iter>`. */
template<typename Iter>
detail::reverse_iterator<Iter> make_reverse_iterator(Iter it)
{
    detail::reverse_iterator<Iter> wrapped(it);
    return wrapped;
}
/** Mixes `value` into `seed` and returns the combined hash:
    `seed ^ (value + 0x9e3779b9 + (seed << 6) + (seed >> 2))`. */
inline std::size_t
hash_combine_(std::size_t seed, std::size_t value) noexcept
{
    std::size_t const mixed = value + 0x9e3779b9 + (seed << 6) + (seed >> 2);
    return seed ^ mixed;
}
/** Extension point used when hashing `sizeof(std::size_t)`-byte words.
    The primary template is for word sizes that need no bytes beyond the
    first four: it returns `curr` unchanged. */
template<int N>
struct hash_4_more_chars
{
    template<typename Iter>
    static std::size_t call(std::size_t curr, Iter /*it*/)
    {
        return curr;
    }
};

/** Specialization for 8-byte words: moves the four bytes already packed in
    `curr` into the upper half and packs the bytes at `it + 4` through
    `it + 7` into the lower half, highest-order byte first. */
template<>
struct hash_4_more_chars<8>
{
    template<typename Iter>
    static std::size_t call(std::size_t curr, Iter it)
    {
        curr <<= 32;
        // Bytes 4, 5, 6, 7 take shifts 24, 16, 8, 0, mirroring how the
        // first four bytes of the word are packed.
        curr += (*(it + 4) << 24) + (*(it + 5) << 16) + (*(it + 6) << 8) +
                (*(it + 7) << 0);
        return curr;
    }
};
/** Hashes the elements of `r`.  The hash is seeded with the element count;
    the range is then consumed in `sizeof(std::size_t)`-element words, each
    packed into one `std::size_t` and combined with `hash_combine_()`, and
    finally any leftover elements are combined one at a time. */
template<typename CharRange>
std::size_t hash_char_range(CharRange const & r) noexcept
{
    auto cursor = r.begin();
    auto stop = r.end();
    auto const length = stop - cursor;
    auto const tail = length % sizeof(std::size_t);

    // Whole words first: pull `stop` back to the last word boundary.
    stop -= tail;
    std::size_t result = length;
    while (cursor != stop) {
        std::size_t word = (*(cursor + 0) << 24) + (*(cursor + 1) << 16) +
                           (*(cursor + 2) << 8) + (*(cursor + 3) << 0);
        word = hash_4_more_chars<sizeof(std::size_t)>::call(word, cursor);
        result = hash_combine_(result, word);
        cursor += sizeof(std::size_t);
    }

    // Then the trailing elements that did not fill a whole word.
    stop += tail;
    while (cursor != stop) {
        result = hash_combine_(result, *cursor);
        ++cursor;
    }
    return result;
}
/** Hashes `r` by folding every element of `[r.begin().base(),
    r.end().base())` into a running hash with `hash_combine_()`, starting
    from zero, and then combining in the number of elements visited. */
template<typename GraphemeRange>
std::size_t hash_grapheme_range(GraphemeRange const & r) noexcept
{
    std::size_t visited = 0;
    std::size_t const folded = std::accumulate(
        r.begin().base(),
        r.end().base(),
        std::size_t(0),
        [&visited](std::size_t running, std::size_t element) {
            ++visited;
            return hash_combine_(running, element);
        });
    return hash_combine_(folded, visited);
}
}}}
#endif

View File

@ -0,0 +1,51 @@
#ifndef BOOST_TEXT_DETAIL_BREAK_PROP_ITER_HPP
#define BOOST_TEXT_DETAIL_BREAK_PROP_ITER_HPP
#include <boost/text/detail/lzw.hpp>
#include <unordered_map>
namespace boost { namespace text { namespace detail {
/** Output iterator that receives bytes one at a time and turns every
    complete four-byte group into an entry of a `uint32_t` -> `Enum` map.
    The map and the staging buffer are owned by the caller; only pointers
    to them are stored. */
template<typename Enum>
struct lzw_to_break_prop_iter
{
    typedef std::pair<uint32_t, Enum> value_type;
    typedef int difference_type;
    typedef unsigned char * pointer;
    typedef unsigned char & reference;
    typedef std::output_iterator_tag iterator_category;
    typedef container::small_vector<unsigned char, 256> buffer_t;

    lzw_to_break_prop_iter(
        std::unordered_map<uint32_t, Enum> & map, buffer_t & buf) :
        map_(&map), buf_(&buf)
    {}

    /** Appends `c` to the buffer, then consumes every complete four-byte
        group from the front of the buffer.  For each group the key is
        `bytes_to_cp()` applied to the group's first byte address
        (presumably decoding the leading bytes into a code point -- confirm
        against its definition), and the mapped value is the group's fourth
        byte converted to `Enum`. */
    lzw_to_break_prop_iter & operator=(unsigned char c)
    {
        buf_->push_back(c);
        auto const element_bytes = 4;
        auto cursor = buf_->begin();
        // One past the last complete group currently in the buffer.
        auto const groups_end = buf_->end() - buf_->size() % element_bytes;
        while (cursor != groups_end) {
            (*map_)[bytes_to_cp(&*cursor)] = Enum(*(cursor + 3));
            cursor += element_bytes;
        }
        buf_->erase(buf_->begin(), cursor);
        return *this;
    }

    // Dereference and increment are no-ops returning the iterator itself,
    // so that `*it++ = byte` ends up in `operator=` above.
    lzw_to_break_prop_iter & operator*() { return *this; }
    lzw_to_break_prop_iter & operator++() { return *this; }
    lzw_to_break_prop_iter & operator++(int) { return *this; }

private:
    std::unordered_map<uint32_t, Enum> * map_;
    buffer_t * buf_;
};
}}}
#endif

View File

@ -0,0 +1,424 @@
#ifndef BOOST_TEXT_DETAIL_ITERATOR_HPP
#define BOOST_TEXT_DETAIL_ITERATOR_HPP
#include <boost/text/config.hpp>
#include <iterator>
namespace boost { namespace text { namespace detail {
/** Holds a `T` by value and exposes it through `operator->()`, so that a
    temporary can be the target of an arrow expression. */
template<typename T>
struct arrow_proxy
{
    explicit arrow_proxy(T value) noexcept : held_(std::move(value)) {}

    T const * operator->() const noexcept { return &held_; }
    T * operator->() noexcept { return &held_; }

private:
    T held_;
};
/** Produces the result of an iterator's `operator->()`.  This primary
    template is used when `IterIsProxy` is true: the value is wrapped in an
    `arrow_proxy<T>`. */
template<typename T, bool IterIsProxy>
struct invoke_op_arrow
{
    template<typename V>
    static arrow_proxy<T> call(V && value) noexcept
    {
        return arrow_proxy<T>(std::forward<V>(value));
    }
};

/** Specialization for `IterIsProxy == false`: returns the address of the
    referenced value. */
template<typename T>
struct invoke_op_arrow<T, false>
{
    static T * call(T & value) noexcept { return &value; }
};
/** A reverse iterator adaptor over `Iter`: incrementing it decrements the
    wrapped iterator, and dereferencing it yields the element just before
    the wrapped position.

    `IterIsProxy` selects what `operator->()` returns: when true, `pointer`
    is `arrow_proxy<value_type>`; otherwise it is `Iter`'s own pointer type.

    Members whose bodies need more than a single return statement are
    declared with `BOOST_TEXT_CXX14_CONSTEXPR` rather than `constexpr`, so
    that they remain valid where C++14 relaxed constexpr is unavailable. */
template<typename Iter, bool IterIsProxy = false>
struct reverse_iterator
{
    using iterator_category =
        typename std::iterator_traits<Iter>::iterator_category;
    using value_type = typename std::iterator_traits<Iter>::value_type;
    using difference_type =
        typename std::iterator_traits<Iter>::difference_type;
    using pointer = typename std::conditional<
        IterIsProxy,
        arrow_proxy<value_type>,
        typename std::iterator_traits<Iter>::pointer>::type;
    using reference = typename std::iterator_traits<Iter>::reference;
    using iterator_type = Iter;

    constexpr reverse_iterator() noexcept : it_() {}
    explicit constexpr reverse_iterator(iterator_type it) noexcept : it_(it)
    {}
    constexpr reverse_iterator(reverse_iterator const & other) noexcept :
        it_(other.it_)
    {}

    /** Converting constructor from a reverse iterator over another
        iterator type whose `base()` is convertible to `Iter`. */
    template<typename Iter2>
    constexpr reverse_iterator(
        const reverse_iterator<Iter2> & other) noexcept :
        it_(other.base())
    {}

    /** Returns the wrapped (forward) iterator. */
    constexpr iterator_type base() const noexcept { return it_; }

    /** Returns the element immediately before `base()`. */
    BOOST_TEXT_CXX14_CONSTEXPR reference operator*() const noexcept
    {
        Iter temp = it_;
        return *--temp;
    }

    /** Member access to the element immediately before `base()`.  Uses
        `BOOST_TEXT_CXX14_CONSTEXPR`, like `operator*()`, because the body
        declares a local and so is not a valid C++11 constexpr function. */
    BOOST_TEXT_CXX14_CONSTEXPR pointer operator->() const noexcept
    {
        Iter temp = it_;
        return invoke_op_arrow<value_type, IterIsProxy>::call(*--temp);
    }

    /** Returns the element `n` positions further along in reverse order. */
    constexpr reference operator[](difference_type n) const noexcept
    {
        return *(it_ - n - 1);
    }

    constexpr reverse_iterator operator+(difference_type n) const noexcept
    {
        return reverse_iterator(it_ - n);
    }
    constexpr reverse_iterator operator-(difference_type n) const noexcept
    {
        return reverse_iterator(it_ + n);
    }

    BOOST_TEXT_CXX14_CONSTEXPR reverse_iterator & operator++() noexcept
    {
        --it_;
        return *this;
    }
    BOOST_TEXT_CXX14_CONSTEXPR reverse_iterator operator++(int)noexcept
    {
        reverse_iterator retval = *this;
        --it_;
        return retval;
    }

    BOOST_TEXT_CXX14_CONSTEXPR reverse_iterator & operator--() noexcept
    {
        ++it_;
        return *this;
    }
    BOOST_TEXT_CXX14_CONSTEXPR reverse_iterator operator--(int)noexcept
    {
        reverse_iterator retval = *this;
        ++it_;
        return retval;
    }

    BOOST_TEXT_CXX14_CONSTEXPR reverse_iterator &
    operator+=(difference_type n) noexcept
    {
        it_ -= n;
        return *this;
    }
    BOOST_TEXT_CXX14_CONSTEXPR reverse_iterator &
    operator-=(difference_type n) noexcept
    {
        it_ += n;
        return *this;
    }

private:
    Iter it_;
};
// Comparisons between two reverse_iterators of the same specialization.
// Equality compares the wrapped iterators directly; ordering is reversed
// relative to the wrapped iterators, and the remaining operators are
// expressed in terms of `==` and `<`.

/** Equal when the wrapped iterators are equal. */
template<typename Iter, bool IterIsProxy>
constexpr bool operator==(
reverse_iterator<Iter, IterIsProxy> const & lhs,
reverse_iterator<Iter, IterIsProxy> const & rhs) noexcept
{
return lhs.base() == rhs.base();
}
/** `lhs` precedes `rhs` when `rhs`'s wrapped iterator is less than
`lhs`'s. */
template<typename Iter, bool IterIsProxy>
constexpr bool operator<(
reverse_iterator<Iter, IterIsProxy> const & lhs,
reverse_iterator<Iter, IterIsProxy> const & rhs) noexcept
{
return rhs.base() < lhs.base();
}
/** Negation of `operator==`. */
template<typename Iter, bool IterIsProxy>
constexpr bool operator!=(
reverse_iterator<Iter, IterIsProxy> const & lhs,
reverse_iterator<Iter, IterIsProxy> const & rhs) noexcept
{
return !(lhs == rhs);
}
/** `lhs > rhs` is `rhs < lhs`. */
template<typename Iter, bool IterIsProxy>
constexpr bool operator>(
reverse_iterator<Iter, IterIsProxy> const & lhs,
reverse_iterator<Iter, IterIsProxy> const & rhs) noexcept
{
return rhs < lhs;
}
/** `lhs <= rhs` is `!(rhs < lhs)`. */
template<typename Iter, bool IterIsProxy>
constexpr bool operator<=(
reverse_iterator<Iter, IterIsProxy> const & lhs,
reverse_iterator<Iter, IterIsProxy> const & rhs) noexcept
{
return !(rhs < lhs);
}
/** `lhs >= rhs` is `!(lhs < rhs)`. */
template<typename Iter, bool IterIsProxy>
constexpr bool operator>=(
reverse_iterator<Iter, IterIsProxy> const & lhs,
reverse_iterator<Iter, IterIsProxy> const & rhs) noexcept
{
return !(lhs < rhs);
}
// Comparisons between reverse_iterators of different specializations (for
// example, over `char *` and `char const *`).  The definitions mirror the
// same-type operators: equality compares the wrapped iterators, ordering is
// reversed, and the rest are expressed in terms of `==` and `<`.

/** Equal when the wrapped iterators compare equal. */
template<
typename Iter1,
bool Iter1IsProxy,
typename Iter2,
bool Iter2IsProxy>
constexpr bool operator==(
reverse_iterator<Iter1, Iter1IsProxy> const & lhs,
reverse_iterator<Iter2, Iter2IsProxy> const & rhs) noexcept
{
return lhs.base() == rhs.base();
}
/** `lhs` precedes `rhs` when `rhs`'s wrapped iterator is less than
`lhs`'s. */
template<
typename Iter1,
bool Iter1IsProxy,
typename Iter2,
bool Iter2IsProxy>
constexpr bool operator<(
reverse_iterator<Iter1, Iter1IsProxy> const & lhs,
reverse_iterator<Iter2, Iter2IsProxy> const & rhs) noexcept
{
return rhs.base() < lhs.base();
}
/** Negation of `operator==`. */
template<
typename Iter1,
bool Iter1IsProxy,
typename Iter2,
bool Iter2IsProxy>
constexpr bool operator!=(
reverse_iterator<Iter1, Iter1IsProxy> const & lhs,
reverse_iterator<Iter2, Iter2IsProxy> const & rhs) noexcept
{
return !(lhs == rhs);
}
/** `lhs > rhs` is `rhs < lhs`. */
template<
typename Iter1,
bool Iter1IsProxy,
typename Iter2,
bool Iter2IsProxy>
constexpr bool operator>(
reverse_iterator<Iter1, Iter1IsProxy> const & lhs,
reverse_iterator<Iter2, Iter2IsProxy> const & rhs) noexcept
{
return rhs < lhs;
}
/** `lhs <= rhs` is `!(rhs < lhs)`. */
template<
typename Iter1,
bool Iter1IsProxy,
typename Iter2,
bool Iter2IsProxy>
constexpr bool operator<=(
reverse_iterator<Iter1, Iter1IsProxy> const & lhs,
reverse_iterator<Iter2, Iter2IsProxy> const & rhs) noexcept
{
return !(rhs < lhs);
}
/** `lhs >= rhs` is `!(lhs < rhs)`. */
template<
typename Iter1,
bool Iter1IsProxy,
typename Iter2,
bool Iter2IsProxy>
constexpr bool operator>=(
reverse_iterator<Iter1, Iter1IsProxy> const & lhs,
reverse_iterator<Iter2, Iter2IsProxy> const & rhs) noexcept
{
return !(lhs < rhs);
}
/** Distance between two reverse iterators.  The operands are swapped
relative to the wrapped iterators (`rhs.base() - lhs.base()`) because the
reverse iterators advance in the opposite direction.  The return type is
whatever subtracting the wrapped iterators yields. */
template<
typename Iter1,
bool Iter1IsProxy,
typename Iter2,
bool Iter2IsProxy>
constexpr auto operator-(
reverse_iterator<Iter1, Iter1IsProxy> const & lhs,
reverse_iterator<Iter2, Iter2IsProxy> const & rhs) noexcept
-> decltype(rhs.base() - lhs.base())
{
return rhs.base() - lhs.base();
}
// Reverse iterators over mutable and constant `char` sequences, using the
// default (non-proxy) `operator->()` behavior.
using reverse_char_iterator = reverse_iterator<char *>;
using const_reverse_char_iterator = reverse_iterator<char const *>;
/** A random-access iterator over a character sequence that is treated as
    repeating.  It stores a pointer to `size` characters plus a logical
    position `n`; dereferencing yields the character at index `n % size`, so
    positions past the end of the sequence wrap around to its start. */
struct const_repeated_chars_iterator
{
    using value_type = char;
    using difference_type = std::ptrdiff_t;
    using pointer = char const *;
    using reference = char;
    using iterator_category = std::random_access_iterator_tag;

    /** Creates an iterator with a null sequence, size zero and position
        zero.  Dereferencing it would take a remainder by zero. */
    constexpr const_repeated_chars_iterator() noexcept :
        first_(nullptr), size_(0), n_(0)
    {}

    /** Creates an iterator at logical position `n` over the `size`
        characters starting at `first`. */
    constexpr const_repeated_chars_iterator(
        char const * first, difference_type size, difference_type n) noexcept :
        first_(first), size_(size), n_(n)
    {}

    /** Returns the character at the current position, modulo the size. */
    constexpr reference operator*() const noexcept
    {
        return first_[n_ % size_];
    }

    /** Returns the character `n` positions from the current one, modulo
        the size. */
    constexpr value_type operator[](difference_type n) const noexcept
    {
        return first_[(n_ + n) % size_];
    }

    BOOST_TEXT_CXX14_CONSTEXPR const_repeated_chars_iterator &
    operator++() noexcept
    {
        n_ += 1;
        return *this;
    }

    BOOST_TEXT_CXX14_CONSTEXPR const_repeated_chars_iterator
    operator++(int)noexcept
    {
        const_repeated_chars_iterator const before(*this);
        ++n_;
        return before;
    }

    BOOST_TEXT_CXX14_CONSTEXPR const_repeated_chars_iterator &
    operator+=(difference_type n) noexcept
    {
        n_ = n_ + n;
        return *this;
    }

    BOOST_TEXT_CXX14_CONSTEXPR const_repeated_chars_iterator &
    operator--() noexcept
    {
        n_ -= 1;
        return *this;
    }

    BOOST_TEXT_CXX14_CONSTEXPR const_repeated_chars_iterator
    operator--(int)noexcept
    {
        const_repeated_chars_iterator const before(*this);
        --n_;
        return before;
    }

    BOOST_TEXT_CXX14_CONSTEXPR const_repeated_chars_iterator &
    operator-=(difference_type n) noexcept
    {
        n_ = n_ - n;
        return *this;
    }

    // Two iterators are equal when they refer to the same sequence and sit
    // at the same logical position.  The ordering operators additionally
    // require the same sequence: iterators over different sequences compare
    // false under <, <=, > and >=.
    friend constexpr bool operator==(
        const_repeated_chars_iterator lhs,
        const_repeated_chars_iterator rhs) noexcept
    {
        return lhs.n_ == rhs.n_ && lhs.first_ == rhs.first_;
    }
    friend constexpr bool operator!=(
        const_repeated_chars_iterator lhs,
        const_repeated_chars_iterator rhs) noexcept
    {
        return lhs.first_ != rhs.first_ || lhs.n_ != rhs.n_;
    }
    friend constexpr bool operator<(
        const_repeated_chars_iterator lhs,
        const_repeated_chars_iterator rhs) noexcept
    {
        return lhs.first_ == rhs.first_ && lhs.n_ < rhs.n_;
    }
    friend constexpr bool operator<=(
        const_repeated_chars_iterator lhs,
        const_repeated_chars_iterator rhs) noexcept
    {
        return lhs.first_ == rhs.first_ && lhs.n_ <= rhs.n_;
    }
    friend constexpr bool operator>(
        const_repeated_chars_iterator lhs,
        const_repeated_chars_iterator rhs) noexcept
    {
        return rhs.first_ == lhs.first_ && rhs.n_ < lhs.n_;
    }
    friend constexpr bool operator>=(
        const_repeated_chars_iterator lhs,
        const_repeated_chars_iterator rhs) noexcept
    {
        return rhs.first_ == lhs.first_ && rhs.n_ <= lhs.n_;
    }

    // Arithmetic works on the by-value copy of the iterator operand and
    // returns that adjusted copy.
    friend BOOST_TEXT_CXX14_CONSTEXPR const_repeated_chars_iterator
    operator+(
        const_repeated_chars_iterator lhs, difference_type rhs) noexcept
    {
        lhs.n_ += rhs;
        return lhs;
    }
    friend BOOST_TEXT_CXX14_CONSTEXPR const_repeated_chars_iterator
    operator+(
        difference_type lhs, const_repeated_chars_iterator rhs) noexcept
    {
        rhs.n_ += lhs;
        return rhs;
    }
    friend BOOST_TEXT_CXX14_CONSTEXPR const_repeated_chars_iterator
    operator-(
        const_repeated_chars_iterator lhs, difference_type rhs) noexcept
    {
        lhs.n_ -= rhs;
        return lhs;
    }

    /** Difference of the logical positions; the sequences are not
        compared. */
    friend constexpr difference_type operator-(
        const_repeated_chars_iterator lhs,
        const_repeated_chars_iterator rhs) noexcept
    {
        return lhs.n_ - rhs.n_;
    }

private:
    char const * first_;   // start of the repeated sequence
    difference_type size_; // number of characters in one repetition
    difference_type n_;    // logical position, not reduced modulo size_
};
// Reverse-order counterpart of const_repeated_chars_iterator.
using const_reverse_repeated_chars_iterator =
reverse_iterator<const_repeated_chars_iterator>;
}}}
#endif

View File

@ -0,0 +1,103 @@
#ifndef BOOST_TEXT_DETAIL_LZW_HPP
#define BOOST_TEXT_DETAIL_LZW_HPP
#include <boost/container/small_vector.hpp>
#include <vector>
namespace boost { namespace text { namespace detail {
/** Assembles a 32-bit value from the four bytes at `chars`, most
    significant byte first.  `chars` must point to at least four readable
    bytes. */
inline uint32_t bytes_to_uint32_t(unsigned char const * chars)
{
    // Widen each byte to uint32_t before shifting.  A bare `unsigned char`
    // is promoted to `int`, and shifting a byte >= 0x80 left by 24 bits
    // would overflow that signed type.
    return static_cast<uint32_t>(chars[0]) << 24 |
           static_cast<uint32_t>(chars[1]) << 16 |
           static_cast<uint32_t>(chars[2]) << 8 |
           static_cast<uint32_t>(chars[3]);
}
/** Assembles a 24-bit value from the three bytes at `chars`, most
    significant byte first, and returns it widened to 32 bits. */
inline uint32_t bytes_to_cp(unsigned char const * chars)
{
    int const high = chars[0] << 16;
    int const middle = chars[1] << 8;
    int const low = chars[2];
    return high | middle | low;
}
/** Assembles a 16-bit value from the two bytes at `chars`, most significant
    byte first.  Note that the result is returned as a `uint32_t`. */
inline uint32_t bytes_to_uint16_t(unsigned char const * chars)
{
    int const high = chars[0] << 8;
    int const low = chars[1];
    return high | low;
}
// Sentinel values for LZW reverse-table entries.  Both are 0xffff and serve
// as the default `pred_` and `value_` of lzw_reverse_table_element.
enum : uint16_t { no_predecessor = 0xffff, no_value = 0xffff };
/** One entry of the LZW reverse (decoding) table: a value plus the index of
    the entry that precedes it.  A default-constructed element has no
    predecessor and no value. */
struct lzw_reverse_table_element
{
    lzw_reverse_table_element(
        uint16_t pred = no_predecessor, uint16_t value = no_value) :
        pred_{pred}, value_{value}
    {}

    uint16_t pred_;  // index of the preceding entry, or no_predecessor
    uint16_t value_; // value held by this entry, or no_value
};
// The decoding table: entries are addressed by LZW code.
using lzw_reverse_table = std::vector<lzw_reverse_table_element>;
/** Writes the value of table entry `i` to `out`, then the value of each
    entry reached by following `pred_` links, stopping after the entry whose
    `pred_` is `no_predecessor`.  Returns the advanced output iterator. */
template<typename OutIter>
OutIter
copy_table_entry(lzw_reverse_table const & table, uint16_t i, OutIter out)
{
    for (;;) {
        *out++ = table[i].value_;
        if (table[i].pred_ == no_predecessor)
            break;
        i = table[i].pred_;
    }
    return out;
}
// Hardcoded to 16 bits.  Takes unsigned 16-bit LZW-compressed values as
// input and writes the decompressed unsigned char values to out.
//
// Each assignment through `out` receives a whole container of bytes holding
// one decoded table entry.  copy_table_entry emits an entry's own value
// first and the values of its predecessors afterwards, so the consumer of
// `out` sees each entry in that order.  An empty input range produces no
// output.
template<typename Iter, typename OutIter>
OutIter lzw_decompress(Iter first, Iter last, OutIter out)
{
    // Nothing to decode.  Without this check the first code would be read
    // from an empty range.
    if (!(first != last))
        return out;

    // Codes 0..255 stand for the single byte values themselves; all other
    // entries start out with no_value and are filled in while decoding.
    lzw_reverse_table reverse_table(1 << 16);
    for (uint16_t i = 0; i < 256u; ++i) {
        reverse_table[i].value_ = i;
    }

    container::small_vector<unsigned char, 256> table_entry;

    uint32_t next_table_value = 256;
    uint32_t const end_table_value = 1 << 16;

    // The first code must be a plain byte, since no other entry exists yet.
    uint16_t prev_code = *first++;
    BOOST_ASSERT(prev_code < 256);
    unsigned char c = (unsigned char)prev_code;
    table_entry.push_back(c);
    *out++ = table_entry;

    while (first != last) {
        uint16_t const code = *first++;

        table_entry.clear();
        if (reverse_table[code].value_ == no_value) {
            // `code` is not in the table yet: the entry is the previous
            // entry extended by `c`, the last byte pushed for it.
            table_entry.push_back(c);
            copy_table_entry(
                reverse_table, prev_code, std::back_inserter(table_entry));
        } else {
            copy_table_entry(
                reverse_table, code, std::back_inserter(table_entry));
        }

        *out++ = table_entry;
        c = table_entry.back();

        // Record a new entry (previous code followed by `c`) while the
        // 16-bit code space still has room.
        if (next_table_value < end_table_value) {
            reverse_table[next_table_value++] =
                lzw_reverse_table_element{prev_code, c};
        }

        prev_code = code;
    }

    return out;
}
}}}
#endif

View File

@ -0,0 +1,12 @@
#ifndef BOOST_TEXT_DETAIL_SENTINEL_TAG_HPP
#define BOOST_TEXT_DETAIL_SENTINEL_TAG_HPP
namespace boost { namespace text { namespace detail {
// Empty tag type.  Presumably selects the overloads meant for an
// iterator/sentinel pair of different types -- confirm against the
// functions that take it.
struct sentinel_tag {};
// Empty tag type used to select overloads, for example
// `distance(first, last, non_sentinel_tag)` in boost/text/algorithm.hpp.
struct non_sentinel_tag {};
}}}
#endif

View File

@ -0,0 +1,482 @@
#ifndef BOOST_TEXT_GRAPHEME_BREAK_HPP
#define BOOST_TEXT_GRAPHEME_BREAK_HPP
#include <boost/text/algorithm.hpp>
#include <boost/text/lazy_segment_range.hpp>
#include <boost/text/utility.hpp>
#include <boost/assert.hpp>
#include <array>
#include <unordered_map>
#include <stdint.h>
namespace boost { namespace text {
/** The grapheme properties defined by Unicode.  The numeric values are
    spelled out because detail::table_grapheme_break() uses them as row and
    column indices into its 15x15 table. */
enum class grapheme_property {
    Other = 0,
    CR = 1,
    LF = 2,
    Control = 3,
    Extend = 4,
    Regional_Indicator = 5,
    Prepend = 6,
    SpacingMark = 7,
    L = 8,
    V = 9,
    T = 10,
    LV = 11,
    LVT = 12,
    ExtPict = 13,
    ZWJ = 14
};
namespace detail {
// A half-open range of code points [lo_, hi_) that all share the grapheme
// property prop_.  Kept as a plain aggregate: grapheme_prop() builds its
// search key with `grapheme_prop_interval{cp, cp + 1}`, leaving prop_
// value-initialized.
struct grapheme_prop_interval
{
uint32_t lo_;
uint32_t hi_;
grapheme_property prop_;
};
// Orders intervals by position: `lhs` is less than `rhs` when `lhs` ends at
// or before the point where `rhs` begins.  Overlapping intervals are
// therefore unordered with respect to each other.
inline bool operator<(
    grapheme_prop_interval lhs, grapheme_prop_interval rhs) noexcept
{
    return !(rhs.lo_ < lhs.hi_);
}
// Lookup data consumed by grapheme_prop(); both functions are only declared
// in this header.
// Returns a reference to the six property intervals that grapheme_prop()
// searches with std::lower_bound when a code point is not in the map.
BOOST_TEXT_DECL std::array<grapheme_prop_interval, 6> const &
make_grapheme_prop_intervals();
// Returns the map from individual code points to their grapheme property.
BOOST_TEXT_DECL std::unordered_map<uint32_t, grapheme_property>
make_grapheme_prop_map();
}
/** Returns the grapheme property associated with code point `cp`.  The
    per-code-point map is consulted first; if `cp` is not in it, the interval
    table is searched.  `grapheme_property::Other` is returned when neither
    contains `cp`. */
inline grapheme_property grapheme_prop(uint32_t cp) noexcept
{
    // Both tables are built once, on the first call.
    static auto const cp_props = detail::make_grapheme_prop_map();
    static auto const prop_ranges = detail::make_grapheme_prop_intervals();

    auto const found = cp_props.find(cp);
    if (found != cp_props.end())
        return found->second;

    // Search key: the one-element interval [cp, cp + 1).
    detail::grapheme_prop_interval const probe{cp, cp + 1};
    auto const candidate =
        std::lower_bound(prop_ranges.begin(), prop_ranges.end(), probe);

    bool const covers_cp = candidate != prop_ranges.end() &&
                           candidate->lo_ <= cp && cp < candidate->hi_;
    return covers_cp ? candidate->prop_ : grapheme_property::Other;
}
namespace detail {
// Returns true only for grapheme_property::Extend.
inline bool skippable(grapheme_property prop) noexcept
{
    switch (prop) {
    case grapheme_property::Extend: return true;
    default: return false;
    }
}
// Tracks where the break search stands within a run of Regional_Indicator
// code points.
enum class grapheme_break_emoji_state_t {
    none = 0,
    // prop points to an odd-count emoji.
    first_emoji = 1,
    // prop points to an even-count emoji.
    second_emoji = 2
};
// Working state of the grapheme break searches: the current position, the
// properties on either side of the candidate break, and the
// Regional_Indicator pairing state.
template<typename CPIter>
struct grapheme_break_state
{
CPIter it; // current position
grapheme_property prev_prop; // property of the code point before `it`
grapheme_property prop; // property of the code point at `it`
grapheme_break_emoji_state_t emoji_state;
};
// Returns a copy of `state` moved one code point forward: the current
// property becomes the previous one.  `prop` itself is left for the caller
// to refresh.
template<typename CPIter>
grapheme_break_state<CPIter> next(grapheme_break_state<CPIter> state)
{
    state.prev_prop = state.prop;
    ++state.it;
    return state;
}
// Returns a copy of `state` moved one code point backward: the previous
// property becomes the current one.  `prev_prop` itself is left for the
// caller to refresh.
template<typename CPIter>
grapheme_break_state<CPIter> prev(grapheme_break_state<CPIter> state)
{
    state.prop = state.prev_prop;
    --state.it;
    return state;
}
template<typename CPIter>
bool gb11_prefix(CPIter first, CPIter prev_it)
{
auto final_prop = grapheme_property::Other;
find_if_backward(first, prev_it, [&final_prop](uint32_t cp) {
final_prop = grapheme_prop(cp);
return final_prop != grapheme_property::Extend;
});
return final_prop == grapheme_property::ExtPict;
}
// Returns true when the pairwise table allows a grapheme break between a
// code point with property `lhs` and a following one with property `rhs`.
// Rows are indexed by `lhs`, columns by `rhs`, both in grapheme_property
// declaration order.
inline bool table_grapheme_break(
grapheme_property lhs, grapheme_property rhs) noexcept
{
// Note that RI.RI was changed to '1' since that case is handled
// in the grapheme break FSM.
// clang-format off
// See chart at https://unicode.org/Public/11.0.0/ucd/auxiliary/GraphemeBreakTest.html .
constexpr std::array<std::array<bool, 15>, 15> grapheme_breaks = {{
// Other CR LF Ctrl Ext RI Pre SpcMk L V T LV LVT ExtPict ZWJ
{{1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0}}, // Other
{{1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}}, // CR
{{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}}, // LF
{{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}}, // Control
{{1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0}}, // Extend
{{1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0}}, // RI
{{0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Prepend
{{1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0}}, // SpacingMark
{{1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0}}, // L
{{1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0}}, // V
{{1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0}}, // T
{{1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0}}, // LV
{{1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0}}, // LVT
{{1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0}}, // ExtPict
{{1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0}}, // ZWJ
}};
// clang-format on
// The enumerator values double as table indices.
auto const lhs_int = static_cast<int>(lhs);
auto const rhs_int = static_cast<int>(rhs);
return grapheme_breaks[lhs_int][rhs_int];
}
}
#ifdef BOOST_TEXT_DOXYGEN
/** Finds the nearest grapheme break at or before `it`. If `it ==
first`, that is returned. Otherwise, the first code point of the
grapheme that `it` is within is returned (even if `it` is already at
the first code point of a grapheme).
This function only participates in overload resolution if `CPIter`
models the CPIter concept. */
template<typename CPIter, typename Sentinel>
CPIter prev_grapheme_break(CPIter first, CPIter it, Sentinel last) noexcept;
/** Finds the next grapheme break after `first`. This will be the first code
point after the current grapheme, or `last` if no next grapheme exists.
This function only participates in overload resolution if `CPIter`
models the CPIter concept.
\pre `first` is at the beginning of a grapheme. */
template<typename CPIter, typename Sentinel>
CPIter next_grapheme_break(CPIter first, Sentinel last) noexcept;
/** Finds the nearest grapheme break at or before `it`. If `it ==
range.begin()`, that is returned. Otherwise, the first code point of
the grapheme that `it` is within is returned (even if `it` is already
at the first code point of a grapheme).
This function only participates in overload resolution if `CPRange`
models the CPRange concept. */
template<typename CPRange, typename CPIter>
detail::undefined prev_grapheme_break(CPRange & range, CPIter it) noexcept;
/** Finds the next grapheme break after `it`. This will be the first code
point after the current grapheme, or `range.end()` if no next grapheme
exists.
This function only participates in overload resolution if `CPRange`
models the CPRange concept.
\pre `it` is at the beginning of a grapheme. */
template<typename CPRange, typename CPIter>
detail::undefined next_grapheme_break(CPRange & range, CPIter it) noexcept;
#else
// Iterator/sentinel overload.  Returns `it` unchanged when `it == first`;
// otherwise walks backward from `it` and returns the first position at
// which a grapheme break is found, or `first` if none is found before the
// start of the sequence.
template<typename CPIter, typename Sentinel>
auto prev_grapheme_break(CPIter first, CPIter it, Sentinel last) noexcept
-> detail::cp_iter_ret_t<CPIter, CPIter>
{
if (it == first)
return it;
// `it` may be the end of the sequence, which cannot be dereferenced: step
// back to the final code point, and stop if that is also `first`.
if (it == last && --it == first)
return it;
detail::grapheme_break_state<CPIter> state;
state.it = it;
state.prop = grapheme_prop(*state.it);
state.prev_prop = grapheme_prop(*std::prev(state.it));
state.emoji_state = detail::grapheme_break_emoji_state_t::none;
// Each iteration examines the boundary just before `state.it`.  The step
// `prev(state)` decrements `state.it` and turns `prev_prop` into `prop`;
// `prev_prop` is then recomputed at the top of the loop.
for (; state.it != first; state = prev(state)) {
state.prev_prop = grapheme_prop(*std::prev(state.it));
// When we see an RI, back up to the first RI so we can see what
// emoji state we're supposed to be in here.
if (state.emoji_state ==
detail::grapheme_break_emoji_state_t::none &&
state.prop == grapheme_property::Regional_Indicator) {
// Count the Regional_Indicator code points seen by the predicate.
// NOTE(review): assumes find_if_not_backward scans backward from
// `state.it` toward `first` -- confirm against its definition.
int ris_before = 0;
find_if_not_backward(
first, state.it, [&ris_before](uint32_t cp) {
bool const ri = grapheme_prop(cp) ==
grapheme_property::Regional_Indicator;
if (ri)
++ris_before;
return ri;
});
// An even count selects first_emoji, an odd count second_emoji.
state.emoji_state =
(ris_before % 2 == 0)
? detail::grapheme_break_emoji_state_t::first_emoji
: detail::grapheme_break_emoji_state_t::second_emoji;
}
// GB11
// No break between ZWJ and ExtPict when gb11_prefix() finds an
// ExtPict-led prefix before the ZWJ.
if (state.prev_prop == grapheme_property::ZWJ &&
state.prop == grapheme_property::ExtPict &&
detail::gb11_prefix(first, std::prev(state.it))) {
continue;
}
// Regional_Indicator pairing: in first_emoji state a preceding RI means
// the break is here; in second_emoji state a preceding RI belongs to the
// same pair, so the search continues.
if (state.emoji_state ==
detail::grapheme_break_emoji_state_t::first_emoji) {
if (state.prev_prop == grapheme_property::Regional_Indicator) {
state.emoji_state =
detail::grapheme_break_emoji_state_t::second_emoji;
return state.it;
} else {
state.emoji_state =
detail::grapheme_break_emoji_state_t::none;
}
} else if (
state.emoji_state ==
detail::grapheme_break_emoji_state_t::second_emoji &&
state.prev_prop == grapheme_property::Regional_Indicator) {
state.emoji_state =
detail::grapheme_break_emoji_state_t::first_emoji;
continue;
}
// All remaining cases are decided by the pairwise table.
if (detail::table_grapheme_break(state.prev_prop, state.prop))
return state.it;
}
return first;
}
// Iterator/sentinel overload.  Returns `first` when the range is empty;
// otherwise walks forward from the code point after `first` and returns the
// first position at which a grapheme break is found, or the end of the
// range if there is none.
template<typename CPIter, typename Sentinel>
auto next_grapheme_break(CPIter first, Sentinel last) noexcept
-> detail::cp_iter_ret_t<CPIter, CPIter>
{
if (first == last)
return first;
detail::grapheme_break_state<CPIter> state;
state.it = first;
// A single remaining code point is a grapheme of its own.
if (++state.it == last)
return state.it;
state.prev_prop = grapheme_prop(*std::prev(state.it));
state.prop = grapheme_prop(*state.it);
// Starting on a Regional_Indicator means a following RI would complete
// the pair.
state.emoji_state =
state.prev_prop == grapheme_property::Regional_Indicator
? detail::grapheme_break_emoji_state_t::first_emoji
: detail::grapheme_break_emoji_state_t::none;
// Each iteration examines the boundary just before `state.it`.  The step
// `next(state)` increments `state.it` and turns `prop` into `prev_prop`;
// `prop` is then recomputed at the top of the loop.
for (; state.it != last; state = next(state)) {
state.prop = grapheme_prop(*state.it);
// GB11
// No break between ZWJ and ExtPict when gb11_prefix() finds an
// ExtPict-led prefix before the ZWJ.
if (state.prev_prop == grapheme_property::ZWJ &&
state.prop == grapheme_property::ExtPict &&
detail::gb11_prefix(first, std::prev(state.it))) {
continue;
}
// Regional_Indicator pairing: an RI directly after the first RI of a
// pair joins it, so no break is reported there and the state resets.
if (state.emoji_state ==
detail::grapheme_break_emoji_state_t::first_emoji) {
if (state.prop == grapheme_property::Regional_Indicator) {
state.emoji_state =
detail::grapheme_break_emoji_state_t::none;
continue;
} else {
state.emoji_state =
detail::grapheme_break_emoji_state_t::none;
}
} else if (state.prop == grapheme_property::Regional_Indicator) {
state.emoji_state =
detail::grapheme_break_emoji_state_t::first_emoji;
}
// All remaining cases are decided by the pairwise table.
if (detail::table_grapheme_break(state.prev_prop, state.prop))
return state.it;
}
return state.it;
}
// Range overload: forwards to the iterator overload using the bounds of
// `range`.
template<typename CPRange, typename CPIter>
auto prev_grapheme_break(CPRange & range, CPIter it) noexcept
    -> detail::cp_rng_alg_ret_t<detail::iterator_t<CPRange>, CPRange>
{
    auto const range_first = std::begin(range);
    auto const range_last = std::end(range);
    return prev_grapheme_break(range_first, it, range_last);
}
// Range overload: searches from `it` to the end of `range` with the
// iterator overload.
template<typename CPRange, typename CPIter>
auto next_grapheme_break(CPRange & range, CPIter it) noexcept
    -> detail::cp_rng_alg_ret_t<detail::iterator_t<CPRange>, CPRange>
{
    auto const range_last = std::end(range);
    return next_grapheme_break(it, range_last);
}
#endif
namespace detail {
// Function object wrapping next_grapheme_break(it, last); used by
// graphemes() as the segment-advancing function of lazy_segment_range.
template<typename CPIter, typename Sentinel>
struct next_grapheme_callable
{
CPIter operator()(CPIter it, Sentinel last) const noexcept
{
return next_grapheme_break(it, last);
}
};
// Function object wrapping prev_grapheme_break(first, it, last); used by
// reversed_graphemes() as the segment-advancing function of
// lazy_segment_range.
template<typename CPIter>
struct prev_grapheme_callable
{
CPIter operator()(CPIter first, CPIter it, CPIter last) const
noexcept
{
return prev_grapheme_break(first, it, last);
}
};
}
#if 0
/** Returns the bounds of the grapheme that `it` lies within. */
template<typename CPIter, typename Sentinel>
cp_range<CPIter> grapheme(CPIter first, CPIter it, Sentinel last) noexcept
{
first = prev_grapheme_break(first, it, last);
return cp_range<CPIter>{first, next_grapheme_break(first, last)};
}
#endif
#ifdef BOOST_TEXT_DOXYGEN
#if 0
/** Returns the bounds of the grapheme that `it` lies within,
as a cp_range. */
template<typename CPRange, typename CPIter>
detail::undefined grapheme(CPRange & range, CPIter it) noexcept;
#endif
/** Returns a lazy range of the code point ranges delimiting graphemes in
`[first, last)`. */
template<typename CPIter, typename Sentinel>
detail::undefined graphemes(CPIter first, Sentinel last) noexcept;
/** Returns a lazy range of the code point ranges delimiting graphemes in
`range`. */
template<typename CPRange>
detail::undefined graphemes(CPRange & range) noexcept;
/** Returns a lazy range of the code point ranges delimiting graphemes in
`[first, last)`, in reverse. */
template<typename CPIter>
detail::undefined reversed_graphemes(CPIter first, CPIter last) noexcept;
/** Returns a lazy range of the code point ranges delimiting graphemes in
`range`, in reverse. */
template<typename CPRange>
detail::undefined reversed_graphemes(CPRange & range) noexcept;
#else
#if 0
template<typename CPRange, typename CPIter>
auto grapheme(CPRange & range, CPIter it) noexcept
-> cp_range<detail::iterator_t<CPRange>>
{
auto first =
prev_grapheme_break(std::begin(range), it, std::end(range));
return cp_range<CPIter>{first, next_grapheme_break(first, range.end())};
}
#endif
// Iterator/sentinel overload: lazy range of the graphemes in
// `[first, last)`.  The begin iterator is built from `(first, last)` and
// the end iterator from `last` alone.
template<typename CPIter, typename Sentinel>
lazy_segment_range<
    CPIter,
    Sentinel,
    detail::next_grapheme_callable<CPIter, Sentinel>>
graphemes(CPIter first, Sentinel last) noexcept
{
    using callable_t = detail::next_grapheme_callable<CPIter, Sentinel>;
    return {callable_t{}, {first, last}, {last}};
}
// Range overload: lazy range of the graphemes in `range`, built from
// `std::begin(range)` and `std::end(range)`.
template<typename CPRange>
auto graphemes(CPRange & range) noexcept -> lazy_segment_range<
    detail::iterator_t<CPRange>,
    detail::sentinel_t<CPRange>,
    detail::next_grapheme_callable<
        detail::iterator_t<CPRange>,
        detail::sentinel_t<CPRange>>>
{
    using iter_t = detail::iterator_t<CPRange>;
    using sent_t = detail::sentinel_t<CPRange>;
    using callable_t = detail::next_grapheme_callable<iter_t, sent_t>;
    return {callable_t{},
            {std::begin(range), std::end(range)},
            {std::end(range)}};
}
// Iterator overload: lazy range of the graphemes in `[first, last)`, last
// grapheme first.  The begin iterator starts at `(first, last, last)` and
// the end iterator at `(first, first, last)`.
template<typename CPIter>
lazy_segment_range<
    CPIter,
    CPIter,
    detail::prev_grapheme_callable<CPIter>,
    cp_range<CPIter>,
    detail::const_reverse_lazy_segment_iterator,
    true>
reversed_graphemes(CPIter first, CPIter last) noexcept
{
    using callable_t = detail::prev_grapheme_callable<CPIter>;
    return {callable_t{}, {first, last, last}, {first, first, last}};
}
// Range overload: lazy range of the graphemes in `range`, last grapheme
// first, built from `std::begin(range)` and `std::end(range)`.
template<typename CPRange>
auto reversed_graphemes(CPRange & range) noexcept -> lazy_segment_range<
    detail::iterator_t<CPRange>,
    detail::sentinel_t<CPRange>,
    detail::prev_grapheme_callable<detail::iterator_t<CPRange>>,
    cp_range<detail::iterator_t<CPRange>>,
    detail::const_reverse_lazy_segment_iterator,
    true>
{
    using callable_t =
        detail::prev_grapheme_callable<detail::iterator_t<CPRange>>;
    return {callable_t{},
            {std::begin(range), std::end(range), std::end(range)},
            {std::begin(range), std::begin(range), std::end(range)}};
}
#endif
}}
#endif

View File

@ -0,0 +1,218 @@
#ifndef BOOST_TEXT_LAZY_SEGMENT_RANGE_HPP
#define BOOST_TEXT_LAZY_SEGMENT_RANGE_HPP
#include <boost/text/utility.hpp>
namespace boost { namespace text {
namespace detail {
// Holder returned by the lazy segment iterators' operator->().  Those
// iterators produce their segments by value, so `it->member` needs an
// object that owns a copy of the segment and hands out a pointer to it.
template<typename CPIter, typename CPRange>
struct segment_arrow_proxy
{
    explicit segment_arrow_proxy(CPRange value) : value_(value) {}

    // Both overloads are needed: a const member function sees `value_` as
    // const, so it can only hand out a pointer-to-const.  The non-const
    // overload keeps the `CPRange *` result for non-const proxies, which
    // includes the temporaries produced by `it->member`.
    CPRange * operator->() noexcept { return &value_; }
    CPRange const * operator->() const noexcept { return &value_; }

private:
    CPRange value_;
};
// Forward iterator that yields consecutive segments `[prev_, it_)` of a
// code point sequence.  The end of each segment is computed on demand by
// calling `*next_func_`, which is installed through set_next_func().
template<
    typename CPIter,
    typename Sentinel,
    typename NextFunc,
    typename CPRange>
struct const_lazy_segment_iterator
{
private:
    NextFunc * next_func_; // not owned; set by set_next_func()
    CPIter prev_;          // start of the current segment
    CPIter it_;            // end of the current segment
    Sentinel last_;        // end of the whole sequence

public:
    using value_type = CPRange;
    using reference = value_type;
    using pointer = detail::segment_arrow_proxy<CPIter, CPRange>;
    using difference_type = std::ptrdiff_t;
    using iterator_category = std::forward_iterator_tag;

    const_lazy_segment_iterator() noexcept :
        next_func_(), prev_(), it_(), last_()
    {}

    // Begin-style iterator: the first segment starts at `it`.  Its end is
    // not known until set_next_func() is called.
    const_lazy_segment_iterator(CPIter it, Sentinel last) noexcept :
        next_func_(), prev_(it), it_(), last_(last)
    {}

    // End-style iterator: only `last_` is meaningful.
    const_lazy_segment_iterator(Sentinel last) noexcept :
        next_func_(), prev_(), it_(), last_(last)
    {}

    reference operator*() const noexcept { return value_type{prev_, it_}; }

    pointer operator->() const noexcept
    {
        return pointer(value_type{prev_, it_});
    }

    // Moves to the next segment: it starts where the current one ends.
    const_lazy_segment_iterator & operator++() noexcept
    {
        auto const following = (*next_func_)(it_, last_);
        prev_ = it_;
        it_ = following;
        return *this;
    }

    // Installs the segment-finding function and computes the end of the
    // current segment from its start.
    void set_next_func(NextFunc * next_func) noexcept
    {
        next_func_ = next_func;
        it_ = (*next_func)(prev_, last_);
    }

    // Compares the start of `lhs`'s current segment with the sequence end
    // recorded in `rhs`; this is the test for having reached the end.
    friend bool operator==(
        const_lazy_segment_iterator lhs,
        const_lazy_segment_iterator rhs) noexcept
    {
        return lhs.prev_ == rhs.last_;
    }
    friend bool operator!=(
        const_lazy_segment_iterator lhs,
        const_lazy_segment_iterator rhs) noexcept
    {
        return !(lhs.prev_ == rhs.last_);
    }
};
// Iterator that yields the segments `[it_, next_)` of a code point sequence
// from back to front.  The start of each segment is computed on demand by
// calling `*prev_func_`, which is installed through set_next_func().  The
// second template parameter is unused.
template<typename CPIter, typename, typename PrevFunc, typename CPRange>
struct const_reverse_lazy_segment_iterator
{
private:
    PrevFunc * prev_func_; // not owned; set by set_next_func()
    CPIter first_;         // start of the whole sequence
    CPIter it_;            // start of the current segment
    CPIter next_;          // end of the current segment

public:
    using value_type = CPRange;
    using reference = value_type;
    using pointer = detail::segment_arrow_proxy<CPIter, CPRange>;
    using difference_type = std::ptrdiff_t;
    using iterator_category = std::forward_iterator_tag;

    const_reverse_lazy_segment_iterator() noexcept :
        prev_func_(), first_(), it_(), next_()
    {}

    const_reverse_lazy_segment_iterator(
        CPIter first, CPIter it, CPIter last) noexcept :
        prev_func_(), first_(first), it_(it), next_(last)
    {}

    reference operator*() const noexcept { return value_type{it_, next_}; }

    pointer operator->() const noexcept
    {
        return pointer(value_type{it_, next_});
    }

    // Moves to the segment that ends where the current one starts.  Once
    // the current segment already starts at `first_`, `next_` is set to
    // `first_`, which is the state operator== treats as the end.
    const_reverse_lazy_segment_iterator & operator++() noexcept
    {
        if (it_ != first_) {
            auto const segment_start =
                (*prev_func_)(first_, std::prev(it_), next_);
            next_ = it_;
            it_ = segment_start;
        } else {
            next_ = first_;
        }
        return *this;
    }

    // Installs the segment-finding function and advances once, so that the
    // iterator refers to the last segment of the sequence.
    void set_next_func(PrevFunc * prev_func) noexcept
    {
        prev_func_ = prev_func;
        ++*this;
    }

    // Compares the end of `lhs`'s current segment with the sequence start
    // recorded in `rhs`.
    friend bool operator==(
        const_reverse_lazy_segment_iterator lhs,
        const_reverse_lazy_segment_iterator rhs) noexcept
    {
        return lhs.next_ == rhs.first_;
    }
    friend bool operator!=(
        const_reverse_lazy_segment_iterator lhs,
        const_reverse_lazy_segment_iterator rhs) noexcept
    {
        return !(lhs.next_ == rhs.first_);
    }
};
}
/** Represents a range of non-overlapping subranges.  Each subrange
    represents some semantically significant segment, the semantics of
    which are controlled by the `NextFunc` template parameter.  For
    instance, if `NextFunc` is next_paragraph_break, the subranges
    produced by lazy_segment_range will be paragraphs.  Each subrange is
    lazily produced; an output subrange is not produced until a lazy range
    iterator is dereferenced.

    Iterators returned by begin() hold a pointer to the `NextFunc` stored in
    this object, so they must not outlive it. */
template<
    typename CPIter,
    typename Sentinel,
    typename NextFunc,
    typename CPRange = cp_range<CPIter>,
    template<class, class, class, class> class IteratorTemplate =
        detail::const_lazy_segment_iterator,
    bool Reverse = false>
struct lazy_segment_range
{
    using iterator = IteratorTemplate<CPIter, Sentinel, NextFunc, CPRange>;

    lazy_segment_range() noexcept {}
    lazy_segment_range(
        NextFunc next_func, iterator first, iterator last) noexcept :
        next_func_(std::move(next_func)),
        first_(first),
        last_(last)
    {}

    /** Returns an iterator to the first segment.  The stored start
        iterator is copied and the copy is initialized, so calling begin()
        repeatedly always yields the same position.  This matters for the
        reverse iterator, whose set_next_func() advances the iterator it is
        called on. */
    iterator begin() const noexcept
    {
        iterator retval = first_;
        // The iterator interface takes a non-const NextFunc pointer.
        retval.set_next_func(const_cast<NextFunc *>(&next_func_));
        return retval;
    }
    iterator end() const noexcept { return last_; }

    /** Moves the contained `NextFunc` out of *this. */
    NextFunc && next_func() && noexcept { return std::move(next_func_); }

private:
    NextFunc next_func_;
    iterator first_;
    iterator last_;
};
}}
#endif

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,248 @@
#ifndef BOOST_TEXT_UTILITY_HPP
#define BOOST_TEXT_UTILITY_HPP
#include <boost/text/transcode_iterator.hpp>
#include <boost/text/detail/algorithm.hpp>
#include <boost/text/detail/sentinel_tag.hpp>
namespace boost { namespace text {
/** A range that adapts a sequence of `char const *` to a sequence of code
    points.  Both bounds are `utf_8_to_32_iterator`s built from three
    arguments each.
    NOTE(review): the argument order looks like (first, current, last) --
    confirm against utf_8_to_32_iterator. */
struct utf32_range
{
    using iterator = utf_8_to_32_iterator<char const *>;

    /** Constructs a range whose bounds are built from null pointers. */
    utf32_range() :
        first_(nullptr, nullptr, nullptr), last_(nullptr, nullptr, nullptr)
    {}

    /** Constructs a range over the UTF-8 code units in `[f, l)`. */
    utf32_range(char const * f, char const * l) :
        first_(f, f, l), last_(f, l, l)
    {}

    /** Constructs a range from an existing pair of iterators. */
    utf32_range(iterator f, iterator l) : first_(f), last_(l) {}

    /** Constructs a range over `std::begin(r)` .. `std::end(r)`. */
    template<typename CharRange>
    utf32_range(CharRange const & r) :
        first_(std::begin(r), std::begin(r), std::end(r)),
        last_(std::begin(r), std::end(r), std::end(r))
    {}

    bool empty() const noexcept { return first_ == last_; }

    iterator begin() const noexcept { return first_; }
    iterator end() const noexcept { return last_; }

    friend bool operator==(utf32_range lhs, utf32_range rhs)
    {
        return lhs.first_ == rhs.first_ && lhs.last_ == rhs.last_;
    }
    friend bool operator!=(utf32_range lhs, utf32_range rhs)
    {
        return !(lhs.first_ == rhs.first_ && lhs.last_ == rhs.last_);
    }

private:
    iterator first_;
    iterator last_;
};
/** A range of code points, stored as an iterator/sentinel pair.  Two
    ranges are equal when both bounds are equal. */
template<typename CPIter, typename Sentinel = CPIter>
struct cp_range
{
    using iterator = CPIter;
    using sentinel = Sentinel;

    static_assert(
        detail::is_cp_iter<CPIter>::value,
        "CPIter must be a code point iterator");

    /** Constructs a range with value-initialized bounds, so that the bounds
        of a default-constructed range can be read and compared even when
        they are raw pointers. */
    cp_range() : first_(), last_() {}
    cp_range(iterator first, sentinel last) : first_(first), last_(last) {}

    bool empty() const noexcept { return first_ == last_; }

    iterator begin() const { return first_; }
    sentinel end() const { return last_; }

    friend bool operator==(cp_range lhs, cp_range rhs)
    {
        return lhs.first_ == rhs.first_ && lhs.last_ == rhs.last_;
    }
    friend bool operator!=(cp_range lhs, cp_range rhs)
    {
        return !(lhs == rhs);
    }

private:
    iterator first_;
    sentinel last_;
};
/** A generic range, stored as an iterator/sentinel pair.  Two ranges are
    equal when both bounds are equal. */
template<typename Iter, typename Sentinel = Iter>
struct range
{
    using iterator = Iter;
    using sentinel = Sentinel;

    /** Constructs a range with value-initialized bounds, so that the bounds
        of a default-constructed range can be read and compared even when
        they are raw pointers. */
    range() : first_(), last_() {}
    range(iterator first, sentinel last) : first_(first), last_(last) {}

    bool empty() const noexcept { return first_ == last_; }

    iterator begin() const { return first_; }
    sentinel end() const { return last_; }

    friend bool operator==(range lhs, range rhs)
    {
        return lhs.first_ == rhs.first_ && lhs.last_ == rhs.last_;
    }
    friend bool operator!=(range lhs, range rhs) { return !(lhs == rhs); }

private:
    iterator first_;
    sentinel last_;
};
namespace detail {
// `T` with any reference removed first, then top-level const/volatile.
template<typename T>
using remove_cv_ref_t = typename std::remove_cv<
typename std::remove_reference<T>::type>::type;
// The cv/ref-stripped type returned by calling the member `begin()` on a
// `Range`.  Only types with a member `begin()` are supported.
template<typename Range>
using iterator_t =
remove_cv_ref_t<decltype(std::declval<Range>().begin())>;
// The cv/ref-stripped type returned by calling the member `end()` on a
// `Range`.  Only types with a member `end()` are supported.
template<typename Range>
using sentinel_t =
remove_cv_ref_t<decltype(std::declval<Range>().end())>;
// Builds a transcoding range when the iterator and sentinel types differ:
// the begin bound is a transcoding iterator constructed from
// `(first, first, last)`, and the sentinel is passed through unchanged.
template<
    template<class, class, class> class IterTemplate,
    typename Iter,
    typename Sentinel>
struct make_range_impl_t
{
    using iter_t = IterTemplate<Iter, Sentinel, use_replacement_character>;

    static range<iter_t, Sentinel> call(Iter first, Sentinel last) noexcept
    {
        return range<iter_t, Sentinel>(iter_t{first, first, last}, last);
    }
};
// Specialization for identical iterator and sentinel types: both bounds
// are transcoding iterators, constructed from `(first, first, last)` and
// `(first, last, last)` respectively.
template<template<class, class, class> class IterTemplate, typename Iter>
struct make_range_impl_t<IterTemplate, Iter, Iter>
{
    using iter_t = IterTemplate<Iter, Iter, use_replacement_character>;

    static range<iter_t, iter_t> call(Iter first, Iter last) noexcept
    {
        return range<iter_t, iter_t>(
            iter_t{first, first, last}, iter_t{first, last, last});
    }
};
// Picks the make_range_impl_t matching the iterator and sentinel types of
// `Range const` and applies it to `std::begin(r)` and `std::end(r)`.
template<template<class, class, class> class IterTemplate, typename Range>
struct make_range_t
{
    using impl_t = make_range_impl_t<
        IterTemplate,
        iterator_t<Range const>,
        sentinel_t<Range const>>;

    static auto call(Range const & r) noexcept
        -> decltype(impl_t::call(std::begin(r), std::end(r)))
    {
        auto const range_first = std::begin(r);
        auto const range_last = std::end(r);
        return impl_t::call(range_first, range_last);
    }
};
}
#ifdef BOOST_TEXT_DOXYGEN
/** Returns a range of code points transcoded from the given range of
UTF-8 code units.
This function only participates in overload resolution if `CharRange`
models the CharRange concept. */
template<typename CharRange>
detail::unspecified make_to_utf32_range(CharRange const & r) noexcept;
/** Returns a range of UTF-8 code units transcoded from the given range of
code points.
This function only participates in overload resolution if `CPRange`
models the CPRange concept. */
template<typename CPRange>
detail::unspecified make_from_utf32_range(CPRange const & r) noexcept;
/** Returns a range of UTF-16 code units transcoded from the given range
of UTF-8 code units.
This function only participates in overload resolution if `CharRange`
models the CharRange concept. */
template<typename CharRange>
detail::unspecified make_to_utf16_range(CharRange const & r) noexcept;
/** Returns a range of UTF-8 code units transcoded from the given range of
UTF-16 code units.
This function only participates in overload resolution if
`Char16Range` is a range of 16-bit integral values, each of which is
convertible to `uint16_t`. */
template<typename Char16Range>
detail::unspecified make_from_utf16_range(Char16Range const & r) noexcept;
#else
// Wraps `r` in a range of utf_8_to_32_iterator.
template<typename CharRange>
auto make_to_utf32_range(CharRange const & r) noexcept
    -> detail::rng_alg_ret_t<
        decltype(
            detail::make_range_t<utf_8_to_32_iterator, CharRange>::call(r)),
        CharRange>
{
    using maker_t = detail::make_range_t<utf_8_to_32_iterator, CharRange>;
    return maker_t::call(r);
}
// Wraps `r` in a range of utf_32_to_8_iterator.
template<typename CPRange>
auto make_from_utf32_range(CPRange const & r) noexcept
    -> detail::cp_rng_alg_ret_t<
        decltype(
            detail::make_range_t<utf_32_to_8_iterator, CPRange>::call(r)),
        CPRange>
{
    using maker_t = detail::make_range_t<utf_32_to_8_iterator, CPRange>;
    return maker_t::call(r);
}
// Wraps `r` in a range of utf_8_to_16_iterator.
template<typename CharRange>
auto make_to_utf16_range(CharRange const & r) noexcept
    -> detail::rng_alg_ret_t<
        decltype(
            detail::make_range_t<utf_8_to_16_iterator, CharRange>::call(r)),
        CharRange>
{
    using maker_t = detail::make_range_t<utf_8_to_16_iterator, CharRange>;
    return maker_t::call(r);
}
// Wraps `r` in a range of utf_16_to_8_iterator.
template<typename Char16Range>
auto make_from_utf16_range(Char16Range const & r) noexcept
    -> detail::rng16_alg_ret_t<
        decltype(detail::make_range_t<utf_16_to_8_iterator, Char16Range>::
                     call(r)),
        Char16Range>
{
    using maker_t = detail::make_range_t<utf_16_to_8_iterator, Char16Range>;
    return maker_t::call(r);
}
#endif
}}
#endif

View File

@ -0,0 +1,9 @@
#ifndef TEXT_BOOST_THROW_EXCEPTION_HPP
#define TEXT_BOOST_THROW_EXCEPTION_HPP
namespace boost {
// Minimal stand-in for boost::throw_exception: throws a copy of `e`.
// Marked [[noreturn]] because it always exits by throwing, which lets
// callers omit unreachable code after the call without warnings.
template <typename E>
[[noreturn]] void throw_exception(const E& e) { throw e; }
}
#endif // TEXT_BOOST_THROW_EXCEPTION_HPP

3591
src/text/grapheme_break.cpp Normal file

File diff suppressed because it is too large Load Diff