From d5045a5f4017631a4660f985fe451c5a64c42ca0 Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Sat, 11 Aug 2012 21:26:25 -0400 Subject: [PATCH] [ICU] Use new normalizer2 compose/decompose API It's considerably faster than the fallback implementation we had previously! --- src/hb-buffer.cc | 9 ++--- src/hb-glib.cc | 28 ++++++-------- src/hb-icu.cc | 77 +++++++++++++++++++++++++++++++-------- src/hb-unicode-private.hh | 13 +------ src/hb-unicode.cc | 35 +++++++++++++++++- src/hb-warning.cc | 8 ---- 6 files changed, 112 insertions(+), 58 deletions(-) diff --git a/src/hb-buffer.cc b/src/hb-buffer.cc index db4edce13..eddd5d0ba 100644 --- a/src/hb-buffer.cc +++ b/src/hb-buffer.cc @@ -37,8 +37,6 @@ #define HB_DEBUG_BUFFER (HB_DEBUG+0) #endif -#define _HB_BUFFER_UNICODE_FUNCS_DEFAULT (const_cast (&_hb_unicode_funcs_default)) - /* Here is how the buffer works internally: * * There are two info pointers: info and out_info. They always have @@ -144,7 +142,7 @@ hb_buffer_t::reset (void) return; hb_unicode_funcs_destroy (unicode); - unicode = _HB_BUFFER_UNICODE_FUNCS_DEFAULT; + unicode = hb_unicode_funcs_get_default (); hb_segment_properties_t default_props = _HB_BUFFER_PROPS_DEFAULT; props = default_props; @@ -552,7 +550,7 @@ hb_buffer_get_empty (void) static const hb_buffer_t _hb_buffer_nil = { HB_OBJECT_HEADER_STATIC, - _HB_BUFFER_UNICODE_FUNCS_DEFAULT, + const_cast (&_hb_unicode_funcs_nil), _HB_BUFFER_PROPS_DEFAULT, true, /* in_error */ @@ -608,7 +606,8 @@ hb_buffer_set_unicode_funcs (hb_buffer_t *buffer, return; if (!unicode) - unicode = _HB_BUFFER_UNICODE_FUNCS_DEFAULT; + unicode = hb_unicode_funcs_get_default (); + hb_unicode_funcs_reference (unicode); hb_unicode_funcs_destroy (buffer->unicode); diff --git a/src/hb-glib.cc b/src/hb-glib.cc index 7af92cc01..046275879 100644 --- a/src/hb-glib.cc +++ b/src/hb-glib.cc @@ -250,9 +250,6 @@ hb_glib_unicode_compose (hb_unicode_funcs_t *ufuncs HB_UNUSED, /* We don't ifdef-out the fallback code such that compiler always * sees it and makes sure it's compilable. */ - if (!a || !b) - return false; - gchar utf8[12]; gchar *normalized; int len; @@ -367,22 +364,21 @@ hb_glib_unicode_decompose_compatibility (hb_unicode_funcs_t *ufuncs, return utf8_decomposed_len; } -extern HB_INTERNAL const hb_unicode_funcs_t _hb_glib_unicode_funcs; -const hb_unicode_funcs_t _hb_glib_unicode_funcs = { - HB_OBJECT_HEADER_STATIC, - - NULL, /* parent */ - true, /* immutable */ - { -#define HB_UNICODE_FUNC_IMPLEMENT(name) hb_glib_unicode_##name, - HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS -#undef HB_UNICODE_FUNC_IMPLEMENT - } -}; - hb_unicode_funcs_t * hb_glib_get_unicode_funcs (void) { + static const hb_unicode_funcs_t _hb_glib_unicode_funcs = { + HB_OBJECT_HEADER_STATIC, + + NULL, /* parent */ + true, /* immutable */ + { +#define HB_UNICODE_FUNC_IMPLEMENT(name) hb_glib_unicode_##name, + HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS +#undef HB_UNICODE_FUNC_IMPLEMENT + } + }; + return const_cast (&_hb_glib_unicode_funcs); } diff --git a/src/hb-icu.cc b/src/hb-icu.cc index 5e92058c6..4bb7af29e 100644 --- a/src/hb-icu.cc +++ b/src/hb-icu.cc @@ -33,7 +33,7 @@ #include "hb-unicode-private.hh" -#include +#include #include #include #include @@ -164,6 +164,10 @@ hb_icu_unicode_script (hb_unicode_funcs_t *ufuncs HB_UNUSED, return hb_icu_script_to_script (scriptCode); } +#if U_ICU_VERSION_MAJOR_NUM >= 49 +static const UNormalizer2 *normalizer; +#endif + static hb_bool_t hb_icu_unicode_compose (hb_unicode_funcs_t *ufuncs HB_UNUSED, hb_codepoint_t a, @@ -171,8 +175,17 @@ hb_icu_unicode_compose (hb_unicode_funcs_t *ufuncs HB_UNUSED, hb_codepoint_t *ab, void *user_data HB_UNUSED) { - if (!a || !b) - return false; +#if U_ICU_VERSION_MAJOR_NUM >= 49 + { + UChar32 ret = unorm2_composePair (normalizer, a, b); + if (ret < 0) return false; + *ab = ret; + return true; + } +#endif + + /* We don't ifdef-out the fallback code such that compiler always + * sees it and makes sure it's compilable. */ UChar utf16[4], normalized[5]; unsigned int len; @@ -207,6 +220,32 @@ hb_icu_unicode_decompose (hb_unicode_funcs_t *ufuncs HB_UNUSED, hb_codepoint_t *b, void *user_data HB_UNUSED) { +#if U_ICU_VERSION_MAJOR_NUM >= 49 + { + UChar decomposed[4]; + int len; + UErrorCode icu_err = U_ZERO_ERROR; + len = unorm2_getRawDecomposition (normalizer, ab, decomposed, + ARRAY_LENGTH (decomposed), &icu_err); + if (U_FAILURE (icu_err) || len < 0) return false; + + len = u_countChar32 (decomposed, len); + if (len == 1) { + U16_GET_UNSAFE (decomposed, 0, *a); + *b = 0; + return *a != ab; + } else if (len == 2) { + len =0; + U16_NEXT_UNSAFE (decomposed, len, *a); + U16_NEXT_UNSAFE (decomposed, len, *b); + } + return true; + } +#endif + + /* We don't ifdef-out the fallback code such that compiler always + * sees it and makes sure it's compilable. */ + UChar utf16[2], normalized[2 * HB_UNICODE_MAX_DECOMPOSITION_LEN + 1]; unsigned int len; hb_bool_t ret, err; @@ -306,22 +345,28 @@ hb_icu_unicode_decompose_compatibility (hb_unicode_funcs_t *ufuncs HB_UNUSED, } -extern HB_INTERNAL const hb_unicode_funcs_t _hb_icu_unicode_funcs; -const hb_unicode_funcs_t _hb_icu_unicode_funcs = { - HB_OBJECT_HEADER_STATIC, - - NULL, /* parent */ - true, /* immutable */ - { -#define HB_UNICODE_FUNC_IMPLEMENT(name) hb_icu_unicode_##name, - HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS -#undef HB_UNICODE_FUNC_IMPLEMENT - } -}; - hb_unicode_funcs_t * hb_icu_get_unicode_funcs (void) { + static const hb_unicode_funcs_t _hb_icu_unicode_funcs = { + HB_OBJECT_HEADER_STATIC, + + NULL, /* parent */ + true, /* immutable */ + { +#define HB_UNICODE_FUNC_IMPLEMENT(name) hb_icu_unicode_##name, + HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS +#undef HB_UNICODE_FUNC_IMPLEMENT + } + }; + +#if U_ICU_VERSION_MAJOR_NUM >= 49 + if (!hb_atomic_ptr_get (&normalizer)) { + UErrorCode icu_err = U_ZERO_ERROR; + /* We ignore failure in getNFCInstace(). */ + hb_atomic_ptr_cmpexch (&normalizer, NULL, unorm2_getNFCInstance (&icu_err)); + } +#endif return const_cast (&_hb_icu_unicode_funcs); } diff --git a/src/hb-unicode-private.hh b/src/hb-unicode-private.hh index 53214b9a6..7ef582074 100644 --- a/src/hb-unicode-private.hh +++ b/src/hb-unicode-private.hh @@ -80,13 +80,14 @@ HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS_SIMPLE hb_codepoint_t *ab) { *ab = 0; + if (unlikely (!a || !b)) return false; return func.compose (this, a, b, ab, user_data.compose); } inline hb_bool_t decompose (hb_codepoint_t ab, hb_codepoint_t *a, hb_codepoint_t *b) { - *a = *b = 0; + *a = ab; *b = 0; return func.decompose (this, ab, a, b, user_data.decompose); } @@ -182,17 +183,7 @@ HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS_SIMPLE }; -#ifdef HAVE_GLIB -extern HB_INTERNAL const hb_unicode_funcs_t _hb_glib_unicode_funcs; -#define _hb_unicode_funcs_default _hb_glib_unicode_funcs -#elif defined(HAVE_ICU) -extern HB_INTERNAL const hb_unicode_funcs_t _hb_icu_unicode_funcs; -#define _hb_unicode_funcs_default _hb_icu_unicode_funcs -#else -#define HB_UNICODE_FUNCS_NIL 1 extern HB_INTERNAL const hb_unicode_funcs_t _hb_unicode_funcs_nil; -#define _hb_unicode_funcs_default _hb_unicode_funcs_nil -#endif /* Modified combining marks */ diff --git a/src/hb-unicode.cc b/src/hb-unicode.cc index 8979eaa77..2e2d077ec 100644 --- a/src/hb-unicode.cc +++ b/src/hb-unicode.cc @@ -109,12 +109,44 @@ hb_unicode_decompose_compatibility_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED } +#define HB_UNICODE_FUNCS_IMPLEMENT_SET \ + HB_UNICODE_FUNCS_IMPLEMENT (glib) \ + HB_UNICODE_FUNCS_IMPLEMENT (icu) \ + HB_UNICODE_FUNCS_IMPLEMENT (nil) \ + /* ^--- Add new callbacks before nil */ + +#define hb_nil_get_unicode_funcs hb_unicode_funcs_get_empty + +/* Prototype them all */ +#define HB_UNICODE_FUNCS_IMPLEMENT(set) \ +extern "C" hb_unicode_funcs_t *hb_##set##_get_unicode_funcs (void); +HB_UNICODE_FUNCS_IMPLEMENT_SET +#undef HB_UNICODE_FUNCS_IMPLEMENT + + hb_unicode_funcs_t * hb_unicode_funcs_get_default (void) { - return const_cast (&_hb_unicode_funcs_default); +#define HB_UNICODE_FUNCS_IMPLEMENT(set) \ + return hb_##set##_get_unicode_funcs (); + +#ifdef HAVE_GLIB + HB_UNICODE_FUNCS_IMPLEMENT(glib) +#elif defined(HAVE_ICU) + HB_UNICODE_FUNCS_IMPLEMENT(icu) +#else +#define HB_UNICODE_FUNCS_NIL 1 + HB_UNICODE_FUNCS_IMPLEMENT(nil) +#endif + +#undef HB_UNICODE_FUNCS_IMPLEMENT } +#if !defined(HB_NO_UNICODE_FUNCS) && defined(HB_UNICODE_FUNCS_NIL) +#pragma message("Could not find any Unicode functions implementation, you have to provide your own.") +#pragma message("To suppress this warnings, define HB_NO_UNICODE_FUNCS.") +#endif + hb_unicode_funcs_t * hb_unicode_funcs_create (hb_unicode_funcs_t *parent) { @@ -140,7 +172,6 @@ hb_unicode_funcs_create (hb_unicode_funcs_t *parent) } -extern HB_INTERNAL const hb_unicode_funcs_t _hb_unicode_funcs_nil; const hb_unicode_funcs_t _hb_unicode_funcs_nil = { HB_OBJECT_HEADER_STATIC, diff --git a/src/hb-warning.cc b/src/hb-warning.cc index 8ff4d20e8..01adceac3 100644 --- a/src/hb-warning.cc +++ b/src/hb-warning.cc @@ -37,11 +37,3 @@ #if defined(HB_ATOMIC_INT_NIL) || defined(HB_MUTEX_IMPL_NIL) #pragma message("To suppress these warnings, define HB_NO_MT.") #endif - - -#include "hb-unicode-private.hh" - -#if !defined(HB_NO_UNICODE_FUNCS) && defined(HB_UNICODE_FUNCS_NIL) -#pragma message("Could not find any Unicode functions implementation, you have to provide your own.") -#pragma message("To suppress this warnings, define HB_NO_UNICODE_FUNCS.") -#endif