ICU-13234 collect string & character options bits in new stringoptions.h

X-SVN-Rev: 40162
2017-06-08 20:35:40 +00:00 · 2017-06-08 20:35:40 +00:00 · 06a03303cb
commit 06a03303cb
parent 3975adb564
10 changed files with 140 additions and 128 deletions
--- a/icu4c/source/common/normalizer2impl.cpp
+++ b/icu4c/source/common/normalizer2impl.cpp
@@ -23,7 +23,7 @@
 #include "unicode/bytestream.h"
 #include "unicode/edits.h"
 #include "unicode/normalizer2.h"
-#include "unicode/ucasemap.h"  // U_OMIT_UNCHANGED_TEXT
+#include "unicode/stringoptions.h"
 #include "unicode/udata.h"
 #include "unicode/ustring.h"
 #include "unicode/utf16.h"
--- a/icu4c/source/common/ucase.h
+++ b/icu4c/source/common/ucase.h
@@ -61,7 +61,7 @@ enum {
 /**
 * Bit mask for getting just the options from a string compare options word
 * that are relevant for case-insensitive string comparison.
- * See uchar.h. Also include _STRNCMP_STYLE and U_COMPARE_CODE_POINT_ORDER.
+ * See stringoptions.h. The mask also includes _STRNCMP_STYLE and U_COMPARE_CODE_POINT_ORDER.
 * @internal
 */
 #define _STRCASECMP_OPTIONS_MASK 0xffff
@@ -69,7 +69,7 @@ enum {
 /**
 * Bit mask for getting just the options from a string compare options word
 * that are relevant for case folding (of a single string or code point).
- * See uchar.h.
+ * See stringoptions.h.
 * @internal
 */
 #define _FOLD_CASE_OPTIONS_MASK 0xff
--- a/icu4c/source/common/ucasemap_imp.h
+++ b/icu4c/source/common/ucasemap_imp.h
@@ -11,15 +11,6 @@
 #include "unicode/ucasemap.h"
 #include "ucase.h"

-#ifndef U_COMPARE_IGNORE_CASE
-/* see also unorm.h */
-/**
- * Option bit for unorm_compare:
- * Perform case-insensitive comparison.
- */
-#define U_COMPARE_IGNORE_CASE       0x10000
-#endif
-
 /**
 * Internal API, used by u_strcasecmp() etc.
 * Compare strings case-insensitively,
--- a/icu4c/source/common/unicode/stringoptions.h
+++ b/icu4c/source/common/unicode/stringoptions.h
@@ -0,0 +1,133 @@
+// © 2017 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+// stringoptions.h
+// created: 2017jun08 Markus W. Scherer
+
+#ifndef __STRINGOPTIONS_H__
+#define __STRINGOPTIONS_H__
+
+#include "unicode/utypes.h"
+
+/**
+ * \file
+ * \brief C API: Option bit constants for the options bit sets of various string and character processing functions.
+ */
+
+/**
+ * Option value for case folding: Use default mappings defined in CaseFolding.txt.
+ *
+ * @stable ICU 2.0
+ */
+#define U_FOLD_CASE_DEFAULT 0
+
+/**
+ * Option value for case folding:
+ *
+ * Use the modified set of mappings provided in CaseFolding.txt to handle dotted I
+ * and dotless i appropriately for Turkic languages (tr, az).
+ *
+ * Before Unicode 3.2, CaseFolding.txt contained mappings marked with 'I' that
+ * were to be included for default mappings and
+ * excluded for the Turkic-specific mappings.
+ *
+ * Unicode 3.2 CaseFolding.txt instead contains mappings marked with 'T' that
+ * are to be excluded for default mappings and
+ * included for the Turkic-specific mappings.
+ *
+ * @stable ICU 2.0
+ */
+#define U_FOLD_CASE_EXCLUDE_SPECIAL_I 1
+
+/**
+ * Do not lowercase non-initial parts of words when titlecasing.
+ * Option bit for titlecasing APIs that take an options bit set.
+ *
+ * By default, titlecasing will titlecase the first cased character
+ * of a word and lowercase all other characters.
+ * With this option, the other characters will not be modified.
+ *
+ * @see ucasemap_setOptions
+ * @see ucasemap_toTitle
+ * @see ucasemap_utf8ToTitle
+ * @see UnicodeString::toTitle
+ * @stable ICU 3.8
+ */
+#define U_TITLECASE_NO_LOWERCASE 0x100
+
+/**
+ * Do not adjust the titlecasing indexes from BreakIterator::next() indexes;
+ * titlecase exactly the characters at breaks from the iterator.
+ * Option bit for titlecasing APIs that take an options bit set.
+ *
+ * By default, titlecasing will take each break iterator index,
+ * adjust it by looking for the next cased character, and titlecase that one.
+ * Other characters are lowercased.
+ *
+ * This follows Unicode 4 & 5 section 3.13 Default Case Operations:
+ *
+ * R3  toTitlecase(X): Find the word boundaries based on Unicode Standard Annex
+ * #29, "Text Boundaries." Between each pair of word boundaries, find the first
+ * cased character F. If F exists, map F to default_title(F); then map each
+ * subsequent character C to default_lower(C).
+ *
+ * @see ucasemap_setOptions
+ * @see ucasemap_toTitle
+ * @see ucasemap_utf8ToTitle
+ * @see UnicodeString::toTitle
+ * @see U_TITLECASE_NO_LOWERCASE
+ * @stable ICU 3.8
+ */
+#define U_TITLECASE_NO_BREAK_ADJUSTMENT 0x200
+
+#ifndef U_HIDE_DRAFT_API
+
+/**
+ * Omit unchanged text when recording how source substrings
+ * relate to changed and unchanged result substrings.
+ * Used for example in some case-mapping and normalization functions.
+ *
+ * @see CaseMap
+ * @see Edits
+ * @see Normalizer2
+ * @draft ICU 60
+ */
+#define U_OMIT_UNCHANGED_TEXT 0x4000
+
+#endif  // U_HIDE_DRAFT_API
+
+/**
+ * Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc:
+ * Compare strings in code point order instead of code unit order.
+ * @stable ICU 2.2
+ */
+#define U_COMPARE_CODE_POINT_ORDER  0x8000
+
+/**
+ * Option bit for unorm_compare:
+ * Perform case-insensitive comparison.
+ * @stable ICU 2.2
+ */
+#define U_COMPARE_IGNORE_CASE       0x10000
+
+/**
+ * Option bit for unorm_compare:
+ * Both input strings are assumed to fulfill FCD conditions.
+ * @stable ICU 2.2
+ */
+#define UNORM_INPUT_IS_FCD          0x20000
+
+// Related definitions elsewhere.
+// Options that are not meaningful in the same functions
+// can share the same bits.
+//
+// Public:
+// unicode/unorm.h #define UNORM_COMPARE_NORM_OPTIONS_SHIFT 20
+//
+// Internal: (may change or be removed)
+// ucase.h #define _STRCASECMP_OPTIONS_MASK 0xffff
+// ucase.h #define _FOLD_CASE_OPTIONS_MASK 0xff
+// ustr_imp.h #define _STRNCMP_STYLE 0x1000
+// unormcmp.cpp #define _COMPARE_EQUIV 0x80000
+
+#endif  // __STRINGOPTIONS_H__
--- a/icu4c/source/common/unicode/ucasemap.h
+++ b/icu4c/source/common/unicode/ucasemap.h
@@ -23,6 +23,7 @@

 #include "unicode/utypes.h"
 #include "unicode/localpointer.h"
+#include "unicode/stringoptions.h"
 #include "unicode/ustring.h"

 /**
@@ -144,56 +145,6 @@ ucasemap_setLocale(UCaseMap *csm, const char *locale, UErrorCode *pErrorCode);
 U_STABLE void U_EXPORT2
 ucasemap_setOptions(UCaseMap *csm, uint32_t options, UErrorCode *pErrorCode);

-/**
- * Do not lowercase non-initial parts of words when titlecasing.
- * Option bit for titlecasing APIs that take an options bit set.
- *
- * By default, titlecasing will titlecase the first cased character
- * of a word and lowercase all other characters.
- * With this option, the other characters will not be modified.
- *
- * @see ucasemap_setOptions
- * @see ucasemap_toTitle
- * @see ucasemap_utf8ToTitle
- * @see UnicodeString::toTitle
- * @stable ICU 3.8
- */
-#define U_TITLECASE_NO_LOWERCASE 0x100
-
-/**
- * Do not adjust the titlecasing indexes from BreakIterator::next() indexes;
- * titlecase exactly the characters at breaks from the iterator.
- * Option bit for titlecasing APIs that take an options bit set.
- *
- * By default, titlecasing will take each break iterator index,
- * adjust it by looking for the next cased character, and titlecase that one.
- * Other characters are lowercased.
- *
- * This follows Unicode 4 & 5 section 3.13 Default Case Operations:
- *
- * R3  toTitlecase(X): Find the word boundaries based on Unicode Standard Annex
- * #29, "Text Boundaries." Between each pair of word boundaries, find the first
- * cased character F. If F exists, map F to default_title(F); then map each
- * subsequent character C to default_lower(C).
- *
- * @see ucasemap_setOptions
- * @see ucasemap_toTitle
- * @see ucasemap_utf8ToTitle
- * @see UnicodeString::toTitle
- * @see U_TITLECASE_NO_LOWERCASE
- * @stable ICU 3.8
- */
-#define U_TITLECASE_NO_BREAK_ADJUSTMENT 0x200
-
-/**
- * Omit unchanged text when case-mapping or normalizing with Edits.
- *
- * @see CaseMap
- * @see Edits
- * @draft ICU 60
- */
-#define U_OMIT_UNCHANGED_TEXT 0x4000
-
 #if !UCONFIG_NO_BREAK_ITERATION

 /**
--- a/icu4c/source/common/unicode/uchar.h
+++ b/icu4c/source/common/unicode/uchar.h
@@ -26,6 +26,7 @@
 #define UCHAR_H

 #include "unicode/utypes.h"
+#include "unicode/stringoptions.h"

 U_CDECL_BEGIN

@@ -3569,27 +3570,6 @@ u_toupper(UChar32 c);
 U_STABLE UChar32 U_EXPORT2
 u_totitle(UChar32 c);

-/** Option value for case folding: use default mappings defined in CaseFolding.txt. @stable ICU 2.0 */
-#define U_FOLD_CASE_DEFAULT 0
-
-/**
- * Option value for case folding:
- *
- * Use the modified set of mappings provided in CaseFolding.txt to handle dotted I
- * and dotless i appropriately for Turkic languages (tr, az).
- *
- * Before Unicode 3.2, CaseFolding.txt contains mappings marked with 'I' that
- * are to be included for default mappings and
- * excluded for the Turkic-specific mappings.
- *
- * Unicode 3.2 CaseFolding.txt instead contains mappings marked with 'T' that
- * are to be excluded for default mappings and
- * included for the Turkic-specific mappings.
- *
- * @stable ICU 2.0
- */
-#define U_FOLD_CASE_EXCLUDE_SPECIAL_I 1
-
 /**
 * The given character is mapped to its case folding equivalent according to
 * UnicodeData.txt and CaseFolding.txt;
--- a/icu4c/source/common/unicode/unistr.h
+++ b/icu4c/source/common/unicode/unistr.h
@@ -38,16 +38,6 @@

 struct UConverter;          // unicode/ucnv.h

-#ifndef U_COMPARE_CODE_POINT_ORDER
-/* see also ustring.h and unorm.h */
-/**
- * Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc:
- * Compare strings in code point order instead of code unit order.
- * @stable ICU 2.2
- */
-#define U_COMPARE_CODE_POINT_ORDER  0x8000
-#endif
-
 #ifndef USTRING_H
 /**
 * \ingroup ustring_ustrlen
--- a/icu4c/source/common/unicode/unorm2.h
+++ b/icu4c/source/common/unicode/unorm2.h
@@ -32,6 +32,7 @@

 #include "unicode/utypes.h"
 #include "unicode/localpointer.h"
+#include "unicode/stringoptions.h"
 #include "unicode/uset.h"

 /**
@@ -526,30 +527,6 @@ unorm2_hasBoundaryAfter(const UNormalizer2 *norm2, UChar32 c);
 U_STABLE UBool U_EXPORT2
 unorm2_isInert(const UNormalizer2 *norm2, UChar32 c);

-/**
- * Option bit for unorm_compare:
- * Both input strings are assumed to fulfill FCD conditions.
- * @stable ICU 2.2
- */
-#define UNORM_INPUT_IS_FCD          0x20000
-
-/**
- * Option bit for unorm_compare:
- * Perform case-insensitive comparison.
- * @stable ICU 2.2
- */
-#define U_COMPARE_IGNORE_CASE       0x10000
-
-#ifndef U_COMPARE_CODE_POINT_ORDER
-/* see also unistr.h and ustring.h */
-/**
- * Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc:
- * Compare strings in code point order instead of code unit order.
- * @stable ICU 2.2
- */
-#define U_COMPARE_CODE_POINT_ORDER  0x8000
-#endif
-
 /**
 * Compares two strings for canonical equivalence.
 * Further options include case-insensitive comparison and
--- a/icu4c/source/common/unicode/ustring.h
+++ b/icu4c/source/common/unicode/ustring.h
@@ -497,16 +497,6 @@ u_strCompare(const UChar *s1, int32_t length1,
 U_STABLE int32_t U_EXPORT2
 u_strCompareIter(UCharIterator *iter1, UCharIterator *iter2, UBool codePointOrder);

-#ifndef U_COMPARE_CODE_POINT_ORDER
-/* see also unistr.h and unorm.h */
-/**
- * Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc:
- * Compare strings in code point order instead of code unit order.
- * @stable ICU 2.2
- */
-#define U_COMPARE_CODE_POINT_ORDER  0x8000
-#endif
-
 /**
 * Compare two strings case-insensitively using full case folding.
 * This is equivalent to
--- a/icu4c/source/test/intltest/tstnorm.cpp
+++ b/icu4c/source/test/intltest/tstnorm.cpp
@@ -13,7 +13,7 @@
 #include "unicode/uchar.h"
 #include "unicode/errorcode.h"
 #include "unicode/normlzr.h"
-#include "unicode/ucasemap.h"  // U_OMIT_UNCHANGED_TEXT
+#include "unicode/stringoptions.h"
 #include "unicode/uniset.h"
 #include "unicode/usetiter.h"
 #include "unicode/schriter.h"