ICU-13234 collect string & character options bits in new stringoptions.h
X-SVN-Rev: 40162
This commit is contained in:
parent
3975adb564
commit
06a03303cb
@ -23,7 +23,7 @@
|
||||
#include "unicode/bytestream.h"
|
||||
#include "unicode/edits.h"
|
||||
#include "unicode/normalizer2.h"
|
||||
#include "unicode/ucasemap.h" // U_OMIT_UNCHANGED_TEXT
|
||||
#include "unicode/stringoptions.h"
|
||||
#include "unicode/udata.h"
|
||||
#include "unicode/ustring.h"
|
||||
#include "unicode/utf16.h"
|
||||
|
@ -61,7 +61,7 @@ enum {
|
||||
/**
|
||||
* Bit mask for getting just the options from a string compare options word
|
||||
* that are relevant for case-insensitive string comparison.
|
||||
* See uchar.h. Also include _STRNCMP_STYLE and U_COMPARE_CODE_POINT_ORDER.
|
||||
* See stringoptions.h. Also include _STRNCMP_STYLE and U_COMPARE_CODE_POINT_ORDER.
|
||||
* @internal
|
||||
*/
|
||||
#define _STRCASECMP_OPTIONS_MASK 0xffff
|
||||
@ -69,7 +69,7 @@ enum {
|
||||
/**
|
||||
* Bit mask for getting just the options from a string compare options word
|
||||
* that are relevant for case folding (of a single string or code point).
|
||||
* See uchar.h.
|
||||
* See stringoptions.h.
|
||||
* @internal
|
||||
*/
|
||||
#define _FOLD_CASE_OPTIONS_MASK 0xff
|
||||
|
@ -11,15 +11,6 @@
|
||||
#include "unicode/ucasemap.h"
|
||||
#include "ucase.h"
|
||||
|
||||
#ifndef U_COMPARE_IGNORE_CASE
|
||||
/* see also unorm.h */
|
||||
/**
|
||||
* Option bit for unorm_compare:
|
||||
* Perform case-insensitive comparison.
|
||||
*/
|
||||
#define U_COMPARE_IGNORE_CASE 0x10000
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Internal API, used by u_strcasecmp() etc.
|
||||
* Compare strings case-insensitively,
|
||||
|
133
icu4c/source/common/unicode/stringoptions.h
Normal file
133
icu4c/source/common/unicode/stringoptions.h
Normal file
@ -0,0 +1,133 @@
|
||||
// © 2017 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
|
||||
// stringoptions.h
|
||||
// created: 2017jun08 Markus W. Scherer
|
||||
|
||||
#ifndef __STRINGOPTIONS_H__
|
||||
#define __STRINGOPTIONS_H__
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C API: Bit set option bit constants for various string and character processing functions.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Option value for case folding: Use default mappings defined in CaseFolding.txt.
|
||||
*
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
#define U_FOLD_CASE_DEFAULT 0
|
||||
|
||||
/**
|
||||
* Option value for case folding:
|
||||
*
|
||||
* Use the modified set of mappings provided in CaseFolding.txt to handle dotted I
|
||||
* and dotless i appropriately for Turkic languages (tr, az).
|
||||
*
|
||||
* Before Unicode 3.2, CaseFolding.txt contains mappings marked with 'I' that
|
||||
* are to be included for default mappings and
|
||||
* excluded for the Turkic-specific mappings.
|
||||
*
|
||||
* Unicode 3.2 CaseFolding.txt instead contains mappings marked with 'T' that
|
||||
* are to be excluded for default mappings and
|
||||
* included for the Turkic-specific mappings.
|
||||
*
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
#define U_FOLD_CASE_EXCLUDE_SPECIAL_I 1
|
||||
|
||||
/**
|
||||
* Do not lowercase non-initial parts of words when titlecasing.
|
||||
* Option bit for titlecasing APIs that take an options bit set.
|
||||
*
|
||||
* By default, titlecasing will titlecase the first cased character
|
||||
* of a word and lowercase all other characters.
|
||||
* With this option, the other characters will not be modified.
|
||||
*
|
||||
* @see ucasemap_setOptions
|
||||
* @see ucasemap_toTitle
|
||||
* @see ucasemap_utf8ToTitle
|
||||
* @see UnicodeString::toTitle
|
||||
* @stable ICU 3.8
|
||||
*/
|
||||
#define U_TITLECASE_NO_LOWERCASE 0x100
|
||||
|
||||
/**
|
||||
* Do not adjust the titlecasing indexes from BreakIterator::next() indexes;
|
||||
* titlecase exactly the characters at breaks from the iterator.
|
||||
* Option bit for titlecasing APIs that take an options bit set.
|
||||
*
|
||||
* By default, titlecasing will take each break iterator index,
|
||||
* adjust it by looking for the next cased character, and titlecase that one.
|
||||
* Other characters are lowercased.
|
||||
*
|
||||
* This follows Unicode 4 & 5 section 3.13 Default Case Operations:
|
||||
*
|
||||
* R3 toTitlecase(X): Find the word boundaries based on Unicode Standard Annex
|
||||
* #29, "Text Boundaries." Between each pair of word boundaries, find the first
|
||||
* cased character F. If F exists, map F to default_title(F); then map each
|
||||
* subsequent character C to default_lower(C).
|
||||
*
|
||||
* @see ucasemap_setOptions
|
||||
* @see ucasemap_toTitle
|
||||
* @see ucasemap_utf8ToTitle
|
||||
* @see UnicodeString::toTitle
|
||||
* @see U_TITLECASE_NO_LOWERCASE
|
||||
* @stable ICU 3.8
|
||||
*/
|
||||
#define U_TITLECASE_NO_BREAK_ADJUSTMENT 0x200
|
||||
|
||||
#ifndef U_HIDE_DRAFT_API
|
||||
|
||||
/**
|
||||
* Omit unchanged text when recording how source substrings
|
||||
* relate to changed and unchanged result substrings.
|
||||
* Used for example in some case-mapping and normalization functions.
|
||||
*
|
||||
* @see CaseMap
|
||||
* @see Edits
|
||||
* @see Normalizer2
|
||||
* @draft ICU 60
|
||||
*/
|
||||
#define U_OMIT_UNCHANGED_TEXT 0x4000
|
||||
|
||||
#endif // U_HIDE_DRAFT_API
|
||||
|
||||
/**
|
||||
* Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc:
|
||||
* Compare strings in code point order instead of code unit order.
|
||||
* @stable ICU 2.2
|
||||
*/
|
||||
#define U_COMPARE_CODE_POINT_ORDER 0x8000
|
||||
|
||||
/**
|
||||
* Option bit for unorm_compare:
|
||||
* Perform case-insensitive comparison.
|
||||
* @stable ICU 2.2
|
||||
*/
|
||||
#define U_COMPARE_IGNORE_CASE 0x10000
|
||||
|
||||
/**
|
||||
* Option bit for unorm_compare:
|
||||
* Both input strings are assumed to fulfill FCD conditions.
|
||||
* @stable ICU 2.2
|
||||
*/
|
||||
#define UNORM_INPUT_IS_FCD 0x20000
|
||||
|
||||
// Related definitions elsewhere.
|
||||
// Options that are not meaningful in the same functions
|
||||
// can share the same bits.
|
||||
//
|
||||
// Public:
|
||||
// unicode/unorm.h #define UNORM_COMPARE_NORM_OPTIONS_SHIFT 20
|
||||
//
|
||||
// Internal: (may change or be removed)
|
||||
// ucase.h #define _STRCASECMP_OPTIONS_MASK 0xffff
|
||||
// ucase.h #define _FOLD_CASE_OPTIONS_MASK 0xff
|
||||
// ustr_imp.h #define _STRNCMP_STYLE 0x1000
|
||||
// unormcmp.cpp #define _COMPARE_EQUIV 0x80000
|
||||
|
||||
#endif // __STRINGOPTIONS_H__
|
@ -23,6 +23,7 @@
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/localpointer.h"
|
||||
#include "unicode/stringoptions.h"
|
||||
#include "unicode/ustring.h"
|
||||
|
||||
/**
|
||||
@ -144,56 +145,6 @@ ucasemap_setLocale(UCaseMap *csm, const char *locale, UErrorCode *pErrorCode);
|
||||
U_STABLE void U_EXPORT2
|
||||
ucasemap_setOptions(UCaseMap *csm, uint32_t options, UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Do not lowercase non-initial parts of words when titlecasing.
|
||||
* Option bit for titlecasing APIs that take an options bit set.
|
||||
*
|
||||
* By default, titlecasing will titlecase the first cased character
|
||||
* of a word and lowercase all other characters.
|
||||
* With this option, the other characters will not be modified.
|
||||
*
|
||||
* @see ucasemap_setOptions
|
||||
* @see ucasemap_toTitle
|
||||
* @see ucasemap_utf8ToTitle
|
||||
* @see UnicodeString::toTitle
|
||||
* @stable ICU 3.8
|
||||
*/
|
||||
#define U_TITLECASE_NO_LOWERCASE 0x100
|
||||
|
||||
/**
|
||||
* Do not adjust the titlecasing indexes from BreakIterator::next() indexes;
|
||||
* titlecase exactly the characters at breaks from the iterator.
|
||||
* Option bit for titlecasing APIs that take an options bit set.
|
||||
*
|
||||
* By default, titlecasing will take each break iterator index,
|
||||
* adjust it by looking for the next cased character, and titlecase that one.
|
||||
* Other characters are lowercased.
|
||||
*
|
||||
* This follows Unicode 4 & 5 section 3.13 Default Case Operations:
|
||||
*
|
||||
* R3 toTitlecase(X): Find the word boundaries based on Unicode Standard Annex
|
||||
* #29, "Text Boundaries." Between each pair of word boundaries, find the first
|
||||
* cased character F. If F exists, map F to default_title(F); then map each
|
||||
* subsequent character C to default_lower(C).
|
||||
*
|
||||
* @see ucasemap_setOptions
|
||||
* @see ucasemap_toTitle
|
||||
* @see ucasemap_utf8ToTitle
|
||||
* @see UnicodeString::toTitle
|
||||
* @see U_TITLECASE_NO_LOWERCASE
|
||||
* @stable ICU 3.8
|
||||
*/
|
||||
#define U_TITLECASE_NO_BREAK_ADJUSTMENT 0x200
|
||||
|
||||
/**
|
||||
* Omit unchanged text when case-mapping or normalizing with Edits.
|
||||
*
|
||||
* @see CaseMap
|
||||
* @see Edits
|
||||
* @draft ICU 60
|
||||
*/
|
||||
#define U_OMIT_UNCHANGED_TEXT 0x4000
|
||||
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
|
||||
/**
|
||||
|
@ -26,6 +26,7 @@
|
||||
#define UCHAR_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/stringoptions.h"
|
||||
|
||||
U_CDECL_BEGIN
|
||||
|
||||
@ -3569,27 +3570,6 @@ u_toupper(UChar32 c);
|
||||
U_STABLE UChar32 U_EXPORT2
|
||||
u_totitle(UChar32 c);
|
||||
|
||||
/** Option value for case folding: use default mappings defined in CaseFolding.txt. @stable ICU 2.0 */
|
||||
#define U_FOLD_CASE_DEFAULT 0
|
||||
|
||||
/**
|
||||
* Option value for case folding:
|
||||
*
|
||||
* Use the modified set of mappings provided in CaseFolding.txt to handle dotted I
|
||||
* and dotless i appropriately for Turkic languages (tr, az).
|
||||
*
|
||||
* Before Unicode 3.2, CaseFolding.txt contains mappings marked with 'I' that
|
||||
* are to be included for default mappings and
|
||||
* excluded for the Turkic-specific mappings.
|
||||
*
|
||||
* Unicode 3.2 CaseFolding.txt instead contains mappings marked with 'T' that
|
||||
* are to be excluded for default mappings and
|
||||
* included for the Turkic-specific mappings.
|
||||
*
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
#define U_FOLD_CASE_EXCLUDE_SPECIAL_I 1
|
||||
|
||||
/**
|
||||
* The given character is mapped to its case folding equivalent according to
|
||||
* UnicodeData.txt and CaseFolding.txt;
|
||||
|
@ -38,16 +38,6 @@
|
||||
|
||||
struct UConverter; // unicode/ucnv.h
|
||||
|
||||
#ifndef U_COMPARE_CODE_POINT_ORDER
|
||||
/* see also ustring.h and unorm.h */
|
||||
/**
|
||||
* Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc:
|
||||
* Compare strings in code point order instead of code unit order.
|
||||
* @stable ICU 2.2
|
||||
*/
|
||||
#define U_COMPARE_CODE_POINT_ORDER 0x8000
|
||||
#endif
|
||||
|
||||
#ifndef USTRING_H
|
||||
/**
|
||||
* \ingroup ustring_ustrlen
|
||||
|
@ -32,6 +32,7 @@
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/localpointer.h"
|
||||
#include "unicode/stringoptions.h"
|
||||
#include "unicode/uset.h"
|
||||
|
||||
/**
|
||||
@ -526,30 +527,6 @@ unorm2_hasBoundaryAfter(const UNormalizer2 *norm2, UChar32 c);
|
||||
U_STABLE UBool U_EXPORT2
|
||||
unorm2_isInert(const UNormalizer2 *norm2, UChar32 c);
|
||||
|
||||
/**
|
||||
* Option bit for unorm_compare:
|
||||
* Both input strings are assumed to fulfill FCD conditions.
|
||||
* @stable ICU 2.2
|
||||
*/
|
||||
#define UNORM_INPUT_IS_FCD 0x20000
|
||||
|
||||
/**
|
||||
* Option bit for unorm_compare:
|
||||
* Perform case-insensitive comparison.
|
||||
* @stable ICU 2.2
|
||||
*/
|
||||
#define U_COMPARE_IGNORE_CASE 0x10000
|
||||
|
||||
#ifndef U_COMPARE_CODE_POINT_ORDER
|
||||
/* see also unistr.h and ustring.h */
|
||||
/**
|
||||
* Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc:
|
||||
* Compare strings in code point order instead of code unit order.
|
||||
* @stable ICU 2.2
|
||||
*/
|
||||
#define U_COMPARE_CODE_POINT_ORDER 0x8000
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Compares two strings for canonical equivalence.
|
||||
* Further options include case-insensitive comparison and
|
||||
|
@ -497,16 +497,6 @@ u_strCompare(const UChar *s1, int32_t length1,
|
||||
U_STABLE int32_t U_EXPORT2
|
||||
u_strCompareIter(UCharIterator *iter1, UCharIterator *iter2, UBool codePointOrder);
|
||||
|
||||
#ifndef U_COMPARE_CODE_POINT_ORDER
|
||||
/* see also unistr.h and unorm.h */
|
||||
/**
|
||||
* Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc:
|
||||
* Compare strings in code point order instead of code unit order.
|
||||
* @stable ICU 2.2
|
||||
*/
|
||||
#define U_COMPARE_CODE_POINT_ORDER 0x8000
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Compare two strings case-insensitively using full case folding.
|
||||
* This is equivalent to
|
||||
|
@ -13,7 +13,7 @@
|
||||
#include "unicode/uchar.h"
|
||||
#include "unicode/errorcode.h"
|
||||
#include "unicode/normlzr.h"
|
||||
#include "unicode/ucasemap.h" // U_OMIT_UNCHANGED_TEXT
|
||||
#include "unicode/stringoptions.h"
|
||||
#include "unicode/uniset.h"
|
||||
#include "unicode/usetiter.h"
|
||||
#include "unicode/schriter.h"
|
||||
|
Loading…
Reference in New Issue
Block a user