ICU-1438 add unorm_compare

X-SVN-Rev: 8658
This commit is contained in:
Markus Scherer 2002-05-22 00:24:53 +00:00
parent 493e2da8c2
commit 24c5b88bc2

View File

@ -374,7 +374,7 @@ unorm_previous(UCharIterator *src,
UBool doNormalize, UBool *pNeededToNormalize,
UErrorCode *pErrorCode);
/*
/**
* Concatenate normalized strings, making sure that the result is normalized as well.
*
* If both the left and the right strings are in
@ -418,4 +418,92 @@ unorm_concatenate(const UChar *left, int32_t leftLength,
UNormalizationMode mode, int32_t options,
UErrorCode *pErrorCode);
/**
* unorm_compare option flag (bit 17):
* The caller asserts that both input strings already fulfill the FCD
* conditions. As described for unorm_compare, when this bit is not set
* the function quick-checks the input for FCD and normalizes if necessary.
* @draft ICU 2.2
*/
#define UNORM_INPUT_IS_FCD 0x00020000
/**
* unorm_compare option flag (bit 16):
* Compare the strings case-insensitively, using case folding,
* instead of case-sensitively.
* When set, the case folding options (for example
* U_FOLD_CASE_EXCLUDE_SPECIAL_I, see u_strcasecmp) are honored as well.
* @draft ICU 2.2
*/
#define U_COMPARE_IGNORE_CASE 0x00010000
/**
* unorm_compare option flag (bit 15):
* Order the strings by code point instead of by code unit
* (see u_strcmpCodePointOrder for details).
* @draft ICU 2.2
*/
#define U_COMPARE_CODE_POINT_ORDER 0x00008000
/**
* Compare two strings for canonical equivalence.
* Further options include case-insensitive comparison and
* code point order (as opposed to code unit order).
*
* Canonical equivalence between two strings is defined as their normalized
* forms (NFD or NFC) being identical.
* This function compares strings incrementally instead of normalizing
* (and optionally case-folding) both strings entirely,
* improving performance significantly.
*
* Bulk normalization is only necessary if the strings do not fulfill the FCD
* conditions. Only in this case, and only if the strings are relatively long,
* is memory allocated temporarily.
* For FCD strings and short non-FCD strings there is no memory allocation.
*
* Semantically, this is equivalent to
* strcmp[CodePointOrder](foldCase(NFD(s1)), foldCase(NFD(s2)))
* where code point order and foldCase are both optional.
*
* @param s1 First source string.
* @param length1 Length of first source string, or -1 if NUL-terminated.
*
* @param s2 Second source string.
* @param length2 Length of second source string, or -1 if NUL-terminated.
*
* @param options A bit set of options; combine the flags below with bitwise OR:
* - U_FOLD_CASE_DEFAULT or 0 is used for default options:
* Case-sensitive comparison in code unit order, and the input strings
* are quick-checked for FCD.
*
* - UNORM_INPUT_IS_FCD
* Set if the caller knows that both s1 and s2 fulfill the FCD conditions.
* If not set, the function will quickCheck for FCD
* and normalize if necessary.
*
* - U_COMPARE_CODE_POINT_ORDER
* Set to choose code point order instead of code unit order
* (see u_strcmpCodePointOrder for details).
*
* - U_COMPARE_IGNORE_CASE
* Set to compare strings case-insensitively using case folding,
* instead of case-sensitively.
* If set, then the following case folding options are used.
*
* - Options as used with case-insensitive comparisons, currently:
*
* - U_FOLD_CASE_EXCLUDE_SPECIAL_I
* (see u_strcasecmp for details)
*
* @param pErrorCode ICU error code in/out parameter.
* Must fulfill U_SUCCESS before the function call.
* @return <0 if s1 compares less than s2,
* 0 if the strings are equivalent under the selected options,
* >0 if s1 compares greater than s2
* (as usual for string comparisons).
*
* @see unorm_normalize
* @see UNORM_FCD
* @see u_strcasecmp
* @see u_strcmpCodePointOrder
*
* @draft ICU 2.2
*/
U_CAPI int32_t U_EXPORT2
unorm_compare(const UChar *s1, int32_t length1,
const UChar *s2, int32_t length2,
uint32_t options,
UErrorCode *pErrorCode);
#endif