ICU-1438 add unorm_compare

X-SVN-Rev: 8658
2002-05-22 00:24:53 +00:00 · 2002-05-22 00:24:53 +00:00 · 24c5b88bc2
commit 24c5b88bc2
parent 493e2da8c2
1 changed files with 89 additions and 1 deletions
--- a/icu4c/source/common/unicode/unorm.h
+++ b/icu4c/source/common/unicode/unorm.h
@ -374,7 +374,7 @@ unorm_previous(UCharIterator *src,
               UBool doNormalize, UBool *pNeededToNormalize,
               UErrorCode *pErrorCode);

-/*
+/**
 * Concatenate normalized strings, making sure that the result is normalized as well.
 *
 * If both the left and the right strings are in
@ -418,4 +418,92 @@ unorm_concatenate(const UChar *left, int32_t leftLength,
                  UNormalizationMode mode, int32_t options,
                  UErrorCode *pErrorCode);

+/**
+ * Option bit for unorm_compare:
+ * Both input strings are assumed to fulfill FCD conditions.
+ * @draft ICU 2.2
+ */
+#define UNORM_INPUT_IS_FCD          0x20000
+
+/**
+ * Option bit for unorm_compare:
+ * Perform case-insensitive comparison.
+ * @draft ICU 2.2
+ */
+#define U_COMPARE_IGNORE_CASE       0x10000
+
+/**
+ * Option bit for unorm_compare:
+ * Compare strings in code point order instead of code unit order.
+ * @draft ICU 2.2
+ */
+#define U_COMPARE_CODE_POINT_ORDER  0x8000
+
+/**
+ * Compare two strings for canonical equivalence.
+ * Further options include case-insensitive comparison and
+ * code point order (as opposed to code unit order).
+ *
+ * Canonical equivalence between two strings is defined as their normalized
+ * forms (NFD or NFC) being identical.
+ * This function compares strings incrementally instead of normalizing
+ * (and optionally case-folding) both strings entirely,
+ * improving performance significantly.
+ *
+ * Bulk normalization is only necessary if the strings do not fulfill the FCD
+ * conditions. Only in this case, and only if the strings are relatively long,
+ * is memory allocated temporarily.
+ * For FCD strings and short non-FCD strings there is no memory allocation.
+ *
+ * Semantically, this is equivalent to
+ *   strcmp[CodePointOrder](foldCase(NFD(s1)), foldCase(NFD(s2)))
+ * where code point order and foldCase are all optional.
+ *
+ * @param s1 First source string.
+ * @param length1 Length of first source string, or -1 if NUL-terminated.
+ *
+ * @param s2 Second source string.
+ * @param length2 Length of second source string, or -1 if NUL-terminated.
+ *
+ * @param options A bit set of options:
+ *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
+ *     Case-sensitive comparison in code unit order, and the input strings
+ *     are quick-checked for FCD.
+ *
+ *   - UNORM_INPUT_IS_FCD
+ *     Set if the caller knows that both s1 and s2 fulfill the FCD conditions.
+ *     If not set, the function will quickCheck for FCD
+ *     and normalize if necessary.
+ *
+ *   - U_COMPARE_CODE_POINT_ORDER
+ *     Set to choose code point order instead of code unit order
+ *     (see u_strcmpCodePointOrder for details).
+ *
+ *   - U_COMPARE_IGNORE_CASE
+ *     Set to compare strings case-insensitively using case folding,
+ *     instead of case-sensitively.
+ *     If set, then the following case folding options are used.
+ *
+ *   - Options as used with case-insensitive comparisons, currently:
+ *
+ *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
+ *    (see u_strcasecmp for details)
+ *
+ * @param pErrorCode ICU error code in/out parameter.
+ *                   Must fulfill U_SUCCESS before the function call.
+ * @return <0 or 0 or >0 as usual for string comparisons
+ *
+ * @see unorm_normalize
+ * @see UNORM_FCD
+ * @see u_strcasecmp
+ * @see u_strcmpCodePointOrder
+ *
+ * @draft ICU 2.1
+ */
+U_CAPI int32_t U_EXPORT2
+unorm_compare(const UChar *s1, int32_t length1,
+              const UChar *s2, int32_t length2,
+              uint32_t options,
+              UErrorCode *pErrorCode);
+
 #endif