1999-12-28 23:57:50 +00:00
|
|
|
|
/*
|
|
|
|
|
*******************************************************************************
|
2001-03-03 04:06:43 +00:00
|
|
|
|
* Copyright (c) {1996-1999}, International Business Machines Corporation and others. All Rights Reserved.
|
1999-12-28 23:57:50 +00:00
|
|
|
|
*******************************************************************************
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
#ifndef UCOL_H
|
|
|
|
|
#define UCOL_H
|
|
|
|
|
|
|
|
|
|
#include "unicode/utypes.h"
|
2000-12-06 00:53:48 +00:00
|
|
|
|
#include "unicode/unorm.h"
|
2001-01-15 19:02:30 +00:00
|
|
|
|
|
1999-12-28 23:57:50 +00:00
|
|
|
|
/**
|
2000-12-08 18:46:55 +00:00
|
|
|
|
* \file
|
2000-12-15 04:02:27 +00:00
|
|
|
|
* \brief C API: Collator
|
2000-12-08 18:46:55 +00:00
|
|
|
|
*
|
|
|
|
|
* <h2> Collator C API </h2>
|
1999-12-28 23:57:50 +00:00
|
|
|
|
*
|
|
|
|
|
* The C API for Collator performs locale-sensitive
|
2000-12-15 19:18:27 +00:00
|
|
|
|
* string comparison. You use this class to build
|
1999-12-28 23:57:50 +00:00
|
|
|
|
* searching and sorting routines for natural language text.
|
2000-12-15 19:18:27 +00:00
|
|
|
|
* <em>Important: </em>The ICU collation implementation is being reworked.
|
|
|
|
|
* This means that collation results and especially sort keys will change
|
|
|
|
|
* from ICU 1.6 to 1.7 and again to 1.8.
|
|
|
|
|
* For details, see the <a href="http://oss.software.ibm.com/icu/develop/ICU_collation_design.htm">collation design document</a>.
|
1999-12-28 23:57:50 +00:00
|
|
|
|
*
|
|
|
|
|
* <p>
|
|
|
|
|
* Like other locale-sensitive classes, you can use the function
|
|
|
|
|
* <code>ucol_open()</code>, to obtain the appropriate pointer to
|
|
|
|
|
* <code>UCollator</code> object for a given locale. If you need
|
|
|
|
|
* to understand the details of a particular collation strategy or
|
|
|
|
|
* if you need to modify that strategy.
|
|
|
|
|
*
|
|
|
|
|
* <p>
|
|
|
|
|
* The following example shows how to compare two strings using
|
|
|
|
|
* the <code>UCollator</code> for the default locale.
|
|
|
|
|
* <blockquote>
|
|
|
|
|
* <pre>
|
2000-12-08 18:46:55 +00:00
|
|
|
|
* \code
|
1999-12-28 23:57:50 +00:00
|
|
|
|
* // Compare two strings in the default locale
|
|
|
|
|
* UErrorCode success = U_ZERO_ERROR;
|
|
|
|
|
* UCollator* myCollator = ucol_open(NULL, &success);
|
|
|
|
|
* UChar source[4], target[4];
|
|
|
|
|
* u_uastrcpy(source, "abc");
|
|
|
|
|
* u_uastrcpy(target, "ABC");
|
|
|
|
|
* if( ucol_strcoll(myCollator, source, u_strlen(source), target, u_strlen(target)) == UCOL_LESS) {
|
|
|
|
|
* printf("abc is less than ABC\n");
|
|
|
|
|
* }else{
|
|
|
|
|
* printf("abc is greater than or equal to ABC\n");
|
|
|
|
|
* }
|
2000-12-08 18:46:55 +00:00
|
|
|
|
* \endcode
|
1999-12-28 23:57:50 +00:00
|
|
|
|
* </pre>
|
|
|
|
|
* </blockquote>
|
|
|
|
|
*
|
|
|
|
|
* <p>
|
|
|
|
|
* You can set a <code>Collator</code>'s <em>strength</em> property
|
|
|
|
|
* to determine the level of difference considered significant in
|
|
|
|
|
* comparisons. Four strengths are provided: <code>UCOL_PRIMARY</code>,
|
|
|
|
|
* <code>UCOL_SECONDARY</code>, <code>UCOL_TERTIARY</code>, and
|
|
|
|
|
* <code>UCOL_IDENTICAL</code>. The exact assignment of strengths to
|
|
|
|
|
* language features is locale dependent. For example, in Czech,
|
|
|
|
|
* "e" and "f" are considered primary differences, while "e" and "\u00EA"
|
|
|
|
|
* are secondary differences, "e" and "E" are tertiary differences and
|
|
|
|
|
* "e" and "e" are identical.
|
|
|
|
|
* The following shows how both case and accents could be ignored for
|
|
|
|
|
* US English.
|
|
|
|
|
* <blockquote>
|
|
|
|
|
* <pre>
|
2000-12-08 18:46:55 +00:00
|
|
|
|
* \code
|
1999-12-28 23:57:50 +00:00
|
|
|
|
* //Get the Collator for US English and set its strength to UCOL_PRIMARY
|
|
|
|
|
* UErrorCode success = U_ZERO_ERROR;
|
|
|
|
|
* UCollator* usCollator = ucol_open("en_US", &success);
|
|
|
|
|
* ucol_setStrength(usCollator, UCOL_PRIMARY);
|
|
|
|
|
* UChar source[4], target[4];
|
|
|
|
|
* u_uastrcpy(source, "abc");
|
|
|
|
|
* u_uastrcpy(target, "ABC");
|
|
|
|
|
* if( ucol_strcoll(usCollator, source, u_strlen(source), target, u_strlen(target)) == UCOL_EQUAL) {
|
|
|
|
|
* printf("'abc' and 'ABC' strings are equivalent with strength UCOL_PRIMARY\n");
|
|
|
|
|
* }
|
2000-12-08 18:46:55 +00:00
|
|
|
|
* \endcode
|
1999-12-28 23:57:50 +00:00
|
|
|
|
* </pre>
|
|
|
|
|
* </blockquote>
|
|
|
|
|
* <p>
|
2000-12-15 19:18:27 +00:00
|
|
|
|
* For comparing strings exactly once, the <code>ucol_strcoll</code>
|
1999-12-28 23:57:50 +00:00
|
|
|
|
* method provides the best performance. When sorting a list of
|
2000-12-15 19:18:27 +00:00
|
|
|
|
* strings however, it is generally necessary to compare each
|
|
|
|
|
* string multiple times. In this case, sort keys
|
|
|
|
|
* provide better performance. The <code>ucol_getSortKey</code> method converts
|
|
|
|
|
* a string to a series of bytes that can be compared bitwise
|
|
|
|
|
* against other sort keys using <code>strcmp()</code>.
|
|
|
|
|
* Sort keys are written as zero-terminated byte strings.
|
|
|
|
|
* They consist of several substrings, one for each collation strength level,
|
|
|
|
|
* that are delimited by 0x01 bytes.
|
|
|
|
|
* If the string code points are appended for UCOL_IDENTICAL, then they are processed
|
|
|
|
|
* for correct code point order comparison and may contain 0x01 bytes
|
|
|
|
|
* but not zero bytes.</p>
|
1999-12-28 23:57:50 +00:00
|
|
|
|
* <p>
|
|
|
|
|
* <strong>Note:</strong> <code>UCollator</code>s with different Locale,
|
|
|
|
|
* Collation Strength and Decomposition Mode settings will return different
|
|
|
|
|
* sort orders for the same set of strings. Locales have specific
|
|
|
|
|
* collation rules, and the way in which secondary and tertiary differences
|
|
|
|
|
* are taken into account, for example, will result in a different sorting order
|
|
|
|
|
* for same strings.
|
|
|
|
|
* <p>
|
|
|
|
|
* @see UCollationResult
|
|
|
|
|
* @see UNormalizationMode
|
|
|
|
|
* @see UCollationStrength
|
|
|
|
|
* @see UCollationElements
|
|
|
|
|
*/
|
2000-12-14 01:11:11 +00:00
|
|
|
|
/* Forward declaration only: struct collIterate is not defined in this header,
 * so it is an incomplete (opaque) type for code that includes just this file. */
struct collIterate;
|
|
|
|
|
typedef struct collIterate collIterate;
|
|
|
|
|
|
|
|
|
|
/* Forward declaration only: struct incrementalContext is likewise left
 * undefined here and can only be referred to through pointers. */
struct incrementalContext;
|
|
|
|
|
typedef struct incrementalContext incrementalContext;
|
1999-12-28 23:57:50 +00:00
|
|
|
|
|
2001-01-15 19:02:30 +00:00
|
|
|
|
/** A collator.
|
|
|
|
|
* For usage in C programs.
* The struct is only forward-declared in this header, so C callers treat it
* as an opaque type and work with UCollator pointers. Such pointers are
* returned by ucol_open(), ucol_openVersion(), ucol_openRules() and
* ucol_safeClone(); ucol_close() closes a collator.
|
|
|
|
|
*/
|
2001-01-15 07:28:54 +00:00
|
|
|
|
struct UCollator;
|
|
|
|
|
typedef struct UCollator UCollator;
|
2001-01-04 00:45:41 +00:00
|
|
|
|
|
1999-12-28 23:57:50 +00:00
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* UCOL_LESS is returned if the source string compares less than the target
|
|
|
|
|
* string in ucol_strcoll().
|
|
|
|
|
* UCOL_EQUAL is returned if the source string compares equal to the target
|
|
|
|
|
* string in ucol_strcoll().
|
|
|
|
|
* UCOL_GREATER is returned if the source string compares greater than the
|
|
|
|
|
* target string in ucol_strcoll().
|
|
|
|
|
* @see ucol_strcoll()
|
|
|
|
|
**/
|
|
|
|
|
/** Possible values for a comparison result (also returned by ucol_strcollinc()) */
|
2000-12-06 00:53:48 +00:00
|
|
|
|
typedef enum {
|
1999-12-28 23:57:50 +00:00
|
|
|
|
/** source string == target string */
|
|
|
|
|
UCOL_EQUAL = 0,
|
|
|
|
|
/** source string > target string */
|
|
|
|
|
UCOL_GREATER = 1,
|
|
|
|
|
/** source string < target string */
|
|
|
|
|
UCOL_LESS = -1
|
2000-12-06 00:53:48 +00:00
|
|
|
|
} UCollationResult ;
|
2000-11-29 04:02:53 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
* Values that can be passed to ucol_setAttribute() and are returned by
* ucol_getAttribute(). The enumerators are grouped by the UColAttribute they
* belong to; the strength values are also used directly through the
* UCollationStrength typedef.
* Note that several enumerators share a numeric value (see the notes on the
* *_LIMIT entries below) and that the numbering has gaps.
*/
typedef enum {
|
|
|
|
|
/* accepted by most attributes */
|
|
|
|
|
UCOL_DEFAULT = -1,
|
2000-12-06 00:53:48 +00:00
|
|
|
|
|
2000-11-30 23:20:14 +00:00
|
|
|
|
/* for UCOL_STRENGTH */
|
|
|
|
|
/** Primary collation strength */
|
|
|
|
|
UCOL_PRIMARY = 0,
|
|
|
|
|
/** Secondary collation strength */
|
|
|
|
|
UCOL_SECONDARY = 1,
|
|
|
|
|
/** Tertiary collation strength */
|
|
|
|
|
UCOL_TERTIARY = 2,
|
2000-12-06 00:53:48 +00:00
|
|
|
|
/** Default collation strength (an alias for UCOL_TERTIARY) */
|
2000-11-30 23:20:14 +00:00
|
|
|
|
UCOL_DEFAULT_STRENGTH = UCOL_TERTIARY,
|
2001-02-21 17:45:06 +00:00
|
|
|
|
/* No explicit value: one past UCOL_DEFAULT_STRENGTH, i.e. 3, which is
 * numerically the same as UCOL_QUATERNARY below. */
UCOL_CE_STRENGTH_LIMIT,
|
2000-11-30 23:20:14 +00:00
|
|
|
|
/** Quaternary collation strength */
|
|
|
|
|
UCOL_QUATERNARY=3,
|
|
|
|
|
/** Identical collation strength */
|
|
|
|
|
UCOL_IDENTICAL=15,
|
2001-02-21 17:45:06 +00:00
|
|
|
|
/* No explicit value: one past UCOL_IDENTICAL, i.e. 16, which is
 * numerically the same as UCOL_OFF below. */
UCOL_STRENGTH_LIMIT,
|
2000-11-30 23:20:14 +00:00
|
|
|
|
|
2000-12-06 00:53:48 +00:00
|
|
|
|
/* for UCOL_FRENCH_COLLATION, UCOL_CASE_LEVEL & UCOL_DECOMPOSITION_MODE
 * NOTE(review): no UCOL_DECOMPOSITION_MODE is declared in UColAttribute;
 * UCOL_NORMALIZATION_MODE is presumably meant - confirm. */
|
2000-11-30 23:20:14 +00:00
|
|
|
|
UCOL_OFF = 16,
|
|
|
|
|
UCOL_ON = 17,
|
|
|
|
|
|
2000-11-29 04:02:53 +00:00
|
|
|
|
/* for UCOL_ALTERNATE_HANDLING */
|
2000-12-06 00:53:48 +00:00
|
|
|
|
UCOL_SHIFTED = 20,
|
|
|
|
|
UCOL_NON_IGNORABLE = 21,
|
2000-11-30 23:20:14 +00:00
|
|
|
|
|
2000-11-29 04:02:53 +00:00
|
|
|
|
/* for UCOL_CASE_FIRST */
|
2000-12-06 00:53:48 +00:00
|
|
|
|
UCOL_LOWER_FIRST = 24,
|
|
|
|
|
UCOL_UPPER_FIRST = 25,
|
2000-11-30 23:20:14 +00:00
|
|
|
|
|
2000-11-29 04:02:53 +00:00
|
|
|
|
/* for UCOL_NORMALIZATION_MODE */
|
2000-12-06 00:53:48 +00:00
|
|
|
|
UCOL_ON_WITHOUT_HANGUL = 28,
|
|
|
|
|
|
|
|
|
|
|
/** No more attribute values after this.
* No explicit value: one past UCOL_ON_WITHOUT_HANGUL, i.e. 29. Because the
* numbering above has gaps, this is an upper bound on the values rather
* than the number of enumerators. */
|
2000-11-29 04:02:53 +00:00
|
|
|
|
UCOL_ATTRIBUTE_VALUE_COUNT
|
2000-11-30 23:20:14 +00:00
|
|
|
|
|
2000-11-29 04:02:53 +00:00
|
|
|
|
} UColAttributeValue;
|
|
|
|
|
|
1999-12-28 23:57:50 +00:00
|
|
|
|
/**
|
|
|
|
|
* Base letter represents a primary difference. Set comparison
|
|
|
|
|
* level to UCOL_PRIMARY to ignore secondary and tertiary differences.
|
|
|
|
|
* Use this to set the strength of a Collator object.
|
|
|
|
|
* Example of primary difference, "abc" < "abd"
|
|
|
|
|
*
|
|
|
|
|
* Diacritical differences on the same base letter represent a secondary
|
|
|
|
|
* difference. Set comparison level to UCOL_SECONDARY to ignore tertiary
|
|
|
|
|
* differences. Use this to set the strength of a Collator object.
|
|
|
|
|
* Example of secondary difference, "\u00E4" >> "a".
|
|
|
|
|
*
|
|
|
|
|
* Uppercase and lowercase versions of the same character represents a
|
|
|
|
|
* tertiary difference. Set comparison level to UCOL_TERTIARY to include
|
|
|
|
|
* all comparison differences. Use this to set the strength of a Collator
|
|
|
|
|
* object.
|
|
|
|
|
* Example of tertiary difference, "abc" <<< "ABC".
|
|
|
|
|
*
|
|
|
|
|
* Two characters are considered "identical" when they have the same
|
|
|
|
|
* unicode spellings. UCOL_IDENTICAL.
|
|
|
|
|
* For example, "\u00E4" == "\u00E4".
|
|
|
|
|
*
|
|
|
|
|
* UCollationStrength is also used to determine the strength of sort keys
|
2001-03-13 07:29:58 +00:00
|
|
|
|
* generated from UCollator objects
|
1999-12-28 23:57:50 +00:00
|
|
|
|
**/
|
2000-11-29 04:02:53 +00:00
|
|
|
|
/** Possible collation strengths - all declared as part of UColAttributeValue:
* UCOL_PRIMARY, UCOL_SECONDARY, UCOL_TERTIARY, UCOL_QUATERNARY,
* UCOL_IDENTICAL and UCOL_DEFAULT_STRENGTH.
* Because this is a plain typedef of UColAttributeValue, the compiler does
* not restrict a UCollationStrength to those strength enumerators. */
|
|
|
|
|
typedef UColAttributeValue UCollationStrength;
|
2000-11-29 00:16:15 +00:00
|
|
|
|
|
2000-11-29 04:02:53 +00:00
|
|
|
|
/**
* Attributes that can be set with ucol_setAttribute() and queried with
* ucol_getAttribute(). The values each attribute takes are the
* UColAttributeValue enumerators; the grouping comments inside that enum
* say which values belong to which attribute.
*/
typedef enum {
|
2000-11-29 00:16:15 +00:00
|
|
|
|
UCOL_FRENCH_COLLATION, /* attribute for direction of secondary weights */
|
|
|
|
|
UCOL_ALTERNATE_HANDLING, /* attribute for handling variable elements */
|
|
|
|
|
UCOL_CASE_FIRST, /* which sorts first, lower case or upper case */
|
|
|
|
|
UCOL_CASE_LEVEL, /* whether there is an extra case level */
|
|
|
|
|
UCOL_NORMALIZATION_MODE, /* attribute for normalization */
|
|
|
|
|
UCOL_STRENGTH, /* attribute for strength */
|
|
|
|
|
UCOL_ATTRIBUTE_COUNT /* number of attributes above (6); not itself an attribute */
|
2000-11-29 04:02:53 +00:00
|
|
|
|
} UColAttribute;
|
1999-12-28 23:57:50 +00:00
|
|
|
|
|
2000-12-14 01:11:11 +00:00
|
|
|
|
/**
* Selects which rules ucol_getRulesEx() returns (its "delta" parameter):
* just the tailoring, or the full rules.
*/
typedef enum {
|
|
|
|
|
/* return only the tailoring */
UCOL_TAILORING_ONLY,
|
|
|
|
|
/* return the full rules */
UCOL_FULL_RULES
|
|
|
|
|
} UColRuleOption ;
|
|
|
|
|
|
1999-12-28 23:57:50 +00:00
|
|
|
|
/**
|
2001-03-13 07:29:58 +00:00
|
|
|
|
* Open a UCollator for comparing strings.
|
|
|
|
|
* The UCollator may be used in calls to \Ref{ucol_strcoll}.
|
1999-12-28 23:57:50 +00:00
|
|
|
|
* @param loc The locale containing the comparison conventions.
|
|
|
|
|
* @param status A pointer to an UErrorCode to receive any errors
|
|
|
|
|
* @return A pointer to a UCollator, or 0 if an error occurred.
|
|
|
|
|
* @see ucol_openRules
|
2000-03-22 19:19:33 +00:00
|
|
|
|
* @stable
|
1999-12-28 23:57:50 +00:00
|
|
|
|
*/
|
2001-01-15 07:28:54 +00:00
|
|
|
|
|
1999-12-28 23:57:50 +00:00
|
|
|
|
U_CAPI UCollator*
|
|
|
|
|
ucol_open( const char *loc,
|
|
|
|
|
UErrorCode *status);
|
|
|
|
|
|
2001-03-14 00:22:56 +00:00
|
|
|
|
/**
|
|
|
|
|
* Open a UCollator with a specific version.
|
|
|
|
|
* This is the same as ucol_open() except that ucol_getVersion() of
|
|
|
|
|
* the returned object is guaranteed to be the same as the version
|
|
|
|
|
* parameter.
|
|
|
|
|
* This is designed to be used to open the same collator for a given
|
|
|
|
|
* locale even when ICU is updated.
|
|
|
|
|
* The same locale and version guarantees the same sort keys and
|
|
|
|
|
* comparison results.
|
|
|
|
|
*
|
|
|
|
|
* @param loc The locale ID for which to open a collator.
|
|
|
|
|
* @param version The requested collator version.
|
|
|
|
|
* @param status A pointer to a UErrorCode,
|
|
|
|
|
* must not indicate a failure before calling this function.
|
|
|
|
|
* @return A pointer to a UCollator, or NULL if an error occurred
|
|
|
|
|
* or a collator with the requested version is not available.
|
|
|
|
|
*
|
|
|
|
|
* @see ucol_open
|
|
|
|
|
* @see ucol_getVersion
|
|
|
|
|
* @draft ICU 1.8
|
|
|
|
|
*/
|
|
|
|
|
U_CAPI UCollator * U_EXPORT2
|
|
|
|
|
ucol_openVersion(const char *loc,
|
|
|
|
|
UVersionInfo version,
|
|
|
|
|
UErrorCode *status);
|
|
|
|
|
|
1999-12-28 23:57:50 +00:00
|
|
|
|
/**
|
2001-03-13 07:29:58 +00:00
|
|
|
|
* Open a UCollator for comparing strings.
|
|
|
|
|
* The UCollator may be used in calls to \Ref{ucol_strcoll}.
|
1999-12-28 23:57:50 +00:00
|
|
|
|
* @param rules A string describing the collation rules.
|
|
|
|
|
* @param rulesLength The length of rules, or -1 if null-terminated.
|
|
|
|
|
* @param mode The normalization mode; one of UCOL_NO_NORMALIZATION,
|
|
|
|
|
* UCOL_CAN_DECOMP, UCOL_COMPAT_DECOMP, UCOL_CAN_DECOMP_COMPAT_COMP,
|
|
|
|
|
* UCOL_COMPAT_DECOMP_CAN_COMP, UCOL_DEFAULT_NORMALIZATION
|
|
|
|
|
* @param strength The collation strength; one of UCOL_PRIMARY, UCOL_SECONDARY,
|
|
|
|
|
* UCOL_TERTIARY, UCOL_IDENTICAL,UCOL_DEFAULT_STRENGTH
|
|
|
|
|
* @param status A pointer to an UErrorCode to receive any errors
|
|
|
|
|
* @return A pointer to a UCollator, or 0 if an error occurred.
|
|
|
|
|
* @see ucol_open
|
2000-03-22 19:19:33 +00:00
|
|
|
|
* @stable
|
1999-12-28 23:57:50 +00:00
|
|
|
|
*/
|
|
|
|
|
U_CAPI UCollator*
|
|
|
|
|
ucol_openRules( const UChar *rules,
|
|
|
|
|
int32_t rulesLength,
|
|
|
|
|
UNormalizationMode mode,
|
|
|
|
|
UCollationStrength strength,
|
|
|
|
|
UErrorCode *status);
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Close a UCollator.
|
2001-03-13 07:29:58 +00:00
|
|
|
|
* Once closed, a UCollator should not be used.
|
|
|
|
|
* @param coll The UCollator to close.
|
2000-03-22 19:19:33 +00:00
|
|
|
|
* @stable
|
1999-12-28 23:57:50 +00:00
|
|
|
|
*/
|
|
|
|
|
U_CAPI void
|
|
|
|
|
ucol_close(UCollator *coll);
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Compare two strings.
|
|
|
|
|
* The strings will be compared using the normalization mode and options
|
|
|
|
|
* specified in \Ref{ucol_open} or \Ref{ucol_openRules}
|
2001-03-13 07:29:58 +00:00
|
|
|
|
* @param coll The UCollator containing the comparison rules.
|
1999-12-28 23:57:50 +00:00
|
|
|
|
* @param source The source string.
|
|
|
|
|
* @param sourceLength The length of source, or -1 if null-terminated.
|
|
|
|
|
* @param target The target string.
|
|
|
|
|
* @param targetLength The length of target, or -1 if null-terminated.
|
|
|
|
|
* @return The result of comparing the strings; one of UCOL_EQUAL,
|
|
|
|
|
* UCOL_GREATER, UCOL_LESS
|
|
|
|
|
* @see ucol_greater
|
|
|
|
|
* @see ucol_greaterOrEqual
|
|
|
|
|
* @see ucol_equal
|
2000-03-22 19:19:33 +00:00
|
|
|
|
* @stable
|
1999-12-28 23:57:50 +00:00
|
|
|
|
*/
|
|
|
|
|
|
2001-01-04 00:45:41 +00:00
|
|
|
|
U_CAPI UCollationResult
|
2001-01-15 07:28:54 +00:00
|
|
|
|
ucol_strcoll( const UCollator *coll,
|
2001-01-04 00:45:41 +00:00
|
|
|
|
const UChar *source,
|
|
|
|
|
int32_t sourceLength,
|
|
|
|
|
const UChar *target,
|
|
|
|
|
int32_t targetLength);
|
|
|
|
|
|
1999-12-28 23:57:50 +00:00
|
|
|
|
/**
|
|
|
|
|
* Determine if one string is greater than another.
|
|
|
|
|
* This function is equivalent to \Ref{ucol_strcoll} == UCOL_GREATER
|
2001-03-13 07:29:58 +00:00
|
|
|
|
* @param coll The UCollator containing the comparison rules.
|
1999-12-28 23:57:50 +00:00
|
|
|
|
* @param source The source string.
|
|
|
|
|
* @param sourceLength The length of source, or -1 if null-terminated.
|
|
|
|
|
* @param target The target string.
|
|
|
|
|
* @param targetLength The length of target, or -1 if null-terminated.
|
|
|
|
|
* @return TRUE if source is greater than target, FALSE otherwise.
|
|
|
|
|
* @see ucol_strcoll
|
|
|
|
|
* @see ucol_greaterOrEqual
|
|
|
|
|
* @see ucol_equal
|
2000-03-22 19:19:33 +00:00
|
|
|
|
* @stable
|
1999-12-28 23:57:50 +00:00
|
|
|
|
*/
|
2000-05-18 22:08:39 +00:00
|
|
|
|
U_CAPI UBool
|
1999-12-28 23:57:50 +00:00
|
|
|
|
ucol_greater( const UCollator *coll,
|
|
|
|
|
const UChar *source,
|
|
|
|
|
int32_t sourceLength,
|
|
|
|
|
const UChar *target,
|
|
|
|
|
int32_t targetLength);
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Determine if one string is greater than or equal to another.
|
|
|
|
|
* This function is equivalent to \Ref{ucol_strcoll} != UCOL_LESS
|
2001-03-13 07:29:58 +00:00
|
|
|
|
* @param coll The UCollator containing the comparison rules.
|
1999-12-28 23:57:50 +00:00
|
|
|
|
* @param source The source string.
|
|
|
|
|
* @param sourceLength The length of source, or -1 if null-terminated.
|
|
|
|
|
* @param target The target string.
|
|
|
|
|
* @param targetLength The length of target, or -1 if null-terminated.
|
|
|
|
|
* @return TRUE if source is greater than or equal to target, FALSE otherwise.
|
|
|
|
|
* @see ucol_strcoll
|
|
|
|
|
* @see ucol_greater
|
|
|
|
|
* @see ucol_equal
|
2000-03-22 19:19:33 +00:00
|
|
|
|
* @stable
|
1999-12-28 23:57:50 +00:00
|
|
|
|
*/
|
2000-05-18 22:08:39 +00:00
|
|
|
|
U_CAPI UBool
|
1999-12-28 23:57:50 +00:00
|
|
|
|
ucol_greaterOrEqual( const UCollator *coll,
|
|
|
|
|
const UChar *source,
|
|
|
|
|
int32_t sourceLength,
|
|
|
|
|
const UChar *target,
|
|
|
|
|
int32_t targetLength);
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Compare two strings for equality.
|
|
|
|
|
* This function is equivalent to \Ref{ucol_strcoll} == UCOL_EQUAL
|
2001-03-13 07:29:58 +00:00
|
|
|
|
* @param coll The UCollator containing the comparison rules.
|
1999-12-28 23:57:50 +00:00
|
|
|
|
* @param source The source string.
|
|
|
|
|
* @param sourceLength The length of source, or -1 if null-terminated.
|
|
|
|
|
* @param target The target string.
|
|
|
|
|
* @param targetLength The length of target, or -1 if null-terminated.
|
|
|
|
|
* @return TRUE if source is equal to target, FALSE otherwise
|
|
|
|
|
* @see ucol_strcoll
|
|
|
|
|
* @see ucol_greater
|
|
|
|
|
* @see ucol_greaterOrEqual
|
2000-03-22 19:19:33 +00:00
|
|
|
|
* @stable
|
1999-12-28 23:57:50 +00:00
|
|
|
|
*/
|
2000-05-18 22:08:39 +00:00
|
|
|
|
U_CAPI UBool
|
1999-12-28 23:57:50 +00:00
|
|
|
|
ucol_equal( const UCollator *coll,
|
|
|
|
|
const UChar *source,
|
|
|
|
|
int32_t sourceLength,
|
|
|
|
|
const UChar *target,
|
|
|
|
|
int32_t targetLength);
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Get the collation strength used in a UCollator.
|
|
|
|
|
* The strength influences how strings are compared.
|
2001-03-13 07:29:58 +00:00
|
|
|
|
* @param coll The UCollator to query.
|
1999-12-28 23:57:50 +00:00
|
|
|
|
* @return The collation strength; one of UCOL_PRIMARY, UCOL_SECONDARY,
|
|
|
|
|
* UCOL_TERTIARY, UCOL_IDENTICAL, UCOL_DEFAULT_STRENGTH
|
|
|
|
|
* @see ucol_setStrength
|
2000-03-22 19:19:33 +00:00
|
|
|
|
* @stable
|
1999-12-28 23:57:50 +00:00
|
|
|
|
*/
|
|
|
|
|
U_CAPI UCollationStrength
|
|
|
|
|
ucol_getStrength(const UCollator *coll);
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Set the collation strength used in a UCollator.
|
|
|
|
|
* The strength influences how strings are compared.
|
|
|
|
|
* <p>Example of use:
|
|
|
|
|
* <pre>
|
|
|
|
|
* . UCollationResult result;
|
|
|
|
|
* . UChar source[4], target[4];
|
|
|
|
|
* . UErrorCode status = U_ZERO_ERROR;
|
2001-03-13 07:29:58 +00:00
|
|
|
|
* . UCollator *myCollation = ucol_open("en_US", &status);
|
1999-12-28 23:57:50 +00:00
|
|
|
|
* . if (U_FAILURE(status)) return;
|
|
|
|
|
* . ucol_setStrength(myCollation, UCOL_PRIMARY);
|
|
|
|
|
* . u_uastrcpy(source, "abc");
|
|
|
|
|
* . u_uastrcpy(target, "ABC");
|
|
|
|
|
* . // result will be "abc" == "ABC"
|
|
|
|
|
* . // tertiary differences will be ignored
|
|
|
|
|
* . result = ucol_strcoll(myCollation, source, u_strlen(source), target, u_strlen(target));
|
|
|
|
|
* </pre>
|
2001-03-13 07:29:58 +00:00
|
|
|
|
* @param coll The UCollator to set.
|
1999-12-28 23:57:50 +00:00
|
|
|
|
* @param strength The desired collation strength; one of UCOL_PRIMARY,
|
|
|
|
|
* UCOL_SECONDARY, UCOL_TERTIARY, UCOL_IDENTICAL, UCOL_DEFAULT_STRENGTH
|
|
|
|
|
* @see ucol_getStrength
|
2000-03-22 19:19:33 +00:00
|
|
|
|
* @stable
|
1999-12-28 23:57:50 +00:00
|
|
|
|
*/
|
|
|
|
|
U_CAPI void
|
|
|
|
|
ucol_setStrength( UCollator *coll,
|
|
|
|
|
UCollationStrength strength);
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Get the normalization mode used in a UCollator.
|
|
|
|
|
* The normalization mode influences how strings are compared.
|
2001-03-13 07:29:58 +00:00
|
|
|
|
* @param coll The UCollator to query.
|
1999-12-28 23:57:50 +00:00
|
|
|
|
* @return The normalization mode; one of UCOL_NO_NORMALIZATION,
|
|
|
|
|
* UCOL_CAN_DECOMP, UCOL_COMPAT_DECOMP, UCOL_CAN_DECOMP_COMPAT_COMP,
|
|
|
|
|
* UCOL_COMPAT_DECOMP_CAN_COMP, UCOL_DEFAULT_NORMALIZATION
|
|
|
|
|
* @see ucol_setNormalization
|
2000-03-22 19:19:33 +00:00
|
|
|
|
* @stable
|
1999-12-28 23:57:50 +00:00
|
|
|
|
*/
|
|
|
|
|
U_CAPI UNormalizationMode
|
|
|
|
|
ucol_getNormalization(const UCollator* coll);
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Set the normalization mode used in a UCollator.
|
|
|
|
|
* The normalization mode influences how strings are compared.
|
2001-03-13 07:29:58 +00:00
|
|
|
|
* @param coll The UCollator to set.
|
1999-12-28 23:57:50 +00:00
|
|
|
|
* @param mode The desired normalization mode; one of UCOL_NO_NORMALIZATION,
|
|
|
|
|
* UCOL_CAN_DECOMP, UCOL_COMPAT_DECOMP, UCOL_CAN_DECOMP_COMPAT_COMP,
|
|
|
|
|
* UCOL_COMPAT_DECOMP_CAN_COMP, UCOL_DEFAULT_NORMALIZATION
|
|
|
|
|
* @see ucol_getNormalization
|
2000-03-22 19:19:33 +00:00
|
|
|
|
* @stable
|
1999-12-28 23:57:50 +00:00
|
|
|
|
*/
|
|
|
|
|
U_CAPI void
|
|
|
|
|
ucol_setNormalization( UCollator *coll,
|
|
|
|
|
UNormalizationMode mode);
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Get the display name for a UCollator.
|
|
|
|
|
* The display name is suitable for presentation to a user.
|
|
|
|
|
* @param objLoc The locale of the collator in question.
|
|
|
|
|
* @param dispLoc The locale for display.
|
|
|
|
|
* @param result A pointer to a buffer to receive the attribute.
|
|
|
|
|
* @param resultLength The maximum size of result.
|
|
|
|
|
* @param status A pointer to an UErrorCode to receive any errors
|
|
|
|
|
* @return The total buffer size needed; if greater than resultLength,
|
|
|
|
|
* the output was truncated.
|
2000-03-22 19:19:33 +00:00
|
|
|
|
* @stable
|
1999-12-28 23:57:50 +00:00
|
|
|
|
*/
|
|
|
|
|
U_CAPI int32_t
|
|
|
|
|
ucol_getDisplayName( const char *objLoc,
|
|
|
|
|
const char *dispLoc,
|
|
|
|
|
UChar *result,
|
|
|
|
|
int32_t resultLength,
|
|
|
|
|
UErrorCode *status);
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Get a locale for which collation rules are available.
|
|
|
|
|
* A UCollator in a locale returned by this function will perform the correct
|
|
|
|
|
* collation for the locale.
|
|
|
|
|
* @param index The index of the desired locale.
|
|
|
|
|
* @return A locale for which collation rules are available, or 0 if none.
|
|
|
|
|
* @see ucol_countAvailable
|
2000-03-22 19:19:33 +00:00
|
|
|
|
* @stable
|
1999-12-28 23:57:50 +00:00
|
|
|
|
*/
|
|
|
|
|
U_CAPI const char*
|
|
|
|
|
ucol_getAvailable(int32_t index);
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Determine how many locales have collation rules available.
|
|
|
|
|
* This function is most useful for determining the loop ending condition for
|
|
|
|
|
* calls to \Ref{ucol_getAvailable}.
|
|
|
|
|
* @return The number of locales for which collation rules are available.
|
|
|
|
|
* @see ucol_getAvailable
|
2000-03-22 19:19:33 +00:00
|
|
|
|
* @stable
|
1999-12-28 23:57:50 +00:00
|
|
|
|
*/
|
|
|
|
|
U_CAPI int32_t
|
|
|
|
|
ucol_countAvailable(void);
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Get the collation rules from a UCollator.
|
|
|
|
|
* The rules will follow the rule syntax.
|
2001-03-13 07:29:58 +00:00
|
|
|
|
* @param coll The UCollator to query.
|
1999-12-28 23:57:50 +00:00
|
|
|
|
* @param length
|
|
|
|
|
* @return The collation rules.
|
2000-03-22 19:19:33 +00:00
|
|
|
|
* @stable
|
1999-12-28 23:57:50 +00:00
|
|
|
|
*/
|
|
|
|
|
U_CAPI const UChar*
|
|
|
|
|
ucol_getRules( const UCollator *coll,
|
|
|
|
|
int32_t *length);
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Get a sort key for a string from a UCollator.
|
2000-12-15 19:18:27 +00:00
|
|
|
|
* Sort keys may be compared using <TT>strcmp</TT>.
|
2001-03-13 07:29:58 +00:00
|
|
|
|
* @param coll The UCollator containing the collation rules.
|
1999-12-28 23:57:50 +00:00
|
|
|
|
* @param source The string to transform.
|
|
|
|
|
* @param sourceLength The length of source, or -1 if null-terminated.
|
|
|
|
|
* @param result A pointer to a buffer to receive the attribute.
|
|
|
|
|
* @param resultLength The maximum size of result.
|
|
|
|
|
* @return The size needed to fully store the sort key.
|
|
|
|
|
* @see ucol_keyHashCode
|
2000-03-22 19:19:33 +00:00
|
|
|
|
* @stable
|
1999-12-28 23:57:50 +00:00
|
|
|
|
*/
|
|
|
|
|
U_CAPI int32_t
|
|
|
|
|
ucol_getSortKey(const UCollator *coll,
|
2000-11-07 00:00:17 +00:00
|
|
|
|
const UChar *source,
|
|
|
|
|
int32_t sourceLength,
|
|
|
|
|
uint8_t *result,
|
|
|
|
|
int32_t resultLength);
|
|
|
|
|
|
1999-12-28 23:57:50 +00:00
|
|
|
|
|
|
|
|
|
|
2000-05-15 19:48:26 +00:00
|
|
|
|
/**
|
|
|
|
|
* Gets the version information for a Collator.
|
|
|
|
|
* @param info the version # information, the result will be filled in
|
|
|
|
|
* @stable
|
|
|
|
|
*/
|
|
|
|
|
U_CAPI void U_EXPORT2
|
|
|
|
|
ucol_getVersion(const UCollator* coll, UVersionInfo info);
|
|
|
|
|
|
2000-05-18 21:25:51 +00:00
|
|
|
|
|
2000-11-17 23:32:32 +00:00
|
|
|
|
/* Following are the new APIs for 1.7. They are all draft and most are not even implemented */
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Universal attribute setter
|
|
|
|
|
* @param coll collator whose attributes are to be changed
|
|
|
|
|
* @param attr attribute type
|
|
|
|
|
* @param value attribute value
|
|
|
|
|
* @param status to indicate whether the operation went on smoothly or there were errors
|
|
|
|
|
* @draft API 1.7 freeze
|
|
|
|
|
*/
|
2000-11-20 06:40:54 +00:00
|
|
|
|
U_CAPI void ucol_setAttribute(UCollator *coll, UColAttribute attr, UColAttributeValue value, UErrorCode *status);
|
2000-11-17 23:32:32 +00:00
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Universal attribute getter
|
|
|
|
|
* @param coll collator whose attributes are to be queried
|
|
|
|
|
* @param attr attribute type
|
|
|
|
|
* @return attribute value
|
|
|
|
|
* @param status to indicate whether the operation went on smoothly or there were errors
|
|
|
|
|
* @draft API 1.7 freeze
|
|
|
|
|
*/
|
2000-11-20 06:40:54 +00:00
|
|
|
|
U_CAPI UColAttributeValue ucol_getAttribute(const UCollator *coll, UColAttribute attr, UErrorCode *status);
|
2000-11-17 23:32:32 +00:00
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Thread safe cloning operation
|
|
|
|
|
* @param coll collator to be cloned
|
2001-02-16 22:42:45 +00:00
|
|
|
|
* @param stackBuffer user allocated space for the new clone. If NULL new memory will be allocated.
|
|
|
|
|
If buffer is not large enough, new memory will be allocated.
|
|
|
|
|
Clients can use the U_COL_SAFECLONE_BUFFERSIZE.
|
|
|
|
|
This will probably be enough to avoid memory allocations.
|
|
|
|
|
* @param pBufferSize pointer to size of allocated space.
|
|
|
|
|
If *pBufferSize == 0, a sufficient size for use in cloning will
|
|
|
|
|
be returned ('pre-flighting')
|
|
|
|
|
If *pBufferSize is not enough for a stack-based safe clone,
|
|
|
|
|
new memory will be allocated.
|
2000-11-17 23:32:32 +00:00
|
|
|
|
* @param status to indicate whether the operation went on smoothly or there were errors
|
2001-02-16 22:42:45 +00:00
|
|
|
|
An informational status value, U_SAFECLONE_ALLOCATED_ERROR, is used if any allocations were necessary.
|
2000-11-17 23:32:32 +00:00
|
|
|
|
* @return pointer to the new clone
|
2001-02-16 22:42:45 +00:00
|
|
|
|
* @draft API 1.8 freeze
|
2000-11-17 23:32:32 +00:00
|
|
|
|
*/
|
2001-02-16 22:42:45 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
U_CAPI UCollator * ucol_safeClone(
|
|
|
|
|
const UCollator *coll,
|
|
|
|
|
void *stackBuffer,
|
|
|
|
|
int32_t *pBufferSize,
|
|
|
|
|
UErrorCode *status);
|
|
|
|
|
|
|
|
|
|
/* Suggested size of the user-allocated stackBuffer passed to ucol_safeClone();
 * per that function's documentation it "will probably be enough to avoid
 * memory allocations". NOTE(review): the unit is presumably bytes - confirm. */
#define U_COL_SAFECLONE_BUFFERSIZE 256
|
2000-11-17 23:32:32 +00:00
|
|
|
|
|
|
|
|
|
/* Declaration for the forward iterating function type used by ucol_strcollinc().
 * The function is called with the caller-supplied context pointer and returns
 * a UChar. Presumably each call yields the next UChar of the string being
 * compared; how the end of the string is signalled is not visible in this
 * header (TODO confirm against the implementation).
 * The typedef is wrapped in U_CDECL_BEGIN / U_CDECL_END. */
|
2001-03-15 22:40:25 +00:00
|
|
|
|
U_CDECL_BEGIN
|
2000-11-17 23:32:32 +00:00
|
|
|
|
typedef UChar UCharForwardIterator(void *context);
|
2001-03-15 22:40:25 +00:00
|
|
|
|
U_CDECL_END
|
2000-11-17 23:32:32 +00:00
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* String compare that uses user supplied character iteration.
|
|
|
|
|
* The idea is to prevent users from having to convert the whole string into UChar's before comparing
|
|
|
|
|
* since sometimes strings differ on first couple of characters.
|
|
|
|
|
* @param coll collator to be used for comparing
|
|
|
|
|
* @param source pointer to function for iterating over the first string
|
|
|
|
|
* @param sourceContext data to be passed to the first iterating function.
|
|
|
|
|
* @param target pointer to function for iterating over the second string
|
|
|
|
|
* @param targetContext data to be passed to the second iterating function.
|
|
|
|
|
* @return The result of comparing the strings; one of UCOL_EQUAL,
|
|
|
|
|
* UCOL_GREATER, UCOL_LESS
|
|
|
|
|
*/
|
|
|
|
|
U_CAPI UCollationResult ucol_strcollinc(const UCollator *coll,
|
|
|
|
|
UCharForwardIterator *source, void *sourceContext,
|
|
|
|
|
UCharForwardIterator *target, void *targetContext);
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Returns current rules. Delta defines whether full rules are returned or just the tailoring.
|
|
|
|
|
* Returns number of UChars needed to store rules. If buffer is NULL or bufferLen is not enough
|
|
|
|
|
* to store rules, will store up to available space.
|
|
|
|
|
* @param coll collator to get the rules from
|
|
|
|
|
* @param delta one of UCOL_TAILORING_ONLY, UCOL_FULL_RULES.
|
|
|
|
|
* @param buffer buffer to store the result in. If NULL, you'll get no rules.
|
|
|
|
|
* @param bufferLen length of buffer to store rules in. If less than needed you'll get only the part that fits in.
|
|
|
|
|
*/
|
|
|
|
|
U_CAPI int32_t ucol_getRulesEx(const UCollator *coll, UColRuleOption delta, UChar *buffer, int32_t bufferLen);
|
|
|
|
|
|
2001-01-15 19:02:30 +00:00
|
|
|
|
/* This is the C API wrapper for CollationIterator that got booted out from here, including just for */
|
|
|
|
|
/* include backward compatibility */
|
|
|
|
|
#include "unicode/ucoleitr.h"
|
2000-11-17 23:32:32 +00:00
|
|
|
|
|
1999-12-28 23:57:50 +00:00
|
|
|
|
#endif
|