0<=code<=0x10ffff.
* @param nameChoice Selector for which name to get.
* @param buffer Destination address for copying the name.
* The name will always be zero-terminated.
* If there is no name, then the buffer will be set to the empty string.
* @param bufferLength ==sizeof(buffer)
* @param pErrorCode Pointer to a UErrorCode variable;
* check for U_SUCCESS() after u_charName() returns.
* @return The length of the name, or 0 if there is no name for this character.
* If the bufferLength is less than or equal to the length, then the buffer
* contains the truncated name and the returned length indicates the full
* length of the name.
* The length does not include the zero-termination.
*
* @see UCharNameChoice
* @see u_charFromName
* @see u_enumCharNames
* @stable ICU 2.0
*/
U_CAPI int32_t U_EXPORT2
u_charName(UChar32 code, UCharNameChoice nameChoice,
char *buffer, int32_t bufferLength,
UErrorCode *pErrorCode);
/**
* Get the ISO 10646 comment for a character.
* The ISO 10646 comment is an informative field in the Unicode Character
* Database (UnicodeData.txt field 11) and is from the ISO 10646 names list.
*
* @param c The character (code point) for which to get the ISO comment.
* It must be 0<=c<=0x10ffff.
* @param dest Destination address for copying the comment.
* The comment will be zero-terminated if possible.
* If there is no comment, then the buffer will be set to the empty string.
* @param destCapacity ==sizeof(dest)
* @param pErrorCode Pointer to a UErrorCode variable;
* check for U_SUCCESS() after u_getISOComment() returns.
* @return The length of the comment, or 0 if there is no comment for this character.
* If the destCapacity is less than or equal to the length, then the buffer
* contains the truncated comment and the returned length indicates the full
* length of the comment.
* The length does not include the zero-termination.
*
* @draft ICU 2.2
*/
U_CAPI int32_t U_EXPORT2
u_getISOComment(UChar32 c,
char *dest, int32_t destCapacity,
UErrorCode *pErrorCode);
/**
* Find a Unicode character by its name and return its code point value.
* The name is matched exactly and completely.
* If the name does not correspond to a code point, pErrorCode
* is set to U_INVALID_CHAR_FOUND.
* A Unicode 1.0 name is matched only if it differs from the modern name.
* Unicode names are all uppercase. Extended names are lowercase followed
* by an uppercase hexadecimal number, and within angle brackets.
*
* @param nameChoice Selector for which name to match.
* @param name The name to match.
* @param pErrorCode Pointer to a UErrorCode variable;
* it is set to U_INVALID_CHAR_FOUND if no code point has the given name.
* @return The Unicode value of the code point with the given name,
* or an undefined value if there is no such code point.
*
* @see UCharNameChoice
* @see u_charName
* @see u_enumCharNames
* @stable ICU 1.7
*/
U_CAPI UChar32 U_EXPORT2
u_charFromName(UCharNameChoice nameChoice,
const char *name,
UErrorCode *pErrorCode);
/**
* Type of a callback function for u_enumCharNames().
* The callback is invoked once for each enumerated Unicode character and
* receives the code point value and the character name.
* If the callback returns FALSE, then the enumeration is stopped.
*
* @param context The context pointer that was passed to u_enumCharNames().
* @param code The Unicode code point for the character with this name.
* @param nameChoice Selector for which kind of names is being enumerated.
* @param name The character's name, zero-terminated.
* @param length The length of the name.
* @return TRUE if the enumeration should continue, FALSE to stop it.
*
* @see UCharNameChoice
* @see u_enumCharNames
* @stable ICU 1.7
*/
typedef UBool UEnumCharNamesFn(void *context,
UChar32 code,
UCharNameChoice nameChoice,
const char *name,
int32_t length);
/**
* Enumerate all assigned Unicode characters between the start and limit
* code points (start inclusive, limit exclusive) and call a function
* for each, passing the code point value and the character name.
* The enumeration stops early if the callback function returns FALSE.
* For Unicode 1.0 names, only those that differ from the modern names
* are enumerated.
*
* @param start The first code point in the enumeration range.
* @param limit One more than the last code point in the enumeration range
* (the first one after the range).
* @param fn The function that is to be called for each character name.
* @param context An arbitrary pointer that is passed to the function.
* @param nameChoice Selector for which kind of names to enumerate.
* @param pErrorCode Pointer to a UErrorCode variable.
*
* @see UCharNameChoice
* @see UEnumCharNamesFn
* @see u_charName
* @see u_charFromName
* @stable ICU 1.7
*/
U_CAPI void U_EXPORT2
u_enumCharNames(UChar32 start, UChar32 limit,
UEnumCharNamesFn *fn,
void *context,
UCharNameChoice nameChoice,
UErrorCode *pErrorCode);
/**
* Return the Unicode name for a given property, as given in the
* Unicode database file PropertyAliases.txt.
*
* In addition, this function maps the property
* UCHAR_GENERAL_CATEGORY_MASK to the synthetic names "gcm" /
* "General_Category_Mask". These names are not in
* PropertyAliases.txt.
*
* @param property UProperty selector other than UCHAR_INVALID_CODE.
* If out of range, NULL is returned.
*
* @param nameChoice Selector for which name to get. If out of range,
* NULL is returned. All properties have a long name. Most
* have a short name, but some do not. Unicode allows for
* additional names; if present, these will be returned by
* U_LONG_PROPERTY_NAME + i, where i = 1, 2, ...
*
* @return A pointer to the name, or NULL if either the
* property or the nameChoice is out of range. If a given
* nameChoice returns NULL, then all larger values of
* nameChoice will return NULL, with one exception: if NULL is
* returned for U_SHORT_PROPERTY_NAME, then
* U_LONG_PROPERTY_NAME (and higher) may still return a
* non-NULL value. The returned pointer is valid until
* u_cleanup() is called.
*
* @see UProperty
* @see UPropertyNameChoice
* @draft ICU 2.4
*/
U_CAPI const char* U_EXPORT2
u_getPropertyName(UProperty property,
UPropertyNameChoice nameChoice);
/**
* Return the UProperty enum for a given property name, as specified
* in the Unicode database file PropertyAliases.txt. Short, long, and
* any other variants of the name are recognized.
*
* In addition, this function maps the synthetic names "gcm" /
* "General_Category_Mask" to the property
* UCHAR_GENERAL_CATEGORY_MASK. These names are not in
* PropertyAliases.txt.
*
* @param alias The property name to be matched. The name is compared
* using "loose matching" as described in PropertyAliases.txt.
*
* @return A UProperty enum, or UCHAR_INVALID_CODE if the given name
* does not match any property.
*
* @see UProperty
* @draft ICU 2.4
*/
U_CAPI UProperty U_EXPORT2
u_getPropertyEnum(const char* alias);
/*
* NOTE(review): The text in this comment appears to be the remains of doc
* comments whose declarations are not present at this point in the file.
* It breaks off mid-sentence in several places (marked "[...]").
* Restore the missing text and declarations from the upstream header.
*
* Return the Unicode name for a given property value, as given in the
* Unicode database file PropertyValueAliases.txt.
*
* Note: Some of the names in PropertyValueAliases.txt can only be
* retrieved using UCHAR_GENERAL_CATEGORY_MASK, not
* UCHAR_GENERAL_CATEGORY. These include: "C" / "Other", "L" /
* "Letter", "LC" / "Cased_Letter", "M" / "Mark", "N" / "Number", "P"
* / "Punctuation", "S" / "Symbol", and "Z" / "Separator".
*
* @param property UProperty selector constant.
* Must be UCHAR_BINARY_START<=which [...]
*
* [...] c is not a valid digit in the specified
* radix, -1 is returned. A character is a valid digit
* if at least one of the following is true:
* [...] 'A' through 'Z'.
* In this case the value is c-'A'+10.
* [...] 'a' through 'z'.
* In this case the value is ch-'a'+10.
*/
/**
* Determine the character representation of a digit in the specified radix.
* If radix is not a valid radix, or the value of digit is not a valid
* digit in the specified radix, the null character (U+0000) is returned.
*
* The radix argument is valid if it is greater than or
* equal to 2 and less than or equal to 36.
* The digit argument is valid if 0 <= digit < radix.
*
* If the digit is less than 10, then '0' + digit is returned.
* Otherwise, the value 'a' + digit - 10 is returned.
*
* Same as java.lang.Character.forDigit().
*
* @param digit the number to convert to a character.
* @param radix the radix.
* @return the char representation of the specified digit
* in the specified radix.
*
* @see u_digit
* @see u_charDigitValue
* @see u_isdigit
* @stable ICU 2.0
*/
U_CAPI UChar32 U_EXPORT2
u_forDigit(int32_t digit, int8_t radix);
/**
* Get the "age" of the code point.
* The "age" is the Unicode version in which the code point was first
* designated (as a non-character or for Private Use)
* or assigned a character.
* This can be useful to avoid emitting code points to receiving
* processes that do not accept newer characters.
* The data is from the UCD file DerivedAge.txt.
*
* @param c The code point.
* @param versionArray The Unicode version number array, to be filled in.
*
* @stable ICU 2.1
*/
U_CAPI void U_EXPORT2
u_charAge(UChar32 c, UVersionInfo versionArray);
/**
* Get the Unicode version information.
* The version array is filled in with the version information
* for the Unicode standard that is currently used by ICU.
* For example, Unicode version 3.1.1 is represented as an array with
* the values { 3, 1, 1, 0 }.
*
* @param versionArray An output array that will be filled in with
* the Unicode version number.
* @stable ICU 2.0
*/
U_CAPI void U_EXPORT2
u_getUnicodeVersion(UVersionInfo versionArray);
/**
* Get the FC_NFKC_Closure property string for a character.
* See Unicode Standard Annex #15 for details, search for "FC_NFKC_Closure"
* or for "FNC": http://www.unicode.org/reports/tr15/
*
* @param c The character (code point) for which to get the FC_NFKC_Closure string.
* It must be 0<=c<=0x10ffff.
* @param dest Destination address for copying the string.
* The string will be zero-terminated if possible.
* If there is no FC_NFKC_Closure string,
* then the buffer will be set to the empty string.
* @param destCapacity ==sizeof(dest)
* @param pErrorCode Pointer to a UErrorCode variable.
* @return The length of the string, or 0 if there is no FC_NFKC_Closure string for this character.
* If the destCapacity is less than or equal to the length, then the buffer
* contains the truncated string and the returned length indicates the full
* length of the string.
* The length does not include the zero-termination.
*
* @draft ICU 2.2
*/
U_CAPI int32_t U_EXPORT2
u_getFC_NFKC_Closure(UChar32 c, UChar *dest, int32_t destCapacity, UErrorCode *pErrorCode);
U_CDECL_END
#endif /*_UCHAR*/
/*eof*/