/*
**********************************************************************
*   Copyright (C) 1997-2003, International Business Machines
*   Corporation and others.  All Rights Reserved.
**********************************************************************
*
* File UCHAR.H
*
* Modification History:
*
*   Date        Name        Description
*   04/02/97    aliu        Creation.
*   03/29/99    helena      Updated for C APIs.
*   4/15/99     Madhu       Updated for C Implementation and Javadoc
*   5/20/99     Madhu       Added the function u_getVersion()
*   8/19/1999   srl         Upgraded scripts to Unicode 3.0
*   8/27/1999   schererm    UCharDirection constants: U_...
*   11/11/1999  weiv        added u_isalnum(), cleaned comments
*   01/11/2000  helena      Renamed u_getVersion to u_getUnicodeVersion().
******************************************************************************
*/
# ifndef UCHAR_H
# define UCHAR_H
# include "unicode/utypes.h"
2002-01-12 00:11:09 +00:00
U_CDECL_BEGIN
2001-03-21 20:44:20 +00:00
/*==========================================================================*/
/* Unicode version number */
/*==========================================================================*/
2002-03-13 23:31:12 +00:00
/**
 * Unicode version number, default for the current ICU version.
 * The actual Unicode Character Database (UCD) data is stored in uprops.dat
 * and may be generated from UCD files from a different Unicode version.
 * Call u_getUnicodeVersion to get the actual Unicode version of the data.
 *
 * The value is a string literal, not a number.
 *
 * @see u_getUnicodeVersion
 * @stable ICU 2.0
 */
#define U_UNICODE_VERSION "4"
1999-12-28 23:39:02 +00:00
/**
 * \file
 * \brief C API: Unicode Properties
 *
 * This C API provides low-level access to the Unicode Character Database.
 * In addition to raw property values, some convenience functions calculate
 * derived properties, for example for Java-style programming.
 *
 * Unicode assigns each code point (not just assigned character) values for
 * many properties.
 * Most of them are simple boolean flags, or constants from a small enumerated list.
 * For some properties, values are strings or other relatively more complex types.
 *
 * For more information see
 * "About the Unicode Character Database" (http://www.unicode.org/ucd/)
 * and the ICU User Guide chapter on Properties (http://oss.software.ibm.com/icu/userguide/properties.html).
 *
 * Many functions are designed to match java.lang.Character functions.
 * See the individual function documentation,
 * and see the JDK 1.4.1 java.lang.Character documentation
 * at http://java.sun.com/j2se/1.4.1/docs/api/java/lang/Character.html
 *
 * There are also functions that provide easy migration from C/POSIX functions
 * like isblank(). Their use is generally discouraged because the C/POSIX
 * standards do not define their semantics beyond the ASCII range, which means
 * that different implementations exhibit very different behavior.
 * Instead, Unicode properties should be used directly.
 *
 * There are also only a few, broad C/POSIX character classes, and they tend
 * to be used for conflicting purposes. For example, the "isalpha()" class
 * is sometimes used to determine word boundaries, while a more sophisticated
 * approach would at least distinguish initial letters from continuation
 * characters (the latter including combining marks).
 * (In ICU, BreakIterator is the most sophisticated API for word boundaries.)
 * Another example: There is no "istitle()" class for titlecase characters.
 *
 * A summary of the behavior of some C/POSIX character classification implementations
 * for Unicode is available at http://oss.software.ibm.com/cvs/icu/~checkout~/icuhtml/design/posix_classes.html
 *
 * <strong>Important</strong>:
 * The behavior of the ICU C/POSIX-style character classification
 * functions is subject to change according to discussion of the above summary.
 */
/**
* Constants .
*/
2002-12-05 00:30:16 +00:00
/** The lowest Unicode code point value. Code points are non-negative. @stable ICU 2.0 */
#define UCHAR_MIN_VALUE 0
1999-12-28 23:39:02 +00:00
/**
 * The highest Unicode code point value (scalar value) according to
 * The Unicode Standard. This is a 21-bit value (20.1 bits, rounded up).
 * For a single character, UChar32 is a simple type that can hold any code point value.
 *
 * @see UChar32
 * @stable ICU 2.0
 */
#define UCHAR_MAX_VALUE 0x10ffff
1999-12-28 23:39:02 +00:00
2002-03-13 23:31:12 +00:00
/**
 * Build a single-bit mask (a flag) from a bit number.
 * The argument is expected to be in the range 0..31; the result has
 * type uint32_t with exactly that bit set.
 * @stable ICU 2.1
 */
#define U_MASK(x) ((uint32_t)1 << (x))
2003-02-11 01:59:32 +00:00
/*
 * !! Note: Several comments in this file are machine-read by the
 * genpname tool.  These comments describe the correspondence between
 * icu enum constants and UCD entities.  Do not delete them.  Update
 * these comments as needed.
 *
 * Any comment of the form "/ *[name]* /" (spaces added) is such
 * a comment.
 *
 * The U_JG_* and U_GC_*_MASK constants are matched by their symbolic
 * name, which must match PropertyValueAliases.txt.
 */
2002-03-06 23:31:11 +00:00
/**
 * Selection constants for Unicode properties.
 * These constants are used in functions like u_hasBinaryProperty to select
 * one of the Unicode properties.
 *
 * The properties APIs are intended to reflect Unicode properties as defined
 * in the Unicode Character Database (UCD) and Unicode Technical Reports (UTR).
 * For details about the properties see http://www.unicode.org/ucd/ .
 * For names of Unicode properties see the UCD file PropertyAliases.txt.
 *
 * Important: If ICU is built with UCD files from Unicode versions below, e.g., 3.2,
 * then properties marked with "new in Unicode 3.2" are not or not fully available.
 * Check u_getUnicodeVersion to be sure.
 *
 * The constants are grouped into value ranges, each delimited by a
 * *_START and a *_LIMIT constant: binary from 0, enumerated/integer from
 * 0x1000, bit-mask from 0x2000, double from 0x3000, string from 0x4000.
 *
 * @see u_hasBinaryProperty
 * @see u_getIntPropertyValue
 * @see u_getUnicodeVersion
 * @stable ICU 2.1
 */
typedef enum UProperty {
    /*  See note !!.  Comments of the form "Binary property Dash",
        "Enumerated property Script", "Double property Numeric_Value",
        and "String property Age" are read by genpname. */

    /*  Note: Place UCHAR_ALPHABETIC before UCHAR_BINARY_START so that
        debuggers display UCHAR_ALPHABETIC as the symbolic name for 0,
        rather than UCHAR_BINARY_START.  Likewise for other *_START
        identifiers. */

    /** Binary property Alphabetic. Same as u_isUAlphabetic, different from u_isalpha.
        Lu+Ll+Lt+Lm+Lo+Nl+Other_Alphabetic @stable ICU 2.1 */
    UCHAR_ALPHABETIC=0,
    /** First constant for binary Unicode properties. @stable ICU 2.1 */
    UCHAR_BINARY_START=UCHAR_ALPHABETIC,
    /** Binary property ASCII_Hex_Digit. 0-9 A-F a-f @stable ICU 2.1 */
    UCHAR_ASCII_HEX_DIGIT,
    /** Binary property Bidi_Control.
        Format controls which have specific functions
        in the Bidi Algorithm. @stable ICU 2.1 */
    UCHAR_BIDI_CONTROL,
    /** Binary property Bidi_Mirrored.
        Characters that may change display in RTL text.
        Same as u_isMirrored.
        See Bidi Algorithm, UTR 9. @stable ICU 2.1 */
    UCHAR_BIDI_MIRRORED,
    /** Binary property Dash. Variations of dashes. @stable ICU 2.1 */
    UCHAR_DASH,
    /** Binary property Default_Ignorable_Code_Point (new in Unicode 3.2).
        Ignorable in most processing.
        <2060..206F, FFF0..FFFB, E0000..E0FFF>+Other_Default_Ignorable_Code_Point+(Cf+Cc+Cs-White_Space) @stable ICU 2.1 */
    UCHAR_DEFAULT_IGNORABLE_CODE_POINT,
    /** Binary property Deprecated (new in Unicode 3.2).
        The usage of deprecated characters is strongly discouraged. @stable ICU 2.1 */
    UCHAR_DEPRECATED,
    /** Binary property Diacritic. Characters that linguistically modify
        the meaning of another character to which they apply. @stable ICU 2.1 */
    UCHAR_DIACRITIC,
    /** Binary property Extender.
        Extend the value or shape of a preceding alphabetic character,
        e.g., length and iteration marks. @stable ICU 2.1 */
    UCHAR_EXTENDER,
    /** Binary property Full_Composition_Exclusion.
        CompositionExclusions.txt+Singleton Decompositions+
        Non-Starter Decompositions. @stable ICU 2.1 */
    UCHAR_FULL_COMPOSITION_EXCLUSION,
    /** Binary property Grapheme_Base (new in Unicode 3.2).
        For programmatic determination of grapheme cluster boundaries.
        [0..10FFFF]-Cc-Cf-Cs-Co-Cn-Zl-Zp-Grapheme_Link-Grapheme_Extend-CGJ @stable ICU 2.1 */
    UCHAR_GRAPHEME_BASE,
    /** Binary property Grapheme_Extend (new in Unicode 3.2).
        For programmatic determination of grapheme cluster boundaries.
        Me+Mn+Mc+Other_Grapheme_Extend-Grapheme_Link-CGJ @stable ICU 2.1 */
    UCHAR_GRAPHEME_EXTEND,
    /** Binary property Grapheme_Link (new in Unicode 3.2).
        For programmatic determination of grapheme cluster boundaries. @stable ICU 2.1 */
    UCHAR_GRAPHEME_LINK,
    /** Binary property Hex_Digit.
        Characters commonly used for hexadecimal numbers. @stable ICU 2.1 */
    UCHAR_HEX_DIGIT,
    /** Binary property Hyphen. Dashes used to mark connections
        between pieces of words, plus the Katakana middle dot. @stable ICU 2.1 */
    UCHAR_HYPHEN,
    /** Binary property ID_Continue.
        Characters that can continue an identifier.
        DerivedCoreProperties.txt also says "NOTE: Cf characters should be filtered out."
        ID_Start+Mn+Mc+Nd+Pc @stable ICU 2.1 */
    UCHAR_ID_CONTINUE,
    /** Binary property ID_Start.
        Characters that can start an identifier.
        Lu+Ll+Lt+Lm+Lo+Nl @stable ICU 2.1 */
    UCHAR_ID_START,
    /** Binary property Ideographic.
        CJKV ideographs. @stable ICU 2.1 */
    UCHAR_IDEOGRAPHIC,
    /** Binary property IDS_Binary_Operator (new in Unicode 3.2).
        For programmatic determination of
        Ideographic Description Sequences. @stable ICU 2.1 */
    UCHAR_IDS_BINARY_OPERATOR,
    /** Binary property IDS_Trinary_Operator (new in Unicode 3.2).
        For programmatic determination of
        Ideographic Description Sequences. @stable ICU 2.1 */
    UCHAR_IDS_TRINARY_OPERATOR,
    /** Binary property Join_Control.
        Format controls for cursive joining and ligation. @stable ICU 2.1 */
    UCHAR_JOIN_CONTROL,
    /** Binary property Logical_Order_Exception (new in Unicode 3.2).
        Characters that do not use logical order and
        require special handling in most processing. @stable ICU 2.1 */
    UCHAR_LOGICAL_ORDER_EXCEPTION,
    /** Binary property Lowercase. Same as u_isULowercase, different from u_islower.
        Ll+Other_Lowercase @stable ICU 2.1 */
    UCHAR_LOWERCASE,
    /** Binary property Math. Sm+Other_Math @stable ICU 2.1 */
    UCHAR_MATH,
    /** Binary property Noncharacter_Code_Point.
        Code points that are explicitly defined as illegal
        for the encoding of characters. @stable ICU 2.1 */
    UCHAR_NONCHARACTER_CODE_POINT,
    /** Binary property Quotation_Mark. @stable ICU 2.1 */
    UCHAR_QUOTATION_MARK,
    /** Binary property Radical (new in Unicode 3.2).
        For programmatic determination of
        Ideographic Description Sequences. @stable ICU 2.1 */
    UCHAR_RADICAL,
    /** Binary property Soft_Dotted (new in Unicode 3.2).
        Characters with a "soft dot", like i or j.
        An accent placed on these characters causes
        the dot to disappear. @stable ICU 2.1 */
    UCHAR_SOFT_DOTTED,
    /** Binary property Terminal_Punctuation.
        Punctuation characters that generally mark
        the end of textual units. @stable ICU 2.1 */
    UCHAR_TERMINAL_PUNCTUATION,
    /** Binary property Unified_Ideograph (new in Unicode 3.2).
        For programmatic determination of
        Ideographic Description Sequences. @stable ICU 2.1 */
    UCHAR_UNIFIED_IDEOGRAPH,
    /** Binary property Uppercase. Same as u_isUUppercase, different from u_isupper.
        Lu+Other_Uppercase @stable ICU 2.1 */
    UCHAR_UPPERCASE,
    /** Binary property White_Space.
        Same as u_isUWhiteSpace, different from u_isspace and u_isWhitespace.
        Space characters+TAB+CR+LF-ZWSP-ZWNBSP @stable ICU 2.1 */
    UCHAR_WHITE_SPACE,
    /** Binary property XID_Continue.
        ID_Continue modified to allow closure under
        normalization forms NFKC and NFKD. @stable ICU 2.1 */
    UCHAR_XID_CONTINUE,
    /** Binary property XID_Start. ID_Start modified to allow
        closure under normalization forms NFKC and NFKD. @stable ICU 2.1 */
    UCHAR_XID_START,
    /** Binary property Case_Sensitive. Either the source of a case
        mapping or _in_ the target of a case mapping. Not the same as
        the general category Cased_Letter. @draft ICU 2.6 */
    UCHAR_CASE_SENSITIVE,
    /** One more than the last constant for binary Unicode properties. @stable ICU 2.1 */
    UCHAR_BINARY_LIMIT,

    /** Enumerated property Bidi_Class.
        Same as u_charDirection, returns UCharDirection values. @draft ICU 2.2 */
    UCHAR_BIDI_CLASS=0x1000,
    /** First constant for enumerated/integer Unicode properties. @draft ICU 2.2 */
    UCHAR_INT_START=UCHAR_BIDI_CLASS,
    /** Enumerated property Block.
        Same as ublock_getCode, returns UBlockCode values. @draft ICU 2.2 */
    UCHAR_BLOCK,
    /** Enumerated property Canonical_Combining_Class.
        Same as u_getCombiningClass, returns 8-bit numeric values. @draft ICU 2.2 */
    UCHAR_CANONICAL_COMBINING_CLASS,
    /** Enumerated property Decomposition_Type.
        Returns UDecompositionType values. @draft ICU 2.2 */
    UCHAR_DECOMPOSITION_TYPE,
    /** Enumerated property East_Asian_Width.
        See http://www.unicode.org/reports/tr11/
        Returns UEastAsianWidth values. @draft ICU 2.2 */
    UCHAR_EAST_ASIAN_WIDTH,
    /** Enumerated property General_Category.
        Same as u_charType, returns UCharCategory values. @draft ICU 2.2 */
    UCHAR_GENERAL_CATEGORY,
    /** Enumerated property Joining_Group.
        Returns UJoiningGroup values. @draft ICU 2.2 */
    UCHAR_JOINING_GROUP,
    /** Enumerated property Joining_Type.
        Returns UJoiningType values. @draft ICU 2.2 */
    UCHAR_JOINING_TYPE,
    /** Enumerated property Line_Break.
        Returns ULineBreak values. @draft ICU 2.2 */
    UCHAR_LINE_BREAK,
    /** Enumerated property Numeric_Type.
        Returns UNumericType values. @draft ICU 2.2 */
    UCHAR_NUMERIC_TYPE,
    /** Enumerated property Script.
        Same as uscript_getScript, returns UScriptCode values. @draft ICU 2.2 */
    UCHAR_SCRIPT,
    /** Enumerated property Hangul_Syllable_Type, new in Unicode 4.
        Returns UHangulSyllableType values. @draft ICU 2.6 */
    UCHAR_HANGUL_SYLLABLE_TYPE,
    /** One more than the last constant for enumerated/integer Unicode properties. @draft ICU 2.2 */
    UCHAR_INT_LIMIT,

    /** Bitmask property General_Category_Mask.
        This is the General_Category property returned as a bit mask.
        When used in u_getIntPropertyValue(c), same as U_MASK(u_charType(c)),
        returns bit masks for UCharCategory values where exactly one bit is set.
        When used with u_getPropertyValueName() and u_getPropertyValueEnum(),
        a multi-bit mask is used for sets of categories like "Letters".
        Mask values should be cast to uint32_t.
        @draft ICU 2.4 */
    UCHAR_GENERAL_CATEGORY_MASK=0x2000,
    /** First constant for bit-mask Unicode properties. @draft ICU 2.4 */
    UCHAR_MASK_START=UCHAR_GENERAL_CATEGORY_MASK,
    /** One more than the last constant for bit-mask Unicode properties. @draft ICU 2.4 */
    UCHAR_MASK_LIMIT,

    /** Double property Numeric_Value.
        Corresponds to u_getNumericValue. @draft ICU 2.4 */
    UCHAR_NUMERIC_VALUE=0x3000,
    /** First constant for double Unicode properties. @draft ICU 2.4 */
    UCHAR_DOUBLE_START=UCHAR_NUMERIC_VALUE,
    /** One more than the last constant for double Unicode properties. @draft ICU 2.4 */
    UCHAR_DOUBLE_LIMIT,

    /** String property Age.
        Corresponds to u_charAge. @draft ICU 2.4 */
    UCHAR_AGE=0x4000,
    /** First constant for string Unicode properties. @draft ICU 2.4 */
    UCHAR_STRING_START=UCHAR_AGE,
    /** String property Bidi_Mirroring_Glyph.
        Corresponds to u_charMirror. @draft ICU 2.4 */
    UCHAR_BIDI_MIRRORING_GLYPH,
    /** String property Case_Folding.
        Corresponds to u_strFoldCase in ustring.h. @draft ICU 2.4 */
    UCHAR_CASE_FOLDING,
    /** String property ISO_Comment.
        Corresponds to u_getISOComment. @draft ICU 2.4 */
    UCHAR_ISO_COMMENT,
    /** String property Lowercase_Mapping.
        Corresponds to u_strToLower in ustring.h. @draft ICU 2.4 */
    UCHAR_LOWERCASE_MAPPING,
    /** String property Name.
        Corresponds to u_charName. @draft ICU 2.4 */
    UCHAR_NAME,
    /** String property Simple_Case_Folding.
        Corresponds to u_foldCase. @draft ICU 2.4 */
    UCHAR_SIMPLE_CASE_FOLDING,
    /** String property Simple_Lowercase_Mapping.
        Corresponds to u_tolower. @draft ICU 2.4 */
    UCHAR_SIMPLE_LOWERCASE_MAPPING,
    /** String property Simple_Titlecase_Mapping.
        Corresponds to u_totitle. @draft ICU 2.4 */
    UCHAR_SIMPLE_TITLECASE_MAPPING,
    /** String property Simple_Uppercase_Mapping.
        Corresponds to u_toupper. @draft ICU 2.4 */
    UCHAR_SIMPLE_UPPERCASE_MAPPING,
    /** String property Titlecase_Mapping.
        Corresponds to u_strToTitle in ustring.h. @draft ICU 2.4 */
    UCHAR_TITLECASE_MAPPING,
    /** String property Unicode_1_Name.
        Corresponds to u_charName. @draft ICU 2.4 */
    UCHAR_UNICODE_1_NAME,
    /** String property Uppercase_Mapping.
        Corresponds to u_strToUpper in ustring.h. @draft ICU 2.4 */
    UCHAR_UPPERCASE_MAPPING,
    /** One more than the last constant for string Unicode properties. @draft ICU 2.4 */
    UCHAR_STRING_LIMIT,

    /** Represents a nonexistent or invalid property or property value. @draft ICU 2.4 */
    UCHAR_INVALID_CODE = -1
} UProperty;
2002-03-06 23:31:11 +00:00
2001-10-11 22:11:38 +00:00
/**
 * Data for enumerated Unicode general category types.
 * See http://www.unicode.org/Public/UNIDATA/UnicodeData.html .
 *
 * The numeric value of each constant is also the bit number used by the
 * corresponding U_GC_XX_MASK constant.
 * @stable ICU 2.0
 */
typedef enum UCharCategory
{
    /** See note !!.  Comments of the form "Cn" are read by genpname. */

    /** Non-category for unassigned and non-character code points. @stable ICU 2.0 */
    U_UNASSIGNED              = 0,
    /** Cn "Other, Not Assigned (no characters in [UnicodeData.txt] have this property)" (same as U_UNASSIGNED!) @stable ICU 2.0 */
    U_GENERAL_OTHER_TYPES     = 0,
    /** Lu @stable ICU 2.0 */
    U_UPPERCASE_LETTER        = 1,
    /** Ll @stable ICU 2.0 */
    U_LOWERCASE_LETTER        = 2,
    /** Lt @stable ICU 2.0 */
    U_TITLECASE_LETTER        = 3,
    /** Lm @stable ICU 2.0 */
    U_MODIFIER_LETTER         = 4,
    /** Lo @stable ICU 2.0 */
    U_OTHER_LETTER            = 5,
    /** Mn @stable ICU 2.0 */
    U_NON_SPACING_MARK        = 6,
    /** Me @stable ICU 2.0 */
    U_ENCLOSING_MARK          = 7,
    /** Mc @stable ICU 2.0 */
    U_COMBINING_SPACING_MARK  = 8,
    /** Nd @stable ICU 2.0 */
    U_DECIMAL_DIGIT_NUMBER    = 9,
    /** Nl @stable ICU 2.0 */
    U_LETTER_NUMBER           = 10,
    /** No @stable ICU 2.0 */
    U_OTHER_NUMBER            = 11,
    /** Zs @stable ICU 2.0 */
    U_SPACE_SEPARATOR         = 12,
    /** Zl @stable ICU 2.0 */
    U_LINE_SEPARATOR          = 13,
    /** Zp @stable ICU 2.0 */
    U_PARAGRAPH_SEPARATOR     = 14,
    /** Cc @stable ICU 2.0 */
    U_CONTROL_CHAR            = 15,
    /** Cf @stable ICU 2.0 */
    U_FORMAT_CHAR             = 16,
    /** Co @stable ICU 2.0 */
    U_PRIVATE_USE_CHAR        = 17,
    /** Cs @stable ICU 2.0 */
    U_SURROGATE               = 18,
    /** Pd @stable ICU 2.0 */
    U_DASH_PUNCTUATION        = 19,
    /** Ps @stable ICU 2.0 */
    U_START_PUNCTUATION       = 20,
    /** Pe @stable ICU 2.0 */
    U_END_PUNCTUATION         = 21,
    /** Pc @stable ICU 2.0 */
    U_CONNECTOR_PUNCTUATION   = 22,
    /** Po @stable ICU 2.0 */
    U_OTHER_PUNCTUATION       = 23,
    /** Sm @stable ICU 2.0 */
    U_MATH_SYMBOL             = 24,
    /** Sc @stable ICU 2.0 */
    U_CURRENCY_SYMBOL         = 25,
    /** Sk @stable ICU 2.0 */
    U_MODIFIER_SYMBOL         = 26,
    /** So @stable ICU 2.0 */
    U_OTHER_SYMBOL            = 27,
    /** Pi @stable ICU 2.0 */
    U_INITIAL_PUNCTUATION     = 28,
    /** Pf @stable ICU 2.0 */
    U_FINAL_PUNCTUATION       = 29,
    /** One higher than the last enum UCharCategory constant. @stable ICU 2.0 */
    U_CHAR_CATEGORY_COUNT
} UCharCategory;
2001-11-13 22:47:47 +00:00
2002-03-13 23:31:12 +00:00
/**
 * U_GC_XX_MASK constants are bit flags corresponding to Unicode
 * general category values.
 * For each category, the nth bit is set if the numeric value of the
 * corresponding UCharCategory constant is n.
 *
 * There are also some U_GC_Y_MASK constants for groups of general categories
 * like L for all letter categories.
 *
 * @see u_charType
 * @see U_GET_GC_MASK
 * @see UCharCategory
 * @stable ICU 2.1
 */
#define U_GC_CN_MASK    U_MASK(U_GENERAL_OTHER_TYPES)

/** Mask constant for a UCharCategory. @stable ICU 2.1 */
#define U_GC_LU_MASK    U_MASK(U_UPPERCASE_LETTER)
/** Mask constant for a UCharCategory. @stable ICU 2.1 */
#define U_GC_LL_MASK    U_MASK(U_LOWERCASE_LETTER)
/** Mask constant for a UCharCategory. @stable ICU 2.1 */
#define U_GC_LT_MASK    U_MASK(U_TITLECASE_LETTER)
/** Mask constant for a UCharCategory. @stable ICU 2.1 */
#define U_GC_LM_MASK    U_MASK(U_MODIFIER_LETTER)
/** Mask constant for a UCharCategory. @stable ICU 2.1 */
#define U_GC_LO_MASK    U_MASK(U_OTHER_LETTER)

/** Mask constant for a UCharCategory. @stable ICU 2.1 */
#define U_GC_MN_MASK    U_MASK(U_NON_SPACING_MARK)
/** Mask constant for a UCharCategory. @stable ICU 2.1 */
#define U_GC_ME_MASK    U_MASK(U_ENCLOSING_MARK)
/** Mask constant for a UCharCategory. @stable ICU 2.1 */
#define U_GC_MC_MASK    U_MASK(U_COMBINING_SPACING_MARK)

/** Mask constant for a UCharCategory. @stable ICU 2.1 */
#define U_GC_ND_MASK    U_MASK(U_DECIMAL_DIGIT_NUMBER)
/** Mask constant for a UCharCategory. @stable ICU 2.1 */
#define U_GC_NL_MASK    U_MASK(U_LETTER_NUMBER)
/** Mask constant for a UCharCategory. @stable ICU 2.1 */
#define U_GC_NO_MASK    U_MASK(U_OTHER_NUMBER)

/** Mask constant for a UCharCategory. @stable ICU 2.1 */
#define U_GC_ZS_MASK    U_MASK(U_SPACE_SEPARATOR)
/** Mask constant for a UCharCategory. @stable ICU 2.1 */
#define U_GC_ZL_MASK    U_MASK(U_LINE_SEPARATOR)
/** Mask constant for a UCharCategory. @stable ICU 2.1 */
#define U_GC_ZP_MASK    U_MASK(U_PARAGRAPH_SEPARATOR)

/** Mask constant for a UCharCategory. @stable ICU 2.1 */
#define U_GC_CC_MASK    U_MASK(U_CONTROL_CHAR)
/** Mask constant for a UCharCategory. @stable ICU 2.1 */
#define U_GC_CF_MASK    U_MASK(U_FORMAT_CHAR)
/** Mask constant for a UCharCategory. @stable ICU 2.1 */
#define U_GC_CO_MASK    U_MASK(U_PRIVATE_USE_CHAR)
/** Mask constant for a UCharCategory. @stable ICU 2.1 */
#define U_GC_CS_MASK    U_MASK(U_SURROGATE)

/** Mask constant for a UCharCategory. @stable ICU 2.1 */
#define U_GC_PD_MASK    U_MASK(U_DASH_PUNCTUATION)
/** Mask constant for a UCharCategory. @stable ICU 2.1 */
#define U_GC_PS_MASK    U_MASK(U_START_PUNCTUATION)
/** Mask constant for a UCharCategory. @stable ICU 2.1 */
#define U_GC_PE_MASK    U_MASK(U_END_PUNCTUATION)
/** Mask constant for a UCharCategory. @stable ICU 2.1 */
#define U_GC_PC_MASK    U_MASK(U_CONNECTOR_PUNCTUATION)
/** Mask constant for a UCharCategory. @stable ICU 2.1 */
#define U_GC_PO_MASK    U_MASK(U_OTHER_PUNCTUATION)

/** Mask constant for a UCharCategory. @stable ICU 2.1 */
#define U_GC_SM_MASK    U_MASK(U_MATH_SYMBOL)
/** Mask constant for a UCharCategory. @stable ICU 2.1 */
#define U_GC_SC_MASK    U_MASK(U_CURRENCY_SYMBOL)
/** Mask constant for a UCharCategory. @stable ICU 2.1 */
#define U_GC_SK_MASK    U_MASK(U_MODIFIER_SYMBOL)
/** Mask constant for a UCharCategory. @stable ICU 2.1 */
#define U_GC_SO_MASK    U_MASK(U_OTHER_SYMBOL)

/** Mask constant for a UCharCategory. @stable ICU 2.1 */
#define U_GC_PI_MASK    U_MASK(U_INITIAL_PUNCTUATION)
/** Mask constant for a UCharCategory. @stable ICU 2.1 */
#define U_GC_PF_MASK    U_MASK(U_FINAL_PUNCTUATION)


/** Mask constant for multiple UCharCategory bits (L Letters). @stable ICU 2.1 */
#define U_GC_L_MASK \
            (U_GC_LU_MASK|U_GC_LL_MASK|U_GC_LT_MASK|U_GC_LM_MASK|U_GC_LO_MASK)

/** Mask constant for multiple UCharCategory bits (LC Cased Letters). @stable ICU 2.1 */
#define U_GC_LC_MASK \
            (U_GC_LU_MASK|U_GC_LL_MASK|U_GC_LT_MASK)

/** Mask constant for multiple UCharCategory bits (M Marks). @stable ICU 2.1 */
#define U_GC_M_MASK (U_GC_MN_MASK|U_GC_ME_MASK|U_GC_MC_MASK)

/** Mask constant for multiple UCharCategory bits (N Numbers). @stable ICU 2.1 */
#define U_GC_N_MASK (U_GC_ND_MASK|U_GC_NL_MASK|U_GC_NO_MASK)

/** Mask constant for multiple UCharCategory bits (Z Separators). @stable ICU 2.1 */
#define U_GC_Z_MASK (U_GC_ZS_MASK|U_GC_ZL_MASK|U_GC_ZP_MASK)

/** Mask constant for multiple UCharCategory bits (C Others). @stable ICU 2.1 */
#define U_GC_C_MASK \
            (U_GC_CN_MASK|U_GC_CC_MASK|U_GC_CF_MASK|U_GC_CO_MASK|U_GC_CS_MASK)

/** Mask constant for multiple UCharCategory bits (P Punctuation). @stable ICU 2.1 */
#define U_GC_P_MASK \
            (U_GC_PD_MASK|U_GC_PS_MASK|U_GC_PE_MASK|U_GC_PC_MASK|U_GC_PO_MASK| \
             U_GC_PI_MASK|U_GC_PF_MASK)

/** Mask constant for multiple UCharCategory bits (S Symbols). @stable ICU 2.1 */
#define U_GC_S_MASK (U_GC_SM_MASK|U_GC_SC_MASK|U_GC_SK_MASK|U_GC_SO_MASK)
1999-12-28 23:39:02 +00:00
/**
 * This specifies the language directional property of a character set.
 * The short names in the per-constant comments (L, R, EN, ...) are the
 * identifiers read by genpname.
 * @stable ICU 2.0
 */
typedef enum UCharDirection {
    /** See note !!.  Comments of the form "EN" are read by genpname. */

    /** L @stable ICU 2.0 */
    U_LEFT_TO_RIGHT               = 0,
    /** R @stable ICU 2.0 */
    U_RIGHT_TO_LEFT               = 1,
    /** EN @stable ICU 2.0 */
    U_EUROPEAN_NUMBER             = 2,
    /** ES @stable ICU 2.0 */
    U_EUROPEAN_NUMBER_SEPARATOR   = 3,
    /** ET @stable ICU 2.0 */
    U_EUROPEAN_NUMBER_TERMINATOR  = 4,
    /** AN @stable ICU 2.0 */
    U_ARABIC_NUMBER               = 5,
    /** CS @stable ICU 2.0 */
    U_COMMON_NUMBER_SEPARATOR     = 6,
    /** B @stable ICU 2.0 */
    U_BLOCK_SEPARATOR             = 7,
    /** S @stable ICU 2.0 */
    U_SEGMENT_SEPARATOR           = 8,
    /** WS @stable ICU 2.0 */
    U_WHITE_SPACE_NEUTRAL         = 9,
    /** ON @stable ICU 2.0 */
    U_OTHER_NEUTRAL               = 10,
    /** LRE @stable ICU 2.0 */
    U_LEFT_TO_RIGHT_EMBEDDING     = 11,
    /** LRO @stable ICU 2.0 */
    U_LEFT_TO_RIGHT_OVERRIDE      = 12,
    /** AL @stable ICU 2.0 */
    U_RIGHT_TO_LEFT_ARABIC        = 13,
    /** RLE @stable ICU 2.0 */
    U_RIGHT_TO_LEFT_EMBEDDING     = 14,
    /** RLO @stable ICU 2.0 */
    U_RIGHT_TO_LEFT_OVERRIDE      = 15,
    /** PDF @stable ICU 2.0 */
    U_POP_DIRECTIONAL_FORMAT      = 16,
    /** NSM @stable ICU 2.0 */
    U_DIR_NON_SPACING_MARK        = 17,
    /** BN @stable ICU 2.0 */
    U_BOUNDARY_NEUTRAL            = 18,
    /** @stable ICU 2.0 */
    U_CHAR_DIRECTION_COUNT
} UCharDirection;
1999-12-28 23:39:02 +00:00
2000-11-29 03:50:58 +00:00
/**
 * Constants for Unicode blocks, see the Unicode Data file Blocks.txt
 *
 * Every constant has an explicit, fixed numeric value. The bracketed hex
 * number that follows a constant appears to be the first code point of the
 * corresponding block (as listed in Blocks.txt).
 *
 * @stable ICU 2.0
 */
enum UBlockCode {
    /** New No_Block value in Unicode 4. @draft ICU 2.6 */
    UBLOCK_NO_BLOCK = 0, /*[none]*/ /* Special range indicating No_Block */

    UBLOCK_BASIC_LATIN = 1, /*[0000]*/ /*See note !!*/ /**< @stable ICU 2.0 */
    UBLOCK_LATIN_1_SUPPLEMENT = 2, /*[0080]*/ /**< @stable ICU 2.0 */
    UBLOCK_LATIN_EXTENDED_A = 3, /*[0100]*/ /**< @stable ICU 2.0 */
    UBLOCK_LATIN_EXTENDED_B = 4, /*[0180]*/ /**< @stable ICU 2.0 */
    UBLOCK_IPA_EXTENSIONS = 5, /*[0250]*/ /**< @stable ICU 2.0 */
    UBLOCK_SPACING_MODIFIER_LETTERS = 6, /*[02B0]*/ /**< @stable ICU 2.0 */
    UBLOCK_COMBINING_DIACRITICAL_MARKS = 7, /*[0300]*/ /**< @stable ICU 2.0 */

    /**
     * Unicode 3.2 renames this block to "Greek and Coptic".
     * @stable ICU 2.0
     */
    UBLOCK_GREEK = 8, /*[0370]*/

    UBLOCK_CYRILLIC = 9, /*[0400]*/ /**< @stable ICU 2.0 */
    UBLOCK_ARMENIAN = 10, /*[0530]*/ /**< @stable ICU 2.0 */
    UBLOCK_HEBREW = 11, /*[0590]*/ /**< @stable ICU 2.0 */
    UBLOCK_ARABIC = 12, /*[0600]*/ /**< @stable ICU 2.0 */
    UBLOCK_SYRIAC = 13, /*[0700]*/ /**< @stable ICU 2.0 */
    UBLOCK_THAANA = 14, /*[0780]*/ /**< @stable ICU 2.0 */
    UBLOCK_DEVANAGARI = 15, /*[0900]*/ /**< @stable ICU 2.0 */
    UBLOCK_BENGALI = 16, /*[0980]*/ /**< @stable ICU 2.0 */
    UBLOCK_GURMUKHI = 17, /*[0A00]*/ /**< @stable ICU 2.0 */
    UBLOCK_GUJARATI = 18, /*[0A80]*/ /**< @stable ICU 2.0 */
    UBLOCK_ORIYA = 19, /*[0B00]*/ /**< @stable ICU 2.0 */
    UBLOCK_TAMIL = 20, /*[0B80]*/ /**< @stable ICU 2.0 */
    UBLOCK_TELUGU = 21, /*[0C00]*/ /**< @stable ICU 2.0 */
    UBLOCK_KANNADA = 22, /*[0C80]*/ /**< @stable ICU 2.0 */
    UBLOCK_MALAYALAM = 23, /*[0D00]*/ /**< @stable ICU 2.0 */
    UBLOCK_SINHALA = 24, /*[0D80]*/ /**< @stable ICU 2.0 */
    UBLOCK_THAI = 25, /*[0E00]*/ /**< @stable ICU 2.0 */
    UBLOCK_LAO = 26, /*[0E80]*/ /**< @stable ICU 2.0 */
    UBLOCK_TIBETAN = 27, /*[0F00]*/ /**< @stable ICU 2.0 */
    UBLOCK_MYANMAR = 28, /*[1000]*/ /**< @stable ICU 2.0 */
    UBLOCK_GEORGIAN = 29, /*[10A0]*/ /**< @stable ICU 2.0 */
    UBLOCK_HANGUL_JAMO = 30, /*[1100]*/ /**< @stable ICU 2.0 */
    UBLOCK_ETHIOPIC = 31, /*[1200]*/ /**< @stable ICU 2.0 */
    UBLOCK_CHEROKEE = 32, /*[13A0]*/ /**< @stable ICU 2.0 */
    UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS = 33, /*[1400]*/ /**< @stable ICU 2.0 */
    UBLOCK_OGHAM = 34, /*[1680]*/ /**< @stable ICU 2.0 */
    UBLOCK_RUNIC = 35, /*[16A0]*/ /**< @stable ICU 2.0 */
    UBLOCK_KHMER = 36, /*[1780]*/ /**< @stable ICU 2.0 */
    UBLOCK_MONGOLIAN = 37, /*[1800]*/ /**< @stable ICU 2.0 */
    UBLOCK_LATIN_EXTENDED_ADDITIONAL = 38, /*[1E00]*/ /**< @stable ICU 2.0 */
    UBLOCK_GREEK_EXTENDED = 39, /*[1F00]*/ /**< @stable ICU 2.0 */
    UBLOCK_GENERAL_PUNCTUATION = 40, /*[2000]*/ /**< @stable ICU 2.0 */
    UBLOCK_SUPERSCRIPTS_AND_SUBSCRIPTS = 41, /*[2070]*/ /**< @stable ICU 2.0 */
    UBLOCK_CURRENCY_SYMBOLS = 42, /*[20A0]*/ /**< @stable ICU 2.0 */

    /**
     * Unicode 3.2 renames this block to "Combining Diacritical Marks for Symbols".
     * @stable ICU 2.0
     */
    UBLOCK_COMBINING_MARKS_FOR_SYMBOLS = 43, /*[20D0]*/

    UBLOCK_LETTERLIKE_SYMBOLS = 44, /*[2100]*/ /**< @stable ICU 2.0 */
    UBLOCK_NUMBER_FORMS = 45, /*[2150]*/ /**< @stable ICU 2.0 */
    UBLOCK_ARROWS = 46, /*[2190]*/ /**< @stable ICU 2.0 */
    UBLOCK_MATHEMATICAL_OPERATORS = 47, /*[2200]*/ /**< @stable ICU 2.0 */
    UBLOCK_MISCELLANEOUS_TECHNICAL = 48, /*[2300]*/ /**< @stable ICU 2.0 */
    UBLOCK_CONTROL_PICTURES = 49, /*[2400]*/ /**< @stable ICU 2.0 */
    UBLOCK_OPTICAL_CHARACTER_RECOGNITION = 50, /*[2440]*/ /**< @stable ICU 2.0 */
    UBLOCK_ENCLOSED_ALPHANUMERICS = 51, /*[2460]*/ /**< @stable ICU 2.0 */
    UBLOCK_BOX_DRAWING = 52, /*[2500]*/ /**< @stable ICU 2.0 */
    UBLOCK_BLOCK_ELEMENTS = 53, /*[2580]*/ /**< @stable ICU 2.0 */
    UBLOCK_GEOMETRIC_SHAPES = 54, /*[25A0]*/ /**< @stable ICU 2.0 */
    UBLOCK_MISCELLANEOUS_SYMBOLS = 55, /*[2600]*/ /**< @stable ICU 2.0 */
    UBLOCK_DINGBATS = 56, /*[2700]*/ /**< @stable ICU 2.0 */
    UBLOCK_BRAILLE_PATTERNS = 57, /*[2800]*/ /**< @stable ICU 2.0 */
    UBLOCK_CJK_RADICALS_SUPPLEMENT = 58, /*[2E80]*/ /**< @stable ICU 2.0 */
    UBLOCK_KANGXI_RADICALS = 59, /*[2F00]*/ /**< @stable ICU 2.0 */
    UBLOCK_IDEOGRAPHIC_DESCRIPTION_CHARACTERS = 60, /*[2FF0]*/ /**< @stable ICU 2.0 */
    UBLOCK_CJK_SYMBOLS_AND_PUNCTUATION = 61, /*[3000]*/ /**< @stable ICU 2.0 */
    UBLOCK_HIRAGANA = 62, /*[3040]*/ /**< @stable ICU 2.0 */
    UBLOCK_KATAKANA = 63, /*[30A0]*/ /**< @stable ICU 2.0 */
    UBLOCK_BOPOMOFO = 64, /*[3100]*/ /**< @stable ICU 2.0 */
    UBLOCK_HANGUL_COMPATIBILITY_JAMO = 65, /*[3130]*/ /**< @stable ICU 2.0 */
    UBLOCK_KANBUN = 66, /*[3190]*/ /**< @stable ICU 2.0 */
    UBLOCK_BOPOMOFO_EXTENDED = 67, /*[31A0]*/ /**< @stable ICU 2.0 */
    UBLOCK_ENCLOSED_CJK_LETTERS_AND_MONTHS = 68, /*[3200]*/ /**< @stable ICU 2.0 */
    UBLOCK_CJK_COMPATIBILITY = 69, /*[3300]*/ /**< @stable ICU 2.0 */
    UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A = 70, /*[3400]*/ /**< @stable ICU 2.0 */
    UBLOCK_CJK_UNIFIED_IDEOGRAPHS = 71, /*[4E00]*/ /**< @stable ICU 2.0 */
    UBLOCK_YI_SYLLABLES = 72, /*[A000]*/ /**< @stable ICU 2.0 */
    UBLOCK_YI_RADICALS = 73, /*[A490]*/ /**< @stable ICU 2.0 */
    UBLOCK_HANGUL_SYLLABLES = 74, /*[AC00]*/ /**< @stable ICU 2.0 */
    UBLOCK_HIGH_SURROGATES = 75, /*[D800]*/ /**< @stable ICU 2.0 */
    UBLOCK_HIGH_PRIVATE_USE_SURROGATES = 76, /*[DB80]*/ /**< @stable ICU 2.0 */
    UBLOCK_LOW_SURROGATES = 77, /*[DC00]*/ /**< @stable ICU 2.0 */

    /**
     * Same as UBLOCK_PRIVATE_USE_AREA.
     * Until Unicode 3.1.1, the corresponding block name was "Private Use",
     * and multiple code point ranges had this block.
     * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" and
     * adds separate blocks for the supplementary PUAs.
     *
     * @stable ICU 2.0
     */
    UBLOCK_PRIVATE_USE = 78,
    /**
     * Same as UBLOCK_PRIVATE_USE.
     * Until Unicode 3.1.1, the corresponding block name was "Private Use",
     * and multiple code point ranges had this block.
     * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" and
     * adds separate blocks for the supplementary PUAs.
     *
     * @stable ICU 2.0
     */
    UBLOCK_PRIVATE_USE_AREA = UBLOCK_PRIVATE_USE, /*[E000]*/

    UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS = 79, /*[F900]*/ /**< @stable ICU 2.0 */
    UBLOCK_ALPHABETIC_PRESENTATION_FORMS = 80, /*[FB00]*/ /**< @stable ICU 2.0 */
    UBLOCK_ARABIC_PRESENTATION_FORMS_A = 81, /*[FB50]*/ /**< @stable ICU 2.0 */
    UBLOCK_COMBINING_HALF_MARKS = 82, /*[FE20]*/ /**< @stable ICU 2.0 */
    UBLOCK_CJK_COMPATIBILITY_FORMS = 83, /*[FE30]*/ /**< @stable ICU 2.0 */
    UBLOCK_SMALL_FORM_VARIANTS = 84, /*[FE50]*/ /**< @stable ICU 2.0 */
    UBLOCK_ARABIC_PRESENTATION_FORMS_B = 85, /*[FE70]*/ /**< @stable ICU 2.0 */
    /* Note: the next two values do not follow the bracketed code point order (86 is FFF0, 87 is FF00). */
    UBLOCK_SPECIALS = 86, /*[FFF0]*/ /**< @stable ICU 2.0 */
    UBLOCK_HALFWIDTH_AND_FULLWIDTH_FORMS = 87, /*[FF00]*/ /**< @stable ICU 2.0 */

    /* New blocks in Unicode 3.1 */
    UBLOCK_OLD_ITALIC = 88, /*[10300]*/ /**< @stable ICU 2.0 */
    UBLOCK_GOTHIC = 89, /*[10330]*/ /**< @stable ICU 2.0 */
    UBLOCK_DESERET = 90, /*[10400]*/ /**< @stable ICU 2.0 */
    UBLOCK_BYZANTINE_MUSICAL_SYMBOLS = 91, /*[1D000]*/ /**< @stable ICU 2.0 */
    UBLOCK_MUSICAL_SYMBOLS = 92, /*[1D100]*/ /**< @stable ICU 2.0 */
    UBLOCK_MATHEMATICAL_ALPHANUMERIC_SYMBOLS = 93, /*[1D400]*/ /**< @stable ICU 2.0 */
    UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B = 94, /*[20000]*/ /**< @stable ICU 2.0 */
    UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT = 95, /*[2F800]*/ /**< @stable ICU 2.0 */
    UBLOCK_TAGS = 96, /*[E0000]*/ /**< @stable ICU 2.0 */

    /* New blocks in Unicode 3.2 */
    UBLOCK_CYRILLIC_SUPPLEMENTARY = 97, /*[0500]*/ /**< @draft ICU 2.2 */
    UBLOCK_TAGALOG = 98, /*[1700]*/ /**< @draft ICU 2.2 */
    UBLOCK_HANUNOO = 99, /*[1720]*/ /**< @draft ICU 2.2 */
    UBLOCK_BUHID = 100, /*[1740]*/ /**< @draft ICU 2.2 */
    UBLOCK_TAGBANWA = 101, /*[1760]*/ /**< @draft ICU 2.2 */
    UBLOCK_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A = 102, /*[27C0]*/ /**< @draft ICU 2.2 */
    UBLOCK_SUPPLEMENTAL_ARROWS_A = 103, /*[27F0]*/ /**< @draft ICU 2.2 */
    UBLOCK_SUPPLEMENTAL_ARROWS_B = 104, /*[2900]*/ /**< @draft ICU 2.2 */
    UBLOCK_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B = 105, /*[2980]*/ /**< @draft ICU 2.2 */
    UBLOCK_SUPPLEMENTAL_MATHEMATICAL_OPERATORS = 106, /*[2A00]*/ /**< @draft ICU 2.2 */
    UBLOCK_KATAKANA_PHONETIC_EXTENSIONS = 107, /*[31F0]*/ /**< @draft ICU 2.2 */
    UBLOCK_VARIATION_SELECTORS = 108, /*[FE00]*/ /**< @draft ICU 2.2 */
    UBLOCK_SUPPLEMENTARY_PRIVATE_USE_AREA_A = 109, /*[F0000]*/ /**< @draft ICU 2.2 */
    UBLOCK_SUPPLEMENTARY_PRIVATE_USE_AREA_B = 110, /*[100000]*/ /**< @draft ICU 2.2 */

    /* New blocks in Unicode 4 */
    UBLOCK_LIMBU = 111, /*[1900]*/ /**< @draft ICU 2.6 */
    UBLOCK_TAI_LE = 112, /*[1950]*/ /**< @draft ICU 2.6 */
    UBLOCK_KHMER_SYMBOLS = 113, /*[19E0]*/ /**< @draft ICU 2.6 */
    UBLOCK_PHONETIC_EXTENSIONS = 114, /*[1D00]*/ /**< @draft ICU 2.6 */
    UBLOCK_MISCELLANEOUS_SYMBOLS_AND_ARROWS = 115, /*[2B00]*/ /**< @draft ICU 2.6 */
    UBLOCK_YIJING_HEXAGRAM_SYMBOLS = 116, /*[4DC0]*/ /**< @draft ICU 2.6 */
    UBLOCK_LINEAR_B_SYLLABARY = 117, /*[10000]*/ /**< @draft ICU 2.6 */
    UBLOCK_LINEAR_B_IDEOGRAMS = 118, /*[10080]*/ /**< @draft ICU 2.6 */
    UBLOCK_AEGEAN_NUMBERS = 119, /*[10100]*/ /**< @draft ICU 2.6 */
    UBLOCK_UGARITIC = 120, /*[10380]*/ /**< @draft ICU 2.6 */
    UBLOCK_SHAVIAN = 121, /*[10450]*/ /**< @draft ICU 2.6 */
    UBLOCK_OSMANYA = 122, /*[10480]*/ /**< @draft ICU 2.6 */
    UBLOCK_CYPRIOT_SYLLABARY = 123, /*[10800]*/ /**< @draft ICU 2.6 */
    UBLOCK_TAI_XUAN_JING_SYMBOLS = 124, /*[1D300]*/ /**< @draft ICU 2.6 */
    UBLOCK_VARIATION_SELECTORS_SUPPLEMENT = 125, /*[E0100]*/ /**< @draft ICU 2.6 */

    /**
     * Implicitly one more than the preceding constant (126 with the list above).
     * @stable ICU 2.0
     */
    UBLOCK_COUNT,

    /** @stable ICU 2.0 */
    UBLOCK_INVALID_CODE = -1
};

/** C typedef for enum UBlockCode. @stable ICU 2.0 */
typedef enum UBlockCode UBlockCode;
1999-12-28 23:39:02 +00:00
/**
 * Values returned by the u_getCellWidth() function.
 * @obsolete ICU 2.6. Use UCHAR_EAST_ASIAN_WIDTH instead since this API will be removed in that release.
 */
enum UCellWidth
{
    /** @obsolete ICU 2.6. Use UCHAR_EAST_ASIAN_WIDTH instead since this API will be removed in that release. */
    U_ZERO_WIDTH = 0,
    /** @obsolete ICU 2.6. Use UCHAR_EAST_ASIAN_WIDTH instead since this API will be removed in that release. */
    U_HALF_WIDTH = 1,
    /** @obsolete ICU 2.6. Use UCHAR_EAST_ASIAN_WIDTH instead since this API will be removed in that release. */
    U_FULL_WIDTH = 2,
    /** @obsolete ICU 2.6. Use UCHAR_EAST_ASIAN_WIDTH instead since this API will be removed in that release. */
    U_NEUTRAL_WIDTH = 3,
    /**
     * Number of cell width values; implicitly one more than U_NEUTRAL_WIDTH.
     * @obsolete ICU 2.6. Use UCHAR_EAST_ASIAN_WIDTH instead since this API will be removed in that release.
     */
    U_CELL_WIDTH_COUNT
};
typedef enum UCellWidth UCellWidth; /**< C typedef for enum UCellWidth. @obsolete ICU 2.6. Use UCHAR_EAST_ASIAN_WIDTH instead since this API will be removed in that release. */
1999-12-28 23:39:02 +00:00
2002-07-04 00:38:51 +00:00
/**
 * East Asian Width constants.
 *
 * Values are assigned implicitly, starting at 0 in declaration order.
 * The bracketed token after each constant appears to be the short
 * property value name.
 *
 * @see UCHAR_EAST_ASIAN_WIDTH
 * @see u_getIntPropertyValue
 * @draft ICU 2.2
 */
typedef enum UEastAsianWidth {
    U_EA_NEUTRAL,   /*[N]*/ /*See note !!*/
    U_EA_AMBIGUOUS, /*[A]*/
    U_EA_HALFWIDTH, /*[H]*/
    U_EA_FULLWIDTH, /*[F]*/
    U_EA_NARROW,    /*[Na]*/
    U_EA_WIDE,      /*[W]*/
    U_EA_COUNT      /* 6 */
} UEastAsianWidth;
2002-07-04 00:38:51 +00:00
/*
 * Implementation note:
 * Keep UEastAsianWidth constant values in sync with names list in genprops/props2.c.
 */
1999-12-28 23:39:02 +00:00
/**
 * Selector constants for u_charName().
 * u_charName() returns the "modern" name of a
 * Unicode character; or the name that was defined in
 * Unicode version 1.0, before the Unicode standard merged
 * with ISO-10646; or an "extended" name that gives each
 * Unicode code point a unique name.
 *
 * @see u_charName
 * @stable ICU 2.0
 */
typedef enum UCharNameChoice {
    /** Selects the "modern" character name. */
    U_UNICODE_CHAR_NAME,
    /** Selects the name defined in Unicode version 1.0. */
    U_UNICODE_10_CHAR_NAME,
    /** Selects the "extended" name that is unique per code point. */
    U_EXTENDED_CHAR_NAME,
    /** Number of selectors; implicitly one more than the last selector. */
    U_CHAR_NAME_CHOICE_COUNT
} UCharNameChoice;
1999-12-28 23:39:02 +00:00
2002-10-30 18:24:53 +00:00
/**
 * Selector constants for u_getPropertyName() and
 * u_getPropertyValueName().  These selectors are used to choose which
 * name is returned for a given property or value.  All properties and
 * values have a long name.  Most have a short name, but some do not.
 * Unicode allows for additional names, beyond the long and short
 * name, which would be indicated by U_LONG_PROPERTY_NAME + i, where
 * i=1, 2,...
 *
 * @see u_getPropertyName()
 * @see u_getPropertyValueName()
 * @draft ICU 2.4
 */
typedef enum UPropertyNameChoice {
    /** Selects the short name (value 0). */
    U_SHORT_PROPERTY_NAME,
    /** Selects the long name (value 1); additional names are addressed as this value + i. */
    U_LONG_PROPERTY_NAME,
    /** Number of named selectors declared here. */
    U_PROPERTY_NAME_CHOICE_COUNT
} UPropertyNameChoice;
2002-07-04 16:46:36 +00:00
/**
 * Decomposition Type constants.
 *
 * Values are assigned implicitly, starting at 0 in declaration order.
 * The bracketed token after each constant appears to be the short
 * property value name.
 *
 * @see UCHAR_DECOMPOSITION_TYPE
 * @draft ICU 2.2
 */
typedef enum UDecompositionType {
    U_DT_NONE,      /*[none]*/ /*See note !!*/
    U_DT_CANONICAL, /*[can]*/
    U_DT_COMPAT,    /*[com]*/
    U_DT_CIRCLE,    /*[enc]*/
    U_DT_FINAL,     /*[fin]*/
    U_DT_FONT,      /*[font]*/
    U_DT_FRACTION,  /*[fra]*/
    U_DT_INITIAL,   /*[init]*/
    U_DT_ISOLATED,  /*[iso]*/
    U_DT_MEDIAL,    /*[med]*/
    U_DT_NARROW,    /*[nar]*/
    U_DT_NOBREAK,   /*[nb]*/
    U_DT_SMALL,     /*[sml]*/
    U_DT_SQUARE,    /*[sqr]*/
    U_DT_SUB,       /*[sub]*/
    U_DT_SUPER,     /*[sup]*/
    U_DT_VERTICAL,  /*[vert]*/
    U_DT_WIDE,      /*[wide]*/
    U_DT_COUNT      /* 18 */
} UDecompositionType;
2002-07-04 16:46:36 +00:00
/**
 * Joining Type constants.
 *
 * Values are assigned implicitly, starting at 0 in declaration order.
 * The bracketed token after each constant appears to be the short
 * property value name.
 *
 * @see UCHAR_JOINING_TYPE
 * @draft ICU 2.2
 */
typedef enum UJoiningType {
    U_JT_NON_JOINING,   /*[U]*/ /*See note !!*/
    U_JT_JOIN_CAUSING,  /*[C]*/
    U_JT_DUAL_JOINING,  /*[D]*/
    U_JT_LEFT_JOINING,  /*[L]*/
    U_JT_RIGHT_JOINING, /*[R]*/
    U_JT_TRANSPARENT,   /*[T]*/
    U_JT_COUNT          /* 6 */
} UJoiningType;
2002-07-04 16:46:36 +00:00
/**
 * Joining Group constants.
 *
 * Values are assigned implicitly, starting at 0 in declaration order.
 * The three constants marked "@draft ICU 2.6" are placed after the older
 * ones, directly before U_JG_COUNT, so the older values are unchanged.
 *
 * @see UCHAR_JOINING_GROUP
 * @draft ICU 2.2
 */
typedef enum UJoiningGroup {
    U_JG_NO_JOINING_GROUP,
    U_JG_AIN,
    U_JG_ALAPH,
    U_JG_ALEF,
    U_JG_BEH,
    U_JG_BETH,
    U_JG_DAL,
    U_JG_DALATH_RISH,
    U_JG_E,
    U_JG_FEH,
    U_JG_FINAL_SEMKATH,
    U_JG_GAF,
    U_JG_GAMAL,
    U_JG_HAH,
    U_JG_HAMZA_ON_HEH_GOAL,
    U_JG_HE,
    U_JG_HEH,
    U_JG_HEH_GOAL,
    U_JG_HETH,
    U_JG_KAF,
    U_JG_KAPH,
    U_JG_KNOTTED_HEH,
    U_JG_LAM,
    U_JG_LAMADH,
    U_JG_MEEM,
    U_JG_MIM,
    U_JG_NOON,
    U_JG_NUN,
    U_JG_PE,
    U_JG_QAF,
    U_JG_QAPH,
    U_JG_REH,
    U_JG_REVERSED_PE,
    U_JG_SAD,
    U_JG_SADHE,
    U_JG_SEEN,
    U_JG_SEMKATH,
    U_JG_SHIN,
    U_JG_SWASH_KAF,
    U_JG_SYRIAC_WAW,
    U_JG_TAH,
    U_JG_TAW,
    U_JG_TEH_MARBUTA,
    U_JG_TETH,
    U_JG_WAW,
    U_JG_YEH,
    U_JG_YEH_BARREE,
    U_JG_YEH_WITH_TAIL,
    U_JG_YUDH,
    U_JG_YUDH_HE,
    U_JG_ZAIN,
    U_JG_FE,    /**< @draft ICU 2.6 */
    U_JG_KHAPH, /**< @draft ICU 2.6 */
    U_JG_ZHAIN, /**< @draft ICU 2.6 */
    U_JG_COUNT  /* 54 */
} UJoiningGroup;
2002-07-04 16:46:36 +00:00
/**
 * Line Break constants.
 *
 * Values are assigned implicitly, starting at 0 in declaration order.
 * The bracketed token after each constant appears to be the short
 * property value name.
 *
 * @see UCHAR_LINE_BREAK
 * @draft ICU 2.2
 */
typedef enum ULineBreak {
    U_LB_UNKNOWN,           /*[XX]*/ /*See note !!*/
    U_LB_AMBIGUOUS,         /*[AI]*/
    U_LB_ALPHABETIC,        /*[AL]*/
    U_LB_BREAK_BOTH,        /*[B2]*/
    U_LB_BREAK_AFTER,       /*[BA]*/
    U_LB_BREAK_BEFORE,      /*[BB]*/
    U_LB_MANDATORY_BREAK,   /*[BK]*/
    U_LB_CONTINGENT_BREAK,  /*[CB]*/
    U_LB_CLOSE_PUNCTUATION, /*[CL]*/
    U_LB_COMBINING_MARK,    /*[CM]*/
    U_LB_CARRIAGE_RETURN,   /*[CR]*/
    U_LB_EXCLAMATION,       /*[EX]*/
    U_LB_GLUE,              /*[GL]*/
    U_LB_HYPHEN,            /*[HY]*/
    U_LB_IDEOGRAPHIC,       /*[ID]*/
    /** Misspelled ("inseperable"); kept so that existing callers continue to compile. */
    U_LB_INSEPERABLE,       /*[IN]*/
    /**
     * Correctly spelled alias of U_LB_INSEPERABLE.
     * Defined as the same value so the implicit numbering of the following
     * constants is not shifted.
     */
    U_LB_INSEPARABLE = U_LB_INSEPERABLE,
    U_LB_INFIX_NUMERIC,     /*[IS]*/
    U_LB_LINE_FEED,         /*[LF]*/
    U_LB_NONSTARTER,        /*[NS]*/
    U_LB_NUMERIC,           /*[NU]*/
    U_LB_OPEN_PUNCTUATION,  /*[OP]*/
    U_LB_POSTFIX_NUMERIC,   /*[PO]*/
    U_LB_PREFIX_NUMERIC,    /*[PR]*/
    U_LB_QUOTATION,         /*[QU]*/
    U_LB_COMPLEX_CONTEXT,   /*[SA]*/
    U_LB_SURROGATE,         /*[SG]*/
    U_LB_SPACE,             /*[SP]*/
    U_LB_BREAK_SYMBOLS,     /*[SY]*/
    U_LB_ZWSPACE,           /*[ZW]*/
    U_LB_NEXT_LINE,         /*[NL]*/ /* new in Unicode 4/ICU 2.6 */
    U_LB_WORD_JOINER,       /*[WJ]*/
    U_LB_COUNT              /* 31 */
} ULineBreak;
2002-07-04 16:46:36 +00:00
2002-07-04 00:38:51 +00:00
/**
 * Numeric Type constants.
 *
 * Values are assigned implicitly, starting at 0 in declaration order.
 *
 * @see UCHAR_NUMERIC_TYPE
 * @draft ICU 2.2
 */
typedef enum UNumericType {
    U_NT_NONE,    /*[None]*/ /*See note !!*/
    U_NT_DECIMAL, /*[de]*/
    U_NT_DIGIT,   /*[di]*/
    U_NT_NUMERIC, /*[nu]*/
    U_NT_COUNT    /* 4 */
} UNumericType;
2002-07-04 00:38:51 +00:00
2003-03-08 02:00:06 +00:00
/**
 * Hangul Syllable Type constants.
 *
 * Values are assigned implicitly, starting at 0 in declaration order.
 *
 * @see UCHAR_HANGUL_SYLLABLE_TYPE
 * @draft ICU 2.6
 */
typedef enum UHangulSyllableType {
    U_HST_NOT_APPLICABLE, /*[NA]*/ /*See note !!*/
    U_HST_LEADING_JAMO,   /*[L]*/
    U_HST_VOWEL_JAMO,     /*[V]*/
    U_HST_TRAILING_JAMO,  /*[T]*/
    U_HST_LV_SYLLABLE,    /*[LV]*/
    U_HST_LVT_SYLLABLE,   /*[LVT]*/
    U_HST_COUNT           /* 6 */
} UHangulSyllableType;
2002-03-06 23:31:11 +00:00
/**
* Check a binary Unicode property for a code point .
*
* Unicode , especially in version 3.2 , defines many more properties than the
* original set in UnicodeData . txt .
*
* The properties APIs are intended to reflect Unicode properties as defined
* in the Unicode Character Database ( UCD ) and Unicode Technical Reports ( UTR ) .
2003-02-11 01:59:32 +00:00
* For details about the properties see http : //www.unicode.org/ucd/ .
2002-03-06 23:31:11 +00:00
* For names of Unicode properties see the UCD file PropertyAliases . txt .
*
* Important : If ICU is built with UCD files from Unicode versions below 3.2 ,
* then properties marked with " new in Unicode 3.2 " are not or not fully available .
*
* @ param c Code point to test .
* @ param which UProperty selector constant , identifies which binary property to check .
* Must be UCHAR_BINARY_START < = which < UCHAR_BINARY_LIMIT .
* @ return TRUE or FALSE according to the binary Unicode property value for c .
2002-12-11 23:28:44 +00:00
* Also FALSE if ' which ' is out of bounds or if the Unicode version
2002-03-06 23:31:11 +00:00
* does not have data for the property at all , or not for this code point .
*
* @ see UProperty
2002-07-04 00:38:51 +00:00
* @ see u_getIntPropertyValue
2002-03-06 23:31:11 +00:00
* @ see u_getUnicodeVersion
2003-02-11 01:59:32 +00:00
* @ stable ICU 2.1
2002-03-06 23:31:11 +00:00
*/
2002-03-12 19:09:08 +00:00
U_CAPI UBool U_EXPORT2
u_hasBinaryProperty ( UChar32 c , UProperty which ) ;
2002-03-06 23:31:11 +00:00
/**
* Check if a code point has the Alphabetic Unicode property .
* Same as u_hasBinaryProperty ( c , UCHAR_ALPHABETIC ) .
* This is different from u_isalpha !
2002-07-03 12:05:56 +00:00
* @ param c Code point to test
* @ return true if the code point has the Alphabetic Unicode property , false otherwise
2002-03-06 23:31:11 +00:00
*
* @ see UCHAR_ALPHABETIC
* @ see u_isalpha
* @ see u_hasBinaryProperty
2003-02-11 01:59:32 +00:00
* @ stable ICU 2.1
2002-03-06 23:31:11 +00:00
*/
2002-03-12 19:09:08 +00:00
U_CAPI UBool U_EXPORT2
u_isUAlphabetic ( UChar32 c ) ;
2002-03-06 23:31:11 +00:00
/**
* Check if a code point has the Lowercase Unicode property .
* Same as u_hasBinaryProperty ( c , UCHAR_LOWERCASE ) .
* This is different from u_islower !
2002-07-03 12:05:56 +00:00
* @ param c Code point to test
* @ return true if the code point has the Lowercase Unicode property , false otherwise
2002-03-06 23:31:11 +00:00
*
* @ see UCHAR_LOWERCASE
* @ see u_islower
* @ see u_hasBinaryProperty
2003-02-11 01:59:32 +00:00
* @ stable ICU 2.1
2002-03-06 23:31:11 +00:00
*/
2002-03-12 19:09:08 +00:00
U_CAPI UBool U_EXPORT2
u_isULowercase ( UChar32 c ) ;
2002-03-06 23:31:11 +00:00
/**
* Check if a code point has the Uppercase Unicode property .
* Same as u_hasBinaryProperty ( c , UCHAR_UPPERCASE ) .
* This is different from u_isupper !
2002-07-03 12:05:56 +00:00
* @ param c Code point to test
* @ return true if the code point has the Uppercase Unicode property , false otherwise
2002-03-06 23:31:11 +00:00
*
* @ see UCHAR_UPPERCASE
* @ see u_isupper
* @ see u_hasBinaryProperty
2003-02-11 01:59:32 +00:00
* @ stable ICU 2.1
2002-03-06 23:31:11 +00:00
*/
2002-03-12 19:09:08 +00:00
U_CAPI UBool U_EXPORT2
u_isUUppercase ( UChar32 c ) ;
2002-03-06 23:31:11 +00:00
/**
* Check if a code point has the White_Space Unicode property .
* Same as u_hasBinaryProperty ( c , UCHAR_WHITE_SPACE ) .
* This is different from both u_isspace and u_isWhitespace !
2003-02-11 01:59:32 +00:00
*
* Comparison :
* - u_isUWhiteSpace = UCHAR_WHITE_SPACE : Unicode White_Space property ;
2003-04-24 23:09:26 +00:00
* most of general categories " Z " ( separators ) + most whitespace ISO controls
* ( including no - break spaces , but excluding IS1 . . IS4 and ZWSP )
2003-02-11 01:59:32 +00:00
* - u_isWhitespace : Java isWhitespace ; Z + whitespace ISO controls but excluding no - break spaces
2003-02-13 01:46:14 +00:00
* - u_isJavaSpaceChar : Java isSpaceChar ; just Z ( including no - break spaces )
2003-02-11 01:59:32 +00:00
* - u_isspace : Z + whitespace ISO controls ( including no - break spaces )
2003-04-24 23:09:26 +00:00
* - u_isblank : " horizontal spaces " = TAB + Zs - ZWSP
2003-02-11 01:59:32 +00:00
*
2002-07-03 12:05:56 +00:00
* @ param c Code point to test
* @ return true if the code point has the White_Space Unicode property , false otherwise .
2002-03-06 23:31:11 +00:00
*
* @ see UCHAR_WHITE_SPACE
* @ see u_isWhitespace
* @ see u_isspace
2003-02-09 21:02:26 +00:00
* @ see u_isJavaSpaceChar
2002-03-06 23:31:11 +00:00
* @ see u_hasBinaryProperty
2003-02-11 01:59:32 +00:00
* @ stable ICU 2.1
2002-03-06 23:31:11 +00:00
*/
2002-03-12 19:09:08 +00:00
U_CAPI UBool U_EXPORT2
u_isUWhiteSpace ( UChar32 c ) ;
2002-03-06 23:31:11 +00:00
2002-07-04 00:38:51 +00:00
/**
 * Get the property value for an enumerated or integer Unicode property for a code point.
 * Also returns binary and mask property values.
 *
 * Unicode, especially in version 3.2, defines many more properties than the
 * original set in UnicodeData.txt.
 *
 * The properties APIs are intended to reflect Unicode properties as defined
 * in the Unicode Character Database (UCD) and Unicode Technical Reports (UTR).
 * For details about the properties see http://www.unicode.org/ .
 * For names of Unicode properties see the UCD file PropertyAliases.txt.
 *
 * Sample usage:
 * UEastAsianWidth ea=(UEastAsianWidth)u_getIntPropertyValue(c, UCHAR_EAST_ASIAN_WIDTH);
 * UBool b=(UBool)u_getIntPropertyValue(c, UCHAR_IDEOGRAPHIC);
 *
 * @param c Code point to test.
 * @param which UProperty selector constant, identifies which property to check.
 *        Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT
 *        or UCHAR_INT_START<=which<UCHAR_INT_LIMIT
 *        or UCHAR_MASK_START<=which<UCHAR_MASK_LIMIT.
 * @return Numeric value that is directly the property value or,
 *         for enumerated properties, corresponds to the numeric value of the enumerated
 *         constant of the respective property value enumeration type
 *         (cast to enum type if necessary).
 *         Returns 0 or 1 (for FALSE/TRUE) for binary Unicode properties.
 *         Returns a bit-mask for mask properties.
 *         Returns 0 if 'which' is out of bounds or if the Unicode version
 *         does not have data for the property at all, or not for this code point.
 *
 * @see UProperty
 * @see u_hasBinaryProperty
 * @see u_getIntPropertyMinValue
 * @see u_getIntPropertyMaxValue
 * @see u_getUnicodeVersion
 * @draft ICU 2.2
 */
U_CAPI int32_t U_EXPORT2
u_getIntPropertyValue(UChar32 c, UProperty which);
/**
* Get the minimum value for an enumerated / integer / binary Unicode property .
* Can be used together with u_getIntPropertyMaxValue
* to allocate arrays of UnicodeSet or similar .
*
* @ param which UProperty selector constant , identifies which property to check .
* Must be UCHAR_BINARY_START < = which < UCHAR_BINARY_LIMIT
2002-12-11 01:09:02 +00:00
* or UCHAR_INT_START < = which < UCHAR_INT_LIMIT .
2002-07-04 00:38:51 +00:00
* @ return Minimum value returned by u_getIntPropertyValue for a Unicode property .
* 0 if the property selector is out of range .
*
* @ see UProperty
* @ see u_hasBinaryProperty
* @ see u_getUnicodeVersion
* @ see u_getIntPropertyMaxValue
* @ see u_getIntPropertyValue
* @ draft ICU 2.2
*/
U_CAPI int32_t U_EXPORT2
u_getIntPropertyMinValue ( UProperty which ) ;
/**
* Get the maximum value for an enumerated / integer / binary Unicode property .
* Can be used together with u_getIntPropertyMinValue
* to allocate arrays of UnicodeSet or similar .
*
* Examples for min / max values ( for Unicode 3.2 ) :
*
* - UCHAR_BIDI_CLASS : 0 / 18 ( U_LEFT_TO_RIGHT / U_BOUNDARY_NEUTRAL )
2002-11-14 02:16:22 +00:00
* - UCHAR_SCRIPT : 0 / 45 ( USCRIPT_COMMON / USCRIPT_TAGBANWA )
2002-07-04 00:38:51 +00:00
* - UCHAR_IDEOGRAPHIC : 0 / 1 ( FALSE / TRUE )
*
2002-11-13 20:03:11 +00:00
* For undefined UProperty constant values , min / max values will be 0 / - 1.
2002-11-14 02:16:22 +00:00
*
2002-07-04 00:38:51 +00:00
* @ param which UProperty selector constant , identifies which property to check .
* Must be UCHAR_BINARY_START < = which < UCHAR_BINARY_LIMIT
2002-12-11 01:09:02 +00:00
* or UCHAR_INT_START < = which < UCHAR_INT_LIMIT .
2002-07-04 00:38:51 +00:00
* @ return Maximum value returned by u_getIntPropertyValue for a Unicode property .
2002-11-13 20:03:11 +00:00
* < = 0 if the property selector is out of range .
2002-07-04 00:38:51 +00:00
*
* @ see UProperty
* @ see u_hasBinaryProperty
* @ see u_getUnicodeVersion
* @ see u_getIntPropertyMinValue
* @ see u_getIntPropertyValue
* @ draft ICU 2.2
*/
U_CAPI int32_t U_EXPORT2
u_getIntPropertyMaxValue ( UProperty which ) ;
/**
* Get the numeric value for a Unicode code point as defined in the
* Unicode Character Database .
*
* A " double " return type is necessary because
* some numeric values are fractions , negative , or too large for int32_t .
*
* For characters without any numeric values in the Unicode Character Database ,
* this function will return U_NO_NUMERIC_VALUE .
*
2003-02-11 23:27:40 +00:00
* Similar to java . lang . Character . getNumericValue ( ) , but u_getNumericValue ( )
2003-02-13 01:46:14 +00:00
* also supports negative values , large values , and fractions ,
* while Java ' s getNumericValue ( ) returns values 10. .35 for ASCII letters .
2003-02-11 23:27:40 +00:00
*
2002-07-04 00:38:51 +00:00
* @ param c Code point to get the numeric value for .
* @ return Numeric value of c , or U_NO_NUMERIC_VALUE if none is defined .
*
* @ see U_NO_NUMERIC_VALUE
* @ draft ICU 2.2
*/
U_CAPI double U_EXPORT2
u_getNumericValue ( UChar32 c ) ;
/**
* Special value that is returned by u_getNumericValue when
* no numeric value is defined for a code point .
*
* @ see u_getNumericValue
* @ draft ICU 2.2
*/
# define U_NO_NUMERIC_VALUE ((double)-123456789.)
1999-12-28 23:39:02 +00:00
/**
2003-02-11 01:59:32 +00:00
* Determines whether the specified code point has the general category " Ll "
* ( lowercase letter ) .
1999-12-28 23:39:02 +00:00
*
2003-02-11 01:59:32 +00:00
* Same as java . lang . Character . isLowerCase ( ) .
*
* This misses some characters that are also lowercase but
* have a different general category value .
* In order to include those , use UCHAR_LOWERCASE .
*
2003-04-24 23:09:26 +00:00
* In addition to being equivalent to a Java function , this also serves
* as a C / POSIX migration function .
* See the comments about C / POSIX character classification functions in the
* documentation at the top of this header file .
*
2003-02-11 01:59:32 +00:00
* @ param c the code point to be tested
* @ return TRUE if the code point is an Ll lowercase letter
*
* @ see UCHAR_LOWERCASE
2001-11-09 18:17:40 +00:00
* @ see u_isupper
* @ see u_istitle
* @ see u_islower
2002-12-04 23:39:56 +00:00
* @ stable ICU 2.0
1999-12-28 23:39:02 +00:00
*/
2000-05-18 22:08:39 +00:00
U_CAPI UBool U_EXPORT2
2000-02-03 19:54:49 +00:00
u_islower ( UChar32 c ) ;
1999-12-28 23:39:02 +00:00
/**
2003-02-11 01:59:32 +00:00
* Determines whether the specified code point has the general category " Lu "
* ( uppercase letter ) .
1999-12-28 23:39:02 +00:00
*
2003-02-11 01:59:32 +00:00
* Same as java . lang . Character . isUpperCase ( ) .
*
* This misses some characters that are also uppercase but
* have a different general category value .
* In order to include those , use UCHAR_UPPERCASE .
*
2003-04-24 23:09:26 +00:00
* In addition to being equivalent to a Java function , this also serves
* as a C / POSIX migration function .
* See the comments about C / POSIX character classification functions in the
* documentation at the top of this header file .
*
2003-02-11 01:59:32 +00:00
* @ param c the code point to be tested
* @ return TRUE if the code point is an Lu uppercase letter
*
* @ see UCHAR_UPPERCASE
2001-11-09 18:17:40 +00:00
* @ see u_islower
1999-12-28 23:39:02 +00:00
* @ see u_istitle
2001-11-09 18:17:40 +00:00
* @ see u_tolower
2002-12-04 23:39:56 +00:00
* @ stable ICU 2.0
1999-12-28 23:39:02 +00:00
*/
2000-05-18 22:08:39 +00:00
U_CAPI UBool U_EXPORT2
2000-02-03 19:54:49 +00:00
u_isupper ( UChar32 c ) ;
1999-12-28 23:39:02 +00:00
/**
2003-02-11 01:59:32 +00:00
* Determines whether the specified code point is a titlecase letter .
* True for general category " Lt " ( titlecase letter ) .
*
* Same as java . lang . Character . isTitleCase ( ) .
*
* @ param c the code point to be tested
* @ return TRUE if the code point is an Lt titlecase letter
1999-12-28 23:39:02 +00:00
*
2001-11-09 18:17:40 +00:00
* @ see u_isupper
* @ see u_islower
* @ see u_totitle
2002-12-04 23:39:56 +00:00
* @ stable ICU 2.0
1999-12-28 23:39:02 +00:00
*/
2000-05-18 22:08:39 +00:00
U_CAPI UBool U_EXPORT2
2000-02-03 19:54:49 +00:00
u_istitle ( UChar32 c ) ;
1999-12-28 23:39:02 +00:00
/**
2003-02-09 21:02:26 +00:00
* Determines whether the specified code point is a digit character according to Java .
* True for characters with general category " Nd " ( decimal digit numbers ) .
2003-04-09 19:04:01 +00:00
* Beginning with Unicode 4 , this is the same as
* testing for the Numeric_Type of Decimal .
2003-02-09 21:02:26 +00:00
*
* Same as java . lang . Character . isDigit ( ) .
*
2003-04-24 23:09:26 +00:00
* In addition to being equivalent to a Java function , this also serves
* as a C / POSIX migration function .
* See the comments about C / POSIX character classification functions in the
* documentation at the top of this header file .
*
2003-02-09 21:02:26 +00:00
* @ param c the code point to be tested
* @ return TRUE if the code point is a digit character according to Character . isDigit ( )
2001-01-23 23:45:21 +00:00
*
2002-12-04 23:39:56 +00:00
* @ stable ICU 2.0
1999-12-28 23:39:02 +00:00
*/
2000-05-18 22:08:39 +00:00
U_CAPI UBool U_EXPORT2
2000-02-03 19:54:49 +00:00
u_isdigit ( UChar32 c ) ;
1999-12-28 23:39:02 +00:00
2003-02-11 01:59:32 +00:00
/**
* Determines whether the specified code point is a letter character .
* True for general categories " L " ( letters ) .
*
* Same as java . lang . Character . isLetter ( ) .
*
2003-04-24 23:09:26 +00:00
* In addition to being equivalent to a Java function , this also serves
* as a C / POSIX migration function .
* See the comments about C / POSIX character classification functions in the
* documentation at the top of this header file .
*
2003-02-11 01:59:32 +00:00
* @ param c the code point to be tested
* @ return TRUE if the code point is a letter character
*
* @ see u_isdigit
* @ see u_isalnum
* @ stable ICU 2.0
*/
U_CAPI UBool U_EXPORT2
u_isalpha ( UChar32 c ) ;
1999-12-28 23:39:02 +00:00
/**
2003-02-09 21:02:26 +00:00
* Determines whether the specified code point is an alphanumeric character
* ( letter or digit ) according to Java .
* True for characters with general categories
* " L " ( letters ) and " Nd " ( decimal digit numbers ) .
*
* Same as java . lang . Character . isLetterOrDigit ( ) .
*
2003-04-24 23:09:26 +00:00
* In addition to being equivalent to a Java function , this also serves
* as a C / POSIX migration function .
* See the comments about C / POSIX character classification functions in the
* documentation at the top of this header file .
*
2003-02-09 21:02:26 +00:00
* @ param c the code point to be tested
* @ return TRUE if the code point is an alphanumeric character according to Character . isLetterOrDigit ( )
1999-12-28 23:39:02 +00:00
*
2002-12-04 23:39:56 +00:00
* @ stable ICU 2.0
1999-12-28 23:39:02 +00:00
*/
2000-05-18 22:08:39 +00:00
U_CAPI UBool U_EXPORT2
2000-02-03 19:54:49 +00:00
u_isalnum ( UChar32 c ) ;
1999-12-28 23:39:02 +00:00
2003-04-24 23:09:26 +00:00
/**
* Determines whether the specified code point is a hexadecimal digit .
* This is equivalent to u_digit ( c , 16 ) > = 0.
* True for characters with general category " Nd " ( decimal digit numbers )
* as well as Latin letters a - f and A - F in both ASCII and Fullwidth ASCII .
* ( That is , for letters with code points
* 0041. .0046 , 0061. .0066 , FF21 . . FF26 , FF41 . . FF46 . )
*
* In order to narrow the definition of hexadecimal digits to only ASCII
* characters , use ( c < = 0x7f & & u_isxdigit ( c ) ) .
*
* This is a C / POSIX migration function .
* See the comments about C / POSIX character classification functions in the
* documentation at the top of this header file .
*
* @ param c the code point to be tested
* @ return TRUE if the code point is a hexadecimal digit
*
* @ draft ICU 2.6
*/
U_CAPI UBool U_EXPORT2
u_isxdigit ( UChar32 c ) ;
/**
* Determines whether the specified code point is a punctuation character .
* True for characters with general categories " P " ( punctuation ) .
*
* This is a C / POSIX migration function .
* See the comments about C / POSIX character classification functions in the
* documentation at the top of this header file .
*
* @ param c the code point to be tested
* @ return TRUE if the code point is a punctuation character
*
* @ draft ICU 2.6
*/
U_CAPI UBool U_EXPORT2
u_ispunct ( UChar32 c ) ;
/**
* Determines whether the specified code point is a " graphic " character
* ( printable , excluding spaces ) .
* TRUE for all characters except those with general categories
* " Cc " ( control codes ) , " Cf " ( format controls ) , " Cs " ( surrogates ) ,
* " Cn " ( unassigned ) , and " Z " ( separators ) .
*
* This is a C / POSIX migration function .
* See the comments about C / POSIX character classification functions in the
* documentation at the top of this header file .
*
* @ param c the code point to be tested
* @ return TRUE if the code point is a " graphic " character
*
* @ draft ICU 2.6
*/
U_CAPI UBool U_EXPORT2
u_isgraph ( UChar32 c ) ;
/**
* Determines whether the specified code point is a " blank " or " horizontal space " ,
* a character that visibly separates words on a line .
* The following are equivalent definitions :
*
* TRUE for Unicode White_Space characters except for " vertical space controls "
* where " vertical space controls " contains
* U + 000 A ( LF ) U + 000 B ( VT ) U + 000 C ( FF ) U + 000 D ( CR ) U + 00 85 ( NEL ) U + 2028 ( LS ) U + 2029 ( PS )
*
* same as
*
* TRUE for U + 000 9 ( TAB ) and characters with general category " Zs " ( space separators )
* except Zero Width Space ( ZWSP , U + 200 B ) .
*
* Comparison :
* - u_isUWhiteSpace = UCHAR_WHITE_SPACE : Unicode White_Space property ;
* most of general categories " Z " ( separators ) + most whitespace ISO controls
* ( including no - break spaces , but excluding IS1 . . IS4 and ZWSP )
* - u_isWhitespace : Java isWhitespace ; Z + whitespace ISO controls but excluding no - break spaces
* - u_isJavaSpaceChar : Java isSpaceChar ; just Z ( including no - break spaces )
* - u_isspace : Z + whitespace ISO controls ( including no - break spaces )
* - u_isblank : " horizontal spaces " = TAB + Zs - ZWSP
*
* This is a C / POSIX migration function .
* See the comments about C / POSIX character classification functions in the
* documentation at the top of this header file .
*
* @ param c the code point to be tested
* @ return TRUE if the code point is a " blank "
*
* @ draft ICU 2.6
*/
U_CAPI UBool U_EXPORT2
u_isblank ( UChar32 c ) ;
1999-12-28 23:39:02 +00:00
/**
2003-02-11 01:59:32 +00:00
* Determines whether the specified code point is " defined " ,
* which usually means that it is assigned a character .
* True for general categories other than " Cn " ( other , not assigned ) ,
* i . e . , true for all code points mentioned in UnicodeData . txt .
1999-12-28 23:39:02 +00:00
*
2003-04-24 23:09:26 +00:00
* Note that non - character code points ( e . g . , U + FDD0 ) are not " defined "
* ( they are Cn ) , but surrogate code points are " defined " ( Cs ) .
2003-02-11 01:59:32 +00:00
*
* Same as java . lang . Character . isDefined ( ) .
*
* @ param c the code point to be tested
* @ return TRUE if the code point is assigned a character
1999-12-28 23:39:02 +00:00
*
2001-11-09 18:17:40 +00:00
* @ see u_isdigit
* @ see u_isalpha
* @ see u_isalnum
* @ see u_isupper
* @ see u_islower
* @ see u_istitle
2002-12-04 23:39:56 +00:00
* @ stable ICU 2.0
1999-12-28 23:39:02 +00:00
*/
2000-05-18 22:08:39 +00:00
U_CAPI UBool U_EXPORT2
2000-02-03 19:54:49 +00:00
u_isdefined ( UChar32 c ) ;
1999-12-28 23:39:02 +00:00
/**
* Determines if the specified character is a space character or not .
2003-02-11 01:59:32 +00:00
*
* Comparison :
* - u_isUWhiteSpace = UCHAR_WHITE_SPACE : Unicode White_Space property ;
2003-04-24 23:09:26 +00:00
* most of general categories " Z " ( separators ) + most whitespace ISO controls
* ( including no - break spaces , but excluding IS1 . . IS4 and ZWSP )
2003-02-11 01:59:32 +00:00
* - u_isWhitespace : Java isWhitespace ; Z + whitespace ISO controls but excluding no - break spaces
2003-04-16 00:20:23 +00:00
* - u_isJavaSpaceChar : Java isSpaceChar ; just Z ( including no - break spaces )
2003-02-11 01:59:32 +00:00
* - u_isspace : Z + whitespace ISO controls ( including no - break spaces )
2003-04-24 23:09:26 +00:00
* - u_isblank : " horizontal spaces " = TAB + Zs - ZWSP
*
* This is a C / POSIX migration function .
* See the comments about C / POSIX character classification functions in the
* documentation at the top of this header file .
1999-12-28 23:39:02 +00:00
*
2002-07-03 12:05:56 +00:00
* @ param c the character to be tested
1999-12-28 23:39:02 +00:00
* @ return true if the character is a space character ; false otherwise .
2003-02-09 21:02:26 +00:00
*
* @ see u_isJavaSpaceChar
* @ see u_isWhitespace
* @ see u_isUWhiteSpace
2002-12-04 23:39:56 +00:00
* @ stable ICU 2.0
1999-12-28 23:39:02 +00:00
*/
2000-05-18 22:08:39 +00:00
U_CAPI UBool U_EXPORT2
2000-02-03 19:54:49 +00:00
u_isspace ( UChar32 c ) ;
1999-12-28 23:39:02 +00:00
2000-05-18 17:40:19 +00:00
/**
2003-02-09 21:02:26 +00:00
* Determine if the specified code point is a space character according to Java .
* True for characters with general categories " Z " ( separators ) ,
* which does not include control codes ( e . g . , TAB or Line Feed ) .
*
* Same as java . lang . Character . isSpaceChar ( ) .
*
2003-02-11 01:59:32 +00:00
* Comparison :
* - u_isUWhiteSpace = UCHAR_WHITE_SPACE : Unicode White_Space property ;
2003-04-24 23:09:26 +00:00
* most of general categories " Z " ( separators ) + most whitespace ISO controls
* ( including no - break spaces , but excluding IS1 . . IS4 and ZWSP )
2003-02-11 01:59:32 +00:00
* - u_isWhitespace : Java isWhitespace ; Z + whitespace ISO controls but excluding no - break spaces
2003-04-16 00:20:23 +00:00
* - u_isJavaSpaceChar : Java isSpaceChar ; just Z ( including no - break spaces )
2003-02-11 01:59:32 +00:00
* - u_isspace : Z + whitespace ISO controls ( including no - break spaces )
2003-04-24 23:09:26 +00:00
* - u_isblank : " horizontal spaces " = TAB + Zs - ZWSP
2003-02-11 01:59:32 +00:00
*
2003-02-09 21:02:26 +00:00
* @ param c the code point to be tested
* @ return TRUE if the code point is a space character according to Character . isSpaceChar ( )
*
* @ see u_isspace
* @ see u_isWhitespace
* @ see u_isUWhiteSpace
* @ draft ICU 2.6
*/
U_CAPI UBool U_EXPORT2
u_isJavaSpaceChar ( UChar32 c ) ;
/**
* Determines if the specified code point is a whitespace character according to Java / ICU .
* A character is considered to be a Java whitespace character if and only
2000-05-18 17:40:19 +00:00
* if it satisfies one of the following criteria :
*
2003-02-09 21:02:26 +00:00
* - It is a Unicode separator ( categories " Z " ) , but is not
* a no - break space ( U + 00 A0 NBSP or U + 2007 Figure Space or U + 202F Narrow NBSP ) .
* - It is U + 000 9 HORIZONTAL TABULATION .
* - It is U + 000 A LINE FEED .
* - It is U + 000 B VERTICAL TABULATION .
* - It is U + 000 C FORM FEED .
* - It is U + 000 D CARRIAGE RETURN .
* - It is U + 001 C FILE SEPARATOR .
* - It is U + 001 D GROUP SEPARATOR .
* - It is U + 001 E RECORD SEPARATOR .
* - It is U + 001F UNIT SEPARATOR .
* - It is U + 00 85 NEXT LINE .
*
* Same as java . lang . Character . isWhitespace ( ) except that Java omits U + 0085.
*
2003-02-11 01:59:32 +00:00
* Comparison :
* - u_isUWhiteSpace = UCHAR_WHITE_SPACE : Unicode White_Space property ;
2003-04-24 23:09:26 +00:00
* most of general categories " Z " ( separators ) + most whitespace ISO controls
* ( including no - break spaces , but excluding IS1 . . IS4 and ZWSP )
2003-02-11 01:59:32 +00:00
* - u_isWhitespace : Java isWhitespace ; Z + whitespace ISO controls but excluding no - break spaces
2003-04-16 00:20:23 +00:00
* - u_isJavaSpaceChar : Java isSpaceChar ; just Z ( including no - break spaces )
2003-02-11 01:59:32 +00:00
* - u_isspace : Z + whitespace ISO controls ( including no - break spaces )
2003-04-24 23:09:26 +00:00
* - u_isblank : " horizontal spaces " = TAB + Zs - ZWSP
2003-02-11 01:59:32 +00:00
*
2003-02-09 21:02:26 +00:00
* @ param c the code point to be tested
* @ return TRUE if the code point is a whitespace character according to Java / ICU
*
* @ see u_isspace
* @ see u_isJavaSpaceChar
* @ see u_isUWhiteSpace
2002-12-04 23:39:56 +00:00
* @ stable ICU 2.0
2000-05-18 17:40:19 +00:00
*/
2000-05-18 22:08:39 +00:00
U_CAPI UBool U_EXPORT2
2000-05-18 17:40:19 +00:00
u_isWhitespace ( UChar32 c ) ;
1999-12-28 23:39:02 +00:00
/**
2003-02-11 01:59:32 +00:00
* Determines whether the specified code point is a control character
* ( as defined by this function ) .
2001-03-17 01:56:34 +00:00
* A control character is one of the following :
* - ISO 8 - bit control character ( U + 0000. . U + 001f and U + 007f . . U + 009f )
* - U_CONTROL_CHAR ( Cc )
* - U_FORMAT_CHAR ( Cf )
* - U_LINE_SEPARATOR ( Zl )
* - U_PARAGRAPH_SEPARATOR ( Zp )
1999-12-28 23:39:02 +00:00
*
2003-04-24 23:09:26 +00:00
* This is a C / POSIX migration function .
* See the comments about C / POSIX character classification functions in the
* documentation at the top of this header file .
*
2003-02-11 01:59:32 +00:00
* @ param c the code point to be tested
* @ return TRUE if the code point is a control character
1999-12-28 23:39:02 +00:00
*
2003-02-11 01:59:32 +00:00
* @ see UCHAR_DEFAULT_IGNORABLE_CODE_POINT
2001-11-09 18:17:40 +00:00
* @ see u_isprint
2002-12-04 23:39:56 +00:00
* @ stable ICU 2.0
1999-12-28 23:39:02 +00:00
*/
2000-05-18 22:08:39 +00:00
U_CAPI UBool U_EXPORT2
2000-02-03 19:54:49 +00:00
u_iscntrl ( UChar32 c ) ;
1999-12-28 23:39:02 +00:00
2003-02-09 21:02:26 +00:00
/**
* Determines whether the specified code point is an ISO control code .
2003-02-11 01:59:32 +00:00
* True for U + 0000. . U + 001f and U + 007f . . U + 009f ( general category " Cc " ) .
2003-02-09 21:02:26 +00:00
*
* Same as java . lang . Character . isISOControl ( ) .
*
* @ param c the code point to be tested
* @ return TRUE if the code point is an ISO control code
*
* @ see u_iscntrl
* @ draft ICU 2.6
*/
U_CAPI UBool U_EXPORT2
u_isISOControl ( UChar32 c ) ;
1999-12-28 23:39:02 +00:00
/**
2003-02-11 01:59:32 +00:00
* Determines whether the specified code point is a printable character .
* True for general categories < em > other < / em > than " C " ( controls ) .
1999-12-28 23:39:02 +00:00
*
2003-04-24 23:09:26 +00:00
* This is a C / POSIX migration function .
* See the comments about C / POSIX character classification functions in the
* documentation at the top of this header file .
*
2003-02-11 01:59:32 +00:00
* @ param c the code point to be tested
* @ return TRUE if the code point is a printable character
1999-12-28 23:39:02 +00:00
*
2003-02-11 01:59:32 +00:00
* @ see UCHAR_DEFAULT_IGNORABLE_CODE_POINT
2001-11-09 18:17:40 +00:00
* @ see u_iscntrl
2002-12-04 23:39:56 +00:00
* @ stable ICU 2.0
1999-12-28 23:39:02 +00:00
*/
2000-05-18 22:08:39 +00:00
U_CAPI UBool U_EXPORT2
2000-02-03 19:54:49 +00:00
u_isprint ( UChar32 c ) ;
1999-12-28 23:39:02 +00:00
/**
2003-02-11 01:59:32 +00:00
* Determines whether the specified code point is a base character .
* True for general categories " L " ( letters ) , " N " ( numbers ) ,
* " Mc " ( spacing combining marks ) , and " Me " ( enclosing marks ) .
*
* Note that this is different from the Unicode definition in
* chapter 3.5 , conformance clause D13 ,
* which defines base characters to be all characters ( not Cn )
* that do not graphically combine with preceding characters ( M )
* and that are neither control ( Cc ) nor format ( Cf ) characters .
1999-12-28 23:39:02 +00:00
*
2003-02-11 01:59:32 +00:00
* @ param c the code point to be tested
* @ return TRUE if the code point is a base character according to this function
1999-12-28 23:39:02 +00:00
*
2001-11-09 18:17:40 +00:00
* @ see u_isalpha
* @ see u_isdigit
2002-12-04 23:39:56 +00:00
* @ stable ICU 2.0
1999-12-28 23:39:02 +00:00
*/
2000-05-18 22:08:39 +00:00
U_CAPI UBool U_EXPORT2
2000-02-03 19:54:49 +00:00
u_isbase ( UChar32 c ) ;
2000-04-24 22:31:22 +00:00
1999-12-28 23:39:02 +00:00
/**
2003-02-11 01:59:32 +00:00
* Returns the bidirectional category value for the code point ,
* which is used in the Unicode bidirectional algorithm
* ( UAX # 9 http : //www.unicode.org/reports/tr9/).
* Note that some < em > unassigned < / em > code points have bidi values
* of R or AL because they are in blocks that are reserved
* for Right - To - Left scripts .
*
* Same as java . lang . Character . getDirectionality ( )
*
* @ param c the code point to be tested
* @ return the bidirectional category ( UCharDirection ) value
*
2001-11-09 18:17:40 +00:00
* @ see UCharDirection
2002-12-04 23:39:56 +00:00
* @ stable ICU 2.0
2001-11-09 18:17:40 +00:00
*/
1999-12-28 23:39:02 +00:00
U_CAPI UCharDirection U_EXPORT2
2000-02-03 19:54:49 +00:00
u_charDirection ( UChar32 c ) ;
1999-12-28 23:39:02 +00:00
2000-04-24 22:31:22 +00:00
/**
2003-02-11 01:59:32 +00:00
* Determines whether the code point has the Bidi_Mirrored property .
2000-04-24 22:31:22 +00:00
* This property is set for characters that are commonly used in
* Right - To - Left contexts and need to be displayed with a " mirrored "
* glyph .
*
2003-02-11 01:59:32 +00:00
* Same as java . lang . Character . isMirrored ( ) .
* Same as UCHAR_BIDI_MIRRORED
*
* @ param c the code point to be tested
* @ return TRUE if the character has the Bidi_Mirrored property
*
* @ see UCHAR_BIDI_MIRRORED
2002-12-04 23:39:56 +00:00
* @ stable ICU 2.0
2000-04-24 22:31:22 +00:00
*/
2000-05-18 22:08:39 +00:00
U_CAPI UBool U_EXPORT2
2000-04-24 22:31:22 +00:00
u_isMirrored ( UChar32 c ) ;
/**
* Maps the specified character to a " mirror-image " character .
2003-02-11 01:59:32 +00:00
* For characters with the Bidi_Mirrored property , implementations
2000-04-24 22:31:22 +00:00
* sometimes need a " poor man's " mapping to another Unicode
* character ( code point ) such that the default glyph may serve
* as the mirror - image of the default glyph of the specified
* character . This is useful for text conversion to and from
* codepages with visual order , and for displays without glyph
* selection capabilities .
*
2003-02-11 01:59:32 +00:00
* @ param c the code point to be mapped
2000-04-24 22:31:22 +00:00
* @ return another Unicode code point that may serve as a mirror - image
* substitute , or c itself if there is no such mapping or c
2003-02-11 01:59:32 +00:00
* does not have the Bidi_Mirrored property
*
* @ see UCHAR_BIDI_MIRRORED
* @ see u_isMirrored
2002-12-04 23:39:56 +00:00
* @ stable ICU 2.0
2000-04-24 22:31:22 +00:00
*/
U_CAPI UChar32 U_EXPORT2
u_charMirror ( UChar32 c ) ;
1999-12-28 23:39:02 +00:00
/**
2002-12-06 00:19:24 +00:00
* Obsolete because the " cell width " functions and implementation are
2002-07-04 00:38:51 +00:00
* out of date compared with Unicode Standard Annex # 11.
* Use u_getIntPropertyValue with UCHAR_EAST_ASIAN_WIDTH .
* See http : //www.unicode.org/reports/tr11/
*
1999-12-28 23:39:02 +00:00
* Returns a value indicating the display - cell width of the character
* when used in Asian text , according to the Unicode standard ( see p . 6 - 130
* of The Unicode Standard , Version 2.0 ) . The results for various characters
* are as follows :
* < P >
2002-03-22 17:35:20 +00:00
* U_ZERO_WIDTH : Characters which are considered to take up no display - cell space :
1999-12-28 23:39:02 +00:00
* control characters
* format characters
* line and paragraph separators
* non - spacing marks
* combining Hangul jungseong
* combining Hangul jongseong
* unassigned Unicode values
* < P >
2002-03-22 17:35:20 +00:00
* U_HALF_WIDTH : Characters which take up half a cell in standard Asian text :
1999-12-28 23:39:02 +00:00
* all characters in the General Scripts Area except combining Hangul choseong
* and the characters called out specifically above as ZERO_WIDTH
* alphabetic and Arabic presentation forms
* halfwidth CJK punctuation
* halfwidth Katakana
* halfwidth Hangul Jamo
* halfwidth forms , arrows , and shapes
* < P >
2002-03-22 17:35:20 +00:00
* U_FULL_WIDTH : Characters which take up a full cell in standard Asian text :
1999-12-28 23:39:02 +00:00
* combining Hangul choseong
* all characters in the CJK Phonetics and Symbols Area
* all characters in the CJK Ideographs Area
* all characters in the Hangul Syllables Area
* CJK compatibility ideographs
* CJK compatibility forms
* small form variants
* fullwidth ASCII
* fullwidth punctuation and currency signs
* < P >
2002-03-22 17:35:20 +00:00
* U_NEUTRAL_WIDTH : Characters whose cell width is context - dependent :
1999-12-28 23:39:02 +00:00
* all characters in the Symbols Area , except those specifically called out above
* all characters in the Surrogates Area
* all characters in the Private Use Area
* < P >
* For Korean text , this algorithm should work properly with properly normalized Korean
* text . Precomposed Hangul syllables and non - combining jamo are all considered full -
* width characters . For combining jamo , we treat choseong ( initial consonants )
* as double - width characters and jungseong ( vowels ) and jongseong ( final consonants )
* as non - spacing marks . This will work right in text that uses the precomposed
* choseong characters instead of two choseong characters in a row , and which uses the
* choseong filler character at the beginning of syllables that don ' t have an initial
* consonant . The results may be slightly off with Korean text following different
* conventions .
2002-07-03 12:05:56 +00:00
* @ param c The character to be tested
* @ return a value indicating the display - cell width of the character when used in Asian text
2002-12-06 00:19:24 +00:00
* @ obsolete ICU 2.6 . Use UCHAR_EAST_ASIAN_WIDTH instead since this API will be removed in that release .
1999-12-28 23:39:02 +00:00
*/
U_CAPI uint16_t U_EXPORT2
2000-02-03 19:54:49 +00:00
u_charCellWidth ( UChar32 c ) ;
1999-12-28 23:39:02 +00:00
/**
2003-02-11 01:59:32 +00:00
* Returns the general category value for the code point .
*
* Same as java . lang . Character . getType ( ) .
*
* @ param c the code point to be tested
* @ return the general category ( UCharCategory ) value
2001-01-23 23:45:21 +00:00
*
1999-12-28 23:39:02 +00:00
* @ see UCharCategory
2002-12-04 23:39:56 +00:00
* @ stable ICU 2.0
1999-12-28 23:39:02 +00:00
*/
U_CAPI int8_t U_EXPORT2
2000-02-03 19:54:49 +00:00
u_charType ( UChar32 c ) ;
1999-12-28 23:39:02 +00:00
2002-03-13 23:31:12 +00:00
/**
* Get a single - bit bit set for the general category of a character .
* This bit set can be compared bitwise with U_GC_SM_MASK , U_GC_L_MASK , etc .
* Same as U_MASK ( u_charType ( c ) ) .
*
2003-02-11 01:59:32 +00:00
* @ param c the code point to be tested
* @ return a single - bit mask corresponding to the general category ( UCharCategory ) value
*
2002-03-13 23:31:12 +00:00
* @ see u_charType
* @ see UCharCategory
* @ see U_GC_CN_MASK
2003-02-11 01:59:32 +00:00
* @ stable ICU 2.1
2002-03-13 23:31:12 +00:00
*/
# define U_GET_GC_MASK(c) U_MASK(u_charType(c))
2002-01-12 00:11:09 +00:00
/**
* Callback from u_enumCharTypes ( ) , is called for each contiguous range
* of code points c ( where start < = c < limit )
* with the same Unicode general category ( " character type " ) .
*
* The callback function can stop the enumeration by returning FALSE .
*
* @ param context an opaque pointer , as passed into u_enumCharTypes ( )
* @ param start the first code point in a contiguous range with value
* @ param limit one past the last code point in a contiguous range with value
* @ param type the general category for all code points in [ start . . limit [
* @ return FALSE to stop the enumeration
*
2003-02-11 01:59:32 +00:00
* @ stable ICU 2.1
2002-01-12 00:11:09 +00:00
* @ see UCharCategory
* @ see u_enumCharTypes
*/
typedef UBool U_CALLCONV
UCharEnumTypeRange ( const void * context , UChar32 start , UChar32 limit , UCharCategory type ) ;
/**
* Enumerate efficiently all code points with their Unicode general categories .
*
* This is useful for building data structures ( e . g . , UnicodeSet ' s ) ,
* for enumerating all assigned code points ( type ! = U_UNASSIGNED ) , etc .
*
* For each contiguous range of code points with a given general category ( " character type " ) ,
* the UCharEnumTypeRange function is called .
* Adjacent ranges have different types .
* The Unicode Standard guarantees that the numeric value of the type is 0. .31 .
*
* @ param enumRange a pointer to a function that is called for each contiguous range
* of code points with the same general category
* @ param context an opaque pointer that is passed on to the callback function
*
2003-02-11 01:59:32 +00:00
* @ stable ICU 2.1
2002-01-12 00:11:09 +00:00
* @ see UCharCategory
* @ see UCharEnumTypeRange
*/
U_CAPI void U_EXPORT2
u_enumCharTypes ( UCharEnumTypeRange * enumRange , const void * context ) ;
2001-01-23 23:45:21 +00:00
/**
* Returns the combining class of the code point as specified in UnicodeData . txt .
*
* @ param c the code point of the character
* @ return the combining class of the character
2002-12-04 23:39:56 +00:00
* @ stable ICU 2.0
2001-01-23 23:45:21 +00:00
*/
U_CAPI uint8_t U_EXPORT2
u_getCombiningClass ( UChar32 c ) ;
1999-12-28 23:39:02 +00:00
/**
2003-04-09 19:04:01 +00:00
* Returns the decimal digit value of a decimal digit character .
2003-02-11 23:27:40 +00:00
* Such characters have the general category " Nd " ( decimal digit numbers )
* and a Numeric_Type of Decimal .
2003-02-11 01:59:32 +00:00
*
2003-04-09 19:04:01 +00:00
* Unlike ICU releases before 2.6 , no digit values are returned for any
* Han characters because Han number characters are often used with a special
* Chinese - style number format ( with characters for powers of 10 in between )
* instead of in decimal - positional notation .
* Unicode 4 explicitly assigns Han number characters the Numeric_Type
* Numeric instead of Decimal .
* See Jitterbug 1483 for more details .
*
* Use u_getIntPropertyValue ( c , UCHAR_NUMERIC_TYPE ) and u_getNumericValue ( )
* for complete numeric Unicode properties .
*
* @ param c the code point for which to get the decimal digit value
* @ return the decimal digit value of c ,
2003-02-11 01:59:32 +00:00
* or - 1 if c is not a decimal digit character
1999-12-28 23:39:02 +00:00
*
2003-02-11 01:59:32 +00:00
* @ see u_getNumericValue
2002-12-04 23:39:56 +00:00
* @ stable ICU 2.0
1999-12-28 23:39:02 +00:00
*/
U_CAPI int32_t U_EXPORT2
2000-02-03 19:54:49 +00:00
u_charDigitValue ( UChar32 c ) ;
1999-12-28 23:39:02 +00:00
/**
2001-09-11 05:00:34 +00:00
* Returns the Unicode allocation block that contains the character .
1999-12-28 23:39:02 +00:00
*
2003-02-11 01:59:32 +00:00
* @ param c the code point to be tested
* @ return the block value ( UBlockCode ) for c
*
* @ see UBlockCode
2002-12-04 23:39:56 +00:00
* @ stable ICU 2.0
1999-12-28 23:39:02 +00:00
*/
2001-09-11 05:00:34 +00:00
U_CAPI UBlockCode U_EXPORT2
2003-02-11 01:59:32 +00:00
ublock_getCode ( UChar32 c ) ;
1999-12-28 23:39:02 +00:00
/**
* Retrieve the name of a Unicode character .
* Depending on < code > nameChoice < / code > , the character name written
* into the buffer is the " modern " name or the name that was defined
* in Unicode version 1.0 .
* The name contains only " invariant " characters
* like A - Z , 0 - 9 , space , and ' - ' .
2000-10-10 17:33:09 +00:00
* Unicode 1.0 names are only retrieved if they are different from the modern
* names and if the data file contains the data for them . gennames may or may
* not be called with a command line option to include 1.0 names in unames . dat .
1999-12-28 23:39:02 +00:00
*
* @ param code The character ( code point ) for which to get the name .
2002-07-08 16:40:57 +00:00
* It must be < code > 0 < = code < = 0x10ffff < / code > .
1999-12-28 23:39:02 +00:00
* @ param nameChoice Selector for which name to get .
* @ param buffer Destination address for copying the name .
2000-10-11 16:10:25 +00:00
* The name will always be zero - terminated .
* If there is no name , then the buffer will be set to the empty string .
1999-12-28 23:39:02 +00:00
* @ param bufferLength < code > = = sizeof ( buffer ) < / code >
* @ param pErrorCode Pointer to a UErrorCode variable ;
* check for < code > U_SUCCESS ( ) < / code > after < code > u_charName ( ) < / code >
* returns .
2000-10-11 16:10:25 +00:00
* @ return The length of the name , or 0 if there is no name for this character .
* If the bufferLength is less than or equal to the length , then the buffer
* contains the truncated name and the returned length indicates the full
* length of the name .
* The length does not include the zero - termination .
1999-12-28 23:39:02 +00:00
*
* @ see UCharNameChoice
2000-10-11 16:10:25 +00:00
* @ see u_charFromName
* @ see u_enumCharNames
2002-12-04 23:39:56 +00:00
* @ stable ICU 2.0
1999-12-28 23:39:02 +00:00
*/
2002-03-12 01:32:42 +00:00
U_CAPI int32_t U_EXPORT2
2000-10-10 17:33:09 +00:00
u_charName ( UChar32 code , UCharNameChoice nameChoice ,
2002-03-12 01:32:42 +00:00
char * buffer , int32_t bufferLength ,
1999-12-28 23:39:02 +00:00
UErrorCode * pErrorCode ) ;
2002-07-08 16:40:57 +00:00
/**
* Get the ISO 10646 comment for a character .
* The ISO 10646 comment is an informative field in the Unicode Character
* Database ( UnicodeData . txt field 11 ) and is from the ISO 10646 names list .
*
* @ param c The character ( code point ) for which to get the ISO comment .
* It must be < code > 0 < = c < = 0x10ffff < / code > .
* @ param dest Destination address for copying the comment .
* The comment will be zero - terminated if possible .
* If there is no comment , then the buffer will be set to the empty string .
* @ param destCapacity < code > = = sizeof ( dest ) < / code >
* @ param pErrorCode Pointer to a UErrorCode variable ;
* check for < code > U_SUCCESS ( ) < / code > after < code > u_getISOComment ( ) < / code >
* returns .
* @ return The length of the comment , or 0 if there is no comment for this character .
* If the destCapacity is less than or equal to the length , then the buffer
* contains the truncated comment and the returned length indicates the full
* length of the comment .
* The length does not include the zero - termination .
*
* @ draft ICU 2.2
*/
U_CAPI int32_t U_EXPORT2
u_getISOComment ( UChar32 c ,
char * dest , int32_t destCapacity ,
UErrorCode * pErrorCode ) ;
2000-06-29 18:27:07 +00:00
/**
* Find a Unicode character by its name and return its code point value .
2000-10-11 16:10:25 +00:00
* The name is matched exactly and completely .
2002-02-15 19:10:38 +00:00
* If the name does not correspond to a code point , < i > pErrorCode < / i >
* is set to < code > U_INVALID_CHAR_FOUND < / code > .
2000-10-11 16:10:25 +00:00
* A Unicode 1.0 name is matched only if it differs from the modern name .
2002-02-15 19:10:38 +00:00
* Unicode names are all uppercase . Extended names are lowercase followed
* by an uppercase hexadecimal number , and within angle brackets .
2000-10-11 16:10:25 +00:00
*
* @ param nameChoice Selector for which name to match .
* @ param name The name to match .
* @ param pErrorCode Pointer to a UErrorCode variable
2002-02-15 19:10:38 +00:00
* @ return The Unicode value of the code point with the given name ,
* or an undefined value if there is no such code point .
2000-10-11 16:10:25 +00:00
*
* @ see UCharNameChoice
* @ see u_charName
* @ see u_enumCharNames
2002-12-11 22:49:18 +00:00
* @ stable ICU 1.7
2000-06-29 18:27:07 +00:00
*/
U_CAPI UChar32 U_EXPORT2
u_charFromName ( UCharNameChoice nameChoice ,
const char * name ,
UErrorCode * pErrorCode ) ;
/**
* Type of a callback function for u_enumCharNames ( ) that gets called
* for each Unicode character with the code point value and
* the character name .
* If such a function returns FALSE , then the enumeration is stopped .
2000-10-11 16:10:25 +00:00
*
* @ param context The context pointer that was passed to u_enumCharNames ( ) .
* @ param code The Unicode code point for the character with this name .
* @ param nameChoice Selector for which kind of names is enumerated .
* @ param name The character ' s name , zero - terminated .
* @ param length The length of the name .
* @ return TRUE if the enumeration should continue , FALSE to stop it .
*
* @ see UCharNameChoice
* @ see u_enumCharNames
2002-12-11 22:49:18 +00:00
* @ stable ICU 1.7
2000-06-29 18:27:07 +00:00
*/
typedef UBool UEnumCharNamesFn ( void * context ,
UChar32 code ,
UCharNameChoice nameChoice ,
2000-10-10 17:33:09 +00:00
const char * name ,
2002-03-12 01:32:42 +00:00
int32_t length ) ;
2000-06-29 18:27:07 +00:00
/**
* Enumerate all assigned Unicode characters between the start and limit
* code points ( start inclusive , limit exclusive ) and call a function
* for each , passing the code point value and the character name .
2000-10-11 16:10:25 +00:00
* For Unicode 1.0 names , only those are enumerated that differ from the
* modern names .
*
* @ param start The first code point in the enumeration range .
* @ param limit One more than the last code point in the enumeration range
* ( the first one after the range ) .
* @ param fn The function that is to be called for each character name .
* @ param context An arbitrary pointer that is passed to the function .
* @ param nameChoice Selector for which kind of names to enumerate .
* @ param pErrorCode Pointer to a UErrorCode variable
*
* @ see UCharNameChoice
* @ see UEnumCharNamesFn
* @ see u_charName
* @ see u_charFromName
2002-12-11 22:49:18 +00:00
* @ stable ICU 1.7
2000-06-29 18:27:07 +00:00
*/
U_CAPI void U_EXPORT2
u_enumCharNames ( UChar32 start , UChar32 limit ,
UEnumCharNamesFn * fn ,
void * context ,
UCharNameChoice nameChoice ,
UErrorCode * pErrorCode ) ;
2002-10-30 18:24:53 +00:00
/**
* Return the Unicode name for a given property , as given in the
* Unicode database file PropertyAliases . txt .
*
2002-12-11 01:09:02 +00:00
* In addition , this function maps the property
* UCHAR_GENERAL_CATEGORY_MASK to the synthetic names " gcm " /
* " General_Category_Mask " . These names are not in
* PropertyAliases . txt .
*
2002-10-30 18:24:53 +00:00
* @ param property UProperty selector other than UCHAR_INVALID_CODE .
* If out of range , NULL is returned .
*
* @ param nameChoice selector for which name to get . If out of range ,
* NULL is returned . All properties have a long name . Most
* have a short name , but some do not . Unicode allows for
* additional names ; if present these will be returned by
* U_LONG_PROPERTY_NAME + i , where i = 1 , 2 , . . .
*
* @ return a pointer to the name , or NULL if either the
* property or the nameChoice is out of range . If a given
* nameChoice returns NULL , then all larger values of
* nameChoice will return NULL , with one exception : if NULL is
* returned for U_SHORT_PROPERTY_NAME , then
* U_LONG_PROPERTY_NAME ( and higher ) may still return a
* non - NULL value . The returned pointer is valid until
* u_cleanup ( ) is called .
*
* @ see UProperty
* @ see UPropertyNameChoice
* @ draft ICU 2.4
*/
U_CAPI const char * U_EXPORT2
u_getPropertyName ( UProperty property ,
UPropertyNameChoice nameChoice ) ;
/**
* Return the UProperty enum for a given property name , as specified
* in the Unicode database file PropertyAliases . txt . Short , long , and
* any other variants are recognized .
*
2002-12-11 01:09:02 +00:00
* In addition , this function maps the synthetic names " gcm " /
* " General_Category_Mask " to the property
* UCHAR_GENERAL_CATEGORY_MASK . These names are not in
* PropertyAliases . txt .
*
2002-10-30 18:24:53 +00:00
* @ param alias the property name to be matched . The name is compared
* using " loose matching " as described in PropertyAliases . txt .
*
* @ return a UProperty enum , or UCHAR_INVALID_CODE if the given name
* does not match any property .
*
* @ see UProperty
* @ draft ICU 2.4
*/
U_CAPI UProperty U_EXPORT2
u_getPropertyEnum ( const char * alias ) ;
/**
* Return the Unicode name for a given property value , as given in the
* Unicode database file PropertyValueAliases . txt .
*
2002-12-11 01:09:02 +00:00
* Note : Some of the names in PropertyValueAliases . txt can only be
* retrieved using UCHAR_GENERAL_CATEGORY_MASK , not
* UCHAR_GENERAL_CATEGORY . These include : " C " / " Other " , " L " /
* " Letter " , " LC " / " Cased_Letter " , " M " / " Mark " , " N " / " Number " , " P "
* / " Punctuation " , " S " / " Symbol " , and " Z " / " Separator " .
*
2002-12-10 00:33:45 +00:00
* @ param property UProperty selector constant .
* Must be UCHAR_BINARY_START < = property < UCHAR_BINARY_LIMIT
* or UCHAR_INT_START < = property < UCHAR_INT_LIMIT
* or UCHAR_MASK_START < = property < UCHAR_MASK_LIMIT .
* If out of range , NULL is returned .
2002-10-30 18:24:53 +00:00
*
* @ param value selector for a value for the given property . If out
* of range , NULL is returned . In general , valid values range
* from 0 up to some maximum . There are a few exceptions :
* ( 1. ) UCHAR_BLOCK values begin at the non - zero value
* UBLOCK_BASIC_LATIN . ( 2. ) UCHAR_CANONICAL_COMBINING_CLASS
* values are not contiguous and range from 0. .240 . ( 3. )
2002-12-11 01:09:02 +00:00
* UCHAR_GENERAL_CATEGORY_MASK values are not values of
2002-10-30 18:24:53 +00:00
* UCharCategory , but rather mask values produced by
* U_GET_GC_MASK ( ) . This allows grouped categories such as
* [ : L : ] to be represented . Mask values range
* non - contiguously from 1. . U_GC_P_MASK .
*
* @ param nameChoice selector for which name to get . If out of range ,
* NULL is returned . All values have a long name . Most have
* a short name , but some do not . Unicode allows for
* additional names ; if present these will be returned by
* U_LONG_PROPERTY_NAME + i , where i = 1 , 2 , . . .
* @ return a pointer to the name , or NULL if either the
* property or the nameChoice is out of range . If a given
* nameChoice returns NULL , then all larger values of
* nameChoice will return NULL , with one exception : if NULL is
* returned for U_SHORT_PROPERTY_NAME , then
* U_LONG_PROPERTY_NAME ( and higher ) may still return a
* non - NULL value . The returned pointer is valid until
* u_cleanup ( ) is called .
*
* @ see UProperty
* @ see UPropertyNameChoice
* @ draft ICU 2.4
*/
U_CAPI const char * U_EXPORT2
u_getPropertyValueName ( UProperty property ,
int32_t value ,
UPropertyNameChoice nameChoice ) ;
/**
* Return the property value integer for a given value name , as
* specified in the Unicode database file PropertyValueAliases . txt .
* Short , long , and any other variants are recognized .
*
2002-12-11 01:09:02 +00:00
* Note : Some of the names in PropertyValueAliases . txt will only be
* recognized with UCHAR_GENERAL_CATEGORY_MASK , not
* UCHAR_GENERAL_CATEGORY . These include : " C " / " Other " , " L " /
* " Letter " , " LC " / " Cased_Letter " , " M " / " Mark " , " N " / " Number " , " P "
* / " Punctuation " , " S " / " Symbol " , and " Z " / " Separator " .
*
2002-12-10 00:33:45 +00:00
* @ param property UProperty selector constant .
* Must be UCHAR_BINARY_START < = property < UCHAR_BINARY_LIMIT
* or UCHAR_INT_START < = property < UCHAR_INT_LIMIT
* or UCHAR_MASK_START < = property < UCHAR_MASK_LIMIT .
* If out of range , UCHAR_INVALID_CODE is returned .
2002-10-30 18:24:53 +00:00
*
* @ param alias the value name to be matched . The name is compared
* using " loose matching " as described in
* PropertyValueAliases . txt .
*
* @ return a value integer or UCHAR_INVALID_CODE if the given name
* does not match any value of the given property , or if the
* property is invalid . Note : UCHAR_GENERAL_CATEGORY_MASK values
* are not values of UCharCategory , but rather mask values
* produced by U_GET_GC_MASK ( ) . This allows grouped
* categories such as [ : L : ] to be represented .
*
* @ see UProperty
* @ draft ICU 2.4
*/
U_CAPI int32_t U_EXPORT2
u_getPropertyValueEnum ( UProperty property ,
const char * alias ) ;
1999-12-28 23:39:02 +00:00
/**
2003-02-09 21:02:26 +00:00
* Determines if the specified character is permissible as the
2003-02-11 01:59:32 +00:00
* first character in an identifier according to Unicode
* ( The Unicode Standard , Version 3.0 , chapter 5.16 Identifiers ) .
2003-02-09 21:02:26 +00:00
* True for characters with general categories " L " ( letters ) and " Nl " ( letter numbers ) .
2001-11-09 18:17:40 +00:00
*
2003-02-09 21:02:26 +00:00
* Same as java . lang . Character . isUnicodeIdentifierStart ( ) .
2003-02-11 01:59:32 +00:00
* Same as UCHAR_ID_START
2003-02-09 21:02:26 +00:00
*
* @ param c the code point to be tested
2003-02-11 01:59:32 +00:00
* @ return TRUE if the code point may start an identifier
2003-02-09 21:02:26 +00:00
*
2003-02-11 01:59:32 +00:00
* @ see UCHAR_ID_START
2003-02-09 21:02:26 +00:00
* @ see u_isalpha
* @ see u_isIDPart
2002-12-04 23:39:56 +00:00
* @ stable ICU 2.0
2001-11-09 18:17:40 +00:00
*/
2000-05-18 22:08:39 +00:00
U_CAPI UBool U_EXPORT2
2000-02-03 19:54:49 +00:00
u_isIDStart ( UChar32 c ) ;
2001-11-09 18:17:40 +00:00
1999-12-28 23:39:02 +00:00
/**
2003-02-11 01:59:32 +00:00
* Determines if the specified character is permissible
* in an identifier according to Java .
* True for characters with general categories " L " ( letters ) ,
* " Nl " ( letter numbers ) , " Nd " ( decimal digits ) ,
* " Mc " and " Mn " ( combining marks ) , " Pc " ( connecting punctuation ) , and
* u_isIDIgnorable ( c ) .
2003-02-09 21:02:26 +00:00
*
2003-02-11 01:59:32 +00:00
* Same as java . lang . Character . isUnicodeIdentifierPart ( ) .
* Almost the same as Unicode ' s ID_Continue ( UCHAR_ID_CONTINUE )
* except that Unicode recommends to ignore Cf which is less than
* u_isIDIgnorable ( c ) .
*
* @ param c the code point to be tested
* @ return TRUE if the code point may occur in an identifier according to Java
*
* @ see UCHAR_ID_CONTINUE
* @ see u_isIDStart
* @ see u_isIDIgnorable
2002-12-04 23:39:56 +00:00
* @ stable ICU 2.0
2001-11-09 18:17:40 +00:00
*/
2000-05-18 22:08:39 +00:00
U_CAPI UBool U_EXPORT2
2000-02-03 19:54:49 +00:00
u_isIDPart ( UChar32 c ) ;
2001-11-09 18:17:40 +00:00
1999-12-28 23:39:02 +00:00
/**
2003-02-09 21:02:26 +00:00
* Determines if the specified character should be regarded
* as an ignorable character in an identifier ,
* according to Java .
* True for characters with general category " Cf " ( format controls ) as well as
* non - whitespace ISO controls
* ( U + 0000. . U + 000 8 , U + 000 E . . U + 001 B , U + 007F . . U + 00 84 , U + 0086. . U + 009F ) .
*
* Same as java . lang . Character . isIdentifierIgnorable ( )
* except that Java also returns TRUE for U + 00 85 Next Line
* ( it omits U + 00 85 from whitespace ISO controls ) .
*
2003-02-11 01:59:32 +00:00
* Note that Unicode just recommends to ignore Cf ( format controls ) .
*
2003-02-09 21:02:26 +00:00
* @ param c the code point to be tested
* @ return TRUE if the code point is ignorable in identifiers according to Java
*
* @ see UCHAR_DEFAULT_IGNORABLE_CODE_POINT
* @ see u_isIDStart
* @ see u_isIDPart
2002-12-04 23:39:56 +00:00
* @ stable ICU 2.0
2001-11-09 18:17:40 +00:00
*/
2000-05-18 22:08:39 +00:00
U_CAPI UBool U_EXPORT2
2000-02-03 19:54:49 +00:00
u_isIDIgnorable ( UChar32 c ) ;
2001-11-09 18:17:40 +00:00
/**
2003-02-11 01:59:32 +00:00
* Determines if the specified character is permissible as the
* first character in a Java identifier .
* In addition to u_isIDStart ( c ) , true for characters with
* general categories " Sc " ( currency symbols ) and " Pc " ( connecting punctuation ) .
*
* Same as java . lang . Character . isJavaIdentifierStart ( ) .
*
* @ param c the code point to be tested
* @ return TRUE if the code point may start a Java identifier
2001-11-09 18:17:40 +00:00
*
* @ see u_isJavaIDPart
* @ see u_isalpha
* @ see u_isIDStart
2002-12-04 23:39:56 +00:00
* @ stable ICU 2.0
2001-11-09 18:17:40 +00:00
*/
2000-05-18 22:08:39 +00:00
U_CAPI UBool U_EXPORT2
2000-02-03 19:54:49 +00:00
u_isJavaIDStart ( UChar32 c ) ;
1999-12-28 23:39:02 +00:00
2001-11-09 18:17:40 +00:00
/**
2003-02-11 01:59:32 +00:00
* Determines if the specified character is permissible
* in a Java identifier .
* In addition to u_isIDPart ( c ) , true for characters with
* general category " Sc " ( currency symbols ) .
*
* Same as java . lang . Character . isJavaIdentifierPart ( ) .
*
* @ param c the code point to be tested
* @ return TRUE if the code point may occur in a Java identifier
*
2001-11-09 18:17:40 +00:00
* @ see u_isIDIgnorable
* @ see u_isJavaIDStart
* @ see u_isalpha
* @ see u_isdigit
* @ see u_isIDPart
2002-12-04 23:39:56 +00:00
* @ stable ICU 2.0
2001-11-09 18:17:40 +00:00
*/
2000-05-18 22:08:39 +00:00
U_CAPI UBool U_EXPORT2
2000-02-03 19:54:49 +00:00
u_isJavaIDPart ( UChar32 c ) ;
1999-12-28 23:39:02 +00:00
/**
* The given character is mapped to its lowercase equivalent according to
2003-02-11 23:27:40 +00:00
* UnicodeData . txt ; if the character has no lowercase equivalent , the character
1999-12-28 23:39:02 +00:00
* itself is returned .
2003-02-11 23:27:40 +00:00
*
* Same as java . lang . Character . toLowerCase ( ) .
*
* This function only returns the simple , single - code point case mapping .
* Full case mappings may result in zero , one or more code points and depend
* on context or language etc .
* Full case mappings are applied by the string case mapping functions ,
* see ustring . h and the UnicodeString class .
*
* @ param c the code point to be mapped
* @ return the Simple_Lowercase_Mapping of the code point , if any ;
* otherwise the code point itself .
2002-12-04 23:39:56 +00:00
* @ stable ICU 2.0
1999-12-28 23:39:02 +00:00
*/
2000-02-03 19:54:49 +00:00
U_CAPI UChar32 U_EXPORT2
u_tolower ( UChar32 c ) ;
1999-12-28 23:39:02 +00:00
/**
2001-01-23 23:45:21 +00:00
* The given character is mapped to its uppercase equivalent according to UnicodeData . txt ;
2003-02-11 23:27:40 +00:00
* if the character has no uppercase equivalent , the character itself is
1999-12-28 23:39:02 +00:00
* returned .
2003-02-11 23:27:40 +00:00
*
* Same as java . lang . Character . toUpperCase ( ) .
*
* This function only returns the simple , single - code point case mapping .
* Full case mappings may result in zero , one or more code points and depend
* on context or language etc .
* Full case mappings are applied by the string case mapping functions ,
* see ustring . h and the UnicodeString class .
*
* @ param c the code point to be mapped
* @ return the Simple_Uppercase_Mapping of the code point , if any ;
* otherwise the code point itself .
2002-12-04 23:39:56 +00:00
* @ stable ICU 2.0
1999-12-28 23:39:02 +00:00
*/
2000-02-03 19:54:49 +00:00
U_CAPI UChar32 U_EXPORT2
u_toupper ( UChar32 c ) ;
2001-11-09 18:17:40 +00:00
1999-12-28 23:39:02 +00:00
/**
2003-02-11 23:27:40 +00:00
* The given character is mapped to its titlecase equivalent
* according to UnicodeData . txt ;
* if none is defined , the character itself is returned .
*
* Same as java . lang . Character . toTitleCase ( ) .
1999-12-28 23:39:02 +00:00
*
2003-02-11 23:27:40 +00:00
* This function only returns the simple , single - code point case mapping .
* Full case mappings may result in zero , one or more code points and depend
* on context or language etc .
* Full case mappings are applied by the string case mapping functions ,
* see ustring . h and the UnicodeString class .
*
* @ param c the code point to be mapped
* @ return the Simple_Titlecase_Mapping of the code point , if any ;
* otherwise the code point itself .
2002-12-04 23:39:56 +00:00
* @ stable ICU 2.0
1999-12-28 23:39:02 +00:00
*/
2000-02-03 19:54:49 +00:00
U_CAPI UChar32 U_EXPORT2
u_totitle ( UChar32 c ) ;
1999-12-28 23:39:02 +00:00
2002-12-05 00:30:16 +00:00
/** Option value for case folding: use default mappings defined in CaseFolding.txt. @stable ICU 2.0 */
2001-02-14 00:47:36 +00:00
# define U_FOLD_CASE_DEFAULT 0
2002-06-03 03:33:44 +00:00
/**
* Option value for case folding :
*
* Use the modified set of mappings provided in CaseFolding . txt to handle dotted I
* and dotless i appropriately for Turkic languages ( tr , az ) .
*
* Before Unicode 3.2 , CaseFolding . txt contains mappings marked with ' I ' that
* are to be included for default mappings and
* excluded for the Turkic - specific mappings .
*
* Unicode 3.2 CaseFolding . txt instead contains mappings marked with ' T ' that
* are to be excluded for default mappings and
* included for the Turkic - specific mappings .
*
2002-12-04 23:39:56 +00:00
* @ stable ICU 2.0
2002-06-03 03:33:44 +00:00
*/
2001-02-14 00:47:36 +00:00
# define U_FOLD_CASE_EXCLUDE_SPECIAL_I 1
/**
* The given character is mapped to its case folding equivalent according to
2003-02-11 23:27:40 +00:00
* UnicodeData . txt and CaseFolding . txt ;
* if the character has no case folding equivalent , the character
2001-02-14 00:47:36 +00:00
* itself is returned .
*
2003-02-11 23:27:40 +00:00
* This function only returns the simple , single - code point case mapping .
* Full case mappings may result in zero , one or more code points and depend
* on context or language etc .
* Full case mappings are applied by the string case mapping functions ,
* see ustring . h and the UnicodeString class .
*
* @ param c the code point to be mapped
2001-02-14 00:47:36 +00:00
* @ param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I
2003-02-11 23:27:40 +00:00
* @ return the Simple_Case_Folding of the code point , if any ;
* otherwise the code point itself .
2002-12-04 23:39:56 +00:00
* @ stable ICU 2.0
2001-02-14 00:47:36 +00:00
*/
U_CAPI UChar32 U_EXPORT2
u_foldCase ( UChar32 c , uint32_t options ) ;
2001-09-21 00:27:17 +00:00
/**
2003-02-11 23:27:40 +00:00
* Returns the decimal digit value of the code point in the
* specified radix .
*
* If the radix is not in the range < code > 2 < = radix < = 36 < / code > or if the
* value of < code > ch < / code > is not a valid digit in the specified
* radix , < code > - 1 < / code > is returned . A character is a valid digit
2001-09-21 00:27:17 +00:00
* if at least one of the following is true :
* < ul >
2003-02-11 23:27:40 +00:00
* < li > The character has a decimal digit value .
* Such characters have the general category " Nd " ( decimal digit numbers )
* and a Numeric_Type of Decimal .
* In this case the value is the character ' s decimal digit value . < / li >
* < li > The character is one of the uppercase Latin letters
* < code > ' A ' < / code > through < code > ' Z ' < / code > .
* In this case the value is < code > ch - ' A ' + 10 < / code > . < / li >
* < li > The character is one of the lowercase Latin letters
* < code > ' a ' < / code > through < code > ' z ' < / code > .
* In this case the value is < code > ch - ' a ' + 10 < / code > . < / li >
2003-04-24 23:09:26 +00:00
* < li > Latin letters from both the ASCII range ( 0061. .007 A , 0041. .005 A )
* as well as from the Fullwidth ASCII range ( FF41 . . FF5A , FF21 . . FF3A )
* are recognized . < / li >
2001-09-21 00:27:17 +00:00
* < / ul >
*
2003-04-24 23:09:26 +00:00
* Same as java . lang . Character . digit ( ) .
2003-02-11 23:27:40 +00:00
*
* @ param ch the code point to be tested .
2001-09-21 00:27:17 +00:00
* @ param radix the radix .
* @ return the numeric value represented by the character in the
2003-02-11 23:27:40 +00:00
* specified radix ,
* or - 1 if there is no value or if the value exceeds the radix .
2001-09-21 00:27:17 +00:00
*
2003-02-11 23:27:40 +00:00
* @ see UCHAR_NUMERIC_TYPE
2001-09-21 00:27:17 +00:00
* @ see u_forDigit
* @ see u_charDigitValue
* @ see u_isdigit
2002-12-04 23:39:56 +00:00
* @ stable ICU 2.0
2001-09-21 00:27:17 +00:00
*/
2001-11-12 23:01:17 +00:00
U_CAPI int32_t U_EXPORT2
2001-09-21 00:27:17 +00:00
u_digit ( UChar32 ch , int8_t radix ) ;
/**
2003-02-11 23:27:40 +00:00
* Determines the character representation for a specific digit in
* the specified radix . If the value of < code > radix < / code > is not a
* valid radix , or the value of < code > digit < / code > is not a valid
2001-09-21 00:27:17 +00:00
* digit in the specified radix , the null character
2003-02-11 23:27:40 +00:00
* ( < code > U + 0000 < / code > ) is returned .
2001-09-21 00:27:17 +00:00
* < p >
2003-02-11 23:27:40 +00:00
* The < code > radix < / code > argument is valid if it is greater than or
2001-09-21 00:27:17 +00:00
* equal to 2 and less than or equal to 36.
* The < code > digit < / code > argument is valid if
2003-02-11 23:27:40 +00:00
* < code > 0 < = digit < radix < / code > .
2001-09-21 00:27:17 +00:00
* < p >
2003-02-11 23:27:40 +00:00
* If the digit is less than 10 , then
* < code > ' 0 ' + digit < / code > is returned . Otherwise , the value
* < code > ' a ' + digit - 10 < / code > is returned .
*
* Same as java . lang . Character . forDigit ( ) .
2001-09-21 00:27:17 +00:00
*
* @ param digit the number to convert to a character .
* @ param radix the radix .
* @ return the < code > char < / code > representation of the specified digit
2003-02-11 23:27:40 +00:00
* in the specified radix .
2001-09-21 00:27:17 +00:00
*
* @ see u_digit
* @ see u_charDigitValue
* @ see u_isdigit
2002-12-04 23:39:56 +00:00
* @ stable ICU 2.0
2001-09-21 00:27:17 +00:00
*/
U_CAPI UChar32 U_EXPORT2
u_forDigit ( int32_t digit , int8_t radix ) ;
2002-03-04 01:22:45 +00:00
/**
* Get the " age " of the code point .
* The " age " is the Unicode version when the code point was first
* designated ( as a non - character or for Private Use )
* or assigned a character .
* This can be useful to avoid emitting code points to receiving
* processes that do not accept newer characters .
* The data is from the UCD file DerivedAge . txt .
*
* @ param c The code point .
* @ param versionArray The Unicode version number array , to be filled in .
*
2003-02-11 01:59:32 +00:00
* @ stable ICU 2.1
2002-03-04 01:22:45 +00:00
*/
U_CAPI void U_EXPORT2
u_charAge ( UChar32 c , UVersionInfo versionArray ) ;
1999-12-28 23:39:02 +00:00
/**
2003-02-11 23:27:40 +00:00
* Gets the Unicode version information .
* The version array is filled in with the version information
* for the Unicode standard that is currently used by ICU .
* For example , Unicode version 3.1 .1 is represented as an array with
* the values { 3 , 1 , 1 , 0 } .
*
* @ param info an output array that will be filled in with
* the Unicode version number
2002-12-04 23:39:56 +00:00
* @ stable ICU 2.0
1999-12-28 23:39:02 +00:00
*/
2000-01-12 19:50:27 +00:00
U_CAPI void U_EXPORT2
2000-01-12 20:20:38 +00:00
u_getUnicodeVersion ( UVersionInfo info ) ;
1999-12-28 23:39:02 +00:00
2002-07-08 16:40:57 +00:00
/**
* Get the FC_NFKC_Closure property string for a character .
* See Unicode Standard Annex # 15 for details , search for " FC_NFKC_Closure "
* or for " FNC " : http : //www.unicode.org/reports/tr15/
*
* @ param c The character ( code point ) for which to get the FC_NFKC_Closure string .
* It must be < code > 0 < = c < = 0x10ffff < / code > .
* @ param dest Destination address for copying the string .
* The string will be zero - terminated if possible .
* If there is no FC_NFKC_Closure string ,
* then the buffer will be set to the empty string .
* @ param destCapacity < code > = = sizeof ( dest ) < / code >
* @ param pErrorCode Pointer to a UErrorCode variable .
* @ return The length of the string , or 0 if there is no FC_NFKC_Closure string for this character .
* If the destCapacity is less than or equal to the length , then the buffer
* contains the truncated string and the returned length indicates the full
* length of the string .
* The length does not include the zero - termination .
2003-02-11 23:27:40 +00:00
*
2002-07-08 16:40:57 +00:00
* @ draft ICU 2.2
*/
U_CAPI int32_t U_EXPORT2
u_getFC_NFKC_Closure ( UChar32 c , UChar * dest , int32_t destCapacity , UErrorCode * pErrorCode ) ;
2001-09-11 05:00:34 +00:00
2002-08-21 19:12:24 +00:00
# ifdef ICU_UCHAR_USE_DEPRECATES
2001-09-11 05:00:34 +00:00
/**
2002-12-05 23:26:26 +00:00
* @ obsolete ICU 2.4 . Use ublock_getCode ( ) instead since this API will be removed in that release .
2001-09-11 05:00:34 +00:00
*/
# define u_charScript ublock_getCode
2002-12-05 23:26:26 +00:00
/** @obsolete ICU 2.4. Use UBlockCode instead since this API will be removed in that release. */
2001-09-11 05:00:34 +00:00
typedef UBlockCode UCharScript ;
2002-08-21 19:12:24 +00:00
# endif /* ICU_UCHAR_USE_DEPRECATES */
2001-09-11 05:00:34 +00:00
2002-01-12 00:11:09 +00:00
U_CDECL_END
1999-12-28 23:39:02 +00:00
# endif /* UCHAR_H */
/*eof*/