scuffed-code/icu4c/source/common/uchar.h

/*
********************************************************************************
*                                                                              *
* COPYRIGHT:                                                                   *
*   (C) Copyright Taligent, Inc.,  1997                                        *
*   (C) Copyright International Business Machines Corporation,  1997-1998      *
*   Licensed Material - Program-Property of IBM - All Rights Reserved.         *
*   US Government Users Restricted Rights - Use, duplication, or disclosure    *
*   restricted by GSA ADP Schedule Contract with IBM Corp.                     *
*                                                                              *
********************************************************************************
*
* File UCHAR.H
*
* Modification History:
*
*   Date        Name        Description
*   04/02/97    aliu        Creation.
*   03/29/99    helena      Updated for C APIs.
*   4/15/99     Madhu       Updated for C Implementation and Javadoc
*   5/20/99     Madhu		Added the function u_getVersion()
*   8/19/1999   srl            Upgraded scripts to Unicode 3.0
********************************************************************************
*/

#ifndef UCHAR_H
#define UCHAR_H

#include "utypes.h"
/*===========================================================================*/
/* Unicode version number                                                    */
/*===========================================================================*/
/*
 * Unicode version identifier, exposed as a string literal.
 * NOTE(review): many function comments in this header still cite
 * Unicode 2.1.2 -- confirm which version the property data really follows.
 */
#define UNICODE_VERSION  "3.0.0.beta"

/**
 * The Unicode C API allows you to query the properties associated with individual 
 * Unicode character values.  
 * <p>
 * The Unicode character information, provided implicitly by the 
 * Unicode character encoding standard, includes information about the script 
 * (for example, symbols or control characters) to which the character belongs,
 * as well as semantic information such as whether a character is a digit or 
 * uppercase, lowercase, or uncased.
 * <P>
 */
    

    /**
     * Pairs a 16-bit code unit with a small signed value.
     * Judging by the name, this presumably maps a digit character to its
     * numeric value; the table that uses it is not part of this header.
     */
    typedef struct UCharDigitPair {
        uint16_t fUnicode;  /* the 16-bit code unit */
        int8_t   fValue;    /* the signed value paired with fUnicode */
    } UCharDigitPair;
    /**
     * Bounds of a range of UChar values, given as its first and last code.
     * Presumably one entry describes one UCharScript range; the table that
     * uses it is not part of this header.
     */
    typedef struct BlockScriptMap {
        UChar fFirstCode;   /* first code of the range */
        UChar fLastCode;    /* last code of the range */
    } BlockScriptMap;

    
    /*
     * Three file-scope flags, each initialised to FALSE, followed by forward
     * declarations of three static functions that are not defined in this
     * header.  Presumably each flag records whether the matching create*()
     * function has already built its table.
     *
     * NOTE(review): everything here has internal linkage, so every
     * translation unit that includes this header gets its own private copy
     * of each flag and its own declaration of each function.  Confirm
     * whether these belong in the implementation file instead.
     */
    static  bool_t  tablesCreated=FALSE;
    static  bool_t  ulTablesCreated=FALSE;
    static  bool_t  dirTablesCreated=FALSE;
    static  void    createTables(void);
    static  void    createUlTables(void);
    static  void    createDirTables(void);
/**
 * The Unicode C API allows you to query the properties associated with individual 
 * Unicode character values.  
 * <p>
 * The Unicode character information, provided implicitly by the 
 * Unicode character encoding standard, includes information about the script 
 * (for example, symbols or control characters) to which the character belongs,
 * as well as semantic information such as whether a character is a digit or 
 * uppercase, lowercase, or uncased.
 * <P>
 */

/**
 * Constants.
 */

/**
 * The minimum value a UChar can have.  The lowest value a
 * UChar can have is 0x0000.
 * <p>
 * NOTE(review): this is a static object declared without an initializer in
 * a header, so each including translation unit gets its own copy.  Unless
 * that unit supplies an initializer, C zero-initializes it, which happens
 * to match the documented 0x0000.
 */
  static UChar UCHAR_MIN_VALUE;
/**
 * The maximum value a UChar can have.  The greatest value a
 * UChar can have is 0xffff.
 * <p>
 * NOTE(review): this is a static object declared without an initializer in
 * a header, so each including translation unit gets its own copy.  In any
 * unit that does not supply an initializer, C zero-initializes it, i.e. it
 * reads as 0 rather than the documented 0xffff.  Confirm where the 0xffff
 * initializer lives before relying on this name outside that unit.
 */

 static UChar UCHAR_MAX_VALUE;
/**
 * Enumerated Unicode general category types.
 * Every constant carries an explicit value, running from 0 through 30.
 */
typedef enum UCharCategory {
    UNASSIGNED             = 0,

    /* letters */
    UPPERCASE_LETTER       = 1,
    LOWERCASE_LETTER       = 2,
    TITLECASE_LETTER       = 3,
    MODIFIER_LETTER        = 4,
    OTHER_LETTER           = 5,

    /* marks */
    NON_SPACING_MARK       = 6,
    ENCLOSING_MARK         = 7,
    COMBINING_SPACING_MARK = 8,

    /* numbers */
    DECIMAL_DIGIT_NUMBER   = 9,
    LETTER_NUMBER          = 10,
    OTHER_NUMBER           = 11,

    /* separators */
    SPACE_SEPARATOR        = 12,
    LINE_SEPARATOR         = 13,
    PARAGRAPH_SEPARATOR    = 14,

    /* control, format, private use, surrogate */
    CONTROL                = 15,
    FORMAT                 = 16,
    PRIVATE_USE            = 17,
    SURROGATE              = 18,

    /* punctuation */
    DASH_PUNCTUATION       = 19,
    START_PUNCTUATION      = 20,
    END_PUNCTUATION        = 21,
    CONNECTOR_PUNCTUATION  = 22,
    OTHER_PUNCTUATION      = 23,

    /* symbols */
    MATH_SYMBOL            = 24,
    CURRENCY_SYMBOL        = 25,
    MODIFIER_SYMBOL        = 26,
    OTHER_SYMBOL           = 27,

    /* initial and final punctuation */
    INITIAL_PUNCTUATION    = 28,
    FINAL_PUNCTUATION      = 29,

    GENERAL_OTHER_TYPES    = 30
} UCharCategory;
/**
 * Directional property values for a character.
 * Every constant carries an explicit value, running from 0 through 18.
 */
typedef enum UCharDirection {
    LEFT_TO_RIGHT              = 0,
    RIGHT_TO_LEFT              = 1,

    /* numbers and their separators/terminators */
    EUROPEAN_NUMBER            = 2,
    EUROPEAN_NUMBER_SEPARATOR  = 3,
    EUROPEAN_NUMBER_TERMINATOR = 4,
    ARABIC_NUMBER              = 5,
    COMMON_NUMBER_SEPARATOR    = 6,

    /* separators and neutrals */
    BLOCK_SEPARATOR            = 7,
    SEGMENT_SEPARATOR          = 8,
    WHITE_SPACE_NEUTRAL        = 9,
    OTHER_NEUTRAL              = 10,

    /* embeddings, overrides and related values */
    LEFT_TO_RIGHT_EMBEDDING    = 11,
    LEFT_TO_RIGHT_OVERRIDE     = 12,
    RIGHT_TO_LEFT_ARABIC       = 13,
    RIGHT_TO_LEFT_EMBEDDING    = 14,
    RIGHT_TO_LEFT_OVERRIDE     = 15,
    POP_DIRECTIONAL_FORMAT     = 16,
    DIR_NON_SPACING_MARK       = 17,
    BOUNDARY_NEUTRAL           = 18
} UCharDirection;
/**
 * Script ranges as defined in the Unicode standard.
 * The list is generated from the Unicode data files.  Values are assigned
 * consecutively from 0 in declaration order; SCRIPT_COUNT follows the last
 * real script and NO_SCRIPT follows SCRIPT_COUNT.
 */
typedef enum UCharScript {
    /* Script names */
    BASIC_LATIN = 0,
    LATIN_1_SUPPLEMENT,
    LATIN_EXTENDED_A,
    LATIN_EXTENDED_B,
    IPA_EXTENSIONS,
    SPACING_MODIFIER_LETTERS,
    COMBINING_DIACRITICAL_MARKS,
    GREEK,
    CYRILLIC,
    ARMENIAN,
    HEBREW,
    ARABIC,
    SYRIAC,
    THAANA,
    DEVANAGARI,
    BENGALI,
    GURMUKHI,
    GUJARATI,
    ORIYA,
    TAMIL,
    TELUGU,
    KANNADA,
    MALAYALAM,
    SINHALA,
    THAI,
    LAO,
    TIBETAN,
    MYANMAR,
    GEORGIAN,
    HANGUL_JAMO,
    ETHIOPIC,
    CHEROKEE,
    UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS,
    OGHAM,
    RUNIC,
    KHMER,
    MONGOLIAN,
    LATIN_EXTENDED_ADDITIONAL,
    GREEK_EXTENDED,
    GENERAL_PUNCTUATION,
    SUPERSCRIPTS_AND_SUBSCRIPTS,
    CURRENCY_SYMBOLS,
    COMBINING_MARKS_FOR_SYMBOLS,
    LETTERLIKE_SYMBOLS,
    NUMBER_FORMS,
    ARROWS,
    MATHEMATICAL_OPERATORS,
    MISCELLANEOUS_TECHNICAL,
    CONTROL_PICTURES,
    OPTICAL_CHARACTER_RECOGNITION,
    ENCLOSED_ALPHANUMERICS,
    BOX_DRAWING,
    BLOCK_ELEMENTS,
    GEOMETRIC_SHAPES,
    MISCELLANEOUS_SYMBOLS,
    DINGBATS,
    BRAILLE_PATTERNS,
    CJK_RADICALS_SUPPLEMENT,
    KANGXI_RADICALS,
    IDEOGRAPHIC_DESCRIPTION_CHARACTERS,
    CJK_SYMBOLS_AND_PUNCTUATION,
    HIRAGANA,
    KATAKANA,
    BOPOMOFO,
    HANGUL_COMPATIBILITY_JAMO,
    KANBUN,
    BOPOMOFO_EXTENDED,
    ENCLOSED_CJK_LETTERS_AND_MONTHS,
    CJK_COMPATIBILITY,
    CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A,
    CJK_UNIFIED_IDEOGRAPHS,
    YI_SYLLABLES,
    YI_RADICALS,
    HANGUL_SYLLABLES,
    HIGH_SURROGATES,
    HIGH_PRIVATE_USE_SURROGATES,
    LOW_SURROGATES,
    PRIVATE_USE_AREA, /* PRIVATE_USE */
    CJK_COMPATIBILITY_IDEOGRAPHS,
    ALPHABETIC_PRESENTATION_FORMS,
    ARABIC_PRESENTATION_FORMS_A,
    COMBINING_HALF_MARKS,
    CJK_COMPATIBILITY_FORMS,
    SMALL_FORM_VARIANTS,
    ARABIC_PRESENTATION_FORMS_B,
    SPECIALS,
    HALFWIDTH_AND_FULLWIDTH_FORMS,
    SCRIPT_COUNT,
    NO_SCRIPT,

    /* Aliases kept for compatibility with ICU 1.2.4 and previous */
    LATIN1_SUPPLEMENT           = LATIN_1_SUPPLEMENT,
    IPA_EXTENSION               = IPA_EXTENSIONS,
    SPACING_MODIFIER            = SPACING_MODIFIER_LETTERS,
    COMBINING_DIACRITICAL       = COMBINING_DIACRITICAL_MARKS,
    SUPER_SUBSCRIPT             = SUPERSCRIPTS_AND_SUBSCRIPTS,
    CURRENCY_SYMBOL_SCRIPT      = CURRENCY_SYMBOLS,
    SYMBOL_COMBINING_MARK       = COMBINING_MARKS_FOR_SYMBOLS,
    LETTERLIKE_SYMBOL           = LETTERLIKE_SYMBOLS,
    NUMBER_FORM                 = NUMBER_FORMS,
    ARROW                       = ARROWS,
    MATH_OPERATOR               = MATHEMATICAL_OPERATORS,
    MISC_TECHNICAL              = MISCELLANEOUS_TECHNICAL,
    CONTROL_PICTURE             = CONTROL_PICTURES,
    OPTICAL_CHARACTER           = OPTICAL_CHARACTER_RECOGNITION,
    ENCLOSED_ALPHANUMERIC       = ENCLOSED_ALPHANUMERICS,
    BOXDRAWING                  = BOX_DRAWING,
    BLOCK_ELEMENT               = BLOCK_ELEMENTS,
    GEOMETRIC_SHAPE             = GEOMETRIC_SHAPES,
    MISC_SYMBOL                 = MISCELLANEOUS_SYMBOLS,
    DINGBAT                     = DINGBATS,
    CJK_SYMBOL_PUNCTUATION      = CJK_SYMBOLS_AND_PUNCTUATION,
    ENCLOSED_CJK_LETTER_MONTH   = ENCLOSED_CJK_LETTERS_AND_MONTHS,
    CJK_UNIFIED_IDEOGRAPH       = CJK_UNIFIED_IDEOGRAPHS,
    HANGUL_SYLLABLE             = HANGUL_SYLLABLES,
    HIGH_SURROGATE              = HIGH_SURROGATES,
    HIGH_PRIVATE_USE_SURROGATE  = HIGH_PRIVATE_USE_SURROGATES,
    LOW_SURROGATE               = LOW_SURROGATES,
    PRIVATE_USE_CHARACTERS      = PRIVATE_USE_AREA,
    CJK_COMPATIBILITY_IDEOGRAPH = CJK_COMPATIBILITY_IDEOGRAPHS,
    ALPHABETIC_PRESENTATION     = ALPHABETIC_PRESENTATION_FORMS,
    ARABIC_PRESENTATION_A       = ARABIC_PRESENTATION_FORMS_A,
    COMBINING_HALFMARK          = COMBINING_HALF_MARKS,
    CJK_COMPATIBILITY_FORM      = CJK_COMPATIBILITY_FORMS,
    SMALL_FORM_VARIANT          = SMALL_FORM_VARIANTS,
    ARABIC_PRESENTATION_B       = ARABIC_PRESENTATION_FORMS_B,
    HALFWIDTH_FULLWIDTH_FORM    = HALFWIDTH_AND_FULLWIDTH_FORMS
} UCharScript;

/**
 * Display-cell width classes.  These are the values described in the
 * documentation of u_charCellWidth().
 */
typedef enum UCellWidth {
    ZERO_WIDTH = 0,
    HALF_WIDTH = 1,
    FULL_WIDTH = 2,
    NEUTRAL    = 3
} UCellWidth;
/**
 * Functions to classify characters.
 */

/**
 * Determines whether the specified UChar is a lowercase character
 * according to Unicode 2.1.2.
 *
 * @param c     the character to be tested
 * @return  TRUE if the character is lowercase; FALSE otherwise.
 * @see UNICODE_VERSION
 * @see u_isupper()
 * @see u_istitle()
 * @see u_tolower()
 */
CAPI bool_t U_EXPORT2
u_islower(UChar c);

/**
 * Determines whether the specified character is an uppercase character
 * according to Unicode 2.1.2.
 *
 * @param c     the character to be tested
 * @return  TRUE if the character is uppercase; FALSE otherwise.
 * @see u_islower()
 * @see u_istitle()
 * @see u_tolower()
 * @see u_toupper()
 */
CAPI bool_t U_EXPORT2
u_isupper(UChar c);

/**
 * Determines whether the specified character is a titlecase character
 * according to Unicode 2.1.2.
 *
 * @param c     the character to be tested
 * @return  TRUE if the character is titlecase; FALSE otherwise.
 * @see u_isupper()
 * @see u_islower()
 * @see u_totitle()
 */
CAPI bool_t U_EXPORT2
u_istitle(UChar c);

/**
 * Determines whether the specified character is a digit according to Unicode
 * 2.1.2.
 *
 * @param c     the character to be tested
 * @return  TRUE if the character is a digit; FALSE otherwise.
 */
CAPI bool_t U_EXPORT2
u_isdigit(UChar c);

/**
 * Determines whether the specified numeric value is actually a defined character
 * according to Unicode 2.1.2.
 *
 * @param c     the character to be tested
 * @return  TRUE if the character has a defined Unicode meaning; FALSE otherwise.
 *
 * @see u_isdigit()
 * @see u_isalpha()
 * @see u_isupper()
 * @see u_islower()
 * @see u_istitle()
 */
CAPI bool_t U_EXPORT2
u_isdefined(UChar c);

/**
 * Determines whether the specified character is a letter
 * according to Unicode 2.1.2.
 *
 * @param c     the character to be tested
 * @return  TRUE if the character is a letter; FALSE otherwise.
 *
 * @see u_isdigit()
 */
CAPI bool_t U_EXPORT2
u_isalpha(UChar c);

/**
 * Determines if the specified character is a space character or not.
 *
 * @param c     the character to be tested
 * @return  TRUE if the character is a space character; FALSE otherwise.
 */
CAPI bool_t U_EXPORT2
u_isspace(UChar c);

/**
 * Determines whether the specified character is a control character or not.
 *
 * @param c     the character to be tested
 * @return  TRUE if the Unicode character is a control character; FALSE otherwise.
 *
 * @see u_isprint()
 */
CAPI bool_t U_EXPORT2
u_iscntrl(UChar c);


/**
 * Determines whether the specified character is a printable character according 
 * to Unicode 2.1.2.
 *
 * @param c     the character to be tested
 * @return  TRUE if the Unicode character is a printable character; FALSE otherwise.
 *
 * @see u_iscntrl()
 */
CAPI bool_t U_EXPORT2
u_isprint(UChar c);

/**
 * Determines whether the specified character is of the base form according 
 * to Unicode 2.1.2.
 *
 * @param c     the character to be tested
 * @return  TRUE if the Unicode character is of the base form; FALSE otherwise.
 *
 * @see u_isalpha()
 * @see u_isdigit()
 */
CAPI bool_t U_EXPORT2
u_isbase(UChar c);
/**
  * Returns the linguistic direction property of a character.
  * <P>
  * For example, 0x0041 (letter A) has the LEFT_TO_RIGHT directional 
  * property.
  *
  * @param c  the character whose direction property is requested
  * @return   the direction property of c, as a UCharDirection value
  * @see UCharDirection
  */
CAPI UCharDirection U_EXPORT2
u_charDirection(UChar c);

/**
 * Returns a value indicating the display-cell width of the character
 * when used in Asian text, according to the Unicode standard (see p. 6-130
 * of The Unicode Standard, Version 2.0).  The results for various characters
 * are as follows:
 * <P>
 *      ZERO_WIDTH: Characters which are considered to take up no display-cell space:
 *          control characters
 *          format characters
 *          line and paragraph separators
 *          non-spacing marks
 *          combining Hangul jungseong
 *          combining Hangul jongseong
 *          unassigned Unicode values
 * <P>
 *      HALF_WIDTH: Characters which take up half a cell in standard Asian text:
 *          all characters in the General Scripts Area except combining Hangul choseong
 *              and the characters called out specifically above as ZERO_WIDTH
 *          alphabetic and Arabic presentation forms
 *          halfwidth CJK punctuation
 *          halfwidth Katakana
 *          halfwidth Hangul Jamo
 *          halfwidth forms, arrows, and shapes
 * <P>
 *      FULL_WIDTH:  Characters which take up a full cell in standard Asian text:
 *          combining Hangul choseong
 *          all characters in the CJK Phonetics and Symbols Area
 *          all characters in the CJK Ideographs Area
 *          all characters in the Hangul Syllables Area
 *          CJK compatibility ideographs
 *          CJK compatibility forms
 *          small form variants
 *          fullwidth ASCII
 *          fullwidth punctuation and currency signs
 * <P>
 *      NEUTRAL:  Characters whose cell width is context-dependent:
 *          all characters in the Symbols Area, except those specifically called out above
 *          all characters in the Surrogates Area
 *          all characters in the Private Use Area
 * <P>
 * For Korean text, this algorithm should work properly with properly normalized Korean
 * text.  Precomposed Hangul syllables and non-combining jamo are all considered full-
 * width characters.  For combining jamo, we treat choseong (initial consonants)
 * as double-width characters and jungseong (vowels) and jongseong (final consonants)
 * as non-spacing marks.  This will work right in text that uses the precomposed
 * choseong characters instead of two choseong characters in a row, and which uses the
 * choseong filler character at the beginning of syllables that don't have an initial
 * consonant.  The results may be slightly off with Korean text following different
 * conventions.
 *
 * @param c  the character whose cell width is requested
 * @return   one of the width classes listed above (see UCellWidth), returned
 *           as a uint16_t
 * @see UCellWidth
 */
CAPI uint16_t U_EXPORT2
u_charCellWidth(UChar c);

/**
 * Returns a value indicating a character category according to Unicode
 * 2.1.2.
 * @param c            the character to be tested
 * @return the character category, returned as an int8_t; see UCharCategory
 *         for the category values.
 * @see UCharCategory
 */
CAPI int8_t U_EXPORT2
u_charType(UChar c);

/**
 * Retrieves the decimal numeric value of a digit character.
 * @param c the digit character for which to get the numeric value
 * @return the numeric value of c in decimal radix.  This function returns
 * -1 if c is not a valid digit character.
 */
CAPI int32_t U_EXPORT2
u_charDigitValue(UChar c);

/**
 * Returns the script associated with a character.
 *
 * @param ch  the character whose script is requested
 * @return    the script of ch, as a UCharScript value
 * @see UCharScript
 */
CAPI UCharScript     U_EXPORT2
u_charScript(UChar    ch);

/**
 * The following functions are Java-specific.
 */
/**
  * A convenience function for determining if a Unicode character 
  * is allowed to start a Unicode identifier.
  * A character may start a Unicode identifier if and only if
  * it is a letter.
  *
  * @param   c  the Unicode character.
  * @return  TRUE if the character may start a Unicode identifier;
  *          FALSE otherwise.
  * @see     u_isalpha
  * @see     u_isIDPart
  */
CAPI bool_t U_EXPORT2
u_isIDStart(UChar c);
/**
  * A convenience function for determining if a Unicode character
  * may be part of a Unicode identifier other than the starting
  * character.
  * <P>
  * A character may be part of a Unicode identifier if and only if
  * it is one of the following:
  * <ul>
  * <li>  a letter
  * <li>  a connecting punctuation character (such as "_").
  * <li>  a digit
  * <li>  a numeric letter (such as a Roman numeral character)
  * <li>  a combining mark
  * <li>  a non-spacing mark
  * <li>  an ignorable control character
  * </ul>
  * 
  * @param   c  the Unicode character.
  * @return  TRUE if the character may be part of a Unicode identifier;
  *          FALSE otherwise.
  * @see     u_isIDIgnorable
  * @see     u_isIDStart
  */
CAPI bool_t U_EXPORT2
u_isIDPart(UChar c);
/**
  * A convenience function for determining if a Unicode character 
  * should be regarded as an ignorable character 
  * in a Unicode identifier.
  * <P>
  * The following Unicode characters are ignorable in a 
  * Unicode identifier:
  * <table>
  * <tr><td>0x0000 through 0x0008,</td>
  *                                 <td>ISO control characters that</td></tr>
  * <tr><td>0x000E through 0x001B,</td> <td>are not whitespace</td></tr>
  * <tr><td>and 0x007F through 0x009F</td></tr>
  * <tr><td>0x200C through 0x200F</td>  <td>join controls</td></tr>
  * <tr><td>0x200A through 0x200E</td>  <td>bidirectional controls</td></tr>
  * <tr><td>0x206A through 0x206F</td>  <td>format controls</td></tr>
  * <tr><td>0xFEFF</td>               <td>zero-width no-break space</td></tr>
  * </table>
  * <P>
  * NOTE(review): the "bidirectional controls" row (0x200A through 0x200E)
  * overlaps the "join controls" row above it, and may have been meant as
  * 0x202A through 0x202E.  Confirm against the implementation before
  * relying on this table.
  * 
  * @param   c  the Unicode character.
  * @return  TRUE if the character is an ignorable character in a
  *          Unicode identifier; FALSE otherwise.
  * @see     u_isIDPart
  */
CAPI bool_t U_EXPORT2
u_isIDIgnorable(UChar c);
    /**
     * A convenience function for determining if a Unicode character
     * is allowed as the first character in a Java identifier.
     * <P>
     * A character may start a Java identifier if and only if
     * it is one of the following:
     * <ul>
     * <li>  a letter
     * <li>  a currency symbol (such as "$")
     * <li>  a connecting punctuation symbol (such as "_").
     * </ul>
     *
     * @param   c  the Unicode character.
     * @return  TRUE if the character may start a Java identifier;
     *          FALSE otherwise.
     * @see     u_isJavaIDPart
     * @see     u_isalpha
     * @see     u_isIDStart
     */
CAPI bool_t U_EXPORT2
u_isJavaIDStart(UChar c);
    /**
     * A convenience function for determining if a Unicode character 
     * may be part of a Java identifier other than the starting
     * character.
     * <P>
     * A character may be part of a Java identifier if and only if
     * it is one of the following:
     * <ul>
     * <li>  a letter
     * <li>  a currency symbol (such as "$")
     * <li>  a connecting punctuation character (such as "_").
     * <li>  a digit
     * <li>  a numeric letter (such as a Roman numeral character)
     * <li>  a combining mark
     * <li>  a non-spacing mark
     * <li>  an ignorable control character
     * </ul>
     * 
     * @param   c the Unicode character.
     * @return  TRUE if the character may be part of a Java identifier; 
     *          FALSE otherwise.
     * @see     u_isIDIgnorable
     * @see     u_isJavaIDStart
     * @see     u_isalpha
     * @see     u_isdigit
     * @see     u_isIDPart
     */

CAPI bool_t U_EXPORT2
u_isJavaIDPart(UChar c);

/**
 * Functions to change character case.
 */

/**
 * The given character is mapped to its lowercase equivalent according to
 * Unicode 2.1.2; if the character has no lowercase equivalent, the character 
 * itself is returned.
 * <P>
 * A character has a lowercase equivalent if and only if a lowercase mapping
 * is specified for the character in the Unicode 2.1.2 attribute table.
 * <P>
 * u_tolower() only deals with the general letter case conversion.
 * For language specific case conversion behavior, use ustrToUpper().
 * For example, the case conversion for dot-less i and dotted I in Turkish,
 * or for final sigma in Greek.
 * <P>
 * NOTE(review): ustrToUpper() is not declared in this header, and a
 * lowercasing counterpart may have been intended here -- confirm.
 *
 * @param c     the character to be converted
 * @return  the lowercase equivalent of the character, if any;
 *      otherwise the character itself.
 */
CAPI UChar U_EXPORT2
u_tolower(UChar c);

/**
 * The given character is mapped to its uppercase equivalent according to Unicode
 * 2.1.2; if the character has no uppercase equivalent, the character itself is 
 * returned.
 * <P>
 * u_toupper() only deals with the general letter case conversion.
 * For language specific case conversion behavior, use ustrToUpper().
 * For example, the case conversion for dot-less i and dotted I in Turkish,
 * or ess-zed (i.e., "sharp S") in German.
 *
 * @param c     the character to be converted
 * @return  the uppercase equivalent of the character, if any;
 *      otherwise the character itself.
 */
CAPI UChar U_EXPORT2
u_toupper(UChar c);
/**
 * The given character is mapped to its titlecase equivalent according to Unicode
 * 2.1.2.  There are only four Unicode characters that are truly titlecase forms
 * that are distinct from uppercase forms.  As a rule, if a character has no
 * true titlecase equivalent, its uppercase equivalent is returned.
 * <P>
 * A character has a titlecase equivalent if and only if a titlecase mapping
 * is specified for the character in the Unicode 2.1.2 data.
 *
 * @param c     the character to be converted
 * @return  the titlecase equivalent of the character, if any;
 *      otherwise the character itself.
 */
CAPI UChar U_EXPORT2
u_totitle(UChar c);

/**
 * Gets the version of the Unicode standard that is in use.
 *
 * @return the Unicode standard version number, as a C string
 * @see UNICODE_VERSION
 */
CAPI const char* U_EXPORT2
u_getVersion(void);

#endif /* UCHAR_H */
/*eof*/