ICU-647 update API docs

X-SVN-Rev: 3200
2000-12-12 20:31:47 +00:00 · 2000-12-12 20:31:47 +00:00 · 96c50174b4
commit 96c50174b4
parent 07a0851ec4
4 changed files with 347 additions and 89 deletions
--- a/icu4c/source/common/unicode/utf.h
+++ b/icu4c/source/common/unicode/utf.h
@ -16,13 +16,50 @@

 /**
 * \file
-* \brief Description of UChar and UChar32 data types
+* \brief C API: UChar and UChar32 data types and UTF macros for C Unicode string handling
 *
-*   This file defines the UChar and UChar32 data types for Unicode code units
+*   <p>This file defines the UChar and UChar32 data types for Unicode code units
 *   and code points, as well as macros for efficiently getting code points
-*   in and out of a string.
-*   utf.h is included by utypes.h and itself includes the utfXX.h after some
-*   common definitions. Those files define the macros for each UTF-size.
+*   in and out of a string.</p>
+*
+*   <p>utf.h is included by utypes.h and itself includes the utfXX.h after some
+*   common definitions. Those files define the macros for each UTF-size.</p>
+*
+*   <p>ICU allows in principle to set which UTF (UTF-8/16/32) is used internally
+*   by defining UTF_SIZE to either 8, 16, or 32. utf.h would then define the UChar type
+*   accordingly. UTF-16 is the default.<br>
+*   In practice, since a lot of the ICU source code &mdash; especially low-level code like
+*   conversion and collation &mdash; assumes UTF-16, utf.h enforces the default of UTF-16.
+*   This is unlikely to change in the future. Only some files (ubidi.h) should work with any UTF.</p>
+*
+*   <p>Accordingly, utf.h defines UChar to be an unsigned 16-bit integer. If this matches wchar_t, then
+*   UChar is defined to be exactly wchar_t, otherwise uint16_t.</p>
+*
+*   <p>UChar32 is always defined to be a 32-bit integer to be large enough for a 21-bit
+*   Unicode code point (Unicode scalar value, 0..0x10ffff). If wchar_t is a 32-bit type, then
+*   UChar32 is defined to be exactly wchar_t, <em>regardless of whether wchar_t is signed or unsigned.
+*   This means that UChar32 may be signed or unsigned depending on the platform!</em>
+*   If wchar_t is not a 32-bit type, then UChar32 is defined to be uint32_t.</p>
+*
+*   <p>utf.h also defines a number of C macros for handling single Unicode code points and
+*   for using UTF Unicode strings. It includes utf8.h, utf16.h, and utf32.h for the actual
+*   implementations of those macros and then aliases one set of them (for UTF-16) for general use.
+*   The UTF-specific macros have the UTF size in the macro name prefixes (UTF16_...), while
+*   the general alias macros always begin with UTF_...</p>
+*
+*   <p>Many string operations can be done with or without error checking.
+*   Where such a distinction is useful, there are two versions of the macros, "unsafe" and "safe"
+*   ones with ..._UNSAFE and ..._SAFE suffixes. The unsafe macros are fast but may cause
+*   program failures if the strings are not well-formed. The safe macros have an additional, boolean
+*   parameter "strict". If strict is FALSE, then only illegal sequences are detected.
+*   Otherwise, irregular sequences are detected as well (like single surrogates in UTF-8/32).
+*   Safe macros return special error code points for illegal/irregular sequences:
+*   Typically, U+ffff, or for UTF-8 values that would result in a byte sequence of the same length
+*   as the illegal input sequence.</p>
+*
+*   <p>It is possible to change the choice for the general alias macros to be unsafe, safe/not strict or safe/strict.
+*   The default is safe/not strict. It is not recommended to select the unsafe macros as the basis for
+*   Unicode string handling in ICU! To select a mode, define UTF_SAFE (safe/not strict), UTF_STRICT (safe/strict), or UTF_UNSAFE.</p>
 */

 #ifndef __UTF_H__
@ -40,12 +77,17 @@

 /* If there is no compiler option for the preferred UTF size, then default to UTF-16. */
 #ifndef UTF_SIZE
+    /** Number of bits in a Unicode string code unit, same as x in UTF-x (8, 16, or 32). */
 #   define UTF_SIZE 16
 #endif

+/** Number of bytes in a UChar (sizeof(UChar)). */
 #define U_SIZEOF_UCHAR (UTF_SIZE>>3)

-/* Do we have wchar.h on this platform? It is there on most platforms. */
+/*!
+ * \def U_HAVE_WCHAR_H
+ * Do we have wchar.h on this platform? It is there on most platforms.
+ */
 #ifndef U_HAVE_WCHAR_H
 #   define U_HAVE_WCHAR_H 1
 #endif
@ -53,48 +95,67 @@
 /* U_SIZEOF_WCHAR_T==sizeof(wchar_t) (0 means it is not defined or autoconf could not set it) */
 #if U_SIZEOF_WCHAR_T==0
 #   undef U_SIZEOF_WCHAR_T
+    /** U_SIZEOF_WCHAR_T==sizeof(wchar_t). */
 #   define U_SIZEOF_WCHAR_T 4
 #endif

-/* Define UChar32 to be compatible with wchar_t if possible. */
+/*!
+ * \var UChar32
+ * Define UChar32 to be wchar_t if that is 32 bits wide; may be signed or unsigned!
+ * If wchar_t is not 32 bits wide, then define UChar32 to be uint32_t.
+ */
 #if U_SIZEOF_WCHAR_T==4
    typedef wchar_t UChar32;
 #else
    typedef uint32_t UChar32;
 #endif

-/* Unicode string and array offset and index type */
+/**
+ * Unicode string and array offset and index type.
+ * ICU always counts Unicode code units (UChars) for string offsets, indexes, and lengths, not Unicode code points.
+ */
 typedef int32_t UTextOffset;

 /* Specify which macro versions are the default ones - safe or fast. */
 #if !defined(UTF_SAFE) && !defined(UTF_STRICT) && !defined(UTF_UNSAFE)
+    /**
+     * The default choice for general Unicode string macros is to use the ..._SAFE macro implementations
+     * with strict=FALSE. See the utf.h file description.
+     */
 #   define UTF_SAFE
 #endif

 /* internal definitions ----------------------------------------------------- */

-/*
- * Special error values for UTF-8,
- * which need 1 or 2 bytes in UTF-8:
- * U+0015 = NAK = Negative Acknowledge, C0 control character
- * U+009f = highest C1 control character
+/**
+ * <p>Special error values for UTF-8,
+ * which need 1 or 2 bytes in UTF-8:<br>
+ * U+0015 = NAK = Negative Acknowledge, C0 control character<br>
+ * U+009f = highest C1 control character</p>
 *
- * These are used by ("safe") UTF-8 macros so that they can return an error value
+ * <p>These are used by ("safe") UTF-8 macros so that they can return an error value
 * that needs the same number of code units (bytes) as were seen by
- * a macro.
+ * a macro. They should be tested with UTF_IS_ERROR() or UTF_IS_VALID().</p>
+ *
+ * @internal
 */
+/*@{*/
 #define UTF8_ERROR_VALUE_1 0x15
 #define UTF8_ERROR_VALUE_2 0x9f
+/*@}*/

-/* error value for all UTFs */
+/**
+ * Error value for all UTFs. This code point value will be set by macros with error
+ * checking if an error is detected.
+ */
 #define UTF_ERROR_VALUE 0xffff

 /* single-code point definitions -------------------------------------------- */

-/* is this code unit or code point a surrogate? */
+/** Is this code unit or code point a surrogate (U+d800..U+dfff)? */
 #define UTF_IS_SURROGATE(uchar) (((uchar)&0xfffff800)==0xd800)

-/*
+/**
 * Is a given 32-bit code point/Unicode scalar value
 * actually a valid Unicode (abstract) character?
 */
@ -102,14 +163,14 @@ typedef int32_t UTextOffset;
    ((uint32_t)(c)<=0x10ffff && \
     !UTF_IS_SURROGATE(c) && ((c)&0xfffe)!=0xfffe)

-/*
+/**
 * Is a given 32-bit code an error value
 * as returned by one of the macros for any UTF?
 */
 #define UTF_IS_ERROR(c) \
    (((c)&0xfffe)==0xfffe || (c)==UTF8_ERROR_VALUE_1 || (c)==UTF8_ERROR_VALUE_2)

-/* This is a combined macro: is c a valid Unicode value _and_ not an error code? */
+/** This is a combined macro: Is c a valid Unicode value _and_ not an error code? */
 #define UTF_IS_VALID(c) \
    ((uint32_t)(c)<=0x10ffff && \
     !UTF_IS_SURROGATE(c) && \
@ -124,6 +185,19 @@ typedef int32_t UTextOffset;

 /* Define types and macros according to the selected UTF size. -------------- */

+/*!
+ * \var UChar
+ * Define UChar to be wchar_t if that is 16 bits wide; always assumed to be unsigned.
+ * If wchar_t is not 16 bits wide, then define UChar to be uint16_t.
+ */
+
+/**
+ * <p>All these macros are aliases to the selected UTF implementation macros.
+ * In an ICU build, they are always aliases for the UTF-16 macros (with UTF16_ prefixes).
+ * In essence, they remove the UTF size from the macro names so that all macros will
+ * have a UTF_ prefix.</p>
+ */
+/*@{*/
 #if UTF_SIZE==8

 #   error UTF-8 is not implemented, undefine UTF_SIZE or define it to 16
@ -144,24 +218,27 @@ typedef int32_t UTextOffset;
        typedef uint16_t UChar;
 #   endif

+    /** Does this code unit alone encode a code point? */
 #   define UTF_IS_SINGLE(uchar)                         UTF16_IS_SINGLE(uchar)
+    /** Is this code unit the first one of several? */
 #   define UTF_IS_LEAD(uchar)                           UTF16_IS_LEAD(uchar)
+    /** Is this code unit one of several but not the first one? */
 #   define UTF_IS_TRAIL(uchar)                          UTF16_IS_TRAIL(uchar)

+    /** Does this code point require multiple code units? */
 #   define UTF_NEED_MULTIPLE_UCHAR(c)                   UTF16_NEED_MULTIPLE_UCHAR(c)
+    /** How many code units are used to encode this code point? */
 #   define UTF_CHAR_LENGTH(c)                           UTF16_CHAR_LENGTH(c)
+    /** How many code units are used at most for any Unicode code point? */
 #   define UTF_MAX_CHAR_LENGTH                          UTF16_MAX_CHAR_LENGTH
+    /** Estimate the number of code units for a string based on the number of UTF-16 code units. */
 #   define UTF_ARRAY_SIZE(size)                         UTF16_ARRAY_SIZE(size)

 #   define UTF_GET_CHAR_UNSAFE(s, i, c)                 UTF16_GET_CHAR_UNSAFE(s, i, c)
 #   define UTF_GET_CHAR_SAFE(s, start, i, length, c, strict) UTF16_GET_CHAR_SAFE(s, start, i, length, c, strict)
-#   define UTF_GET_CHAR_SAFE_STRICT(s, start, i, length, c) UTF16_GET_CHAR_SAFE_STRICT(s, start, i, length, c)
-#   define UTF_GET_CHAR_SAFE_LENIENT(s, start, i, length, c) UTF16_GET_CHAR_SAFE_LENIENT(s, start, i, length, c)

 #   define UTF_NEXT_CHAR_UNSAFE(s, i, c)                UTF16_NEXT_CHAR_UNSAFE(s, i, c)
 #   define UTF_NEXT_CHAR_SAFE(s, i, length, c, strict)  UTF16_NEXT_CHAR_SAFE(s, i, length, c, strict)
-#   define UTF_NEXT_CHAR_SAFE_STRICT(s, i, length, c)   UTF16_NEXT_CHAR_SAFE_STRICT(s, i, length, c)
-#   define UTF_NEXT_CHAR_SAFE_LENIENT(s, i, length, c)  UTF16_NEXT_CHAR_SAFE_LENIENT(s, i, length, c)

 #   define UTF_APPEND_CHAR_UNSAFE(s, i, c)              UTF16_APPEND_CHAR_UNSAFE(s, i, c)
 #   define UTF_APPEND_CHAR_SAFE(s, i, length, c)        UTF16_APPEND_CHAR_SAFE(s, i, length, c)
@ -177,8 +254,6 @@ typedef int32_t UTextOffset;

 #   define UTF_PREV_CHAR_UNSAFE(s, i, c)                UTF16_PREV_CHAR_UNSAFE(s, i, c)
 #   define UTF_PREV_CHAR_SAFE(s, start, i, c, strict)   UTF16_PREV_CHAR_SAFE(s, start, i, c, strict)
-#   define UTF_PREV_CHAR_SAFE_STRICT(s, start, i, c)    UTF16_PREV_CHAR_SAFE_STRICT(s, start, i, c)
-#   define UTF_PREV_CHAR_SAFE_LENIENT(s, start, i, c)   UTF16_PREV_CHAR_SAFE_LENIENT(s, start, i, c)

 #   define UTF_BACK_1_UNSAFE(s, i)                      UTF16_BACK_1_UNSAFE(s, i)
 #   define UTF_BACK_1_SAFE(s, start, i)                 UTF16_BACK_1_SAFE(s, start, i)
@ -198,35 +273,131 @@ typedef int32_t UTextOffset;
 #else
 #   error UTF_SIZE must be undefined or one of { 8, 16, 32 } - only 16 is implemented
 #endif
+/*@}*/

 /* Define the default macros for handling UTF characters. ------------------- */

+/**
+ * \def UTF_GET_CHAR(s, start, i, length, c)
+ *
+ * Set c to the code point that contains the code unit i.
+ * i could point to the first, the last, or an intermediate code unit.
+ * i is not modified.
+ * \pre 0<=i<length
+ */
+
+/**
+ * \def UTF_NEXT_CHAR(s, i, length, c)
+ *
+ * Set c to the code point that starts at code unit i
+ * and advance i to beyond the code units of this code point (post-increment).
+ * \pre 0<=i<length
+ * \post 0<i<=length
+ */
+
+/**
+ * \def UTF_APPEND_CHAR(s, i, length, c)
+ *
+ * Append the code units of code point c to the string at index i
+ * and advance i to beyond the new code units (post-increment).
+ * \pre 0<=c<=0x10ffff
+ * \pre 0<=i<length
+ * \post 0<i<=length
+ */
+
+/**
+ * \def UTF_FWD_1(s, i, length)
+ *
+ * Advance i to beyond the code units of the code point that begins at i.
+ * I.e., advance i by one code point.
+ * \pre 0<=i<length
+ * \post 0<i<=length
+ */
+
+/**
+ * \def UTF_FWD_N(s, i, length, n)
+ *
+ * Advance i to beyond the code units of the n code points where the first one begins at i.
+ * I.e., advance i by n code points.
+ * \pre 0<=i<length
+ * \post 0<i<=length
+ */
+
+/**
+ * \def UTF_SET_CHAR_START(s, start, i)
+ *
+ * Take the random-access index i and adjust it so that it points to the beginning
+ * of a code point.
+ * The input index points to any code unit of a code point and is moved to point to
+ * the first code unit of the same code point. i is never incremented.
+ * \pre start<=i<length
+ * \post start<=i<length
+ */
+
+/**
+ * \def UTF_PREV_CHAR(s, start, i, c)
+ *
+ * Set c to the code point that has code units before i
+ * and move i backward to the first code unit of this code point (pre-decrement).
+ * \pre start<i<=length
+ * \post start<=i<length
+ */
+
+/**
+ * \def UTF_BACK_1(s, start, i)
+ *
+ * Move i backward to the first code unit of the code point that has code units before i.
+ * I.e., move i backward by one code point.
+ * \pre start<i<=length
+ * \post start<=i<length
+ */
+
+/**
+ * \def UTF_BACK_N(s, start, i, n)
+ *
+ * Move i backward to the first code unit of the n code points that have code units before i.
+ * I.e., move i backward by n code points.
+ * \pre start<i<=length
+ * \post start<=i<length
+ */
+
+/**
+ * \def UTF_SET_CHAR_LIMIT(s, start, i, length)
+ *
+ * Take the random-access index i and adjust it so that it points beyond
+ * a code point. The input index points beyond any code unit
+ * of a code point and is moved to point beyond the last code unit of the same
+ * code point. i is never decremented.
+ * \pre start<i<=length
+ * \post start<i<=length
+ */
+
 #ifdef UTF_SAFE

-#   define UTF_GET_CHAR(s, start, i, length, c) UTF_GET_CHAR_SAFE_LENIENT(s, start, i, length, c)
+#   define UTF_GET_CHAR(s, start, i, length, c) UTF_GET_CHAR_SAFE(s, start, i, length, c, FALSE)

-#   define UTF_NEXT_CHAR(s, i, length, c)       UTF_NEXT_CHAR_SAFE_LENIENT(s, i, length, c)
+#   define UTF_NEXT_CHAR(s, i, length, c)       UTF_NEXT_CHAR_SAFE(s, i, length, c, FALSE)
 #   define UTF_APPEND_CHAR(s, i, length, c)     UTF_APPEND_CHAR_SAFE(s, i, length, c)
 #   define UTF_FWD_1(s, i, length)              UTF_FWD_1_SAFE(s, i, length)
 #   define UTF_FWD_N(s, i, length, n)           UTF_FWD_N_SAFE(s, i, length, n)
 #   define UTF_SET_CHAR_START(s, start, i)      UTF_SET_CHAR_START_SAFE(s, start, i)

-#   define UTF_PREV_CHAR(s, start, i, c)        UTF_PREV_CHAR_SAFE_LENIENT(s, start, i, c)
+#   define UTF_PREV_CHAR(s, start, i, c)        UTF_PREV_CHAR_SAFE(s, start, i, c, FALSE)
 #   define UTF_BACK_1(s, start, i)              UTF_BACK_1_SAFE(s, start, i)
 #   define UTF_BACK_N(s, start, i, n)           UTF_BACK_N_SAFE(s, start, i, n)
 #   define UTF_SET_CHAR_LIMIT(s, start, i, length) UTF_SET_CHAR_LIMIT_SAFE(s, start, i, length)

 #elif defined(UTF_STRICT)

-#   define UTF_GET_CHAR(s, start, i, length, c) UTF_GET_CHAR_SAFE_STRICT(s, start, i, length, c)
+#   define UTF_GET_CHAR(s, start, i, length, c) UTF_GET_CHAR_SAFE(s, start, i, length, c, TRUE)

-#   define UTF_NEXT_CHAR(s, i, length, c)       UTF_NEXT_CHAR_SAFE_STRICT(s, i, length, c)
+#   define UTF_NEXT_CHAR(s, i, length, c)       UTF_NEXT_CHAR_SAFE(s, i, length, c, TRUE)
 #   define UTF_APPEND_CHAR(s, i, length, c)     UTF_APPEND_CHAR_SAFE(s, i, length, c)
 #   define UTF_FWD_1(s, i, length)              UTF_FWD_1_SAFE(s, i, length)
 #   define UTF_FWD_N(s, i, length, n)           UTF_FWD_N_SAFE(s, i, length, n)
 #   define UTF_SET_CHAR_START(s, start, i)      UTF_SET_CHAR_START_SAFE(s, start, i)

-#   define UTF_PREV_CHAR(s, start, i, c)        UTF_PREV_CHAR_SAFE_STRICT(s, start, i, c)
+#   define UTF_PREV_CHAR(s, start, i, c)        UTF_PREV_CHAR_SAFE(s, start, i, c, TRUE)
 #   define UTF_BACK_1(s, start, i)              UTF_BACK_1_SAFE(s, start, i)
 #   define UTF_BACK_N(s, start, i, n)           UTF_BACK_N_SAFE(s, start, i, n)
 #   define UTF_SET_CHAR_LIMIT(s, start, i, length) UTF_SET_CHAR_LIMIT_SAFE(s, start, i, length)
--- a/icu4c/source/common/unicode/utf32.h
+++ b/icu4c/source/common/unicode/utf32.h
@ -15,7 +15,7 @@
 */
 /**
 * \file
-* \brief Description of UTF-32 macros
+* \brief C API: UTF-32 macros
 *
 *   This file defines macros to deal with UTF-32 code units and code points.
 *   Signatures and semantics are the same as for the similarly named macros
--- a/icu4c/source/common/unicode/utf8.h
+++ b/icu4c/source/common/unicode/utf8.h
@ -16,7 +16,7 @@

 /**
 * \file 
-* \brief Description of UTF-8 macros
+* \brief C API: UTF-8 macros
 * 
 *   This file defines macros to deal with UTF-8 code units and code points.
 *   Signatures and semantics are the same as for the similarly named macros
--- a/icu4c/source/common/unicode/utypes.h
+++ b/icu4c/source/common/unicode/utypes.h
@ -35,28 +35,41 @@
 #include "unicode/umachine.h"
 #include "unicode/utf.h"

+/*!
+ * \file
+ * \brief Basic definitions for ICU, for both C and C++ APIs
+ *
+ * This file defines basic types, constants, and enumerations directly or
+ * indirectly by including other header files, especially utf.h for the
+ * basic character and string definitions and umachine.h for consistent
+ * integer and other types.
+ */
+
 /*===========================================================================*/
 /* char Character set family                                                 */
 /*===========================================================================*/

-/*
- * These definitions allow to specify the encoding of text
+/**
+ * \def U_CHARSET_FAMILY
+ *
+ * <p>These definitions allow specifying the encoding of text
 * in the char data type as defined by the platform and the compiler.
 * It is enough to determine the code point values of "invariant characters",
 * which are the ones shared by all encodings that are in use
- * on a given platform.
+ * on a given platform.</p>
 *
- * Those "invariant characters" should be all the uppercase and lowercase
+ * <p>Those "invariant characters" should be all the uppercase and lowercase
 * latin letters, the digits, the space, and "basic punctuation".
- * Also, '\n', '\r', '\t' should be available.
+ * Also, '\n', '\r', '\t' should be available.</p>
 *
- * The list of "invariant characters" is:
- *    A-Z  a-z  0-9  SPACE  "  %  &  '  (  )  *  +  ,  -  .  /  :  ;  <  =  >  ?  _
- * (52 letters + 10 numbers + 20 punc/sym = 82 total)
+ * <p>The list of "invariant characters" is:<br>
+ *    A-Z  a-z  0-9  SPACE  "  %  &amp;  '  (  )  *  +  ,  -  .  /  :  ;  &lt;  =  >  ?  _<br>
+ * (52 letters + 10 numbers + 20 punc/sym = 82 total)</p>
 *
- * In other words, all the graphic characters in 7-bit ASCII should
- * be safely accessible except the following:
+ * <p>In other words, all the graphic characters in 7-bit ASCII should
+ * be safely accessible except the following:</p>
 * 
+ * \code
 *    '\' <backslash>
 *    '[' <left bracket>
 *    ']' <right bracket>
@ -70,6 +83,7 @@
 *    '$' <dollar sign>
 *    '@' <commercial at>
 *    '`' <grave accent>
+ * \endcode
 */

 #define U_ASCII_FAMILY 0
@ -82,19 +96,38 @@
 /*===========================================================================*/
 /* Related version information                                               */
 /*===========================================================================*/
+
+/** The current ICU library version as a dotted-decimal string. */
 #define U_ICU_VERSION "1.7"
+
+/** The current ICU library major/minor version as a string without dots, for library name suffixes. */
 #define U_ICU_VERSION_SHORT "17"

+/** An ICU version consists of up to 4 numbers from 0..255. */
 #define U_MAX_VERSION_LENGTH 4
+
+/** In a string, ICU version fields are delimited by dots. */
 #define U_VERSION_DELIMITER '.'
+
+/** The maximum length of an ICU version string. */
 #define U_MAX_VERSION_STRING_LENGTH 20

+/** The binary form of a version on ICU APIs is an array of 4 uint8_t. */
 typedef uint8_t UVersionInfo[U_MAX_VERSION_LENGTH];

 /*===========================================================================*/
 /* ICUDATA naming scheme                                                     */
 /*===========================================================================*/

+/**
+ * \def U_ICUDATA_TYPE_LETTER
+ *
+ * This is a platform-dependent string containing one letter:
+ * - b for big-endian, ASCII-family platforms
+ * - l for little-endian, ASCII-family platforms
+ * - e for big-endian, EBCDIC-family platforms
+ * This letter is part of the common data file name.
+ */
 #if U_CHARSET_FAMILY
 #   if U_IS_BIG_ENDIAN
   /* EBCDIC - should always be BE */
@ -113,19 +146,25 @@ typedef uint8_t UVersionInfo[U_MAX_VERSION_LENGTH];
 #   endif
 #endif

-/* A single string literal containing the icudata stub name, i.e. 'icudt18e' for 
+/** A single string literal containing the icudata stub name, e.g. 'icudt18e' for 
   ICU 1.8.x on EBCDIC, etc.. */
 #define U_ICUDATA_NAME    "icudt" U_ICU_VERSION_SHORT U_ICUDATA_TYPE_LETTER

-/* Work around the OS390 compiler issue, to be removed when the compiler 
-updates come out.  */
+/**
+ * \def U_CALLCONV
+ * Work around the OS390 compiler issue, to be removed when the compiler
+ * updates come out.
+ */
 #if defined(OS390) && defined(XP_CPLUSPLUS)
 #    define U_CALLCONV __cdecl
 #else
 #    define U_CALLCONV 
 #endif

-/* Define NULL (the wrong way, cast to void *) if it does not exist. */
+/**
+ * \def NULL
+ * Define NULL if necessary, to 0 for C++ and to ((void *)0) for C.
+ */
 #ifndef NULL
 #ifdef XP_CPLUSPLUS
 #define NULL    0
@ -134,8 +173,10 @@ updates come out.  */
 #endif
 #endif

-/* Maximum value of a (void*) - use to indicate the limit of
-   an 'infinite' buffer.  */
+/**
+ * \def U_MAX_PTR
+ * Maximum value of a (void*) - use to indicate the limit of an 'infinite' buffer.
+ */
 #ifndef U_MAX_PTR
 #define U_MAX_PTR ((void*)-1)
 #endif
@ -170,9 +211,11 @@ typedef double UDate;
 * described in detail below.  UClassID values can be compared using
 * operator==(). Nothing else should be done with them.
 *
+ * \par
 * getDynamicClassID() is declared in the base class of the hierarchy as
 * a pure virtual.  Each concrete subclass implements it in the same way:
 *
+ * \code
 *      class Base {
 *      public:
 *          virtual UClassID getDynamicClassID() const = 0;
@ -183,10 +226,12 @@ typedef double UDate;
 *          virtual UClassID getDynamicClassID() const
 *            { return Derived::getStaticClassID(); }
 *      }
+ * \endcode
 *
 * Each concrete class implements getStaticClassID() as well, which allows
 * clients to test for a specific type.
 *
+ * \code
 *      class Derived {
 *      public:
 *          static UClassID getStaticClassID();
@ -198,6 +243,7 @@ typedef double UDate;
 *      UClassID Derived::getStaticClassID()
 *        { return (UClassID)&Derived::fgClassID; }
 *      char Derived::fgClassID = 0; // Value is irrelevant
+ * \endcode
 */

 typedef void* UClassID;
@ -206,11 +252,29 @@ typedef void* UClassID;
 /* Shared library/DLL import-export API control                              */
 /*===========================================================================*/

-/**
+/*
 * Control of symbol import/export.
 * The ICU is separated into two libraries.
 */

+/**
+ * \def U_COMMON_API
+ * Set to export library symbols from inside the common library,
+ * and to import them from outside.
+ */
+
+/**
+ * \def U_I18N_API
+ * Set to export library symbols from inside the i18n library,
+ * and to import them from outside.
+ */
+
+/**
+ * \def U_LAYOUT_API
+ * Set to export library symbols from inside the layout engine library,
+ * and to import them from outside.
+ */
+
 #ifdef U_COMMON_IMPLEMENTATION
 #define U_COMMON_API  U_EXPORT
 #define U_I18N_API    U_IMPORT
@ -230,6 +294,7 @@ typedef void* UClassID;
 #endif

 /**
+ * \def U_STANDARD_CPP_NAMESPACE
 * Control of C++ Namespace
 */
 #ifdef __cplusplus
@ -242,38 +307,47 @@ typedef void* UClassID;
 /* UErrorCode */
 /*===========================================================================*/

-/** Error code to replace exception handling.
- *  So that the code is compatible with all C++ compilers.
+/**
+ * Error code to replace exception handling, so that the code is compatible with all C++ compilers,
+ * and to use the same mechanism for C and C++.
+ *
+ * \par
+ * ICU functions that take a reference (C++) or a pointer (C) to a UErrorCode
+ * first test if(U_FAILURE(errorCode)) { return immediately; }
+ * so that in a chain of such functions the first one that sets an error code
+ * causes the following ones to not perform any operations.
+ *
+ * \par
+ * Error codes should be tested using U_FAILURE() and U_SUCCESS().
 */
 enum UErrorCode {
-    U_ERROR_INFO_START        = -128,     /* Start of information results (semantically successful) */
-    U_USING_FALLBACK_ERROR    = -128,
-    U_USING_DEFAULT_ERROR     = -127,
-    U_ERROR_INFO_LIMIT,
+    U_ERROR_INFO_START        = -128,   /**< Start of information results (semantically successful) */
+    U_USING_FALLBACK_ERROR    = -128,   /**< A resource bundle lookup returned a fallback result (not an error) */
+    U_USING_DEFAULT_ERROR     = -127,   /**< A resource bundle lookup returned a result from the root locale (not an error) */
+    U_ERROR_INFO_LIMIT,                 /**< This must always be the last warning value to indicate the limit for UErrorCode warnings (last warning code +1) */

-    /** success */
-    U_ZERO_ERROR              =  0,       
+    U_ZERO_ERROR              =  0,     /**< No error, no warning. */

-    U_ILLEGAL_ARGUMENT_ERROR  =  1,       /* Start of codes indicating failure */
+    U_ILLEGAL_ARGUMENT_ERROR  =  1,     /**< Start of codes indicating failure */
    U_MISSING_RESOURCE_ERROR  =  2,
    U_INVALID_FORMAT_ERROR    =  3,
    U_FILE_ACCESS_ERROR       =  4,
-    U_INTERNAL_PROGRAM_ERROR  =  5,       /* Indicates a bug in the library code */
+    U_INTERNAL_PROGRAM_ERROR  =  5,     /**< Indicates a bug in the library code */
    U_MESSAGE_PARSE_ERROR     =  6,
-    U_MEMORY_ALLOCATION_ERROR =  7,       /* Memory allocation error */
+    U_MEMORY_ALLOCATION_ERROR =  7,     /**< Memory allocation error */
    U_INDEX_OUTOFBOUNDS_ERROR =  8,
-    U_PARSE_ERROR             =  9,       /* Equivalent to Java ParseException */
-    U_INVALID_CHAR_FOUND      = 10,       /* In the Character conversion routines: Invalid character or sequence was encountered*/
-    U_TRUNCATED_CHAR_FOUND    = 11,       /* In the Character conversion routines: More bytes are required to complete the conversion successfully*/
-    U_ILLEGAL_CHAR_FOUND      = 12,       /* In codeset conversion: a sequence that does NOT belong in the codepage has been encountered*/
-    U_INVALID_TABLE_FORMAT    = 13,       /* Conversion table file found, but corrupted*/
-    U_INVALID_TABLE_FILE      = 14,       /* Conversion table file not found*/
-    U_BUFFER_OVERFLOW_ERROR   = 15,       /* A result would not fit in the supplied buffer */
-    U_UNSUPPORTED_ERROR       = 16,       /* Requested operation not supported in current context */
-    U_RESOURCE_TYPE_MISMATCH  = 17,       /* an operation is requested over a resource that does not support it*/          
-    U_ILLEGAL_ESCAPE_SEQUENCE = 18,       /* ISO-2022 illlegal escape sequence*/ 
-    U_UNSUPPORTED_ESCAPE_SEQUENCE = 19,   /* ISO-2022 unsupported escape sequence*/  
-    U_ERROR_LIMIT   
+    U_PARSE_ERROR             =  9,     /**< Equivalent to Java ParseException */
+    U_INVALID_CHAR_FOUND      = 10,     /**< In the Character conversion routines: Invalid character or sequence was encountered */
+    U_TRUNCATED_CHAR_FOUND    = 11,     /**< In the Character conversion routines: More bytes are required to complete the conversion successfully */
+    U_ILLEGAL_CHAR_FOUND      = 12,     /**< In codeset conversion: a sequence that does NOT belong in the codepage has been encountered */
+    U_INVALID_TABLE_FORMAT    = 13,     /**< Conversion table file found, but corrupted */
+    U_INVALID_TABLE_FILE      = 14,     /**< Conversion table file not found */
+    U_BUFFER_OVERFLOW_ERROR   = 15,     /**< A result would not fit in the supplied buffer */
+    U_UNSUPPORTED_ERROR       = 16,     /**< Requested operation not supported in current context */
+    U_RESOURCE_TYPE_MISMATCH  = 17,     /**< an operation is requested over a resource that does not support it */
+    U_ILLEGAL_ESCAPE_SEQUENCE = 18,     /**< ISO-2022 illegal escape sequence */
+    U_UNSUPPORTED_ESCAPE_SEQUENCE = 19, /**< ISO-2022 unsupported escape sequence */
+    U_ERROR_LIMIT                       /**< This must always be the last value to indicate the limit for UErrorCode (last error code +1) */
 };

 #ifndef XP_CPLUSPLUS
@ -282,22 +356,35 @@ typedef enum UErrorCode UErrorCode;

 /* Use the following to determine if an UErrorCode represents */
 /* operational success or failure. */
+
 #ifdef XP_CPLUSPLUS
-/** @stable */
-inline UBool U_SUCCESS(UErrorCode code) { return (UBool)(code<=U_ZERO_ERROR); }
-/** @stable */
-inline UBool U_FAILURE(UErrorCode code) { return (UBool)(code>U_ZERO_ERROR); }
+    /**
+     * Does the error code indicate success?
+     * @stable
+     */
+    inline UBool U_SUCCESS(UErrorCode code) { return (UBool)(code<=U_ZERO_ERROR); }
+    /**
+     * Does the error code indicate a failure?
+     * @stable
+     */
+    inline UBool U_FAILURE(UErrorCode code) { return (UBool)(code>U_ZERO_ERROR); }
 #else
-/** @stable */
-#define U_SUCCESS(x) ((x)<=U_ZERO_ERROR)
-/** @stable */
-#define U_FAILURE(x) ((x)>U_ZERO_ERROR)
+    /**
+     * Does the error code indicate success?
+     * @stable
+     */
+#   define U_SUCCESS(x) ((x)<=U_ZERO_ERROR)
+    /**
+     * Does the error code indicate a failure?
+     * @stable
+     */
+#   define U_FAILURE(x) ((x)>U_ZERO_ERROR)
 #endif

 /**
 * Return a string for a UErrorCode value.
 * The string will be the same as the name of the error code constant
- * in the enum above.
+ * in the UErrorCode enum above.
 */
 U_CAPI const char * U_EXPORT2
 u_errorName(UErrorCode code);
@ -307,16 +394,16 @@ u_errorName(UErrorCode code);

 #define U_COPYRIGHT_STRING_LENGTH  160

+/**
+ * Mutex data type.
+ * @internal
+ */
+typedef void *UMTX;
+
 /*===========================================================================*/
 /* Include header for platform utilies */
 /*===========================================================================*/

-
-
-/* Mutex data type.  INTERNAL.*/
-typedef void *UMTX;
-
-
 #include "unicode/putil.h"

 #endif /* _UTYPES */