From 6df16763109cf4a032258081813bb865f8f465c1 Mon Sep 17 00:00:00 2001
From: Andy Heninger <andy.heninger@gmail.com>
Date: Thu, 8 Aug 2002 00:39:13 +0000
Subject: [PATCH] ICU-2077 RBBI: review comments incorporated. (incomplete, 
 more to come.)

X-SVN-Rev: 9612
---
 icu4c/source/common/brkiter.cpp         | 153 +++++++++++++-----------
 icu4c/source/common/rbbi.cpp            |  95 ++++++++++-----
 icu4c/source/common/rbbidata.cpp        |  13 +-
 icu4c/source/common/rbbisetb.h          |   3 +-
 icu4c/source/common/ubrk.cpp            |  23 ++--
 icu4c/source/common/unicode/brkiter.h   |  93 +++++++-------
 icu4c/source/common/unicode/rbbi.h      |  40 +++++--
 icu4c/source/common/unicode/ubrk.h      |  54 +++++----
 icu4c/source/test/intltest/rbbiapts.cpp |  10 +-
 9 files changed, 287 insertions(+), 197 deletions(-)

diff --git a/icu4c/source/common/brkiter.cpp b/icu4c/source/common/brkiter.cpp
index 7b77da8350..dca3e4d4bb 100644
--- a/icu4c/source/common/brkiter.cpp
+++ b/icu4c/source/common/brkiter.cpp
@@ -37,7 +37,7 @@ const int32_t BreakIterator::DONE = (int32_t)-1;
 
 // -------------------------------------
 
-// Creates a simple text boundary for word breaks.
+// Creates a break iterator for word breaks.
 BreakIterator*
 BreakIterator::createWordInstance(const Locale& key, UErrorCode& status)
 {
@@ -49,31 +49,32 @@ BreakIterator::createWordInstance(const Locale& key, UErrorCode& status)
 
     if (U_FAILURE(status))
         return NULL;
+
     if (!uprv_strcmp(key.getLanguage(), "th"))
     {
         filename = "word_th";
     }
 
     UDataMemory* file = udata_open(NULL, "brk", filename, &status);
+    if (U_FAILURE(status)) {
+        return NULL;
+    }
+    // The UDataMemory is adopted by the break iterator.
 
-    if (U_SUCCESS(status)) {
-        if(!uprv_strcmp(filename, "word_th")) {
-            filename = "thaidict.brk";
-            result = new DictionaryBasedBreakIterator(file, filename, status);
-            /* test for NULL */
-            if(result == 0) {
-                status = U_MEMORY_ALLOCATION_ERROR;
-                return 0;
-            }
-        }
-        else {
-            result = new RuleBasedBreakIterator(file, status);
-            /* test for NULL */
-            if(result == 0) {
-                status = U_MEMORY_ALLOCATION_ERROR;
-                return 0;
-            }
-        }
+    if(!uprv_strcmp(filename, "word_th")) {
+        filename = "thaidict.brk";
+        result = new DictionaryBasedBreakIterator(file, filename, status);
+    }
+    else {
+        result = new RuleBasedBreakIterator(file, status);
+    }
+    if (result == NULL) {
+        udata_close(file);
+        status = U_MEMORY_ALLOCATION_ERROR;
+    }
+    if (U_FAILURE(status)) {   // Sometimes redundant check, but simple.
+        delete result;
+        result = NULL;
     }
 
     return result;
@@ -81,7 +82,7 @@ BreakIterator::createWordInstance(const Locale& key, UErrorCode& status)
 
 // -------------------------------------
 
-// Creates a simple text boundary for line breaks.
+// Creates a break iterator for line breaks.
 BreakIterator*
 BreakIterator::createLineInstance(const Locale& key, UErrorCode& status)
 {
@@ -93,39 +94,39 @@ BreakIterator::createLineInstance(const Locale& key, UErrorCode& status)
 
     if (U_FAILURE(status))
         return NULL;
+
     if (!uprv_strcmp(key.getLanguage(), "th"))
     {
         filename = "line_th";
     }
 
     UDataMemory* file = udata_open(NULL, "brk", filename, &status);
-
-    if (!U_FAILURE(status)) {
-        if (!uprv_strcmp(key.getLanguage(), "th")) {
-            filename = "thaidict.brk";
-            result = new DictionaryBasedBreakIterator(file, filename, status);
-            /* test for NULL */
-            if(result == 0) {
-                status = U_MEMORY_ALLOCATION_ERROR;
-                return 0;
-            }
-        }
-        else {
-            result = new RuleBasedBreakIterator(file, status);
-            /* test for NULL */
-            if(result == 0) {
-                status = U_MEMORY_ALLOCATION_ERROR;
-                return 0;
-            }
-        }
+    if (U_FAILURE(status)) {
+        return NULL;
     }
+    // The UDataMemory is adopted by the break iterator.
 
+    if (!uprv_strcmp(key.getLanguage(), "th")) {
+        filename = "thaidict.brk";
+        result = new DictionaryBasedBreakIterator(file, filename, status);
+    }
+    else {
+        result = new RuleBasedBreakIterator(file, status);
+    }
+    if (result == NULL) {
+        udata_close(file);
+        status = U_MEMORY_ALLOCATION_ERROR;
+    }
+    if (U_FAILURE(status)) {   // Sometimes redundant check, but simple.
+        delete result;
+        result = NULL;
+    }
     return result;
 }
 
 // -------------------------------------
 
-// Creates a simple text boundary for character breaks.
+// Creates a break iterator for character breaks.
 BreakIterator*
 BreakIterator::createCharacterInstance(const Locale& /* key */, UErrorCode& status)
 {
@@ -138,22 +139,26 @@ BreakIterator::createCharacterInstance(const Locale& /* key */, UErrorCode& stat
     if (U_FAILURE(status))
         return NULL;
     UDataMemory* file = udata_open(NULL, "brk", filename, &status);
-
-    if (!U_FAILURE(status)) {
-        result = new RuleBasedBreakIterator(file, status);
-        /* test for NULL */
-        if(result == 0) {
-            status = U_MEMORY_ALLOCATION_ERROR;
-            return 0;
-        }
+    if (U_FAILURE(status)) {
+        return NULL;
     }
+    // The UDataMemory is adopted by the break iterator.
 
+    result = new RuleBasedBreakIterator(file, status);
+    if (result == NULL) {
+        udata_close(file);
+        status = U_MEMORY_ALLOCATION_ERROR;
+    }
+    if (U_FAILURE(status)) {   // Sometimes redundant check, but simple.
+        delete result;
+        result = NULL;
+    }
     return result;
 }
 
 // -------------------------------------
 
-// Creates a simple text boundary for sentence breaks.
+// Creates a break iterator for sentence breaks.
 BreakIterator*
 BreakIterator::createSentenceInstance(const Locale& /*key */, UErrorCode& status)
 {
@@ -166,14 +171,19 @@ BreakIterator::createSentenceInstance(const Locale& /*key */, UErrorCode& status
     if (U_FAILURE(status))
         return NULL;
     UDataMemory* file = udata_open(NULL, "brk", filename, &status);
+    if (U_FAILURE(status)) {
+        return NULL;
+    }
+    // The UDataMemory is adopted by the break iterator.
 
-    if (!U_FAILURE(status)) {
-        result = new RuleBasedBreakIterator(file, status);
-        /* test for NULL */
-            if(result == 0) {
-                status = U_MEMORY_ALLOCATION_ERROR;
-                return 0;
-            }
+    result = new RuleBasedBreakIterator(file, status);
+    if (result == NULL) {
+        udata_close(file);
+        status = U_MEMORY_ALLOCATION_ERROR;
+    }
+    if (U_FAILURE(status)) {   // Sometimes redundant check, but simple.
+        delete result;
+        result = NULL;
     }
 
     return result;
@@ -181,7 +191,7 @@ BreakIterator::createSentenceInstance(const Locale& /*key */, UErrorCode& status
 
 // -------------------------------------
 
-// Creates a simple text boundary for title casing breaks.
+// Creates a break iterator for title casing breaks.
 BreakIterator*
 BreakIterator::createTitleInstance(const Locale& /* key */, UErrorCode& status)
 {
@@ -194,14 +204,19 @@ BreakIterator::createTitleInstance(const Locale& /* key */, UErrorCode& status)
     if (U_FAILURE(status))
         return NULL;
     UDataMemory* file = udata_open(NULL, "brk", filename, &status);
+    if (U_FAILURE(status)) {
+        return NULL;
+    }
+    // The UDataMemory is adopted by the break iterator.
 
-    if (!U_FAILURE(status)) {
-        result = new RuleBasedBreakIterator(file, status);
-        /* test for NULL */
-        if(result == 0) {
-            status = U_MEMORY_ALLOCATION_ERROR;
-            return 0;
-        }
+    result = new RuleBasedBreakIterator(file, status);
+    if (result == NULL) {
+        udata_close(file);
+        status = U_MEMORY_ALLOCATION_ERROR;
+    }
+    if (U_FAILURE(status)) {   // Sometimes redundant check, but simple.
+        delete result;
+        result = NULL;
     }
 
     return result;
@@ -234,11 +249,11 @@ BreakIterator::getDisplayName(const Locale& objectLocale,
     return objectLocale.getDisplayName(displayLocale, name);
 }
 
-// -------------------------------------
-
-// Needed because we declare the copy constructor (in order to prevent synthesizing one) and
-// so the default constructor is no longer synthesized.
-
+// ------------------------------------------
+//
+// Default constructor and destructor
+//
+//-------------------------------------------
 BreakIterator::BreakIterator()
 {
     fBufferClone = FALSE;
diff --git a/icu4c/source/common/rbbi.cpp b/icu4c/source/common/rbbi.cpp
index e0dd1ba3a4..f8341e41dd 100644
--- a/icu4c/source/common/rbbi.cpp
+++ b/icu4c/source/common/rbbi.cpp
@@ -18,6 +18,7 @@
 #include "rbbirb.h"
 #include "filestrm.h"
 #include "cmemory.h"
+#include "cstring.h"
 
 #include "uassert.h"
 
@@ -25,8 +26,7 @@ U_NAMESPACE_BEGIN
 
 
 static const int16_t START_STATE = 1;     // The state number of the starting state
-
-static const int16_t STOP_STATE = 0;      // The state-transition value indicating "stop"
+static const int16_t STOP_STATE  = 0;     // The state-transition value indicating "stop"
 
 /**
  * Class ID.  (value is irrelevant; address is important)
@@ -86,6 +86,10 @@ RuleBasedBreakIterator::RuleBasedBreakIterator( const UnicodeString  &rules,
     if (U_FAILURE(status)) {return;};
     RuleBasedBreakIterator *bi = (RuleBasedBreakIterator *)
         RBBIRuleBuilder::createRuleBasedBreakIterator(rules, parseError, status);
+    // Note:  This is a bit awkward.  The RBBI ruleBuilder has a factory method that
+    //        creates and returns a complete RBBI.  From here, in a constructor, we
+    //        can't just return the object created by the builder factory, hence
+    //        the assignment of the factory created object to "this".
     if (U_SUCCESS(status)) {
         *this = *bi;
         delete bi;
@@ -118,16 +122,15 @@ RuleBasedBreakIterator::RuleBasedBreakIterator(const RuleBasedBreakIterator& oth
 }
 
 
-//=======================================================================
-// boilerplate
-//=======================================================================
 /**
  * Destructor
  */
 RuleBasedBreakIterator::~RuleBasedBreakIterator() {
     delete fText;
+    fText = NULL;
     if (fData != NULL) {
         fData->removeReference();
+        fData = NULL;
     }
 }
 
@@ -163,6 +166,7 @@ RuleBasedBreakIterator::operator=(const RuleBasedBreakIterator& that) {
 //-----------------------------------------------------------------------------
 //
 //    init()      Shared initialization routine.   Used by all the constructors.
+//                Initializes all fields, leaving the object in a consistent state.
 //
 //-----------------------------------------------------------------------------
 UBool RuleBasedBreakIterator::fTrace = FALSE;
@@ -179,7 +183,7 @@ void RuleBasedBreakIterator::init() {
     if (debugInitDone == FALSE) {
 #ifdef RBBI_DEBUG
         char *debugEnv = getenv("U_RBBIDEBUG");
-        if (debugEnv && strstr(debugEnv, "trace")) {
+        if (debugEnv && uprv_strstr(debugEnv, "trace")) {
             fTrace = TRUE;
         }
 #endif
@@ -268,7 +272,7 @@ RuleBasedBreakIterator::adoptText(CharacterIterator* newText) {
     reset();
     delete fText;
     fText = newText;
-    fText->first();
+    this->first();
 }
 
 /**
@@ -286,8 +290,8 @@ RuleBasedBreakIterator::setText(const UnicodeString& newText) {
     else {
         delete fText;
         fText = new StringCharacterIterator(newText);
-        fText->first();
     }
+    this->first();
 }
 
 
@@ -435,11 +439,14 @@ int32_t RuleBasedBreakIterator::following(int32_t offset) {
     fLastBreakTagValid = TRUE;
     if (fText == NULL || offset >= fText->endIndex()) {
         // fText->setToEnd();
-        return BreakIterator::DONE;
+        // return BreakIterator::DONE;
+        last();
+        return next();
     }
     else if (offset < fText->startIndex()) {
         // fText->setToStart();
-        return fText->startIndex();
+        // return fText->startIndex();
+        return first();
     }
 
     // otherwise, set our internal iteration position (temporarily)
@@ -476,10 +483,11 @@ int32_t RuleBasedBreakIterator::preceding(int32_t offset) {
     // just return DONE; if it's before the beginning, return the
     // text's starting offset
     if (fText == NULL || offset > fText->endIndex()) {
-        return BreakIterator::DONE;
+        // return BreakIterator::DONE;
+        return last();
     }
     else if (offset < fText->startIndex()) {
-        return fText->startIndex();
+        return first();
     }
 
     // if we start by updating the current iteration position to the
@@ -499,19 +507,25 @@ int32_t RuleBasedBreakIterator::preceding(int32_t offset) {
 UBool RuleBasedBreakIterator::isBoundary(int32_t offset) {
     // the beginning index of the iterator is always a boundary position by definition
     if (fText == NULL || offset == fText->startIndex()) {
+        first();       // For side effects on current position, tag values.
         return TRUE;
     }
 
     // out-of-range indexes are never boundary positions
-    else if (offset < fText->startIndex() || offset > fText->endIndex()) {
+    if (offset < fText->startIndex()) {
+        first();       // For side effects on current position, tag values.
+        return FALSE;
+    }
+
+    if (offset > fText->endIndex()) {
+        last();        // For side effects on current position, tag values.
         return FALSE;
     }
 
     // otherwise, we can use following() on the position before the specified
-    // one and return true of the position we get back is the one the user
+    // one and return true if the position we get back is the one the user
     // specified
-    else
-        return following(offset - 1) == offset;
+    return following(offset - 1) == offset;
 }
 
 /**
@@ -555,7 +569,7 @@ int32_t RuleBasedBreakIterator::handleNext(void) {
     int32_t result = fText->getIndex() + 1;
     int32_t lookaheadResult = 0;
 
-    // begin in state 1
+    // Initialize the state machine.  Begin in state 1
     int32_t            state           = START_STATE;
     int16_t            category;
     UChar32            c               = fText->current32();
@@ -565,16 +579,19 @@ int32_t RuleBasedBreakIterator::handleNext(void) {
 
     fLastBreakTag = 0;
 
-    row = (RBBIStateTableRow *)
+    row = (RBBIStateTableRow *)    // Point to starting row of state table.
         (fData->fForwardTable->fTableData + (fData->fForwardTable->fRowLen * state));
+
+    // Character Category fetch for starting character.
+    //    See comments on character category code within loop, below.
     UTRIE_GET16(&fData->fTrie, c, category);
     if ((category & 0x4000) != 0)  {
           fDictionaryCharCount++;
           category &= ~0x4000;
         }
 
-      // loop until we reach the end of the text or transition to state 0
-      for (;;) {
+    // loop until we reach the end of the text or transition to state 0
+    for (;;) {
         if (c == CharacterIterator::DONE && fText->hasNext()==FALSE) {
             // Note: CharacterIterator::DONE is 0xffff, which is also a legal
             //       character value.  Check for DONE first, because it's quicker,
@@ -586,15 +603,16 @@ int32_t RuleBasedBreakIterator::handleNext(void) {
         // Note:  the 16 in UTRIE_GET16 refers to the size of the data being returned,
         //        not the size of the character going in.
         //
-        //  And off bit 14, which flags use of a dictionary for dictionary based
-        //    iterators, but should be ignored here.
         UTRIE_GET16(&fData->fTrie, c, category);
 
         // Check the dictionary bit in the character's category.
-        //    Counter is only used by dictionary based iterators.
+        //    Counter is only used by dictionary based iterators (subclasses).
+        //    Chars that need to be handled by a dictionary have a flag bit set
+        //    in their category values.
         //
         if ((category & 0x4000) != 0)  {
             fDictionaryCharCount++;
+            //  Mask off (bitwise AND out) the dictionary flag bit.
             category &= ~0x4000;
         }
 
@@ -616,6 +634,8 @@ int32_t RuleBasedBreakIterator::handleNext(void) {
         // Get the next character.  Doing it here positions the iterator
         //    to the correct position for recording matches in the code that
         //    follows.
+        //  TODO:  16 bit next, and a 16 bit TRIE lookup, with escape code
+        //         for non-BMP chars, would be faster.
         c = fText->next32();
 
         if (row->fAccepting == 0 && row->fLookAhead == 0) {
@@ -636,7 +656,7 @@ int32_t RuleBasedBreakIterator::handleNext(void) {
         if (row->fAccepting == 0 && row->fLookAhead != 0) {
             // Lookahead match point.  Remember it, but only if no other rule has
             //                         unconitionally matched up to this point.
-            // TODO:  handle case where there's a pending match from a different rule
+            // TODO:  handle case where there's a pending match from a different rule -
             //        where lookaheadStatus != 0  && lookaheadStatus != row->fLookAhead.
             int32_t  r = fText->getIndex();
             if (r > result) {
@@ -672,6 +692,7 @@ continueOn:
     // a lookahead state, advance the break position to the lookahead position
     // (the theory here is that if there are no characters at all after the lookahead
     // position, that always matches the lookahead criteria)
+    //   TODO:  is this really the right behavior?
     if (c == CharacterIterator::DONE &&
         fText->hasNext()==FALSE &&
         lookaheadResult == fText->endIndex()) {
@@ -694,8 +715,9 @@ continueOn:
 //      This method backs the iterator back up to a "safe position" in the text.
 //      This is a position that we know, without any context, must be a break position.
 //      The various calling methods then iterate forward from this safe position to
-//      the appropriate position to return.  (For more information, see the description
-//      of buildBackwardsStateTable() in RuleBasedBreakIterator.Builder.)
+//      the appropriate position to return.
+//
+//      The logic of this function is very similar to handleNext(), above.
 //
 //-----------------------------------------------------------------------------------
 int32_t RuleBasedBreakIterator::handlePrevious(void) {
@@ -833,18 +855,27 @@ RuleBasedBreakIterator::reset()
 
 //-------------------------------------------------------------------------------
 //
-//   getRuleStatus()
+//   getRuleStatus()   Return the break rule tag associated with the current
+//                     iterator position.  If the iterator arrived at its current
+//                     position by iterating forwards, the value will have been
+//                     cached by the handleNext() function.
+//
+//                     If no cached status value is available, the status is
+//                     found by doing a previous() followed by a next(), which
+//                     leaves the iterator where it started, and computes the
+//                     status while doing the next().
 //
 //-------------------------------------------------------------------------------
 int32_t  RuleBasedBreakIterator::getRuleStatus() const {
-    // If the break tag value is unkown, back the iterator up, then move
-    //   forward again.  Moving forward will set the fLastBreakTag value correctly.
     RuleBasedBreakIterator *nonConstThis  = (RuleBasedBreakIterator *)this;
     if (fLastBreakTagValid == FALSE) {
-        if (current() == fText->startIndex()) {
+        //  No cached status is available.
+        if (fText == NULL || current() == fText->startIndex()) {
+            //  At start of text, or there is no text.  Status is always zero.
             nonConstThis->fLastBreakTag = 0;
             nonConstThis->fLastBreakTagValid = TRUE;
         } else {
+            //  Not at start of text.  Find status the tedious way.
             int32_t pa = current();
             nonConstThis->previous();
             int32_t pb = nonConstThis->next();
@@ -857,7 +888,7 @@ int32_t  RuleBasedBreakIterator::getRuleStatus() const {
 
 //-------------------------------------------------------------------------------
 //
-//   getFlattenedData      Access to the compiled form of the rules,
+//   getBinaryRules        Access to the compiled form of the rules,
 //                         for use by build system tools that save the data
 //                         for standard iterator types.
 //
@@ -868,7 +899,7 @@ const uint8_t  *RuleBasedBreakIterator::getBinaryRules(uint32_t &length) {
 
     if (fData != NULL) {
         retPtr = (const uint8_t *)fData->fHeader;
-         length = fData->fHeader->fLength;
+        length = fData->fHeader->fLength;
     }
     return retPtr;
 }
diff --git a/icu4c/source/common/rbbidata.cpp b/icu4c/source/common/rbbidata.cpp
index 38e37ecc78..50540ef761 100644
--- a/icu4c/source/common/rbbidata.cpp
+++ b/icu4c/source/common/rbbidata.cpp
@@ -1,8 +1,8 @@
 /*
-**********************************************************************
+***************************************************************************
 *   Copyright (C) 1999-2002 International Business Machines Corporation   *
-*   and others. All rights reserved.                                 *
-**********************************************************************
+*   and others. All rights reserved.                                      *
+***************************************************************************
 */
 
 #include "unicode/utypes.h"
@@ -156,7 +156,7 @@ int32_t  RBBIDataWrapper::hashCode() {
 //
 //-----------------------------------------------------------------------------
 void RBBIDataWrapper::removeReference() {
-    if (umtx_atomic_dec(&fRefCount) == 0) {  
+    if (umtx_atomic_dec(&fRefCount) == 0) {
         delete this;
     }
 };
@@ -221,9 +221,4 @@ void  RBBIDataWrapper::printData() {
 
 
 
-
-
-
-
-
 U_NAMESPACE_END
diff --git a/icu4c/source/common/rbbisetb.h b/icu4c/source/common/rbbisetb.h
index 735166aa69..385b0be05a 100644
--- a/icu4c/source/common/rbbisetb.h
+++ b/icu4c/source/common/rbbisetb.h
@@ -35,7 +35,8 @@ U_NAMESPACE_BEGIN
 //     All of them are strung together in a linked list, which is kept in order
 //     (by character)
 //
-struct RangeDescriptor : public UObject {
+class RangeDescriptor : public UObject {
+public:
     UChar32            fStartChar;      // Start of range, unicode 32 bit value.
     UChar32            fEndChar;        // End of range, unicode 32 bit value.
     int32_t            fNum;            // runtime-mapped input value for this range.
diff --git a/icu4c/source/common/ubrk.cpp b/icu4c/source/common/ubrk.cpp
index a257998bc5..601e734c8a 100644
--- a/icu4c/source/common/ubrk.cpp
+++ b/icu4c/source/common/ubrk.cpp
@@ -94,22 +94,27 @@ ubrk_openRules(  const UChar        *rules,
                        UParseError  *parseErr,
                        UErrorCode   *status)  {
 
-    BreakIterator *result = 0;
+    if (status == NULL || U_FAILURE(*status)){
+        return 0;
+    }
 
+    BreakIterator *result = 0;
     UnicodeString ruleString(rules, rulesLength);
     result = RBBIRuleBuilder::createRuleBasedBreakIterator(ruleString, *parseErr, *status);
     if(U_FAILURE(*status)) {
         return 0;
     }
 
-    UCharCharacterIterator *iter = 0;
-    iter = new UCharCharacterIterator(text, textLength);
-    if(iter == 0) {
-        *status = U_MEMORY_ALLOCATION_ERROR;
-        delete result;
-        return 0;
+    if (text != NULL) {
+        UCharCharacterIterator *iter = 0;
+        iter = new UCharCharacterIterator(text, textLength);
+        if(iter == 0) {
+            *status = U_MEMORY_ALLOCATION_ERROR;
+            delete result;
+            return 0;
+        }
+        result->adoptText(iter);
     }
-    result->adoptText(iter);
     return (UBreakIterator *)result;
 }
 
@@ -243,7 +248,7 @@ ubrk_countAvailable()
 }
 
 
-U_CAPI  UBool U_EXPORT2 
+U_CAPI  UBool U_EXPORT2
 ubrk_isBoundary(UBreakIterator *bi, int32_t offset)
 {
     return ((BreakIterator *)bi)->isBoundary(offset);
diff --git a/icu4c/source/common/unicode/brkiter.h b/icu4c/source/common/unicode/brkiter.h
index b015ac97f6..11701bba4b 100644
--- a/icu4c/source/common/unicode/brkiter.h
+++ b/icu4c/source/common/unicode/brkiter.h
@@ -1,10 +1,10 @@
 /*
 *****************************************************************************************
-*   Copyright (C) 1997-2001, International Business Machines
+*   Copyright (C) 1997-2002, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *****************************************************************************************
 *
-* File BRKITER.H
+* File brkiter.h
 *
 * Modification History:
 *
@@ -65,13 +65,13 @@ U_NAMESPACE_BEGIN
  * <P>
  * Helper function to output text
  * <pre>
- * \code 
+ * \code
  *    void printTextRange( BreakIterator& iterator, int32_t start, int32_t end )
  *    {
  *        UnicodeString textBuffer, temp;
  *        CharacterIterator *strIter = iterator.createText();
  *        strIter->getText(temp);
- *        cout << " " << start << " " << end << " |" 
+ *        cout << " " << start << " " << end << " |"
  *             << temp.extractBetween(start, end, textBuffer)
  *             << "|" << endl;
  *        delete strIter;
@@ -149,7 +149,7 @@ U_NAMESPACE_BEGIN
  *           BreakIterator* boundary;
  *           UnicodeString stringToExamine("Aaa bbb ccc. Ddd eee fff.");
  *           cout << "Examining: " << stringToExamine << endl;
- * 
+ *
  *           //print each sentence in forward and reverse order
  *           boundary = BreakIterator::createSentenceInstance( Locale::US );
  *           boundary->setText(stringToExamine);
@@ -158,7 +158,7 @@ U_NAMESPACE_BEGIN
  *           cout << "----- backward: ----------" << endl;
  *           printEachBackward(*boundary);
  *           delete boundary;
- * 
+ *
  *           //print each word in order
  *           boundary = BreakIterator::createWordInstance();
  *           boundary->setText(stringToExamine);
@@ -173,7 +173,7 @@ U_NAMESPACE_BEGIN
  *           //print word at charpos 10
  *           cout << "----- at pos 10: ---------" << endl;
  *           printAt(*boundary, 10 );
- * 
+ *
  *           delete boundary;
  *       }
  * \endcode
@@ -222,6 +222,8 @@ public:
 
     /**
      * Return a CharacterIterator over the text being analyzed.
+     * Changing the state of the returned iterator can have undefined consequences
+     * on the operation of the break iterator.  If you need to change it, clone it first.
      * @stable
      */
     virtual const CharacterIterator& getText(void) const = 0;
@@ -278,8 +280,7 @@ public:
     virtual int32_t next(void) = 0;
 
     /**
-     * Return character index of the text boundary that was most recently
-     * returned by next(), previous(), first(), or last()
+     * Return character index of the current iterator position within the text.
      * @return The boundary most recently returned.
      * @stable
      */
@@ -304,9 +305,11 @@ public:
      * @stable
      */
     virtual int32_t preceding(int32_t offset) = 0;
- 
+
     /**
      * Return true if the specfied position is a boundary position.
+     * As a side effect, the current position of the iterator is set
+     * to the first boundary position at or following the specified offset.
      * @param offset the offset to check.
      * @return True if "offset" is a boundary position.
      * @stable
@@ -328,22 +331,22 @@ public:
      * Create BreakIterator for word-breaks using the given locale.
      * Returns an instance of a BreakIterator implementing word breaks.
      * WordBreak is useful for word selection (ex. double click)
-     * @param where the locale. 
+     * @param where the locale.
      * @param status the error code
-     * @return A BreakIterator for word-breaks.  The UErrorCode& status 
+     * @return A BreakIterator for word-breaks.  The UErrorCode& status
      * parameter is used to return status information to the user.
      * To check whether the construction succeeded or not, you should check
      * the value of U_SUCCESS(err).  If you wish more detailed information, you
      * can check for informational error results which still indicate success.
-     * U_USING_FALLBACK_ERROR indicates that a fall back locale was used.  For
+     * U_USING_FALLBACK_WARNING indicates that a fall back locale was used.  For
      * example, 'de_CH' was requested, but nothing was found there, so 'de' was
-     * used.  U_USING_DEFAULT_ERROR indicates that the default locale data was
+     * used.  U_USING_DEFAULT_WARNING indicates that the default locale data was
      * used; neither the requested locale nor any of its fall back locales
      * could be found.
      * The caller owns the returned object and is responsible for deleting it.
      * @stable
      */
-    static BreakIterator* createWordInstance(const Locale& where, 
+    static BreakIterator* createWordInstance(const Locale& where,
                                                    UErrorCode& status);
 
     /**
@@ -354,84 +357,84 @@ public:
      * LineBreak is useful for word wrapping text.
      * @param where the locale.
      * @param status The error code.
-     * @return A BreakIterator for line-breaks.  The UErrorCode& status 
+     * @return A BreakIterator for line-breaks.  The UErrorCode& status
      * parameter is used to return status information to the user.
      * To check whether the construction succeeded or not, you should check
      * the value of U_SUCCESS(err).  If you wish more detailed information, you
      * can check for informational error results which still indicate success.
-     * U_USING_FALLBACK_ERROR indicates that a fall back locale was used.  For
+     * U_USING_FALLBACK_WARNING indicates that a fall back locale was used.  For
      * example, 'de_CH' was requested, but nothing was found there, so 'de' was
-     * used.  U_USING_DEFAULT_ERROR indicates that the default locale data was
+     * used.  U_USING_DEFAULT_WARNING indicates that the default locale data was
      * used; neither the requested locale nor any of its fall back locales
      * could be found.
      * The caller owns the returned object and is responsible for deleting it.
      * @stable
      */
-    static BreakIterator* createLineInstance(const Locale& where, 
+    static BreakIterator* createLineInstance(const Locale& where,
                                                    UErrorCode& status);
 
     /**
      * Create BreakIterator for character-breaks using specified locale
      * Returns an instance of a BreakIterator implementing character breaks.
      * Character breaks are boundaries of combining character sequences.
-     * @param where the locale. 
+     * @param where the locale.
      * @param status The error code.
-     * @return A BreakIterator for character-breaks.  The UErrorCode& status 
+     * @return A BreakIterator for character-breaks.  The UErrorCode& status
      * parameter is used to return status information to the user.
      * To check whether the construction succeeded or not, you should check
      * the value of U_SUCCESS(err).  If you wish more detailed information, you
      * can check for informational error results which still indicate success.
-     * U_USING_FALLBACK_ERROR indicates that a fall back locale was used.  For
+     * U_USING_FALLBACK_WARNING indicates that a fall back locale was used.  For
      * example, 'de_CH' was requested, but nothing was found there, so 'de' was
-     * used.  U_USING_DEFAULT_ERROR indicates that the default locale data was
+     * used.  U_USING_DEFAULT_WARNING indicates that the default locale data was
      * used; neither the requested locale nor any of its fall back locales
      * could be found.
      * The caller owns the returned object and is responsible for deleting it.
      * @stable
      */
-    static BreakIterator* createCharacterInstance(const Locale& where, 
+    static BreakIterator* createCharacterInstance(const Locale& where,
                                                         UErrorCode& status);
 
     /**
      * Create BreakIterator for sentence-breaks using specified locale
      * Returns an instance of a BreakIterator implementing sentence breaks.
-     * @param where the locale. 
+     * @param where the locale.
      * @param status The error code.
-     * @return A BreakIterator for sentence-breaks.  The UErrorCode& status 
+     * @return A BreakIterator for sentence-breaks.  The UErrorCode& status
      * parameter is used to return status information to the user.
      * To check whether the construction succeeded or not, you should check
      * the value of U_SUCCESS(err).  If you wish more detailed information, you
      * can check for informational error results which still indicate success.
-     * U_USING_FALLBACK_ERROR indicates that a fall back locale was used.  For
+     * U_USING_FALLBACK_WARNING indicates that a fall back locale was used.  For
      * example, 'de_CH' was requested, but nothing was found there, so 'de' was
-     * used.  U_USING_DEFAULT_ERROR indicates that the default locale data was
+     * used.  U_USING_DEFAULT_WARNING indicates that the default locale data was
      * used; neither the requested locale nor any of its fall back locales
      * could be found.
      * The caller owns the returned object and is responsible for deleting it.
      * @stable
      */
-    static BreakIterator* createSentenceInstance(const Locale& where, 
+    static BreakIterator* createSentenceInstance(const Locale& where,
                                                        UErrorCode& status);
 
     /**
      * Create BreakIterator for title-casing breaks using the specified locale
      * Returns an instance of a BreakIterator implementing title breaks.
-     * @param where the locale. 
+     * @param where the locale.
      * @param status The error code.
-     * @return A BreakIterator for title-breaks.  The UErrorCode& status 
+     * @return A BreakIterator for title-breaks.  The UErrorCode& status
      * parameter is used to return status information to the user.
      * To check whether the construction succeeded or not, you should check
      * the value of U_SUCCESS(err).  If you wish more detailed information, you
      * can check for informational error results which still indicate success.
-     * U_USING_FALLBACK_ERROR indicates that a fall back locale was used.  For
+     * U_USING_FALLBACK_WARNING indicates that a fall back locale was used.  For
      * example, 'de_CH' was requested, but nothing was found there, so 'de' was
-     * used.  U_USING_DEFAULT_ERROR indicates that the default locale data was
+     * used.  U_USING_DEFAULT_WARNING indicates that the default locale data was
      * used; neither the requested locale nor any of its fall back locales
      * could be found.
      * The caller owns the returned object and is responsible for deleting it.
-     * @stable
+     * @draft ICU 2.1
      */
-    static BreakIterator* createTitleInstance(const Locale& where, 
+    static BreakIterator* createTitleInstance(const Locale& where,
                                                        UErrorCode& status);
 
     /**
@@ -469,24 +472,30 @@ public:
     /**
      * Thread safe client-buffer-based cloning operation
      *    Do NOT call delete on a safeclone, since 'new' is not used to create it.
-     * @param stackBuffer user allocated space for the new clone. If NULL new memory will be allocated. 
+     * @param stackBuffer user allocated space for the new clone. If NULL new memory will be allocated.
      * If buffer is not large enough, new memory will be allocated.
-     * @param BufferSize reference to size of allocated space. 
-     * If BufferSize == 0, a sufficient size for use in cloning will 
+     * @param BufferSize reference to size of allocated space.
+     * If BufferSize == 0, a sufficient size for use in cloning will
      * be returned ('pre-flighting')
-     * If BufferSize is not enough for a stack-based safe clone, 
+     * If BufferSize is not enough for a stack-based safe clone,
      * new memory will be allocated.
      * @param status to indicate whether the operation went on smoothly or there were errors
-     *  An informational status value, U_SAFECLONE_ALLOCATED_ERROR, is used if any allocations were 
+     *  An informational status value, U_SAFECLONE_ALLOCATED_WARNING, is used if any allocations were
      *  necessary.
      * @return pointer to the new clone
-     *  
-     * @draft ICU 1.8
+     *
+     * @stable
      */
     virtual BreakIterator *  createBufferClone(void *stackBuffer,
                                                int32_t &BufferSize,
                                                UErrorCode &status) = 0;
 
+    /**
+     *   Determine whether the BreakIterator was created in user memory by
+     *   createBufferClone(), and thus should not be deleted.  Such objects
+     *   must be closed by an explicit call to the destructor (not delete).
+     *  @stable
+     */
     inline UBool isBufferClone(void);
 
 
diff --git a/icu4c/source/common/unicode/rbbi.h b/icu4c/source/common/unicode/rbbi.h
index c2d5b9d43b..af6bf31232 100644
--- a/icu4c/source/common/unicode/rbbi.h
+++ b/icu4c/source/common/unicode/rbbi.h
@@ -24,9 +24,9 @@ struct UTrie;
 U_NAMESPACE_BEGIN
 
 struct RBBIDataHeader;
-class RuleBasedBreakIteratorTables;
-class BreakIterator;
-class RBBIDataWrapper;
+class  RuleBasedBreakIteratorTables;
+class  BreakIterator;
+class  RBBIDataWrapper;
 
 
 
@@ -37,10 +37,6 @@ class RBBIDataWrapper;
  * <p>See the ICU User Guide for information on Break Iterator Rules.</p>
  *
  */
-
-
-
-
 class U_COMMON_API RuleBasedBreakIterator : public BreakIterator {
 
 protected:
@@ -74,7 +70,7 @@ protected:
     uint32_t           fDictionaryCharCount;
 
     //
-    // Debugging flag.
+    // Debugging flag.  Trace operation of state machine when true.
     //
     static UBool        fTrace;
 
@@ -117,7 +113,8 @@ protected:
 public:
 
     /** Default constructor.  Creates an empty shell of an iterator, with no
-     *  rules or text to iterate over.   Object can subsequently be assigned.
+     *  rules or text to iterate over.   Object can subsequently be assigned to.
+     *  @draft ICU 2.2
      */
     RuleBasedBreakIterator();
 
@@ -134,12 +131,14 @@ public:
      * @param parseError  In the event of a syntax error in the rules, provides the location
      *                    within the rules of the problem.
      * @param status Information on any errors encountered.
+     *  @draft ICU 2.2
      */
     RuleBasedBreakIterator( const UnicodeString    &rules,
                              UParseError           &parseError,
                              UErrorCode            &status);
     /**
      * Destructor
+     *  @stable
      */
     virtual ~RuleBasedBreakIterator();
 
@@ -148,6 +147,7 @@ public:
      * and iterate over the same text, as the one passed in.
      * @param that The RuleBasedBreakItertor passed in
      * @return the newly created RuleBasedBreakIterator
+     *  @stable
      */
     RuleBasedBreakIterator& operator=(const RuleBasedBreakIterator& that);
 
@@ -157,6 +157,7 @@ public:
      * @param that The BreakIterator to be compared for equality
      * @Return TRUE if both BreakIterators are of the
      * same class, have the same behavior, and iterate over the same text.
+     *  @stable
      */
     virtual UBool operator==(const BreakIterator& that) const;
 
@@ -165,6 +166,7 @@ public:
      * and vice versa.
      * @param that The BreakIterator to be compared for inequality
      * @return TRUE if both BreakIterators are not same.
+     *  @stable
      */
     UBool operator!=(const BreakIterator& that) const;
 
@@ -175,18 +177,21 @@ public:
      *   will correctly clone (copy) a derived class.
      * clone() is thread safe.  Multiple threads may simultaeneously
      * clone the same source break iterator.
+     *  @stable
      */
     virtual BreakIterator* clone() const;
 
     /**
      * Compute a hash code for this BreakIterator
      * @return A hash code
+     *  @stable
      */
     virtual int32_t hashCode(void) const;
 
     /**
      * Returns the description used to create this iterator
      * @return the description used to create this iterator
+     *  @stable
      */
     virtual const UnicodeString& getRules(void) const;
 
@@ -200,6 +205,7 @@ public:
      * Changing the state of this iterator can have undefined consequences.  If
      * you need to change it, clone it first.
      * @return An iterator over the text being analyzed.
+     *  @stable
      */
     virtual const CharacterIterator& getText(void) const;
 
@@ -209,6 +215,7 @@ public:
      * the current iteration position to the beginning of the text.
      * @param newText An iterator over the text to analyze.  The BreakIterator
      * takes ownership of the character iterator.  The caller MUST NOT delete it!
+     *  @stable
      */
     virtual void adoptText(CharacterIterator* newText);
 
@@ -216,6 +223,7 @@ public:
      * Set the iterator to analyze a new piece of text.  This function resets
      * the current iteration position to the beginning of the text.
      * @param newText The text to analyze.
+     *  @stable
      */
     virtual void setText(const UnicodeString& newText);
 
@@ -223,6 +231,7 @@ public:
      * Sets the current iteration position to the beginning of the text.
      * (i.e., the CharacterIterator's starting offset).
      * @return The offset of the beginning of the text.
+     *  @stable
      */
     virtual int32_t first(void);
 
@@ -230,6 +239,7 @@ public:
      * Sets the current iteration position to the end of the text.
      * (i.e., the CharacterIterator's ending offset).
      * @return The text's past-the-end offset.
+     *  @stable
      */
     virtual int32_t last(void);
 
@@ -241,18 +251,21 @@ public:
      * (negative is backwards, and positive is forwards).
      * @return The character offset of the boundary position n boundaries away from
      * the current one.
+     *  @stable
      */
     virtual int32_t next(int32_t n);
 
     /**
      * Advances the iterator to the next boundary position.
      * @return The position of the first boundary after this one.
+     *  @stable
      */
     virtual int32_t next(void);
 
     /**
      * Moves the iterator backwards, to the last boundary preceding this one.
      * @return The position of the last boundary position preceding this one.
+     *  @stable
      */
     virtual int32_t previous(void);
 
@@ -261,6 +274,7 @@ public:
      * the specified position.
      * @param offset The position from which to begin searching for a break position.
      * @return The position of the first break after the current position.
+     *  @stable
      */
     virtual int32_t following(int32_t offset);
 
@@ -269,6 +283,7 @@ public:
      * specified position.
      * @param offset The position to begin searching for a break from.
      * @return The position of the last boundary before the starting position.
+     *  @stable
      */
     virtual int32_t preceding(int32_t offset);
 
@@ -278,12 +293,14 @@ public:
      * or after "offset".
      * @param offset the offset to check.
      * @return True if "offset" is a boundary position.
+     *  @stable
      */
     virtual UBool isBoundary(int32_t offset);
 
     /**
      * Returns the current iteration position.
      * @return The current iteration position.
+     * @stable
      */
     virtual int32_t current(void) const;
 
@@ -295,6 +312,7 @@ public:
      * status, a default value of 0 is returned.
      * @return the status from the break rule that determined the most recently
      * returned break position.
+     * @draft ICU 2.2
      */
     virtual int32_t getRuleStatus() const;
 
@@ -336,7 +354,7 @@ public:
      *                     buffer size, but do not clone the object.  If the
      *                     size was too small (but not zero), allocate heap
      *                     storage for the cloned object.
-     * 
+     *
      * @param status       Error status.  U_SAFECLONE_ALLOCATED_WARNING will be
      *                     returned if the the provided buffer was too small, and
      *                     the clone was therefore put on the heap.
@@ -344,6 +362,7 @@ public:
      * @return  Pointer to the clone object.  This may differ from the stackBuffer
      *          address if the byte alignment of the stack buffer was not suitable
      *          or if the stackBuffer was too small to hold the clone.
+     * @stable
      */
     virtual BreakIterator *  createBufferClone(void *stackBuffer,
                                                int32_t &BufferSize,
@@ -365,6 +384,7 @@ public:
      * @return   A pointer to the binary (compiled) rule data.  The storage
      *           belongs to the RulesBasedBreakIterator object, not the
      *           caller, and must not be modified or deleted.
+     * @internal
      */
     virtual const uint8_t *getBinaryRules(uint32_t &length);
 
diff --git a/icu4c/source/common/unicode/ubrk.h b/icu4c/source/common/unicode/ubrk.h
index 045dea019f..f627cf6bfe 100644
--- a/icu4c/source/common/unicode/ubrk.h
+++ b/icu4c/source/common/unicode/ubrk.h
@@ -47,7 +47,7 @@
  * typically starts of words, that should be set to Title Case
  * when title casing the text.
  * <P>
- * 
+ *
  * This is the interface for all text boundaries.
  * <P>
  * Examples:
@@ -204,15 +204,27 @@ typedef enum UBreakIteratorType UBreakIteratorType;
  *  than for single individual values.
 */
 enum UWordBreak {
+    /** Tag value for "words" that do not fit into any of the other categories.
+     *  Includes spaces and most punctuation. */
     UBRK_WORD_NONE           = 0,
+    /** Upper bound for tags for uncategorized words. */
     UBRK_WORD_NONE_LIMIT     = 100,
+    /** Tag value for words that appear to be numbers, lower limit.    */
     UBRK_WORD_NUMBER         = 100,
+    /** Tag value for words that appear to be numbers, upper limit.    */
     UBRK_WORD_NUMBER_LIMIT   = 200,
+    /** Tag value for words that contain letters, excluding
+     *  hiragana, katakana or ideographic characters, lower limit.    */
     UBRK_WORD_LETTER         = 200,
+    /** Tag value for words containing letters, upper limit  */
     UBRK_WORD_LETTER_LIMIT   = 300,
-    UBRK_WORD_HIRAKATA       = 300,
-    UBRK_WORD_HIRAKATA_LIMIT = 400,
+    /** Tag value for words containing kana characters, lower limit */
+    UBRK_WORD_KANA           = 300,
+    /** Tag value for words containing kana characters, upper limit */
+    UBRK_WORD_KANA_LIMIT     = 400,
+    /** Tag value for words containing ideographic characters, lower limit */
     UBRK_WORD_IDEO           = 400,
+    /** Tag value for words containing ideographic characters, upper limit */
     UBRK_WORD_IDEO_LIMIT     = 500
 };
 typedef enum UWordBreak UWordBreak;
@@ -232,7 +244,7 @@ typedef enum UWordBreak UWordBreak;
  * @see ubrk_openRules
  * @stable
  */
-U_CAPI UBreakIterator* U_EXPORT2 
+U_CAPI UBreakIterator* U_EXPORT2
 ubrk_open(UBreakIteratorType type,
       const char *locale,
       const UChar *text,
@@ -252,9 +264,9 @@ ubrk_open(UBreakIteratorType type,
  * @param status A UErrorCode to receive any errors.
  * @return A UBreakIterator for the specified rules.
  * @see ubrk_open
- * @draft
+ * @draft ICU 2.2
  */
-U_CAPI UBreakIterator* U_EXPORT2 
+U_CAPI UBreakIterator* U_EXPORT2
 ubrk_openRules(const UChar     *rules,
                int32_t         rulesLength,
                const UChar     *text,
@@ -276,9 +288,9 @@ ubrk_openRules(const UChar     *rules,
  * @param status to indicate whether the operation went on smoothly or there were errors
  *  An informational status value, U_SAFECLONE_ALLOCATED_ERROR, is used if any allocations were necessary.
  * @return pointer to the new clone
- * @draft ICU 1.8
+ * @stable
  */
-U_CAPI UBreakIterator * U_EXPORT2 
+U_CAPI UBreakIterator * U_EXPORT2
 ubrk_safeClone(
           const UBreakIterator *bi,
           void *stackBuffer,
@@ -293,7 +305,7 @@ ubrk_safeClone(
 * @param bi The break iterator to close.
  * @stable
 */
-U_CAPI void U_EXPORT2 
+U_CAPI void U_EXPORT2
 ubrk_close(UBreakIterator *bi);
 
 /**
@@ -304,7 +316,7 @@ ubrk_close(UBreakIterator *bi);
  * @param status The error code
  * @stable
  */
-U_CAPI void U_EXPORT2 
+U_CAPI void U_EXPORT2
 ubrk_setText(UBreakIterator* bi,
              const UChar*    text,
              int32_t         textLength,
@@ -318,7 +330,7 @@ ubrk_setText(UBreakIterator* bi,
  * \Ref{ubrk_first}, or \Ref{ubrk_last}.
  * @stable
  */
-U_CAPI int32_t U_EXPORT2 
+U_CAPI int32_t U_EXPORT2
 ubrk_current(const UBreakIterator *bi);
 
 /**
@@ -330,7 +342,7 @@ ubrk_current(const UBreakIterator *bi);
  * @see ubrk_previous
  * @stable
  */
-U_CAPI int32_t U_EXPORT2 
+U_CAPI int32_t U_EXPORT2
 ubrk_next(UBreakIterator *bi);
 
 /**
@@ -342,7 +354,7 @@ ubrk_next(UBreakIterator *bi);
  * @see ubrk_next
  * @stable
  */
-U_CAPI int32_t U_EXPORT2 
+U_CAPI int32_t U_EXPORT2
 ubrk_previous(UBreakIterator *bi);
 
 /**
@@ -353,7 +365,7 @@ ubrk_previous(UBreakIterator *bi);
  * @see ubrk_last
  * @stable
  */
-U_CAPI int32_t U_EXPORT2 
+U_CAPI int32_t U_EXPORT2
 ubrk_first(UBreakIterator *bi);
 
 /**
@@ -366,7 +378,7 @@ ubrk_first(UBreakIterator *bi);
  * @see ubrk_first
  * @stable
  */
-U_CAPI int32_t U_EXPORT2 
+U_CAPI int32_t U_EXPORT2
 ubrk_last(UBreakIterator *bi);
 
 /**
@@ -378,7 +390,7 @@ ubrk_last(UBreakIterator *bi);
  * @see ubrk_following
  * @stable
  */
-U_CAPI int32_t U_EXPORT2 
+U_CAPI int32_t U_EXPORT2
 ubrk_preceding(UBreakIterator *bi,
            int32_t offset);
 
@@ -391,7 +403,7 @@ ubrk_preceding(UBreakIterator *bi,
  * @see ubrk_preceding
  * @stable
  */
-U_CAPI int32_t U_EXPORT2 
+U_CAPI int32_t U_EXPORT2
 ubrk_following(UBreakIterator *bi,
            int32_t offset);
 
@@ -404,7 +416,7 @@ ubrk_following(UBreakIterator *bi,
 * @see ubrk_countAvailable
 * @stable
 */
-U_CAPI const char* U_EXPORT2 
+U_CAPI const char* U_EXPORT2
 ubrk_getAvailable(int32_t index);
 
 /**
@@ -415,7 +427,7 @@ ubrk_getAvailable(int32_t index);
 * @see ubrk_getAvailable
 * @stable
 */
-U_CAPI int32_t U_EXPORT2 
+U_CAPI int32_t U_EXPORT2
 ubrk_countAvailable(void);
 
 
@@ -426,8 +438,9 @@ ubrk_countAvailable(void);
 * @param bi The break iterator to use.
 * @param offset the offset to check.
 * @return True if "offset" is a boundary position.
+* @stable
 */
-U_CAPI  UBool U_EXPORT2 
+U_CAPI  UBool U_EXPORT2
 ubrk_isBoundary(UBreakIterator *bi, int32_t offset);
 
 /**
@@ -437,6 +450,7 @@ ubrk_isBoundary(UBreakIterator *bi, int32_t offset);
  * status, a default value of 0 is returned.
  * <p>
  * For word break iterators, the possible values are defined in enum UWordBreak.
+ * @draft ICU 2.2
  */
 U_CAPI  int32_t U_EXPORT2
 ubrk_getRuleStatus(UBreakIterator *bi);
diff --git a/icu4c/source/test/intltest/rbbiapts.cpp b/icu4c/source/test/intltest/rbbiapts.cpp
index 8f9e87a59a..3f0dd10d46 100644
--- a/icu4c/source/test/intltest/rbbiapts.cpp
+++ b/icu4c/source/test/intltest/rbbiapts.cpp
@@ -654,12 +654,12 @@ void RBBIAPITest::TestWordStatus() {
      int32_t tag_lo[]  = {UBRK_WORD_NONE,     UBRK_WORD_LETTER, UBRK_WORD_NONE,    UBRK_WORD_LETTER,
                           UBRK_WORD_NONE,     UBRK_WORD_NUMBER, UBRK_WORD_NONE,
                           UBRK_WORD_IDEO,     UBRK_WORD_IDEO,   UBRK_WORD_NONE,
-                          UBRK_WORD_HIRAKATA, UBRK_WORD_NONE,   UBRK_WORD_HIRAKATA};
+                          UBRK_WORD_KANA,     UBRK_WORD_NONE,   UBRK_WORD_KANA};
 
-     int32_t tag_hi[]  = {UBRK_WORD_NONE_LIMIT,     UBRK_WORD_LETTER_LIMIT, UBRK_WORD_NONE_LIMIT,    UBRK_WORD_LETTER_LIMIT,
-                          UBRK_WORD_NONE_LIMIT,     UBRK_WORD_NUMBER_LIMIT, UBRK_WORD_NONE_LIMIT,
-                          UBRK_WORD_IDEO_LIMIT,     UBRK_WORD_IDEO_LIMIT,   UBRK_WORD_NONE_LIMIT,
-                          UBRK_WORD_HIRAKATA_LIMIT, UBRK_WORD_NONE_LIMIT,   UBRK_WORD_HIRAKATA_LIMIT};
+     int32_t tag_hi[]  = {UBRK_WORD_NONE_LIMIT, UBRK_WORD_LETTER_LIMIT, UBRK_WORD_NONE_LIMIT, UBRK_WORD_LETTER_LIMIT,
+                          UBRK_WORD_NONE_LIMIT, UBRK_WORD_NUMBER_LIMIT, UBRK_WORD_NONE_LIMIT,
+                          UBRK_WORD_IDEO_LIMIT, UBRK_WORD_IDEO_LIMIT,   UBRK_WORD_NONE_LIMIT,
+                          UBRK_WORD_KANA_LIMIT, UBRK_WORD_NONE_LIMIT,   UBRK_WORD_KANA_LIMIT};
 
      UErrorCode status=U_ZERO_ERROR;