ICU-1126 add UniStr::toTitle() functions

X-SVN-Rev: 7734
2002-02-21 18:42:52 +00:00 · 2002-02-21 18:42:52 +00:00 · 0e69af1c9e
commit 0e69af1c9e
parent ff3355548d
2 changed files with 102 additions and 10 deletions
--- a/icu4c/source/common/unicode/unistr.h
+++ b/icu4c/source/common/unicode/unistr.h
@ -23,8 +23,6 @@

 #include "unicode/utypes.h"
 #include "unicode/rep.h"
-/*#include "unicode/uchar.h"*/
-

 struct UConverter;          // unicode/ucnv.h

@ -36,6 +34,7 @@ class UnicodeConverter;     // unicode/convert.h
 class StringCharacterIterator;
 class SearchIterator;
 class StringSearch;
+class BreakIterator;        // unicode/brkiter.h

 /* The <iostream> include has been moved to unicode/ustream.h */

@ -2193,6 +2192,55 @@ public:
   */
  UnicodeString& toLower(const Locale& locale);

+  /**
+   * Titlecase this string in place, using the default locale.
+   *
+   * Casing is locale-dependent and context-sensitive. Titlecasing uses a
+   * break iterator to find the first characters of words; it titlecases
+   * those characters and lowercases all others.
+   *
+   * A titlecase break iterator can be provided to customize for arbitrary
+   * styles, using rules and dictionaries beyond the standard iterators.
+   * Providing one may also be more efficient, because it avoids opening and
+   * closing an iterator for each string. The standard titlecase iterator
+   * for the root locale implements the algorithm of Unicode TR 21.
+   *
+   * @param titleIter A break iterator to find the first characters of words
+   *                  that are to be titlecased. The caller keeps ownership;
+   *                  this function does not close it.
+   *                  If none is provided (0), then a standard titlecase
+   *                  break iterator is opened for this call and closed again.
+   * @return A reference to this. On failure (for example, if the internal
+   *         break iterator cannot be opened), this string is set to bogus.
+   * @draft ICU 2.1
+   */
+  UnicodeString &toTitle(BreakIterator *titleIter);
+
+  /**
+   * Titlecase this string in place, using the given locale.
+   *
+   * Casing is locale-dependent and context-sensitive. Titlecasing uses a
+   * break iterator to find the first characters of words; it titlecases
+   * those characters and lowercases all others.
+   *
+   * A titlecase break iterator can be provided to customize for arbitrary
+   * styles, using rules and dictionaries beyond the standard iterators.
+   * Providing one may also be more efficient, because it avoids opening and
+   * closing an iterator for each string. The standard titlecase iterator
+   * for the root locale implements the algorithm of Unicode TR 21.
+   *
+   * @param titleIter A break iterator to find the first characters of words
+   *                  that are to be titlecased. The caller keeps ownership;
+   *                  this function does not close it.
+   *                  If none is provided (0), then a standard titlecase
+   *                  break iterator is opened for this call and closed again.
+   * @param locale    The locale to consider.
+   * @return A reference to this. On failure (for example, if the internal
+   *         break iterator cannot be opened), this string is set to bogus.
+   * @draft ICU 2.1
+   */
+  UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale);
+
  /**
   * Case-fold the characters in this string.
   * Case-folding is locale-independent and not context-sensitive,
@ -2731,7 +2779,8 @@ private:

  // common function for case mappings
  UnicodeString &
-  caseMap(const Locale& locale,
+  caseMap(BreakIterator *titleIter,
+          const Locale& locale,
          uint32_t options,
          int32_t toWhichCase);

--- a/icu4c/source/common/unistr.cpp
+++ b/icu4c/source/common/unistr.cpp
@ -27,6 +27,7 @@
 #include "unicode/unistr.h"
 #include "unicode/unicode.h"
 #include "unicode/ucnv.h"
+#include "unicode/ubrk.h"
 #include "uhash.h"
 #include "ustr_imp.h"

@ -1011,36 +1012,48 @@ UnicodeString::setCharAt(UTextOffset offset,
 enum {
    TO_LOWER,
    TO_UPPER,
+    TO_TITLE,
    FOLD_CASE
 };

 UnicodeString &
 UnicodeString::toLower() {
-  return caseMap(Locale::getDefault(), 0, TO_LOWER);
+  return caseMap(0, Locale::getDefault(), 0, TO_LOWER);
 }

 UnicodeString &
 UnicodeString::toLower(const Locale &locale) {
-  return caseMap(locale, 0, TO_LOWER);
+  return caseMap(0, locale, 0, TO_LOWER);
 }

 UnicodeString &
 UnicodeString::toUpper() {
-  return caseMap(Locale::getDefault(), 0, TO_UPPER);
+  return caseMap(0, Locale::getDefault(), 0, TO_UPPER);
 }

 UnicodeString &
 UnicodeString::toUpper(const Locale &locale) {
-  return caseMap(locale, 0, TO_UPPER);
+  return caseMap(0, locale, 0, TO_UPPER);
+}
+
+UnicodeString &
+UnicodeString::toTitle(BreakIterator *titleIter) {
+  return caseMap(titleIter, Locale::getDefault(), 0, TO_TITLE);
+}
+
+UnicodeString &
+UnicodeString::toTitle(BreakIterator *titleIter, const Locale &locale) {
+  return caseMap(titleIter, locale, 0, TO_TITLE);
 }

 UnicodeString &
 UnicodeString::foldCase(uint32_t options) {
-    return caseMap(Locale::getDefault(), options, FOLD_CASE);
+    return caseMap(0, Locale::getDefault(), options, FOLD_CASE);
 }

 UnicodeString &
-UnicodeString::caseMap(const Locale& locale,
+UnicodeString::caseMap(BreakIterator *titleIter,
+                       const Locale& locale,
                       uint32_t options,
                       int32_t toWhichCase) {
  if(fLength <= 0) {
@ -1071,13 +1084,34 @@ UnicodeString::caseMap(const Locale& locale,
    return *this;
  }

-  // Case-map, and if the result is too long, then reallocate and repeat.
+  // set up the titlecasing break iterator
+  UBreakIterator *cTitleIter = 0;
  UErrorCode errorCode;
+
+  if(toWhichCase == TO_TITLE) {
+    errorCode = U_ZERO_ERROR;  // shared by both branches, checked once below
+    if(titleIter != 0) {
+      cTitleIter = (UBreakIterator *)titleIter;  // caller-owned, not closed here
+      ubrk_setText(cTitleIter, oldArray, oldLength, &errorCode);  // scan this text
+    } else {
+      /* ### TODO UBRK_TITLECASE */
+      cTitleIter = ubrk_open(UBRK_WORD, locale.getName(),
+                             oldArray, oldLength, &errorCode);
+    }
+    if(U_FAILURE(errorCode)) {
+      delete [] bufferToDelete;
+      setToBogus();
+      return *this;
+    }
+  }
+
+  // Case-map, and if the result is too long, then reallocate and repeat.
  do {
    errorCode = U_ZERO_ERROR;
    if(toWhichCase==TO_LOWER) {
      fLength = u_internalStrToLower(fArray, fCapacity,
                                     oldArray, oldLength,
+                                     0, oldLength,
                                     locale.getName(),
                                     &errorCode);
    } else if(toWhichCase==TO_UPPER) {
@ -1085,6 +1119,11 @@ UnicodeString::caseMap(const Locale& locale,
                                     oldArray, oldLength,
                                     locale.getName(),
                                     &errorCode);
+    } else if(toWhichCase==TO_TITLE) {
+      fLength = u_internalStrToTitle(fArray, fCapacity,
+                                     oldArray, oldLength,
+                                     cTitleIter, locale.getName(),
+                                     &errorCode);
    } else {
      fLength = u_internalStrFoldCase(fArray, fCapacity,
                                      oldArray, oldLength,
@ -1093,6 +1132,10 @@ UnicodeString::caseMap(const Locale& locale,
    }
  } while(errorCode==U_BUFFER_OVERFLOW_ERROR && cloneArrayIfNeeded(fLength, fLength, FALSE));

+  if(cTitleIter != 0 && titleIter == 0) {
+    ubrk_close(cTitleIter);
+  }
+
  delete [] bufferToDelete;
  if(U_FAILURE(errorCode)) {
    setToBogus();