ICU-4078 partially disentangle conversion and properties code from other parts of the common library

X-SVN-Rev: 16194
2004-08-26 22:58:39 +00:00 · 2004-08-26 22:58:39 +00:00 · 2327dcdc7f
commit 2327dcdc7f
parent 53e086dc9a
1 changed files with 121 additions and 2 deletions
--- a/icu4c/source/common/unicode/unistr.h
+++ b/icu4c/source/common/unicode/unistr.h
@ -50,6 +50,18 @@ class BreakIterator;        // unicode/brkiter.h

 /* The <iostream> include has been moved to unicode/ustream.h */

+/**
+ * Constant to be used in the UnicodeString(const char *, int32_t, EInvariant)
+ * constructor, which constructs a Unicode string from an invariant-character
+ * char * string. About invariant characters see utypes.h.
+ * This constructor has no runtime dependency on conversion code and is
+ * therefore recommended over ones taking a charset name string
+ * (where the empty string "" indicates invariant-character conversion).
+ *
+ * @draft ICU 3.2
+ */
+#define US_INV UnicodeString::kInvariant
+
 /**
 * Unicode String literals in C++.
 * Dependent on the platform properties, different UnicodeString
@ -72,7 +84,7 @@ class BreakIterator;        // unicode/brkiter.h
 #elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY
 #   define UNICODE_STRING(cs, _length) UnicodeString(TRUE, (const UChar *)cs, _length)
 #else
-#   define UNICODE_STRING(cs, _length) UnicodeString(cs, _length, "")
+#   define UNICODE_STRING(cs, _length) UnicodeString(cs, _length, US_INV)
 #endif

 /**
@ -93,7 +105,7 @@ class BreakIterator;        // unicode/brkiter.h
 #elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY
 #   define UNICODE_STRING_SIMPLE(cs) UnicodeString(TRUE, (const UChar *)cs, -1)
 #else
-#   define UNICODE_STRING_SIMPLE(cs) UnicodeString(cs, "")
+#   define UNICODE_STRING_SIMPLE(cs) UnicodeString(cs, -1, US_INV)
 #endif

 /**
@ -170,6 +182,22 @@ class U_COMMON_API UnicodeString : public Replaceable
 {
 public:

+  /**
+   * Signature-distinguishing constant type for the invariant-character APIs:
+   * the UnicodeString(const char *, int32_t, EInvariant) constructor and
+   * the extract(int32_t, int32_t, char *, int32_t, EInvariant) overload.
+   * Use the macro US_INV instead of the full qualification for this value.
+   * @see US_INV
+   * @draft ICU 3.2
+   */
+  enum EInvariant {
+    /**
+     * The only enumerator; pass it (via US_INV) to select the invariant-character overloads.
+     * @draft ICU 3.2
+     */
+    kInvariant
+  };
+
  //========================================
  // Read-only operations
  //========================================
@ -1387,12 +1415,46 @@ public:
              int32_t limit,
              UnicodeString& target) const;

+  /**
+   * Copy the characters in the range
+   * [<tt>start</TT>, <tt>start + length</TT>) into an array of characters.
+   * All characters must be invariant (see utypes.h).
+   * Use US_INV as the last, signature-distinguishing parameter.
+   *
+   * This function does not write any more than <code>targetCapacity</code>
+   * characters but returns the length of the entire output string
+   * so that one can allocate a larger buffer and call the function again
+   * if necessary.
+   * The output string is NUL-terminated if possible.
+   *
+   * @param start offset of first character which will be copied
+   * @param length the number of characters to extract
+   * @param target the target buffer for extraction, can be NULL
+   *               if targetCapacity is 0
+   * @param targetCapacity the length of the target buffer
+   * @param inv Signature-distinguishing parameter, use US_INV.
+   * @return the output string length, not including the terminating NUL
+   * @draft ICU 3.2
+   */
+  int32_t extract(int32_t start,
+           int32_t length,
+           char *target,
+           int32_t targetCapacity,
+           enum EInvariant inv) const;
+
+#if !UCONFIG_NO_CONVERSION
+
  /**
   * Copy the characters in the range
   * [<tt>start</TT>, <tt>start + length</TT>) into an array of characters
   * in a specified codepage.
   * The output string is NUL-terminated.
   *
+   * Recommendation: For invariant-character strings use
+   * extract(int32_t start, int32_t length, char *target, int32_t targetCapacity, enum EInvariant inv) const
+   * because it avoids object code dependencies of UnicodeString on
+   * the conversion code.
+   *
   * @param start offset of first character which will be copied
   * @param startLength the number of characters to extract
   * @param target the target buffer for extraction
@ -1422,6 +1484,11 @@ public:
   * if necessary.
   * The output string is NUL-terminated if possible.
   *
+   * Recommendation: For invariant-character strings use
+   * extract(int32_t start, int32_t length, char *target, int32_t targetCapacity, enum EInvariant inv) const
+   * because it avoids object code dependencies of UnicodeString on
+   * the conversion code.
+   *
   * @param start offset of first character which will be copied
   * @param startLength the number of characters to extract
   * @param target the target buffer for extraction
@ -1463,6 +1530,8 @@ public:
                  UConverter *cnv,
                  UErrorCode &errorCode) const;

+#endif
+
  /* Length operations */

  /**
@ -2651,15 +2720,23 @@ public:
   */
  UnicodeString(UChar *buffer, int32_t buffLength, int32_t buffCapacity);

+#if !UCONFIG_NO_CONVERSION
+
  /**
   * char* constructor.
   * @param codepageData an array of bytes, null-terminated
   * @param codepage the encoding of <TT>codepageData</TT>.  The special
   * value 0 for <TT>codepage</TT> indicates that the text is in the
   * platform's default codepage.
+   *
   * If <code>codepage</code> is an empty string (<code>""</code>),
   * then a simple conversion is performed on the codepage-invariant
   * subset ("invariant characters") of the platform encoding. See utypes.h.
+   * Recommendation: For invariant-character strings use the constructor
+   * UnicodeString(const char *src, int32_t length, enum EInvariant inv)
+   * because it avoids object code dependencies of UnicodeString on
+   * the conversion code.
+   *
   * @stable ICU 2.0
   */
  UnicodeString(const char *codepageData,
@ -2675,6 +2752,11 @@ public:
   * If <code>codepage</code> is an empty string (<code>""</code>),
   * then a simple conversion is performed on the codepage-invariant
   * subset ("invariant characters") of the platform encoding. See utypes.h.
+   * Recommendation: For invariant-character strings use the constructor
+   * UnicodeString(const char *src, int32_t length, enum EInvariant inv)
+   * because it avoids object code dependencies of UnicodeString on
+   * the conversion code.
+   *
   * @stable ICU 2.0
   */
  UnicodeString(const char *codepageData,
@ -2707,6 +2789,34 @@ public:
        UConverter *cnv,
        UErrorCode &errorCode);

+#endif
+
+  /**
+   * Constructs a Unicode string from an invariant-character char * string.
+   * About invariant characters see utypes.h.
+   * This constructor has no runtime dependency on conversion code and is
+   * therefore recommended over ones taking a charset name string
+   * (where the empty string "" indicates invariant-character conversion).
+   *
+   * Use the macro US_INV as the third, signature-distinguishing parameter.
+   *
+   * For example:
+   * \code
+   * void fn(const char *s) {
+   *   UnicodeString ustr(s, -1, US_INV);
+   *   // use ustr ...
+   * }
+   * \endcode
+   *
+   * @param src String using only invariant characters.
+   * @param length Length of src, or -1 if NUL-terminated.
+   * @param inv Signature-distinguishing parameter, use US_INV.
+   *
+   * @see US_INV
+   * @draft ICU 3.2
+   */
+  UnicodeString(const char *src, int32_t length, enum EInvariant inv);
+

  /**
   * Copy constructor.
@ -2967,6 +3077,8 @@ private:
  inline void pinIndices(int32_t& start,
                         int32_t& length) const;

+#if !UCONFIG_NO_CONVERSION
+
  /* Internal extract() using UConverter. */
  int32_t doExtract(int32_t start, int32_t length,
                    char *dest, int32_t destCapacity,
@ -2996,6 +3108,9 @@ private:
                   int32_t dataLength,
                   UConverter *converter,
                   UErrorCode &status);
+
+#endif
+
  /*
   * This function is called when write access to the array
   * is necessary.
@ -3733,6 +3848,8 @@ UnicodeString::extract(int32_t start,
               UnicodeString& target) const
 { doExtract(start, _length, target); }

+#if !UCONFIG_NO_CONVERSION
+
 inline int32_t
 UnicodeString::extract(int32_t start,
               int32_t _length,
@ -3744,6 +3861,8 @@ UnicodeString::extract(int32_t start,
  return extract(start, _length, dst, dst!=0 ? 0xffffffff : 0, codepage);
 }

+#endif
+
 inline void
 UnicodeString::extractBetween(int32_t start,
                  int32_t limit,