ICU-4078 disentangle conversion and properties code some from other parts of the common library

X-SVN-Rev: 16194
This commit is contained in:
Markus Scherer 2004-08-26 22:58:39 +00:00
parent 53e086dc9a
commit 2327dcdc7f

View File

@ -50,6 +50,18 @@ class BreakIterator; // unicode/brkiter.h
/* The <iostream> include has been moved to unicode/ustream.h */
/**
* Constant to be used in the UnicodeString(const char *, int32_t, EInvariant) constructor
* which constructs a Unicode string from an invariant-character char * string.
* About invariant characters see utypes.h.
* This constructor has no runtime dependency on conversion code and is
* therefore recommended over ones taking a charset name string
* (where the empty string "" indicates invariant-character conversion).
*
* The enumerator is named through the class (UnicodeString::kInvariant)
* rather than through the enum type: qualifying an unscoped enumerator
* with its enum name is only accepted from C++11 on, while the
* class-qualified form is valid in every C++ dialect.
*
* @draft ICU 3.2
*/
#define US_INV UnicodeString::kInvariant
/**
* Unicode String literals in C++.
* Dependent on the platform properties, different UnicodeString
@ -72,7 +84,7 @@ class BreakIterator; // unicode/brkiter.h
#elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY
# define UNICODE_STRING(cs, _length) UnicodeString(TRUE, (const UChar *)cs, _length)
#else
# define UNICODE_STRING(cs, _length) UnicodeString(cs, _length, "")
# define UNICODE_STRING(cs, _length) UnicodeString(cs, _length, US_INV)
#endif
/**
@ -93,7 +105,7 @@ class BreakIterator; // unicode/brkiter.h
#elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY
# define UNICODE_STRING_SIMPLE(cs) UnicodeString(TRUE, (const UChar *)cs, -1)
#else
# define UNICODE_STRING_SIMPLE(cs) UnicodeString(cs, "")
# define UNICODE_STRING_SIMPLE(cs) UnicodeString(cs, -1, US_INV)
#endif
/**
@ -170,6 +182,22 @@ class U_COMMON_API UnicodeString : public Replaceable
{
public:
/**
* Tag type whose single value selects the invariant-character overloads,
* such as the UnicodeString(const char *, int32_t, EInvariant) constructor
* that builds a Unicode string from an invariant-character char * string.
* Prefer the macro US_INV over spelling out the fully qualified value.
*
* @see US_INV
* @draft ICU 3.2
*/
enum EInvariant {
/**
* The only value of this enum; pass it (via US_INV) as the
* signature-distinguishing argument.
* @see EInvariant
* @draft ICU 3.2
*/
kInvariant = 0
};
//========================================
// Read-only operations
//========================================
@ -1387,12 +1415,46 @@ public:
int32_t limit,
UnicodeString& target) const;
/**
* Copy the characters in the range
* [<tt>start</tt>, <tt>start + length</tt>) into an array of characters.
* All characters must be invariant (see utypes.h).
* Use US_INV as the last, signature-distinguishing parameter.
*
* This function does not write any more than <code>targetCapacity</code>
* characters but returns the length of the entire output string
* so that one can allocate a larger buffer and call the function again
* if necessary.
* The output string is NUL-terminated if possible.
*
* @param start offset of first character which will be copied
* @param length the number of characters to extract
* @param target the target buffer for extraction, can be NULL
* if targetCapacity is 0
* @param targetCapacity the length of the target buffer
* @param inv Signature-distinguishing parameter, use US_INV.
* @return the output string length, not including the terminating NUL
* @draft ICU 3.2
*/
int32_t extract(int32_t start,
int32_t length,
char *target,
int32_t targetCapacity,
enum EInvariant inv) const;
#if !UCONFIG_NO_CONVERSION
/**
* Copy the characters in the range
* [<tt>start</TT>, <tt>start + length</TT>) into an array of characters
* in a specified codepage.
* The output string is NUL-terminated.
*
* Recommendation: For invariant-character strings use
* extract(int32_t start, int32_t length, char *target, int32_t targetCapacity, enum EInvariant inv) const
* because it avoids object code dependencies of UnicodeString on
* the conversion code.
*
* @param start offset of first character which will be copied
* @param startLength the number of characters to extract
* @param target the target buffer for extraction
@ -1422,6 +1484,11 @@ public:
* if necessary.
* The output string is NUL-terminated if possible.
*
* Recommendation: For invariant-character strings use
* extract(int32_t start, int32_t length, char *target, int32_t targetCapacity, enum EInvariant inv) const
* because it avoids object code dependencies of UnicodeString on
* the conversion code.
*
* @param start offset of first character which will be copied
* @param startLength the number of characters to extract
* @param target the target buffer for extraction
@ -1463,6 +1530,8 @@ public:
UConverter *cnv,
UErrorCode &errorCode) const;
#endif
/* Length operations */
/**
@ -2651,15 +2720,23 @@ public:
*/
UnicodeString(UChar *buffer, int32_t buffLength, int32_t buffCapacity);
#if !UCONFIG_NO_CONVERSION
/**
* char* constructor.
* @param codepageData an array of bytes, null-terminated
* @param codepage the encoding of <TT>codepageData</TT>. The special
* value 0 for <TT>codepage</TT> indicates that the text is in the
* platform's default codepage.
*
* If <code>codepage</code> is an empty string (<code>""</code>),
* then a simple conversion is performed on the codepage-invariant
* subset ("invariant characters") of the platform encoding. See utypes.h.
* Recommendation: For invariant-character strings use the constructor
* UnicodeString(const char *src, int32_t length, enum EInvariant inv)
* because it avoids object code dependencies of UnicodeString on
* the conversion code.
*
* @stable ICU 2.0
*/
UnicodeString(const char *codepageData,
@ -2675,6 +2752,11 @@ public:
* If <code>codepage</code> is an empty string (<code>""</code>),
* then a simple conversion is performed on the codepage-invariant
* subset ("invariant characters") of the platform encoding. See utypes.h.
* Recommendation: For invariant-character strings use the constructor
* UnicodeString(const char *src, int32_t length, enum EInvariant inv)
* because it avoids object code dependencies of UnicodeString on
* the conversion code.
*
* @stable ICU 2.0
*/
UnicodeString(const char *codepageData,
@ -2707,6 +2789,34 @@ public:
UConverter *cnv,
UErrorCode &errorCode);
#endif
/**
* Constructs a Unicode string from an invariant-character char * string.
* About invariant characters see utypes.h.
* This constructor has no runtime dependency on conversion code and is
* therefore recommended over ones taking a charset name string
* (where the empty string "" indicates invariant-character conversion).
*
* Use the macro US_INV as the third, signature-distinguishing parameter.
*
* For example:
* \code
* void fn(const char *s) {
* UnicodeString ustr(s, -1, US_INV);
* // use ustr ...
* }
* \endcode
*
* @param src String using only invariant characters.
* @param length Length of src, or -1 if NUL-terminated.
* @param inv Signature-distinguishing parameter, use US_INV.
*
* @see US_INV
* @draft ICU 3.2
*/
UnicodeString(const char *src, int32_t length, enum EInvariant inv);
/**
* Copy constructor.
@ -2967,6 +3077,8 @@ private:
inline void pinIndices(int32_t& start,
int32_t& length) const;
#if !UCONFIG_NO_CONVERSION
/* Internal extract() using UConverter. */
int32_t doExtract(int32_t start, int32_t length,
char *dest, int32_t destCapacity,
@ -2996,6 +3108,9 @@ private:
int32_t dataLength,
UConverter *converter,
UErrorCode &status);
#endif
/*
* This function is called when write access to the array
* is necessary.
@ -3733,6 +3848,8 @@ UnicodeString::extract(int32_t start,
UnicodeString& target) const
{ doExtract(start, _length, target); }
#if !UCONFIG_NO_CONVERSION
inline int32_t
UnicodeString::extract(int32_t start,
int32_t _length,
@ -3744,6 +3861,8 @@ UnicodeString::extract(int32_t start,
return extract(start, _length, dst, dst!=0 ? 0xffffffff : 0, codepage);
}
#endif
inline void
UnicodeString::extractBetween(int32_t start,
int32_t limit,