ICU-4078 disentangle conversion and properties code somewhat from other parts of the common library

X-SVN-Rev: 16193
This commit is contained in:
Markus Scherer 2004-08-26 22:51:40 +00:00
parent 4022fb8305
commit 53e086dc9a
57 changed files with 3457 additions and 2906 deletions

View File

@ -60,16 +60,18 @@ LIBS = $(LIBICUDT) $(DEFAULT_LIBS)
OBJECTS = putil.o uobject.o cmemory.o umutex.o \
udata.o ucmndata.o udatamem.o udataswp.o umapfile.o ucol_swp.o \
uresbund.o uresdata.o resbund.o ucat.o locmap.o uloc.o locid.o \
uresbund.o ures_cnv.o uresdata.o resbund.o resbund_cnv.o \
ucat.o locmap.o uloc.o locid.o \
uhash.o uhash_us.o \
ucnv.o ucnv_bld.o ucnv_cb.o ucnv_cnv.o ucnv_err.o ucnv_ext.o ucnv_io.o ucnvlat1.o \
ucnv_u7.o ucnv_u8.o ucnv_u16.o ucnv_u32.o ucnvscsu.o ucnvbocu.o \
ucnvmbcs.o ucnv2022.o ucnvhz.o ucnv_lmb.o ucnvisci.o \
unistr.o utf_impl.o ustring.o ustrcase.o cstring.o ustrfmt.o ustrtrns.o \
unistr.o unistr_case.o unistr_cnv.o unistr_props.o \
utf_impl.o ustring.o ustr_cnv.o ustrcase.o cstring.o ustrfmt.o ustrtrns.o \
normlzr.o unorm.o unorm_it.o chariter.o schriter.o uchriter.o uiter.o \
uchar.o uprops.o propname.o ubidi.o ubidiwrt.o ubidiln.o ushape.o unames.o \
ucln_cmn.o uscript.o usc_impl.o uvector.o ustack.o uvectr32.o ucmp8.o \
uarrsort.o utrie.o uset.o uniset.o ruleiter.o caniter.o unifilt.o unifunct.o usetiter.o \
uarrsort.o utrie.o uset.o uniset.o uniset_props.o ruleiter.o caniter.o unifilt.o unifunct.o usetiter.o \
brkiter.o brkdict.o ubrk.o dbbi.o dbbi_tbl.o \
rbbi.o rbbidata.o rbbinode.o rbbirb.o rbbiscan.o rbbisetb.o rbbistbl.o rbbitblb.o \
icuserv.o iculserv.o icunotif.o uenum.o ustrenum.o \

View File

@ -337,7 +337,7 @@ protected:
class ICUBreakIteratorService : public ICULocaleService {
public:
ICUBreakIteratorService()
: ICULocaleService("Break Iterator")
: ICULocaleService(UNICODE_STRING("Break Iterator", 14))
{
UErrorCode status = U_ZERO_ERROR;
registerFactory(new ICUBreakIteratorFactory(), status);

View File

@ -1,6 +1,6 @@
/*
*****************************************************************************
* Copyright (C) 1996-2003, International Business Machines Corporation and *
* Copyright (C) 1996-2004, International Business Machines Corporation and *
* others. All Rights Reserved. *
*****************************************************************************
*/
@ -216,7 +216,7 @@ void CanonicalIterator::setSource(const UnicodeString &newSource, UErrorCode &st
uprv_free(current);
return;
}
pieces[0][0] = UnicodeString("");
pieces[0][0] = UnicodeString();
pieces_lengths = (int32_t*)uprv_malloc(1 * sizeof(int32_t));
/* test for NULL */
if (pieces_lengths == 0) {

View File

@ -1,6 +1,6 @@
/*
**********************************************************************
* Copyright (c) 2001-2003, International Business Machines
* Copyright (c) 2001-2004, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* Date Name Description
@ -25,11 +25,16 @@ U_NAMESPACE_BEGIN
class U_COMMON_API CharString : public UMemory {
public:
#if !UCONFIG_NO_CONVERSION
// Constructor
// @param str The unicode string to be converted to char *
// @param codepage The char * code page. "" for invariant conversion.
// NULL for default code page.
inline CharString(const UnicodeString& str, const char *codepage = "");
inline CharString(const UnicodeString& str, const char *codepage);
#endif
inline CharString(const UnicodeString& str);
inline ~CharString();
inline operator const char*() const { return ptr; }
@ -41,17 +46,30 @@ private:
CharString &operator=(const CharString &other); // forbid copying of this class
};
#if !UCONFIG_NO_CONVERSION
// Converts str to a char * string using the given code page.
// @param str      The unicode string to be converted to char *
// @param codepage The char * code page, passed through unchanged to
//                 UnicodeString::extract().
// The result lives in the internal buffer when it fits; otherwise a heap
// buffer of len+1 bytes is allocated (the destructor frees it when ptr != buf).
// If that allocation fails, ptr keeps pointing at the internal buffer, which
// was explicitly terminated below, so callers get a terminated (truncated)
// string instead of a NULL pointer.
inline CharString::CharString(const UnicodeString& str, const char *codepage) {
    int32_t len;
    ptr = buf;
    len = str.extract(0, 0x7FFFFFFF, buf ,sizeof(buf)-1, codepage);
    buf[sizeof(buf)-1] = 0; // extract does not add null if it thinks there is no space for it.
    if (len >= (int32_t)(sizeof(buf)-1)) {
        // Too long for the internal buffer: redo the conversion into the heap.
        char *heap = (char *)uprv_malloc(len+1);
        if (heap != NULL) {
            str.extract(0, 0x7FFFFFFF, heap, len+1, codepage);
            ptr = heap;
        }
        // else: out of memory - never write through or publish a NULL pointer;
        // fall back to the terminated internal buffer.
    }
}
#endif
// Converts str to a char * string with invariant-character conversion (US_INV).
// This constructor does not need a converter, so it is declared outside the
// UCONFIG_NO_CONVERSION guard.
// @param str The unicode string to be converted to char *
// The result lives in the internal buffer when it fits; otherwise a heap
// buffer of len+1 bytes is allocated (the destructor frees it when ptr != buf).
// If that allocation fails, the object holds an empty string rather than a
// NULL pointer.
inline CharString::CharString(const UnicodeString& str) {
    int32_t len;
    ptr = buf;
    len = str.extract(0, 0x7FFFFFFF, buf, (int32_t)(sizeof(buf)-1), US_INV);
    if (len >= (int32_t)(sizeof(buf)-1)) {
        // Too long for the internal buffer: redo the conversion into the heap.
        char *heap = (char *)uprv_malloc(len+1);
        if (heap != NULL) {
            str.extract(0, 0x7FFFFFFF, heap, len+1, US_INV);
            ptr = heap;
        } else {
            // Out of memory. The contents of buf after an overflowing extract
            // are not relied upon here, so expose an empty, terminated string.
            buf[0] = 0;
        }
    }
}
inline CharString::~CharString() {
if (ptr != buf) {
uprv_free(ptr);

View File

@ -2113,6 +2113,10 @@ InputPath=.\unicode\resbund.h
# End Source File
# Begin Source File
SOURCE=.\resbund_cnv.cpp
# End Source File
# Begin Source File
SOURCE=.\ucat.c
# End Source File
# Begin Source File
@ -2168,10 +2172,6 @@ SOURCE=.\uloc.c
# End Source File
# Begin Source File
SOURCE=.\ulocimp.h
# End Source File
# Begin Source File
SOURCE=.\unicode\uloc.h
!IF "$(CFG)" == "common - Win32 Release"
@ -2219,6 +2219,10 @@ InputPath=.\unicode\uloc.h
# End Source File
# Begin Source File
SOURCE=.\ulocimp.h
# End Source File
# Begin Source File
SOURCE=.\unicode\ures.h
!IF "$(CFG)" == "common - Win32 Release"
@ -2266,6 +2270,10 @@ InputPath=.\unicode\ures.h
# End Source File
# Begin Source File
SOURCE=.\ures_cnv.c
# End Source File
# Begin Source File
SOURCE=.\uresbund.c
# End Source File
# Begin Source File
@ -2773,6 +2781,10 @@ InputPath=.\unicode\uniset.h
# End Source File
# Begin Source File
SOURCE=.\uniset_props.cpp
# End Source File
# Begin Source File
SOURCE=.\uprops.c
# End Source File
# Begin Source File
@ -3363,6 +3375,18 @@ InputPath=.\unicode\unistr.h
# End Source File
# Begin Source File
SOURCE=.\unistr_case.cpp
# End Source File
# Begin Source File
SOURCE=.\unistr_cnv.cpp
# End Source File
# Begin Source File
SOURCE=.\unistr_props.cpp
# End Source File
# Begin Source File
SOURCE=.\unicode\urep.h
!IF "$(CFG)" == "common - Win32 Release"
@ -3410,6 +3434,10 @@ InputPath=.\unicode\urep.h
# End Source File
# Begin Source File
SOURCE=.\ustr_cnv.c
# End Source File
# Begin Source File
SOURCE=.\ustr_imp.h
# End Source File
# Begin Source File

View File

@ -1145,6 +1145,9 @@
<File
RelativePath=".\resbund.cpp">
</File>
<File
RelativePath=".\resbund_cnv.cpp">
</File>
<File
RelativePath=".\unicode\resbund.h">
<FileConfiguration
@ -1230,6 +1233,9 @@
Outputs="..\..\include\unicode\$(InputFileName)"/>
</FileConfiguration>
</File>
<File
RelativePath=".\ures_cnv.c">
</File>
<File
RelativePath=".\uresbund.c">
</File>
@ -1447,6 +1453,9 @@
<File
RelativePath=".\uniset.cpp">
</File>
<File
RelativePath=".\uniset_props.cpp">
</File>
<File
RelativePath=".\unicode\uniset.h">
<FileConfiguration
@ -1724,6 +1733,15 @@
<File
RelativePath=".\unistr.cpp">
</File>
<File
RelativePath=".\unistr_case.cpp">
</File>
<File
RelativePath=".\unistr_cnv.cpp">
</File>
<File
RelativePath=".\unistr_props.cpp">
</File>
<File
RelativePath=".\unicode\unistr.h">
<FileConfiguration
@ -1762,6 +1780,9 @@
Outputs="..\..\include\unicode\$(InputFileName)"/>
</FileConfiguration>
</File>
<File
RelativePath=".\ustr_cnv.c">
</File>
<File
RelativePath=".\ustr_imp.h">
</File>

View File

@ -110,18 +110,47 @@ LocaleUtility::canonicalLocaleString(const UnicodeString* id, UnicodeString& res
Locale&
LocaleUtility::initLocaleFromName(const UnicodeString& id, Locale& result)
{
if (id.isBogus()) {
enum { BUFLEN = 128 }; // larger than ever needed
if (id.isBogus() || id.length() >= BUFLEN) {
result.setToBogus();
} else {
const int32_t BUFLEN = 128; // larger than ever needed
/*
* We need to convert from a UnicodeString to char * in order to
* create a Locale.
*
* Problem: Locale ID strings may contain '@' which is a variant
* character and cannot be handled by invariant-character conversion.
*
* Hack: Since ICU code can handle locale IDs with multiple encodings
* of '@' (at least for EBCDIC; it's not known to be a problem for
* ASCII-based systems),
* we use regular invariant-character conversion for everything else
* and manually convert U+0040 into a compiler-char-constant '@'.
* While this compilation-time constant may not match the runtime
* encoding of '@', it should be one of the encodings which ICU
* recognizes.
*
* There should be only at most one '@' in a locale ID.
*/
char buffer[BUFLEN];
int len = id.extract(0, BUFLEN, buffer);
if (len >= BUFLEN) {
result.setToBogus();
} else {
buffer[len] = '\0';
result = Locale::createFromName(buffer);
int32_t prev, i;
prev = 0;
for(;;) {
i = id.indexOf((UChar)0x40, prev);
if(i < 0) {
// no @ between prev and the rest of the string
id.extract(prev, INT32_MAX, buffer + prev, BUFLEN - prev, US_INV);
break; // done
} else {
// normal invariant-character conversion for text between @s
id.extract(prev, i - prev, buffer + prev, BUFLEN - prev, US_INV);
// manually "convert" U+0040 at id[i] into '@' at buffer[i]
buffer[i] = '@';
prev = i + 1;
}
}
result = Locale::createFromName(buffer);
}
return result;
}
@ -132,7 +161,7 @@ LocaleUtility::initNameFromLocale(const Locale& locale, UnicodeString& result)
if (locale.isBogus()) {
result.setToBogus();
} else {
result.append(locale.getName());
result.append(UnicodeString(locale.getName(), -1, US_INV));
}
return result;
}
@ -620,7 +649,15 @@ UObject*
ICUResourceBundleFactory::handleCreate(const Locale& loc, int32_t /* kind */, const ICUService* /* service */, UErrorCode& status) const
{
if (U_SUCCESS(status)) {
return new ResourceBundle(_bundleName, loc, status);
// _bundleName is a package name
// and should only contain invariant characters
char pkg[20];
int32_t length;
length=_bundleName.extract(0, INT32_MAX, pkg, (int32_t)sizeof(pkg), US_INV);
if(length>=sizeof(pkg)) {
return NULL;
}
return new ResourceBundle(pkg, loc, status);
}
return NULL;
}
@ -693,7 +730,7 @@ ICULocaleService::get(const Locale& locale, int32_t kind, Locale* actualReturn,
return result;
}
UnicodeString locName(locale.getName(), "");
UnicodeString locName(locale.getName(), -1, US_INV);
if (locName.isBogus()) {
status = U_MEMORY_ALLOCATION_ERROR;
} else {

View File

@ -562,7 +562,7 @@ ICUService::getKey(ICUServiceKey& key, UnicodeString* actualReturn, const ICUSer
if (actualReturn != NULL) {
// strip null prefix
if (result->actualDescriptor.indexOf("/") == 0) {
if (result->actualDescriptor.indexOf((UChar)0x2f) == 0) { // U+002f=slash (/)
actualReturn->remove();
actualReturn->append(result->actualDescriptor,
1,

View File

@ -279,8 +279,7 @@ RuleBasedBreakIterator::getText() const {
// function is called while we're in that state, we have to fudge
// an iterator to return.
if (nonConstThis->fText == NULL) {
// TODO: do this in a way that does not do a default conversion!
nonConstThis->fText = new StringCharacterIterator("");
nonConstThis->fText = new StringCharacterIterator(UnicodeString());
}
return *nonConstThis->fText;
}

View File

@ -476,19 +476,19 @@ UBool RBBIRuleScanner::doParseActions(EParseAction action)
case doOptionEnd:
{
UnicodeString opt(fRB->fRules, fOptionStart, fScanIndex-fOptionStart);
if (opt == "chain") {
if (opt == UNICODE_STRING("chain", 5)) {
fRB->fChainRules = TRUE;
} else if (opt == "LBCMNoChain") {
} else if (opt == UNICODE_STRING("LBCMNoChain", 11)) {
fRB->fLBCMNoChain = TRUE;
} else if (opt == "forward") {
} else if (opt == UNICODE_STRING("forward", 7)) {
fRB->fDefaultTree = &fRB->fForwardTree;
} else if (opt == "reverse") {
} else if (opt == UNICODE_STRING("reverse", 7)) {
fRB->fDefaultTree = &fRB->fReverseTree;
} else if (opt == "safe_forward") {
} else if (opt == UNICODE_STRING("safe_forward", 12)) {
fRB->fDefaultTree = &fRB->fSafeFwdTree;
} else if (opt == "safe_reverse") {
} else if (opt == UNICODE_STRING("safe_reverse", 12)) {
fRB->fDefaultTree = &fRB->fSafeRevTree;
} else if (opt == "lookAheadHardBreak") {
} else if (opt == UNICODE_STRING("lookAheadHardBreak", 18)) {
fRB->fLookAheadHardBreak = TRUE;
} else {
error(U_BRK_UNRECOGNIZED_OPTION);

View File

@ -382,7 +382,7 @@ void RBBISetBuilder::printRanges() {
for (i=0; i<rlRange->fIncludesSets->size(); i++) {
RBBINode *usetNode = (RBBINode *)rlRange->fIncludesSets->elementAt(i);
UnicodeString setName = "anon";
UnicodeString setName = UNICODE_STRING("anon", 4);
RBBINode *setRef = usetNode->fParent;
if (setRef != NULL) {
RBBINode *varRef = setRef->fParent;
@ -422,7 +422,7 @@ void RBBISetBuilder::printRangeGroups() {
for (i=0; i<rlRange->fIncludesSets->size(); i++) {
RBBINode *usetNode = (RBBINode *)rlRange->fIncludesSets->elementAt(i);
UnicodeString setName = "anon";
UnicodeString setName = UNICODE_STRING("anon", 4);
RBBINode *setRef = usetNode->fParent;
if (setRef != NULL) {
RBBINode *varRef = setRef->fParent;
@ -473,7 +473,7 @@ void RBBISetBuilder::printSets() {
}
RBBIDebugPrintf("%3d ", i);
setName = "anonymous";
setName = UNICODE_STRING("anonymous", 9);
setRef = usetNode->fParent;
if (setRef != NULL) {
varRef = setRef->fParent;
@ -622,7 +622,7 @@ void RangeDescriptor::setDictionaryFlag() {
setName = varRef->fText;
}
}
if (setName.compare("dictionary") == 0) { // TODO: no string literals.
if (setName.compare(UNICODE_STRING("dictionary", 10)) == 0) { // TODO: no string literals.
this->fNum |= 0x4000;
break;
}

View File

@ -170,27 +170,12 @@ U_NAMESPACE_BEGIN
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(ResourceBundle)
ResourceBundle::ResourceBundle( const UnicodeString& path,
const Locale& locale,
UErrorCode& error)
:UObject(), fLocale(NULL)
{
constructForLocale(path, locale, error);
}
ResourceBundle::ResourceBundle(UErrorCode &err)
:UObject(), fLocale(NULL)
{
fResource = ures_open(0, Locale::getDefault().getName(), &err);
}
ResourceBundle::ResourceBundle( const UnicodeString& path,
UErrorCode& error)
:UObject(), fLocale(NULL)
{
constructForLocale(path, Locale::getDefault(), error);
}
ResourceBundle::ResourceBundle(const ResourceBundle &other)
:UObject(other), fLocale(NULL)
{
@ -256,21 +241,6 @@ ResourceBundle::clone() const {
return new ResourceBundle(*this);
}
void
ResourceBundle::constructForLocale(const UnicodeString& path,
const Locale& locale,
UErrorCode& error)
{
char name[300];
if(!path.isEmpty()) {
path.extract(name, sizeof(name), 0, error);
fResource = ures_open(name, locale.getName(), &error);
} else {
fResource = ures_open(0, locale.getName(), &error);
}
}
UnicodeString ResourceBundle::getString(UErrorCode& status) const {
int32_t len = 0;
const UChar *r = ures_getString(fResource, &len, &status);

View File

@ -0,0 +1,71 @@
/*
*******************************************************************************
*
* Copyright (C) 1997-2004, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: resbund_cnv.cpp
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2004aug25
* created by: Markus W. Scherer
*
* Character conversion functions moved here from resbund.cpp
*/
#include "unicode/utypes.h"
#include "unicode/resbund.h"
U_NAMESPACE_BEGIN
// Constructs a bundle from a UnicodeString package path and an explicit locale.
// fLocale starts out NULL; opening the underlying resource is delegated to
// constructForLocale(), which reports problems through error.
ResourceBundle::ResourceBundle( const UnicodeString& path,
const Locale& locale,
UErrorCode& error)
:UObject(), fLocale(NULL)
{
constructForLocale(path, locale, error);
}
// Constructs a bundle from a UnicodeString package path for the default locale
// (Locale::getDefault()). fLocale starts out NULL; opening the underlying
// resource is delegated to constructForLocale(), which reports problems
// through error.
ResourceBundle::ResourceBundle( const UnicodeString& path,
UErrorCode& error)
:UObject(), fLocale(NULL)
{
constructForLocale(path, Locale::getDefault(), error);
}
void
ResourceBundle::constructForLocale(const UnicodeString& path,
const Locale& locale,
UErrorCode& error)
{
char name[300];
if(path.length() >= sizeof(name)) {
fResource = NULL;
error = U_ILLEGAL_ARGUMENT_ERROR;
} else if(!path.isEmpty()) {
if(uprv_isInvariantUString(path.getBuffer(), path.length())) {
// the invariant converter is sufficient for package and tree names
// and is more efficient
path.extract(0, INT32_MAX, name, (int32_t)sizeof(name), US_INV);
} else {
#if !UCONFIG_NO_CONVERSION
// use the default converter to support variant-character paths
path.extract(name, sizeof(name), 0, error);
#else
// the default converter is not available
fResource = NULL;
error = U_UNSUPPORTED_ERROR;
return;
#endif
}
fResource = ures_open(name, locale.getName(), &error);
} else {
fResource = ures_open(0, locale.getName(), &error);
}
}
U_NAMESPACE_END

View File

@ -88,8 +88,10 @@ u_cleanup(void)
unames_cleanup();
pname_cleanup();
uchar_cleanup();
#if !UCONFIG_NO_CONVERSION
ucnv_cleanup();
ucnv_io_cleanup();
#endif
udata_cleanup();
putil_cleanup();

View File

@ -20,6 +20,9 @@
*/
#include "unicode/utypes.h"
#if !UCONFIG_NO_CONVERSION
#include "unicode/ustring.h"
#include "unicode/ures.h"
#include "unicode/ucnv.h"
@ -2445,6 +2448,8 @@ ucnv_detectUnicodeSignature( const char* source,
return NULL;
}
#endif
/*
* Hey, Emacs, please set the following:
*

View File

@ -28,7 +28,7 @@
#include "unicode/utypes.h"
#if !UCONFIG_NO_LEGACY_CONVERSION
#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION
#include "unicode/ucnv.h"
#include "unicode/uset.h"

View File

@ -19,6 +19,9 @@
* 06/29/2000 helena Major rewrite of the callback interface.
*/
#include "unicode/utypes.h"
#if !UCONFIG_NO_CONVERSION
#include "unicode/udata.h"
#include "unicode/ucnv.h"
@ -1293,3 +1296,5 @@ ucnv_swap(const UDataSwapper *ds,
}
#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */
#endif

View File

@ -1,6 +1,6 @@
/*
**********************************************************************
* Copyright (C) 1999-2003, International Business Machines
* Copyright (C) 1999-2004, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
*
@ -18,6 +18,9 @@
#define UCNV_BLD_H
#include "unicode/utypes.h"
#if !UCONFIG_NO_CONVERSION
#include "unicode/ucnv.h"
#include "unicode/ucnv_err.h"
#include "ucnv_cnv.h"
@ -242,4 +245,6 @@ ucnv_swap(const UDataSwapper *ds,
const void *inData, int32_t length, void *outData,
UErrorCode *pErrorCode);
#endif
#endif /* _UCNV_BLD */

View File

@ -1,6 +1,6 @@
/*
**********************************************************************
* Copyright (C) 2000-2003, International Business Machines
* Copyright (C) 2000-2004, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* ucnv_cb.c:
@ -19,6 +19,9 @@
*/
#include "unicode/utypes.h"
#if !UCONFIG_NO_CONVERSION
#include "unicode/ucnv_cb.h"
#include "ucnv_bld.h"
#include "ucnv_cnv.h"
@ -224,3 +227,5 @@ ucnv_cbToUWriteSub (UConverterToUnicodeArgs *args,
ucnv_cbToUWriteUChars(args, &kSubstituteChar, 1, offsetIndex, err);
}
}
#endif

View File

@ -1,7 +1,7 @@
/*
******************************************************************************
*
* Copyright (C) 1999-2003, International Business Machines
* Copyright (C) 1999-2004, International Business Machines
* Corporation and others. All Rights Reserved.
*
******************************************************************************
@ -16,6 +16,9 @@
*/
#include "unicode/utypes.h"
#if !UCONFIG_NO_CONVERSION
#include "unicode/ucnv_err.h"
#include "unicode/ucnv.h"
#include "unicode/uset.h"
@ -167,3 +170,5 @@ ucnv_toUWriteCodePoint(UConverter *cnv,
*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
}
}
#endif

View File

@ -1,6 +1,6 @@
/*
**********************************************************************
* Copyright (C) 1999-2003, International Business Machines
* Copyright (C) 1999-2004, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
*
@ -19,6 +19,9 @@
#define UCNV_CNV_H
#include "unicode/utypes.h"
#if !UCONFIG_NO_CONVERSION
#include "unicode/ucnv.h"
#include "unicode/ucnv_err.h"
@ -274,4 +277,6 @@ ucnv_toUWriteCodePoint(UConverter *cnv,
int32_t sourceIndex,
UErrorCode *pErrorCode);
#endif
#endif /* UCNV_CNV */

View File

@ -1,7 +1,7 @@
/*
*****************************************************************************
*
* Copyright (C) 1998-2003, International Business Machines
* Copyright (C) 1998-2004, International Business Machines
* Corporation and others. All Rights Reserved.
*
*****************************************************************************
@ -15,6 +15,10 @@
* 06/29/2000 helena Major rewrite of the callback APIs.
*/
#include "unicode/utypes.h"
#if !UCONFIG_NO_CONVERSION
#include "unicode/ucnv_err.h"
#include "unicode/ucnv_cb.h"
#include "ucnv_cnv.h"
@ -453,3 +457,5 @@ UCNV_TO_U_CALLBACK_ESCAPE (
ucnv_cbToUWriteUChars(toArgs, uniValueString, valueStringLength, 0, err);
}
#endif

View File

@ -1,7 +1,7 @@
/*
******************************************************************************
*
* Copyright (C) 2003, International Business Machines
* Copyright (C) 2003-2004, International Business Machines
* Corporation and others. All Rights Reserved.
*
******************************************************************************
@ -18,7 +18,7 @@
#include "unicode/utypes.h"
#if !UCONFIG_NO_LEGACY_CONVERSION
#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION
#include "unicode/uset.h"
#include "ucnv_bld.h"

View File

@ -1,7 +1,7 @@
/*
******************************************************************************
*
* Copyright (C) 2004, International Business Machines
* Copyright (C) 2003-2004, International Business Machines
* Corporation and others. All Rights Reserved.
*
******************************************************************************
@ -20,6 +20,9 @@
#define __UCNV_EXT_H__
#include "unicode/utypes.h"
#if !UCONFIG_NO_CONVERSION
#include "unicode/ucnv.h"
#include "ucnv_cnv.h"
@ -456,3 +459,5 @@ ucnv_extGetUnicodeSet(const UConverterSharedData *sharedData,
#define UCNV_EXT_FROM_U_GET_DATA(value) ((value)&UCNV_EXT_FROM_U_DATA_MASK)
#endif
#endif

View File

@ -1,6 +1,6 @@
/*
**********************************************************************
* Copyright (C) 1999-2003, International Business Machines
* Copyright (C) 1999-2004, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
*
@ -21,6 +21,9 @@
#define UCNV_IMP_H
#include "unicode/utypes.h"
#if !UCONFIG_NO_CONVERSION
#include "unicode/uloc.h"
#include "ucnv_bld.h"
@ -91,5 +94,6 @@ ucnv_incrementRefCount(UConverterSharedData *sharedData);
*/
int32_t ucnv_copyPlatformString(char *platformString, UConverterPlatform platform);
#endif
#endif /* _UCNV_IMP */

View File

@ -28,6 +28,9 @@
*/
#include "unicode/utypes.h"
#if !UCONFIG_NO_CONVERSION
#include "unicode/putil.h"
#include "unicode/ucnv.h" /* This file implements ucnv_xXXX() APIs */
#include "unicode/udata.h"
@ -1329,6 +1332,8 @@ ucnv_swapAliases(const UDataSwapper *ds,
return headerSize+2*(int32_t)topOffset;
}
#endif
/*
* Hey, Emacs, please set the following:
*

View File

@ -1,6 +1,6 @@
/*
**********************************************************************
* Copyright (C) 1999-2003, International Business Machines
* Copyright (C) 1999-2004, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
*
@ -14,6 +14,9 @@
#define UCNV_IO_H
#include "unicode/utypes.h"
#if !UCONFIG_NO_CONVERSION
#include "udataswp.h"
#define UCNV_AMBIGUOUS_ALIAS_MAP_BIT 0x8000
@ -159,6 +162,8 @@ ucnv_swapAliases(const UDataSwapper *ds,
const void *inData, int32_t length, void *outData,
UErrorCode *pErrorCode);
#endif
#endif /* _UCNV_IO */
/*

View File

@ -25,7 +25,7 @@
#include "unicode/utypes.h"
#if !UCONFIG_NO_LEGACY_CONVERSION
#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION
#include "unicode/ucnv_err.h"
#include "unicode/ucnv.h"

View File

@ -1,6 +1,6 @@
/*
**********************************************************************
* Copyright (C) 2002-2003, International Business Machines
* Copyright (C) 2002-2004, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* file name: ucnv_u16.c
@ -15,6 +15,9 @@
*/
#include "unicode/utypes.h"
#if !UCONFIG_NO_CONVERSION
#include "unicode/ucnv.h"
#include "unicode/ucnv_err.h"
#include "ucnv_bld.h"
@ -1344,3 +1347,5 @@ const UConverterSharedData _UTF16Data = {
NULL, NULL, &_UTF16StaticData, FALSE, &_UTF16Impl,
0
};
#endif

View File

@ -1,6 +1,6 @@
/*
**********************************************************************
* Copyright (C) 2002-2003, International Business Machines
* Copyright (C) 2002-2004, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* file name: ucnv_u32.c
@ -15,6 +15,9 @@
*/
#include "unicode/utypes.h"
#if !UCONFIG_NO_CONVERSION
#include "unicode/ucnv.h"
#include "unicode/ucnv_err.h"
#include "ucnv_bld.h"
@ -1175,3 +1178,5 @@ const UConverterSharedData _UTF32Data = {
NULL, NULL, &_UTF32StaticData, FALSE, &_UTF32Impl,
0
};
#endif

View File

@ -1,6 +1,6 @@
/*
**********************************************************************
* Copyright (C) 2002-2003, International Business Machines
* Copyright (C) 2002-2004, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* file name: ucnv_u7.c
@ -15,6 +15,9 @@
*/
#include "unicode/utypes.h"
#if !UCONFIG_NO_CONVERSION
#include "unicode/ucnv.h"
#include "unicode/ucnv_err.h"
#include "ucnv_bld.h"
@ -1453,3 +1456,5 @@ const UConverterSharedData _IMAPData={
NULL, NULL, &_IMAPStaticData, FALSE, &_IMAPImpl,
0
};
#endif

View File

@ -1,6 +1,6 @@
/*
**********************************************************************
* Copyright (C) 2002-2003, International Business Machines
* Copyright (C) 2002-2004, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* file name: ucnv_u8.c
@ -19,6 +19,9 @@
*/
#include "unicode/utypes.h"
#if !UCONFIG_NO_CONVERSION
#include "unicode/ucnv.h"
#include "unicode/ucnv_err.h"
#include "ucnv_bld.h"
@ -813,3 +816,5 @@ const UConverterSharedData _CESU8Data={
NULL, NULL, &_CESU8StaticData, FALSE, &_CESU8Impl,
0
};
#endif

View File

@ -1,7 +1,7 @@
/*
******************************************************************************
*
* Copyright (C) 2002-2003, International Business Machines
* Copyright (C) 2002-2004, International Business Machines
* Corporation and others. All Rights Reserved.
*
******************************************************************************
@ -18,6 +18,9 @@
*/
#include "unicode/utypes.h"
#if !UCONFIG_NO_CONVERSION
#include "unicode/ucnv.h"
#include "unicode/ucnv_cb.h"
#include "ucnv_bld.h"
@ -1386,3 +1389,5 @@ const UConverterSharedData _Bocu1Data={
NULL, NULL, &_Bocu1StaticData, FALSE, &_Bocu1Impl,
0
};
#endif

View File

@ -16,7 +16,7 @@
#include "unicode/utypes.h"
#if !UCONFIG_NO_LEGACY_CONVERSION
#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION
#include "cmemory.h"
#include "unicode/ucnv_err.h"

View File

@ -17,7 +17,7 @@
#include "unicode/utypes.h"
#if !UCONFIG_NO_LEGACY_CONVERSION
#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION
#include "cmemory.h"
#include "unicode/ucnv_err.h"

View File

@ -1,6 +1,6 @@
/*
**********************************************************************
* Copyright (C) 2000-2003, International Business Machines
* Copyright (C) 2000-2004, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* file name: ucnvlat1.cpp
@ -13,6 +13,9 @@
*/
#include "unicode/utypes.h"
#if !UCONFIG_NO_CONVERSION
#include "unicode/ucnv.h"
#include "unicode/ucnv_err.h"
#include "unicode/uset.h"
@ -576,3 +579,5 @@ const UConverterSharedData _ASCIIData={
NULL, NULL, &_ASCIIStaticData, FALSE, &_ASCIIImpl,
0
};
#endif

View File

@ -1,7 +1,7 @@
/*
******************************************************************************
*
* Copyright (C) 2000-2003, International Business Machines
* Copyright (C) 2000-2004, International Business Machines
* Corporation and others. All Rights Reserved.
*
******************************************************************************
@ -42,7 +42,7 @@
#include "unicode/utypes.h"
#if !UCONFIG_NO_LEGACY_CONVERSION
#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION
#include "unicode/ucnv.h"
#include "unicode/ucnv_cb.h"

View File

@ -1,7 +1,7 @@
/*
******************************************************************************
*
* Copyright (C) 2000-2003, International Business Machines
* Copyright (C) 2000-2004, International Business Machines
* Corporation and others. All Rights Reserved.
*
******************************************************************************
@ -18,6 +18,9 @@
#define __UCNVMBCS_H__
#include "unicode/utypes.h"
#if !UCONFIG_NO_CONVERSION
#include "unicode/ucnv.h"
#include "ucnv_cnv.h"
@ -390,3 +393,5 @@ _MBCSGetUnicodeSetForUnicode(const UConverterSharedData *sharedData,
UErrorCode *pErrorCode);
#endif
#endif

View File

@ -20,6 +20,9 @@
*/
#include "unicode/utypes.h"
#if !UCONFIG_NO_CONVERSION
#include "unicode/ucnv.h"
#include "unicode/ucnv_cb.h"
#include "ucnv_bld.h"
@ -2026,3 +2029,5 @@ const UConverterSharedData _SCSUData={
NULL, NULL, &_SCSUStaticData, FALSE, &_SCSUImpl,
0
};
#endif

View File

@ -74,6 +74,8 @@ typedef struct USet USet;
#endif
#if !UCONFIG_NO_CONVERSION
U_CDECL_BEGIN
/** Maximum length of a converter name including the terminating NUL @stable ICU 2.0 */
@ -1819,5 +1821,6 @@ ucnv_detectUnicodeSignature(const char* source,
UErrorCode *pErrorCode);
#endif
/*_UCNV*/
#endif
/*_UCNV*/

View File

@ -62,6 +62,9 @@
#define UCNV_CB_H
#include "unicode/utypes.h"
#if !UCONFIG_NO_CONVERSION
#include "unicode/ucnv.h"
#include "unicode/ucnv_err.h"
@ -155,3 +158,5 @@ U_STABLE void U_EXPORT2 ucnv_cbToUWriteSub (UConverterToUnicodeArgs *args,
int32_t offsetIndex,
UErrorCode * err);
#endif
#endif

View File

@ -91,6 +91,7 @@
#include "unicode/utypes.h"
#if !UCONFIG_NO_CONVERSION
/**
* FROM_U, TO_U context options for sub callback
@ -450,4 +451,6 @@ U_STABLE void U_EXPORT2 UCNV_TO_U_CALLBACK_ESCAPE (
#endif
#endif
/*UCNV_ERR_H*/

View File

@ -58,6 +58,20 @@
/* common library switches -------------------------------------------------- */
/**
* \def UCONFIG_NO_CONVERSION
* This switch turns off all converters.
*
* @draft ICU 3.2
*/
#ifndef UCONFIG_NO_CONVERSION
# define UCONFIG_NO_CONVERSION 0
#endif
#if UCONFIG_NO_CONVERSION
# define UCONFIG_NO_LEGACY_CONVERSION 1
#endif
/**
* \def UCONFIG_NO_LEGACY_CONVERSION
* This switch turns off all converters except for

View File

@ -463,8 +463,8 @@ public:
* @return a reference to this
* @stable ICU 2.0
*/
virtual UnicodeSet& applyPattern(const UnicodeString& pattern,
UErrorCode& status);
UnicodeSet& applyPattern(const UnicodeString& pattern,
UErrorCode& status);
/**
* Modifies this set to represent the set specified by the given
@ -534,8 +534,8 @@ public:
* U+000A, U+0020..U+007E.
* @stable ICU 2.0
*/
virtual UnicodeString& toPattern(UnicodeString& result,
UBool escapeUnprintable = FALSE) const;
UnicodeString& toPattern(UnicodeString& result,
UBool escapeUnprintable = FALSE) const;
/**
* Modifies this set to contain those code points which have the given value

View File

@ -670,6 +670,8 @@ u_strncpy(UChar *dst,
const UChar *src,
int32_t n);
#if !UCONFIG_NO_CONVERSION
/**
* Copy a byte string encoded in the default codepage to a ustring.
* Adds a null terminator.
@ -728,6 +730,8 @@ U_STABLE char* U_EXPORT2 u_austrncpy(char *dst,
const UChar *src,
int32_t n );
#endif
/**
* Synonym for memcpy(), but with UChars only.
* @param dest The destination string

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -25,9 +25,6 @@
#include "cmemory.h"
#include "unicode/ustring.h"
#include "unicode/unistr.h"
#include "unicode/uchar.h"
#include "unicode/ucnv.h"
#include "unicode/ubrk.h"
#include "uhash.h"
#include "ustr_imp.h"
#include "unormimp.h"
@ -296,66 +293,22 @@ UnicodeString::UnicodeString(UChar *buff,
}
}
UnicodeString::UnicodeString(const char *codepageData,
const char *codepage)
UnicodeString::UnicodeString(const char *src, int32_t length, EInvariant)
: fLength(0),
fCapacity(US_STACKBUF_SIZE),
fArray(fStackBuffer),
fFlags(kShortString)
{
if(codepageData != 0) {
doCodepageCreate(codepageData, (int32_t)uprv_strlen(codepageData), codepage);
}
}
UnicodeString::UnicodeString(const char *codepageData,
int32_t dataLength,
const char *codepage)
: fLength(0),
fCapacity(US_STACKBUF_SIZE),
fArray(fStackBuffer),
fFlags(kShortString)
{
if(codepageData != 0) {
doCodepageCreate(codepageData, dataLength, codepage);
}
}
UnicodeString::UnicodeString(const char *src, int32_t srcLength,
UConverter *cnv,
UErrorCode &errorCode)
: fLength(0),
fCapacity(US_STACKBUF_SIZE),
fArray(fStackBuffer),
fFlags(kShortString)
{
if(U_SUCCESS(errorCode)) {
// check arguments
if(src==NULL) {
// treat as an empty string, do nothing more
} else if(srcLength<-1) {
errorCode=U_ILLEGAL_ARGUMENT_ERROR;
} else {
// get input length
if(srcLength==-1) {
srcLength=(int32_t)uprv_strlen(src);
}
if(srcLength>0) {
if(cnv!=0) {
// use the provided converter
ucnv_resetToUnicode(cnv);
doCodepageCreate(src, srcLength, cnv, errorCode);
} else {
// use the default converter
cnv=u_getDefaultConverter(&errorCode);
doCodepageCreate(src, srcLength, cnv, errorCode);
u_releaseDefaultConverter(cnv);
}
}
if(src==NULL) {
// treat as an empty string
} else {
if(length<0) {
length=uprv_strlen(src);
}
if(U_FAILURE(errorCode)) {
if(cloneArrayIfNeeded(length, length, FALSE)) {
u_charsToUChars(src, getArrayStart(), length);
fLength = length;
} else {
setToBogus();
}
}
@ -673,52 +626,6 @@ UnicodeString::doCompareCodePointOrder(int32_t start,
}
}
int8_t
UnicodeString::doCaseCompare(int32_t start,
int32_t length,
const UChar *srcChars,
int32_t srcStart,
int32_t srcLength,
uint32_t options) const
{
// compare illegal string values
// treat const UChar *srcChars==NULL as an empty string
if(isBogus()) {
return -1;
}
// pin indices to legal values
pinIndices(start, length);
if(srcChars == NULL) {
srcStart = srcLength = 0;
}
// get the correct pointer
const UChar *chars = getArrayStart();
chars += start;
srcChars += srcStart;
if(chars != srcChars) {
UErrorCode errorCode=U_ZERO_ERROR;
int32_t result=unorm_cmpEquivFold(chars, length, srcChars, srcLength,
options|U_COMPARE_IGNORE_CASE, &errorCode);
if(result!=0) {
return (int8_t)(result >> 24 | 1);
}
} else {
// get the srcLength if necessary
if(srcLength < 0) {
srcLength = u_strlen(srcChars + srcStart);
}
if(length != srcLength) {
return (int8_t)((length - srcLength) >> 24 | 1);
}
}
return 0;
}
int32_t
UnicodeString::getLength() const {
return length();
@ -798,6 +705,37 @@ UnicodeString::extract(UChar *dest, int32_t destCapacity,
return fLength;
}
// Copies the substring [start, start+length) into a char buffer using the
// invariant-character conversion (u_UCharsToChars); no converter is involved.
// Returns the result of u_terminateChars() for the pinned substring length,
// or 0 when the destination arguments are unusable.
int32_t
UnicodeString::extract(int32_t start,
                       int32_t length,
                       char *target,
                       int32_t targetCapacity,
                       enum EInvariant) const
{
  // a negative capacity is never acceptable
  if(targetCapacity < 0) {
    return 0;
  }
  // a non-empty destination needs actual storage
  if(target == NULL && targetCapacity != 0) {
    return 0;
  }

  // pin the indices to legal values
  pinIndices(start, length);

  // the characters are only copied when the whole substring fits
  const UBool fits = (UBool)(length <= targetCapacity);
  if(fits) {
    u_UCharsToChars(getArrayStart() + start, target, length);
  }

  // the local error code is discarded; only the returned length is reported
  UErrorCode terminateStatus = U_ZERO_ERROR;
  return u_terminateChars(target, targetCapacity, length, &terminateStatus);
}
// Extracts the range [start, limit) of this string into target.
// Both endpoints are passed through pinIndex() before the length is computed.
void
UnicodeString::extractBetween(int32_t start,
                              int32_t limit,
                              UnicodeString& target) const {
  pinIndex(start);
  pinIndex(limit);

  // doExtract() takes a start/length pair rather than start/limit
  const int32_t count = limit - start;
  doExtract(start, count, target);
}
int32_t
UnicodeString::indexOf(const UChar *srcChars,
int32_t srcStart,
@ -1090,157 +1028,6 @@ UnicodeString::setCharAt(int32_t offset,
return *this;
}
/*
* Implement argument checking and buffer handling
* for string case mapping as a common function.
*/
enum {
TO_LOWER,
TO_UPPER,
TO_TITLE,
FOLD_CASE
};
UnicodeString &
UnicodeString::toLower() {
return caseMap(0, Locale::getDefault(), 0, TO_LOWER);
}
UnicodeString &
UnicodeString::toLower(const Locale &locale) {
return caseMap(0, locale, 0, TO_LOWER);
}
UnicodeString &
UnicodeString::toUpper() {
return caseMap(0, Locale::getDefault(), 0, TO_UPPER);
}
UnicodeString &
UnicodeString::toUpper(const Locale &locale) {
return caseMap(0, locale, 0, TO_UPPER);
}
#if !UCONFIG_NO_BREAK_ITERATION
UnicodeString &
UnicodeString::toTitle(BreakIterator *titleIter) {
return caseMap(titleIter, Locale::getDefault(), 0, TO_TITLE);
}
UnicodeString &
UnicodeString::toTitle(BreakIterator *titleIter, const Locale &locale) {
return caseMap(titleIter, locale, 0, TO_TITLE);
}
#endif
UnicodeString &
UnicodeString::foldCase(uint32_t options) {
return caseMap(0, Locale::getDefault(), options, FOLD_CASE);
}
UnicodeString &
UnicodeString::caseMap(BreakIterator *titleIter,
const Locale& locale,
uint32_t options,
int32_t toWhichCase) {
if(fLength <= 0) {
// nothing to do
return *this;
}
// We need to allocate a new buffer for the internal string case mapping function.
// This is very similar to how doReplace() below keeps the old array pointer
// and deletes the old array itself after it is done.
// In addition, we are forcing cloneArrayIfNeeded() to always allocate a new array.
UChar *oldArray = fArray;
int32_t oldLength = fLength;
int32_t *bufferToDelete = 0;
// Make sure that if the string is in fStackBuffer we do not overwrite it!
int32_t capacity;
if(fLength <= US_STACKBUF_SIZE) {
if(fArray == fStackBuffer) {
capacity = 2 * US_STACKBUF_SIZE; // make sure that cloneArrayIfNeeded() allocates a new buffer
} else {
capacity = US_STACKBUF_SIZE;
}
} else {
capacity = fLength + 20;
}
if(!cloneArrayIfNeeded(capacity, capacity, FALSE, &bufferToDelete, TRUE)) {
return *this;
}
UErrorCode errorCode;
#if !UCONFIG_NO_BREAK_ITERATION
// set up the titlecasing break iterator
UBreakIterator *cTitleIter = 0;
if(toWhichCase == TO_TITLE) {
errorCode = U_ZERO_ERROR;
if(titleIter != 0) {
cTitleIter = (UBreakIterator *)titleIter;
ubrk_setText(cTitleIter, oldArray, oldLength, &errorCode);
} else {
cTitleIter = ubrk_open(UBRK_WORD, locale.getName(),
oldArray, oldLength,
&errorCode);
}
if(U_FAILURE(errorCode)) {
uprv_free(bufferToDelete);
setToBogus();
return *this;
}
}
#endif
// Case-map, and if the result is too long, then reallocate and repeat.
do {
errorCode = U_ZERO_ERROR;
if(toWhichCase==TO_LOWER) {
fLength = u_internalStrToLower(fArray, fCapacity,
oldArray, oldLength,
0, oldLength,
locale.getName(),
&errorCode);
} else if(toWhichCase==TO_UPPER) {
fLength = u_internalStrToUpper(fArray, fCapacity,
oldArray, oldLength,
locale.getName(),
&errorCode);
#if !UCONFIG_NO_BREAK_ITERATION
} else if(toWhichCase==TO_TITLE) {
fLength = u_internalStrToTitle(fArray, fCapacity,
oldArray, oldLength,
cTitleIter, locale.getName(),
&errorCode);
#endif
} else {
fLength = u_internalStrFoldCase(fArray, fCapacity,
oldArray, oldLength,
options,
&errorCode);
}
} while(errorCode==U_BUFFER_OVERFLOW_ERROR && cloneArrayIfNeeded(fLength, fLength, FALSE));
#if !UCONFIG_NO_BREAK_ITERATION
if(cTitleIter != 0 && titleIter == 0) {
ubrk_close(cTitleIter);
}
#endif
if (bufferToDelete) {
uprv_free(bufferToDelete);
}
if(U_FAILURE(errorCode)) {
setToBogus();
}
return *this;
}
UnicodeString&
UnicodeString::doReplace( int32_t start,
int32_t length,
@ -1447,53 +1234,6 @@ UnicodeString::padTrailing(int32_t targetLength,
}
}
UnicodeString&
UnicodeString::trim()
{
if(isBogus()) {
return *this;
}
UChar32 c;
int32_t i = fLength, length;
// first cut off trailing white space
for(;;) {
length = i;
if(i <= 0) {
break;
}
UTF_PREV_CHAR(fArray, 0, i, c);
if(!(c == 0x20 || u_isWhitespace(c))) {
break;
}
}
if(length < fLength) {
fLength = length;
}
// find leading white space
int32_t start;
i = 0;
for(;;) {
start = i;
if(i >= length) {
break;
}
UTF_NEXT_CHAR(fArray, i, length, c);
if(!(c == 0x20 || u_isWhitespace(c))) {
break;
}
}
// move string forward over leading white space
if(start > 0) {
doReplace(0, start, 0, 0, 0);
}
return *this;
}
//========================================
// Hashing
//========================================
@ -1509,278 +1249,6 @@ UnicodeString::doHashCode() const
return hashCode;
}
//========================================
// Codeset conversion
//========================================
int32_t
UnicodeString::extract(int32_t start,
int32_t length,
char *target,
uint32_t dstSize,
const char *codepage) const
{
// if the arguments are illegal, then do nothing
if(/*dstSize < 0 || */(dstSize > 0 && target == 0)) {
return 0;
}
// pin the indices to legal values
pinIndices(start, length);
// create the converter
UConverter *converter;
UErrorCode status = U_ZERO_ERROR;
// just write the NUL if the string length is 0
if(length == 0) {
if(dstSize >= 0x80000000) {
// careful: dstSize is unsigned! (0xffffffff means "unlimited")
// make sure that the NUL-termination works (takes int32_t)
dstSize=0x7fffffff;
}
return u_terminateChars(target, dstSize, 0, &status);
}
// if the codepage is the default, use our cache
// if it is an empty string, then use the "invariant character" conversion
if (codepage == 0) {
converter = u_getDefaultConverter(&status);
} else if (*codepage == 0) {
// use the "invariant characters" conversion
int32_t destLength;
// careful: dstSize is unsigned! (0xffffffff means "unlimited")
if(dstSize >= 0x80000000) {
destLength = length;
// make sure that the NUL-termination works (takes int32_t)
dstSize=0x7fffffff;
} else if(length <= (int32_t)dstSize) {
destLength = length;
} else {
destLength = (int32_t)dstSize;
}
u_UCharsToChars(getArrayStart() + start, target, destLength);
return u_terminateChars(target, (int32_t)dstSize, length, &status);
} else {
converter = ucnv_open(codepage, &status);
}
length = doExtract(start, length, target, (int32_t)dstSize, converter, status);
// close the converter
if (codepage == 0) {
u_releaseDefaultConverter(converter);
} else {
ucnv_close(converter);
}
return length;
}
int32_t
UnicodeString::extract(char *dest, int32_t destCapacity,
UConverter *cnv,
UErrorCode &errorCode) const {
if(U_FAILURE(errorCode)) {
return 0;
}
if(isBogus() || destCapacity<0 || (destCapacity>0 && dest==0)) {
errorCode=U_ILLEGAL_ARGUMENT_ERROR;
return 0;
}
// nothing to do?
if(fLength<=0) {
return u_terminateChars(dest, destCapacity, 0, &errorCode);
}
// get the converter
UBool isDefaultConverter;
if(cnv==0) {
isDefaultConverter=TRUE;
cnv=u_getDefaultConverter(&errorCode);
if(U_FAILURE(errorCode)) {
return 0;
}
} else {
isDefaultConverter=FALSE;
ucnv_resetFromUnicode(cnv);
}
// convert
int32_t length=doExtract(0, fLength, dest, destCapacity, cnv, errorCode);
// release the converter
if(isDefaultConverter) {
u_releaseDefaultConverter(cnv);
}
return length;
}
void
UnicodeString::extractBetween(int32_t start,
int32_t limit,
UnicodeString& target) const {
pinIndex(start);
pinIndex(limit);
doExtract(start, limit - start, target);
}
int32_t
UnicodeString::doExtract(int32_t start, int32_t length,
char *dest, int32_t destCapacity,
UConverter *cnv,
UErrorCode &errorCode) const {
if(U_FAILURE(errorCode)) {
if(destCapacity!=0) {
*dest=0;
}
return 0;
}
const UChar *src=fArray+start, *srcLimit=src+length;
char *originalDest=dest;
const char *destLimit;
if(destCapacity==0) {
destLimit=dest=0;
} else if(destCapacity==-1) {
// Pin the limit to U_MAX_PTR if the "magic" destCapacity is used.
destLimit=(char*)U_MAX_PTR(dest);
// for NUL-termination, translate into highest int32_t
destCapacity=0x7fffffff;
} else {
destLimit=dest+destCapacity;
}
// perform the conversion
ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, &errorCode);
length=(int32_t)(dest-originalDest);
// if an overflow occurs, then get the preflighting length
if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
char buffer[1024];
destLimit=buffer+sizeof(buffer);
do {
dest=buffer;
errorCode=U_ZERO_ERROR;
ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, &errorCode);
length+=(int32_t)(dest-buffer);
} while(errorCode==U_BUFFER_OVERFLOW_ERROR);
}
return u_terminateChars(originalDest, destCapacity, length, &errorCode);
}
void
UnicodeString::doCodepageCreate(const char *codepageData,
int32_t dataLength,
const char *codepage)
{
// if there's nothing to convert, do nothing
if(codepageData == 0 || dataLength == 0 || dataLength < -1) {
return;
}
if(dataLength == -1) {
dataLength = uprv_strlen(codepageData);
}
UErrorCode status = U_ZERO_ERROR;
// create the converter
// if the codepage is the default, use our cache
// if it is an empty string, then use the "invariant character" conversion
UConverter *converter = (codepage == 0 ?
u_getDefaultConverter(&status) :
*codepage == 0 ?
0 :
ucnv_open(codepage, &status));
// if we failed, set the appropriate flags and return
if(U_FAILURE(status)) {
setToBogus();
return;
}
// perform the conversion
if(converter == 0) {
// use the "invariant characters" conversion
if(cloneArrayIfNeeded(dataLength, dataLength, FALSE)) {
u_charsToUChars(codepageData, getArrayStart(), dataLength);
fLength = dataLength;
} else {
setToBogus();
}
return;
}
// convert using the real converter
doCodepageCreate(codepageData, dataLength, converter, status);
if(U_FAILURE(status)) {
setToBogus();
}
// close the converter
if(codepage == 0) {
u_releaseDefaultConverter(converter);
} else {
ucnv_close(converter);
}
}
void
UnicodeString::doCodepageCreate(const char *codepageData,
int32_t dataLength,
UConverter *converter,
UErrorCode &status) {
if(U_FAILURE(status)) {
return;
}
// set up the conversion parameters
const char *mySource = codepageData;
const char *mySourceEnd = mySource + dataLength;
UChar *myTarget;
// estimate the size needed:
// 1.25 UChar's per source byte should cover most cases
int32_t arraySize = dataLength + (dataLength >> 2);
// we do not care about the current contents
UBool doCopyArray = FALSE;
for(;;) {
if(!cloneArrayIfNeeded(arraySize, arraySize, doCopyArray)) {
setToBogus();
break;
}
// perform the conversion
myTarget = fArray + fLength;
ucnv_toUnicode(converter, &myTarget, fArray + fCapacity,
&mySource, mySourceEnd, 0, TRUE, &status);
// update the conversion parameters
fLength = (int32_t)(myTarget - fArray);
// allocate more space and copy data, if needed
if(status == U_BUFFER_OVERFLOW_ERROR) {
// reset the error code
status = U_ZERO_ERROR;
// keep the previous conversion results
doCopyArray = TRUE;
// estimate the new size needed, larger than before
// try 2 UChar's per remaining source byte
arraySize = (int32_t)(fLength + 2 * (mySourceEnd - mySource));
} else {
break;
}
}
}
//========================================
// External Buffer
//========================================

View File

@ -0,0 +1,238 @@
/*
*******************************************************************************
*
* Copyright (C) 1999-2004, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: unistr_case.cpp
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:2
*
* created on: 2004aug19
* created by: Markus W. Scherer
*
* Case-mapping functions moved here from unistr.cpp
*/
#include "unicode/utypes.h"
#include "unicode/putil.h"
#include "unicode/locid.h"
#include "cstring.h"
#include "cmemory.h"
#include "unicode/ustring.h"
#include "unicode/unistr.h"
#include "unicode/uchar.h"
#include "unicode/ubrk.h"
#include "ustr_imp.h"
#include "unormimp.h"
U_NAMESPACE_BEGIN
//========================================
// Read-only implementation
//========================================
// Case-insensitive comparison of the substring [start, start+length) of this
// string with the UChars at srcChars+srcStart.
//
// start, length  substring of this string; pinned to legal values
// srcChars       other text; NULL is treated as an empty string
// srcStart       offset into srcChars
// srcLength      number of UChars to compare; passed through unchanged to
//                unorm_cmpEquivFold(), and only resolved here via u_strlen()
//                when it is negative and both sides share the same storage
// options        folding options; U_COMPARE_IGNORE_CASE is always added
//
// Returns 0 if equal, otherwise a negative or positive value (-1 or 1).
// A bogus string always compares as -1.
int8_t
UnicodeString::doCaseCompare(int32_t start,
                             int32_t length,
                             const UChar *srcChars,
                             int32_t srcStart,
                             int32_t srcLength,
                             uint32_t options) const
{
  // compare illegal string values
  if(isBogus()) {
    return -1;
  }

  // pin indices to legal values
  pinIndices(start, length);

  // treat const UChar *srcChars==NULL as an empty string
  if(srcChars == NULL) {
    srcStart = srcLength = 0;
  }

  // get the correct pointers
  const UChar *chars = getArrayStart();
  chars += start;
  if(srcStart != 0) {
    // skipped for offset 0 so that a NULL srcChars is never used in arithmetic
    srcChars += srcStart;
  }

  if(chars != srcChars) {
    UErrorCode errorCode=U_ZERO_ERROR;
    int32_t result=unorm_cmpEquivFold(chars, length, srcChars, srcLength,
                                      options|U_COMPARE_IGNORE_CASE, &errorCode);
    if(result!=0) {
      // collapse the result to -1 or 1: the shift keeps the sign, |1 makes it non-zero
      return (int8_t)(result >> 24 | 1);
    }
  } else {
    // both sides start at the same address, so only the lengths can differ

    // get the srcLength if necessary
    if(srcLength < 0) {
      // srcChars already points at the first source character;
      // adding srcStart a second time would measure from the wrong position
      srcLength = u_strlen(srcChars);
    }
    if(length != srcLength) {
      return (int8_t)((length - srcLength) >> 24 | 1);
    }
  }
  return 0;
}
//========================================
// Write implementation
//========================================
/*
* Implement argument checking and buffer handling
* for string case mapping as a common function.
*/
// Selector values passed as the toWhichCase argument of UnicodeString::caseMap().
enum {
// lowercase mapping
TO_LOWER,
// uppercase mapping
TO_UPPER,
// titlecase mapping (only handled when break iteration is compiled in)
TO_TITLE,
// case folding; caseMap() uses this path for any value that is not one of the above
FOLD_CASE
};
// Lowercases this string in place using the default locale; returns *this.
UnicodeString &
UnicodeString::toLower() {
return caseMap(0, Locale::getDefault(), 0, TO_LOWER);
}
// Lowercases this string in place using the given locale; returns *this.
UnicodeString &
UnicodeString::toLower(const Locale &locale) {
return caseMap(0, locale, 0, TO_LOWER);
}
// Uppercases this string in place using the default locale; returns *this.
UnicodeString &
UnicodeString::toUpper() {
return caseMap(0, Locale::getDefault(), 0, TO_UPPER);
}
// Uppercases this string in place using the given locale; returns *this.
UnicodeString &
UnicodeString::toUpper(const Locale &locale) {
return caseMap(0, locale, 0, TO_UPPER);
}
// Titlecasing needs a break iterator, so both overloads are compiled out
// when UCONFIG_NO_BREAK_ITERATION is set.
#if !UCONFIG_NO_BREAK_ITERATION
// Titlecases this string in place using the default locale; returns *this.
// titleIter may be 0, in which case caseMap() opens its own word break iterator.
UnicodeString &
UnicodeString::toTitle(BreakIterator *titleIter) {
return caseMap(titleIter, Locale::getDefault(), 0, TO_TITLE);
}
// Titlecases this string in place using the given locale; returns *this.
// titleIter may be 0, in which case caseMap() opens its own word break iterator.
UnicodeString &
UnicodeString::toTitle(BreakIterator *titleIter, const Locale &locale) {
return caseMap(titleIter, locale, 0, TO_TITLE);
}
#endif
// Case-folds this string in place; returns *this.
// options is forwarded to the folding function; the locale argument is
// not used by the folding branch of caseMap().
UnicodeString &
UnicodeString::foldCase(uint32_t options) {
return caseMap(0, Locale::getDefault(), options, FOLD_CASE);
}
// Common implementation of toLower(), toUpper(), toTitle() and foldCase().
//
// titleIter    break iterator for titlecasing; if 0 and toWhichCase is TO_TITLE,
//              a word break iterator is opened for the locale and closed again here
// locale       its name is passed to the lower/upper/title mapping functions
// options      passed only to the case folding function
// toWhichCase  TO_LOWER, TO_UPPER or TO_TITLE; any other value selects case folding
//
// Returns *this. The string is set to bogus if the break iterator setup or the
// mapping itself fails.
UnicodeString &
UnicodeString::caseMap(BreakIterator *titleIter,
const Locale& locale,
uint32_t options,
int32_t toWhichCase) {
if(fLength <= 0) {
// nothing to do
return *this;
}
// We need to allocate a new buffer for the internal string case mapping function.
// This is very similar to how doReplace() in unistr.cpp keeps the old array pointer
// and deletes the old array itself after it is done.
// In addition, we are forcing cloneArrayIfNeeded() to always allocate a new array.
// The original text stays readable through oldArray/oldLength while the
// mapping result is written to the newly allocated fArray.
UChar *oldArray = fArray;
int32_t oldLength = fLength;
int32_t *bufferToDelete = 0;
// Make sure that if the string is in fStackBuffer we do not overwrite it!
int32_t capacity;
if(fLength <= US_STACKBUF_SIZE) {
if(fArray == fStackBuffer) {
capacity = 2 * US_STACKBUF_SIZE; // make sure that cloneArrayIfNeeded() allocates a new buffer
} else {
capacity = US_STACKBUF_SIZE;
}
} else {
// some headroom for mappings that grow the text
capacity = fLength + 20;
}
// NOTE(review): bufferToDelete presumably receives the previous heap buffer
// so that it can be freed after the mapping - confirm in cloneArrayIfNeeded().
if(!cloneArrayIfNeeded(capacity, capacity, FALSE, &bufferToDelete, TRUE)) {
return *this;
}
// assigned before every use: in the titlecase setup and at the top of the loop
UErrorCode errorCode;
#if !UCONFIG_NO_BREAK_ITERATION
// set up the titlecasing break iterator
UBreakIterator *cTitleIter = 0;
if(toWhichCase == TO_TITLE) {
errorCode = U_ZERO_ERROR;
if(titleIter != 0) {
// caller-supplied iterator: point it at the original text
cTitleIter = (UBreakIterator *)titleIter;
ubrk_setText(cTitleIter, oldArray, oldLength, &errorCode);
} else {
// no iterator supplied: open a word iterator over the original text
cTitleIter = ubrk_open(UBRK_WORD, locale.getName(),
oldArray, oldLength,
&errorCode);
}
if(U_FAILURE(errorCode)) {
uprv_free(bufferToDelete);
setToBogus();
return *this;
}
}
#endif
// Case-map, and if the result is too long, then reallocate and repeat.
do {
errorCode = U_ZERO_ERROR;
if(toWhichCase==TO_LOWER) {
fLength = u_internalStrToLower(fArray, fCapacity,
oldArray, oldLength,
0, oldLength,
locale.getName(),
&errorCode);
} else if(toWhichCase==TO_UPPER) {
fLength = u_internalStrToUpper(fArray, fCapacity,
oldArray, oldLength,
locale.getName(),
&errorCode);
#if !UCONFIG_NO_BREAK_ITERATION
} else if(toWhichCase==TO_TITLE) {
fLength = u_internalStrToTitle(fArray, fCapacity,
oldArray, oldLength,
cTitleIter, locale.getName(),
&errorCode);
#endif
} else {
fLength = u_internalStrFoldCase(fArray, fCapacity,
oldArray, oldLength,
options,
&errorCode);
}
// on overflow, fLength is used as the capacity to request for the retry
} while(errorCode==U_BUFFER_OVERFLOW_ERROR && cloneArrayIfNeeded(fLength, fLength, FALSE));
#if !UCONFIG_NO_BREAK_ITERATION
// close the iterator only if it was opened here, not if the caller supplied it
if(cTitleIter != 0 && titleIter == 0) {
ubrk_close(cTitleIter);
}
#endif
if (bufferToDelete) {
uprv_free(bufferToDelete);
}
if(U_FAILURE(errorCode)) {
setToBogus();
}
return *this;
}
U_NAMESPACE_END

View File

@ -0,0 +1,367 @@
/*
*******************************************************************************
*
* Copyright (C) 1999-2004, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: unistr_cnv.cpp
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:2
*
* created on: 2004aug19
* created by: Markus W. Scherer
*
* Character conversion functions moved here from unistr.cpp
*/
#include "unicode/utypes.h"
#if !UCONFIG_NO_CONVERSION
#include "unicode/putil.h"
#include "cstring.h"
#include "cmemory.h"
#include "unicode/ustring.h"
#include "unicode/unistr.h"
#include "unicode/ucnv.h"
#include "ustr_imp.h"
U_NAMESPACE_BEGIN
//========================================
// Constructors
//========================================
// Constructs a string from NUL-terminated codepage bytes.
// codepage is forwarded to doCodepageCreate(), which treats 0 as the default
// converter and "" as the invariant-character conversion.
UnicodeString::UnicodeString(const char *codepageData,
                             const char *codepage)
  : fLength(0),
    fCapacity(US_STACKBUF_SIZE),
    fArray(fStackBuffer),
    fFlags(kShortString)
{
  // a NULL source leaves the freshly initialized empty string untouched
  if(codepageData == NULL) {
    return;
  }

  const int32_t byteCount = (int32_t)uprv_strlen(codepageData);
  doCodepageCreate(codepageData, byteCount, codepage);
}
// Constructs a string from dataLength codepage bytes.
// dataLength and codepage are forwarded unchanged to doCodepageCreate(),
// which also validates the length.
UnicodeString::UnicodeString(const char *codepageData,
                             int32_t dataLength,
                             const char *codepage)
  : fLength(0),
    fCapacity(US_STACKBUF_SIZE),
    fArray(fStackBuffer),
    fFlags(kShortString)
{
  // a NULL source leaves the freshly initialized empty string untouched
  if(codepageData == NULL) {
    return;
  }

  doCodepageCreate(codepageData, dataLength, codepage);
}
// Constructs a string by converting srcLength bytes of src to Unicode.
//
// src        source bytes; NULL yields an empty string
// srcLength  byte count, or -1 for NUL-terminated input; values below -1
//            set U_ILLEGAL_ARGUMENT_ERROR
// cnv        converter to use (it is reset first); 0 borrows the shared
//            default converter and returns it afterwards
// errorCode  in/out; nothing is done if it already indicates a failure.
//            If a failure is produced here, the string is set to bogus.
UnicodeString::UnicodeString(const char *src, int32_t srcLength,
                             UConverter *cnv,
                             UErrorCode &errorCode)
  : fLength(0),
    fCapacity(US_STACKBUF_SIZE),
    fArray(fStackBuffer),
    fFlags(kShortString)
{
  // a pre-existing failure or a NULL source: stay empty, do nothing more
  if(U_FAILURE(errorCode) || src==NULL) {
    return;
  }

  // check arguments
  if(srcLength<-1) {
    errorCode=U_ILLEGAL_ARGUMENT_ERROR;
    setToBogus();
    return;
  }

  // get input length
  if(srcLength==-1) {
    srcLength=(int32_t)uprv_strlen(src);
  }

  if(srcLength>0) {
    const UBool borrowedDefault=(UBool)(cnv==0);
    if(borrowedDefault) {
      // use the default converter
      cnv=u_getDefaultConverter(&errorCode);
    } else {
      // use the provided converter
      ucnv_resetToUnicode(cnv);
    }

    doCodepageCreate(src, srcLength, cnv, errorCode);

    if(borrowedDefault) {
      u_releaseDefaultConverter(cnv);
    }
  }

  if(U_FAILURE(errorCode)) {
    setToBogus();
  }
}
//========================================
// Codeset conversion
//========================================
// Converts the substring [start, start+length) to bytes in the given codepage.
//
// target    destination buffer; may be 0 only if dstSize is 0
// dstSize   destination capacity; unsigned, values >= 0x80000000
//           (e.g. 0xffffffff) mean "unlimited"
// codepage  0 selects the shared default converter, "" the invariant-character
//           conversion, anything else is opened with ucnv_open()
//
// Returns the length reported by u_terminateChars()/doExtract(), or 0 for
// illegal arguments.
int32_t
UnicodeString::extract(int32_t start,
                       int32_t length,
                       char *target,
                       uint32_t dstSize,
                       const char *codepage) const
{
  // if the arguments are illegal, then do nothing
  // (dstSize is unsigned, so only the missing buffer can be checked)
  if(target == 0 && dstSize > 0) {
    return 0;
  }

  // pin the indices to legal values
  pinIndices(start, length);

  UErrorCode status = U_ZERO_ERROR;

  // just write the NUL if the string length is 0
  if(length == 0) {
    if(dstSize >= 0x80000000) {
      // make sure that the NUL-termination works (takes int32_t)
      dstSize=0x7fffffff;
    }
    return u_terminateChars(target, dstSize, 0, &status);
  }

  // an empty codepage name selects the "invariant characters" conversion
  if(codepage != 0 && *codepage == 0) {
    int32_t destLength = length;
    if(dstSize >= 0x80000000) {
      // "unlimited": copy everything,
      // and make sure that the NUL-termination works (takes int32_t)
      dstSize=0x7fffffff;
    } else if(length > (int32_t)dstSize) {
      // copy only as much as fits
      destLength = (int32_t)dstSize;
    }
    u_UCharsToChars(getArrayStart() + start, target, destLength);
    return u_terminateChars(target, (int32_t)dstSize, length, &status);
  }

  // create the converter: the cached default one, or a named one
  const UBool useDefault = (UBool)(codepage == 0);
  UConverter *converter = useDefault ?
                          u_getDefaultConverter(&status) :
                          ucnv_open(codepage, &status);

  length = doExtract(start, length, target, (int32_t)dstSize, converter, status);

  // give the converter back the same way it was obtained
  if(useDefault) {
    u_releaseDefaultConverter(converter);
  } else {
    ucnv_close(converter);
  }

  return length;
}
// Converts the whole string to bytes using the given converter.
//
// dest, destCapacity  destination buffer; dest may be 0 only if destCapacity is 0
// cnv                 converter to use (its from-Unicode state is reset first);
//                     0 borrows the shared default converter
// errorCode           in/out; U_ILLEGAL_ARGUMENT_ERROR for a bogus string or
//                     bad buffer arguments
//
// Returns the length reported by doExtract()/u_terminateChars(), or 0 on error.
int32_t
UnicodeString::extract(char *dest, int32_t destCapacity,
                       UConverter *cnv,
                       UErrorCode &errorCode) const {
  if(U_FAILURE(errorCode)) {
    return 0;
  }

  if(isBogus() || destCapacity<0 || (destCapacity>0 && dest==0)) {
    errorCode=U_ILLEGAL_ARGUMENT_ERROR;
    return 0;
  }

  // nothing to do?
  if(fLength<=0) {
    return u_terminateChars(dest, destCapacity, 0, &errorCode);
  }

  // get the converter
  const UBool borrowedDefault=(UBool)(cnv==0);
  if(borrowedDefault) {
    cnv=u_getDefaultConverter(&errorCode);
    if(U_FAILURE(errorCode)) {
      return 0;
    }
  } else {
    ucnv_resetFromUnicode(cnv);
  }

  // convert
  const int32_t written=doExtract(0, fLength, dest, destCapacity, cnv, errorCode);

  // release the converter only if it was borrowed here
  if(borrowedDefault) {
    u_releaseDefaultConverter(cnv);
  }

  return written;
}
// Converts [start, start+length) of this string to bytes with an already
// opened converter and NUL-terminates the result via u_terminateChars().
//
// start, length  substring to convert; not pinned here, so callers are
//                expected to pass legal values
// dest           destination buffer
// destCapacity   capacity of dest; 0 means "preflight only", -1 is a magic
//                value meaning "unlimited"
// cnv            converter to use
// errorCode      in/out; if it already indicates a failure, only a NUL is
//                written (when destCapacity is not 0) and 0 is returned
//
// Returns the value of u_terminateChars() for the total output length,
// including output that did not fit into dest.
int32_t
UnicodeString::doExtract(int32_t start, int32_t length,
char *dest, int32_t destCapacity,
UConverter *cnv,
UErrorCode &errorCode) const {
if(U_FAILURE(errorCode)) {
if(destCapacity!=0) {
*dest=0;
}
return 0;
}
const UChar *src=fArray+start, *srcLimit=src+length;
char *originalDest=dest;
const char *destLimit;
if(destCapacity==0) {
// no output buffer: convert into an empty range so that everything overflows
destLimit=dest=0;
} else if(destCapacity==-1) {
// Pin the limit to U_MAX_PTR if the "magic" destCapacity is used.
destLimit=(char*)U_MAX_PTR(dest);
// for NUL-termination, translate into highest int32_t
destCapacity=0x7fffffff;
} else {
destLimit=dest+destCapacity;
}
// perform the conversion
ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, &errorCode);
// number of bytes actually written to the caller's buffer
length=(int32_t)(dest-originalDest);
// if an overflow occurs, then get the preflighting length
if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
// scratch buffer: its contents are discarded, only the byte counts are added up
char buffer[1024];
destLimit=buffer+sizeof(buffer);
do {
dest=buffer;
errorCode=U_ZERO_ERROR;
ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, &errorCode);
length+=(int32_t)(dest-buffer);
} while(errorCode==U_BUFFER_OVERFLOW_ERROR);
}
return u_terminateChars(originalDest, destCapacity, length, &errorCode);
}
void
UnicodeString::doCodepageCreate(const char *codepageData,
int32_t dataLength,
const char *codepage)
{
// if there's nothing to convert, do nothing
if(codepageData == 0 || dataLength == 0 || dataLength < -1) {
return;
}
if(dataLength == -1) {
dataLength = uprv_strlen(codepageData);
}
UErrorCode status = U_ZERO_ERROR;
// create the converter
// if the codepage is the default, use our cache
// if it is an empty string, then use the "invariant character" conversion
UConverter *converter = (codepage == 0 ?
u_getDefaultConverter(&status) :
*codepage == 0 ?
0 :
ucnv_open(codepage, &status));
// if we failed, set the appropriate flags and return
if(U_FAILURE(status)) {
setToBogus();
return;
}
// perform the conversion
if(converter == 0) {
// use the "invariant characters" conversion
if(cloneArrayIfNeeded(dataLength, dataLength, FALSE)) {
u_charsToUChars(codepageData, getArrayStart(), dataLength);
fLength = dataLength;
} else {
setToBogus();
}
return;
}
// convert using the real converter
doCodepageCreate(codepageData, dataLength, converter, status);
if(U_FAILURE(status)) {
setToBogus();
}
// close the converter
if(codepage == 0) {
u_releaseDefaultConverter(converter);
} else {
ucnv_close(converter);
}
}
// Converts dataLength bytes of codepageData to Unicode with an already opened
// converter, appending at fArray + fLength and growing the array as needed.
//
// codepageData  source bytes
// dataLength    number of source bytes; used as given, so callers are
//               expected to have resolved -1 already
// converter     converter to use
// status        in/out; nothing is done if it already indicates a failure.
//               A buffer overflow is handled internally and is not reported.
//
// The string is set to bogus if the array cannot be (re)allocated.
void
UnicodeString::doCodepageCreate(const char *codepageData,
int32_t dataLength,
UConverter *converter,
UErrorCode &status) {
if(U_FAILURE(status)) {
return;
}
// set up the conversion parameters
const char *mySource = codepageData;
const char *mySourceEnd = mySource + dataLength;
UChar *myTarget;
// estimate the size needed:
// 1.25 UChar's per source byte should cover most cases
int32_t arraySize = dataLength + (dataLength >> 2);
// we do not care about the current contents
UBool doCopyArray = FALSE;
// each pass converts as much as fits; on overflow the array grows and the
// loop continues from where mySource stopped
for(;;) {
if(!cloneArrayIfNeeded(arraySize, arraySize, doCopyArray)) {
setToBogus();
break;
}
// perform the conversion
myTarget = fArray + fLength;
ucnv_toUnicode(converter, &myTarget, fArray + fCapacity,
&mySource, mySourceEnd, 0, TRUE, &status);
// update the conversion parameters
fLength = (int32_t)(myTarget - fArray);
// allocate more space and copy data, if needed
if(status == U_BUFFER_OVERFLOW_ERROR) {
// reset the error code
status = U_ZERO_ERROR;
// keep the previous conversion results
doCopyArray = TRUE;
// estimate the new size needed, larger than before
// try 2 UChar's per remaining source byte
arraySize = (int32_t)(fLength + 2 * (mySourceEnd - mySource));
} else {
// finished, or failed with an error other than overflow (left in status)
break;
}
}
}
U_NAMESPACE_END
#endif

View File

@ -0,0 +1,68 @@
/*
*******************************************************************************
*
* Copyright (C) 1999-2004, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: unistr_props.cpp
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:2
*
* created on: 2004aug25
* created by: Markus W. Scherer
*
* Character property dependent functions moved here from unistr.cpp
*/
#include "unicode/utypes.h"
#include "unicode/uchar.h"
#include "unicode/unistr.h"
UnicodeString&
UnicodeString::trim()
{
if(isBogus()) {
return *this;
}
UChar32 c;
int32_t i = fLength, length;
// first cut off trailing white space
for(;;) {
length = i;
if(i <= 0) {
break;
}
UTF_PREV_CHAR(fArray, 0, i, c);
if(!(c == 0x20 || u_isWhitespace(c))) {
break;
}
}
if(length < fLength) {
fLength = length;
}
// find leading white space
int32_t start;
i = 0;
for(;;) {
start = i;
if(i >= length) {
break;
}
UTF_NEXT_CHAR(fArray, i, length, c);
if(!(c == 0x20 || u_isWhitespace(c))) {
break;
}
}
// move string forward over leading white space
if(start > 0) {
doReplace(0, start, 0, 0, 0);
}
return *this;
}

View File

@ -0,0 +1,76 @@
/*
*******************************************************************************
*
* Copyright (C) 1997-2004, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: ures_cnv.c
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2004aug25
* created by: Markus W. Scherer
*
* Character conversion functions moved here from uresbund.c
*/
#include "unicode/utypes.h"
#include "unicode/ustring.h"
#include "unicode/ucnv.h"
#include "unicode/ures.h"
#include "ustr_imp.h"
/*
 * Opens a resource bundle like ures_open(), but takes the path as a
 * NUL-terminated UChar string.
 *
 * The path is converted into a 2048-byte local buffer: with the invariant
 * converter if all of its characters are invariant, otherwise with the
 * default converter (or U_UNSUPPORTED_ERROR if conversion is compiled out).
 *
 * Returns NULL and sets U_ILLEGAL_ARGUMENT_ERROR if myPath is NULL or the
 * path does not fit into the buffer including its NUL terminator.
 * Returns NULL without touching *status if status is NULL or already
 * indicates a failure.
 */
U_CAPI UResourceBundle * U_EXPORT2
ures_openU(const UChar *myPath,
           const char *localeID,
           UErrorCode *status) {
    char path[2048];
    int32_t pathLength;

    if(status==NULL || U_FAILURE(*status)) {
        return NULL;
    }
    if(myPath==NULL) {
        *status=U_ILLEGAL_ARGUMENT_ERROR;
        return NULL;
    }

    pathLength=u_strlen(myPath);
    if(pathLength>=sizeof(path)) {
        /* no room for the NUL terminator */
        *status=U_ILLEGAL_ARGUMENT_ERROR;
        return NULL;
    }

    if(uprv_isInvariantUString(myPath, pathLength)) {
        /*
         * the invariant converter is sufficient for package and tree names
         * and is more efficient
         */
        u_UCharsToChars(myPath, path, pathLength+1); /* +1 to include the NUL */
        return ures_open(path, localeID, status);
    }

#if !UCONFIG_NO_CONVERSION
    {
        /* use the default converter to support variant-character paths */
        UConverter *cnv=u_getDefaultConverter(status);
        if(U_FAILURE(*status)) {
            return NULL;
        }

        pathLength=ucnv_fromUChars(cnv, path, (int32_t)sizeof(path), myPath, pathLength, status);
        u_releaseDefaultConverter(cnv);
        if(U_FAILURE(*status)) {
            return NULL;
        }
        if(pathLength>=sizeof(path)) {
            /* not NUL-terminated - path too long */
            *status=U_ILLEGAL_ARGUMENT_ERROR;
            return NULL;
        }
    }
    return ures_open(path, localeID, status);
#else
    /* the default converter is not available */
    *status=U_UNSUPPORTED_ERROR;
    return NULL;
#endif
}

View File

@ -1694,41 +1694,6 @@ ures_open(const char* path,
return r;
}
U_CAPI UResourceBundle* U_EXPORT2 ures_openU(const UChar* myPath,
const char* localeID,
UErrorCode* status)
{
char path[2048];
UConverter *cnv;
int32_t length;
if(status==NULL || U_FAILURE(*status)) {
return NULL;
}
if(myPath==NULL) {
*status=U_ILLEGAL_ARGUMENT_ERROR;
return NULL;
}
cnv=u_getDefaultConverter(status);
if(U_FAILURE(*status)) {
return NULL;
}
length=ucnv_fromUChars(cnv, path, sizeof(path), myPath, -1, status);
u_releaseDefaultConverter(cnv);
if(U_FAILURE(*status)) {
return NULL;
}
if(length>=sizeof(path)) {
/* not NUL-terminated - path too long */
*status=U_ILLEGAL_ARGUMENT_ERROR;
return NULL;
}
return ures_open(path, localeID, status);
}
/**
* Opens a resource bundle without "canonicalizing" the locale name. No fallback will be performed
* or sought. However, alias substitution will happen!

View File

@ -0,0 +1,230 @@
/*
*******************************************************************************
*
* Copyright (C) 1998-2004, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: ustr_cnv.c
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2004aug24
* created by: Markus W. Scherer
*
* Character conversion functions moved here from ustring.c
*/
#include "unicode/utypes.h"
#if !UCONFIG_NO_CONVERSION
#include "unicode/ustring.h"
#include "unicode/ucnv.h"
#include "cstring.h"
#include "cmemory.h"
#include "umutex.h"
#include "ustr_imp.h"
/* mutexed access to a shared default converter ----------------------------- */
static UConverter *gDefaultConverter = NULL;
/*
 * Returns the cached shared converter if there is one (removing it from the
 * cache so that it is not handed out twice), otherwise opens a new one with
 * ucnv_open(NULL, status).
 * Returns NULL if opening a new converter left *status in a failure state.
 * Converters obtained here are handed back via u_releaseDefaultConverter().
 */
U_CAPI UConverter* U_EXPORT2
u_getDefaultConverter(UErrorCode *status)
{
    UConverter *cnv = NULL;

    /* unlocked peek first: an empty cache does not need the mutex at all */
    if (gDefaultConverter != NULL) {
        umtx_lock(NULL);
        /* look again under the lock - it may have been taken in the meantime */
        if (gDefaultConverter != NULL) {
            cnv = gDefaultConverter;
            gDefaultConverter = NULL;
        }
        umtx_unlock(NULL);
    }
    if (cnv != NULL) {
        return cnv;
    }

    /* nothing cached: open a fresh converter */
    cnv = ucnv_open(NULL, status);
    if (U_FAILURE(*status)) {
        ucnv_close(cnv);
        return NULL;
    }
    return cnv;
}
/*
 * Hands a converter back after use.
 * If the cache slot is empty, the converter is reset and stored there for the
 * next u_getDefaultConverter() call; otherwise it is closed.
 * A NULL converter is accepted and is neither reset nor closed.
 */
U_CAPI void U_EXPORT2
u_releaseDefaultConverter(UConverter *converter)
{
    /* what still has to be closed at the end; cleared once the cache takes ownership */
    UConverter *leftover = converter;

    if (gDefaultConverter == NULL) {
        /* reset outside the lock so that the locked section stays short */
        if (converter != NULL) {
            ucnv_reset(converter);
        }
        umtx_lock(NULL);
        /* look again under the lock - another thread may have filled the slot */
        if (gDefaultConverter == NULL) {
            gDefaultConverter = converter;
            leftover = NULL;
        }
        umtx_unlock(NULL);
    }

    if (leftover != NULL) {
        ucnv_close(leftover);
    }
}
/* conversions between char* and UChar* ------------------------------------- */
/* maximum string length for u_uastrcpy() and u_austrcpy() implementations */
#define MAX_STRLEN 0x0FFFFFFF
/*
 * Counts the chars in s1 up to, but not including, the first NUL,
 * and stops counting once n chars have been seen.
 * Returns 0 for a NULL s1 or for n==0.
 */
static int32_t u_astrnlen(const char *s1, int32_t n)
{
    int32_t count = 0;

    if (s1 == NULL) {
        return 0;
    }
    /* n is the remaining budget; the char is only looked at while budget remains */
    for (; n != 0 && s1[count] != 0; --n) {
        ++count;
    }
    return count;
}
/*
 * Converts at most n chars of s2 (stopping at its NUL) to UChars with the
 * shared default converter, writing at most n UChars to ucs1.
 * The output is NUL-terminated only if fewer than n UChars were written.
 * If no converter is available, or conversion fails with anything other than
 * U_BUFFER_OVERFLOW_ERROR, ucs1[0] is set to 0.
 * Returns ucs1.
 */
U_CAPI UChar* U_EXPORT2
u_uastrncpy(UChar *ucs1,
            const char *s2,
            int32_t n)
{
    UErrorCode errorCode = U_ZERO_ERROR;
    UConverter *converter = u_getDefaultConverter(&errorCode);
    UChar *dest = ucs1;
    UChar *destLimit;
    const char *srcLimit;

    if(U_FAILURE(errorCode) || converter == NULL) {
        *ucs1 = 0;
        return ucs1;
    }

    destLimit = ucs1 + n;
    srcLimit = s2 + u_astrnlen(s2, n);

    ucnv_reset(converter);
    ucnv_toUnicode(converter,
                   &dest, destLimit,
                   &s2, srcLimit,
                   NULL,
                   TRUE,
                   &errorCode);
    ucnv_reset(converter); /* leave the converter clean for the next user */
    u_releaseDefaultConverter(converter);

    if(U_FAILURE(errorCode) && errorCode != U_BUFFER_OVERFLOW_ERROR) {
        *ucs1 = 0; /* real failure: return an empty string */
    }
    /* a buffer overflow is not treated as an error, it only means there is no room for the NUL */
    if(dest < destLimit) {
        *dest = 0;
    }
    return ucs1;
}
/*
 * Converts the whole NUL-terminated char string s2 to UChars with the shared
 * default converter and stores the result in ucs1.
 * The destination capacity passed to the converter is MAX_STRLEN, i.e. the
 * caller is responsible for ucs1 being large enough.
 * If no converter is available or the conversion fails, ucs1[0] is set to 0.
 * Returns ucs1.
 */
U_CAPI UChar* U_EXPORT2
u_uastrcpy(UChar *ucs1,
           const char *s2 )
{
    UErrorCode errorCode = U_ZERO_ERROR;
    UConverter *converter = u_getDefaultConverter(&errorCode);

    if(U_FAILURE(errorCode) || converter == NULL) {
        *ucs1 = 0;
        return ucs1;
    }

    ucnv_toUChars(converter,
                  ucs1, MAX_STRLEN,
                  s2, uprv_strlen(s2),
                  &errorCode);
    u_releaseDefaultConverter(converter);

    if(U_FAILURE(errorCode)) {
        *ucs1 = 0;
    }
    return ucs1;
}
/*
 * Counts the UChars in ucs1 up to, but not including, the first NUL,
 * and stops counting once n UChars have been seen.
 * Returns 0 for a NULL ucs1 or for n==0.
 */
static int32_t u_ustrnlen(const UChar *ucs1, int32_t n)
{
    int32_t count = 0;

    if (ucs1 == NULL) {
        return 0;
    }
    /* n is the remaining budget; the UChar is only looked at while budget remains */
    for (; n != 0 && ucs1[count] != 0; --n) {
        ++count;
    }
    return count;
}
/*
 * Converts at most n UChars of ucs2 (stopping at its NUL) to chars with the
 * shared default converter, writing at most n chars to s1.
 * The output is NUL-terminated only if fewer than n chars were written.
 * If no converter is available, or conversion fails with anything other than
 * U_BUFFER_OVERFLOW_ERROR, s1[0] is set to 0.
 * Returns s1.
 */
U_CAPI char* U_EXPORT2
u_austrncpy(char *s1,
            const UChar *ucs2,
            int32_t n)
{
    UErrorCode errorCode = U_ZERO_ERROR;
    UConverter *converter = u_getDefaultConverter(&errorCode);
    char *dest = s1;
    char *destLimit;
    const UChar *srcLimit;

    if(U_FAILURE(errorCode) || converter == NULL) {
        *s1 = 0;
        return s1;
    }

    destLimit = s1 + n;
    srcLimit = ucs2 + u_ustrnlen(ucs2, n);

    ucnv_reset(converter);
    ucnv_fromUnicode(converter,
                     &dest, destLimit,
                     &ucs2, srcLimit,
                     NULL,
                     TRUE,
                     &errorCode);
    ucnv_reset(converter); /* leave the converter clean for the next user */
    u_releaseDefaultConverter(converter);

    if(U_FAILURE(errorCode) && errorCode != U_BUFFER_OVERFLOW_ERROR) {
        *s1 = 0; /* real failure: return an empty string */
    }
    /* a buffer overflow is not treated as an error, it only means there is no room for the NUL */
    if(dest < destLimit) {
        *dest = 0;
    }
    return s1;
}
/*
 * Converts the whole NUL-terminated UChar string ucs2 to chars with the
 * shared default converter and stores the result in s1.
 * The destination capacity passed to the converter is MAX_STRLEN, i.e. the
 * caller is responsible for s1 being large enough.
 * A NUL is stored at the length reported by ucnv_fromUChars(); if no
 * converter is available, s1[0] is set to 0 instead.
 * Returns s1.
 */
U_CAPI char* U_EXPORT2
u_austrcpy(char *s1,
           const UChar *ucs2 )
{
    UErrorCode errorCode = U_ZERO_ERROR;
    UConverter *converter = u_getDefaultConverter(&errorCode);
    int32_t written;

    if(U_FAILURE(errorCode) || converter == NULL) {
        *s1 = 0;
        return s1;
    }

    /* -1: the source is NUL-terminated, let the converter determine its length */
    written = ucnv_fromUChars(converter,
                              s1, MAX_STRLEN,
                              ucs2, -1,
                              &errorCode);
    u_releaseDefaultConverter(converter);
    s1[written] = 0;
    return s1;
}
#endif

View File

@ -197,6 +197,8 @@ u_internalFoldCase(UChar32 c,
UChar *dest, int32_t destCapacity,
uint32_t options);
#if !UCONFIG_NO_CONVERSION
/**
* Get the default converter. This is a commonly used converter
* that is used for the ustring and UnicodeString API.
@ -214,6 +216,8 @@ u_getDefaultConverter(UErrorCode *status);
U_CAPI void U_EXPORT2
u_releaseDefaultConverter(UConverter *converter);
#endif
/**
* NUL-terminate a UChar * string if possible.
* If length < destCapacity then NUL-terminate.

View File

@ -44,7 +44,7 @@ StringEnumeration::next(int32_t *resultLength, UErrorCode &status) {
if(resultLength!=NULL) {
*resultLength=unistr.length();
}
unistr.extract(0, INT32_MAX, chars, charsCapacity, "");
unistr.extract(0, INT32_MAX, chars, charsCapacity, US_INV);
return chars;
}
}

View File

@ -19,23 +19,13 @@
#include "unicode/uchar.h"
#include "unicode/uiter.h"
#include "unicode/ustring.h"
#include "unicode/putil.h"
#include "unicode/ucnv.h"
#include "cstring.h"
#include "cwchar.h"
#include "cmemory.h"
#include "umutex.h"
#include "ustr_imp.h"
/* forward declarations of definitions for the shared default converter */
static UConverter *gDefaultConverter = NULL;
/* ANSI string.h - style functions ------------------------------------------ */
/* maximum string length for u_uastrcpy() and u_austrcpy() implementations */
#define MAX_STRLEN 0x0FFFFFFF
/* U+ffff is the highest BMP code point, the highest one that fits into a 16-bit UChar */
#define U_BMP_MAX 0xffff
@ -1166,201 +1156,6 @@ u_memcmpCodePointOrder(const UChar *s1, const UChar *s2, int32_t count) {
return uprv_strCompare(s1, count, s2, count, FALSE, TRUE);
}
/* mutexed access to a shared default converter ----------------------------- */
/*
 * Returns the cached shared converter if there is one (removing it from the
 * cache so that it is not handed out twice), otherwise opens a new one with
 * ucnv_open(NULL, status).
 * Returns NULL if opening a new converter left *status in a failure state.
 * Converters obtained here are handed back via u_releaseDefaultConverter().
 */
U_CAPI UConverter* U_EXPORT2
u_getDefaultConverter(UErrorCode *status)
{
    UConverter *cnv = NULL;

    /* unlocked peek first: an empty cache does not need the mutex at all */
    if (gDefaultConverter != NULL) {
        umtx_lock(NULL);
        /* look again under the lock - it may have been taken in the meantime */
        if (gDefaultConverter != NULL) {
            cnv = gDefaultConverter;
            gDefaultConverter = NULL;
        }
        umtx_unlock(NULL);
    }
    if (cnv != NULL) {
        return cnv;
    }

    /* nothing cached: open a fresh converter */
    cnv = ucnv_open(NULL, status);
    if (U_FAILURE(*status)) {
        ucnv_close(cnv);
        return NULL;
    }
    return cnv;
}
/*
 * Hands a converter back after use.
 * If the cache slot is empty, the converter is reset and stored there for the
 * next u_getDefaultConverter() call; otherwise it is closed.
 * A NULL converter is accepted and is neither reset nor closed.
 */
U_CAPI void U_EXPORT2
u_releaseDefaultConverter(UConverter *converter)
{
    /* what still has to be closed at the end; cleared once the cache takes ownership */
    UConverter *leftover = converter;

    if (gDefaultConverter == NULL) {
        /* reset outside the lock so that the locked section stays short */
        if (converter != NULL) {
            ucnv_reset(converter);
        }
        umtx_lock(NULL);
        /* look again under the lock - another thread may have filled the slot */
        if (gDefaultConverter == NULL) {
            gDefaultConverter = converter;
            leftover = NULL;
        }
        umtx_unlock(NULL);
    }

    if (leftover != NULL) {
        ucnv_close(leftover);
    }
}
/* conversions between char* and UChar* ------------------------------------- */
/*
 * Counts the chars in s1 up to, but not including, the first NUL,
 * and stops counting once n chars have been seen.
 * Returns 0 for a NULL s1 or for n==0.
 */
static int32_t u_astrnlen(const char *s1, int32_t n)
{
    int32_t count = 0;

    if (s1 == NULL) {
        return 0;
    }
    /* n is the remaining budget; the char is only looked at while budget remains */
    for (; n != 0 && s1[count] != 0; --n) {
        ++count;
    }
    return count;
}
/*
 * Converts at most n chars of s2 (stopping at its NUL) to UChars with the
 * shared default converter, writing at most n UChars to ucs1.
 * The output is NUL-terminated only if fewer than n UChars were written.
 * If no converter is available, or conversion fails with anything other than
 * U_BUFFER_OVERFLOW_ERROR, ucs1[0] is set to 0.
 * Returns ucs1.
 */
U_CAPI UChar* U_EXPORT2
u_uastrncpy(UChar *ucs1,
            const char *s2,
            int32_t n)
{
    UErrorCode errorCode = U_ZERO_ERROR;
    UConverter *converter = u_getDefaultConverter(&errorCode);
    UChar *dest = ucs1;
    UChar *destLimit;
    const char *srcLimit;

    if(U_FAILURE(errorCode) || converter == NULL) {
        *ucs1 = 0;
        return ucs1;
    }

    destLimit = ucs1 + n;
    srcLimit = s2 + u_astrnlen(s2, n);

    ucnv_reset(converter);
    ucnv_toUnicode(converter,
                   &dest, destLimit,
                   &s2, srcLimit,
                   NULL,
                   TRUE,
                   &errorCode);
    ucnv_reset(converter); /* leave the converter clean for the next user */
    u_releaseDefaultConverter(converter);

    if(U_FAILURE(errorCode) && errorCode != U_BUFFER_OVERFLOW_ERROR) {
        *ucs1 = 0; /* real failure: return an empty string */
    }
    /* a buffer overflow is not treated as an error, it only means there is no room for the NUL */
    if(dest < destLimit) {
        *dest = 0;
    }
    return ucs1;
}
/*
 * Converts the whole NUL-terminated char string s2 to UChars with the shared
 * default converter and stores the result in ucs1.
 * The destination capacity passed to the converter is MAX_STRLEN, i.e. the
 * caller is responsible for ucs1 being large enough.
 * If no converter is available or the conversion fails, ucs1[0] is set to 0.
 * Returns ucs1.
 */
U_CAPI UChar* U_EXPORT2
u_uastrcpy(UChar *ucs1,
           const char *s2 )
{
    UErrorCode errorCode = U_ZERO_ERROR;
    UConverter *converter = u_getDefaultConverter(&errorCode);

    if(U_FAILURE(errorCode) || converter == NULL) {
        *ucs1 = 0;
        return ucs1;
    }

    ucnv_toUChars(converter,
                  ucs1, MAX_STRLEN,
                  s2, uprv_strlen(s2),
                  &errorCode);
    u_releaseDefaultConverter(converter);

    if(U_FAILURE(errorCode)) {
        *ucs1 = 0;
    }
    return ucs1;
}
/*
 * Counts the UChars in ucs1 up to, but not including, the first NUL,
 * and stops counting once n UChars have been seen.
 * Returns 0 for a NULL ucs1 or for n==0.
 */
static int32_t u_ustrnlen(const UChar *ucs1, int32_t n)
{
    int32_t count = 0;

    if (ucs1 == NULL) {
        return 0;
    }
    /* n is the remaining budget; the UChar is only looked at while budget remains */
    for (; n != 0 && ucs1[count] != 0; --n) {
        ++count;
    }
    return count;
}
/*
 * Converts at most n UChars of ucs2 (stopping at its NUL) to chars with the
 * shared default converter, writing at most n chars to s1.
 * The output is NUL-terminated only if fewer than n chars were written.
 * If no converter is available, or conversion fails with anything other than
 * U_BUFFER_OVERFLOW_ERROR, s1[0] is set to 0.
 * Returns s1.
 */
U_CAPI char* U_EXPORT2
u_austrncpy(char *s1,
            const UChar *ucs2,
            int32_t n)
{
    UErrorCode errorCode = U_ZERO_ERROR;
    UConverter *converter = u_getDefaultConverter(&errorCode);
    char *dest = s1;
    char *destLimit;
    const UChar *srcLimit;

    if(U_FAILURE(errorCode) || converter == NULL) {
        *s1 = 0;
        return s1;
    }

    destLimit = s1 + n;
    srcLimit = ucs2 + u_ustrnlen(ucs2, n);

    ucnv_reset(converter);
    ucnv_fromUnicode(converter,
                     &dest, destLimit,
                     &ucs2, srcLimit,
                     NULL,
                     TRUE,
                     &errorCode);
    ucnv_reset(converter); /* leave the converter clean for the next user */
    u_releaseDefaultConverter(converter);

    if(U_FAILURE(errorCode) && errorCode != U_BUFFER_OVERFLOW_ERROR) {
        *s1 = 0; /* real failure: return an empty string */
    }
    /* a buffer overflow is not treated as an error, it only means there is no room for the NUL */
    if(dest < destLimit) {
        *dest = 0;
    }
    return s1;
}
/*
 * Converts the whole NUL-terminated UChar string ucs2 to chars with the
 * shared default converter and stores the result in s1.
 * The destination capacity passed to the converter is MAX_STRLEN, i.e. the
 * caller is responsible for s1 being large enough.
 * A NUL is stored at the length reported by ucnv_fromUChars(); if no
 * converter is available, s1[0] is set to 0 instead.
 * Returns s1.
 */
U_CAPI char* U_EXPORT2
u_austrcpy(char *s1,
           const UChar *ucs2 )
{
    UErrorCode errorCode = U_ZERO_ERROR;
    UConverter *converter = u_getDefaultConverter(&errorCode);
    int32_t written;

    if(U_FAILURE(errorCode) || converter == NULL) {
        *s1 = 0;
        return s1;
    }

    /* -1: the source is NUL-terminated, let the converter determine its length */
    written = ucnv_fromUChars(converter,
                              s1, MAX_STRLEN,
                              ucs2, -1,
                              &errorCode);
    u_releaseDefaultConverter(converter);
    s1[written] = 0;
    return s1;
}
/* u_unescape & support fns ------------------------------------------------- */
/* This map must be in ASCENDING ORDER OF THE ESCAPE CODE */