ICU-13128 optimize UnicodeString::toTitle() input string handling; test u_strToTitle(in-place)

X-SVN-Rev: 40042
This commit is contained in:
Markus Scherer 2017-04-12 22:39:37 +00:00
parent 1991559782
commit 990890985c
3 changed files with 36 additions and 6 deletions

View File

@ -19,6 +19,7 @@
*/
#include "unicode/utypes.h"
#include "unicode/brkiter.h"
#include "unicode/casemap.h"
#include "unicode/edits.h"
#include "unicode/putil.h"
@ -104,6 +105,12 @@ UnicodeString::caseMap(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITER
UBool writable = isBufferWritable();
UErrorCode errorCode = U_ZERO_ERROR;
#if !UCONFIG_NO_BREAK_ITERATION
// Read-only alias to the original string contents for the titlecasing BreakIterator.
// We cannot set the iterator simply to *this because *this is being modified.
UnicodeString oldString;
#endif
// Try to avoid heap-allocating a new character array for this string.
if (writable ? oldLength <= UPRV_LENGTHOF(oldBuffer) : oldLength < US_STACKBUF_SIZE) {
// Short string: Copy the contents into a temporary buffer and
@ -123,6 +130,12 @@ UnicodeString::caseMap(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITER
buffer = fUnion.fStackFields.fBuffer;
capacity = US_STACKBUF_SIZE;
}
#if !UCONFIG_NO_BREAK_ITERATION
if (iter != nullptr) {
oldString.setTo(FALSE, oldArray, oldLength);
iter->setText(oldString);
}
#endif
newLength = stringCaseMapper(caseLocale, options, UCASEMAP_BREAK_ITERATOR
buffer, capacity,
oldArray, oldLength, NULL, errorCode);
@ -143,6 +156,12 @@ UnicodeString::caseMap(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITER
oldArray = getArrayStart();
Edits edits;
UChar replacementChars[200];
#if !UCONFIG_NO_BREAK_ITERATION
if (iter != nullptr) {
oldString.setTo(FALSE, oldArray, oldLength);
iter->setText(oldString);
}
#endif
stringCaseMapper(caseLocale, options | UCASEMAP_OMIT_UNCHANGED_TEXT, UCASEMAP_BREAK_ITERATOR
replacementChars, UPRV_LENGTHOF(replacementChars),
oldArray, oldLength, &edits, errorCode);
@ -179,6 +198,7 @@ UnicodeString::caseMap(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITER
return *this;
}
errorCode = U_ZERO_ERROR;
// No need to iter->setText() again: The case mapper restarts via iter->first().
newLength = stringCaseMapper(caseLocale, options, UCASEMAP_BREAK_ITERATOR
getArrayStart(), getCapacity(),
oldArray, oldLength, NULL, errorCode);

View File

@ -50,11 +50,6 @@ UnicodeString::toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t
return *this;
}
}
// Because the "this" string is both the source and the destination,
// make a copy of the original source for use by the break iterator.
// See tickets #13127 and #13128
UnicodeString copyOfInput(*this);
bi->setText(copyOfInput);
caseMap(ustrcase_getCaseLocale(locale.getBaseName()), options, bi, ustrcase_internalToTitle);
if(titleIter==NULL) {
delete bi;

View File

@ -28,6 +28,7 @@
#include "unicode/ubrk.h"
#include "unicode/unistr.h"
#include "unicode/ucasemap.h"
#include "unicode/ustring.h"
#include "ucase.h"
#include "ustrtest.h"
#include "unicode/tstdtmod.h"
@ -62,6 +63,7 @@ public:
void TestCaseMapUTF8WithEdits();
void TestLongUnicodeString();
void TestBug13127();
void TestInPlaceTitle();
private:
void assertGreekUpper(const char16_t *s, const char16_t *expected);
@ -90,7 +92,6 @@ StringCaseTest::runIndexedTest(int32_t index, UBool exec, const char *&name, cha
TESTCASE_AUTO(TestCaseConversion);
#if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION
TESTCASE_AUTO(TestCasing);
TESTCASE_AUTO(TestBug13127);
#endif
TESTCASE_AUTO(TestFullCaseFoldingIterator);
TESTCASE_AUTO(TestGreekUpper);
@ -101,6 +102,10 @@ StringCaseTest::runIndexedTest(int32_t index, UBool exec, const char *&name, cha
TESTCASE_AUTO(TestCaseMapWithEdits);
TESTCASE_AUTO(TestCaseMapUTF8WithEdits);
TESTCASE_AUTO(TestLongUnicodeString);
#if !UCONFIG_NO_BREAK_ITERATION
TESTCASE_AUTO(TestBug13127);
TESTCASE_AUTO(TestInPlaceTitle);
#endif
TESTCASE_AUTO_END;
}
@ -1147,3 +1152,13 @@ void StringCaseTest::TestBug13127() {
UnicodeString s(TRUE, s16, -1);
s.toTitle(0, Locale::getEnglish());
}
void StringCaseTest::TestInPlaceTitle() {
// Similar to TestBug13127. u_strToTitle() can modify the buffer in-place.
IcuTestErrorCode errorCode(*this, "TestInPlaceTitle");
char16_t s[32] = u"ß ß ß日本語 abcdef";
const char16_t *expected = u"Ss Ss Ss日本語 Abcdef";
int32_t length = u_strToTitle(s, UPRV_LENGTHOF(s), s, -1, nullptr, "", errorCode);
assertEquals("u_strToTitle(in-place) length", u_strlen(expected), length);
assertEquals("u_strToTitle(in-place)", expected, s);
}