ICU-6337 Use invariant code page to create UnicodeStrings from string literals that contain "\\".

X-SVN-Rev: 24201
This commit is contained in:
Eric Mader 2008-06-17 00:55:35 +00:00
parent 0ad3aff9b2
commit a8678f38b6
17 changed files with 242 additions and 243 deletions

View File

@ -426,7 +426,7 @@ void CharsetDetectionTest::C1BytesTest()
#if !UCONFIG_NO_LEGACY_CONVERSION
UErrorCode status = U_ZERO_ERROR;
UnicodeString sISO = "This is a small sample of some English text. Just enough to be sure that it detects correctly.";
UnicodeString ssWindows = "This is another small sample of some English text. Just enough to be sure that it detects correctly. It also includes some \\u201CC1\\u201D bytes.";
UnicodeString ssWindows("This is another small sample of some English text. Just enough to be sure that it detects correctly. It also includes some \\u201CC1\\u201D bytes.", -1, US_INV);
UnicodeString sWindows = ssWindows.unescape();
int32_t lISO = 0, lWindows = 0;
char *bISO = extractBytes(sISO, "ISO-8859-1", lISO);

View File

@ -1047,7 +1047,7 @@ DateFormatTest::TestLocaleDateFormat() // Bug 495
DateFormat::FULL, Locale::getFrench());
DateFormat *dfUS = DateFormat::createDateTimeInstance(DateFormat::FULL,
DateFormat::FULL, Locale::getUS());
UnicodeString expectedFRENCH ( "lundi 15 septembre 1997 00:00:00 \\u00C9tats-Unis (Los Angeles)" );
UnicodeString expectedFRENCH ( "lundi 15 septembre 1997 00:00:00 \\u00C9tats-Unis (Los Angeles)", -1, US_INV );
expectedFRENCH = expectedFRENCH.unescape();
//UnicodeString expectedUS ( "Monday, September 15, 1997 12:00:00 o'clock AM PDT" );
UnicodeString expectedUS ( "Monday, September 15, 1997 12:00:00 AM PT" );

View File

@ -1,6 +1,6 @@
/*
*******************************************************************************
* Copyright (C) 1996-2007, International Business Machines Corporation and *
* Copyright (C) 1996-2008, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*/
@ -1824,7 +1824,7 @@ IntlTestRBNF::doTest(RuleBasedNumberFormat* formatter, const char* const testDat
errln(msg);
break;
} else {
UnicodeString expectedString = UnicodeString(expectedWords).unescape();
UnicodeString expectedString = UnicodeString(expectedWords, -1, US_INV).unescape();
if (actualString != expectedString) {
UnicodeString msg = "FAIL: check failed for ";
decFmt.format(expectedNumber, msg, status);

View File

@ -1,6 +1,6 @@
/********************************************************************
* COPYRIGHT:
* Copyright (c) 1997-2007, International Business Machines Corporation and
* Copyright (c) 1997-2008, International Business Machines Corporation and
* others. All Rights Reserved.
********************************************************************
************************************************************************
@ -26,7 +26,7 @@ JamoTest::JamoTest()
UParseError parseError;
UErrorCode status = U_ZERO_ERROR;
NAME_JAMO = Transliterator::createFromRules("Name-Jamo",
JAMO_NAMES_RULES,
UNICODE_STRING_SIMPLE(JAMO_NAMES_RULES),
UTRANS_FORWARD, parseError, status);
if (U_FAILURE(status)) {
@ -35,7 +35,7 @@ JamoTest::JamoTest()
}
status = U_ZERO_ERROR;
JAMO_NAME = Transliterator::createFromRules("Jamo-Name",
JAMO_NAMES_RULES,
UNICODE_STRING_SIMPLE(JAMO_NAMES_RULES),
UTRANS_REVERSE, parseError, status);
if (U_FAILURE(status)) {
delete JAMO_NAME;
@ -400,7 +400,7 @@ JamoTest::TestRealText() {
int32_t i;
for (i=0; i < WHAT_IS_UNICODE_length; ++i) {
++total;
UnicodeString hangul = WHAT_IS_UNICODE[i];
UnicodeString hangul = UNICODE_STRING_SIMPLE(WHAT_IS_UNICODE[i]);
hangul = hangul.unescape(); // Parse backslash-u escapes
UnicodeString hangulX = hangul;
rt.transliterate(hangulX);

View File

@ -696,7 +696,7 @@ NumberFormatTest::TestCurrency(void)
for(int i=0; i < (int)(sizeof(testCases)/sizeof(testCases[i])); i++){
status = U_ZERO_ERROR;
const char *localeID = testCases[i][0];
UnicodeString expected(testCases[i][1]);
UnicodeString expected(testCases[i][1], -1, US_INV);
expected = expected.unescape();
s.truncate(0);
char loc[256]={0};
@ -1358,7 +1358,7 @@ void NumberFormatTest::TestSurrogateSupport(void) {
custom.setSymbol(DecimalFormatSymbols::kZeroDigitSymbol, (UChar)0x30);
custom.setSymbol(DecimalFormatSymbols::kCurrencySymbol, "units of money");
custom.setSymbol(DecimalFormatSymbols::kMonetarySeparatorSymbol, "money separator");
patternStr = "0.00 \\u00A4' in your bank account'";
patternStr = UNICODE_STRING_SIMPLE("0.00 \\u00A4' in your bank account'");
patternStr = patternStr.unescape();
expStr = UnicodeString(" minus 20money separator00 units of money in your bank account", "");
status = U_ZERO_ERROR;

View File

@ -1,5 +1,5 @@
/***********************************************************************
* Copyright (c) 1997-2007, International Business Machines Corporation
* Copyright (c) 1997-2008, International Business Machines Corporation
* and others. All Rights Reserved.
***********************************************************************/
@ -1085,7 +1085,7 @@ void NumberFormatRegressionTest::Test4071859 (void)
String expectedPercent = "-578.998%";
*/
UnicodeString expectedDefault("-5.789,988");
UnicodeString expectedCurrency("-\\u20A4 5.790");
UnicodeString expectedCurrency("-\\u20A4 5.790", -1, US_INV);
UnicodeString expectedPercent("-578.999%");
expectedCurrency = expectedCurrency.unescape();

View File

@ -1,5 +1,5 @@
/********************************************************************
* Copyright (c) 1999-2007, International Business Machines
* Copyright (c) 1999-2008, International Business Machines
* Corporation and others. All Rights Reserved.
********************************************************************
* Date Name Description
@ -744,12 +744,12 @@ void RBBIAPITest::TestRuleStatus() {
// Test the vector form of break rule status.
//
void RBBIAPITest::TestRuleStatusVec() {
UnicodeString rulesString = "[A-N]{100}; \n"
UnicodeString rulesString( "[A-N]{100}; \n"
"[a-w]{200}; \n"
"[\\p{L}]{300}; \n"
"[\\p{N}]{400}; \n"
"[0-5]{500}; \n"
"!.*;\n";
"!.*;\n", -1, US_INV);
UnicodeString testString1 = "Aapz5?";
int32_t statusVals[10];
int32_t numStatuses;

View File

@ -330,13 +330,13 @@ static const int T_IDEO = 400;
//
//-----------------------------------------------------------------------------------
void RBBITest::TestStatusReturn() {
UnicodeString rulesString1 = "$Letters = [:L:];\n"
UnicodeString rulesString1("$Letters = [:L:];\n"
"$Numbers = [:N:];\n"
"$Letters+{1};\n"
"$Numbers+{2};\n"
"Help\\ {4}/me\\!;\n"
"[^$Letters $Numbers];\n"
"!.*;\n";
"!.*;\n", -1, US_INV);
UnicodeString testString1 = "abc123..abc Help me Help me!";
// 01234567890123456789012345678
int32_t bounds1[] = {0, 3, 6, 7, 8, 11, 12, 16, 17, 19, 20, 25, 27, 28, -1};
@ -1189,7 +1189,7 @@ void RBBITest::TestBug5775() {
return;
}
UnicodeString s("One.\\u00ad Two.");
UnicodeString s("One.\\u00ad Two.", -1, US_INV);
// 01234 56789
s = s.unescape();
bi->setText(s);
@ -1422,7 +1422,7 @@ void RBBITest::TestExtended() {
tp.srcLine = new UVector32(status);
tp.srcCol = new UVector32(status);
RegexMatcher localeMatcher("<locale *([\\p{L}\\p{Nd}_]*) *>", 0, status);
RegexMatcher localeMatcher(UNICODE_STRING_SIMPLE("<locale *([\\p{L}\\p{Nd}_]*) *>"), 0, status);
TEST_ASSERT_SUCCESS(status);
@ -1597,7 +1597,7 @@ void RBBITest::TestExtended() {
break;
}
if (testString.compare(charIdx-1, 3, "\\N{") == 0) {
if (testString.compare(charIdx-1, 3, UNICODE_STRING_SIMPLE("\\N{")) == 0) {
// Named character, e.g. \N{COMBINING GRAVE ACCENT}
// Get the code point from the name and insert it into the test data.
// (Damn, no API takes names in Unicode !!!
@ -2016,7 +2016,7 @@ void RBBITest::runUnicodeTestData(const char *fileName, RuleBasedBreakIterator *
// Capture Group # 1 2 3 4 5
// Parses this item: divide x hex digits comment \n unrecognized \n
//
UnicodeString tokenExpr = "[ \t]*(?:(\\u00F7)|(\\u00D7)|([0-9a-fA-F]+)|((?:#.*?)?$.)|(.*?$.))";
UnicodeString tokenExpr("[ \t]*(?:(\\u00F7)|(\\u00D7)|([0-9a-fA-F]+)|((?:#.*?)?$.)|(.*?$.))", -1, US_INV);
RegexMatcher tokenMatcher(tokenExpr, testFileAsString, UREGEX_MULTILINE | UREGEX_DOTALL, status);
UnicodeString testString;
UVector32 breakPositions(status);
@ -2245,23 +2245,23 @@ RBBICharMonkey::RBBICharMonkey() {
fText = NULL;
fCRLFSet = new UnicodeSet("[\\r\\n]", status);
fControlSet = new UnicodeSet("[\\p{Grapheme_Cluster_Break = Control}]", status);
fExtendSet = new UnicodeSet("[\\p{Grapheme_Cluster_Break = Extend}]", status);
fPrependSet = new UnicodeSet("[\\p{Grapheme_Cluster_Break = Prepend}]", status);
fSpacingSet = new UnicodeSet("[\\p{Grapheme_Cluster_Break = SpacingMark}]", status);
fLSet = new UnicodeSet("[\\p{Grapheme_Cluster_Break = L}]", status);
fVSet = new UnicodeSet("[\\p{Grapheme_Cluster_Break = V}]", status);
fTSet = new UnicodeSet("[\\p{Grapheme_Cluster_Break = T}]", status);
fLVSet = new UnicodeSet("[\\p{Grapheme_Cluster_Break = LV}]", status);
fLVTSet = new UnicodeSet("[\\p{Grapheme_Cluster_Break = LVT}]", status);
fCRLFSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\r\\n]"), status);
fControlSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Grapheme_Cluster_Break = Control}]"), status);
fExtendSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Grapheme_Cluster_Break = Extend}]"), status);
fPrependSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Grapheme_Cluster_Break = Prepend}]"), status);
fSpacingSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Grapheme_Cluster_Break = SpacingMark}]"), status);
fLSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Grapheme_Cluster_Break = L}]"), status);
fVSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Grapheme_Cluster_Break = V}]"), status);
fTSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Grapheme_Cluster_Break = T}]"), status);
fLVSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Grapheme_Cluster_Break = LV}]"), status);
fLVTSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Grapheme_Cluster_Break = LVT}]"), status);
fHangulSet = new UnicodeSet();
fHangulSet->addAll(*fLSet);
fHangulSet->addAll(*fVSet);
fHangulSet->addAll(*fTSet);
fHangulSet->addAll(*fLVSet);
fHangulSet->addAll(*fLVTSet);
fAnySet = new UnicodeSet("[\\u0000-\\U0010ffff]", status);
fAnySet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\u0000-\\U0010ffff]"), status);
fSets = new UVector(status);
fSets->addElement(fCRLFSet, status);
@ -2457,18 +2457,18 @@ RBBIWordMonkey::RBBIWordMonkey()
fSets = new UVector(status);
fCRSet = new UnicodeSet("[\\p{Word_Break = CR}]", status);
fLFSet = new UnicodeSet("[\\p{Word_Break = LF}]", status);
fNewlineSet = new UnicodeSet("[\\p{Word_Break = Newline}]", status);
fALetterSet = new UnicodeSet("[\\p{Word_Break = ALetter}]", status);
fKatakanaSet = new UnicodeSet("[\\p{Word_Break = Katakana}]", status);
fMidNumLetSet = new UnicodeSet("[\\p{Word_Break = MidNumLet}]", status);
fMidLetterSet = new UnicodeSet("[\\p{Word_Break = MidLetter}]", status);
fMidNumSet = new UnicodeSet("[\\p{Word_Break = MidNum}]", status);
fNumericSet = new UnicodeSet("[\\p{Word_Break = Numeric}]", status);
fFormatSet = new UnicodeSet("[\\p{Word_Break = Format}]", status);
fExtendNumLetSet = new UnicodeSet("[\\p{Word_Break = ExtendNumLet}]", status);
fExtendSet = new UnicodeSet("[\\p{Word_Break = Extend}]", status);
fCRSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = CR}]"), status);
fLFSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = LF}]"), status);
fNewlineSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = Newline}]"), status);
fALetterSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = ALetter}]"), status);
fKatakanaSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = Katakana}]"), status);
fMidNumLetSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = MidNumLet}]"), status);
fMidLetterSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = MidLetter}]"), status);
fMidNumSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = MidNum}]"), status);
fNumericSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = Numeric}]"), status);
fFormatSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = Format}]"), status);
fExtendNumLetSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = ExtendNumLet}]"), status);
fExtendSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = Extend}]"), status);
fOtherSet = new UnicodeSet();
if(U_FAILURE(status)) {
@ -2489,7 +2489,7 @@ RBBIWordMonkey::RBBIWordMonkey()
fOtherSet->removeAll(*fFormatSet);
fOtherSet->removeAll(*fExtendSet);
// Inhibit dictionary characters from being tested at all.
fOtherSet->removeAll(UnicodeSet("[\\p{LineBreak = Complex_Context}]", status));
fOtherSet->removeAll(UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{LineBreak = Complex_Context}]"), status));
fSets->addElement(fCRSet, status);
fSets->addElement(fLFSet, status);
@ -2736,18 +2736,18 @@ RBBISentMonkey::RBBISentMonkey()
// Separator Set Note: Beginning with Unicode 5.1, CR and LF were removed from the separator
// set and made into character classes of their own. For the monkey impl,
// they remain in SEP, since Sep always appears with CR and LF in the rules.
fSepSet = new UnicodeSet("[\\p{Sentence_Break = Sep} \\u000a \\u000d]", status);
fFormatSet = new UnicodeSet("[\\p{Sentence_Break = Format}]", status);
fSpSet = new UnicodeSet("[\\p{Sentence_Break = Sp}]", status);
fLowerSet = new UnicodeSet("[\\p{Sentence_Break = Lower}]", status);
fUpperSet = new UnicodeSet("[\\p{Sentence_Break = Upper}]", status);
fOLetterSet = new UnicodeSet("[\\p{Sentence_Break = OLetter}]", status);
fNumericSet = new UnicodeSet("[\\p{Sentence_Break = Numeric}]", status);
fATermSet = new UnicodeSet("[\\p{Sentence_Break = ATerm}]", status);
fSContinueSet = new UnicodeSet("[\\p{Sentence_Break = SContinue}]", status);
fSTermSet = new UnicodeSet("[\\p{Sentence_Break = STerm}]", status);
fCloseSet = new UnicodeSet("[\\p{Sentence_Break = Close}]", status);
fExtendSet = new UnicodeSet("[\\p{Sentence_Break = Extend}]", status);
fSepSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Sentence_Break = Sep} \\u000a \\u000d]"), status);
fFormatSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Sentence_Break = Format}]"), status);
fSpSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Sentence_Break = Sp}]"), status);
fLowerSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Sentence_Break = Lower}]"), status);
fUpperSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Sentence_Break = Upper}]"), status);
fOLetterSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Sentence_Break = OLetter}]"), status);
fNumericSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Sentence_Break = Numeric}]"), status);
fATermSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Sentence_Break = ATerm}]"), status);
fSContinueSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Sentence_Break = SContinue}]"), status);
fSTermSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Sentence_Break = STerm}]"), status);
fCloseSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Sentence_Break = Close}]"), status);
fExtendSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Sentence_Break = Extend}]"), status);
fOtherSet = new UnicodeSet();
if(U_FAILURE(status)) {
@ -3082,42 +3082,42 @@ RBBILineMonkey::RBBILineMonkey()
fSets = new UVector(status);
fBK = new UnicodeSet("[\\p{Line_Break=BK}]", status);
fCR = new UnicodeSet("[\\p{Line_break=CR}]", status);
fLF = new UnicodeSet("[\\p{Line_break=LF}]", status);
fCM = new UnicodeSet("[\\p{Line_break=CM}]", status);
fNL = new UnicodeSet("[\\p{Line_break=NL}]", status);
fWJ = new UnicodeSet("[\\p{Line_break=WJ}]", status);
fZW = new UnicodeSet("[\\p{Line_break=ZW}]", status);
fGL = new UnicodeSet("[\\p{Line_break=GL}]", status);
fCB = new UnicodeSet("[\\p{Line_break=CB}]", status);
fSP = new UnicodeSet("[\\p{Line_break=SP}]", status);
fB2 = new UnicodeSet("[\\p{Line_break=B2}]", status);
fBA = new UnicodeSet("[\\p{Line_break=BA}]", status);
fBB = new UnicodeSet("[\\p{Line_break=BB}]", status);
fHY = new UnicodeSet("[\\p{Line_break=HY}]", status);
fH2 = new UnicodeSet("[\\p{Line_break=H2}]", status);
fH3 = new UnicodeSet("[\\p{Line_break=H3}]", status);
fCL = new UnicodeSet("[\\p{Line_break=CL}]", status);
fEX = new UnicodeSet("[\\p{Line_break=EX}]", status);
fIN = new UnicodeSet("[\\p{Line_break=IN}]", status);
fJL = new UnicodeSet("[\\p{Line_break=JL}]", status);
fJV = new UnicodeSet("[\\p{Line_break=JV}]", status);
fJT = new UnicodeSet("[\\p{Line_break=JT}]", status);
fNS = new UnicodeSet("[\\p{Line_break=NS}]", status);
fOP = new UnicodeSet("[\\p{Line_break=OP}]", status);
fQU = new UnicodeSet("[\\p{Line_break=QU}]", status);
fIS = new UnicodeSet("[\\p{Line_break=IS}]", status);
fNU = new UnicodeSet("[\\p{Line_break=NU}]", status);
fPO = new UnicodeSet("[\\p{Line_break=PO}]", status);
fPR = new UnicodeSet("[\\p{Line_break=PR}]", status);
fSY = new UnicodeSet("[\\p{Line_break=SY}]", status);
fAI = new UnicodeSet("[\\p{Line_break=AI}]", status);
fAL = new UnicodeSet("[\\p{Line_break=AL}]", status);
fID = new UnicodeSet("[\\p{Line_break=ID}]", status);
fSA = new UnicodeSet("[\\p{Line_break=SA}]", status);
fSG = new UnicodeSet("[\\ud800-\\udfff]", status);
fXX = new UnicodeSet("[\\p{Line_break=XX}]", status);
fBK = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_Break=BK}]"), status);
fCR = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=CR}]"), status);
fLF = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=LF}]"), status);
fCM = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=CM}]"), status);
fNL = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=NL}]"), status);
fWJ = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=WJ}]"), status);
fZW = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=ZW}]"), status);
fGL = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=GL}]"), status);
fCB = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=CB}]"), status);
fSP = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=SP}]"), status);
fB2 = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=B2}]"), status);
fBA = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=BA}]"), status);
fBB = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=BB}]"), status);
fHY = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=HY}]"), status);
fH2 = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=H2}]"), status);
fH3 = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=H3}]"), status);
fCL = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=CL}]"), status);
fEX = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=EX}]"), status);
fIN = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=IN}]"), status);
fJL = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=JL}]"), status);
fJV = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=JV}]"), status);
fJT = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=JT}]"), status);
fNS = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=NS}]"), status);
fOP = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=OP}]"), status);
fQU = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=QU}]"), status);
fIS = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=IS}]"), status);
fNU = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=NU}]"), status);
fPO = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=PO}]"), status);
fPR = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=PR}]"), status);
fSY = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=SY}]"), status);
fAI = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=AI}]"), status);
fAL = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=AL}]"), status);
fID = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=ID}]"), status);
fSA = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=SA}]"), status);
fSG = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\ud800-\\udfff]"), status);
fXX = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=XX}]"), status);
if (U_FAILURE(status)) {
deferredStatus = status;
@ -3169,13 +3169,14 @@ RBBILineMonkey::RBBILineMonkey()
fSets->addElement(fSG, status);
fNumberMatcher = new RegexMatcher(
"((\\p{Line_Break=PR}|\\p{Line_Break=PO})\\p{Line_Break=CM}*)?"
"((\\p{Line_Break=OP}|\\p{Line_Break=HY})\\p{Line_Break=CM}*)?"
"\\p{Line_Break=NU}\\p{Line_Break=CM}*"
"((\\p{Line_Break=NU}|\\p{Line_Break=IS}|\\p{Line_Break=SY})\\p{Line_Break=CM}*)*"
"(\\p{Line_Break=CL}\\p{Line_Break=CM}*)?"
"((\\p{Line_Break=PR}|\\p{Line_Break=PO})\\p{Line_Break=CM}*)?",
0, status);
UNICODE_STRING_SIMPLE(
"((\\p{Line_Break=PR}|\\p{Line_Break=PO})\\p{Line_Break=CM}*)?"
"((\\p{Line_Break=OP}|\\p{Line_Break=HY})\\p{Line_Break=CM}*)?"
"\\p{Line_Break=NU}\\p{Line_Break=CM}*"
"((\\p{Line_Break=NU}|\\p{Line_Break=IS}|\\p{Line_Break=SY})\\p{Line_Break=CM}*)*"
"(\\p{Line_Break=CL}\\p{Line_Break=CM}*)?"
"((\\p{Line_Break=PR}|\\p{Line_Break=PO})\\p{Line_Break=CM}*)?"
), 0, status);
fCharBI = BreakIterator::createCharacterInstance(Locale::getEnglish(), status);
@ -3758,7 +3759,6 @@ void RBBITest::TestWordBreaks(void)
UErrorCode status = U_ZERO_ERROR;
// BreakIterator *bi = BreakIterator::createCharacterInstance(locale, status);
BreakIterator *bi = BreakIterator::createWordInstance(locale, status);
UChar str[300];
static const char *strlist[] =
{
"\\U000e0032\\u0097\\u0f94\\uc2d8\\u05f4\\U000e0031\\u060d",
@ -3808,8 +3808,7 @@ void RBBITest::TestWordBreaks(void)
}
for (loop = 0; loop < (int)(sizeof(strlist) / sizeof(char *)); loop ++) {
// printf("looping %d\n", loop);
u_unescape(strlist[loop], str, 25);
UnicodeString ustr(str);
UnicodeString ustr = CharsToUnicodeString(strlist[loop]);
// RBBICharMonkey monkey;
RBBIWordMonkey monkey;
@ -4105,7 +4104,7 @@ void RBBITest::TestMonkey(char *params) {
// m.reset(p);
if (RegexMatcher("\\S", p, 0, status).find()) {
if (RegexMatcher(UNICODE_STRING_SIMPLE("\\S"), p, 0, status).find()) {
// Each option is stripped out of the option string as it is processed.
// All options have been checked. The option string should have been completely emptied.
char buf[100];

View File

@ -115,15 +115,15 @@ if (status!=errcode) {errln("RegexTest failure at line %d. Expected status=%s,
#define REGEX_TESTLM(pat, text, looking, match) doRegexLMTest(pat, text, looking, match, __LINE__);
UBool RegexTest::doRegexLMTest(const char *pat, const char *text, UBool looking, UBool match, int32_t line) {
const UnicodeString pattern(pat);
const UnicodeString inputText(text);
const UnicodeString pattern(pat, -1, US_INV);
const UnicodeString inputText(text, -1, US_INV);
UErrorCode status = U_ZERO_ERROR;
UParseError pe;
RegexPattern *REPattern = NULL;
RegexMatcher *REMatcher = NULL;
UBool retVal = TRUE;
UnicodeString patString(pat);
UnicodeString patString(pat, -1, US_INV);
REPattern = RegexPattern::compile(patString, 0, pe, status);
if (U_FAILURE(status)) {
errln("RegexTest failure in RegexPattern::compile() at line %d. Status = %s\n",
@ -636,7 +636,7 @@ void RegexTest::API_Match() {
UParseError pe;
UErrorCode status=U_ZERO_ERROR;
UnicodeString re(".*?(?:(\\Gabc)|(abc))");
UnicodeString re(".*?(?:(\\Gabc)|(abc))", -1, US_INV);
RegexPattern *pat = RegexPattern::compile(re, flags, pe, status);
REGEX_CHECK_STATUS;
UnicodeString data = ".abcabc.abc..";
@ -681,7 +681,7 @@ void RegexTest::API_Match() {
REGEX_ASSERT(i==5);
// Check that the bump goes over surrogate pairs OK
s = "\\U00010001\\U00010002\\U00010003\\U00010004";
s = UNICODE_STRING_SIMPLE("\\U00010001\\U00010002\\U00010003\\U00010004");
s = s.unescape();
m.reset(s);
for (i=0; ; i+=2) {
@ -1027,7 +1027,7 @@ void RegexTest::API_Replace() {
REGEX_CHECK_STATUS;
REGEX_ASSERT(dest == "bcbcdefg");
dest = matcher2->replaceFirst("The value of \\$1 is $1.", status);
dest = matcher2->replaceFirst(UNICODE_STRING_SIMPLE("The value of \\$1 is $1."), status);
REGEX_CHECK_STATUS;
REGEX_ASSERT(dest == "The value of $1 is bc.defg");
@ -1035,7 +1035,7 @@ void RegexTest::API_Replace() {
REGEX_CHECK_STATUS;
REGEX_ASSERT(dest == "$ by itself, no group number $$$defg");
UnicodeString replacement = "Supplemental Digit 1 $\\U0001D7CF.";
UnicodeString replacement = UNICODE_STRING_SIMPLE("Supplemental Digit 1 $\\U0001D7CF.");
replacement = replacement.unescape();
dest = matcher2->replaceFirst(replacement, status);
REGEX_CHECK_STATUS;
@ -1049,7 +1049,7 @@ void RegexTest::API_Replace() {
//
{
UnicodeString src = "abc 1 abc 2 abc 3";
UnicodeString substitute = "--\\u0043--";
UnicodeString substitute = UNICODE_STRING_SIMPLE("--\\u0043--");
matcher->reset(src);
UnicodeString result = matcher->replaceAll(substitute, status);
REGEX_CHECK_STATUS;
@ -1057,7 +1057,7 @@ void RegexTest::API_Replace() {
}
{
UnicodeString src = "abc !";
UnicodeString substitute = "--\\U00010000--";
UnicodeString substitute = UNICODE_STRING_SIMPLE("--\\U00010000--");
matcher->reset(src);
UnicodeString result = matcher->replaceAll(substitute, status);
REGEX_CHECK_STATUS;
@ -1186,7 +1186,7 @@ void RegexTest::API_Pattern() {
//
{
UErrorCode status = U_ZERO_ERROR;
RegexPattern *pSource = RegexPattern::compile("\\p{L}+", 0, status);
RegexPattern *pSource = RegexPattern::compile(UNICODE_STRING_SIMPLE("\\p{L}+"), 0, status);
RegexPattern *pClone = pSource->clone();
delete pSource;
RegexMatcher *mFromClone = pClone->matcher(status);
@ -1278,7 +1278,7 @@ void RegexTest::API_Pattern() {
delete pat1;
// split, with a pattern with (capture)
pat1 = RegexPattern::compile("<(\\w*)>", pe, status);
pat1 = RegexPattern::compile(UNICODE_STRING_SIMPLE("<(\\w*)>"), pe, status);
REGEX_CHECK_STATUS;
status = U_ZERO_ERROR;
@ -1444,11 +1444,11 @@ void RegexTest::Extended() {
//
UnicodeString testString(FALSE, testData, len);
RegexMatcher quotedStuffMat("\\s*([\\'\\\"/])(.*?)\\1", 0, status);
RegexMatcher commentMat ("\\s*(#.*)?$", 0, status);
RegexMatcher flagsMat ("\\s*([ixsmdteDEGLMvabtyYzZ2-9]*)([:letter:]*)", 0, status);
RegexMatcher quotedStuffMat(UNICODE_STRING_SIMPLE("\\s*([\\'\\\"/])(.*?)\\1"), 0, status);
RegexMatcher commentMat (UNICODE_STRING_SIMPLE("\\s*(#.*)?$"), 0, status);
RegexMatcher flagsMat (UNICODE_STRING_SIMPLE("\\s*([ixsmdteDEGLMvabtyYzZ2-9]*)([:letter:]*)"), 0, status);
RegexMatcher lineMat("(.*?)\\r?\\n", testString, 0, status);
RegexMatcher lineMat(UNICODE_STRING_SIMPLE("(.*?)\\r?\\n"), testString, 0, status);
UnicodeString testPattern; // The pattern for test from the test file.
UnicodeString testFlags; // the flags for a test.
UnicodeString matchString; // The marked up string to be used as input
@ -2073,7 +2073,7 @@ void RegexTest::PerlTests() {
// Regex to break the input file into lines, and strip the new lines.
// One line per match, capture group one is the desired data.
//
RegexPattern* linePat = RegexPattern::compile("(.+?)[\\r\\n]+", 0, pe, status);
RegexPattern* linePat = RegexPattern::compile(UNICODE_STRING_SIMPLE("(.+?)[\\r\\n]+"), 0, pe, status);
if (U_FAILURE(status)) {
dataerrln("RegexPattern::compile() error");
return;
@ -2084,7 +2084,7 @@ void RegexTest::PerlTests() {
// Regex to split a test file line into fields.
// There are six fields, separated by tabs.
//
RegexPattern* fieldPat = RegexPattern::compile("\\t", 0, pe, status);
RegexPattern* fieldPat = RegexPattern::compile(UNICODE_STRING_SIMPLE("\\t"), 0, pe, status);
//
// Regex to identify test patterns with flag settings, and to separate them.
@ -2092,7 +2092,7 @@ void RegexTest::PerlTests() {
// Test patterns without flags are not quoted: pattern
// Coming out, capture group 2 is the pattern, capture group 3 is the flags.
//
RegexPattern *flagPat = RegexPattern::compile("('?)(.*)\\1(.*)", 0, pe, status);
RegexPattern *flagPat = RegexPattern::compile(UNICODE_STRING_SIMPLE("('?)(.*)\\1(.*)"), 0, pe, status);
RegexMatcher* flagMat = flagPat->matcher(status);
//
@ -2101,19 +2101,19 @@ void RegexTest::PerlTests() {
// are string constants and REs for these constructs.
//
UnicodeString nulnulSrc("${nulnul}");
UnicodeString nulnul("\\u0000\\u0000");
UnicodeString nulnul("\\u0000\\u0000", -1, US_INV);
nulnul = nulnul.unescape();
UnicodeString ffffSrc("${ffff}");
UnicodeString ffff("\\uffff");
UnicodeString ffff("\\uffff", -1, US_INV);
ffff = ffff.unescape();
// regexp for $-[0], $+[2], etc.
RegexPattern *groupsPat = RegexPattern::compile("\\$([+\\-])\\[(\\d+)\\]", 0, pe, status);
RegexPattern *groupsPat = RegexPattern::compile(UNICODE_STRING_SIMPLE("\\$([+\\-])\\[(\\d+)\\]"), 0, pe, status);
RegexMatcher *groupsMat = groupsPat->matcher(status);
// regexp for $0, $1, $2, etc.
RegexPattern *cgPat = RegexPattern::compile("\\$(\\d+)", 0, pe, status);
RegexPattern *cgPat = RegexPattern::compile(UNICODE_STRING_SIMPLE("\\$(\\d+)"), 0, pe, status);
RegexMatcher *cgMat = cgPat->matcher(status);
@ -2138,7 +2138,7 @@ void RegexTest::PerlTests() {
flagMat->matches(status);
UnicodeString pattern = flagMat->group(2, status);
pattern.findAndReplace("${bang}", "!");
pattern.findAndReplace(nulnulSrc, "\\u0000\\u0000");
pattern.findAndReplace(nulnulSrc, UNICODE_STRING_SIMPLE("\\u0000\\u0000"));
pattern.findAndReplace(ffffSrc, ffff);
//
@ -2218,7 +2218,7 @@ void RegexTest::PerlTests() {
// Replace any \n in the match string with an actual new-line char.
// Don't do full unescape, as this unescapes more than Perl does, which
// causes other spurious failures in the tests.
matchString.findAndReplace("\\n", "\n");
matchString.findAndReplace(UNICODE_STRING_SIMPLE("\\n"), "\n");
@ -2315,7 +2315,7 @@ void RegexTest::PerlTests() {
perlExpr.remove(0, 2);
}
else if (perlExpr.startsWith("\\")) { // \Escape. Take following char as a literal.
else if (perlExpr.startsWith(UNICODE_STRING_SIMPLE("\\"))) { // \Escape. Take following char as a literal.
// or as an escaped sequence (e.g. \n)
if (perlExpr.length() > 1) {
perlExpr.remove(0, 1); // Remove the '\', but only if not last char.
@ -2349,7 +2349,7 @@ void RegexTest::PerlTests() {
UnicodeString expectedS(fields[4]);
expectedS.findAndReplace(nulnulSrc, nulnul);
expectedS.findAndReplace(ffffSrc, ffff);
expectedS.findAndReplace("\\n", "\n");
expectedS.findAndReplace(UNICODE_STRING_SIMPLE("\\n"), "\n");
if (expectedS.compare(resultString) != 0) {
@ -2437,7 +2437,7 @@ void RegexTest::Callbacks() {
const void *returnedContext;
URegexMatchCallback *returnedFn;
UErrorCode status = U_ZERO_ERROR;
RegexMatcher matcher("((.)+\\2)+x", 0, status); // A pattern that can run long.
RegexMatcher matcher(UNICODE_STRING_SIMPLE("((.)+\\2)+x"), 0, status); // A pattern that can run long.
REGEX_CHECK_STATUS;
matcher.setMatchCallback(testCallBackFn, &cbInfo, status);
REGEX_CHECK_STATUS;

View File

@ -546,7 +546,7 @@ static char *printOrders(char *buffer, OrderList &list)
void SSearchTest::offsetTest()
{
UnicodeString test[] = {
const char *test[] = {
"\\ua191\\u16ef\\u2036\\u017a",
#if 0
@ -610,7 +610,7 @@ void SSearchTest::offsetTest()
col->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
for(int32_t i = 0; i < testCount; i += 1) {
UnicodeString ts = test[i].unescape();
UnicodeString ts = CharsToUnicodeString(test[i]);
CollationElementIterator *iter = col->createCollationElementIterator(ts);
OrderList forwardList;
OrderList backwardList;
@ -644,7 +644,7 @@ void SSearchTest::offsetTest()
backwardList.reverse();
if (forwardList.compare(backwardList)) {
logln("Works with \"%s\"", test[i].getTerminatedBuffer());
logln("Works with \"%s\"", test[i]);
logln("Forward offsets: [%s]", printOffsets(buffer, forwardList));
// logln("Backward offsets: [%s]", printOffsets(buffer, backwardList));
@ -653,7 +653,7 @@ void SSearchTest::offsetTest()
logln();
} else {
errln("Fails with \"%S\"", test[i].getTerminatedBuffer());
errln("Fails with \"%s\"", test[i]);
infoln("Forward offsets: [%s]", printOffsets(buffer, forwardList));
infoln("Backward offsets: [%s]", printOffsets(buffer, backwardList));

View File

@ -1480,7 +1480,7 @@ void TestIDNA::TestIDNAMonkeyTest(){
/* for debugging */
for (i=0; i<(int)(sizeof(failures)/sizeof(failures[0])); i++){
source.truncate(0);
source.append( failures[i] );
source.append( UNICODE_STRING_SIMPLE(failures[i]) );
source = source.unescape();
source.append((UChar)0x0000);
const UChar *src = source.getBuffer();
@ -1490,13 +1490,13 @@ void TestIDNA::TestIDNAMonkeyTest(){
source.truncate(0);
source.append("\\uCF18\\U00021161\\U000EEF11\\U0002BB82\\U0001D63C");
source.append(UNICODE_STRING_SIMPLE("\\uCF18\\U00021161\\U000EEF11\\U0002BB82\\U0001D63C"));
debug(source.getBuffer(),source.length(),UIDNA_ALLOW_UNASSIGNED);
{ // test deletion of code points
UnicodeString source("\\u043f\\u00AD\\u034f\\u043e\\u0447\\u0435\\u043c\\u0443\\u0436\\u0435\\u043e\\u043d\\u0438\\u043d\\u0435\\u0433\\u043e\\u0432\\u043e\\u0440\\u044f\\u0442\\u043f\\u043e\\u0440\\u0443\\u0441\\u0441\\u043a\\u0438\\u0000");
UnicodeString source("\\u043f\\u00AD\\u034f\\u043e\\u0447\\u0435\\u043c\\u0443\\u0436\\u0435\\u043e\\u043d\\u0438\\u043d\\u0435\\u0433\\u043e\\u0432\\u043e\\u0440\\u044f\\u0442\\u043f\\u043e\\u0440\\u0443\\u0441\\u0441\\u043a\\u0438\\u0000", -1, US_INV);
source = source.unescape();
UnicodeString expected("\\u043f\\u043e\\u0447\\u0435\\u043c\\u0443\\u0436\\u0435\\u043e\\u043d\\u0438\\u043d\\u0435\\u0433\\u043e\\u0432\\u043e\\u0440\\u044f\\u0442\\u043f\\u043e\\u0440\\u0443\\u0441\\u0441\\u043a\\u0438\\u0000");
UnicodeString expected("\\u043f\\u043e\\u0447\\u0435\\u043c\\u0443\\u0436\\u0435\\u043e\\u043d\\u0438\\u043d\\u0435\\u0433\\u043e\\u0432\\u043e\\u0440\\u044f\\u0442\\u043f\\u043e\\u0440\\u0443\\u0441\\u0441\\u043a\\u0438\\u0000", -1, US_INV);
expected = expected.unescape();
UnicodeString ascii("xn--b1abfaaepdrnnbgefbadotcwatmq2g4l");
ascii.append((UChar)0x0000);

View File

@ -1,6 +1,6 @@
/************************************************************************
* COPYRIGHT:
* Copyright (c) 2000-2007, International Business Machines Corporation
* Copyright (c) 2000-2008, International Business Machines Corporation
* and others. All Rights Reserved.
************************************************************************/
/************************************************************************
@ -274,7 +274,7 @@ void TransliteratorAPITest::TestTransliterate1(){
"Latin-Devanagari",CharsToUnicodeString("bha\\u0304rata"), CharsToUnicodeString("\\u092D\\u093E\\u0930\\u0924") ,
"Latin-Devanagari",UnicodeString("kra ksha khra gra cra dya dhya",""), CharsToUnicodeString("\\u0915\\u094D\\u0930 \\u0915\\u094D\\u0936 \\u0916\\u094D\\u0930 \\u0917\\u094D\\u0930 \\u091a\\u094D\\u0930 \\u0926\\u094D\\u092F \\u0927\\u094D\\u092F") ,
"Devanagari-Latin", CharsToUnicodeString("\\u092D\\u093E\\u0930\\u0924"), UnicodeString("bh\\u0101rata"),
"Devanagari-Latin", CharsToUnicodeString("\\u092D\\u093E\\u0930\\u0924"), CharsToUnicodeString("bh\\u0101rata"),
// "Contracted-Expanded", CharsToUnicodeString("\\u00C0\\u00C1\\u0042"), CharsToUnicodeString("\\u0041\\u0300\\u0041\\u0301\\u0042") ,
// "Expanded-Contracted", CharsToUnicodeString("\\u0041\\u0300\\u0041\\u0301\\u0042"), CharsToUnicodeString("\\u00C0\\u00C1\\u0042") ,
//"Latin-Arabic", "aap", CharsToUnicodeString("\\u0627\\u06A4") ,
@ -325,7 +325,7 @@ void TransliteratorAPITest::TestTransliterate2(){
"Hex-Any", CharsToUnicodeString("\\u0068\\u0065\\u006C\\u006C\\u006F\\u0021\\u0020"), "0", "5", "hello", "hello! " ,
// "Contracted-Expanded", CharsToUnicodeString("\\u00C0\\u00C1\\u0042"), "1", "2", CharsToUnicodeString("\\u0041\\u0301"), CharsToUnicodeString("\\u00C0\\u0041\\u0301\\u0042") ,
"Devanagari-Latin", CharsToUnicodeString("\\u092D\\u093E\\u0930\\u0924"), "0", "1", "bha", CharsToUnicodeString("bha\\u093E\\u0930\\u0924") ,
"Devanagari-Latin", CharsToUnicodeString("\\u092D\\u093E\\u0930\\u0924"), "1", "2", "\\u0314\\u0101", CharsToUnicodeString("\\u092D\\u0314\\u0101\\u0930\\u0924")
"Devanagari-Latin", CharsToUnicodeString("\\u092D\\u093E\\u0930\\u0924"), "1", "2", CharsToUnicodeString("\\u0314\\u0101"), CharsToUnicodeString("\\u092D\\u0314\\u0101\\u0930\\u0924")
};
logln("\n Testing transliterate(String, int, int, StringBuffer)");

View File

@ -522,7 +522,7 @@ void RTTest::test(const UnicodeString& sourceRangeVal,
this->roundtripExclusionsSet.clear();
if (roundtripExclusions != NULL && strlen(roundtripExclusions) > 0) {
this->roundtripExclusionsSet.applyPattern(roundtripExclusions, status);
this->roundtripExclusionsSet.applyPattern(UNICODE_STRING_SIMPLE(roundtripExclusions), status);
if (U_FAILURE(status)) {
parent->errln("FAIL: UnicodeSet::applyPattern(%s)", roundtripExclusions);
return;
@ -991,7 +991,7 @@ void TransliteratorRoundTripTest::TestHiragana() {
RTTest test("Latin-Hiragana");
Legal *legal = new Legal();
test.test(UnicodeString("[a-zA-Z]", ""),
HIRAGANA,
UNICODE_STRING_SIMPLE(HIRAGANA),
HIRAGANA_ITERATION, this, quick, legal);
delete legal;
}
@ -1005,7 +1005,7 @@ void TransliteratorRoundTripTest::TestKatakana() {
strcat(temp, HALFWIDTH_KATAKANA);
strcat(temp, "]");
test.test(UnicodeString("[a-zA-Z]", ""),
KATAKANA,
UNICODE_STRING_SIMPLE(KATAKANA),
temp,
this, quick, legal);
delete legal;
@ -1105,7 +1105,7 @@ void TransliteratorRoundTripTest::TestHan() {
UnicodeString nfded = target2;
nfd->transliterate(nfded);
UnicodeSet allMarks("[\\u0304\\u0301\\u030C\\u0300\\u0306]", status); // look only for Pinyin tone marks, not all marks (there are some others in there)
UnicodeSet allMarks(UNICODE_STRING_SIMPLE("[\\u0304\\u0301\\u030C\\u0300\\u0306]"), status); // look only for Pinyin tone marks, not all marks (there are some others in there)
ASSERT_SUCCESS(status);
assertFalse("NumericPinyin must contain no marks", allMarks.containsSome(nfded));
@ -1231,10 +1231,10 @@ void TransliteratorRoundTripTest::Testel() {
// Runs RTTest against the "Latin-Arabic" transliterator ID.
// The set patterns spell their escapes as literal backslash sequences
// ("\\uXXXX" in the source), so this change converts them with the invariant
// code page (US_INV / UNICODE_STRING_SIMPLE), per ICU-6337.
// Each changed statement appears twice below: the pre-change line first,
// then its replacement.
void TransliteratorRoundTripTest::TestArabic() {
// Pre-change: pattern built without an explicit code page argument.
UnicodeString ARABIC("[\\u060C\\u061B\\u061F\\u0621\\u0627-\\u063A\\u0641-\\u0655\\u0660-\\u066C\\u067E\\u0686\\u0698\\u06A4\\u06AD\\u06AF\\u06CB-\\u06CC\\u06F0-\\u06F9]");
// Replacement: same pattern text, length -1 (NUL-terminated), invariant conversion.
UnicodeString ARABIC("[\\u060C\\u061B\\u061F\\u0621\\u0627-\\u063A\\u0641-\\u0655\\u0660-\\u066C\\u067E\\u0686\\u0698\\u06A4\\u06AD\\u06AF\\u06CB-\\u06CC\\u06F0-\\u06F9]", -1, US_INV);
Legal *legal = new Legal();
RTTest test("Latin-Arabic");
// Pre-change call: the first argument was a bare char literal.
// NOTE(review): the third argument looks like the round-trip exclusion pattern — confirm against RTTest::test.
test.test("[a-zA-Z\\u02BE\\u02BF\\u207F]", ARABIC, "[a-zA-Z\\u02BE\\u02BF\\u207F]",this, quick, legal); //
// Replacement call: only the first argument is wrapped; the third stays a char literal.
test.test(UNICODE_STRING_SIMPLE("[a-zA-Z\\u02BE\\u02BF\\u207F]"), ARABIC, "[a-zA-Z\\u02BE\\u02BF\\u207F]",this, quick, legal); //
// Release the Legal object allocated with new above.
delete legal;
}
class LegalHebrew : public Legal {
@ -1249,8 +1249,8 @@ public:
};
// Constructor: fills the FINAL, NON_FINAL and LETTER sets by applying a
// pattern to each. Every applyPattern call receives the caller's `error`.
// The two patterns that contain backslash escapes appear twice: first the
// pre-change form (bare char literal), then the replacement wrapped in
// UNICODE_STRING_SIMPLE (ICU-6337). "[:letter:]" has no backslash and is unchanged.
LegalHebrew::LegalHebrew(UErrorCode& error){
// Pre-change lines.
FINAL.applyPattern("[\\u05DA\\u05DD\\u05DF\\u05E3\\u05E5]", error);
NON_FINAL.applyPattern("[\\u05DB\\u05DE\\u05E0\\u05E4\\u05E6]", error);
// Replacement lines. U+05DA/05DD/05DF/05E3/05E5 are the Hebrew final forms of
// kaf, mem, nun, pe and tsadi; U+05DB/05DE/05E0/05E4/05E6 are the matching
// non-final letters.
FINAL.applyPattern(UNICODE_STRING_SIMPLE("[\\u05DA\\u05DD\\u05DF\\u05E3\\u05E5]"), error);
NON_FINAL.applyPattern(UNICODE_STRING_SIMPLE("[\\u05DB\\u05DE\\u05E0\\u05E4\\u05E6]"), error);
LETTER.applyPattern("[:letter:]", error);
}
UBool LegalHebrew::is(const UnicodeString& sourceString)const{
@ -1285,7 +1285,7 @@ void TransliteratorRoundTripTest::TestHebrew() {
return;
}
RTTest test("Latin-Hebrew");
test.test("[a-zA-Z\\u02BC\\u02BB]", "[[[:hebrew:]-[\\u05BD\\uFB00-\\uFBFF]]&[:Age=4.0:]]", "[\\u05F0\\u05F1\\u05F2]", this, quick, legal);
test.test(UNICODE_STRING_SIMPLE("[a-zA-Z\\u02BC\\u02BB]"), UNICODE_STRING_SIMPLE("[[[:hebrew:]-[\\u05BD\\uFB00-\\uFBFF]]&[:Age=4.0:]]"), "[\\u05F0\\u05F1\\u05F2]", this, quick, legal);
//showElapsed(start, "TestHebrew");
delete legal;

View File

@ -1269,7 +1269,7 @@ void TransliteratorTest::TestNameMap(void) {
// Careful: CharsToUS will convert "\\N" => "N"; use "\\\\N" for \N
expect(*uni2name, CharsToUnicodeString("\\u00A0abc\\u4E01\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF"),
CharsToUnicodeString("\\\\N{NO-BREAK SPACE}abc\\\\N{CJK UNIFIED IDEOGRAPH-4E01}\\\\N{MICRO SIGN}\\\\N{GUJARATI SIGN CANDRABINDU}\\\\N{REPLACEMENT CHARACTER}\\\\N{END OF TRANSMISSION}\\\\N{CHARACTER TABULATION}\\\\N{<control-0081>}\\\\N{<noncharacter-FFFF>}"));
expect(*name2uni, "{\\N { NO-BREAK SPACE}abc\\N{ CJK UNIFIED IDEOGRAPH-4E01 }\\N{x\\N{MICRO SIGN}\\N{GUJARATI SIGN CANDRABINDU}\\N{REPLACEMENT CHARACTER}\\N{END OF TRANSMISSION}\\N{CHARACTER TABULATION}\\N{<control-0081>}\\N{<noncharacter-FFFF>}\\N{<control-0004>}\\N{",
expect(*name2uni, UNICODE_STRING_SIMPLE("{\\N { NO-BREAK SPACE}abc\\N{ CJK UNIFIED IDEOGRAPH-4E01 }\\N{x\\N{MICRO SIGN}\\N{GUJARATI SIGN CANDRABINDU}\\N{REPLACEMENT CHARACTER}\\N{END OF TRANSMISSION}\\N{CHARACTER TABULATION}\\N{<control-0081>}\\N{<noncharacter-FFFF>}\\N{<control-0004>}\\N{"),
CharsToUnicodeString("{\\u00A0abc\\u4E01\\\\N{x\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF\\u0004\\\\N{"));
delete uni2name;
@ -1516,7 +1516,7 @@ void TransliteratorTest::TestCompoundRBT(void) {
errln("FAIL: createFromRules failed");
return;
}
expect(*t, "\\u0043at in the hat, bat on the mat",
expect(*t, UNICODE_STRING_SIMPLE("\\u0043at in the hat, bat on the mat"),
"C.A.t IN tHE H.A.t, .B..A.t ON tHE M.A.t");
UnicodeString r;
t->toRules(r, TRUE);
@ -1728,7 +1728,7 @@ void TransliteratorTest::TestToRules(void) {
UParseError parseError;
UErrorCode status = U_ZERO_ERROR;
Transliterator *t = Transliterator::createFromRules("ID",
DATA[d+1], UTRANS_FORWARD, parseError, status);
UNICODE_STRING_SIMPLE(DATA[d+1]), UTRANS_FORWARD, parseError, status);
if (t == 0) {
errln("FAIL: createFromRules failed");
return;
@ -1737,19 +1737,19 @@ void TransliteratorTest::TestToRules(void) {
t->toRules(rules, FALSE);
t->toRules(escapedRules, TRUE);
UnicodeString expRules = CharsToUnicodeString(DATA[d+2]);
UnicodeString expEscapedRules(DATA[d+2]);
UnicodeString expEscapedRules(DATA[d+2], -1, US_INV);
if (rules == expRules) {
logln((UnicodeString)"Ok: " + DATA[d+1] +
logln((UnicodeString)"Ok: " + UNICODE_STRING_SIMPLE(DATA[d+1]) +
" => " + rules);
} else {
errln((UnicodeString)"FAIL: " + DATA[d+1] +
errln((UnicodeString)"FAIL: " + UNICODE_STRING_SIMPLE(DATA[d+1]) +
" => " + rules + ", exp " + expRules);
}
if (escapedRules == expEscapedRules) {
logln((UnicodeString)"Ok: " + DATA[d+1] +
logln((UnicodeString)"Ok: " + UNICODE_STRING_SIMPLE(DATA[d+1]) +
" => " + escapedRules);
} else {
errln((UnicodeString)"FAIL: " + DATA[d+1] +
errln((UnicodeString)"FAIL: " + UNICODE_STRING_SIMPLE(DATA[d+1]) +
" => " + escapedRules + ", exp " + expEscapedRules);
}
delete t;
@ -1757,8 +1757,8 @@ void TransliteratorTest::TestToRules(void) {
} else {
// UnicodeSet test
UErrorCode status = U_ZERO_ERROR;
UnicodeString pat(DATA[d+1]);
UnicodeString expToPat(DATA[d+2]);
UnicodeString pat(DATA[d+1], -1, US_INV);
UnicodeString expToPat(DATA[d+2], -1, US_INV);
UnicodeSet set(pat, status);
if (U_FAILURE(status)) {
errln("FAIL: UnicodeSet ct failed");
@ -1820,23 +1820,23 @@ void TransliteratorTest::TestSupplemental() {
expectT("Any-Name",
CharsToUnicodeString("\\U00010330\\U000E0061\\u00A0"),
"\\N{GOTHIC LETTER AHSA}\\N{TAG LATIN SMALL LETTER A}\\N{NO-BREAK SPACE}");
UNICODE_STRING_SIMPLE("\\N{GOTHIC LETTER AHSA}\\N{TAG LATIN SMALL LETTER A}\\N{NO-BREAK SPACE}"));
expectT("Any-Hex/Unicode",
CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
"U+10330U+10FF00U+E0061U+00A0");
UNICODE_STRING_SIMPLE("U+10330U+10FF00U+E0061U+00A0"));
expectT("Any-Hex/C",
CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
"\\U00010330\\U0010FF00\\U000E0061\\u00A0");
UNICODE_STRING_SIMPLE("\\U00010330\\U0010FF00\\U000E0061\\u00A0"));
expectT("Any-Hex/Perl",
CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
"\\x{10330}\\x{10FF00}\\x{E0061}\\x{A0}");
UNICODE_STRING_SIMPLE("\\x{10330}\\x{10FF00}\\x{E0061}\\x{A0}"));
expectT("Any-Hex/Java",
CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
"\\uD800\\uDF30\\uDBFF\\uDF00\\uDB40\\uDC61\\u00A0");
UNICODE_STRING_SIMPLE("\\uD800\\uDF30\\uDBFF\\uDF00\\uDB40\\uDC61\\u00A0"));
expectT("Any-Hex/XML",
CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
@ -1846,7 +1846,7 @@ void TransliteratorTest::TestSupplemental() {
CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
"&#66352;&#1113856;&#917601;&#160;");
expectT("[\\U000E0000-\\U000E0FFF] Remove",
expectT(UNICODE_STRING_SIMPLE("[\\U000E0000-\\U000E0FFF] Remove"),
CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
CharsToUnicodeString("\\U00010330\\U0010FF00\\u00A0"));
}
@ -2363,7 +2363,7 @@ void TransliteratorTest::TestCompoundFilterID(void) {
* Test new property set syntax
*/
// Checks transliterator rules that use \p{...} property sets.
// NOTE(review): expect() appears to take (rules, input, expected output) — confirm
// against its declaration, which is not visible here.
void TransliteratorTest::TestPropertySet() {
// Pre-change line: rule text passed as a bare char literal.
expect("a>A; \\p{Lu}>x; \\p{ANY}>y;", "abcDEF", "Ayyxxx");
// Replacement: the rule text contains backslashes, so it is wrapped in
// UNICODE_STRING_SIMPLE (ICU-6337).
expect(UNICODE_STRING_SIMPLE("a>A; \\p{Lu}>x; \\p{ANY}>y;"), "abcDEF", "Ayyxxx");
// Unchanged by this commit: this rule text has no backslash in the source literal.
expect("(.+)>'[' $1 ']';", " a stitch \n in time \r saves 9",
"[ a stitch ]\n[ in time ]\r[ saves 9]");
}
@ -2838,8 +2838,8 @@ void TransliteratorTest::TestGurmukhiDevanagari(){
// (\u0902) (when preceded by vowel) ---> (\u0A02)
// (\u0902) (when preceded by consonant) ---> (\u0A70)
UErrorCode status = U_ZERO_ERROR;
UnicodeSet vowel(UnicodeString("[\\u0905-\\u090A \\u090F\\u0910\\u0913\\u0914 \\u093e-\\u0942\\u0947\\u0948\\u094B\\u094C\\u094D]").unescape(), status);
UnicodeSet non_vowel(UnicodeString("[\\u0915-\\u0928\\u092A-\\u0930]").unescape(), status);
UnicodeSet vowel(UnicodeString("[\\u0905-\\u090A \\u090F\\u0910\\u0913\\u0914 \\u093e-\\u0942\\u0947\\u0948\\u094B\\u094C\\u094D]", -1, US_INV).unescape(), status);
UnicodeSet non_vowel(UnicodeString("[\\u0915-\\u0928\\u092A-\\u0930]", -1, US_INV).unescape(), status);
UParseError parseError;
UnicodeSetIterator vIter(vowel);
@ -2850,8 +2850,8 @@ void TransliteratorTest::TestGurmukhiDevanagari(){
delete trans;
return;
}
UnicodeString src (" \\u0902");
UnicodeString expected(" \\u0A02");
UnicodeString src (" \\u0902", -1, US_INV);
UnicodeString expected(" \\u0A02", -1, US_INV);
src = src.unescape();
expected= expected.unescape();
@ -3165,8 +3165,8 @@ void TransliteratorTest::TestToRulesMark() {
UParseError pe;
UErrorCode ec = U_ZERO_ERROR;
Transliterator *t2 = Transliterator::createFromRules("source-target", testRules, UTRANS_FORWARD, pe, ec);
Transliterator *t3 = Transliterator::createFromRules("target-source", testRules, UTRANS_REVERSE, pe, ec);
Transliterator *t2 = Transliterator::createFromRules("source-target", UNICODE_STRING_SIMPLE(testRules), UTRANS_FORWARD, pe, ec);
Transliterator *t3 = Transliterator::createFromRules("target-source", UNICODE_STRING_SIMPLE(testRules), UTRANS_REVERSE, pe, ec);
if (U_FAILURE(ec)) {
delete t2;
@ -3178,8 +3178,8 @@ void TransliteratorTest::TestToRulesMark() {
expect(*t2, source, target);
expect(*t3, target, source);
checkRules("Failed toRules FORWARD", *t2, testRulesForward);
checkRules("Failed toRules BACKWARD", *t3, testRulesBackward);
checkRules("Failed toRules FORWARD", *t2, UNICODE_STRING_SIMPLE(testRulesForward));
checkRules("Failed toRules BACKWARD", *t3, UNICODE_STRING_SIMPLE(testRulesBackward));
delete t2;
delete t3;
@ -3199,7 +3199,7 @@ void TransliteratorTest::TestEscape() {
errln((UnicodeString)"FAIL: createInstance");
} else {
expect(*t,
"\\x{40}\\U00000031&#x32;&#81;",
UNICODE_STRING_SIMPLE("\\x{40}\\U00000031&#x32;&#81;"),
"@12Q");
}
delete t;
@ -3211,7 +3211,7 @@ void TransliteratorTest::TestEscape() {
} else {
expect(*t,
CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
"\\u0041\\U0010BEEF\\uFEED");
UNICODE_STRING_SIMPLE("\\u0041\\U0010BEEF\\uFEED"));
}
delete t;
@ -3222,7 +3222,7 @@ void TransliteratorTest::TestEscape() {
} else {
expect(*t,
CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
"\\u0041\\uDBEF\\uDEEF\\uFEED");
UNICODE_STRING_SIMPLE("\\u0041\\uDBEF\\uDEEF\\uFEED"));
}
delete t;
@ -3233,7 +3233,7 @@ void TransliteratorTest::TestEscape() {
} else {
expect(*t,
CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
"\\x{41}\\x{10BEEF}\\x{FEED}");
UNICODE_STRING_SIMPLE("\\x{41}\\x{10BEEF}\\x{FEED}"));
}
delete t;
}
@ -3638,7 +3638,7 @@ void TransliteratorTest::TestFunction() {
}
expect(*t, "The Quick Brown Fox",
"T(t=\\u0074)he Q(q=\\u0071)uick B(b=\\u0062)rown F(f=\\u0066)ox");
UNICODE_STRING_SIMPLE("T(t=\\u0074)he Q(q=\\u0071)uick B(b=\\u0062)rown F(f=\\u0066)ox"));
delete t;
}
@ -3756,7 +3756,7 @@ void TransliteratorTest::TestUserFunction() {
// There's no need to register inverses if we don't use them
t = Transliterator::createFromRules("gif",
"'\\'u(..)(..) > '<img src=\"http://www.unicode.org/gifs/24/' $1 '/U' $1$2 '.gif\">';",
UNICODE_STRING_SIMPLE("'\\'u(..)(..) > '<img src=\"http://www.unicode.org/gifs/24/' $1 '/U' $1$2 '.gif\">';"),
UTRANS_FORWARD, pe, ec);
if (t == NULL || U_FAILURE(ec)) {
errln((UnicodeString)"FAIL: createFromRules gif " + u_errorName(ec));
@ -3765,13 +3765,13 @@ void TransliteratorTest::TestUserFunction() {
_TUFReg("Any-gif", t, 0);
t = Transliterator::createFromRules("RemoveCurly",
"[\\{\\}] > ; '\\N' > ;",
UNICODE_STRING_SIMPLE("[\\{\\}] > ; '\\N' > ;"),
UTRANS_FORWARD, pe, ec);
if (t == NULL || U_FAILURE(ec)) {
errln((UnicodeString)"FAIL: createFromRules RemoveCurly " + u_errorName(ec));
goto FAIL;
}
expect(*t, "\\N{name}", "name");
expect(*t, UNICODE_STRING_SIMPLE("\\N{name}"), "name");
_TUFReg("Any-RemoveCurly", t, 1);
logln("Trying &hex");
@ -3789,7 +3789,7 @@ void TransliteratorTest::TestUserFunction() {
errln((UnicodeString)"FAIL: createInstance Any-hex2 " + u_errorName(ec));
goto FAIL;
}
expect(*t, "abc", "\\u0061\\u0062\\u0063");
expect(*t, "abc", UNICODE_STRING_SIMPLE("\\u0061\\u0062\\u0063"));
delete t;
logln("Trying &gif");
@ -3820,7 +3820,7 @@ void TransliteratorTest::TestUserFunction() {
goto FAIL;
}
expect(*t, "abc",
"\\u0061 LATIN SMALL LETTER A \\u0062 LATIN SMALL LETTER B \\u0063 LATIN SMALL LETTER C ");
UNICODE_STRING_SIMPLE("\\u0061 LATIN SMALL LETTER A \\u0062 LATIN SMALL LETTER B \\u0063 LATIN SMALL LETTER C "));
delete t;
FAIL:
@ -4039,7 +4039,7 @@ void TransliteratorTest::TestAlternateSyntax() {
"xbz");
expect(CharsToUnicodeString("([:^ASCII:]) \\u2192 \\u2206Name($1);"),
CharsToUnicodeString("<=\\u2190; >=\\u2192; <>=\\u2194; &=\\u2206"),
"<=\\N{LEFTWARDS ARROW}; >=\\N{RIGHTWARDS ARROW}; <>=\\N{LEFT RIGHT ARROW}; &=\\N{INCREMENT}");
UNICODE_STRING_SIMPLE("<=\\N{LEFTWARDS ARROW}; >=\\N{RIGHTWARDS ARROW}; <>=\\N{LEFT RIGHT ARROW}; &=\\N{INCREMENT}"));
}
static const char* BEGIN_END_RULES[] = {
@ -4276,9 +4276,9 @@ void TransliteratorTest::TestBeginEnd() {
int32_t i = 0;
for (i = 0; i < BEGIN_END_TEST_CASES_length; i += 3) {
expect((UnicodeString)"Test case #" + (i / 3),
UnicodeString(BEGIN_END_TEST_CASES[i]),
UnicodeString(BEGIN_END_TEST_CASES[i + 1]),
UnicodeString(BEGIN_END_TEST_CASES[i + 2]));
UnicodeString(BEGIN_END_TEST_CASES[i], -1, US_INV),
UnicodeString(BEGIN_END_TEST_CASES[i + 1], -1, US_INV),
UnicodeString(BEGIN_END_TEST_CASES[i + 2], -1, US_INV));
}
// instantiate the one reversible rule set in the reverse direction and make sure it does the right thing
@ -4319,7 +4319,7 @@ void TransliteratorTest::TestBeginEndToRules() {
for (int32_t i = 0; i < BEGIN_END_TEST_CASES_length; i += 3) {
UParseError parseError;
UErrorCode status = U_ZERO_ERROR;
Transliterator* t = Transliterator::createFromRules("--", UnicodeString(BEGIN_END_TEST_CASES[i]),
Transliterator* t = Transliterator::createFromRules("--", UnicodeString(BEGIN_END_TEST_CASES[i], -1, US_INV),
UTRANS_FORWARD, parseError, status);
if (U_FAILURE(status)) {
reportParseError(UnicodeString("FAIL: Couldn't create transliterator"), parseError, status);
@ -4334,8 +4334,8 @@ void TransliteratorTest::TestBeginEndToRules() {
delete t;
} else {
expect(*t2,
UnicodeString(BEGIN_END_TEST_CASES[i + 1]),
UnicodeString(BEGIN_END_TEST_CASES[i + 2]));
UnicodeString(BEGIN_END_TEST_CASES[i + 1], -1, US_INV),
UnicodeString(BEGIN_END_TEST_CASES[i + 2], -1, US_INV));
delete t;
delete t2;
}

View File

@ -170,19 +170,19 @@ void UnicodeSetTest::TestToPattern() {
const char* exp2[] = {"aa", "ab", "ac", NOT, "xy", NULL};
expectToPattern(*s, "[a-z{aa}{ab}{ac}]", exp2);
s->applyPattern("[a-z {\\{l} {r\\}}]", ec);
s->applyPattern(UNICODE_STRING_SIMPLE("[a-z {\\{l} {r\\}}]"), ec);
if (U_FAILURE(ec)) break;
const char* exp3[] = {"{l", "r}", NOT, "xy", NULL};
expectToPattern(*s, "[a-z{r\\}}{\\{l}]", exp3);
expectToPattern(*s, UNICODE_STRING_SIMPLE("[a-z{r\\}}{\\{l}]"), exp3);
s->add("[]");
const char* exp4[] = {"{l", "r}", "[]", NOT, "xy", NULL};
expectToPattern(*s, "[a-z{\\[\\]}{r\\}}{\\{l}]", exp4);
expectToPattern(*s, UNICODE_STRING_SIMPLE("[a-z{\\[\\]}{r\\}}{\\{l}]"), exp4);
s->applyPattern("[a-z {\\u4E01\\u4E02}{\\n\\r}]", ec);
s->applyPattern(UNICODE_STRING_SIMPLE("[a-z {\\u4E01\\u4E02}{\\n\\r}]"), ec);
if (U_FAILURE(ec)) break;
const char* exp5[] = {"\\u4E01\\u4E02", "\n\r", NULL};
expectToPattern(*s, "[a-z{\\u000A\\u000D}{\\u4E01\\u4E02}]", exp5);
expectToPattern(*s, UNICODE_STRING_SIMPLE("[a-z{\\u000A\\u000D}{\\u4E01\\u4E02}]"), exp5);
// j2189
s->clear();
@ -318,14 +318,14 @@ UnicodeSetTest::TestCloneEqualHash(void) {
// set1 and set2 used to be built with the obsolete constructor taking
// UCharCategory values; replaced with pattern constructors
// markus 20030502
UnicodeSet *set1=new UnicodeSet("\\p{Lowercase Letter}", status); // :Ll: Letter, lowercase
UnicodeSet *set1a=new UnicodeSet("[:Ll:]", status); // Letter, lowercase
UnicodeSet *set1=new UnicodeSet(UNICODE_STRING_SIMPLE("\\p{Lowercase Letter}"), status); // :Ll: Letter, lowercase
UnicodeSet *set1a=new UnicodeSet(UNICODE_STRING_SIMPLE("[:Ll:]"), status); // Letter, lowercase
if (U_FAILURE(status)){
errln((UnicodeString)"FAIL: Can't construst set with category->Ll");
return;
}
UnicodeSet *set2=new UnicodeSet("\\p{Decimal Number}", status); //Number, Decimal digit
UnicodeSet *set2a=new UnicodeSet("[:Nd:]", status); //Number, Decimal digit
UnicodeSet *set2=new UnicodeSet(UNICODE_STRING_SIMPLE("\\p{Decimal Number}"), status); //Number, Decimal digit
UnicodeSet *set2a=new UnicodeSet(UNICODE_STRING_SIMPLE("[:Nd:]"), status); //Number, Decimal digit
if (U_FAILURE(status)){
errln((UnicodeString)"FAIL: Can't construct set with category->Nd");
return;
@ -705,7 +705,7 @@ void UnicodeSetTest::TestIteration() {
// 6 code points, 3 ranges, 2 strings, 8 total elements
// Iteration will access them in sorted order - a, b, c, y, z, U0001abcd, "str1", "str2"
UnicodeSet set("[zabyc\\U0001abcd{str1}{str2}]", ec);
UnicodeSet set(UNICODE_STRING_SIMPLE("[zabyc\\U0001abcd{str1}{str2}]"), ec);
TEST_ASSERT_SUCCESS(ec);
UnicodeSetIterator it(set);
@ -822,12 +822,12 @@ void UnicodeSetTest::TestStrings() {
* Test the [:Latin:] syntax.
*/
// Checks script-name set patterns such as [:Latin:] and [:Greek:].
// NOTE(review): expectContainment() appears to take (pattern, characters expected
// inside the set, characters expected outside) — confirm against its declaration.
// Each call appears twice: the pre-change line with a bare char literal, then
// the replacement with the pattern wrapped in UNICODE_STRING_SIMPLE.
void UnicodeSetTest::TestScriptSet() {
// U+0391 / U+03B1 are Greek capital and small alpha.
expectContainment("[:Latin:]", "aA", CharsToUnicodeString("\\u0391\\u03B1"));
expectContainment(UNICODE_STRING_SIMPLE("[:Latin:]"), "aA", CharsToUnicodeString("\\u0391\\u03B1"));
// Same characters as above with the two character arguments swapped.
expectContainment("[:Greek:]", CharsToUnicodeString("\\u0391\\u03B1"), "aA");
expectContainment(UNICODE_STRING_SIMPLE("[:Greek:]"), CharsToUnicodeString("\\u0391\\u03B1"), "aA");
/* Jitterbug 1423 */
expectContainment("[[:Common:][:Inherited:]]", CharsToUnicodeString("\\U00003099\\U0001D169\\u0000"), "aA");
expectContainment(UNICODE_STRING_SIMPLE("[[:Common:][:Inherited:]]"), CharsToUnicodeString("\\U00003099\\U0001D169\\u0000"), "aA");
}
@ -1002,7 +1002,7 @@ void UnicodeSetTest::TestPropertySet() {
static const int32_t DATA_LEN = sizeof(DATA)/sizeof(DATA[0]);
for (int32_t i=0; i<DATA_LEN; i+=3) {
expectContainment(DATA[i], CharsToUnicodeString(DATA[i+1]),
expectContainment(UNICODE_STRING_SIMPLE(DATA[i]), CharsToUnicodeString(DATA[i+1]),
CharsToUnicodeString(DATA[i+2]));
}
}
@ -1015,56 +1015,56 @@ void UnicodeSetTest::TestPosixClasses() {
{
UErrorCode status = U_ZERO_ERROR;
UnicodeSet s1("[:alpha:]", status);
UnicodeSet s2("\\p{Alphabetic}", status);
UnicodeSet s2(UNICODE_STRING_SIMPLE("\\p{Alphabetic}"), status);
TEST_ASSERT_SUCCESS(status);
TEST_ASSERT(s1==s2);
}
{
UErrorCode status = U_ZERO_ERROR;
UnicodeSet s1("[:lower:]", status);
UnicodeSet s2("\\p{lowercase}", status);
UnicodeSet s2(UNICODE_STRING_SIMPLE("\\p{lowercase}"), status);
TEST_ASSERT_SUCCESS(status);
TEST_ASSERT(s1==s2);
}
{
UErrorCode status = U_ZERO_ERROR;
UnicodeSet s1("[:upper:]", status);
UnicodeSet s2("\\p{Uppercase}", status);
UnicodeSet s2(UNICODE_STRING_SIMPLE("\\p{Uppercase}"), status);
TEST_ASSERT_SUCCESS(status);
TEST_ASSERT(s1==s2);
}
{
UErrorCode status = U_ZERO_ERROR;
UnicodeSet s1("[:punct:]", status);
UnicodeSet s2("\\p{gc=Punctuation}", status);
UnicodeSet s2(UNICODE_STRING_SIMPLE("\\p{gc=Punctuation}"), status);
TEST_ASSERT_SUCCESS(status);
TEST_ASSERT(s1==s2);
}
{
UErrorCode status = U_ZERO_ERROR;
UnicodeSet s1("[:digit:]", status);
UnicodeSet s2("\\p{gc=DecimalNumber}", status);
UnicodeSet s2(UNICODE_STRING_SIMPLE("\\p{gc=DecimalNumber}"), status);
TEST_ASSERT_SUCCESS(status);
TEST_ASSERT(s1==s2);
}
{
UErrorCode status = U_ZERO_ERROR;
UnicodeSet s1("[:xdigit:]", status);
UnicodeSet s2("[\\p{DecimalNumber}\\p{HexDigit}]", status);
UnicodeSet s2(UNICODE_STRING_SIMPLE("[\\p{DecimalNumber}\\p{HexDigit}]"), status);
TEST_ASSERT_SUCCESS(status);
TEST_ASSERT(s1==s2);
}
{
UErrorCode status = U_ZERO_ERROR;
UnicodeSet s1("[:alnum:]", status);
UnicodeSet s2("[\\p{Alphabetic}\\p{DecimalNumber}]", status);
UnicodeSet s2(UNICODE_STRING_SIMPLE("[\\p{Alphabetic}\\p{DecimalNumber}]"), status);
TEST_ASSERT_SUCCESS(status);
TEST_ASSERT(s1==s2);
}
{
UErrorCode status = U_ZERO_ERROR;
UnicodeSet s1("[:space:]", status);
UnicodeSet s2("\\p{Whitespace}", status);
UnicodeSet s2(UNICODE_STRING_SIMPLE("\\p{Whitespace}"), status);
TEST_ASSERT_SUCCESS(status);
TEST_ASSERT(s1==s2);
}
@ -1072,7 +1072,7 @@ void UnicodeSetTest::TestPosixClasses() {
UErrorCode status = U_ZERO_ERROR;
UnicodeSet s1("[:blank:]", status);
TEST_ASSERT_SUCCESS(status);
UnicodeSet s2("[\\p{Whitespace}-[\\u000a\\u000B\\u000c\\u000d\\u0085\\p{LineSeparator}\\p{ParagraphSeparator}]]",
UnicodeSet s2(UNICODE_STRING_SIMPLE("[\\p{Whitespace}-[\\u000a\\u000B\\u000c\\u000d\\u0085\\p{LineSeparator}\\p{ParagraphSeparator}]]"),
status);
TEST_ASSERT_SUCCESS(status);
TEST_ASSERT(s1==s2);
@ -1081,7 +1081,7 @@ void UnicodeSetTest::TestPosixClasses() {
UErrorCode status = U_ZERO_ERROR;
UnicodeSet s1("[:cntrl:]", status);
TEST_ASSERT_SUCCESS(status);
UnicodeSet s2("\\p{Control}", status);
UnicodeSet s2(UNICODE_STRING_SIMPLE("\\p{Control}"), status);
TEST_ASSERT_SUCCESS(status);
TEST_ASSERT(s1==s2);
}
@ -1089,7 +1089,7 @@ void UnicodeSetTest::TestPosixClasses() {
UErrorCode status = U_ZERO_ERROR;
UnicodeSet s1("[:graph:]", status);
TEST_ASSERT_SUCCESS(status);
UnicodeSet s2("[^\\p{Whitespace}\\p{Control}\\p{Surrogate}\\p{Unassigned}]", status);
UnicodeSet s2(UNICODE_STRING_SIMPLE("[^\\p{Whitespace}\\p{Control}\\p{Surrogate}\\p{Unassigned}]"), status);
TEST_ASSERT_SUCCESS(status);
TEST_ASSERT(s1==s2);
}
@ -1097,7 +1097,7 @@ void UnicodeSetTest::TestPosixClasses() {
UErrorCode status = U_ZERO_ERROR;
UnicodeSet s1("[:print:]", status);
TEST_ASSERT_SUCCESS(status);
UnicodeSet s2("[[:graph:][:blank:]-[\\p{Control}]]" ,status);
UnicodeSet s2(UNICODE_STRING_SIMPLE("[[:graph:][:blank:]-[\\p{Control}]]") ,status);
TEST_ASSERT_SUCCESS(status);
TEST_ASSERT(s1==s2);
}
@ -1222,8 +1222,8 @@ void UnicodeSetTest::TestCloseOver() {
UnicodeString buf;
for (int32_t i=0; DATA[i]!=NULL; i+=3) {
int32_t selector = DATA[i][0];
UnicodeString pat(DATA[i+1]);
UnicodeString exp(DATA[i+2]);
UnicodeString pat(DATA[i+1], -1, US_INV);
UnicodeString exp(DATA[i+2], -1, US_INV);
s.applyPattern(pat, ec);
s.closeOver(selector);
t.applyPattern(exp, ec);
@ -1331,7 +1331,7 @@ void UnicodeSetTest::TestEscapePattern() {
// this fails -- which is what we expect.
for (int32_t pass=1; pass<=2; ++pass) {
UErrorCode ec = U_ZERO_ERROR;
UnicodeString pat(pattern);
UnicodeString pat(pattern, -1, US_INV);
if (pass==2) {
pat = pat.unescape();
}
@ -1354,7 +1354,7 @@ void UnicodeSetTest::TestEscapePattern() {
UnicodeString newpat;
set.toPattern(newpat, TRUE);
if (newpat == exp) {
if (newpat == UNICODE_STRING_SIMPLE(exp)) {
logln(escape(pat) + " => " + newpat);
} else {
errln((UnicodeString)"FAIL: " + escape(pat) + " => " + newpat);
@ -1587,7 +1587,7 @@ void UnicodeSetTest::TestSymbolTable() {
// Set up variables
while (DATA[i+2] != NULL) {
sym.add(DATA[i], DATA[i+1], ec);
sym.add(UNICODE_STRING_SIMPLE(DATA[i]), UNICODE_STRING_SIMPLE(DATA[i+1]), ec);
if (U_FAILURE(ec)) {
errln("FAIL: couldn't add to TokenSymbolTable");
continue;
@ -1596,7 +1596,7 @@ void UnicodeSetTest::TestSymbolTable() {
}
// Input pattern and expected output pattern
UnicodeString inpat = DATA[i], exppat = DATA[i+1];
UnicodeString inpat = UNICODE_STRING_SIMPLE(DATA[i]), exppat = UNICODE_STRING_SIMPLE(DATA[i+1]);
i += 2;
ParsePosition pos(0);
@ -1640,8 +1640,8 @@ void UnicodeSetTest::TestSurrogate() {
};
for (int i=0; DATA[i] != 0; ++i) {
UErrorCode ec = U_ZERO_ERROR;
logln((UnicodeString)"Test pattern " + i + " :" + DATA[i]);
UnicodeSet set(DATA[i], ec);
logln((UnicodeString)"Test pattern " + i + " :" + UNICODE_STRING_SIMPLE(DATA[i]));
UnicodeSet set(UNICODE_STRING_SIMPLE(DATA[i]), ec);
if (U_FAILURE(ec)) {
errln("FAIL: UnicodeSet constructor");
continue;
@ -1650,7 +1650,7 @@ void UnicodeSetTest::TestSurrogate() {
CharsToUnicodeString("abc\\U00010000"),
CharsToUnicodeString("\\uD800;\\uDC00")); // split apart surrogate-pair
if (set.size() != 4) {
errln((UnicodeString)"FAIL: " + DATA[i] + ".size() == " +
errln((UnicodeString)"FAIL: " + UNICODE_STRING_SIMPLE(DATA[i]) + ".size() == " +
set.size() + ", expected 4");
}
}

View File

@ -1,6 +1,6 @@
/********************************************************************
* COPYRIGHT:
* Copyright (c) 2005-2007, International Business Machines Corporation and
* Copyright (c) 2005-2008, International Business Machines Corporation and
* others. All Rights Reserved.
********************************************************************/
/************************************************************************
@ -1044,7 +1044,7 @@ void UTextTest::ErrorTest()
{ // Similar test, with utf16 instead of utf8
// TODO: merge the common parts of these tests.
UnicodeString u16str("\\u1000\\U00011000\\u2000\\U00022000");
UnicodeString u16str("\\u1000\\U00011000\\u2000\\U00022000", -1, US_INV);
int32_t startMap[] ={ 0, 1, 1, 3, 4, 4, 6, 6};
int32_t nextMap[] = { 1, 3, 3, 4, 6, 6, 6, 6};
int32_t prevMap[] = { 0, 0, 0, 1, 3, 3, 4, 4};
@ -1112,7 +1112,7 @@ void UTextTest::ErrorTest()
{ // Similar test, with UText over Replaceable
// TODO: merge the common parts of these tests.
UnicodeString u16str("\\u1000\\U00011000\\u2000\\U00022000");
UnicodeString u16str("\\u1000\\U00011000\\u2000\\U00022000", -1, US_INV);
int32_t startMap[] ={ 0, 1, 1, 3, 4, 4, 6, 6};
int32_t nextMap[] = { 1, 3, 3, 4, 6, 6, 6, 6};
int32_t prevMap[] = { 0, 0, 0, 1, 3, 3, 4, 4};

View File

@ -61,16 +61,16 @@ UXMLParser::UXMLParser(UErrorCode &status) :
// example: "<?xml version=1.0 encoding="utf-16" ?>
// This is a sloppy implementation - just look for the leading <?xml and the closing ?>
// allow for a possible leading BOM.
mXMLDecl(UnicodeString("(?s)\\uFEFF?<\\?xml.+?\\?>"), 0, status),
mXMLDecl(UnicodeString("(?s)\\uFEFF?<\\?xml.+?\\?>", -1, US_INV), 0, status),
// XML Comment production #15
// example: "<!-- whatever -->
// note, does not detect an illegal "--" within comments
mXMLComment(UnicodeString("(?s)<!--.+?-->"), 0, status),
mXMLComment(UnicodeString("(?s)<!--.+?-->", -1, US_INV), 0, status),
// XML Spaces
// production [3]
mXMLSP(UnicodeString(XML_SPACES "+"), 0, status),
mXMLSP(UnicodeString(XML_SPACES "+", -1, US_INV), 0, status),
// XML Doctype decl production #28
// example "<!DOCTYPE foo SYSTEM "somewhere" >
@ -81,12 +81,12 @@ UXMLParser::UXMLParser(UErrorCode &status) :
// of closing square brackets. These could appear in comments,
// or in parameter entity declarations, for example.
mXMLDoctype(UnicodeString(
"(?s)<!DOCTYPE.*?(>|\\[.*?\\].*?>)"
"(?s)<!DOCTYPE.*?(>|\\[.*?\\].*?>)", -1, US_INV
), 0, status),
// XML PI production #16
// example "<?target stuff?>
mXMLPI(UnicodeString("(?s)<\\?.+?\\?>"), 0, status),
mXMLPI(UnicodeString("(?s)<\\?.+?\\?>", -1, US_INV), 0, status),
// XML Element Start Productions #40, #41
// example <foo att1='abc' att2="d e f" >
@ -97,11 +97,11 @@ UXMLParser::UXMLParser(UErrorCode &status) :
XML_SPACES "+" XML_NAME XML_SPACES "*=" XML_SPACES "*" // match "ATTR_NAME = "
"(?:(?:\\\'[^<\\\']*?\\\')|(?:\\\"[^<\\\"]*?\\\"))" // match '"attribute value"'
")*" // * for zero or more attributes.
XML_SPACES "*?>"), 0, status), // match " >"
XML_SPACES "*?>", -1, US_INV), 0, status), // match " >"
// XML Element End production #42
// example </foo>
mXMLElemEnd (UnicodeString("</(" XML_NAME ")" XML_SPACES "*>"), 0, status),
mXMLElemEnd (UnicodeString("</(" XML_NAME ")" XML_SPACES "*>", -1, US_INV), 0, status),
// XML Element Empty production #44
// example <foo att1="abc" att2="d e f" />
@ -110,11 +110,11 @@ UXMLParser::UXMLParser(UErrorCode &status) :
XML_SPACES "+" XML_NAME XML_SPACES "*=" XML_SPACES "*" // match "ATTR_NAME = "
"(?:(?:\\\'[^<\\\']*?\\\')|(?:\\\"[^<\\\"]*?\\\"))" // match '"attribute value"'
")*" // * for zero or more attributes.
XML_SPACES "*?/>"), 0, status), // match " />"
XML_SPACES "*?/>", -1, US_INV), 0, status), // match " />"
// XMLCharData. Everything but '<'. Note that & will be dealt with later.
mXMLCharData(UnicodeString("(?s)[^<]*"), 0, status),
mXMLCharData(UnicodeString("(?s)[^<]*", -1, US_INV), 0, status),
// Attribute name = "value". XML Productions 10, 40/41
// Capture group 1 is name,
@ -126,14 +126,14 @@ UXMLParser::UXMLParser(UErrorCode &status) :
// Here, we match a single attribute, and make its name and
// attribute value available to the parser code.
mAttrValue(UnicodeString(XML_SPACES "+(" XML_NAME ")" XML_SPACES "*=" XML_SPACES "*"
"((?:\\\'[^<\\\']*?\\\')|(?:\\\"[^<\\\"]*?\\\"))"), 0, status),
"((?:\\\'[^<\\\']*?\\\')|(?:\\\"[^<\\\"]*?\\\"))", -1, US_INV), 0, status),
mAttrNormalizer(UnicodeString(XML_SPACES), 0, status),
mAttrNormalizer(UnicodeString(XML_SPACES, -1, US_INV), 0, status),
// Match any of the new-line sequences in content.
// All are changed to \u000a.
mNewLineNormalizer(UnicodeString("\\u000d\\u000a|\\u000d\\u0085|\\u000a|\\u000d|\\u0085|\\u2028"), 0, status),
mNewLineNormalizer(UnicodeString("\\u000d\\u000a|\\u000d\\u0085|\\u000a|\\u000d|\\u0085|\\u2028", -1, US_INV), 0, status),
// & char references
// We will figure out what we've got based on which capture group has content.