ICU-6337 Use invariant code page to create UnicodeStrings from string literals that contain "\\".
X-SVN-Rev: 24201
This commit is contained in:
parent
0ad3aff9b2
commit
a8678f38b6
@ -426,7 +426,7 @@ void CharsetDetectionTest::C1BytesTest()
|
||||
#if !UCONFIG_NO_LEGACY_CONVERSION
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
UnicodeString sISO = "This is a small sample of some English text. Just enough to be sure that it detects correctly.";
|
||||
UnicodeString ssWindows = "This is another small sample of some English text. Just enough to be sure that it detects correctly. It also includes some \\u201CC1\\u201D bytes.";
|
||||
UnicodeString ssWindows("This is another small sample of some English text. Just enough to be sure that it detects correctly. It also includes some \\u201CC1\\u201D bytes.", -1, US_INV);
|
||||
UnicodeString sWindows = ssWindows.unescape();
|
||||
int32_t lISO = 0, lWindows = 0;
|
||||
char *bISO = extractBytes(sISO, "ISO-8859-1", lISO);
|
||||
|
@ -1047,7 +1047,7 @@ DateFormatTest::TestLocaleDateFormat() // Bug 495
|
||||
DateFormat::FULL, Locale::getFrench());
|
||||
DateFormat *dfUS = DateFormat::createDateTimeInstance(DateFormat::FULL,
|
||||
DateFormat::FULL, Locale::getUS());
|
||||
UnicodeString expectedFRENCH ( "lundi 15 septembre 1997 00:00:00 \\u00C9tats-Unis (Los Angeles)" );
|
||||
UnicodeString expectedFRENCH ( "lundi 15 septembre 1997 00:00:00 \\u00C9tats-Unis (Los Angeles)", -1, US_INV );
|
||||
expectedFRENCH = expectedFRENCH.unescape();
|
||||
//UnicodeString expectedUS ( "Monday, September 15, 1997 12:00:00 o'clock AM PDT" );
|
||||
UnicodeString expectedUS ( "Monday, September 15, 1997 12:00:00 AM PT" );
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 1996-2007, International Business Machines Corporation and *
|
||||
* Copyright (C) 1996-2008, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*/
|
||||
@ -1824,7 +1824,7 @@ IntlTestRBNF::doTest(RuleBasedNumberFormat* formatter, const char* const testDat
|
||||
errln(msg);
|
||||
break;
|
||||
} else {
|
||||
UnicodeString expectedString = UnicodeString(expectedWords).unescape();
|
||||
UnicodeString expectedString = UnicodeString(expectedWords, -1, US_INV).unescape();
|
||||
if (actualString != expectedString) {
|
||||
UnicodeString msg = "FAIL: check failed for ";
|
||||
decFmt.format(expectedNumber, msg, status);
|
||||
|
@ -1,6 +1,6 @@
|
||||
/********************************************************************
|
||||
* COPYRIGHT:
|
||||
* Copyright (c) 1997-2007, International Business Machines Corporation and
|
||||
* Copyright (c) 1997-2008, International Business Machines Corporation and
|
||||
* others. All Rights Reserved.
|
||||
********************************************************************
|
||||
************************************************************************
|
||||
@ -26,7 +26,7 @@ JamoTest::JamoTest()
|
||||
UParseError parseError;
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
NAME_JAMO = Transliterator::createFromRules("Name-Jamo",
|
||||
JAMO_NAMES_RULES,
|
||||
UNICODE_STRING_SIMPLE(JAMO_NAMES_RULES),
|
||||
UTRANS_FORWARD, parseError, status);
|
||||
|
||||
if (U_FAILURE(status)) {
|
||||
@ -35,7 +35,7 @@ JamoTest::JamoTest()
|
||||
}
|
||||
status = U_ZERO_ERROR;
|
||||
JAMO_NAME = Transliterator::createFromRules("Jamo-Name",
|
||||
JAMO_NAMES_RULES,
|
||||
UNICODE_STRING_SIMPLE(JAMO_NAMES_RULES),
|
||||
UTRANS_REVERSE, parseError, status);
|
||||
if (U_FAILURE(status)) {
|
||||
delete JAMO_NAME;
|
||||
@ -400,7 +400,7 @@ JamoTest::TestRealText() {
|
||||
int32_t i;
|
||||
for (i=0; i < WHAT_IS_UNICODE_length; ++i) {
|
||||
++total;
|
||||
UnicodeString hangul = WHAT_IS_UNICODE[i];
|
||||
UnicodeString hangul = UNICODE_STRING_SIMPLE(WHAT_IS_UNICODE[i]);
|
||||
hangul = hangul.unescape(); // Parse backslash-u escapes
|
||||
UnicodeString hangulX = hangul;
|
||||
rt.transliterate(hangulX);
|
||||
|
@ -696,7 +696,7 @@ NumberFormatTest::TestCurrency(void)
|
||||
for(int i=0; i < (int)(sizeof(testCases)/sizeof(testCases[i])); i++){
|
||||
status = U_ZERO_ERROR;
|
||||
const char *localeID = testCases[i][0];
|
||||
UnicodeString expected(testCases[i][1]);
|
||||
UnicodeString expected(testCases[i][1], -1, US_INV);
|
||||
expected = expected.unescape();
|
||||
s.truncate(0);
|
||||
char loc[256]={0};
|
||||
@ -1358,7 +1358,7 @@ void NumberFormatTest::TestSurrogateSupport(void) {
|
||||
custom.setSymbol(DecimalFormatSymbols::kZeroDigitSymbol, (UChar)0x30);
|
||||
custom.setSymbol(DecimalFormatSymbols::kCurrencySymbol, "units of money");
|
||||
custom.setSymbol(DecimalFormatSymbols::kMonetarySeparatorSymbol, "money separator");
|
||||
patternStr = "0.00 \\u00A4' in your bank account'";
|
||||
patternStr = UNICODE_STRING_SIMPLE("0.00 \\u00A4' in your bank account'");
|
||||
patternStr = patternStr.unescape();
|
||||
expStr = UnicodeString(" minus 20money separator00 units of money in your bank account", "");
|
||||
status = U_ZERO_ERROR;
|
||||
|
@ -1,5 +1,5 @@
|
||||
/***********************************************************************
|
||||
* Copyright (c) 1997-2007, International Business Machines Corporation
|
||||
* Copyright (c) 1997-2008, International Business Machines Corporation
|
||||
* and others. All Rights Reserved.
|
||||
***********************************************************************/
|
||||
|
||||
@ -1085,7 +1085,7 @@ void NumberFormatRegressionTest::Test4071859 (void)
|
||||
String expectedPercent = "-578.998%";
|
||||
*/
|
||||
UnicodeString expectedDefault("-5.789,988");
|
||||
UnicodeString expectedCurrency("-\\u20A4 5.790");
|
||||
UnicodeString expectedCurrency("-\\u20A4 5.790", -1, US_INV);
|
||||
UnicodeString expectedPercent("-578.999%");
|
||||
expectedCurrency = expectedCurrency.unescape();
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/********************************************************************
|
||||
* Copyright (c) 1999-2007, International Business Machines
|
||||
* Copyright (c) 1999-2008, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
********************************************************************
|
||||
* Date Name Description
|
||||
@ -744,12 +744,12 @@ void RBBIAPITest::TestRuleStatus() {
|
||||
// Test the vector form of break rule status.
|
||||
//
|
||||
void RBBIAPITest::TestRuleStatusVec() {
|
||||
UnicodeString rulesString = "[A-N]{100}; \n"
|
||||
UnicodeString rulesString( "[A-N]{100}; \n"
|
||||
"[a-w]{200}; \n"
|
||||
"[\\p{L}]{300}; \n"
|
||||
"[\\p{N}]{400}; \n"
|
||||
"[0-5]{500}; \n"
|
||||
"!.*;\n";
|
||||
"!.*;\n", -1, US_INV);
|
||||
UnicodeString testString1 = "Aapz5?";
|
||||
int32_t statusVals[10];
|
||||
int32_t numStatuses;
|
||||
|
@ -330,13 +330,13 @@ static const int T_IDEO = 400;
|
||||
//
|
||||
//-----------------------------------------------------------------------------------
|
||||
void RBBITest::TestStatusReturn() {
|
||||
UnicodeString rulesString1 = "$Letters = [:L:];\n"
|
||||
UnicodeString rulesString1("$Letters = [:L:];\n"
|
||||
"$Numbers = [:N:];\n"
|
||||
"$Letters+{1};\n"
|
||||
"$Numbers+{2};\n"
|
||||
"Help\\ {4}/me\\!;\n"
|
||||
"[^$Letters $Numbers];\n"
|
||||
"!.*;\n";
|
||||
"!.*;\n", -1, US_INV);
|
||||
UnicodeString testString1 = "abc123..abc Help me Help me!";
|
||||
// 01234567890123456789012345678
|
||||
int32_t bounds1[] = {0, 3, 6, 7, 8, 11, 12, 16, 17, 19, 20, 25, 27, 28, -1};
|
||||
@ -1189,7 +1189,7 @@ void RBBITest::TestBug5775() {
|
||||
return;
|
||||
}
|
||||
|
||||
UnicodeString s("One.\\u00ad Two.");
|
||||
UnicodeString s("One.\\u00ad Two.", -1, US_INV);
|
||||
// 01234 56789
|
||||
s = s.unescape();
|
||||
bi->setText(s);
|
||||
@ -1422,7 +1422,7 @@ void RBBITest::TestExtended() {
|
||||
tp.srcLine = new UVector32(status);
|
||||
tp.srcCol = new UVector32(status);
|
||||
|
||||
RegexMatcher localeMatcher("<locale *([\\p{L}\\p{Nd}_]*) *>", 0, status);
|
||||
RegexMatcher localeMatcher(UNICODE_STRING_SIMPLE("<locale *([\\p{L}\\p{Nd}_]*) *>"), 0, status);
|
||||
TEST_ASSERT_SUCCESS(status);
|
||||
|
||||
|
||||
@ -1597,7 +1597,7 @@ void RBBITest::TestExtended() {
|
||||
break;
|
||||
}
|
||||
|
||||
if (testString.compare(charIdx-1, 3, "\\N{") == 0) {
|
||||
if (testString.compare(charIdx-1, 3, UNICODE_STRING_SIMPLE("\\N{")) == 0) {
|
||||
// Named character, e.g. \N{COMBINING GRAVE ACCENT}
|
||||
// Get the code point from the name and insert it into the test data.
|
||||
// (Damn, no API takes names in Unicode !!!
|
||||
@ -2016,7 +2016,7 @@ void RBBITest::runUnicodeTestData(const char *fileName, RuleBasedBreakIterator *
|
||||
// Capture Group # 1 2 3 4 5
|
||||
// Parses this item: divide x hex digits comment \n unrecognized \n
|
||||
//
|
||||
UnicodeString tokenExpr = "[ \t]*(?:(\\u00F7)|(\\u00D7)|([0-9a-fA-F]+)|((?:#.*?)?$.)|(.*?$.))";
|
||||
UnicodeString tokenExpr("[ \t]*(?:(\\u00F7)|(\\u00D7)|([0-9a-fA-F]+)|((?:#.*?)?$.)|(.*?$.))", -1, US_INV);
|
||||
RegexMatcher tokenMatcher(tokenExpr, testFileAsString, UREGEX_MULTILINE | UREGEX_DOTALL, status);
|
||||
UnicodeString testString;
|
||||
UVector32 breakPositions(status);
|
||||
@ -2245,23 +2245,23 @@ RBBICharMonkey::RBBICharMonkey() {
|
||||
|
||||
fText = NULL;
|
||||
|
||||
fCRLFSet = new UnicodeSet("[\\r\\n]", status);
|
||||
fControlSet = new UnicodeSet("[\\p{Grapheme_Cluster_Break = Control}]", status);
|
||||
fExtendSet = new UnicodeSet("[\\p{Grapheme_Cluster_Break = Extend}]", status);
|
||||
fPrependSet = new UnicodeSet("[\\p{Grapheme_Cluster_Break = Prepend}]", status);
|
||||
fSpacingSet = new UnicodeSet("[\\p{Grapheme_Cluster_Break = SpacingMark}]", status);
|
||||
fLSet = new UnicodeSet("[\\p{Grapheme_Cluster_Break = L}]", status);
|
||||
fVSet = new UnicodeSet("[\\p{Grapheme_Cluster_Break = V}]", status);
|
||||
fTSet = new UnicodeSet("[\\p{Grapheme_Cluster_Break = T}]", status);
|
||||
fLVSet = new UnicodeSet("[\\p{Grapheme_Cluster_Break = LV}]", status);
|
||||
fLVTSet = new UnicodeSet("[\\p{Grapheme_Cluster_Break = LVT}]", status);
|
||||
fCRLFSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\r\\n]"), status);
|
||||
fControlSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Grapheme_Cluster_Break = Control}]"), status);
|
||||
fExtendSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Grapheme_Cluster_Break = Extend}]"), status);
|
||||
fPrependSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Grapheme_Cluster_Break = Prepend}]"), status);
|
||||
fSpacingSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Grapheme_Cluster_Break = SpacingMark}]"), status);
|
||||
fLSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Grapheme_Cluster_Break = L}]"), status);
|
||||
fVSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Grapheme_Cluster_Break = V}]"), status);
|
||||
fTSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Grapheme_Cluster_Break = T}]"), status);
|
||||
fLVSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Grapheme_Cluster_Break = LV}]"), status);
|
||||
fLVTSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Grapheme_Cluster_Break = LVT}]"), status);
|
||||
fHangulSet = new UnicodeSet();
|
||||
fHangulSet->addAll(*fLSet);
|
||||
fHangulSet->addAll(*fVSet);
|
||||
fHangulSet->addAll(*fTSet);
|
||||
fHangulSet->addAll(*fLVSet);
|
||||
fHangulSet->addAll(*fLVTSet);
|
||||
fAnySet = new UnicodeSet("[\\u0000-\\U0010ffff]", status);
|
||||
fAnySet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\u0000-\\U0010ffff]"), status);
|
||||
|
||||
fSets = new UVector(status);
|
||||
fSets->addElement(fCRLFSet, status);
|
||||
@ -2457,18 +2457,18 @@ RBBIWordMonkey::RBBIWordMonkey()
|
||||
|
||||
fSets = new UVector(status);
|
||||
|
||||
fCRSet = new UnicodeSet("[\\p{Word_Break = CR}]", status);
|
||||
fLFSet = new UnicodeSet("[\\p{Word_Break = LF}]", status);
|
||||
fNewlineSet = new UnicodeSet("[\\p{Word_Break = Newline}]", status);
|
||||
fALetterSet = new UnicodeSet("[\\p{Word_Break = ALetter}]", status);
|
||||
fKatakanaSet = new UnicodeSet("[\\p{Word_Break = Katakana}]", status);
|
||||
fMidNumLetSet = new UnicodeSet("[\\p{Word_Break = MidNumLet}]", status);
|
||||
fMidLetterSet = new UnicodeSet("[\\p{Word_Break = MidLetter}]", status);
|
||||
fMidNumSet = new UnicodeSet("[\\p{Word_Break = MidNum}]", status);
|
||||
fNumericSet = new UnicodeSet("[\\p{Word_Break = Numeric}]", status);
|
||||
fFormatSet = new UnicodeSet("[\\p{Word_Break = Format}]", status);
|
||||
fExtendNumLetSet = new UnicodeSet("[\\p{Word_Break = ExtendNumLet}]", status);
|
||||
fExtendSet = new UnicodeSet("[\\p{Word_Break = Extend}]", status);
|
||||
fCRSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = CR}]"), status);
|
||||
fLFSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = LF}]"), status);
|
||||
fNewlineSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = Newline}]"), status);
|
||||
fALetterSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = ALetter}]"), status);
|
||||
fKatakanaSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = Katakana}]"), status);
|
||||
fMidNumLetSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = MidNumLet}]"), status);
|
||||
fMidLetterSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = MidLetter}]"), status);
|
||||
fMidNumSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = MidNum}]"), status);
|
||||
fNumericSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = Numeric}]"), status);
|
||||
fFormatSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = Format}]"), status);
|
||||
fExtendNumLetSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = ExtendNumLet}]"), status);
|
||||
fExtendSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = Extend}]"), status);
|
||||
|
||||
fOtherSet = new UnicodeSet();
|
||||
if(U_FAILURE(status)) {
|
||||
@ -2489,7 +2489,7 @@ RBBIWordMonkey::RBBIWordMonkey()
|
||||
fOtherSet->removeAll(*fFormatSet);
|
||||
fOtherSet->removeAll(*fExtendSet);
|
||||
// Inhibit dictionary characters from being tested at all.
|
||||
fOtherSet->removeAll(UnicodeSet("[\\p{LineBreak = Complex_Context}]", status));
|
||||
fOtherSet->removeAll(UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{LineBreak = Complex_Context}]"), status));
|
||||
|
||||
fSets->addElement(fCRSet, status);
|
||||
fSets->addElement(fLFSet, status);
|
||||
@ -2736,18 +2736,18 @@ RBBISentMonkey::RBBISentMonkey()
|
||||
// Separator Set Note: Beginning with Unicode 5.1, CR and LF were removed from the separator
|
||||
// set and made into character classes of their own. For the monkey impl,
|
||||
// they remain in SEP, since Sep always appears with CR and LF in the rules.
|
||||
fSepSet = new UnicodeSet("[\\p{Sentence_Break = Sep} \\u000a \\u000d]", status);
|
||||
fFormatSet = new UnicodeSet("[\\p{Sentence_Break = Format}]", status);
|
||||
fSpSet = new UnicodeSet("[\\p{Sentence_Break = Sp}]", status);
|
||||
fLowerSet = new UnicodeSet("[\\p{Sentence_Break = Lower}]", status);
|
||||
fUpperSet = new UnicodeSet("[\\p{Sentence_Break = Upper}]", status);
|
||||
fOLetterSet = new UnicodeSet("[\\p{Sentence_Break = OLetter}]", status);
|
||||
fNumericSet = new UnicodeSet("[\\p{Sentence_Break = Numeric}]", status);
|
||||
fATermSet = new UnicodeSet("[\\p{Sentence_Break = ATerm}]", status);
|
||||
fSContinueSet = new UnicodeSet("[\\p{Sentence_Break = SContinue}]", status);
|
||||
fSTermSet = new UnicodeSet("[\\p{Sentence_Break = STerm}]", status);
|
||||
fCloseSet = new UnicodeSet("[\\p{Sentence_Break = Close}]", status);
|
||||
fExtendSet = new UnicodeSet("[\\p{Sentence_Break = Extend}]", status);
|
||||
fSepSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Sentence_Break = Sep} \\u000a \\u000d]"), status);
|
||||
fFormatSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Sentence_Break = Format}]"), status);
|
||||
fSpSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Sentence_Break = Sp}]"), status);
|
||||
fLowerSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Sentence_Break = Lower}]"), status);
|
||||
fUpperSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Sentence_Break = Upper}]"), status);
|
||||
fOLetterSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Sentence_Break = OLetter}]"), status);
|
||||
fNumericSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Sentence_Break = Numeric}]"), status);
|
||||
fATermSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Sentence_Break = ATerm}]"), status);
|
||||
fSContinueSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Sentence_Break = SContinue}]"), status);
|
||||
fSTermSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Sentence_Break = STerm}]"), status);
|
||||
fCloseSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Sentence_Break = Close}]"), status);
|
||||
fExtendSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Sentence_Break = Extend}]"), status);
|
||||
fOtherSet = new UnicodeSet();
|
||||
|
||||
if(U_FAILURE(status)) {
|
||||
@ -3082,42 +3082,42 @@ RBBILineMonkey::RBBILineMonkey()
|
||||
|
||||
fSets = new UVector(status);
|
||||
|
||||
fBK = new UnicodeSet("[\\p{Line_Break=BK}]", status);
|
||||
fCR = new UnicodeSet("[\\p{Line_break=CR}]", status);
|
||||
fLF = new UnicodeSet("[\\p{Line_break=LF}]", status);
|
||||
fCM = new UnicodeSet("[\\p{Line_break=CM}]", status);
|
||||
fNL = new UnicodeSet("[\\p{Line_break=NL}]", status);
|
||||
fWJ = new UnicodeSet("[\\p{Line_break=WJ}]", status);
|
||||
fZW = new UnicodeSet("[\\p{Line_break=ZW}]", status);
|
||||
fGL = new UnicodeSet("[\\p{Line_break=GL}]", status);
|
||||
fCB = new UnicodeSet("[\\p{Line_break=CB}]", status);
|
||||
fSP = new UnicodeSet("[\\p{Line_break=SP}]", status);
|
||||
fB2 = new UnicodeSet("[\\p{Line_break=B2}]", status);
|
||||
fBA = new UnicodeSet("[\\p{Line_break=BA}]", status);
|
||||
fBB = new UnicodeSet("[\\p{Line_break=BB}]", status);
|
||||
fHY = new UnicodeSet("[\\p{Line_break=HY}]", status);
|
||||
fH2 = new UnicodeSet("[\\p{Line_break=H2}]", status);
|
||||
fH3 = new UnicodeSet("[\\p{Line_break=H3}]", status);
|
||||
fCL = new UnicodeSet("[\\p{Line_break=CL}]", status);
|
||||
fEX = new UnicodeSet("[\\p{Line_break=EX}]", status);
|
||||
fIN = new UnicodeSet("[\\p{Line_break=IN}]", status);
|
||||
fJL = new UnicodeSet("[\\p{Line_break=JL}]", status);
|
||||
fJV = new UnicodeSet("[\\p{Line_break=JV}]", status);
|
||||
fJT = new UnicodeSet("[\\p{Line_break=JT}]", status);
|
||||
fNS = new UnicodeSet("[\\p{Line_break=NS}]", status);
|
||||
fOP = new UnicodeSet("[\\p{Line_break=OP}]", status);
|
||||
fQU = new UnicodeSet("[\\p{Line_break=QU}]", status);
|
||||
fIS = new UnicodeSet("[\\p{Line_break=IS}]", status);
|
||||
fNU = new UnicodeSet("[\\p{Line_break=NU}]", status);
|
||||
fPO = new UnicodeSet("[\\p{Line_break=PO}]", status);
|
||||
fPR = new UnicodeSet("[\\p{Line_break=PR}]", status);
|
||||
fSY = new UnicodeSet("[\\p{Line_break=SY}]", status);
|
||||
fAI = new UnicodeSet("[\\p{Line_break=AI}]", status);
|
||||
fAL = new UnicodeSet("[\\p{Line_break=AL}]", status);
|
||||
fID = new UnicodeSet("[\\p{Line_break=ID}]", status);
|
||||
fSA = new UnicodeSet("[\\p{Line_break=SA}]", status);
|
||||
fSG = new UnicodeSet("[\\ud800-\\udfff]", status);
|
||||
fXX = new UnicodeSet("[\\p{Line_break=XX}]", status);
|
||||
fBK = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_Break=BK}]"), status);
|
||||
fCR = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=CR}]"), status);
|
||||
fLF = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=LF}]"), status);
|
||||
fCM = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=CM}]"), status);
|
||||
fNL = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=NL}]"), status);
|
||||
fWJ = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=WJ}]"), status);
|
||||
fZW = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=ZW}]"), status);
|
||||
fGL = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=GL}]"), status);
|
||||
fCB = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=CB}]"), status);
|
||||
fSP = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=SP}]"), status);
|
||||
fB2 = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=B2}]"), status);
|
||||
fBA = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=BA}]"), status);
|
||||
fBB = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=BB}]"), status);
|
||||
fHY = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=HY}]"), status);
|
||||
fH2 = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=H2}]"), status);
|
||||
fH3 = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=H3}]"), status);
|
||||
fCL = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=CL}]"), status);
|
||||
fEX = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=EX}]"), status);
|
||||
fIN = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=IN}]"), status);
|
||||
fJL = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=JL}]"), status);
|
||||
fJV = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=JV}]"), status);
|
||||
fJT = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=JT}]"), status);
|
||||
fNS = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=NS}]"), status);
|
||||
fOP = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=OP}]"), status);
|
||||
fQU = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=QU}]"), status);
|
||||
fIS = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=IS}]"), status);
|
||||
fNU = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=NU}]"), status);
|
||||
fPO = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=PO}]"), status);
|
||||
fPR = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=PR}]"), status);
|
||||
fSY = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=SY}]"), status);
|
||||
fAI = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=AI}]"), status);
|
||||
fAL = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=AL}]"), status);
|
||||
fID = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=ID}]"), status);
|
||||
fSA = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=SA}]"), status);
|
||||
fSG = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\ud800-\\udfff]"), status);
|
||||
fXX = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=XX}]"), status);
|
||||
|
||||
if (U_FAILURE(status)) {
|
||||
deferredStatus = status;
|
||||
@ -3169,13 +3169,14 @@ RBBILineMonkey::RBBILineMonkey()
|
||||
fSets->addElement(fSG, status);
|
||||
|
||||
fNumberMatcher = new RegexMatcher(
|
||||
"((\\p{Line_Break=PR}|\\p{Line_Break=PO})\\p{Line_Break=CM}*)?"
|
||||
"((\\p{Line_Break=OP}|\\p{Line_Break=HY})\\p{Line_Break=CM}*)?"
|
||||
"\\p{Line_Break=NU}\\p{Line_Break=CM}*"
|
||||
"((\\p{Line_Break=NU}|\\p{Line_Break=IS}|\\p{Line_Break=SY})\\p{Line_Break=CM}*)*"
|
||||
"(\\p{Line_Break=CL}\\p{Line_Break=CM}*)?"
|
||||
"((\\p{Line_Break=PR}|\\p{Line_Break=PO})\\p{Line_Break=CM}*)?",
|
||||
0, status);
|
||||
UNICODE_STRING_SIMPLE(
|
||||
"((\\p{Line_Break=PR}|\\p{Line_Break=PO})\\p{Line_Break=CM}*)?"
|
||||
"((\\p{Line_Break=OP}|\\p{Line_Break=HY})\\p{Line_Break=CM}*)?"
|
||||
"\\p{Line_Break=NU}\\p{Line_Break=CM}*"
|
||||
"((\\p{Line_Break=NU}|\\p{Line_Break=IS}|\\p{Line_Break=SY})\\p{Line_Break=CM}*)*"
|
||||
"(\\p{Line_Break=CL}\\p{Line_Break=CM}*)?"
|
||||
"((\\p{Line_Break=PR}|\\p{Line_Break=PO})\\p{Line_Break=CM}*)?"
|
||||
), 0, status);
|
||||
|
||||
fCharBI = BreakIterator::createCharacterInstance(Locale::getEnglish(), status);
|
||||
|
||||
@ -3758,7 +3759,6 @@ void RBBITest::TestWordBreaks(void)
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
// BreakIterator *bi = BreakIterator::createCharacterInstance(locale, status);
|
||||
BreakIterator *bi = BreakIterator::createWordInstance(locale, status);
|
||||
UChar str[300];
|
||||
static const char *strlist[] =
|
||||
{
|
||||
"\\U000e0032\\u0097\\u0f94\\uc2d8\\u05f4\\U000e0031\\u060d",
|
||||
@ -3808,8 +3808,7 @@ void RBBITest::TestWordBreaks(void)
|
||||
}
|
||||
for (loop = 0; loop < (int)(sizeof(strlist) / sizeof(char *)); loop ++) {
|
||||
// printf("looping %d\n", loop);
|
||||
u_unescape(strlist[loop], str, 25);
|
||||
UnicodeString ustr(str);
|
||||
UnicodeString ustr = CharsToUnicodeString(strlist[loop]);
|
||||
// RBBICharMonkey monkey;
|
||||
RBBIWordMonkey monkey;
|
||||
|
||||
@ -4105,7 +4104,7 @@ void RBBITest::TestMonkey(char *params) {
|
||||
|
||||
|
||||
// m.reset(p);
|
||||
if (RegexMatcher("\\S", p, 0, status).find()) {
|
||||
if (RegexMatcher(UNICODE_STRING_SIMPLE("\\S"), p, 0, status).find()) {
|
||||
// Each option is stripped out of the option string as it is processed.
|
||||
// All options have been checked. The option string should have been completely emptied.
|
||||
char buf[100];
|
||||
|
@ -115,15 +115,15 @@ if (status!=errcode) {errln("RegexTest failure at line %d. Expected status=%s,
|
||||
#define REGEX_TESTLM(pat, text, looking, match) doRegexLMTest(pat, text, looking, match, __LINE__);
|
||||
|
||||
UBool RegexTest::doRegexLMTest(const char *pat, const char *text, UBool looking, UBool match, int32_t line) {
|
||||
const UnicodeString pattern(pat);
|
||||
const UnicodeString inputText(text);
|
||||
const UnicodeString pattern(pat, -1, US_INV);
|
||||
const UnicodeString inputText(text, -1, US_INV);
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
UParseError pe;
|
||||
RegexPattern *REPattern = NULL;
|
||||
RegexMatcher *REMatcher = NULL;
|
||||
UBool retVal = TRUE;
|
||||
|
||||
UnicodeString patString(pat);
|
||||
UnicodeString patString(pat, -1, US_INV);
|
||||
REPattern = RegexPattern::compile(patString, 0, pe, status);
|
||||
if (U_FAILURE(status)) {
|
||||
errln("RegexTest failure in RegexPattern::compile() at line %d. Status = %s\n",
|
||||
@ -636,7 +636,7 @@ void RegexTest::API_Match() {
|
||||
UParseError pe;
|
||||
UErrorCode status=U_ZERO_ERROR;
|
||||
|
||||
UnicodeString re(".*?(?:(\\Gabc)|(abc))");
|
||||
UnicodeString re(".*?(?:(\\Gabc)|(abc))", -1, US_INV);
|
||||
RegexPattern *pat = RegexPattern::compile(re, flags, pe, status);
|
||||
REGEX_CHECK_STATUS;
|
||||
UnicodeString data = ".abcabc.abc..";
|
||||
@ -681,7 +681,7 @@ void RegexTest::API_Match() {
|
||||
REGEX_ASSERT(i==5);
|
||||
|
||||
// Check that the bump goes over surrogate pairs OK
|
||||
s = "\\U00010001\\U00010002\\U00010003\\U00010004";
|
||||
s = UNICODE_STRING_SIMPLE("\\U00010001\\U00010002\\U00010003\\U00010004");
|
||||
s = s.unescape();
|
||||
m.reset(s);
|
||||
for (i=0; ; i+=2) {
|
||||
@ -1027,7 +1027,7 @@ void RegexTest::API_Replace() {
|
||||
REGEX_CHECK_STATUS;
|
||||
REGEX_ASSERT(dest == "bcbcdefg");
|
||||
|
||||
dest = matcher2->replaceFirst("The value of \\$1 is $1.", status);
|
||||
dest = matcher2->replaceFirst(UNICODE_STRING_SIMPLE("The value of \\$1 is $1."), status);
|
||||
REGEX_CHECK_STATUS;
|
||||
REGEX_ASSERT(dest == "The value of $1 is bc.defg");
|
||||
|
||||
@ -1035,7 +1035,7 @@ void RegexTest::API_Replace() {
|
||||
REGEX_CHECK_STATUS;
|
||||
REGEX_ASSERT(dest == "$ by itself, no group number $$$defg");
|
||||
|
||||
UnicodeString replacement = "Supplemental Digit 1 $\\U0001D7CF.";
|
||||
UnicodeString replacement = UNICODE_STRING_SIMPLE("Supplemental Digit 1 $\\U0001D7CF.");
|
||||
replacement = replacement.unescape();
|
||||
dest = matcher2->replaceFirst(replacement, status);
|
||||
REGEX_CHECK_STATUS;
|
||||
@ -1049,7 +1049,7 @@ void RegexTest::API_Replace() {
|
||||
//
|
||||
{
|
||||
UnicodeString src = "abc 1 abc 2 abc 3";
|
||||
UnicodeString substitute = "--\\u0043--";
|
||||
UnicodeString substitute = UNICODE_STRING_SIMPLE("--\\u0043--");
|
||||
matcher->reset(src);
|
||||
UnicodeString result = matcher->replaceAll(substitute, status);
|
||||
REGEX_CHECK_STATUS;
|
||||
@ -1057,7 +1057,7 @@ void RegexTest::API_Replace() {
|
||||
}
|
||||
{
|
||||
UnicodeString src = "abc !";
|
||||
UnicodeString substitute = "--\\U00010000--";
|
||||
UnicodeString substitute = UNICODE_STRING_SIMPLE("--\\U00010000--");
|
||||
matcher->reset(src);
|
||||
UnicodeString result = matcher->replaceAll(substitute, status);
|
||||
REGEX_CHECK_STATUS;
|
||||
@ -1186,7 +1186,7 @@ void RegexTest::API_Pattern() {
|
||||
//
|
||||
{
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
RegexPattern *pSource = RegexPattern::compile("\\p{L}+", 0, status);
|
||||
RegexPattern *pSource = RegexPattern::compile(UNICODE_STRING_SIMPLE("\\p{L}+"), 0, status);
|
||||
RegexPattern *pClone = pSource->clone();
|
||||
delete pSource;
|
||||
RegexMatcher *mFromClone = pClone->matcher(status);
|
||||
@ -1278,7 +1278,7 @@ void RegexTest::API_Pattern() {
|
||||
delete pat1;
|
||||
|
||||
// split, with a pattern with (capture)
|
||||
pat1 = RegexPattern::compile("<(\\w*)>", pe, status);
|
||||
pat1 = RegexPattern::compile(UNICODE_STRING_SIMPLE("<(\\w*)>"), pe, status);
|
||||
REGEX_CHECK_STATUS;
|
||||
|
||||
status = U_ZERO_ERROR;
|
||||
@ -1444,11 +1444,11 @@ void RegexTest::Extended() {
|
||||
//
|
||||
UnicodeString testString(FALSE, testData, len);
|
||||
|
||||
RegexMatcher quotedStuffMat("\\s*([\\'\\\"/])(.*?)\\1", 0, status);
|
||||
RegexMatcher commentMat ("\\s*(#.*)?$", 0, status);
|
||||
RegexMatcher flagsMat ("\\s*([ixsmdteDEGLMvabtyYzZ2-9]*)([:letter:]*)", 0, status);
|
||||
RegexMatcher quotedStuffMat(UNICODE_STRING_SIMPLE("\\s*([\\'\\\"/])(.*?)\\1"), 0, status);
|
||||
RegexMatcher commentMat (UNICODE_STRING_SIMPLE("\\s*(#.*)?$"), 0, status);
|
||||
RegexMatcher flagsMat (UNICODE_STRING_SIMPLE("\\s*([ixsmdteDEGLMvabtyYzZ2-9]*)([:letter:]*)"), 0, status);
|
||||
|
||||
RegexMatcher lineMat("(.*?)\\r?\\n", testString, 0, status);
|
||||
RegexMatcher lineMat(UNICODE_STRING_SIMPLE("(.*?)\\r?\\n"), testString, 0, status);
|
||||
UnicodeString testPattern; // The pattern for test from the test file.
|
||||
UnicodeString testFlags; // the flags for a test.
|
||||
UnicodeString matchString; // The marked up string to be used as input
|
||||
@ -2073,7 +2073,7 @@ void RegexTest::PerlTests() {
|
||||
// Regex to break the input file into lines, and strip the new lines.
|
||||
// One line per match, capture group one is the desired data.
|
||||
//
|
||||
RegexPattern* linePat = RegexPattern::compile("(.+?)[\\r\\n]+", 0, pe, status);
|
||||
RegexPattern* linePat = RegexPattern::compile(UNICODE_STRING_SIMPLE("(.+?)[\\r\\n]+"), 0, pe, status);
|
||||
if (U_FAILURE(status)) {
|
||||
dataerrln("RegexPattern::compile() error");
|
||||
return;
|
||||
@ -2084,7 +2084,7 @@ void RegexTest::PerlTests() {
|
||||
// Regex to split a test file line into fields.
|
||||
// There are six fields, separated by tabs.
|
||||
//
|
||||
RegexPattern* fieldPat = RegexPattern::compile("\\t", 0, pe, status);
|
||||
RegexPattern* fieldPat = RegexPattern::compile(UNICODE_STRING_SIMPLE("\\t"), 0, pe, status);
|
||||
|
||||
//
|
||||
// Regex to identify test patterns with flag settings, and to separate them.
|
||||
@ -2092,7 +2092,7 @@ void RegexTest::PerlTests() {
|
||||
// Test patterns without flags are not quoted: pattern
|
||||
// Coming out, capture group 2 is the pattern, capture group 3 is the flags.
|
||||
//
|
||||
RegexPattern *flagPat = RegexPattern::compile("('?)(.*)\\1(.*)", 0, pe, status);
|
||||
RegexPattern *flagPat = RegexPattern::compile(UNICODE_STRING_SIMPLE("('?)(.*)\\1(.*)"), 0, pe, status);
|
||||
RegexMatcher* flagMat = flagPat->matcher(status);
|
||||
|
||||
//
|
||||
@ -2101,19 +2101,19 @@ void RegexTest::PerlTests() {
|
||||
// are string constants and REs for these constructs.
|
||||
//
|
||||
UnicodeString nulnulSrc("${nulnul}");
|
||||
UnicodeString nulnul("\\u0000\\u0000");
|
||||
UnicodeString nulnul("\\u0000\\u0000", -1, US_INV);
|
||||
nulnul = nulnul.unescape();
|
||||
|
||||
UnicodeString ffffSrc("${ffff}");
|
||||
UnicodeString ffff("\\uffff");
|
||||
UnicodeString ffff("\\uffff", -1, US_INV);
|
||||
ffff = ffff.unescape();
|
||||
|
||||
// regexp for $-[0], $+[2], etc.
|
||||
RegexPattern *groupsPat = RegexPattern::compile("\\$([+\\-])\\[(\\d+)\\]", 0, pe, status);
|
||||
RegexPattern *groupsPat = RegexPattern::compile(UNICODE_STRING_SIMPLE("\\$([+\\-])\\[(\\d+)\\]"), 0, pe, status);
|
||||
RegexMatcher *groupsMat = groupsPat->matcher(status);
|
||||
|
||||
// regexp for $0, $1, $2, etc.
|
||||
RegexPattern *cgPat = RegexPattern::compile("\\$(\\d+)", 0, pe, status);
|
||||
RegexPattern *cgPat = RegexPattern::compile(UNICODE_STRING_SIMPLE("\\$(\\d+)"), 0, pe, status);
|
||||
RegexMatcher *cgMat = cgPat->matcher(status);
|
||||
|
||||
|
||||
@ -2138,7 +2138,7 @@ void RegexTest::PerlTests() {
|
||||
flagMat->matches(status);
|
||||
UnicodeString pattern = flagMat->group(2, status);
|
||||
pattern.findAndReplace("${bang}", "!");
|
||||
pattern.findAndReplace(nulnulSrc, "\\u0000\\u0000");
|
||||
pattern.findAndReplace(nulnulSrc, UNICODE_STRING_SIMPLE("\\u0000\\u0000"));
|
||||
pattern.findAndReplace(ffffSrc, ffff);
|
||||
|
||||
//
|
||||
@ -2218,7 +2218,7 @@ void RegexTest::PerlTests() {
|
||||
// Replace any \n in the match string with an actual new-line char.
|
||||
// Don't do full unescape, as this unescapes more than Perl does, which
|
||||
// causes other spurious failures in the tests.
|
||||
matchString.findAndReplace("\\n", "\n");
|
||||
matchString.findAndReplace(UNICODE_STRING_SIMPLE("\\n"), "\n");
|
||||
|
||||
|
||||
|
||||
@ -2315,7 +2315,7 @@ void RegexTest::PerlTests() {
|
||||
perlExpr.remove(0, 2);
|
||||
}
|
||||
|
||||
else if (perlExpr.startsWith("\\")) { // \Escape. Take following char as a literal.
|
||||
else if (perlExpr.startsWith(UNICODE_STRING_SIMPLE("\\"))) { // \Escape. Take following char as a literal.
|
||||
// or as an escaped sequence (e.g. \n)
|
||||
if (perlExpr.length() > 1) {
|
||||
perlExpr.remove(0, 1); // Remove the '\', but only if not last char.
|
||||
@ -2349,7 +2349,7 @@ void RegexTest::PerlTests() {
|
||||
UnicodeString expectedS(fields[4]);
|
||||
expectedS.findAndReplace(nulnulSrc, nulnul);
|
||||
expectedS.findAndReplace(ffffSrc, ffff);
|
||||
expectedS.findAndReplace("\\n", "\n");
|
||||
expectedS.findAndReplace(UNICODE_STRING_SIMPLE("\\n"), "\n");
|
||||
|
||||
|
||||
if (expectedS.compare(resultString) != 0) {
|
||||
@ -2437,7 +2437,7 @@ void RegexTest::Callbacks() {
|
||||
const void *returnedContext;
|
||||
URegexMatchCallback *returnedFn;
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
RegexMatcher matcher("((.)+\\2)+x", 0, status); // A pattern that can run long.
|
||||
RegexMatcher matcher(UNICODE_STRING_SIMPLE("((.)+\\2)+x"), 0, status); // A pattern that can run long.
|
||||
REGEX_CHECK_STATUS;
|
||||
matcher.setMatchCallback(testCallBackFn, &cbInfo, status);
|
||||
REGEX_CHECK_STATUS;
|
||||
|
@ -546,7 +546,7 @@ static char *printOrders(char *buffer, OrderList &list)
|
||||
|
||||
void SSearchTest::offsetTest()
|
||||
{
|
||||
UnicodeString test[] = {
|
||||
const char *test[] = {
|
||||
"\\ua191\\u16ef\\u2036\\u017a",
|
||||
|
||||
#if 0
|
||||
@ -610,7 +610,7 @@ void SSearchTest::offsetTest()
|
||||
col->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
|
||||
|
||||
for(int32_t i = 0; i < testCount; i += 1) {
|
||||
UnicodeString ts = test[i].unescape();
|
||||
UnicodeString ts = CharsToUnicodeString(test[i]);
|
||||
CollationElementIterator *iter = col->createCollationElementIterator(ts);
|
||||
OrderList forwardList;
|
||||
OrderList backwardList;
|
||||
@ -644,7 +644,7 @@ void SSearchTest::offsetTest()
|
||||
backwardList.reverse();
|
||||
|
||||
if (forwardList.compare(backwardList)) {
|
||||
logln("Works with \"%s\"", test[i].getTerminatedBuffer());
|
||||
logln("Works with \"%s\"", test[i]);
|
||||
logln("Forward offsets: [%s]", printOffsets(buffer, forwardList));
|
||||
// logln("Backward offsets: [%s]", printOffsets(buffer, backwardList));
|
||||
|
||||
@ -653,7 +653,7 @@ void SSearchTest::offsetTest()
|
||||
|
||||
logln();
|
||||
} else {
|
||||
errln("Fails with \"%S\"", test[i].getTerminatedBuffer());
|
||||
errln("Fails with \"%s\"", test[i]);
|
||||
infoln("Forward offsets: [%s]", printOffsets(buffer, forwardList));
|
||||
infoln("Backward offsets: [%s]", printOffsets(buffer, backwardList));
|
||||
|
||||
|
@ -1480,7 +1480,7 @@ void TestIDNA::TestIDNAMonkeyTest(){
|
||||
/* for debugging */
|
||||
for (i=0; i<(int)(sizeof(failures)/sizeof(failures[0])); i++){
|
||||
source.truncate(0);
|
||||
source.append( failures[i] );
|
||||
// failures[i] is a runtime C string, not a literal; UNICODE_STRING_SIMPLE only
// accepts literals, so convert explicitly via the invariant-character constructor.
source.append( UnicodeString(failures[i], -1, US_INV) );
|
||||
source = source.unescape();
|
||||
source.append((UChar)0x0000);
|
||||
const UChar *src = source.getBuffer();
|
||||
@ -1490,13 +1490,13 @@ void TestIDNA::TestIDNAMonkeyTest(){
|
||||
|
||||
|
||||
source.truncate(0);
|
||||
source.append("\\uCF18\\U00021161\\U000EEF11\\U0002BB82\\U0001D63C");
|
||||
source.append(UNICODE_STRING_SIMPLE("\\uCF18\\U00021161\\U000EEF11\\U0002BB82\\U0001D63C"));
|
||||
debug(source.getBuffer(),source.length(),UIDNA_ALLOW_UNASSIGNED);
|
||||
|
||||
{ // test deletion of code points
|
||||
UnicodeString source("\\u043f\\u00AD\\u034f\\u043e\\u0447\\u0435\\u043c\\u0443\\u0436\\u0435\\u043e\\u043d\\u0438\\u043d\\u0435\\u0433\\u043e\\u0432\\u043e\\u0440\\u044f\\u0442\\u043f\\u043e\\u0440\\u0443\\u0441\\u0441\\u043a\\u0438\\u0000");
|
||||
UnicodeString source("\\u043f\\u00AD\\u034f\\u043e\\u0447\\u0435\\u043c\\u0443\\u0436\\u0435\\u043e\\u043d\\u0438\\u043d\\u0435\\u0433\\u043e\\u0432\\u043e\\u0440\\u044f\\u0442\\u043f\\u043e\\u0440\\u0443\\u0441\\u0441\\u043a\\u0438\\u0000", -1, US_INV);
|
||||
source = source.unescape();
|
||||
UnicodeString expected("\\u043f\\u043e\\u0447\\u0435\\u043c\\u0443\\u0436\\u0435\\u043e\\u043d\\u0438\\u043d\\u0435\\u0433\\u043e\\u0432\\u043e\\u0440\\u044f\\u0442\\u043f\\u043e\\u0440\\u0443\\u0441\\u0441\\u043a\\u0438\\u0000");
|
||||
UnicodeString expected("\\u043f\\u043e\\u0447\\u0435\\u043c\\u0443\\u0436\\u0435\\u043e\\u043d\\u0438\\u043d\\u0435\\u0433\\u043e\\u0432\\u043e\\u0440\\u044f\\u0442\\u043f\\u043e\\u0440\\u0443\\u0441\\u0441\\u043a\\u0438\\u0000", -1, US_INV);
|
||||
expected = expected.unescape();
|
||||
UnicodeString ascii("xn--b1abfaaepdrnnbgefbadotcwatmq2g4l");
|
||||
ascii.append((UChar)0x0000);
|
||||
|
@ -1,6 +1,6 @@
|
||||
/************************************************************************
|
||||
* COPYRIGHT:
|
||||
* Copyright (c) 2000-2007, International Business Machines Corporation
|
||||
* Copyright (c) 2000-2008, International Business Machines Corporation
|
||||
* and others. All Rights Reserved.
|
||||
************************************************************************/
|
||||
/************************************************************************
|
||||
@ -274,7 +274,7 @@ void TransliteratorAPITest::TestTransliterate1(){
|
||||
"Latin-Devanagari",CharsToUnicodeString("bha\\u0304rata"), CharsToUnicodeString("\\u092D\\u093E\\u0930\\u0924") ,
|
||||
"Latin-Devanagari",UnicodeString("kra ksha khra gra cra dya dhya",""), CharsToUnicodeString("\\u0915\\u094D\\u0930 \\u0915\\u094D\\u0936 \\u0916\\u094D\\u0930 \\u0917\\u094D\\u0930 \\u091a\\u094D\\u0930 \\u0926\\u094D\\u092F \\u0927\\u094D\\u092F") ,
|
||||
|
||||
"Devanagari-Latin", CharsToUnicodeString("\\u092D\\u093E\\u0930\\u0924"), UnicodeString("bh\\u0101rata"),
|
||||
"Devanagari-Latin", CharsToUnicodeString("\\u092D\\u093E\\u0930\\u0924"), CharsToUnicodeString("bh\\u0101rata"),
|
||||
// "Contracted-Expanded", CharsToUnicodeString("\\u00C0\\u00C1\\u0042"), CharsToUnicodeString("\\u0041\\u0300\\u0041\\u0301\\u0042") ,
|
||||
// "Expanded-Contracted", CharsToUnicodeString("\\u0041\\u0300\\u0041\\u0301\\u0042"), CharsToUnicodeString("\\u00C0\\u00C1\\u0042") ,
|
||||
//"Latin-Arabic", "aap", CharsToUnicodeString("\\u0627\\u06A4") ,
|
||||
@ -325,7 +325,7 @@ void TransliteratorAPITest::TestTransliterate2(){
|
||||
"Hex-Any", CharsToUnicodeString("\\u0068\\u0065\\u006C\\u006C\\u006F\\u0021\\u0020"), "0", "5", "hello", "hello! " ,
|
||||
// "Contracted-Expanded", CharsToUnicodeString("\\u00C0\\u00C1\\u0042"), "1", "2", CharsToUnicodeString("\\u0041\\u0301"), CharsToUnicodeString("\\u00C0\\u0041\\u0301\\u0042") ,
|
||||
"Devanagari-Latin", CharsToUnicodeString("\\u092D\\u093E\\u0930\\u0924"), "0", "1", "bha", CharsToUnicodeString("bha\\u093E\\u0930\\u0924") ,
|
||||
"Devanagari-Latin", CharsToUnicodeString("\\u092D\\u093E\\u0930\\u0924"), "1", "2", "\\u0314\\u0101", CharsToUnicodeString("\\u092D\\u0314\\u0101\\u0930\\u0924")
|
||||
"Devanagari-Latin", CharsToUnicodeString("\\u092D\\u093E\\u0930\\u0924"), "1", "2", CharsToUnicodeString("\\u0314\\u0101"), CharsToUnicodeString("\\u092D\\u0314\\u0101\\u0930\\u0924")
|
||||
|
||||
};
|
||||
logln("\n Testing transliterate(String, int, int, StringBuffer)");
|
||||
|
@ -522,7 +522,7 @@ void RTTest::test(const UnicodeString& sourceRangeVal,
|
||||
|
||||
this->roundtripExclusionsSet.clear();
|
||||
if (roundtripExclusions != NULL && strlen(roundtripExclusions) > 0) {
|
||||
this->roundtripExclusionsSet.applyPattern(roundtripExclusions, status);
|
||||
// roundtripExclusions is a const char* parameter (see the strlen() check above),
// so it cannot go through the literal-only UNICODE_STRING_SIMPLE macro.
this->roundtripExclusionsSet.applyPattern(UnicodeString(roundtripExclusions, -1, US_INV), status);
|
||||
if (U_FAILURE(status)) {
|
||||
parent->errln("FAIL: UnicodeSet::applyPattern(%s)", roundtripExclusions);
|
||||
return;
|
||||
@ -991,7 +991,7 @@ void TransliteratorRoundTripTest::TestHiragana() {
|
||||
RTTest test("Latin-Hiragana");
|
||||
Legal *legal = new Legal();
|
||||
test.test(UnicodeString("[a-zA-Z]", ""),
|
||||
HIRAGANA,
|
||||
// HIRAGANA is a named constant rather than a literal token, so build the
// UnicodeString with the invariant-character constructor instead of the macro.
UnicodeString(HIRAGANA, -1, US_INV),
|
||||
HIRAGANA_ITERATION, this, quick, legal);
|
||||
delete legal;
|
||||
}
|
||||
@ -1005,7 +1005,7 @@ void TransliteratorRoundTripTest::TestKatakana() {
|
||||
strcat(temp, HALFWIDTH_KATAKANA);
|
||||
strcat(temp, "]");
|
||||
test.test(UnicodeString("[a-zA-Z]", ""),
|
||||
KATAKANA,
|
||||
// KATAKANA is a named constant rather than a literal token, so build the
// UnicodeString with the invariant-character constructor instead of the macro.
UnicodeString(KATAKANA, -1, US_INV),
|
||||
temp,
|
||||
this, quick, legal);
|
||||
delete legal;
|
||||
@ -1105,7 +1105,7 @@ void TransliteratorRoundTripTest::TestHan() {
|
||||
|
||||
UnicodeString nfded = target2;
|
||||
nfd->transliterate(nfded);
|
||||
UnicodeSet allMarks("[\\u0304\\u0301\\u030C\\u0300\\u0306]", status); // look only for Pinyin tone marks, not all marks (there are some others in there)
|
||||
UnicodeSet allMarks(UNICODE_STRING_SIMPLE("[\\u0304\\u0301\\u030C\\u0300\\u0306]"), status); // look only for Pinyin tone marks, not all marks (there are some others in there)
|
||||
ASSERT_SUCCESS(status);
|
||||
assertFalse("NumericPinyin must contain no marks", allMarks.containsSome(nfded));
|
||||
|
||||
@ -1231,10 +1231,10 @@ void TransliteratorRoundTripTest::Testel() {
|
||||
|
||||
|
||||
void TransliteratorRoundTripTest::TestArabic() {
|
||||
UnicodeString ARABIC("[\\u060C\\u061B\\u061F\\u0621\\u0627-\\u063A\\u0641-\\u0655\\u0660-\\u066C\\u067E\\u0686\\u0698\\u06A4\\u06AD\\u06AF\\u06CB-\\u06CC\\u06F0-\\u06F9]");
|
||||
UnicodeString ARABIC("[\\u060C\\u061B\\u061F\\u0621\\u0627-\\u063A\\u0641-\\u0655\\u0660-\\u066C\\u067E\\u0686\\u0698\\u06A4\\u06AD\\u06AF\\u06CB-\\u06CC\\u06F0-\\u06F9]", -1, US_INV);
|
||||
Legal *legal = new Legal();
|
||||
RTTest test("Latin-Arabic");
|
||||
test.test("[a-zA-Z\\u02BE\\u02BF\\u207F]", ARABIC, "[a-zA-Z\\u02BE\\u02BF\\u207F]",this, quick, legal); //
|
||||
test.test(UNICODE_STRING_SIMPLE("[a-zA-Z\\u02BE\\u02BF\\u207F]"), ARABIC, "[a-zA-Z\\u02BE\\u02BF\\u207F]",this, quick, legal); //
|
||||
delete legal;
|
||||
}
|
||||
class LegalHebrew : public Legal {
|
||||
@ -1249,8 +1249,8 @@ public:
|
||||
};
|
||||
|
||||
LegalHebrew::LegalHebrew(UErrorCode& error){
|
||||
FINAL.applyPattern("[\\u05DA\\u05DD\\u05DF\\u05E3\\u05E5]", error);
|
||||
NON_FINAL.applyPattern("[\\u05DB\\u05DE\\u05E0\\u05E4\\u05E6]", error);
|
||||
FINAL.applyPattern(UNICODE_STRING_SIMPLE("[\\u05DA\\u05DD\\u05DF\\u05E3\\u05E5]"), error);
|
||||
NON_FINAL.applyPattern(UNICODE_STRING_SIMPLE("[\\u05DB\\u05DE\\u05E0\\u05E4\\u05E6]"), error);
|
||||
LETTER.applyPattern("[:letter:]", error);
|
||||
}
|
||||
UBool LegalHebrew::is(const UnicodeString& sourceString)const{
|
||||
@ -1285,7 +1285,7 @@ void TransliteratorRoundTripTest::TestHebrew() {
|
||||
return;
|
||||
}
|
||||
RTTest test("Latin-Hebrew");
|
||||
test.test("[a-zA-Z\\u02BC\\u02BB]", "[[[:hebrew:]-[\\u05BD\\uFB00-\\uFBFF]]&[:Age=4.0:]]", "[\\u05F0\\u05F1\\u05F2]", this, quick, legal);
|
||||
test.test(UNICODE_STRING_SIMPLE("[a-zA-Z\\u02BC\\u02BB]"), UNICODE_STRING_SIMPLE("[[[:hebrew:]-[\\u05BD\\uFB00-\\uFBFF]]&[:Age=4.0:]]"), "[\\u05F0\\u05F1\\u05F2]", this, quick, legal);
|
||||
|
||||
//showElapsed(start, "TestHebrew");
|
||||
delete legal;
|
||||
|
@ -1269,7 +1269,7 @@ void TransliteratorTest::TestNameMap(void) {
|
||||
// Careful: CharsToUS will convert "\\N" => "N"; use "\\\\N" for \N
|
||||
expect(*uni2name, CharsToUnicodeString("\\u00A0abc\\u4E01\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF"),
|
||||
CharsToUnicodeString("\\\\N{NO-BREAK SPACE}abc\\\\N{CJK UNIFIED IDEOGRAPH-4E01}\\\\N{MICRO SIGN}\\\\N{GUJARATI SIGN CANDRABINDU}\\\\N{REPLACEMENT CHARACTER}\\\\N{END OF TRANSMISSION}\\\\N{CHARACTER TABULATION}\\\\N{<control-0081>}\\\\N{<noncharacter-FFFF>}"));
|
||||
expect(*name2uni, "{\\N { NO-BREAK SPACE}abc\\N{ CJK UNIFIED IDEOGRAPH-4E01 }\\N{x\\N{MICRO SIGN}\\N{GUJARATI SIGN CANDRABINDU}\\N{REPLACEMENT CHARACTER}\\N{END OF TRANSMISSION}\\N{CHARACTER TABULATION}\\N{<control-0081>}\\N{<noncharacter-FFFF>}\\N{<control-0004>}\\N{",
|
||||
expect(*name2uni, UNICODE_STRING_SIMPLE("{\\N { NO-BREAK SPACE}abc\\N{ CJK UNIFIED IDEOGRAPH-4E01 }\\N{x\\N{MICRO SIGN}\\N{GUJARATI SIGN CANDRABINDU}\\N{REPLACEMENT CHARACTER}\\N{END OF TRANSMISSION}\\N{CHARACTER TABULATION}\\N{<control-0081>}\\N{<noncharacter-FFFF>}\\N{<control-0004>}\\N{"),
|
||||
CharsToUnicodeString("{\\u00A0abc\\u4E01\\\\N{x\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF\\u0004\\\\N{"));
|
||||
|
||||
delete uni2name;
|
||||
@ -1516,7 +1516,7 @@ void TransliteratorTest::TestCompoundRBT(void) {
|
||||
errln("FAIL: createFromRules failed");
|
||||
return;
|
||||
}
|
||||
expect(*t, "\\u0043at in the hat, bat on the mat",
|
||||
expect(*t, UNICODE_STRING_SIMPLE("\\u0043at in the hat, bat on the mat"),
|
||||
"C.A.t IN tHE H.A.t, .B..A.t ON tHE M.A.t");
|
||||
UnicodeString r;
|
||||
t->toRules(r, TRUE);
|
||||
@ -1728,7 +1728,7 @@ void TransliteratorTest::TestToRules(void) {
|
||||
UParseError parseError;
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
Transliterator *t = Transliterator::createFromRules("ID",
|
||||
DATA[d+1], UTRANS_FORWARD, parseError, status);
|
||||
// DATA[d+1] is an array element, not a literal; use the invariant-character
// constructor (as done for DATA[d+2] in this function) instead of the macro.
UnicodeString(DATA[d+1], -1, US_INV), UTRANS_FORWARD, parseError, status);
|
||||
if (t == 0) {
|
||||
errln("FAIL: createFromRules failed");
|
||||
return;
|
||||
@ -1737,19 +1737,19 @@ void TransliteratorTest::TestToRules(void) {
|
||||
t->toRules(rules, FALSE);
|
||||
t->toRules(escapedRules, TRUE);
|
||||
UnicodeString expRules = CharsToUnicodeString(DATA[d+2]);
|
||||
UnicodeString expEscapedRules(DATA[d+2]);
|
||||
UnicodeString expEscapedRules(DATA[d+2], -1, US_INV);
|
||||
if (rules == expRules) {
|
||||
logln((UnicodeString)"Ok: " + DATA[d+1] +
|
||||
// DATA[d+1] is not a string literal, so convert it with the invariant-character
// constructor rather than the literal-only UNICODE_STRING_SIMPLE macro.
logln((UnicodeString)"Ok: " + UnicodeString(DATA[d+1], -1, US_INV) +
|
||||
" => " + rules);
|
||||
} else {
|
||||
errln((UnicodeString)"FAIL: " + DATA[d+1] +
|
||||
// DATA[d+1] is not a string literal, so convert it with the invariant-character
// constructor rather than the literal-only UNICODE_STRING_SIMPLE macro.
errln((UnicodeString)"FAIL: " + UnicodeString(DATA[d+1], -1, US_INV) +
|
||||
" => " + rules + ", exp " + expRules);
|
||||
}
|
||||
if (escapedRules == expEscapedRules) {
|
||||
logln((UnicodeString)"Ok: " + DATA[d+1] +
|
||||
// DATA[d+1] is not a string literal, so convert it with the invariant-character
// constructor rather than the literal-only UNICODE_STRING_SIMPLE macro.
logln((UnicodeString)"Ok: " + UnicodeString(DATA[d+1], -1, US_INV) +
|
||||
" => " + escapedRules);
|
||||
} else {
|
||||
errln((UnicodeString)"FAIL: " + DATA[d+1] +
|
||||
// DATA[d+1] is not a string literal, so convert it with the invariant-character
// constructor rather than the literal-only UNICODE_STRING_SIMPLE macro.
errln((UnicodeString)"FAIL: " + UnicodeString(DATA[d+1], -1, US_INV) +
|
||||
" => " + escapedRules + ", exp " + expEscapedRules);
|
||||
}
|
||||
delete t;
|
||||
@ -1757,8 +1757,8 @@ void TransliteratorTest::TestToRules(void) {
|
||||
} else {
|
||||
// UnicodeSet test
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
UnicodeString pat(DATA[d+1]);
|
||||
UnicodeString expToPat(DATA[d+2]);
|
||||
UnicodeString pat(DATA[d+1], -1, US_INV);
|
||||
UnicodeString expToPat(DATA[d+2], -1, US_INV);
|
||||
UnicodeSet set(pat, status);
|
||||
if (U_FAILURE(status)) {
|
||||
errln("FAIL: UnicodeSet ct failed");
|
||||
@ -1820,23 +1820,23 @@ void TransliteratorTest::TestSupplemental() {
|
||||
|
||||
expectT("Any-Name",
|
||||
CharsToUnicodeString("\\U00010330\\U000E0061\\u00A0"),
|
||||
"\\N{GOTHIC LETTER AHSA}\\N{TAG LATIN SMALL LETTER A}\\N{NO-BREAK SPACE}");
|
||||
UNICODE_STRING_SIMPLE("\\N{GOTHIC LETTER AHSA}\\N{TAG LATIN SMALL LETTER A}\\N{NO-BREAK SPACE}"));
|
||||
|
||||
expectT("Any-Hex/Unicode",
|
||||
CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
|
||||
"U+10330U+10FF00U+E0061U+00A0");
|
||||
UNICODE_STRING_SIMPLE("U+10330U+10FF00U+E0061U+00A0"));
|
||||
|
||||
expectT("Any-Hex/C",
|
||||
CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
|
||||
"\\U00010330\\U0010FF00\\U000E0061\\u00A0");
|
||||
UNICODE_STRING_SIMPLE("\\U00010330\\U0010FF00\\U000E0061\\u00A0"));
|
||||
|
||||
expectT("Any-Hex/Perl",
|
||||
CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
|
||||
"\\x{10330}\\x{10FF00}\\x{E0061}\\x{A0}");
|
||||
UNICODE_STRING_SIMPLE("\\x{10330}\\x{10FF00}\\x{E0061}\\x{A0}"));
|
||||
|
||||
expectT("Any-Hex/Java",
|
||||
CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
|
||||
"\\uD800\\uDF30\\uDBFF\\uDF00\\uDB40\\uDC61\\u00A0");
|
||||
UNICODE_STRING_SIMPLE("\\uD800\\uDF30\\uDBFF\\uDF00\\uDB40\\uDC61\\u00A0"));
|
||||
|
||||
expectT("Any-Hex/XML",
|
||||
CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
|
||||
@ -1846,7 +1846,7 @@ void TransliteratorTest::TestSupplemental() {
|
||||
CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
|
||||
"𐌰􏼀󠁡 ");
|
||||
|
||||
expectT("[\\U000E0000-\\U000E0FFF] Remove",
|
||||
expectT(UNICODE_STRING_SIMPLE("[\\U000E0000-\\U000E0FFF] Remove"),
|
||||
CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
|
||||
CharsToUnicodeString("\\U00010330\\U0010FF00\\u00A0"));
|
||||
}
|
||||
@ -2363,7 +2363,7 @@ void TransliteratorTest::TestCompoundFilterID(void) {
|
||||
* Test new property set syntax
|
||||
*/
|
||||
void TransliteratorTest::TestPropertySet() {
|
||||
expect("a>A; \\p{Lu}>x; \\p{ANY}>y;", "abcDEF", "Ayyxxx");
|
||||
expect(UNICODE_STRING_SIMPLE("a>A; \\p{Lu}>x; \\p{ANY}>y;"), "abcDEF", "Ayyxxx");
|
||||
expect("(.+)>'[' $1 ']';", " a stitch \n in time \r saves 9",
|
||||
"[ a stitch ]\n[ in time ]\r[ saves 9]");
|
||||
}
|
||||
@ -2838,8 +2838,8 @@ void TransliteratorTest::TestGurmukhiDevanagari(){
|
||||
// (\u0902) (when preceded by vowel) ---> (\u0A02)
|
||||
// (\u0902) (when preceded by consonant) ---> (\u0A70)
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
UnicodeSet vowel(UnicodeString("[\\u0905-\\u090A \\u090F\\u0910\\u0913\\u0914 \\u093e-\\u0942\\u0947\\u0948\\u094B\\u094C\\u094D]").unescape(), status);
|
||||
UnicodeSet non_vowel(UnicodeString("[\\u0915-\\u0928\\u092A-\\u0930]").unescape(), status);
|
||||
UnicodeSet vowel(UnicodeString("[\\u0905-\\u090A \\u090F\\u0910\\u0913\\u0914 \\u093e-\\u0942\\u0947\\u0948\\u094B\\u094C\\u094D]", -1, US_INV).unescape(), status);
|
||||
UnicodeSet non_vowel(UnicodeString("[\\u0915-\\u0928\\u092A-\\u0930]", -1, US_INV).unescape(), status);
|
||||
UParseError parseError;
|
||||
|
||||
UnicodeSetIterator vIter(vowel);
|
||||
@ -2850,8 +2850,8 @@ void TransliteratorTest::TestGurmukhiDevanagari(){
|
||||
delete trans;
|
||||
return;
|
||||
}
|
||||
UnicodeString src (" \\u0902");
|
||||
UnicodeString expected(" \\u0A02");
|
||||
UnicodeString src (" \\u0902", -1, US_INV);
|
||||
UnicodeString expected(" \\u0A02", -1, US_INV);
|
||||
src = src.unescape();
|
||||
expected= expected.unescape();
|
||||
|
||||
@ -3165,8 +3165,8 @@ void TransliteratorTest::TestToRulesMark() {
|
||||
|
||||
UParseError pe;
|
||||
UErrorCode ec = U_ZERO_ERROR;
|
||||
Transliterator *t2 = Transliterator::createFromRules("source-target", testRules, UTRANS_FORWARD, pe, ec);
|
||||
Transliterator *t3 = Transliterator::createFromRules("target-source", testRules, UTRANS_REVERSE, pe, ec);
|
||||
// testRules is a variable, not a literal; UNICODE_STRING_SIMPLE requires a literal,
// so construct the rule string explicitly from invariant characters.
Transliterator *t2 = Transliterator::createFromRules("source-target", UnicodeString(testRules, -1, US_INV), UTRANS_FORWARD, pe, ec);
|
||||
// testRules is a variable, not a literal; UNICODE_STRING_SIMPLE requires a literal,
// so construct the rule string explicitly from invariant characters.
Transliterator *t3 = Transliterator::createFromRules("target-source", UnicodeString(testRules, -1, US_INV), UTRANS_REVERSE, pe, ec);
|
||||
|
||||
if (U_FAILURE(ec)) {
|
||||
delete t2;
|
||||
@ -3178,8 +3178,8 @@ void TransliteratorTest::TestToRulesMark() {
|
||||
expect(*t2, source, target);
|
||||
expect(*t3, target, source);
|
||||
|
||||
checkRules("Failed toRules FORWARD", *t2, testRulesForward);
|
||||
checkRules("Failed toRules BACKWARD", *t3, testRulesBackward);
|
||||
// testRulesForward is a variable, not a literal, so it cannot be passed to the
// literal-only UNICODE_STRING_SIMPLE macro; convert it explicitly instead.
checkRules("Failed toRules FORWARD", *t2, UnicodeString(testRulesForward, -1, US_INV));
|
||||
// testRulesBackward is a variable, not a literal, so it cannot be passed to the
// literal-only UNICODE_STRING_SIMPLE macro; convert it explicitly instead.
checkRules("Failed toRules BACKWARD", *t3, UnicodeString(testRulesBackward, -1, US_INV));
|
||||
|
||||
delete t2;
|
||||
delete t3;
|
||||
@ -3199,7 +3199,7 @@ void TransliteratorTest::TestEscape() {
|
||||
errln((UnicodeString)"FAIL: createInstance");
|
||||
} else {
|
||||
expect(*t,
|
||||
"\\x{40}\\U000000312Q",
|
||||
UNICODE_STRING_SIMPLE("\\x{40}\\U000000312Q"),
|
||||
"@12Q");
|
||||
}
|
||||
delete t;
|
||||
@ -3211,7 +3211,7 @@ void TransliteratorTest::TestEscape() {
|
||||
} else {
|
||||
expect(*t,
|
||||
CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
|
||||
"\\u0041\\U0010BEEF\\uFEED");
|
||||
UNICODE_STRING_SIMPLE("\\u0041\\U0010BEEF\\uFEED"));
|
||||
}
|
||||
delete t;
|
||||
|
||||
@ -3222,7 +3222,7 @@ void TransliteratorTest::TestEscape() {
|
||||
} else {
|
||||
expect(*t,
|
||||
CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
|
||||
"\\u0041\\uDBEF\\uDEEF\\uFEED");
|
||||
UNICODE_STRING_SIMPLE("\\u0041\\uDBEF\\uDEEF\\uFEED"));
|
||||
}
|
||||
delete t;
|
||||
|
||||
@ -3233,7 +3233,7 @@ void TransliteratorTest::TestEscape() {
|
||||
} else {
|
||||
expect(*t,
|
||||
CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
|
||||
"\\x{41}\\x{10BEEF}\\x{FEED}");
|
||||
UNICODE_STRING_SIMPLE("\\x{41}\\x{10BEEF}\\x{FEED}"));
|
||||
}
|
||||
delete t;
|
||||
}
|
||||
@ -3638,7 +3638,7 @@ void TransliteratorTest::TestFunction() {
|
||||
}
|
||||
|
||||
expect(*t, "The Quick Brown Fox",
|
||||
"T(t=\\u0074)he Q(q=\\u0071)uick B(b=\\u0062)rown F(f=\\u0066)ox");
|
||||
UNICODE_STRING_SIMPLE("T(t=\\u0074)he Q(q=\\u0071)uick B(b=\\u0062)rown F(f=\\u0066)ox"));
|
||||
|
||||
delete t;
|
||||
}
|
||||
@ -3756,7 +3756,7 @@ void TransliteratorTest::TestUserFunction() {
|
||||
|
||||
// There's no need to register inverses if we don't use them
|
||||
t = Transliterator::createFromRules("gif",
|
||||
"'\\'u(..)(..) > '<img src=\"http://www.unicode.org/gifs/24/' $1 '/U' $1$2 '.gif\">';",
|
||||
UNICODE_STRING_SIMPLE("'\\'u(..)(..) > '<img src=\"http://www.unicode.org/gifs/24/' $1 '/U' $1$2 '.gif\">';"),
|
||||
UTRANS_FORWARD, pe, ec);
|
||||
if (t == NULL || U_FAILURE(ec)) {
|
||||
errln((UnicodeString)"FAIL: createFromRules gif " + u_errorName(ec));
|
||||
@ -3765,13 +3765,13 @@ void TransliteratorTest::TestUserFunction() {
|
||||
_TUFReg("Any-gif", t, 0);
|
||||
|
||||
t = Transliterator::createFromRules("RemoveCurly",
|
||||
"[\\{\\}] > ; '\\N' > ;",
|
||||
UNICODE_STRING_SIMPLE("[\\{\\}] > ; '\\N' > ;"),
|
||||
UTRANS_FORWARD, pe, ec);
|
||||
if (t == NULL || U_FAILURE(ec)) {
|
||||
errln((UnicodeString)"FAIL: createFromRules RemoveCurly " + u_errorName(ec));
|
||||
goto FAIL;
|
||||
}
|
||||
expect(*t, "\\N{name}", "name");
|
||||
expect(*t, UNICODE_STRING_SIMPLE("\\N{name}"), "name");
|
||||
_TUFReg("Any-RemoveCurly", t, 1);
|
||||
|
||||
logln("Trying &hex");
|
||||
@ -3789,7 +3789,7 @@ void TransliteratorTest::TestUserFunction() {
|
||||
errln((UnicodeString)"FAIL: createInstance Any-hex2 " + u_errorName(ec));
|
||||
goto FAIL;
|
||||
}
|
||||
expect(*t, "abc", "\\u0061\\u0062\\u0063");
|
||||
expect(*t, "abc", UNICODE_STRING_SIMPLE("\\u0061\\u0062\\u0063"));
|
||||
delete t;
|
||||
|
||||
logln("Trying &gif");
|
||||
@ -3820,7 +3820,7 @@ void TransliteratorTest::TestUserFunction() {
|
||||
goto FAIL;
|
||||
}
|
||||
expect(*t, "abc",
|
||||
"\\u0061 LATIN SMALL LETTER A \\u0062 LATIN SMALL LETTER B \\u0063 LATIN SMALL LETTER C ");
|
||||
UNICODE_STRING_SIMPLE("\\u0061 LATIN SMALL LETTER A \\u0062 LATIN SMALL LETTER B \\u0063 LATIN SMALL LETTER C "));
|
||||
delete t;
|
||||
|
||||
FAIL:
|
||||
@ -4039,7 +4039,7 @@ void TransliteratorTest::TestAlternateSyntax() {
|
||||
"xbz");
|
||||
expect(CharsToUnicodeString("([:^ASCII:]) \\u2192 \\u2206Name($1);"),
|
||||
CharsToUnicodeString("<=\\u2190; >=\\u2192; <>=\\u2194; &=\\u2206"),
|
||||
"<=\\N{LEFTWARDS ARROW}; >=\\N{RIGHTWARDS ARROW}; <>=\\N{LEFT RIGHT ARROW}; &=\\N{INCREMENT}");
|
||||
UNICODE_STRING_SIMPLE("<=\\N{LEFTWARDS ARROW}; >=\\N{RIGHTWARDS ARROW}; <>=\\N{LEFT RIGHT ARROW}; &=\\N{INCREMENT}"));
|
||||
}
|
||||
|
||||
static const char* BEGIN_END_RULES[] = {
|
||||
@ -4276,9 +4276,9 @@ void TransliteratorTest::TestBeginEnd() {
|
||||
int32_t i = 0;
|
||||
for (i = 0; i < BEGIN_END_TEST_CASES_length; i += 3) {
|
||||
expect((UnicodeString)"Test case #" + (i / 3),
|
||||
UnicodeString(BEGIN_END_TEST_CASES[i]),
|
||||
UnicodeString(BEGIN_END_TEST_CASES[i + 1]),
|
||||
UnicodeString(BEGIN_END_TEST_CASES[i + 2]));
|
||||
UnicodeString(BEGIN_END_TEST_CASES[i], -1, US_INV),
|
||||
UnicodeString(BEGIN_END_TEST_CASES[i + 1], -1, US_INV),
|
||||
UnicodeString(BEGIN_END_TEST_CASES[i + 2], -1, US_INV));
|
||||
}
|
||||
|
||||
// instantiate the one reversible rule set in the reverse direction and make sure it does the right thing
|
||||
@ -4319,7 +4319,7 @@ void TransliteratorTest::TestBeginEndToRules() {
|
||||
for (int32_t i = 0; i < BEGIN_END_TEST_CASES_length; i += 3) {
|
||||
UParseError parseError;
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
Transliterator* t = Transliterator::createFromRules("--", UnicodeString(BEGIN_END_TEST_CASES[i]),
|
||||
Transliterator* t = Transliterator::createFromRules("--", UnicodeString(BEGIN_END_TEST_CASES[i], -1, US_INV),
|
||||
UTRANS_FORWARD, parseError, status);
|
||||
if (U_FAILURE(status)) {
|
||||
reportParseError(UnicodeString("FAIL: Couldn't create transliterator"), parseError, status);
|
||||
@ -4334,8 +4334,8 @@ void TransliteratorTest::TestBeginEndToRules() {
|
||||
delete t;
|
||||
} else {
|
||||
expect(*t2,
|
||||
UnicodeString(BEGIN_END_TEST_CASES[i + 1]),
|
||||
UnicodeString(BEGIN_END_TEST_CASES[i + 2]));
|
||||
UnicodeString(BEGIN_END_TEST_CASES[i + 1], -1, US_INV),
|
||||
UnicodeString(BEGIN_END_TEST_CASES[i + 2], -1, US_INV));
|
||||
delete t;
|
||||
delete t2;
|
||||
}
|
||||
|
@ -170,19 +170,19 @@ void UnicodeSetTest::TestToPattern() {
|
||||
const char* exp2[] = {"aa", "ab", "ac", NOT, "xy", NULL};
|
||||
expectToPattern(*s, "[a-z{aa}{ab}{ac}]", exp2);
|
||||
|
||||
s->applyPattern("[a-z {\\{l} {r\\}}]", ec);
|
||||
s->applyPattern(UNICODE_STRING_SIMPLE("[a-z {\\{l} {r\\}}]"), ec);
|
||||
if (U_FAILURE(ec)) break;
|
||||
const char* exp3[] = {"{l", "r}", NOT, "xy", NULL};
|
||||
expectToPattern(*s, "[a-z{r\\}}{\\{l}]", exp3);
|
||||
expectToPattern(*s, UNICODE_STRING_SIMPLE("[a-z{r\\}}{\\{l}]"), exp3);
|
||||
|
||||
s->add("[]");
|
||||
const char* exp4[] = {"{l", "r}", "[]", NOT, "xy", NULL};
|
||||
expectToPattern(*s, "[a-z{\\[\\]}{r\\}}{\\{l}]", exp4);
|
||||
expectToPattern(*s, UNICODE_STRING_SIMPLE("[a-z{\\[\\]}{r\\}}{\\{l}]"), exp4);
|
||||
|
||||
s->applyPattern("[a-z {\\u4E01\\u4E02}{\\n\\r}]", ec);
|
||||
s->applyPattern(UNICODE_STRING_SIMPLE("[a-z {\\u4E01\\u4E02}{\\n\\r}]"), ec);
|
||||
if (U_FAILURE(ec)) break;
|
||||
const char* exp5[] = {"\\u4E01\\u4E02", "\n\r", NULL};
|
||||
expectToPattern(*s, "[a-z{\\u000A\\u000D}{\\u4E01\\u4E02}]", exp5);
|
||||
expectToPattern(*s, UNICODE_STRING_SIMPLE("[a-z{\\u000A\\u000D}{\\u4E01\\u4E02}]"), exp5);
|
||||
|
||||
// j2189
|
||||
s->clear();
|
||||
@ -318,14 +318,14 @@ UnicodeSetTest::TestCloneEqualHash(void) {
|
||||
// set1 and set2 used to be built with the obsolete constructor taking
|
||||
// UCharCategory values; replaced with pattern constructors
|
||||
// markus 20030502
|
||||
UnicodeSet *set1=new UnicodeSet("\\p{Lowercase Letter}", status); // :Ll: Letter, lowercase
|
||||
UnicodeSet *set1a=new UnicodeSet("[:Ll:]", status); // Letter, lowercase
|
||||
UnicodeSet *set1=new UnicodeSet(UNICODE_STRING_SIMPLE("\\p{Lowercase Letter}"), status); // :Ll: Letter, lowercase
|
||||
UnicodeSet *set1a=new UnicodeSet(UNICODE_STRING_SIMPLE("[:Ll:]"), status); // Letter, lowercase
|
||||
if (U_FAILURE(status)){
|
||||
errln((UnicodeString)"FAIL: Can't construst set with category->Ll");
|
||||
return;
|
||||
}
|
||||
UnicodeSet *set2=new UnicodeSet("\\p{Decimal Number}", status); //Number, Decimal digit
|
||||
UnicodeSet *set2a=new UnicodeSet("[:Nd:]", status); //Number, Decimal digit
|
||||
UnicodeSet *set2=new UnicodeSet(UNICODE_STRING_SIMPLE("\\p{Decimal Number}"), status); //Number, Decimal digit
|
||||
UnicodeSet *set2a=new UnicodeSet(UNICODE_STRING_SIMPLE("[:Nd:]"), status); //Number, Decimal digit
|
||||
if (U_FAILURE(status)){
|
||||
errln((UnicodeString)"FAIL: Can't construct set with category->Nd");
|
||||
return;
|
||||
@ -705,7 +705,7 @@ void UnicodeSetTest::TestIteration() {
|
||||
|
||||
// 6 code points, 3 ranges, 2 strings, 8 total elements
|
||||
// Iteration will access them in sorted order - a, b, c, y, z, U0001abcd, "str1", "str2"
|
||||
UnicodeSet set("[zabyc\\U0001abcd{str1}{str2}]", ec);
|
||||
UnicodeSet set(UNICODE_STRING_SIMPLE("[zabyc\\U0001abcd{str1}{str2}]"), ec);
|
||||
TEST_ASSERT_SUCCESS(ec);
|
||||
UnicodeSetIterator it(set);
|
||||
|
||||
@ -822,12 +822,12 @@ void UnicodeSetTest::TestStrings() {
|
||||
* Test the [:Latin:] syntax.
|
||||
*/
|
||||
void UnicodeSetTest::TestScriptSet() {
|
||||
expectContainment("[:Latin:]", "aA", CharsToUnicodeString("\\u0391\\u03B1"));
|
||||
expectContainment(UNICODE_STRING_SIMPLE("[:Latin:]"), "aA", CharsToUnicodeString("\\u0391\\u03B1"));
|
||||
|
||||
expectContainment("[:Greek:]", CharsToUnicodeString("\\u0391\\u03B1"), "aA");
|
||||
expectContainment(UNICODE_STRING_SIMPLE("[:Greek:]"), CharsToUnicodeString("\\u0391\\u03B1"), "aA");
|
||||
|
||||
/* Jitterbug 1423 */
|
||||
expectContainment("[[:Common:][:Inherited:]]", CharsToUnicodeString("\\U00003099\\U0001D169\\u0000"), "aA");
|
||||
expectContainment(UNICODE_STRING_SIMPLE("[[:Common:][:Inherited:]]"), CharsToUnicodeString("\\U00003099\\U0001D169\\u0000"), "aA");
|
||||
|
||||
}
|
||||
|
||||
@ -1002,7 +1002,7 @@ void UnicodeSetTest::TestPropertySet() {
|
||||
static const int32_t DATA_LEN = sizeof(DATA)/sizeof(DATA[0]);
|
||||
|
||||
for (int32_t i=0; i<DATA_LEN; i+=3) {
|
||||
expectContainment(DATA[i], CharsToUnicodeString(DATA[i+1]),
|
||||
// DATA[i] is an array element, not a literal; build the pattern string with the
// invariant-character constructor instead of the literal-only macro.
expectContainment(UnicodeString(DATA[i], -1, US_INV), CharsToUnicodeString(DATA[i+1]),
|
||||
CharsToUnicodeString(DATA[i+2]));
|
||||
}
|
||||
}
|
||||
@ -1015,56 +1015,56 @@ void UnicodeSetTest::TestPosixClasses() {
|
||||
{
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
UnicodeSet s1("[:alpha:]", status);
|
||||
UnicodeSet s2("\\p{Alphabetic}", status);
|
||||
UnicodeSet s2(UNICODE_STRING_SIMPLE("\\p{Alphabetic}"), status);
|
||||
TEST_ASSERT_SUCCESS(status);
|
||||
TEST_ASSERT(s1==s2);
|
||||
}
|
||||
{
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
UnicodeSet s1("[:lower:]", status);
|
||||
UnicodeSet s2("\\p{lowercase}", status);
|
||||
UnicodeSet s2(UNICODE_STRING_SIMPLE("\\p{lowercase}"), status);
|
||||
TEST_ASSERT_SUCCESS(status);
|
||||
TEST_ASSERT(s1==s2);
|
||||
}
|
||||
{
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
UnicodeSet s1("[:upper:]", status);
|
||||
UnicodeSet s2("\\p{Uppercase}", status);
|
||||
UnicodeSet s2(UNICODE_STRING_SIMPLE("\\p{Uppercase}"), status);
|
||||
TEST_ASSERT_SUCCESS(status);
|
||||
TEST_ASSERT(s1==s2);
|
||||
}
|
||||
{
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
UnicodeSet s1("[:punct:]", status);
|
||||
UnicodeSet s2("\\p{gc=Punctuation}", status);
|
||||
UnicodeSet s2(UNICODE_STRING_SIMPLE("\\p{gc=Punctuation}"), status);
|
||||
TEST_ASSERT_SUCCESS(status);
|
||||
TEST_ASSERT(s1==s2);
|
||||
}
|
||||
{
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
UnicodeSet s1("[:digit:]", status);
|
||||
UnicodeSet s2("\\p{gc=DecimalNumber}", status);
|
||||
UnicodeSet s2(UNICODE_STRING_SIMPLE("\\p{gc=DecimalNumber}"), status);
|
||||
TEST_ASSERT_SUCCESS(status);
|
||||
TEST_ASSERT(s1==s2);
|
||||
}
|
||||
{
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
UnicodeSet s1("[:xdigit:]", status);
|
||||
UnicodeSet s2("[\\p{DecimalNumber}\\p{HexDigit}]", status);
|
||||
UnicodeSet s2(UNICODE_STRING_SIMPLE("[\\p{DecimalNumber}\\p{HexDigit}]"), status);
|
||||
TEST_ASSERT_SUCCESS(status);
|
||||
TEST_ASSERT(s1==s2);
|
||||
}
|
||||
{
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
UnicodeSet s1("[:alnum:]", status);
|
||||
UnicodeSet s2("[\\p{Alphabetic}\\p{DecimalNumber}]", status);
|
||||
UnicodeSet s2(UNICODE_STRING_SIMPLE("[\\p{Alphabetic}\\p{DecimalNumber}]"), status);
|
||||
TEST_ASSERT_SUCCESS(status);
|
||||
TEST_ASSERT(s1==s2);
|
||||
}
|
||||
{
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
UnicodeSet s1("[:space:]", status);
|
||||
UnicodeSet s2("\\p{Whitespace}", status);
|
||||
UnicodeSet s2(UNICODE_STRING_SIMPLE("\\p{Whitespace}"), status);
|
||||
TEST_ASSERT_SUCCESS(status);
|
||||
TEST_ASSERT(s1==s2);
|
||||
}
|
||||
@ -1072,7 +1072,7 @@ void UnicodeSetTest::TestPosixClasses() {
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
UnicodeSet s1("[:blank:]", status);
|
||||
TEST_ASSERT_SUCCESS(status);
|
||||
UnicodeSet s2("[\\p{Whitespace}-[\\u000a\\u000B\\u000c\\u000d\\u0085\\p{LineSeparator}\\p{ParagraphSeparator}]]",
|
||||
UnicodeSet s2(UNICODE_STRING_SIMPLE("[\\p{Whitespace}-[\\u000a\\u000B\\u000c\\u000d\\u0085\\p{LineSeparator}\\p{ParagraphSeparator}]]"),
|
||||
status);
|
||||
TEST_ASSERT_SUCCESS(status);
|
||||
TEST_ASSERT(s1==s2);
|
||||
@ -1081,7 +1081,7 @@ void UnicodeSetTest::TestPosixClasses() {
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
UnicodeSet s1("[:cntrl:]", status);
|
||||
TEST_ASSERT_SUCCESS(status);
|
||||
UnicodeSet s2("\\p{Control}", status);
|
||||
UnicodeSet s2(UNICODE_STRING_SIMPLE("\\p{Control}"), status);
|
||||
TEST_ASSERT_SUCCESS(status);
|
||||
TEST_ASSERT(s1==s2);
|
||||
}
|
||||
@ -1089,7 +1089,7 @@ void UnicodeSetTest::TestPosixClasses() {
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
UnicodeSet s1("[:graph:]", status);
|
||||
TEST_ASSERT_SUCCESS(status);
|
||||
UnicodeSet s2("[^\\p{Whitespace}\\p{Control}\\p{Surrogate}\\p{Unassigned}]", status);
|
||||
UnicodeSet s2(UNICODE_STRING_SIMPLE("[^\\p{Whitespace}\\p{Control}\\p{Surrogate}\\p{Unassigned}]"), status);
|
||||
TEST_ASSERT_SUCCESS(status);
|
||||
TEST_ASSERT(s1==s2);
|
||||
}
|
||||
@ -1097,7 +1097,7 @@ void UnicodeSetTest::TestPosixClasses() {
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
UnicodeSet s1("[:print:]", status);
|
||||
TEST_ASSERT_SUCCESS(status);
|
||||
UnicodeSet s2("[[:graph:][:blank:]-[\\p{Control}]]" ,status);
|
||||
UnicodeSet s2(UNICODE_STRING_SIMPLE("[[:graph:][:blank:]-[\\p{Control}]]") ,status);
|
||||
TEST_ASSERT_SUCCESS(status);
|
||||
TEST_ASSERT(s1==s2);
|
||||
}
|
||||
@ -1222,8 +1222,8 @@ void UnicodeSetTest::TestCloseOver() {
|
||||
UnicodeString buf;
|
||||
for (int32_t i=0; DATA[i]!=NULL; i+=3) {
|
||||
int32_t selector = DATA[i][0];
|
||||
UnicodeString pat(DATA[i+1]);
|
||||
UnicodeString exp(DATA[i+2]);
|
||||
UnicodeString pat(DATA[i+1], -1, US_INV);
|
||||
UnicodeString exp(DATA[i+2], -1, US_INV);
|
||||
s.applyPattern(pat, ec);
|
||||
s.closeOver(selector);
|
||||
t.applyPattern(exp, ec);
|
||||
@ -1331,7 +1331,7 @@ void UnicodeSetTest::TestEscapePattern() {
|
||||
// this fails -- which is what we expect.
|
||||
for (int32_t pass=1; pass<=2; ++pass) {
|
||||
UErrorCode ec = U_ZERO_ERROR;
|
||||
UnicodeString pat(pattern);
|
||||
UnicodeString pat(pattern, -1, US_INV);
|
||||
if (pass==2) {
|
||||
pat = pat.unescape();
|
||||
}
|
||||
@ -1354,7 +1354,7 @@ void UnicodeSetTest::TestEscapePattern() {
|
||||
|
||||
UnicodeString newpat;
|
||||
set.toPattern(newpat, TRUE);
|
||||
if (newpat == exp) {
|
||||
if (newpat == UNICODE_STRING_SIMPLE(exp)) {
|
||||
logln(escape(pat) + " => " + newpat);
|
||||
} else {
|
||||
errln((UnicodeString)"FAIL: " + escape(pat) + " => " + newpat);
|
||||
@ -1587,7 +1587,7 @@ void UnicodeSetTest::TestSymbolTable() {
|
||||
|
||||
// Set up variables
|
||||
while (DATA[i+2] != NULL) {
|
||||
sym.add(DATA[i], DATA[i+1], ec);
|
||||
sym.add(UNICODE_STRING_SIMPLE(DATA[i]), UNICODE_STRING_SIMPLE(DATA[i+1]), ec);
|
||||
if (U_FAILURE(ec)) {
|
||||
errln("FAIL: couldn't add to TokenSymbolTable");
|
||||
continue;
|
||||
@ -1596,7 +1596,7 @@ void UnicodeSetTest::TestSymbolTable() {
|
||||
}
|
||||
|
||||
// Input pattern and expected output pattern
|
||||
UnicodeString inpat = DATA[i], exppat = DATA[i+1];
|
||||
UnicodeString inpat = UNICODE_STRING_SIMPLE(DATA[i]), exppat = UNICODE_STRING_SIMPLE(DATA[i+1]);
|
||||
i += 2;
|
||||
|
||||
ParsePosition pos(0);
|
||||
@ -1640,8 +1640,8 @@ void UnicodeSetTest::TestSurrogate() {
|
||||
};
|
||||
for (int i=0; DATA[i] != 0; ++i) {
|
||||
UErrorCode ec = U_ZERO_ERROR;
|
||||
logln((UnicodeString)"Test pattern " + i + " :" + DATA[i]);
|
||||
UnicodeSet set(DATA[i], ec);
|
||||
logln((UnicodeString)"Test pattern " + i + " :" + UNICODE_STRING_SIMPLE(DATA[i]));
|
||||
UnicodeSet set(UNICODE_STRING_SIMPLE(DATA[i]), ec);
|
||||
if (U_FAILURE(ec)) {
|
||||
errln("FAIL: UnicodeSet constructor");
|
||||
continue;
|
||||
@ -1650,7 +1650,7 @@ void UnicodeSetTest::TestSurrogate() {
|
||||
CharsToUnicodeString("abc\\U00010000"),
|
||||
CharsToUnicodeString("\\uD800;\\uDC00")); // split apart surrogate-pair
|
||||
if (set.size() != 4) {
|
||||
errln((UnicodeString)"FAIL: " + DATA[i] + ".size() == " +
|
||||
errln((UnicodeString)"FAIL: " + UNICODE_STRING_SIMPLE(DATA[i]) + ".size() == " +
|
||||
set.size() + ", expected 4");
|
||||
}
|
||||
}
|
||||
|
@ -1,6 +1,6 @@
|
||||
/********************************************************************
|
||||
* COPYRIGHT:
|
||||
* Copyright (c) 2005-2007, International Business Machines Corporation and
|
||||
* Copyright (c) 2005-2008, International Business Machines Corporation and
|
||||
* others. All Rights Reserved.
|
||||
********************************************************************/
|
||||
/************************************************************************
|
||||
@ -1044,7 +1044,7 @@ void UTextTest::ErrorTest()
|
||||
{ // Similar test, with utf16 instead of utf8
|
||||
// TODO: merge the common parts of these tests.
|
||||
|
||||
UnicodeString u16str("\\u1000\\U00011000\\u2000\\U00022000");
|
||||
UnicodeString u16str("\\u1000\\U00011000\\u2000\\U00022000", -1, US_INV);
|
||||
int32_t startMap[] ={ 0, 1, 1, 3, 4, 4, 6, 6};
|
||||
int32_t nextMap[] = { 1, 3, 3, 4, 6, 6, 6, 6};
|
||||
int32_t prevMap[] = { 0, 0, 0, 1, 3, 3, 4, 4};
|
||||
@ -1112,7 +1112,7 @@ void UTextTest::ErrorTest()
|
||||
{ // Similar test, with UText over Replaceable
|
||||
// TODO: merge the common parts of these tests.
|
||||
|
||||
UnicodeString u16str("\\u1000\\U00011000\\u2000\\U00022000");
|
||||
UnicodeString u16str("\\u1000\\U00011000\\u2000\\U00022000", -1, US_INV);
|
||||
int32_t startMap[] ={ 0, 1, 1, 3, 4, 4, 6, 6};
|
||||
int32_t nextMap[] = { 1, 3, 3, 4, 6, 6, 6, 6};
|
||||
int32_t prevMap[] = { 0, 0, 0, 1, 3, 3, 4, 4};
|
||||
|
@ -61,16 +61,16 @@ UXMLParser::UXMLParser(UErrorCode &status) :
|
||||
// example: <?xml version="1.0" encoding="utf-16" ?>
|
||||
// This is a sloppy implementation - just look for the leading <?xml and the closing ?>
|
||||
// allow for a possible leading BOM.
|
||||
mXMLDecl(UnicodeString("(?s)\\uFEFF?<\\?xml.+?\\?>"), 0, status),
|
||||
mXMLDecl(UnicodeString("(?s)\\uFEFF?<\\?xml.+?\\?>", -1, US_INV), 0, status),
|
||||
|
||||
// XML Comment production #15
|
||||
// example: <!-- whatever -->
|
||||
// note, does not detect an illegal "--" within comments
|
||||
mXMLComment(UnicodeString("(?s)<!--.+?-->"), 0, status),
|
||||
mXMLComment(UnicodeString("(?s)<!--.+?-->", -1, US_INV), 0, status),
|
||||
|
||||
// XML Spaces
|
||||
// production [3]
|
||||
mXMLSP(UnicodeString(XML_SPACES "+"), 0, status),
|
||||
mXMLSP(UnicodeString(XML_SPACES "+", -1, US_INV), 0, status),
|
||||
|
||||
// XML Doctype decl production #28
|
||||
// example <!DOCTYPE foo SYSTEM "somewhere" >
|
||||
@ -81,12 +81,12 @@ UXMLParser::UXMLParser(UErrorCode &status) :
|
||||
// of closing square brackets. These could appear in comments,
|
||||
// or in parameter entity declarations, for example.
|
||||
mXMLDoctype(UnicodeString(
|
||||
"(?s)<!DOCTYPE.*?(>|\\[.*?\\].*?>)"
|
||||
"(?s)<!DOCTYPE.*?(>|\\[.*?\\].*?>)", -1, US_INV
|
||||
), 0, status),
|
||||
|
||||
// XML PI production #16
|
||||
// example <?target stuff?>
|
||||
mXMLPI(UnicodeString("(?s)<\\?.+?\\?>"), 0, status),
|
||||
mXMLPI(UnicodeString("(?s)<\\?.+?\\?>", -1, US_INV), 0, status),
|
||||
|
||||
// XML Element Start Productions #40, #41
|
||||
// example <foo att1='abc' att2="d e f" >
|
||||
@ -97,11 +97,11 @@ UXMLParser::UXMLParser(UErrorCode &status) :
|
||||
XML_SPACES "+" XML_NAME XML_SPACES "*=" XML_SPACES "*" // match "ATTR_NAME = "
|
||||
"(?:(?:\\\'[^<\\\']*?\\\')|(?:\\\"[^<\\\"]*?\\\"))" // match '"attribute value"'
|
||||
")*" // * for zero or more attributes.
|
||||
XML_SPACES "*?>"), 0, status), // match " >"
|
||||
XML_SPACES "*?>", -1, US_INV), 0, status), // match " >"
|
||||
|
||||
// XML Element End production #42
|
||||
// example </foo>
|
||||
mXMLElemEnd (UnicodeString("</(" XML_NAME ")" XML_SPACES "*>"), 0, status),
|
||||
mXMLElemEnd (UnicodeString("</(" XML_NAME ")" XML_SPACES "*>", -1, US_INV), 0, status),
|
||||
|
||||
// XML Element Empty production #44
|
||||
// example <foo att1="abc" att2="d e f" />
|
||||
@ -110,11 +110,11 @@ UXMLParser::UXMLParser(UErrorCode &status) :
|
||||
XML_SPACES "+" XML_NAME XML_SPACES "*=" XML_SPACES "*" // match "ATTR_NAME = "
|
||||
"(?:(?:\\\'[^<\\\']*?\\\')|(?:\\\"[^<\\\"]*?\\\"))" // match '"attribute value"'
|
||||
")*" // * for zero or more attributes.
|
||||
XML_SPACES "*?/>"), 0, status), // match " />"
|
||||
XML_SPACES "*?/>", -1, US_INV), 0, status), // match " />"
|
||||
|
||||
|
||||
// XMLCharData. Everything but '<'. Note that & will be dealt with later.
|
||||
mXMLCharData(UnicodeString("(?s)[^<]*"), 0, status),
|
||||
mXMLCharData(UnicodeString("(?s)[^<]*", -1, US_INV), 0, status),
|
||||
|
||||
// Attribute name = "value". XML Productions 10, 40/41
|
||||
// Capture group 1 is name,
|
||||
@ -126,14 +126,14 @@ UXMLParser::UXMLParser(UErrorCode &status) :
|
||||
// Here, we match a single attribute, and make its name and
|
||||
// attribute value available to the parser code.
|
||||
mAttrValue(UnicodeString(XML_SPACES "+(" XML_NAME ")" XML_SPACES "*=" XML_SPACES "*"
|
||||
"((?:\\\'[^<\\\']*?\\\')|(?:\\\"[^<\\\"]*?\\\"))"), 0, status),
|
||||
"((?:\\\'[^<\\\']*?\\\')|(?:\\\"[^<\\\"]*?\\\"))", -1, US_INV), 0, status),
|
||||
|
||||
|
||||
mAttrNormalizer(UnicodeString(XML_SPACES), 0, status),
|
||||
mAttrNormalizer(UnicodeString(XML_SPACES, -1, US_INV), 0, status),
|
||||
|
||||
// Match any of the new-line sequences in content.
|
||||
// All are changed to \u000a.
|
||||
mNewLineNormalizer(UnicodeString("\\u000d\\u000a|\\u000d\\u0085|\\u000a|\\u000d|\\u0085|\\u2028"), 0, status),
|
||||
mNewLineNormalizer(UnicodeString("\\u000d\\u000a|\\u000d\\u0085|\\u000a|\\u000d|\\u0085|\\u2028", -1, US_INV), 0, status),
|
||||
|
||||
// & char references
|
||||
// We will figure out what we've got based on which capture group has content.
|
||||
|
Loading…
Reference in New Issue
Block a user