ICU-6947 implement UREGEX_LITERAL flag.
X-SVN-Rev: 31398
This commit is contained in:
parent
3f18f96246
commit
c74df646b7
@ -106,7 +106,7 @@ void RegexCompile::compile(
|
||||
UParseError &pp, // Error position info
|
||||
UErrorCode &e) // Error Code
|
||||
{
|
||||
fRXPat->fPatternString = new UnicodeString(pat);
|
||||
fRXPat->fPatternString = new UnicodeString(pat);
|
||||
UText patternText = UTEXT_INITIALIZER;
|
||||
utext_openConstUnicodeString(&patternText, fRXPat->fPatternString, &e);
|
||||
|
||||
@ -147,6 +147,12 @@ void RegexCompile::compile(
|
||||
fPatternLength = utext_nativeLength(pat);
|
||||
uint16_t state = 1;
|
||||
const RegexTableEl *tableEl;
|
||||
|
||||
// UREGEX_LITERAL force entire pattern to be treated as a literal string.
|
||||
if (fModeFlags & UREGEX_LITERAL) {
|
||||
fQuoteMode = TRUE;
|
||||
}
|
||||
|
||||
nextChar(fC); // Fetch the first char from the pattern string.
|
||||
|
||||
//
|
||||
@ -3652,10 +3658,11 @@ void RegexCompile::nextChar(RegexPatternChar &c) {
|
||||
|
||||
if (fQuoteMode) {
|
||||
c.fQuoted = TRUE;
|
||||
if ((c.fChar==chBackSlash && peekCharLL()==chE) || c.fChar == (UChar32)-1) {
|
||||
if ((c.fChar==chBackSlash && peekCharLL()==chE && ((fModeFlags & UREGEX_LITERAL) == 0)) ||
|
||||
c.fChar == (UChar32)-1) {
|
||||
fQuoteMode = FALSE; // Exit quote mode,
|
||||
nextCharLL(); // discard the E
|
||||
nextChar(c); // recurse to get the real next char
|
||||
nextCharLL(); // discard the E
|
||||
nextChar(c); // recurse to get the real next char
|
||||
}
|
||||
}
|
||||
else if (fInBackslashQuote) {
|
||||
|
@ -30,8 +30,6 @@ U_NAMESPACE_BEGIN
|
||||
//
|
||||
//--------------------------------------------------------------------------
|
||||
RegexPattern::RegexPattern() {
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
|
||||
// Init all of this instances data.
|
||||
init();
|
||||
}
|
||||
@ -287,7 +285,7 @@ RegexPattern::compile(const UnicodeString &regex,
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if ((flags & (UREGEX_CANON_EQ | UREGEX_LITERAL)) != 0) {
|
||||
if ((flags & UREGEX_CANON_EQ) != 0) {
|
||||
status = U_REGEX_UNIMPLEMENTED;
|
||||
return NULL;
|
||||
}
|
||||
@ -338,7 +336,7 @@ RegexPattern::compile(UText *regex,
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if ((flags & (UREGEX_CANON_EQ | UREGEX_LITERAL)) != 0) {
|
||||
if ((flags & UREGEX_CANON_EQ) != 0) {
|
||||
status = U_REGEX_UNIMPLEMENTED;
|
||||
return NULL;
|
||||
}
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 2004-2011, International Business Machines
|
||||
* Copyright (C) 2004-2012, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
* file name: uregex.h
|
||||
@ -64,13 +64,12 @@ typedef enum URegexpFlag{
|
||||
|
||||
/** If set, treat the entire pattern as a literal string.
|
||||
* Metacharacters or escape sequences in the input sequence will be given
|
||||
* no special meaning. Not implemented yet as of ICU 4.4.
|
||||
* no special meaning.
|
||||
*
|
||||
* The flags CASE_INSENSITIVE and UNICODE_CASE retain their impact
|
||||
* The flag UREGEX_CASE_INSENSITIVE retains its impact
|
||||
* on matching when used in conjunction with this flag.
|
||||
* The other flags become superfluous.
|
||||
* TODO: say which escapes are still handled; anything Java does
|
||||
* early (\\u) we should still do.
|
||||
*
|
||||
* @stable ICU 4.0
|
||||
*/
|
||||
UREGEX_LITERAL = 16,
|
||||
|
@ -228,7 +228,7 @@ static void TestRegexCAPI(void) {
|
||||
|
||||
/* Open with an unimplemented flag */
|
||||
status = U_ZERO_ERROR;
|
||||
re = uregex_open(pat, -1, UREGEX_LITERAL, 0, &status);
|
||||
re = uregex_open(pat, -1, UREGEX_CANON_EQ, 0, &status);
|
||||
TEST_ASSERT(status == U_REGEX_UNIMPLEMENTED);
|
||||
uregex_close(re);
|
||||
|
||||
|
@ -3119,7 +3119,7 @@ void RegexTest::Extended() {
|
||||
|
||||
RegexMatcher quotedStuffMat(UNICODE_STRING_SIMPLE("\\s*([\\'\\\"/])(.*?)\\1"), 0, status);
|
||||
RegexMatcher commentMat (UNICODE_STRING_SIMPLE("\\s*(#.*)?$"), 0, status);
|
||||
RegexMatcher flagsMat (UNICODE_STRING_SIMPLE("\\s*([ixsmdteDEGLMvabtyYzZ2-9]*)([:letter:]*)"), 0, status);
|
||||
RegexMatcher flagsMat (UNICODE_STRING_SIMPLE("\\s*([ixsmdteDEGLMQvabtyYzZ2-9]*)([:letter:]*)"), 0, status);
|
||||
|
||||
RegexMatcher lineMat(UNICODE_STRING_SIMPLE("(.*?)\\r?\\n"), testString, 0, status);
|
||||
UnicodeString testPattern; // The pattern for test from the test file.
|
||||
@ -3329,6 +3329,9 @@ void RegexTest::regex_find(const UnicodeString &pattern,
|
||||
if (flags.indexOf((UChar)0x44) >= 0) { // 'D' flag
|
||||
bflags |= UREGEX_UNIX_LINES;
|
||||
}
|
||||
if (flags.indexOf((UChar)0x51) >= 0) { // 'Q' flag
|
||||
bflags |= UREGEX_LITERAL;
|
||||
}
|
||||
|
||||
|
||||
callerPattern = RegexPattern::compile(pattern, bflags, pe, status);
|
||||
|
6
icu4c/source/test/testdata/regextst.txt
vendored
6
icu4c/source/test/testdata/regextst.txt
vendored
@ -23,6 +23,7 @@
|
||||
# m multi-line mode.
|
||||
# ($ and ^ match at embedded new-lines)
|
||||
# D Unix Lines mode (only recognize 0x0a as new-line)
|
||||
# Q UREGEX_LITERAL flag. Entire pattern is literal string.
|
||||
# v If icu configured without break iteration, this
|
||||
# regex test pattern should not compile.
|
||||
# e set the UREGEX_ERROR_ON_UNKNOWN_ESCAPES flag
|
||||
@ -282,6 +283,11 @@
|
||||
"\Q$*^^(*)?\A\E(a*)" "<0>$*^^(*)?\\A<1>aaaaaaaaaaaaaaa</1></0>"
|
||||
"[abc\Q]\r\E]+" "<0>aaaccc]]]\\\\\\</0>\r..." # \Q ... \E escape in a [set]
|
||||
|
||||
# UREGEX_LITERAL - entire pattern is a literal string, no escapes recognized.
|
||||
# Note that data strings in test cases still get escape processing.
|
||||
"abc\an\r\E\\abcd\u0031bye" Q "lead<0>abc\\an\\r\\E\\\\abcd\\u0031bye</0>extra"
|
||||
"case insensitive \\ (l)iteral" Qi "stuff!! <0>cAsE InSenSiTiVE \\\\ (L)ITeral</0>"
|
||||
|
||||
# \S and \s space characters
|
||||
"\s+" "not_space<0> \t \r \n \u3000 \u2004 \u2028 \u2029</0>xyz"
|
||||
"(\S+).*?(\S+).*" "<0><1>Not-spaces</1> <2>more-non-spaces</2> </0>"
|
||||
|
Loading…
Reference in New Issue
Block a user