ICU-10815 Fix for uregex_findNext() not setting U_REGEX_STOPPED_BY_CALLER

X-SVN-Rev: 36260
This commit is contained in:
Andy Heninger 2014-08-28 01:19:29 +00:00
parent d29271fcce
commit f2dfa7422e
5 changed files with 348 additions and 276 deletions

View File

@ -33,26 +33,6 @@
// #include <malloc.h> // Needed for heapcheck testing // #include <malloc.h> // Needed for heapcheck testing
// Find progress callback
// ----------------------
// Macro to inline test & call to ReportFindProgress(). Eliminates unnecessary function call.
//
#define REGEXFINDPROGRESS_INTERRUPT(pos, status) \
(fFindProgressCallbackFn != NULL) && (ReportFindProgress(pos, status) == FALSE)
// Smart Backtracking
// ------------------
// When a failure would go back to a LOOP_C instruction,
// strings, characters, and setrefs scan backwards for a valid start
// character themselves, pop the stack, and save state, emulating the
// LOOP_C's effect but assured that the next character of input is a
// possible matching character.
//
// Good idea in theory; unfortunately it only helps out a few specific
// cases and slows the engine down a little in the rest.
U_NAMESPACE_BEGIN U_NAMESPACE_BEGIN
// Default limit for the size of the back track stack, to avoid system // Default limit for the size of the back track stack, to avoid system
@ -584,15 +564,33 @@ int32_t RegexMatcher::end(int32_t group, UErrorCode &err) const {
// //
//-------------------------------------------------------------------------------- //--------------------------------------------------------------------------------
UBool RegexMatcher::find() { UBool RegexMatcher::find() {
if (U_FAILURE(fDeferredStatus)) {
return FALSE;
}
UErrorCode status = U_ZERO_ERROR;
UBool result = find(status);
return result;
}
//--------------------------------------------------------------------------------
//
// find()
//
//--------------------------------------------------------------------------------
UBool RegexMatcher::find(UErrorCode &status) {
// Start at the position of the last match end. (Will be zero if the // Start at the position of the last match end. (Will be zero if the
// matcher has been reset.) // matcher has been reset.)
// //
if (U_FAILURE(status)) {
return FALSE;
}
if (U_FAILURE(fDeferredStatus)) { if (U_FAILURE(fDeferredStatus)) {
status = fDeferredStatus;
return FALSE; return FALSE;
} }
if (UTEXT_FULL_TEXT_IN_CHUNK(fInputText, fInputLength)) { if (UTEXT_FULL_TEXT_IN_CHUNK(fInputText, fInputLength)) {
return findUsingChunk(); return findUsingChunk(status);
} }
int64_t startPos = fMatchEnd; int64_t startPos = fMatchEnd;
@ -653,8 +651,8 @@ UBool RegexMatcher::find() {
// No optimization was found. // No optimization was found.
// Try a match at each input position. // Try a match at each input position.
for (;;) { for (;;) {
MatchAt(startPos, FALSE, fDeferredStatus); MatchAt(startPos, FALSE, status);
if (U_FAILURE(fDeferredStatus)) { if (U_FAILURE(status)) {
return FALSE; return FALSE;
} }
if (fMatch) { if (fMatch) {
@ -670,7 +668,7 @@ UBool RegexMatcher::find() {
// Note that it's perfectly OK for a pattern to have a zero-length // Note that it's perfectly OK for a pattern to have a zero-length
// match at the end of a string, so we must make sure that the loop // match at the end of a string, so we must make sure that the loop
// runs with startPos == testStartLimit the last time through. // runs with startPos == testStartLimit the last time through.
if (REGEXFINDPROGRESS_INTERRUPT(startPos, fDeferredStatus)) if (findProgressInterrupt(startPos, status))
return FALSE; return FALSE;
} }
U_ASSERT(FALSE); U_ASSERT(FALSE);
@ -682,8 +680,8 @@ UBool RegexMatcher::find() {
fMatch = FALSE; fMatch = FALSE;
return FALSE; return FALSE;
} }
MatchAt(startPos, FALSE, fDeferredStatus); MatchAt(startPos, FALSE, status);
if (U_FAILURE(fDeferredStatus)) { if (U_FAILURE(status)) {
return FALSE; return FALSE;
} }
return fMatch; return fMatch;
@ -703,8 +701,8 @@ UBool RegexMatcher::find() {
// and handle end of text in the following block. // and handle end of text in the following block.
if (c >= 0 && ((c<256 && fPattern->fInitialChars8->contains(c)) || if (c >= 0 && ((c<256 && fPattern->fInitialChars8->contains(c)) ||
(c>=256 && fPattern->fInitialChars->contains(c)))) { (c>=256 && fPattern->fInitialChars->contains(c)))) {
MatchAt(pos, FALSE, fDeferredStatus); MatchAt(pos, FALSE, status);
if (U_FAILURE(fDeferredStatus)) { if (U_FAILURE(status)) {
return FALSE; return FALSE;
} }
if (fMatch) { if (fMatch) {
@ -717,7 +715,7 @@ UBool RegexMatcher::find() {
fHitEnd = TRUE; fHitEnd = TRUE;
return FALSE; return FALSE;
} }
if (REGEXFINDPROGRESS_INTERRUPT(startPos, fDeferredStatus)) if (findProgressInterrupt(startPos, status))
return FALSE; return FALSE;
} }
} }
@ -735,8 +733,8 @@ UBool RegexMatcher::find() {
c = UTEXT_NEXT32(fInputText); c = UTEXT_NEXT32(fInputText);
startPos = UTEXT_GETNATIVEINDEX(fInputText); startPos = UTEXT_GETNATIVEINDEX(fInputText);
if (c == theChar) { if (c == theChar) {
MatchAt(pos, FALSE, fDeferredStatus); MatchAt(pos, FALSE, status);
if (U_FAILURE(fDeferredStatus)) { if (U_FAILURE(status)) {
return FALSE; return FALSE;
} }
if (fMatch) { if (fMatch) {
@ -749,7 +747,7 @@ UBool RegexMatcher::find() {
fHitEnd = TRUE; fHitEnd = TRUE;
return FALSE; return FALSE;
} }
if (REGEXFINDPROGRESS_INTERRUPT(startPos, fDeferredStatus)) if (findProgressInterrupt(startPos, status))
return FALSE; return FALSE;
} }
} }
@ -759,8 +757,8 @@ UBool RegexMatcher::find() {
{ {
UChar32 c; UChar32 c;
if (startPos == fAnchorStart) { if (startPos == fAnchorStart) {
MatchAt(startPos, FALSE, fDeferredStatus); MatchAt(startPos, FALSE, status);
if (U_FAILURE(fDeferredStatus)) { if (U_FAILURE(status)) {
return FALSE; return FALSE;
} }
if (fMatch) { if (fMatch) {
@ -778,8 +776,8 @@ UBool RegexMatcher::find() {
if (fPattern->fFlags & UREGEX_UNIX_LINES) { if (fPattern->fFlags & UREGEX_UNIX_LINES) {
for (;;) { for (;;) {
if (c == 0x0a) { if (c == 0x0a) {
MatchAt(startPos, FALSE, fDeferredStatus); MatchAt(startPos, FALSE, status);
if (U_FAILURE(fDeferredStatus)) { if (U_FAILURE(status)) {
return FALSE; return FALSE;
} }
if (fMatch) { if (fMatch) {
@ -797,7 +795,7 @@ UBool RegexMatcher::find() {
// Note that it's perfectly OK for a pattern to have a zero-length // Note that it's perfectly OK for a pattern to have a zero-length
// match at the end of a string, so we must make sure that the loop // match at the end of a string, so we must make sure that the loop
// runs with startPos == testStartLimit the last time through. // runs with startPos == testStartLimit the last time through.
if (REGEXFINDPROGRESS_INTERRUPT(startPos, fDeferredStatus)) if (findProgressInterrupt(startPos, status))
return FALSE; return FALSE;
} }
} else { } else {
@ -808,8 +806,8 @@ UBool RegexMatcher::find() {
(void)UTEXT_NEXT32(fInputText); (void)UTEXT_NEXT32(fInputText);
startPos = UTEXT_GETNATIVEINDEX(fInputText); startPos = UTEXT_GETNATIVEINDEX(fInputText);
} }
MatchAt(startPos, FALSE, fDeferredStatus); MatchAt(startPos, FALSE, status);
if (U_FAILURE(fDeferredStatus)) { if (U_FAILURE(status)) {
return FALSE; return FALSE;
} }
if (fMatch) { if (fMatch) {
@ -827,7 +825,7 @@ UBool RegexMatcher::find() {
// Note that it's perfectly OK for a pattern to have a zero-length // Note that it's perfectly OK for a pattern to have a zero-length
// match at the end of a string, so we must make sure that the loop // match at the end of a string, so we must make sure that the loop
// runs with startPos == testStartLimit the last time through. // runs with startPos == testStartLimit the last time through.
if (REGEXFINDPROGRESS_INTERRUPT(startPos, fDeferredStatus)) if (findProgressInterrupt(startPos, status))
return FALSE; return FALSE;
} }
} }
@ -864,7 +862,7 @@ UBool RegexMatcher::find(int64_t start, UErrorCode &status) {
return FALSE; return FALSE;
} }
fMatchEnd = nativeStart; fMatchEnd = nativeStart;
return find(); return find(status);
} }
@ -874,7 +872,7 @@ UBool RegexMatcher::find(int64_t start, UErrorCode &status) {
// entire string is available in the UText's chunk buffer. // entire string is available in the UText's chunk buffer.
// //
//-------------------------------------------------------------------------------- //--------------------------------------------------------------------------------
UBool RegexMatcher::findUsingChunk() { UBool RegexMatcher::findUsingChunk(UErrorCode &status) {
// Start at the position of the last match end. (Will be zero if the // Start at the position of the last match end. (Will be zero if the
// matcher has been reset.) // matcher has been reset.)
// //
@ -931,8 +929,8 @@ UBool RegexMatcher::findUsingChunk() {
// No optimization was found. // No optimization was found.
// Try a match at each input position. // Try a match at each input position.
for (;;) { for (;;) {
MatchChunkAt(startPos, FALSE, fDeferredStatus); MatchChunkAt(startPos, FALSE, status);
if (U_FAILURE(fDeferredStatus)) { if (U_FAILURE(status)) {
return FALSE; return FALSE;
} }
if (fMatch) { if (fMatch) {
@ -946,7 +944,7 @@ UBool RegexMatcher::findUsingChunk() {
// Note that it's perfectly OK for a pattern to have a zero-length // Note that it's perfectly OK for a pattern to have a zero-length
// match at the end of a string, so we must make sure that the loop // match at the end of a string, so we must make sure that the loop
// runs with startPos == testLen the last time through. // runs with startPos == testLen the last time through.
if (REGEXFINDPROGRESS_INTERRUPT(startPos, fDeferredStatus)) if (findProgressInterrupt(startPos, status))
return FALSE; return FALSE;
} }
U_ASSERT(FALSE); U_ASSERT(FALSE);
@ -958,8 +956,8 @@ UBool RegexMatcher::findUsingChunk() {
fMatch = FALSE; fMatch = FALSE;
return FALSE; return FALSE;
} }
MatchChunkAt(startPos, FALSE, fDeferredStatus); MatchChunkAt(startPos, FALSE, status);
if (U_FAILURE(fDeferredStatus)) { if (U_FAILURE(status)) {
return FALSE; return FALSE;
} }
return fMatch; return fMatch;
@ -974,8 +972,8 @@ UBool RegexMatcher::findUsingChunk() {
U16_NEXT(inputBuf, startPos, fActiveLimit, c); // like c = inputBuf[startPos++]; U16_NEXT(inputBuf, startPos, fActiveLimit, c); // like c = inputBuf[startPos++];
if ((c<256 && fPattern->fInitialChars8->contains(c)) || if ((c<256 && fPattern->fInitialChars8->contains(c)) ||
(c>=256 && fPattern->fInitialChars->contains(c))) { (c>=256 && fPattern->fInitialChars->contains(c))) {
MatchChunkAt(pos, FALSE, fDeferredStatus); MatchChunkAt(pos, FALSE, status);
if (U_FAILURE(fDeferredStatus)) { if (U_FAILURE(status)) {
return FALSE; return FALSE;
} }
if (fMatch) { if (fMatch) {
@ -987,7 +985,7 @@ UBool RegexMatcher::findUsingChunk() {
fHitEnd = TRUE; fHitEnd = TRUE;
return FALSE; return FALSE;
} }
if (REGEXFINDPROGRESS_INTERRUPT(startPos, fDeferredStatus)) if (findProgressInterrupt(startPos, status))
return FALSE; return FALSE;
} }
} }
@ -1003,8 +1001,8 @@ UBool RegexMatcher::findUsingChunk() {
int32_t pos = startPos; int32_t pos = startPos;
U16_NEXT(inputBuf, startPos, fActiveLimit, c); // like c = inputBuf[startPos++]; U16_NEXT(inputBuf, startPos, fActiveLimit, c); // like c = inputBuf[startPos++];
if (c == theChar) { if (c == theChar) {
MatchChunkAt(pos, FALSE, fDeferredStatus); MatchChunkAt(pos, FALSE, status);
if (U_FAILURE(fDeferredStatus)) { if (U_FAILURE(status)) {
return FALSE; return FALSE;
} }
if (fMatch) { if (fMatch) {
@ -1016,7 +1014,7 @@ UBool RegexMatcher::findUsingChunk() {
fHitEnd = TRUE; fHitEnd = TRUE;
return FALSE; return FALSE;
} }
if (REGEXFINDPROGRESS_INTERRUPT(startPos, fDeferredStatus)) if (findProgressInterrupt(startPos, status))
return FALSE; return FALSE;
} }
} }
@ -1026,8 +1024,8 @@ UBool RegexMatcher::findUsingChunk() {
{ {
UChar32 c; UChar32 c;
if (startPos == fAnchorStart) { if (startPos == fAnchorStart) {
MatchChunkAt(startPos, FALSE, fDeferredStatus); MatchChunkAt(startPos, FALSE, status);
if (U_FAILURE(fDeferredStatus)) { if (U_FAILURE(status)) {
return FALSE; return FALSE;
} }
if (fMatch) { if (fMatch) {
@ -1040,8 +1038,8 @@ UBool RegexMatcher::findUsingChunk() {
for (;;) { for (;;) {
c = inputBuf[startPos-1]; c = inputBuf[startPos-1];
if (c == 0x0a) { if (c == 0x0a) {
MatchChunkAt(startPos, FALSE, fDeferredStatus); MatchChunkAt(startPos, FALSE, status);
if (U_FAILURE(fDeferredStatus)) { if (U_FAILURE(status)) {
return FALSE; return FALSE;
} }
if (fMatch) { if (fMatch) {
@ -1057,7 +1055,7 @@ UBool RegexMatcher::findUsingChunk() {
// Note that it's perfectly OK for a pattern to have a zero-length // Note that it's perfectly OK for a pattern to have a zero-length
// match at the end of a string, so we must make sure that the loop // match at the end of a string, so we must make sure that the loop
// runs with startPos == testLen the last time through. // runs with startPos == testLen the last time through.
if (REGEXFINDPROGRESS_INTERRUPT(startPos, fDeferredStatus)) if (findProgressInterrupt(startPos, status))
return FALSE; return FALSE;
} }
} else { } else {
@ -1068,8 +1066,8 @@ UBool RegexMatcher::findUsingChunk() {
if (c == 0x0d && startPos < fActiveLimit && inputBuf[startPos] == 0x0a) { if (c == 0x0d && startPos < fActiveLimit && inputBuf[startPos] == 0x0a) {
startPos++; startPos++;
} }
MatchChunkAt(startPos, FALSE, fDeferredStatus); MatchChunkAt(startPos, FALSE, status);
if (U_FAILURE(fDeferredStatus)) { if (U_FAILURE(status)) {
return FALSE; return FALSE;
} }
if (fMatch) { if (fMatch) {
@ -1085,7 +1083,7 @@ UBool RegexMatcher::findUsingChunk() {
// Note that it's perfectly OK for a pattern to have a zero-length // Note that it's perfectly OK for a pattern to have a zero-length
// match at the end of a string, so we must make sure that the loop // match at the end of a string, so we must make sure that the loop
// runs with startPos == testLen the last time through. // runs with startPos == testLen the last time through.
if (REGEXFINDPROGRESS_INTERRUPT(startPos, fDeferredStatus)) if (findProgressInterrupt(startPos, status))
return FALSE; return FALSE;
} }
} }
@ -1172,8 +1170,8 @@ UnicodeString RegexMatcher::group(int32_t groupNum, UErrorCode &status) const {
// Return deep (mutable) clone // Return deep (mutable) clone
// Technology Preview (as an API), but note that the UnicodeString API is implemented // Technology Preview (as an API), but note that the UnicodeString API is implemented
// using this function. // using this function.
UText *RegexMatcher::group(int32_t groupNum, UText *dest, UErrorCode &status) const { UText *RegexMatcher::group(int32_t groupNum, UText *dest, UErrorCode &status) const {
if (U_FAILURE(status)) { if (U_FAILURE(status)) {
return dest; return dest;
@ -2625,25 +2623,20 @@ void RegexMatcher::IncrementTime(UErrorCode &status) {
//-------------------------------------------------------------------------------- //--------------------------------------------------------------------------------
// //
// ReportFindProgress This function is called once for each advance in the target // findProgressInterrupt This function is called once for each advance in the target
// string from the find() function, and calls the user progress callback // string from the find() function, and calls the user progress callback
// function if there is one installed. // function if there is one installed.
// //
// NOTE: // Return: TRUE if the find operation is to be terminated.
// // FALSE if the find operation is to continue running.
// If the match operation needs to be aborted because the user
// callback asked for it, just set an error status.
// The engine will pick that up and stop in its outer loop.
// //
//-------------------------------------------------------------------------------- //--------------------------------------------------------------------------------
UBool RegexMatcher::ReportFindProgress(int64_t matchIndex, UErrorCode &status) { UBool RegexMatcher::findProgressInterrupt(int64_t pos, UErrorCode &status) {
if (fFindProgressCallbackFn != NULL) { if (fFindProgressCallbackFn && !(*fFindProgressCallbackFn)(fFindProgressCallbackContext, pos)) {
if ((*fFindProgressCallbackFn)(fFindProgressCallbackContext, matchIndex) == FALSE) { status = U_REGEX_STOPPED_BY_CALLER;
status = U_ZERO_ERROR /*U_REGEX_STOPPED_BY_CALLER*/; return TRUE;
return FALSE;
}
} }
return TRUE; return FALSE;
} }
//-------------------------------------------------------------------------------- //--------------------------------------------------------------------------------

View File

@ -801,6 +801,21 @@ public:
virtual UBool find(); virtual UBool find();
/**
* Find the next pattern match in the input string.
* The find begins searching the input at the location following the end of
* the previous match, or at the start of the string if there is no previous match.
* If a match is found, <code>start(), end()</code> and <code>group()</code>
* will provide more information regarding the match.
* <p>Note that if the input string is changed by the application,
* use find(startPos, status) instead of find(), because the saved starting
* position may not be valid with the altered input string.</p>
* @param status A reference to a UErrorCode to receive any errors.
* @return TRUE if a match is found.
* @stable @internal
*/
virtual UBool find(UErrorCode &status);
/** /**
* Resets this RegexMatcher and then attempts to find the next substring of the * Resets this RegexMatcher and then attempts to find the next substring of the
* input string that matches the pattern, starting at the specified index. * input string that matches the pattern, starting at the specified index.
@ -1744,11 +1759,13 @@ private:
REStackFrame *resetStack(); REStackFrame *resetStack();
inline REStackFrame *StateSave(REStackFrame *fp, int64_t savePatIdx, UErrorCode &status); inline REStackFrame *StateSave(REStackFrame *fp, int64_t savePatIdx, UErrorCode &status);
void IncrementTime(UErrorCode &status); void IncrementTime(UErrorCode &status);
UBool ReportFindProgress(int64_t matchIndex, UErrorCode &status);
// Call user find callback function, if set. Return TRUE if operation should be interrupted.
inline UBool findProgressInterrupt(int64_t matchIndex, UErrorCode &status);
int64_t appendGroup(int32_t groupNum, UText *dest, UErrorCode &status) const; int64_t appendGroup(int32_t groupNum, UText *dest, UErrorCode &status) const;
UBool findUsingChunk(); UBool findUsingChunk(UErrorCode &status);
void MatchChunkAt(int32_t startIdx, UBool toEnd, UErrorCode &status); void MatchChunkAt(int32_t startIdx, UBool toEnd, UErrorCode &status);
UBool isChunkWordBoundary(int32_t pos); UBool isChunkWordBoundary(int32_t pos);

View File

@ -1,6 +1,6 @@
/* /*
******************************************************************************* *******************************************************************************
* Copyright (C) 2004-2013, International Business Machines * Copyright (C) 2004-2014, International Business Machines
* Corporation and others. All Rights Reserved. * Corporation and others. All Rights Reserved.
******************************************************************************* *******************************************************************************
* file name: uregex.cpp * file name: uregex.cpp
@ -144,7 +144,7 @@ uregex_open( const UChar *pattern,
re->fPatStringLen = patternLength; re->fPatStringLen = patternLength;
u_memcpy(patBuf, pattern, actualPatLen); u_memcpy(patBuf, pattern, actualPatLen);
patBuf[actualPatLen] = 0; patBuf[actualPatLen] = 0;
UText patText = UTEXT_INITIALIZER; UText patText = UTEXT_INITIALIZER;
utext_openUChars(&patText, patBuf, patternLength, status); utext_openUChars(&patText, patBuf, patternLength, status);
@ -157,7 +157,7 @@ uregex_open( const UChar *pattern,
re->fPat = RegexPattern::compile(&patText, flags, *status); re->fPat = RegexPattern::compile(&patText, flags, *status);
} }
utext_close(&patText); utext_close(&patText);
if (U_FAILURE(*status)) { if (U_FAILURE(*status)) {
goto ErrorExit; goto ErrorExit;
} }
@ -186,7 +186,7 @@ uregex_openUText(UText *pattern,
uint32_t flags, uint32_t flags,
UParseError *pe, UParseError *pe,
UErrorCode *status) { UErrorCode *status) {
if (U_FAILURE(*status)) { if (U_FAILURE(*status)) {
return NULL; return NULL;
} }
@ -194,19 +194,19 @@ uregex_openUText(UText *pattern,
*status = U_ILLEGAL_ARGUMENT_ERROR; *status = U_ILLEGAL_ARGUMENT_ERROR;
return NULL; return NULL;
} }
int64_t patternNativeLength = utext_nativeLength(pattern); int64_t patternNativeLength = utext_nativeLength(pattern);
if (patternNativeLength == 0) { if (patternNativeLength == 0) {
*status = U_ILLEGAL_ARGUMENT_ERROR; *status = U_ILLEGAL_ARGUMENT_ERROR;
return NULL; return NULL;
} }
RegularExpression *re = new RegularExpression; RegularExpression *re = new RegularExpression;
UErrorCode lengthStatus = U_ZERO_ERROR; UErrorCode lengthStatus = U_ZERO_ERROR;
int32_t pattern16Length = utext_extract(pattern, 0, patternNativeLength, NULL, 0, &lengthStatus); int32_t pattern16Length = utext_extract(pattern, 0, patternNativeLength, NULL, 0, &lengthStatus);
u_atomic_int32_t *refC = (u_atomic_int32_t *)uprv_malloc(sizeof(int32_t)); u_atomic_int32_t *refC = (u_atomic_int32_t *)uprv_malloc(sizeof(int32_t));
UChar *patBuf = (UChar *)uprv_malloc(sizeof(UChar)*(pattern16Length+1)); UChar *patBuf = (UChar *)uprv_malloc(sizeof(UChar)*(pattern16Length+1));
if (re == NULL || refC == NULL || patBuf == NULL) { if (re == NULL || refC == NULL || patBuf == NULL) {
@ -218,7 +218,7 @@ uregex_openUText(UText *pattern,
} }
re->fPatRefCount = refC; re->fPatRefCount = refC;
*re->fPatRefCount = 1; *re->fPatRefCount = 1;
// //
// Make a copy of the pattern string, so we can return it later if asked. // Make a copy of the pattern string, so we can return it later if asked.
// For compiling the pattern, we will use a read-only UText wrapper // For compiling the pattern, we will use a read-only UText wrapper
@ -227,10 +227,10 @@ uregex_openUText(UText *pattern,
re->fPatString = patBuf; re->fPatString = patBuf;
re->fPatStringLen = pattern16Length; re->fPatStringLen = pattern16Length;
utext_extract(pattern, 0, patternNativeLength, patBuf, pattern16Length+1, status); utext_extract(pattern, 0, patternNativeLength, patBuf, pattern16Length+1, status);
UText patText = UTEXT_INITIALIZER; UText patText = UTEXT_INITIALIZER;
utext_openUChars(&patText, patBuf, pattern16Length, status); utext_openUChars(&patText, patBuf, pattern16Length, status);
// //
// Compile the pattern // Compile the pattern
// //
@ -240,11 +240,11 @@ uregex_openUText(UText *pattern,
re->fPat = RegexPattern::compile(&patText, flags, *status); re->fPat = RegexPattern::compile(&patText, flags, *status);
} }
utext_close(&patText); utext_close(&patText);
if (U_FAILURE(*status)) { if (U_FAILURE(*status)) {
goto ErrorExit; goto ErrorExit;
} }
// //
// Create the matcher object // Create the matcher object
// //
@ -252,11 +252,11 @@ uregex_openUText(UText *pattern,
if (U_SUCCESS(*status)) { if (U_SUCCESS(*status)) {
return (URegularExpression*)re; return (URegularExpression*)re;
} }
ErrorExit: ErrorExit:
delete re; delete re;
return NULL; return NULL;
} }
//---------------------------------------------------------------------------------------- //----------------------------------------------------------------------------------------
@ -280,7 +280,7 @@ uregex_close(URegularExpression *re2) {
// uregex_clone // uregex_clone
// //
//---------------------------------------------------------------------------------------- //----------------------------------------------------------------------------------------
U_CAPI URegularExpression * U_EXPORT2 U_CAPI URegularExpression * U_EXPORT2
uregex_clone(const URegularExpression *source2, UErrorCode *status) { uregex_clone(const URegularExpression *source2, UErrorCode *status) {
RegularExpression *source = (RegularExpression*)source2; RegularExpression *source = (RegularExpression*)source2;
if (validateRE(source, FALSE, status) == FALSE) { if (validateRE(source, FALSE, status) == FALSE) {
@ -300,7 +300,7 @@ uregex_clone(const URegularExpression *source2, UErrorCode *status) {
} }
clone->fPat = source->fPat; clone->fPat = source->fPat;
clone->fPatRefCount = source->fPatRefCount; clone->fPatRefCount = source->fPatRefCount;
clone->fPatString = source->fPatString; clone->fPatString = source->fPatString;
clone->fPatStringLen = source->fPatStringLen; clone->fPatStringLen = source->fPatStringLen;
umtx_atomic_inc(source->fPatRefCount); umtx_atomic_inc(source->fPatRefCount);
@ -317,12 +317,12 @@ uregex_clone(const URegularExpression *source2, UErrorCode *status) {
// uregex_pattern // uregex_pattern
// //
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
U_CAPI const UChar * U_EXPORT2 U_CAPI const UChar * U_EXPORT2
uregex_pattern(const URegularExpression *regexp2, uregex_pattern(const URegularExpression *regexp2,
int32_t *patLength, int32_t *patLength,
UErrorCode *status) { UErrorCode *status) {
RegularExpression *regexp = (RegularExpression*)regexp2; RegularExpression *regexp = (RegularExpression*)regexp2;
if (validateRE(regexp, FALSE, status) == FALSE) { if (validateRE(regexp, FALSE, status) == FALSE) {
return NULL; return NULL;
} }
@ -351,7 +351,7 @@ uregex_patternUText(const URegularExpression *regexp2,
// uregex_flags // uregex_flags
// //
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
U_CAPI int32_t U_EXPORT2 U_CAPI int32_t U_EXPORT2
uregex_flags(const URegularExpression *regexp2, UErrorCode *status) { uregex_flags(const URegularExpression *regexp2, UErrorCode *status) {
RegularExpression *regexp = (RegularExpression*)regexp2; RegularExpression *regexp = (RegularExpression*)regexp2;
if (validateRE(regexp, FALSE, status) == FALSE) { if (validateRE(regexp, FALSE, status) == FALSE) {
@ -367,7 +367,7 @@ uregex_flags(const URegularExpression *regexp2, UErrorCode *status) {
// uregex_setText // uregex_setText
// //
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
U_CAPI void U_EXPORT2 U_CAPI void U_EXPORT2
uregex_setText(URegularExpression *regexp2, uregex_setText(URegularExpression *regexp2,
const UChar *text, const UChar *text,
int32_t textLength, int32_t textLength,
@ -380,15 +380,15 @@ uregex_setText(URegularExpression *regexp2,
*status = U_ILLEGAL_ARGUMENT_ERROR; *status = U_ILLEGAL_ARGUMENT_ERROR;
return; return;
} }
if (regexp->fOwnsText && regexp->fText != NULL) { if (regexp->fOwnsText && regexp->fText != NULL) {
uprv_free((void *)regexp->fText); uprv_free((void *)regexp->fText);
} }
regexp->fText = text; regexp->fText = text;
regexp->fTextLength = textLength; regexp->fTextLength = textLength;
regexp->fOwnsText = FALSE; regexp->fOwnsText = FALSE;
UText input = UTEXT_INITIALIZER; UText input = UTEXT_INITIALIZER;
utext_openUChars(&input, text, textLength, status); utext_openUChars(&input, text, textLength, status);
regexp->fMatcher->reset(&input); regexp->fMatcher->reset(&input);
@ -401,7 +401,7 @@ uregex_setText(URegularExpression *regexp2,
// uregex_setUText // uregex_setUText
// //
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
U_CAPI void U_EXPORT2 U_CAPI void U_EXPORT2
uregex_setUText(URegularExpression *regexp2, uregex_setUText(URegularExpression *regexp2,
UText *text, UText *text,
UErrorCode *status) { UErrorCode *status) {
@ -413,11 +413,11 @@ uregex_setUText(URegularExpression *regexp2,
*status = U_ILLEGAL_ARGUMENT_ERROR; *status = U_ILLEGAL_ARGUMENT_ERROR;
return; return;
} }
if (regexp->fOwnsText && regexp->fText != NULL) { if (regexp->fOwnsText && regexp->fText != NULL) {
uprv_free((void *)regexp->fText); uprv_free((void *)regexp->fText);
} }
regexp->fText = NULL; // only fill it in on request regexp->fText = NULL; // only fill it in on request
regexp->fTextLength = -1; regexp->fTextLength = -1;
regexp->fOwnsText = TRUE; regexp->fOwnsText = TRUE;
@ -431,7 +431,7 @@ uregex_setUText(URegularExpression *regexp2,
// uregex_getText // uregex_getText
// //
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
U_CAPI const UChar * U_EXPORT2 U_CAPI const UChar * U_EXPORT2
uregex_getText(URegularExpression *regexp2, uregex_getText(URegularExpression *regexp2,
int32_t *textLength, int32_t *textLength,
UErrorCode *status) { UErrorCode *status) {
@ -439,7 +439,7 @@ uregex_getText(URegularExpression *regexp2,
if (validateRE(regexp, FALSE, status) == FALSE) { if (validateRE(regexp, FALSE, status) == FALSE) {
return NULL; return NULL;
} }
if (regexp->fText == NULL) { if (regexp->fText == NULL) {
// need to fill in the text // need to fill in the text
UText *inputText = regexp->fMatcher->inputText(); UText *inputText = regexp->fMatcher->inputText();
@ -452,13 +452,13 @@ uregex_getText(URegularExpression *regexp2,
UErrorCode lengthStatus = U_ZERO_ERROR; UErrorCode lengthStatus = U_ZERO_ERROR;
regexp->fTextLength = utext_extract(inputText, 0, inputNativeLength, NULL, 0, &lengthStatus); // buffer overflow error regexp->fTextLength = utext_extract(inputText, 0, inputNativeLength, NULL, 0, &lengthStatus); // buffer overflow error
UChar *inputChars = (UChar *)uprv_malloc(sizeof(UChar)*(regexp->fTextLength+1)); UChar *inputChars = (UChar *)uprv_malloc(sizeof(UChar)*(regexp->fTextLength+1));
utext_extract(inputText, 0, inputNativeLength, inputChars, regexp->fTextLength+1, status); utext_extract(inputText, 0, inputNativeLength, inputChars, regexp->fTextLength+1, status);
regexp->fText = inputChars; regexp->fText = inputChars;
regexp->fOwnsText = TRUE; // should already be set but just in case regexp->fOwnsText = TRUE; // should already be set but just in case
} }
} }
if (textLength != NULL) { if (textLength != NULL) {
*textLength = regexp->fTextLength; *textLength = regexp->fTextLength;
} }
@ -471,7 +471,7 @@ uregex_getText(URegularExpression *regexp2,
// uregex_getUText // uregex_getUText
// //
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
U_CAPI UText * U_EXPORT2 U_CAPI UText * U_EXPORT2
uregex_getUText(URegularExpression *regexp2, uregex_getUText(URegularExpression *regexp2,
UText *dest, UText *dest,
UErrorCode *status) { UErrorCode *status) {
@ -488,7 +488,7 @@ uregex_getUText(URegularExpression *regexp2,
// uregex_refreshUText // uregex_refreshUText
// //
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
U_CAPI void U_EXPORT2 U_CAPI void U_EXPORT2
uregex_refreshUText(URegularExpression *regexp2, uregex_refreshUText(URegularExpression *regexp2,
UText *text, UText *text,
UErrorCode *status) { UErrorCode *status) {
@ -505,14 +505,14 @@ uregex_refreshUText(URegularExpression *regexp2,
// uregex_matches // uregex_matches
// //
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
U_CAPI UBool U_EXPORT2 U_CAPI UBool U_EXPORT2
uregex_matches(URegularExpression *regexp2, uregex_matches(URegularExpression *regexp2,
int32_t startIndex, int32_t startIndex,
UErrorCode *status) { UErrorCode *status) {
return uregex_matches64( regexp2, (int64_t)startIndex, status); return uregex_matches64( regexp2, (int64_t)startIndex, status);
} }
U_CAPI UBool U_EXPORT2 U_CAPI UBool U_EXPORT2
uregex_matches64(URegularExpression *regexp2, uregex_matches64(URegularExpression *regexp2,
int64_t startIndex, int64_t startIndex,
UErrorCode *status) { UErrorCode *status) {
@ -535,14 +535,14 @@ uregex_matches64(URegularExpression *regexp2,
// uregex_lookingAt // uregex_lookingAt
// //
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
U_CAPI UBool U_EXPORT2 U_CAPI UBool U_EXPORT2
uregex_lookingAt(URegularExpression *regexp2, uregex_lookingAt(URegularExpression *regexp2,
int32_t startIndex, int32_t startIndex,
UErrorCode *status) { UErrorCode *status) {
return uregex_lookingAt64( regexp2, (int64_t)startIndex, status); return uregex_lookingAt64( regexp2, (int64_t)startIndex, status);
} }
U_CAPI UBool U_EXPORT2 U_CAPI UBool U_EXPORT2
uregex_lookingAt64(URegularExpression *regexp2, uregex_lookingAt64(URegularExpression *regexp2,
int64_t startIndex, int64_t startIndex,
UErrorCode *status) { UErrorCode *status) {
@ -566,16 +566,16 @@ uregex_lookingAt64(URegularExpression *regexp2,
// uregex_find // uregex_find
// //
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
U_CAPI UBool U_EXPORT2 U_CAPI UBool U_EXPORT2
uregex_find(URegularExpression *regexp2, uregex_find(URegularExpression *regexp2,
int32_t startIndex, int32_t startIndex,
UErrorCode *status) { UErrorCode *status) {
return uregex_find64( regexp2, (int64_t)startIndex, status); return uregex_find64( regexp2, (int64_t)startIndex, status);
} }
U_CAPI UBool U_EXPORT2 U_CAPI UBool U_EXPORT2
uregex_find64(URegularExpression *regexp2, uregex_find64(URegularExpression *regexp2,
int64_t startIndex, int64_t startIndex,
UErrorCode *status) { UErrorCode *status) {
RegularExpression *regexp = (RegularExpression*)regexp2; RegularExpression *regexp = (RegularExpression*)regexp2;
UBool result = FALSE; UBool result = FALSE;
@ -584,7 +584,7 @@ uregex_find64(URegularExpression *regexp2,
} }
if (startIndex == -1) { if (startIndex == -1) {
regexp->fMatcher->resetPreserveRegion(); regexp->fMatcher->resetPreserveRegion();
result = regexp->fMatcher->find(); result = regexp->fMatcher->find(*status);
} else { } else {
result = regexp->fMatcher->find(startIndex, *status); result = regexp->fMatcher->find(startIndex, *status);
} }
@ -597,14 +597,14 @@ uregex_find64(URegularExpression *regexp2,
// uregex_findNext // uregex_findNext
// //
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
U_CAPI UBool U_EXPORT2 U_CAPI UBool U_EXPORT2
uregex_findNext(URegularExpression *regexp2, uregex_findNext(URegularExpression *regexp2,
UErrorCode *status) { UErrorCode *status) {
RegularExpression *regexp = (RegularExpression*)regexp2; RegularExpression *regexp = (RegularExpression*)regexp2;
if (validateRE(regexp, TRUE, status) == FALSE) { if (validateRE(regexp, TRUE, status) == FALSE) {
return FALSE; return FALSE;
} }
UBool result = regexp->fMatcher->find(); UBool result = regexp->fMatcher->find(*status);
return result; return result;
} }
@ -613,7 +613,7 @@ uregex_findNext(URegularExpression *regexp2,
// uregex_groupCount // uregex_groupCount
// //
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
U_CAPI int32_t U_EXPORT2 U_CAPI int32_t U_EXPORT2
uregex_groupCount(URegularExpression *regexp2, uregex_groupCount(URegularExpression *regexp2,
UErrorCode *status) { UErrorCode *status) {
RegularExpression *regexp = (RegularExpression*)regexp2; RegularExpression *regexp = (RegularExpression*)regexp2;
@ -630,7 +630,7 @@ uregex_groupCount(URegularExpression *regexp2,
// uregex_group // uregex_group
// //
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
U_CAPI int32_t U_EXPORT2 U_CAPI int32_t U_EXPORT2
uregex_group(URegularExpression *regexp2, uregex_group(URegularExpression *regexp2,
int32_t groupNum, int32_t groupNum,
UChar *dest, UChar *dest,
@ -644,11 +644,11 @@ uregex_group(URegularExpression *regexp2,
*status = U_ILLEGAL_ARGUMENT_ERROR; *status = U_ILLEGAL_ARGUMENT_ERROR;
return 0; return 0;
} }
if (destCapacity == 0 || regexp->fText != NULL) { if (destCapacity == 0 || regexp->fText != NULL) {
// If preflighting or if we already have the text as UChars, // If preflighting or if we already have the text as UChars,
// this is a little cheaper than going through uregex_groupUTextDeep() // this is a little cheaper than going through uregex_groupUTextDeep()
// //
// Pick up the range of characters from the matcher // Pick up the range of characters from the matcher
// //
@ -660,7 +660,7 @@ uregex_group(URegularExpression *regexp2,
// //
// Trim length based on buffer capacity // Trim length based on buffer capacity
// //
int32_t fullLength = endIx - startIx; int32_t fullLength = endIx - startIx;
int32_t copyLength = fullLength; int32_t copyLength = fullLength;
if (copyLength < destCapacity) { if (copyLength < destCapacity) {
@ -671,7 +671,7 @@ uregex_group(URegularExpression *regexp2,
copyLength = destCapacity; copyLength = destCapacity;
*status = U_BUFFER_OVERFLOW_ERROR; *status = U_BUFFER_OVERFLOW_ERROR;
} }
// //
// Copy capture group to user's buffer // Copy capture group to user's buffer
// //
@ -696,7 +696,7 @@ uregex_group(URegularExpression *regexp2,
// uregex_groupUText // uregex_groupUText
// //
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
U_CAPI UText * U_EXPORT2 U_CAPI UText * U_EXPORT2
uregex_groupUText(URegularExpression *regexp2, uregex_groupUText(URegularExpression *regexp2,
int32_t groupNum, int32_t groupNum,
UText *dest, UText *dest,
@ -716,7 +716,7 @@ uregex_groupUText(URegularExpression *regexp2,
// uregex_groupUTextDeep // uregex_groupUTextDeep
// //
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
U_CAPI UText * U_EXPORT2 U_CAPI UText * U_EXPORT2
uregex_groupUTextDeep(URegularExpression *regexp2, uregex_groupUTextDeep(URegularExpression *regexp2,
int32_t groupNum, int32_t groupNum,
UText *dest, UText *dest,
@ -738,7 +738,7 @@ uregex_groupUTextDeep(URegularExpression *regexp2,
UErrorCode emptyTextStatus = U_ZERO_ERROR; UErrorCode emptyTextStatus = U_ZERO_ERROR;
return (dest ? dest : utext_openUChars(NULL, NULL, 0, &emptyTextStatus)); return (dest ? dest : utext_openUChars(NULL, NULL, 0, &emptyTextStatus));
} }
if (dest) { if (dest) {
utext_replace(dest, 0, utext_nativeLength(dest), &regexp->fText[startIx], endIx - startIx, status); utext_replace(dest, 0, utext_nativeLength(dest), &regexp->fText[startIx], endIx - startIx, status);
} else { } else {
@ -747,7 +747,7 @@ uregex_groupUTextDeep(URegularExpression *regexp2,
dest = utext_clone(NULL, &groupText, TRUE, FALSE, status); dest = utext_clone(NULL, &groupText, TRUE, FALSE, status);
utext_close(&groupText); utext_close(&groupText);
} }
return dest; return dest;
} else { } else {
return regexp->fMatcher->group(groupNum, dest, *status); return regexp->fMatcher->group(groupNum, dest, *status);
@ -759,14 +759,14 @@ uregex_groupUTextDeep(URegularExpression *regexp2,
// uregex_start // uregex_start
// //
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
U_CAPI int32_t U_EXPORT2 U_CAPI int32_t U_EXPORT2
uregex_start(URegularExpression *regexp2, uregex_start(URegularExpression *regexp2,
int32_t groupNum, int32_t groupNum,
UErrorCode *status) { UErrorCode *status) {
return (int32_t)uregex_start64( regexp2, groupNum, status); return (int32_t)uregex_start64( regexp2, groupNum, status);
} }
U_CAPI int64_t U_EXPORT2 U_CAPI int64_t U_EXPORT2
uregex_start64(URegularExpression *regexp2, uregex_start64(URegularExpression *regexp2,
int32_t groupNum, int32_t groupNum,
UErrorCode *status) { UErrorCode *status) {
@ -783,14 +783,14 @@ uregex_start64(URegularExpression *regexp2,
// uregex_end // uregex_end
// //
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
U_CAPI int32_t U_EXPORT2 U_CAPI int32_t U_EXPORT2
uregex_end(URegularExpression *regexp2, uregex_end(URegularExpression *regexp2,
int32_t groupNum, int32_t groupNum,
UErrorCode *status) { UErrorCode *status) {
return (int32_t)uregex_end64( regexp2, groupNum, status); return (int32_t)uregex_end64( regexp2, groupNum, status);
} }
U_CAPI int64_t U_EXPORT2 U_CAPI int64_t U_EXPORT2
uregex_end64(URegularExpression *regexp2, uregex_end64(URegularExpression *regexp2,
int32_t groupNum, int32_t groupNum,
UErrorCode *status) { UErrorCode *status) {
@ -807,14 +807,14 @@ uregex_end64(URegularExpression *regexp2,
// uregex_reset // uregex_reset
// //
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
U_CAPI void U_EXPORT2 U_CAPI void U_EXPORT2
uregex_reset(URegularExpression *regexp2, uregex_reset(URegularExpression *regexp2,
int32_t index, int32_t index,
UErrorCode *status) { UErrorCode *status) {
uregex_reset64( regexp2, (int64_t)index, status); uregex_reset64( regexp2, (int64_t)index, status);
} }
U_CAPI void U_EXPORT2 U_CAPI void U_EXPORT2
uregex_reset64(URegularExpression *regexp2, uregex_reset64(URegularExpression *regexp2,
int64_t index, int64_t index,
UErrorCode *status) { UErrorCode *status) {
@ -831,7 +831,7 @@ uregex_reset64(URegularExpression *regexp2,
// uregex_setRegion // uregex_setRegion
// //
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
U_CAPI void U_EXPORT2 U_CAPI void U_EXPORT2
uregex_setRegion(URegularExpression *regexp2, uregex_setRegion(URegularExpression *regexp2,
int32_t regionStart, int32_t regionStart,
int32_t regionLimit, int32_t regionLimit,
@ -839,7 +839,7 @@ uregex_setRegion(URegularExpression *regexp2,
uregex_setRegion64( regexp2, (int64_t)regionStart, (int64_t)regionLimit, status); uregex_setRegion64( regexp2, (int64_t)regionStart, (int64_t)regionLimit, status);
} }
U_CAPI void U_EXPORT2 U_CAPI void U_EXPORT2
uregex_setRegion64(URegularExpression *regexp2, uregex_setRegion64(URegularExpression *regexp2,
int64_t regionStart, int64_t regionStart,
int64_t regionLimit, int64_t regionLimit,
@ -857,7 +857,7 @@ uregex_setRegion64(URegularExpression *regexp2,
// uregex_setRegionAndStart // uregex_setRegionAndStart
// //
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
U_CAPI void U_EXPORT2 U_CAPI void U_EXPORT2
uregex_setRegionAndStart(URegularExpression *regexp2, uregex_setRegionAndStart(URegularExpression *regexp2,
int64_t regionStart, int64_t regionStart,
int64_t regionLimit, int64_t regionLimit,
@ -875,13 +875,13 @@ uregex_setRegionAndStart(URegularExpression *regexp2,
// uregex_regionStart // uregex_regionStart
// //
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
U_CAPI int32_t U_EXPORT2 U_CAPI int32_t U_EXPORT2
uregex_regionStart(const URegularExpression *regexp2, uregex_regionStart(const URegularExpression *regexp2,
UErrorCode *status) { UErrorCode *status) {
return (int32_t)uregex_regionStart64(regexp2, status); return (int32_t)uregex_regionStart64(regexp2, status);
} }
U_CAPI int64_t U_EXPORT2 U_CAPI int64_t U_EXPORT2
uregex_regionStart64(const URegularExpression *regexp2, uregex_regionStart64(const URegularExpression *regexp2,
UErrorCode *status) { UErrorCode *status) {
RegularExpression *regexp = (RegularExpression*)regexp2; RegularExpression *regexp = (RegularExpression*)regexp2;
@ -897,13 +897,13 @@ uregex_regionStart64(const URegularExpression *regexp2,
// uregex_regionEnd // uregex_regionEnd
// //
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
U_CAPI int32_t U_EXPORT2 U_CAPI int32_t U_EXPORT2
uregex_regionEnd(const URegularExpression *regexp2, uregex_regionEnd(const URegularExpression *regexp2,
UErrorCode *status) { UErrorCode *status) {
return (int32_t)uregex_regionEnd64(regexp2, status); return (int32_t)uregex_regionEnd64(regexp2, status);
} }
U_CAPI int64_t U_EXPORT2 U_CAPI int64_t U_EXPORT2
uregex_regionEnd64(const URegularExpression *regexp2, uregex_regionEnd64(const URegularExpression *regexp2,
UErrorCode *status) { UErrorCode *status) {
RegularExpression *regexp = (RegularExpression*)regexp2; RegularExpression *regexp = (RegularExpression*)regexp2;
@ -919,7 +919,7 @@ uregex_regionEnd64(const URegularExpression *regexp2,
// uregex_hasTransparentBounds // uregex_hasTransparentBounds
// //
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
U_CAPI UBool U_EXPORT2 U_CAPI UBool U_EXPORT2
uregex_hasTransparentBounds(const URegularExpression *regexp2, uregex_hasTransparentBounds(const URegularExpression *regexp2,
UErrorCode *status) { UErrorCode *status) {
RegularExpression *regexp = (RegularExpression*)regexp2; RegularExpression *regexp = (RegularExpression*)regexp2;
@ -935,7 +935,7 @@ uregex_hasTransparentBounds(const URegularExpression *regexp2,
// uregex_useTransparentBounds // uregex_useTransparentBounds
// //
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
U_CAPI void U_EXPORT2 U_CAPI void U_EXPORT2
uregex_useTransparentBounds(URegularExpression *regexp2, uregex_useTransparentBounds(URegularExpression *regexp2,
UBool b, UBool b,
UErrorCode *status) { UErrorCode *status) {
@ -952,7 +952,7 @@ uregex_useTransparentBounds(URegularExpression *regexp2,
// uregex_hasAnchoringBounds // uregex_hasAnchoringBounds
// //
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
U_CAPI UBool U_EXPORT2 U_CAPI UBool U_EXPORT2
uregex_hasAnchoringBounds(const URegularExpression *regexp2, uregex_hasAnchoringBounds(const URegularExpression *regexp2,
UErrorCode *status) { UErrorCode *status) {
RegularExpression *regexp = (RegularExpression*)regexp2; RegularExpression *regexp = (RegularExpression*)regexp2;
@ -968,7 +968,7 @@ uregex_hasAnchoringBounds(const URegularExpression *regexp2,
// uregex_useAnchoringBounds // uregex_useAnchoringBounds
// //
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
U_CAPI void U_EXPORT2 U_CAPI void U_EXPORT2
uregex_useAnchoringBounds(URegularExpression *regexp2, uregex_useAnchoringBounds(URegularExpression *regexp2,
UBool b, UBool b,
UErrorCode *status) { UErrorCode *status) {
@ -985,7 +985,7 @@ uregex_useAnchoringBounds(URegularExpression *regexp2,
// uregex_hitEnd // uregex_hitEnd
// //
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
U_CAPI UBool U_EXPORT2 U_CAPI UBool U_EXPORT2
uregex_hitEnd(const URegularExpression *regexp2, uregex_hitEnd(const URegularExpression *regexp2,
UErrorCode *status) { UErrorCode *status) {
RegularExpression *regexp = (RegularExpression*)regexp2; RegularExpression *regexp = (RegularExpression*)regexp2;
@ -1001,7 +1001,7 @@ uregex_hitEnd(const URegularExpression *regexp2,
// uregex_requireEnd // uregex_requireEnd
// //
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
U_CAPI UBool U_EXPORT2 U_CAPI UBool U_EXPORT2
uregex_requireEnd(const URegularExpression *regexp2, uregex_requireEnd(const URegularExpression *regexp2,
UErrorCode *status) { UErrorCode *status) {
RegularExpression *regexp = (RegularExpression*)regexp2; RegularExpression *regexp = (RegularExpression*)regexp2;
@ -1017,7 +1017,7 @@ uregex_requireEnd(const URegularExpression *regexp2,
// uregex_setTimeLimit // uregex_setTimeLimit
// //
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
U_CAPI void U_EXPORT2 U_CAPI void U_EXPORT2
uregex_setTimeLimit(URegularExpression *regexp2, uregex_setTimeLimit(URegularExpression *regexp2,
int32_t limit, int32_t limit,
UErrorCode *status) { UErrorCode *status) {
@ -1034,7 +1034,7 @@ uregex_setTimeLimit(URegularExpression *regexp2,
// uregex_getTimeLimit // uregex_getTimeLimit
// //
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
U_CAPI int32_t U_EXPORT2 U_CAPI int32_t U_EXPORT2
uregex_getTimeLimit(const URegularExpression *regexp2, uregex_getTimeLimit(const URegularExpression *regexp2,
UErrorCode *status) { UErrorCode *status) {
int32_t retVal = 0; int32_t retVal = 0;
@ -1052,7 +1052,7 @@ uregex_getTimeLimit(const URegularExpression *regexp2,
// uregex_setStackLimit // uregex_setStackLimit
// //
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
U_CAPI void U_EXPORT2 U_CAPI void U_EXPORT2
uregex_setStackLimit(URegularExpression *regexp2, uregex_setStackLimit(URegularExpression *regexp2,
int32_t limit, int32_t limit,
UErrorCode *status) { UErrorCode *status) {
@ -1069,7 +1069,7 @@ uregex_setStackLimit(URegularExpression *regexp2,
// uregex_getStackLimit // uregex_getStackLimit
// //
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
U_CAPI int32_t U_EXPORT2 U_CAPI int32_t U_EXPORT2
uregex_getStackLimit(const URegularExpression *regexp2, uregex_getStackLimit(const URegularExpression *regexp2,
UErrorCode *status) { UErrorCode *status) {
int32_t retVal = 0; int32_t retVal = 0;
@ -1103,7 +1103,7 @@ uregex_setMatchCallback(URegularExpression *regexp2,
// uregex_getMatchCallback // uregex_getMatchCallback
// //
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
U_CAPI void U_EXPORT2 U_CAPI void U_EXPORT2
uregex_getMatchCallback(const URegularExpression *regexp2, uregex_getMatchCallback(const URegularExpression *regexp2,
URegexMatchCallback **callback, URegexMatchCallback **callback,
const void **context, const void **context,
@ -1137,7 +1137,7 @@ uregex_setFindProgressCallback(URegularExpression *regexp2,
// uregex_getMatchCallback // uregex_getMatchCallback
// //
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
U_CAPI void U_EXPORT2 U_CAPI void U_EXPORT2
uregex_getFindProgressCallback(const URegularExpression *regexp2, uregex_getFindProgressCallback(const URegularExpression *regexp2,
URegexFindProgressCallback **callback, URegexFindProgressCallback **callback,
const void **context, const void **context,
@ -1154,7 +1154,7 @@ uregex_getFindProgressCallback(const URegularExpression *regexp2,
// uregex_replaceAll // uregex_replaceAll
// //
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
U_CAPI int32_t U_EXPORT2 U_CAPI int32_t U_EXPORT2
uregex_replaceAll(URegularExpression *regexp2, uregex_replaceAll(URegularExpression *regexp2,
const UChar *replacementText, const UChar *replacementText,
int32_t replacementLength, int32_t replacementLength,
@ -1187,7 +1187,7 @@ uregex_replaceAll(URegularExpression *regexp2,
&destBuf, &destCapacity, status); &destBuf, &destCapacity, status);
} }
len += uregex_appendTail(regexp2, &destBuf, &destCapacity, status); len += uregex_appendTail(regexp2, &destBuf, &destCapacity, status);
if (U_FAILURE(findStatus)) { if (U_FAILURE(findStatus)) {
// If anything went wrong with the findNext(), make that error trump // If anything went wrong with the findNext(), make that error trump
// whatever may have happened with the append() operations. // whatever may have happened with the append() operations.
@ -1204,7 +1204,7 @@ uregex_replaceAll(URegularExpression *regexp2,
// uregex_replaceAllUText // uregex_replaceAllUText
// //
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
U_CAPI UText * U_EXPORT2 U_CAPI UText * U_EXPORT2
uregex_replaceAllUText(URegularExpression *regexp2, uregex_replaceAllUText(URegularExpression *regexp2,
UText *replacementText, UText *replacementText,
UText *dest, UText *dest,
@ -1217,18 +1217,18 @@ uregex_replaceAllUText(URegularExpression *regexp2,
*status = U_ILLEGAL_ARGUMENT_ERROR; *status = U_ILLEGAL_ARGUMENT_ERROR;
return 0; return 0;
} }
dest = regexp->fMatcher->replaceAll(replacementText, dest, *status); dest = regexp->fMatcher->replaceAll(replacementText, dest, *status);
return dest; return dest;
} }
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
// //
// uregex_replaceFirst // uregex_replaceFirst
// //
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
U_CAPI int32_t U_EXPORT2 U_CAPI int32_t U_EXPORT2
uregex_replaceFirst(URegularExpression *regexp2, uregex_replaceFirst(URegularExpression *regexp2,
const UChar *replacementText, const UChar *replacementText,
int32_t replacementLength, int32_t replacementLength,
@ -1251,7 +1251,7 @@ uregex_replaceFirst(URegularExpression *regexp2,
uregex_reset(regexp2, 0, status); uregex_reset(regexp2, 0, status);
findSucceeded = uregex_find(regexp2, 0, status); findSucceeded = uregex_find(regexp2, 0, status);
if (findSucceeded) { if (findSucceeded) {
len = uregex_appendReplacement(regexp2, replacementText, replacementLength, len = uregex_appendReplacement(regexp2, replacementText, replacementLength,
&destBuf, &destCapacity, status); &destBuf, &destCapacity, status);
} }
len += uregex_appendTail(regexp2, &destBuf, &destCapacity, status); len += uregex_appendTail(regexp2, &destBuf, &destCapacity, status);
@ -1265,7 +1265,7 @@ uregex_replaceFirst(URegularExpression *regexp2,
// uregex_replaceFirstUText // uregex_replaceFirstUText
// //
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
U_CAPI UText * U_EXPORT2 U_CAPI UText * U_EXPORT2
uregex_replaceFirstUText(URegularExpression *regexp2, uregex_replaceFirstUText(URegularExpression *regexp2,
UText *replacementText, UText *replacementText,
UText *dest, UText *dest,
@ -1278,7 +1278,7 @@ uregex_replaceFirstUText(URegularExpression *regexp2,
*status = U_ILLEGAL_ARGUMENT_ERROR; *status = U_ILLEGAL_ARGUMENT_ERROR;
return 0; return 0;
} }
dest = regexp->fMatcher->replaceFirst(replacementText, dest, *status); dest = regexp->fMatcher->replaceFirst(replacementText, dest, *status);
return dest; return dest;
} }
@ -1308,7 +1308,7 @@ class RegexCImpl {
UChar **destBuf, UChar **destBuf,
int32_t *destCapacity, int32_t *destCapacity,
UErrorCode *status); UErrorCode *status);
inline static int32_t split(RegularExpression *regexp, inline static int32_t split(RegularExpression *regexp,
UChar *destBuf, UChar *destBuf,
int32_t destCapacity, int32_t destCapacity,
@ -1364,7 +1364,7 @@ int32_t RegexCImpl::appendReplacement(RegularExpression *regexp,
return 0; return 0;
} }
if (replacementText == NULL || replacementLength < -1 || if (replacementText == NULL || replacementLength < -1 ||
destCapacity == NULL || destBuf == NULL || destCapacity == NULL || destBuf == NULL ||
(*destBuf == NULL && *destCapacity > 0) || (*destBuf == NULL && *destCapacity > 0) ||
*destCapacity < 0) { *destCapacity < 0) {
*status = U_ILLEGAL_ARGUMENT_ERROR; *status = U_ILLEGAL_ARGUMENT_ERROR;
@ -1381,7 +1381,7 @@ int32_t RegexCImpl::appendReplacement(RegularExpression *regexp,
int32_t capacity = *destCapacity; int32_t capacity = *destCapacity;
int32_t destIdx = 0; int32_t destIdx = 0;
int32_t i; int32_t i;
// If it wasn't supplied by the caller, get the length of the replacement text. // If it wasn't supplied by the caller, get the length of the replacement text.
// TODO: slightly smarter logic in the copy loop could watch for the NUL on // TODO: slightly smarter logic in the copy loop could watch for the NUL on
// the fly and avoid this step. // the fly and avoid this step.
@ -1405,7 +1405,7 @@ int32_t RegexCImpl::appendReplacement(RegularExpression *regexp,
} }
for (i=lastMatchEnd; i<matchStart; i++) { for (i=lastMatchEnd; i<matchStart; i++) {
appendToBuf(regexp->fText[i], &destIdx, dest, capacity); appendToBuf(regexp->fText[i], &destIdx, dest, capacity);
} }
} else { } else {
UErrorCode possibleOverflowError = U_ZERO_ERROR; // ignore UErrorCode possibleOverflowError = U_ZERO_ERROR; // ignore
destIdx += utext_extract(m->fInputText, m->fLastMatchEnd, m->fMatchStart, destIdx += utext_extract(m->fInputText, m->fLastMatchEnd, m->fMatchStart,
@ -1420,7 +1420,7 @@ int32_t RegexCImpl::appendReplacement(RegularExpression *regexp,
UChar c = replacementText[replIdx]; UChar c = replacementText[replIdx];
replIdx++; replIdx++;
if (c != DOLLARSIGN && c != BACKSLASH) { if (c != DOLLARSIGN && c != BACKSLASH) {
// Common case, no substitution, no escaping, // Common case, no substitution, no escaping,
// just copy the char to the dest buf. // just copy the char to the dest buf.
appendToBuf(c, &destIdx, dest, capacity); appendToBuf(c, &destIdx, dest, capacity);
continue; continue;
@ -1439,9 +1439,9 @@ int32_t RegexCImpl::appendReplacement(RegularExpression *regexp,
if (c==0x55/*U*/ || c==0x75/*u*/) { if (c==0x55/*U*/ || c==0x75/*u*/) {
// We have a \udddd or \Udddddddd escape sequence. // We have a \udddd or \Udddddddd escape sequence.
UChar32 escapedChar = UChar32 escapedChar =
u_unescapeAt(uregex_ucstr_unescape_charAt, u_unescapeAt(uregex_ucstr_unescape_charAt,
&replIdx, // Index is updated by unescapeAt &replIdx, // Index is updated by unescapeAt
replacementLength, // Length of replacement text replacementLength, // Length of replacement text
(void *)replacementText); (void *)replacementText);
@ -1527,7 +1527,7 @@ int32_t RegexCImpl::appendReplacement(RegularExpression *regexp,
} else { } else {
*status = U_BUFFER_OVERFLOW_ERROR; *status = U_BUFFER_OVERFLOW_ERROR;
} }
// //
// Return an updated dest buffer and capacity to the caller. // Return an updated dest buffer and capacity to the caller.
// //
@ -1554,14 +1554,14 @@ int32_t RegexCImpl::appendReplacement(RegularExpression *regexp,
// //
// appendReplacement the actual API function, // appendReplacement the actual API function,
// //
U_CAPI int32_t U_EXPORT2 U_CAPI int32_t U_EXPORT2
uregex_appendReplacement(URegularExpression *regexp2, uregex_appendReplacement(URegularExpression *regexp2,
const UChar *replacementText, const UChar *replacementText,
int32_t replacementLength, int32_t replacementLength,
UChar **destBuf, UChar **destBuf,
int32_t *destCapacity, int32_t *destCapacity,
UErrorCode *status) { UErrorCode *status) {
RegularExpression *regexp = (RegularExpression*)regexp2; RegularExpression *regexp = (RegularExpression*)regexp2;
return RegexCImpl::appendReplacement( return RegexCImpl::appendReplacement(
regexp, replacementText, replacementLength,destBuf, destCapacity, status); regexp, replacementText, replacementLength,destBuf, destCapacity, status);
@ -1570,7 +1570,7 @@ uregex_appendReplacement(URegularExpression *regexp2,
// //
// uregex_appendReplacementUText...can just use the normal C++ method // uregex_appendReplacementUText...can just use the normal C++ method
// //
U_CAPI void U_EXPORT2 U_CAPI void U_EXPORT2
uregex_appendReplacementUText(URegularExpression *regexp2, uregex_appendReplacementUText(URegularExpression *regexp2,
UText *replText, UText *replText,
UText *dest, UText *dest,
@ -1603,8 +1603,8 @@ int32_t RegexCImpl::appendTail(RegularExpression *regexp,
if (validateRE(regexp, TRUE, status) == FALSE) { if (validateRE(regexp, TRUE, status) == FALSE) {
return 0; return 0;
} }
if (destCapacity == NULL || destBuf == NULL || if (destCapacity == NULL || destBuf == NULL ||
(*destBuf == NULL && *destCapacity > 0) || (*destBuf == NULL && *destCapacity > 0) ||
*destCapacity < 0) *destCapacity < 0)
{ {
@ -1617,7 +1617,7 @@ int32_t RegexCImpl::appendTail(RegularExpression *regexp,
int32_t destIdx = 0; int32_t destIdx = 0;
int32_t destCap = *destCapacity; int32_t destCap = *destCapacity;
UChar *dest = *destBuf; UChar *dest = *destBuf;
if (regexp->fText != NULL) { if (regexp->fText != NULL) {
int32_t srcIdx; int32_t srcIdx;
int64_t nativeIdx = (m->fMatch ? m->fMatchEnd : m->fLastMatchEnd); int64_t nativeIdx = (m->fMatch ? m->fMatchEnd : m->fLastMatchEnd);
@ -1629,7 +1629,7 @@ int32_t RegexCImpl::appendTail(RegularExpression *regexp,
UErrorCode status = U_ZERO_ERROR; UErrorCode status = U_ZERO_ERROR;
srcIdx = utext_extract(m->fInputText, 0, nativeIdx, NULL, 0, &status); srcIdx = utext_extract(m->fInputText, 0, nativeIdx, NULL, 0, &status);
} }
for (;;) { for (;;) {
U_ASSERT(destIdx >= 0); U_ASSERT(destIdx >= 0);
@ -1655,11 +1655,11 @@ int32_t RegexCImpl::appendTail(RegularExpression *regexp,
} }
srcIdx++; srcIdx++;
destIdx++; destIdx++;
} }
} else { } else {
int64_t srcIdx; int64_t srcIdx;
if (m->fMatch) { if (m->fMatch) {
// The most recent call to find() succeeded. // The most recent call to find() succeeded.
srcIdx = m->fMatchEnd; srcIdx = m->fMatchEnd;
} else { } else {
// The last call to find() on this matcher failed(). // The last call to find() on this matcher failed().
@ -1710,7 +1710,7 @@ int32_t RegexCImpl::appendTail(RegularExpression *regexp,
// //
// appendTail the actual API function // appendTail the actual API function
// //
U_CAPI int32_t U_EXPORT2 U_CAPI int32_t U_EXPORT2
uregex_appendTail(URegularExpression *regexp2, uregex_appendTail(URegularExpression *regexp2,
UChar **destBuf, UChar **destBuf,
int32_t *destCapacity, int32_t *destCapacity,
@ -1723,7 +1723,7 @@ uregex_appendTail(URegularExpression *regexp2,
// //
// uregex_appendTailUText...can just use the normal C++ method // uregex_appendTailUText...can just use the normal C++ method
// //
U_CAPI UText * U_EXPORT2 U_CAPI UText * U_EXPORT2
uregex_appendTailUText(URegularExpression *regexp2, uregex_appendTailUText(URegularExpression *regexp2,
UText *dest, UText *dest,
UErrorCode *status) { UErrorCode *status) {
@ -1815,19 +1815,19 @@ int32_t RegexCImpl::split(RegularExpression *regexp,
i = destFieldsCapacity-1; i = destFieldsCapacity-1;
destIdx = (int32_t)(destFields[i] - destFields[0]); destIdx = (int32_t)(destFields[i] - destFields[0]);
} }
destFields[i] = &destBuf[destIdx]; destFields[i] = &destBuf[destIdx];
destIdx += 1 + utext_extract(inputText, nextOutputStringStart, inputLen, destIdx += 1 + utext_extract(inputText, nextOutputStringStart, inputLen,
&destBuf[destIdx], REMAINING_CAPACITY(destIdx, destCapacity), status); &destBuf[destIdx], REMAINING_CAPACITY(destIdx, destCapacity), status);
} }
break; break;
} }
if (regexp->fMatcher->find()) { if (regexp->fMatcher->find()) {
// We found another delimiter. Move everything from where we started looking // We found another delimiter. Move everything from where we started looking
// up until the start of the delimiter into the next output string. // up until the start of the delimiter into the next output string.
destFields[i] = &destBuf[destIdx]; destFields[i] = &destBuf[destIdx];
destIdx += 1 + utext_extract(inputText, nextOutputStringStart, regexp->fMatcher->fMatchStart, destIdx += 1 + utext_extract(inputText, nextOutputStringStart, regexp->fMatcher->fMatchStart,
&destBuf[destIdx], REMAINING_CAPACITY(destIdx, destCapacity), &tStatus); &destBuf[destIdx], REMAINING_CAPACITY(destIdx, destCapacity), &tStatus);
if (tStatus == U_BUFFER_OVERFLOW_ERROR) { if (tStatus == U_BUFFER_OVERFLOW_ERROR) {
@ -1836,7 +1836,7 @@ int32_t RegexCImpl::split(RegularExpression *regexp,
*status = tStatus; *status = tStatus;
} }
nextOutputStringStart = regexp->fMatcher->fMatchEnd; nextOutputStringStart = regexp->fMatcher->fMatchEnd;
// If the delimiter pattern has capturing parentheses, the captured // If the delimiter pattern has capturing parentheses, the captured
// text goes out into the next n destination strings. // text goes out into the next n destination strings.
int32_t groupNum; int32_t groupNum;
@ -1846,14 +1846,14 @@ int32_t RegexCImpl::split(RegularExpression *regexp,
break; break;
} }
i++; i++;
// Set up to extract the capture group contents into the dest buffer. // Set up to extract the capture group contents into the dest buffer.
destFields[i] = &destBuf[destIdx]; destFields[i] = &destBuf[destIdx];
tStatus = U_ZERO_ERROR; tStatus = U_ZERO_ERROR;
int32_t t = uregex_group((URegularExpression*)regexp, int32_t t = uregex_group((URegularExpression*)regexp,
groupNum, groupNum,
destFields[i], destFields[i],
REMAINING_CAPACITY(destIdx, destCapacity), REMAINING_CAPACITY(destIdx, destCapacity),
&tStatus); &tStatus);
destIdx += t + 1; // Record the space used in the output string buffer. destIdx += t + 1; // Record the space used in the output string buffer.
// +1 for the NUL that terminates the string. // +1 for the NUL that terminates the string.
@ -1865,7 +1865,7 @@ int32_t RegexCImpl::split(RegularExpression *regexp,
} }
if (nextOutputStringStart == inputLen) { if (nextOutputStringStart == inputLen) {
// The delimiter was at the end of the string. // The delimiter was at the end of the string.
// Output an empty string, and then we are done. // Output an empty string, and then we are done.
if (destIdx < destCapacity) { if (destIdx < destCapacity) {
destBuf[destIdx] = 0; destBuf[destIdx] = 0;
@ -1910,7 +1910,7 @@ int32_t RegexCImpl::split(RegularExpression *regexp,
// //
// uregex_split The actual API function // uregex_split The actual API function
// //
U_CAPI int32_t U_EXPORT2 U_CAPI int32_t U_EXPORT2
uregex_split(URegularExpression *regexp2, uregex_split(URegularExpression *regexp2,
UChar *destBuf, UChar *destBuf,
int32_t destCapacity, int32_t destCapacity,
@ -1929,15 +1929,15 @@ uregex_split(URegularExpression *regexp2,
*status = U_ILLEGAL_ARGUMENT_ERROR; *status = U_ILLEGAL_ARGUMENT_ERROR;
return 0; return 0;
} }
return RegexCImpl::split(regexp, destBuf, destCapacity, requiredCapacity, destFields, destFieldsCapacity, status); return RegexCImpl::split(regexp, destBuf, destCapacity, requiredCapacity, destFields, destFieldsCapacity, status);
} }
// //
// uregex_splitUText...can just use the normal C++ method // uregex_splitUText...can just use the normal C++ method
// //
U_CAPI int32_t U_EXPORT2 U_CAPI int32_t U_EXPORT2
uregex_splitUText(URegularExpression *regexp2, uregex_splitUText(URegularExpression *regexp2,
UText *destFields[], UText *destFields[],
int32_t destFieldsCapacity, int32_t destFieldsCapacity,

View File

@ -1,6 +1,6 @@
/******************************************************************** /********************************************************************
* COPYRIGHT: * COPYRIGHT:
* Copyright (c) 2004-2013, International Business Machines Corporation and * Copyright (c) 2004-2014, International Business Machines Corporation and
* others. All Rights Reserved. * others. All Rights Reserved.
********************************************************************/ ********************************************************************/
/******************************************************************************** /********************************************************************************
@ -29,11 +29,13 @@
#include "unicode/utext.h" #include "unicode/utext.h"
#include "cintltst.h" #include "cintltst.h"
#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
#define TEST_ASSERT_SUCCESS(status) {if (U_FAILURE(status)) { \ #define TEST_ASSERT_SUCCESS(status) {if (U_FAILURE(status)) { \
log_data_err("Failure at file %s, line %d, error = %s (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status));}} log_data_err("Failure at file %s:%d - error = %s (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status));}}
#define TEST_ASSERT(expr) {if ((expr)==FALSE) { \ #define TEST_ASSERT(expr) {if ((expr)==FALSE) { \
log_data_err("Test Failure at file %s, line %d (Are you missing data?)\n", __FILE__, __LINE__);}} log_err("Test Failure at file %s:%d - ASSERT(%s) failed.\n", __FILE__, __LINE__, #expr);}}
/* /*
* TEST_SETUP and TEST_TEARDOWN * TEST_SETUP and TEST_TEARDOWN
@ -158,6 +160,7 @@ static void TestBug4315(void);
static void TestUTextAPI(void); static void TestUTextAPI(void);
static void TestRefreshInput(void); static void TestRefreshInput(void);
static void TestBug8421(void); static void TestBug8421(void);
static void TestBug10815(void);
void addURegexTest(TestNode** root); void addURegexTest(TestNode** root);
@ -168,6 +171,7 @@ void addURegexTest(TestNode** root)
addTest(root, &TestUTextAPI, "regex/TestUTextAPI"); addTest(root, &TestUTextAPI, "regex/TestUTextAPI");
addTest(root, &TestRefreshInput, "regex/TestRefreshInput"); addTest(root, &TestRefreshInput, "regex/TestRefreshInput");
addTest(root, &TestBug8421, "regex/TestBug8421"); addTest(root, &TestBug8421, "regex/TestBug8421");
addTest(root, &TestBug10815, "regex/TestBug10815");
} }
/* /*
@ -204,7 +208,7 @@ static void TestRegexCAPI(void) {
memset(&minus1, -1, sizeof(minus1)); memset(&minus1, -1, sizeof(minus1));
/* Minimalist open/close */ /* Minimalist open/close */
u_uastrncpy(pat, "abc*", sizeof(pat)/2); u_uastrncpy(pat, "abc*", LENGTHOF(pat));
re = uregex_open(pat, -1, 0, 0, &status); re = uregex_open(pat, -1, 0, 0, &status);
if (U_FAILURE(status)) { if (U_FAILURE(status)) {
log_data_err("Failed to open regular expression, %s:%d, error is \"%s\" (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status)); log_data_err("Failed to open regular expression, %s:%d, error is \"%s\" (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status));
@ -256,7 +260,7 @@ static void TestRegexCAPI(void) {
/* The TEST_ASSERT_SUCCESS above should change too... */ /* The TEST_ASSERT_SUCCESS above should change too... */
if(U_SUCCESS(status)) { if(U_SUCCESS(status)) {
u_uastrncpy(pat, "abc*", sizeof(pat)/2); u_uastrncpy(pat, "abc*", LENGTHOF(pat));
TEST_ASSERT(u_strcmp(pat, p) == 0); TEST_ASSERT(u_strcmp(pat, p) == 0);
TEST_ASSERT(len==(int32_t)strlen("abc*")); TEST_ASSERT(len==(int32_t)strlen("abc*"));
} }
@ -296,8 +300,8 @@ static void TestRegexCAPI(void) {
TEST_ASSERT_SUCCESS(status); TEST_ASSERT_SUCCESS(status);
TEST_ASSERT(clone3 != NULL); TEST_ASSERT(clone3 != NULL);
u_uastrncpy(testString1, "abcccd", sizeof(pat)/2); u_uastrncpy(testString1, "abcccd", LENGTHOF(pat));
u_uastrncpy(testString2, "xxxabcccd", sizeof(pat)/2); u_uastrncpy(testString2, "xxxabcccd", LENGTHOF(pat));
status = U_ZERO_ERROR; status = U_ZERO_ERROR;
uregex_setText(clone1, testString1, -1, &status); uregex_setText(clone1, testString1, -1, &status);
@ -328,7 +332,7 @@ static void TestRegexCAPI(void) {
{ {
const UChar *resultPat; const UChar *resultPat;
int32_t resultLen; int32_t resultLen;
u_uastrncpy(pat, "hello", sizeof(pat)/2); u_uastrncpy(pat, "hello", LENGTHOF(pat));
status = U_ZERO_ERROR; status = U_ZERO_ERROR;
re = uregex_open(pat, -1, 0, NULL, &status); re = uregex_open(pat, -1, 0, NULL, &status);
resultPat = uregex_pattern(re, &resultLen, &status); resultPat = uregex_pattern(re, &resultLen, &status);
@ -394,10 +398,10 @@ static void TestRegexCAPI(void) {
UChar text2[50]; UChar text2[50];
UBool result; UBool result;
u_uastrncpy(text1, "abcccd", sizeof(text1)/2); u_uastrncpy(text1, "abcccd", LENGTHOF(text1));
u_uastrncpy(text2, "abcccxd", sizeof(text2)/2); u_uastrncpy(text2, "abcccxd", LENGTHOF(text2));
status = U_ZERO_ERROR; status = U_ZERO_ERROR;
u_uastrncpy(pat, "abc*d", sizeof(pat)/2); u_uastrncpy(pat, "abc*d", LENGTHOF(pat));
re = uregex_open(pat, -1, 0, NULL, &status); re = uregex_open(pat, -1, 0, NULL, &status);
TEST_ASSERT_SUCCESS(status); TEST_ASSERT_SUCCESS(status);
@ -449,10 +453,10 @@ static void TestRegexCAPI(void) {
const UChar *result; const UChar *result;
int32_t textLength; int32_t textLength;
u_uastrncpy(text1, "abcccd", sizeof(text1)/2); u_uastrncpy(text1, "abcccd", LENGTHOF(text1));
u_uastrncpy(text2, "abcccxd", sizeof(text2)/2); u_uastrncpy(text2, "abcccxd", LENGTHOF(text2));
status = U_ZERO_ERROR; status = U_ZERO_ERROR;
u_uastrncpy(pat, "abc*d", sizeof(pat)/2); u_uastrncpy(pat, "abc*d", LENGTHOF(pat));
re = uregex_open(pat, -1, 0, NULL, &status); re = uregex_open(pat, -1, 0, NULL, &status);
uregex_setText(re, text1, -1, &status); uregex_setText(re, text1, -1, &status);
@ -486,9 +490,9 @@ static void TestRegexCAPI(void) {
int len; int len;
UChar nullString[] = {0,0,0}; UChar nullString[] = {0,0,0};
u_uastrncpy(text1, "abcccde", sizeof(text1)/2); u_uastrncpy(text1, "abcccde", LENGTHOF(text1));
status = U_ZERO_ERROR; status = U_ZERO_ERROR;
u_uastrncpy(pat, "abc*d", sizeof(pat)/2); u_uastrncpy(pat, "abc*d", LENGTHOF(pat));
re = uregex_open(pat, -1, 0, NULL, &status); re = uregex_open(pat, -1, 0, NULL, &status);
uregex_setText(re, text1, -1, &status); uregex_setText(re, text1, -1, &status);
@ -538,7 +542,7 @@ static void TestRegexCAPI(void) {
{ {
UChar text1[50]; UChar text1[50];
UBool result; UBool result;
u_uastrncpy(text1, "012rx5rx890rxrx...", sizeof(text1)/2); u_uastrncpy(text1, "012rx5rx890rxrx...", LENGTHOF(text1));
status = U_ZERO_ERROR; status = U_ZERO_ERROR;
re = uregex_openC("rx", 0, NULL, &status); re = uregex_openC("rx", 0, NULL, &status);
@ -621,7 +625,7 @@ static void TestRegexCAPI(void) {
UChar buf[80]; UChar buf[80];
UBool result; UBool result;
int32_t resultSz; int32_t resultSz;
u_uastrncpy(text1, "noise abc interior def, and this is off the end", sizeof(text1)/2); u_uastrncpy(text1, "noise abc interior def, and this is off the end", LENGTHOF(text1));
status = U_ZERO_ERROR; status = U_ZERO_ERROR;
re = uregex_openC("abc(.*?)def", 0, NULL, &status); re = uregex_openC("abc(.*?)def", 0, NULL, &status);
@ -634,21 +638,21 @@ static void TestRegexCAPI(void) {
/* Capture Group 0, the full match. Should succeed. */ /* Capture Group 0, the full match. Should succeed. */
status = U_ZERO_ERROR; status = U_ZERO_ERROR;
resultSz = uregex_group(re, 0, buf, sizeof(buf)/2, &status); resultSz = uregex_group(re, 0, buf, LENGTHOF(buf), &status);
TEST_ASSERT_SUCCESS(status); TEST_ASSERT_SUCCESS(status);
TEST_ASSERT_STRING("abc interior def", buf, TRUE); TEST_ASSERT_STRING("abc interior def", buf, TRUE);
TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def")); TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
/* Capture group #1. Should succeed. */ /* Capture group #1. Should succeed. */
status = U_ZERO_ERROR; status = U_ZERO_ERROR;
resultSz = uregex_group(re, 1, buf, sizeof(buf)/2, &status); resultSz = uregex_group(re, 1, buf, LENGTHOF(buf), &status);
TEST_ASSERT_SUCCESS(status); TEST_ASSERT_SUCCESS(status);
TEST_ASSERT_STRING(" interior ", buf, TRUE); TEST_ASSERT_STRING(" interior ", buf, TRUE);
TEST_ASSERT(resultSz == (int32_t)strlen(" interior ")); TEST_ASSERT(resultSz == (int32_t)strlen(" interior "));
/* Capture group out of range. Error. */ /* Capture group out of range. Error. */
status = U_ZERO_ERROR; status = U_ZERO_ERROR;
uregex_group(re, 2, buf, sizeof(buf)/2, &status); uregex_group(re, 2, buf, LENGTHOF(buf), &status);
TEST_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR); TEST_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
/* NULL buffer, pure pre-flight */ /* NULL buffer, pure pre-flight */
@ -692,7 +696,7 @@ static void TestRegexCAPI(void) {
TEST_ASSERT(uregex_regionStart(re, &status) == 3); TEST_ASSERT(uregex_regionStart(re, &status) == 3);
TEST_ASSERT(uregex_regionEnd(re, &status) == 6); TEST_ASSERT(uregex_regionEnd(re, &status) == 6);
TEST_ASSERT(uregex_findNext(re, &status)); TEST_ASSERT(uregex_findNext(re, &status));
TEST_ASSERT(uregex_group(re, 0, resultString, sizeof(resultString)/2, &status) == 3) TEST_ASSERT(uregex_group(re, 0, resultString, LENGTHOF(resultString), &status) == 3)
TEST_ASSERT_STRING("345", resultString, TRUE); TEST_ASSERT_STRING("345", resultString, TRUE);
TEST_TEARDOWN; TEST_TEARDOWN;
@ -816,9 +820,9 @@ static void TestRegexCAPI(void) {
UChar replText[80]; UChar replText[80];
UChar buf[80]; UChar buf[80];
int32_t resultSz; int32_t resultSz;
u_uastrncpy(text1, "Replace xaax x1x x...x.", sizeof(text1)/2); u_uastrncpy(text1, "Replace xaax x1x x...x.", LENGTHOF(text1));
u_uastrncpy(text2, "No match here.", sizeof(text2)/2); u_uastrncpy(text2, "No match here.", LENGTHOF(text2));
u_uastrncpy(replText, "<$1>", sizeof(replText)/2); u_uastrncpy(replText, "<$1>", LENGTHOF(replText));
status = U_ZERO_ERROR; status = U_ZERO_ERROR;
re = uregex_openC("x(.*?)x", 0, NULL, &status); re = uregex_openC("x(.*?)x", 0, NULL, &status);
@ -826,7 +830,7 @@ static void TestRegexCAPI(void) {
/* Normal case, with match */ /* Normal case, with match */
uregex_setText(re, text1, -1, &status); uregex_setText(re, text1, -1, &status);
resultSz = uregex_replaceFirst(re, replText, -1, buf, sizeof(buf)/2, &status); resultSz = uregex_replaceFirst(re, replText, -1, buf, LENGTHOF(buf), &status);
TEST_ASSERT_SUCCESS(status); TEST_ASSERT_SUCCESS(status);
TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, TRUE); TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, TRUE);
TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x.")); TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
@ -834,7 +838,7 @@ static void TestRegexCAPI(void) {
/* No match. Text should copy to output with no changes. */ /* No match. Text should copy to output with no changes. */
status = U_ZERO_ERROR; status = U_ZERO_ERROR;
uregex_setText(re, text2, -1, &status); uregex_setText(re, text2, -1, &status);
resultSz = uregex_replaceFirst(re, replText, -1, buf, sizeof(buf)/2, &status); resultSz = uregex_replaceFirst(re, replText, -1, buf, LENGTHOF(buf), &status);
TEST_ASSERT_SUCCESS(status); TEST_ASSERT_SUCCESS(status);
TEST_ASSERT_STRING("No match here.", buf, TRUE); TEST_ASSERT_STRING("No match here.", buf, TRUE);
TEST_ASSERT(resultSz == (int32_t)strlen("No match here.")); TEST_ASSERT(resultSz == (int32_t)strlen("No match here."));
@ -896,10 +900,10 @@ static void TestRegexCAPI(void) {
int32_t expectedResultSize2; int32_t expectedResultSize2;
int32_t i; int32_t i;
u_uastrncpy(text1, "Replace xaax x1x x...x.", sizeof(text1)/2); u_uastrncpy(text1, "Replace xaax x1x x...x.", LENGTHOF(text1));
u_uastrncpy(text2, "No match here.", sizeof(text2)/2); u_uastrncpy(text2, "No match here.", LENGTHOF(text2));
u_uastrncpy(replText, "<$1>", sizeof(replText)/2); u_uastrncpy(replText, "<$1>", LENGTHOF(replText));
u_uastrncpy(replText2, "<<$1>>", sizeof(replText2)/2); u_uastrncpy(replText2, "<<$1>>", LENGTHOF(replText2));
expectedResultSize = strlen(expectedResult); expectedResultSize = strlen(expectedResult);
expectedResultSize2 = strlen(expectedResult2); expectedResultSize2 = strlen(expectedResult2);
@ -909,7 +913,7 @@ static void TestRegexCAPI(void) {
/* Normal case, with match */ /* Normal case, with match */
uregex_setText(re, text1, -1, &status); uregex_setText(re, text1, -1, &status);
resultSize = uregex_replaceAll(re, replText, -1, buf, sizeof(buf)/2, &status); resultSize = uregex_replaceAll(re, replText, -1, buf, LENGTHOF(buf), &status);
TEST_ASSERT_SUCCESS(status); TEST_ASSERT_SUCCESS(status);
TEST_ASSERT_STRING(expectedResult, buf, TRUE); TEST_ASSERT_STRING(expectedResult, buf, TRUE);
TEST_ASSERT(resultSize == expectedResultSize); TEST_ASSERT(resultSize == expectedResultSize);
@ -917,7 +921,7 @@ static void TestRegexCAPI(void) {
/* No match. Text should copy to output with no changes. */ /* No match. Text should copy to output with no changes. */
status = U_ZERO_ERROR; status = U_ZERO_ERROR;
uregex_setText(re, text2, -1, &status); uregex_setText(re, text2, -1, &status);
resultSize = uregex_replaceAll(re, replText, -1, buf, sizeof(buf)/2, &status); resultSize = uregex_replaceAll(re, replText, -1, buf, LENGTHOF(buf), &status);
TEST_ASSERT_SUCCESS(status); TEST_ASSERT_SUCCESS(status);
TEST_ASSERT_STRING("No match here.", buf, TRUE); TEST_ASSERT_STRING("No match here.", buf, TRUE);
TEST_ASSERT(resultSize == u_strlen(text2)); TEST_ASSERT(resultSize == u_strlen(text2));
@ -1001,15 +1005,15 @@ static void TestRegexCAPI(void) {
re = uregex_openC(".*", 0, 0, &status); re = uregex_openC(".*", 0, 0, &status);
TEST_ASSERT_SUCCESS(status); TEST_ASSERT_SUCCESS(status);
u_uastrncpy(text, "whatever", sizeof(text)/2); u_uastrncpy(text, "whatever", LENGTHOF(text));
u_uastrncpy(repl, "some other", sizeof(repl)/2); u_uastrncpy(repl, "some other", LENGTHOF(repl));
uregex_setText(re, text, -1, &status); uregex_setText(re, text, -1, &status);
/* match covers whole target string */ /* match covers whole target string */
uregex_find(re, 0, &status); uregex_find(re, 0, &status);
TEST_ASSERT_SUCCESS(status); TEST_ASSERT_SUCCESS(status);
bufPtr = buf; bufPtr = buf;
bufCap = sizeof(buf) / 2; bufCap = LENGTHOF(buf);
uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status); uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
TEST_ASSERT_SUCCESS(status); TEST_ASSERT_SUCCESS(status);
TEST_ASSERT_STRING("some other", buf, TRUE); TEST_ASSERT_STRING("some other", buf, TRUE);
@ -1018,8 +1022,8 @@ static void TestRegexCAPI(void) {
uregex_find(re, 0, &status); uregex_find(re, 0, &status);
TEST_ASSERT_SUCCESS(status); TEST_ASSERT_SUCCESS(status);
bufPtr = buf; bufPtr = buf;
bufCap = sizeof(buf) / 2; bufCap = LENGTHOF(buf);
u_uastrncpy(repl, "abc\\u0041\\U00000042 \\\\ $ \\abc", sizeof(repl)/2); u_uastrncpy(repl, "abc\\u0041\\U00000042 \\\\ $ \\abc", LENGTHOF(repl));
uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status); uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
TEST_ASSERT_SUCCESS(status); TEST_ASSERT_SUCCESS(status);
TEST_ASSERT_STRING("abcAB \\ $ abc", buf, TRUE); TEST_ASSERT_STRING("abcAB \\ $ abc", buf, TRUE);
@ -1054,8 +1058,8 @@ static void TestRegexCAPI(void) {
int32_t spaceNeeded; int32_t spaceNeeded;
int32_t sz; int32_t sz;
u_uastrncpy(textToSplit, "first : second: third", sizeof(textToSplit)/2); u_uastrncpy(textToSplit, "first : second: third", LENGTHOF(textToSplit));
u_uastrncpy(text2, "No match here.", sizeof(text2)/2); u_uastrncpy(text2, "No match here.", LENGTHOF(text2));
status = U_ZERO_ERROR; status = U_ZERO_ERROR;
re = uregex_openC(":", 0, NULL, &status); re = uregex_openC(":", 0, NULL, &status);
@ -1070,7 +1074,7 @@ static void TestRegexCAPI(void) {
if (U_SUCCESS(status)) { if (U_SUCCESS(status)) {
memset(fields, -1, sizeof(fields)); memset(fields, -1, sizeof(fields));
numFields = numFields =
uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 10, &status); uregex_split(re, buf, LENGTHOF(buf), &requiredCapacity, fields, 10, &status);
TEST_ASSERT_SUCCESS(status); TEST_ASSERT_SUCCESS(status);
/* The TEST_ASSERT_SUCCESS call above should change too... */ /* The TEST_ASSERT_SUCCESS call above should change too... */
@ -1102,7 +1106,7 @@ static void TestRegexCAPI(void) {
if(U_SUCCESS(status)) { if(U_SUCCESS(status)) {
memset(fields, -1, sizeof(fields)); memset(fields, -1, sizeof(fields));
numFields = numFields =
uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 2, &status); uregex_split(re, buf, LENGTHOF(buf), &requiredCapacity, fields, 2, &status);
TEST_ASSERT_SUCCESS(status); TEST_ASSERT_SUCCESS(status);
/* The TEST_ASSERT_SUCCESS call above should change too... */ /* The TEST_ASSERT_SUCCESS call above should change too... */
@ -1160,7 +1164,7 @@ static void TestRegexCAPI(void) {
int32_t spaceNeeded; int32_t spaceNeeded;
int32_t sz; int32_t sz;
u_uastrncpy(textToSplit, "first <tag-a> second<tag-b> third", sizeof(textToSplit)/2); u_uastrncpy(textToSplit, "first <tag-a> second<tag-b> third", LENGTHOF(textToSplit));
status = U_ZERO_ERROR; status = U_ZERO_ERROR;
re = uregex_openC("<(.*?)>", 0, NULL, &status); re = uregex_openC("<(.*?)>", 0, NULL, &status);
@ -1172,7 +1176,7 @@ static void TestRegexCAPI(void) {
if(U_SUCCESS(status)) { if(U_SUCCESS(status)) {
memset(fields, -1, sizeof(fields)); memset(fields, -1, sizeof(fields));
numFields = numFields =
uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 10, &status); uregex_split(re, buf, LENGTHOF(buf), &requiredCapacity, fields, 10, &status);
TEST_ASSERT_SUCCESS(status); TEST_ASSERT_SUCCESS(status);
/* The TEST_ASSERT_SUCCESS call above should change too... */ /* The TEST_ASSERT_SUCCESS call above should change too... */
@ -1193,7 +1197,7 @@ static void TestRegexCAPI(void) {
status = U_ZERO_ERROR; status = U_ZERO_ERROR;
memset(fields, -1, sizeof(fields)); memset(fields, -1, sizeof(fields));
numFields = numFields =
uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 2, &status); uregex_split(re, buf, LENGTHOF(buf), &requiredCapacity, fields, 2, &status);
TEST_ASSERT_SUCCESS(status); TEST_ASSERT_SUCCESS(status);
/* The TEST_ASSERT_SUCCESS call above should change too... */ /* The TEST_ASSERT_SUCCESS call above should change too... */
@ -1211,7 +1215,7 @@ static void TestRegexCAPI(void) {
status = U_ZERO_ERROR; status = U_ZERO_ERROR;
memset(fields, -1, sizeof(fields)); memset(fields, -1, sizeof(fields));
numFields = numFields =
uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 3, &status); uregex_split(re, buf, LENGTHOF(buf), &requiredCapacity, fields, 3, &status);
TEST_ASSERT_SUCCESS(status); TEST_ASSERT_SUCCESS(status);
/* The TEST_ASSERT_SUCCESS call above should change too... */ /* The TEST_ASSERT_SUCCESS call above should change too... */
@ -1230,7 +1234,7 @@ static void TestRegexCAPI(void) {
status = U_ZERO_ERROR; status = U_ZERO_ERROR;
memset(fields, -1, sizeof(fields)); memset(fields, -1, sizeof(fields));
numFields = numFields =
uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 5, &status); uregex_split(re, buf, LENGTHOF(buf), &requiredCapacity, fields, 5, &status);
TEST_ASSERT_SUCCESS(status); TEST_ASSERT_SUCCESS(status);
/* The TEST_ASSERT_SUCCESS call above should change too... */ /* The TEST_ASSERT_SUCCESS call above should change too... */
@ -1257,7 +1261,7 @@ static void TestRegexCAPI(void) {
if(U_SUCCESS(status)) { if(U_SUCCESS(status)) {
memset(fields, -1, sizeof(fields)); memset(fields, -1, sizeof(fields));
numFields = numFields =
uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 9, &status); uregex_split(re, buf, LENGTHOF(buf), &requiredCapacity, fields, 9, &status);
TEST_ASSERT_SUCCESS(status); TEST_ASSERT_SUCCESS(status);
/* The TEST_ASSERT_SUCCESS call above should change too... */ /* The TEST_ASSERT_SUCCESS call above should change too... */
@ -1464,8 +1468,8 @@ static void TestUTextAPI(void) {
TEST_ASSERT_SUCCESS(status); TEST_ASSERT_SUCCESS(status);
TEST_ASSERT(clone3 != NULL); TEST_ASSERT(clone3 != NULL);
u_uastrncpy(testString1, "abcccd", sizeof(pat)/2); u_uastrncpy(testString1, "abcccd", LENGTHOF(pat));
u_uastrncpy(testString2, "xxxabcccd", sizeof(pat)/2); u_uastrncpy(testString2, "xxxabcccd", LENGTHOF(pat));
status = U_ZERO_ERROR; status = U_ZERO_ERROR;
uregex_setText(clone1, testString1, -1, &status); uregex_setText(clone1, testString1, -1, &status);
@ -1499,7 +1503,7 @@ static void TestUTextAPI(void) {
UText *resultText; UText *resultText;
const char str_hello[] = { 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x00 }; /* hello */ const char str_hello[] = { 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x00 }; /* hello */
const char str_hel[] = { 0x68, 0x65, 0x6c, 0x00 }; /* hel */ const char str_hel[] = { 0x68, 0x65, 0x6c, 0x00 }; /* hel */
u_uastrncpy(pat, "hello", sizeof(pat)/2); /* for comparison */ u_uastrncpy(pat, "hello", LENGTHOF(pat)); /* for comparison */
status = U_ZERO_ERROR; status = U_ZERO_ERROR;
utext_openUTF8(&patternText, str_hello, -1, &status); utext_openUTF8(&patternText, str_hello, -1, &status);
@ -1602,7 +1606,7 @@ static void TestUTextAPI(void) {
status = U_ZERO_ERROR; status = U_ZERO_ERROR;
utext_openUTF8(&text1, str_abcccd, -1, &status); utext_openUTF8(&text1, str_abcccd, -1, &status);
u_uastrncpy(text2Chars, str_abcccxd, sizeof(text2)/2); u_uastrncpy(text2Chars, str_abcccxd, LENGTHOF(text2Chars));
utext_openUChars(&text2, text2Chars, -1, &status); utext_openUChars(&text2, text2Chars, -1, &status);
utext_openUTF8(&patternText, str_abcd, -1, &status); utext_openUTF8(&patternText, str_abcd, -1, &status);
@ -1698,7 +1702,7 @@ static void TestUTextAPI(void) {
{ {
UChar text1[50]; UChar text1[50];
UBool result; UBool result;
u_uastrncpy(text1, "012rx5rx890rxrx...", sizeof(text1)/2); u_uastrncpy(text1, "012rx5rx890rxrx...", LENGTHOF(text1));
status = U_ZERO_ERROR; status = U_ZERO_ERROR;
re = uregex_openC("rx", 0, NULL, &status); re = uregex_openC("rx", 0, NULL, &status);
@ -1762,7 +1766,7 @@ static void TestUTextAPI(void) {
const char str_interior[] = { 0x20, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x69, 0x6f, 0x72, 0x20, 0x00 }; /* ' interior ' */ const char str_interior[] = { 0x20, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x69, 0x6f, 0x72, 0x20, 0x00 }; /* ' interior ' */
u_uastrncpy(text1, "noise abc interior def, and this is off the end", sizeof(text1)/2); u_uastrncpy(text1, "noise abc interior def, and this is off the end", LENGTHOF(text1));
status = U_ZERO_ERROR; status = U_ZERO_ERROR;
re = uregex_openC("abc(.*?)def", 0, NULL, &status); re = uregex_openC("abc(.*?)def", 0, NULL, &status);
@ -1840,8 +1844,8 @@ static void TestUTextAPI(void) {
const char str_1x[] = { 0x3c, 0x24, 0x31, 0x3e, 0x00 }; /* <$1> */ const char str_1x[] = { 0x3c, 0x24, 0x31, 0x3e, 0x00 }; /* <$1> */
const char str_ReplaceAaaBax1xxx[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x5c, 0x41, 0x61, 0x61, 0x42, 0x24, 0x61, 0x20, 0x78, 0x31, 0x78, 0x20, 0x78, 0x2e, 0x2e, 0x2e, 0x78, 0x2e, 0x00 }; /* Replace \AaaB$a x1x x...x. */ const char str_ReplaceAaaBax1xxx[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x5c, 0x41, 0x61, 0x61, 0x42, 0x24, 0x61, 0x20, 0x78, 0x31, 0x78, 0x20, 0x78, 0x2e, 0x2e, 0x2e, 0x78, 0x2e, 0x00 }; /* Replace \AaaB$a x1x x...x. */
status = U_ZERO_ERROR; status = U_ZERO_ERROR;
u_uastrncpy(text1, "Replace xaax x1x x...x.", sizeof(text1)/2); u_uastrncpy(text1, "Replace xaax x1x x...x.", LENGTHOF(text1));
u_uastrncpy(text2, "No match here.", sizeof(text2)/2); u_uastrncpy(text2, "No match here.", LENGTHOF(text2));
utext_openUTF8(&replText, str_1x, -1, &status); utext_openUTF8(&replText, str_1x, -1, &status);
re = uregex_openC("x(.*?)x", 0, NULL, &status); re = uregex_openC("x(.*?)x", 0, NULL, &status);
@ -1886,8 +1890,8 @@ static void TestUTextAPI(void) {
const char str_Replaceaa1[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x3c, 0x61, 0x61, 0x3e, 0x20, 0x3c, 0x31, 0x3e, 0x20, 0x3c, 0x2e, 0x2e, 0x2e, 0x3e, 0x2e, 0x00 }; /* Replace <aa> <1> <...>. */ const char str_Replaceaa1[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x3c, 0x61, 0x61, 0x3e, 0x20, 0x3c, 0x31, 0x3e, 0x20, 0x3c, 0x2e, 0x2e, 0x2e, 0x3e, 0x2e, 0x00 }; /* Replace <aa> <1> <...>. */
const char str_Nomatchhere[] = { 0x4e, 0x6f, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x20, 0x68, 0x65, 0x72, 0x65, 0x2e, 0x00 }; /* No match here. */ const char str_Nomatchhere[] = { 0x4e, 0x6f, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x20, 0x68, 0x65, 0x72, 0x65, 0x2e, 0x00 }; /* No match here. */
status = U_ZERO_ERROR; status = U_ZERO_ERROR;
u_uastrncpy(text1, "Replace xaax x1x x...x.", sizeof(text1)/2); u_uastrncpy(text1, "Replace xaax x1x x...x.", LENGTHOF(text1));
u_uastrncpy(text2, "No match here.", sizeof(text2)/2); u_uastrncpy(text2, "No match here.", LENGTHOF(text2));
utext_openUTF8(&replText, str_1, -1, &status); utext_openUTF8(&replText, str_1, -1, &status);
re = uregex_openC("x(.*?)x", 0, NULL, &status); re = uregex_openC("x(.*?)x", 0, NULL, &status);
@ -1926,15 +1930,15 @@ static void TestUTextAPI(void) {
re = uregex_openC(".*", 0, 0, &status); re = uregex_openC(".*", 0, 0, &status);
TEST_ASSERT_SUCCESS(status); TEST_ASSERT_SUCCESS(status);
u_uastrncpy(text, "whatever", sizeof(text)/2); u_uastrncpy(text, "whatever", LENGTHOF(text));
u_uastrncpy(repl, "some other", sizeof(repl)/2); u_uastrncpy(repl, "some other", LENGTHOF(repl));
uregex_setText(re, text, -1, &status); uregex_setText(re, text, -1, &status);
/* match covers whole target string */ /* match covers whole target string */
uregex_find(re, 0, &status); uregex_find(re, 0, &status);
TEST_ASSERT_SUCCESS(status); TEST_ASSERT_SUCCESS(status);
bufPtr = buf; bufPtr = buf;
bufCap = sizeof(buf) / 2; bufCap = LENGTHOF(buf);
uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status); uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
TEST_ASSERT_SUCCESS(status); TEST_ASSERT_SUCCESS(status);
TEST_ASSERT_STRING("some other", buf, TRUE); TEST_ASSERT_STRING("some other", buf, TRUE);
@ -1943,8 +1947,8 @@ static void TestUTextAPI(void) {
uregex_find(re, 0, &status); uregex_find(re, 0, &status);
TEST_ASSERT_SUCCESS(status); TEST_ASSERT_SUCCESS(status);
bufPtr = buf; bufPtr = buf;
bufCap = sizeof(buf) / 2; bufCap = LENGTHOF(buf);
u_uastrncpy(repl, "abc\\u0041\\U00000042 \\\\ $ \\abc", sizeof(repl)/2); u_uastrncpy(repl, "abc\\u0041\\U00000042 \\\\ $ \\abc", LENGTHOF(repl));
uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status); uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
TEST_ASSERT_SUCCESS(status); TEST_ASSERT_SUCCESS(status);
TEST_ASSERT_STRING("abcAB \\ $ abc", buf, TRUE); TEST_ASSERT_STRING("abcAB \\ $ abc", buf, TRUE);
@ -1967,8 +1971,8 @@ static void TestUTextAPI(void) {
int32_t numFields; int32_t numFields;
int32_t i; int32_t i;
u_uastrncpy(textToSplit, "first : second: third", sizeof(textToSplit)/2); u_uastrncpy(textToSplit, "first : second: third", LENGTHOF(textToSplit));
u_uastrncpy(text2, "No match here.", sizeof(text2)/2); u_uastrncpy(text2, "No match here.", LENGTHOF(text2));
status = U_ZERO_ERROR; status = U_ZERO_ERROR;
re = uregex_openC(":", 0, NULL, &status); re = uregex_openC(":", 0, NULL, &status);
@ -2043,7 +2047,7 @@ static void TestUTextAPI(void) {
int32_t numFields; int32_t numFields;
int32_t i; int32_t i;
u_uastrncpy(textToSplit, "first <tag-a> second<tag-b> third", sizeof(textToSplit)/2); u_uastrncpy(textToSplit, "first <tag-a> second<tag-b> third", LENGTHOF(textToSplit));
status = U_ZERO_ERROR; status = U_ZERO_ERROR;
re = uregex_openC("<(.*?)>", 0, NULL, &status); re = uregex_openC("<(.*?)>", 0, NULL, &status);
@ -2266,5 +2270,60 @@ static void TestBug8421(void) {
uregex_close(re); uregex_close(re);
} }
/*
 * Find-progress callback used by TestBug10815.
 *
 * Ignores both arguments and always returns FALSE, which asks the regex
 * engine to stop the find operation in progress.
 *
 * @param context     caller-supplied context pointer; unused.
 * @param matchIndex  input index reported by the engine; unused.
 * @return FALSE unconditionally.
 */
static UBool U_CALLCONV FindCallback(const void* context , int64_t matchIndex) {
    /* Suppress compiler warnings about unused parameters. */
    (void)context;
    (void)matchIndex;
    return FALSE;
}
/*
 * Match-progress callback used by TestBug10815.
 *
 * Ignores both arguments and always returns FALSE, which asks the regex
 * engine to abandon the match attempt in progress.
 *
 * @param context  caller-supplied context pointer; unused.
 * @param steps    progress count reported by the engine; unused.
 * @return FALSE unconditionally.
 */
static UBool U_CALLCONV MatchCallback(const void *context, int32_t steps) {
    /* Suppress compiler warnings about unused parameters. */
    (void)context;
    (void)steps;
    return FALSE;
}
/*
 * Regression test for bug 10815.
 *
 * Checks that uregex_findNext() reports U_REGEX_STOPPED_BY_CALLER in its
 * status argument when either a find-progress callback or a match callback
 * returns FALSE.
 */
static void TestBug10815(void) {
    /* Bug 10815:   uregex_findNext() does not set U_REGEX_STOPPED_BY_CALLER
     *              when the callback function specified by uregex_setMatchCallback() returns FALSE
     */
    URegularExpression *re;
    UErrorCode status = U_ZERO_ERROR;
    UChar    text[100];

    /* findNext() with a find progress callback function. */

    re = uregex_openC(".z", 0, 0, &status);
    TEST_ASSERT_SUCCESS(status);

    u_uastrncpy(text, "Hello, World.", LENGTHOF(text));
    uregex_setText(re, text, -1, &status);
    TEST_ASSERT_SUCCESS(status);

    /* FindCallback always returns FALSE, so the find must be stopped. */
    uregex_setFindProgressCallback(re, FindCallback, NULL, &status);
    TEST_ASSERT_SUCCESS(status);

    uregex_findNext(re, &status);
    TEST_ASSERT(status == U_REGEX_STOPPED_BY_CALLER);

    uregex_close(re);

    /* findNext() with a match progress callback function. */

    /* Start the second scenario from a clean status. */
    status = U_ZERO_ERROR;
    re = uregex_openC("((xxx)*)*y", 0, 0, &status);
    TEST_ASSERT_SUCCESS(status);

    /* Pattern + this text gives an exponential time match. Without the callback to stop the match,
     * it will appear to be stuck in a (near) infinite loop.
     */
    u_uastrncpy(text, "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", LENGTHOF(text));
    uregex_setText(re, text, -1, &status);
    TEST_ASSERT_SUCCESS(status);

    /* MatchCallback always returns FALSE, so the match must be stopped. */
    uregex_setMatchCallback(re, MatchCallback, NULL, &status);
    TEST_ASSERT_SUCCESS(status);

    uregex_findNext(re, &status);
    TEST_ASSERT(status == U_REGEX_STOPPED_BY_CALLER);

    uregex_close(re);
}
#endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS */ #endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS */

View File

@ -2035,7 +2035,7 @@ void RegexTest::API_Match_UTF8() {
utext_openUnicodeString(&destText, &dest, &status); utext_openUnicodeString(&destText, &dest, &status);
UText *result; UText *result;
//const char str_0123456789[] = { 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x00 }; /* 0123456789 */ //const char str_0123456789[] = { 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x00 }; /* 0123456789 */
// Test shallow-clone API // Test shallow-clone API
int64_t group_len; int64_t group_len;
result = matcher->group((UText *)NULL, group_len, status); result = matcher->group((UText *)NULL, group_len, status);
REGEX_CHECK_STATUS; REGEX_CHECK_STATUS;
@ -4826,6 +4826,9 @@ struct progressCallBackContext {
void reset(int32_t max) {maxCalls=max; numCalls=0;lastIndex=0;}; void reset(int32_t max) {maxCalls=max; numCalls=0;lastIndex=0;};
}; };
// call-back function for find().
// Return TRUE to continue the find().
// Return FALSE to stop the find().
U_CDECL_BEGIN U_CDECL_BEGIN
static UBool U_CALLCONV static UBool U_CALLCONV
testProgressCallBackFn(const void *context, int64_t matchIndex) { testProgressCallBackFn(const void *context, int64_t matchIndex) {
@ -4861,7 +4864,7 @@ void RegexTest::FindProgressCallbacks() {
const void *returnedContext; const void *returnedContext;
URegexFindProgressCallback *returnedFn; URegexFindProgressCallback *returnedFn;
UErrorCode status = U_ZERO_ERROR; UErrorCode status = U_ZERO_ERROR;
RegexMatcher matcher(UNICODE_STRING_SIMPLE("((.)+\\2)+x"), 0, status); // A pattern that can run long. RegexMatcher matcher(UNICODE_STRING_SIMPLE("((.)\\2)x"), 0, status);
REGEX_CHECK_STATUS; REGEX_CHECK_STATUS;
matcher.setFindProgressCallback(testProgressCallBackFn, &cbInfo, status); matcher.setFindProgressCallback(testProgressCallBackFn, &cbInfo, status);
REGEX_CHECK_STATUS; REGEX_CHECK_STATUS;
@ -4870,10 +4873,10 @@ void RegexTest::FindProgressCallbacks() {
REGEX_ASSERT(returnedFn == testProgressCallBackFn); REGEX_ASSERT(returnedFn == testProgressCallBackFn);
REGEX_ASSERT(returnedContext == &cbInfo); REGEX_ASSERT(returnedContext == &cbInfo);
// A short-running match should NOT invoke the callback. // A find that matches on the initial position does NOT invoke the callback.
status = U_ZERO_ERROR; status = U_ZERO_ERROR;
cbInfo.reset(100); cbInfo.reset(100);
UnicodeString s = "abxxx"; UnicodeString s = "aaxxx";
matcher.reset(s); matcher.reset(s);
#if 0 #if 0
matcher.setTrace(TRUE); matcher.setTrace(TRUE);
@ -4882,7 +4885,8 @@ void RegexTest::FindProgressCallbacks() {
REGEX_CHECK_STATUS; REGEX_CHECK_STATUS;
REGEX_ASSERT(cbInfo.numCalls == 0); REGEX_ASSERT(cbInfo.numCalls == 0);
// A medium running match that causes matcher.find() to invoke our callback for each index. // A medium running find() that causes matcher.find() to invoke our callback for each index,
// but not so many times that we interrupt the operation.
status = U_ZERO_ERROR; status = U_ZERO_ERROR;
s = "aaaaaaaaaaaaaaaaaaab"; s = "aaaaaaaaaaaaaaaaaaab";
cbInfo.reset(s.length()); // Some upper limit for number of calls that is greater than size of our input string cbInfo.reset(s.length()); // Some upper limit for number of calls that is greater than size of our input string
@ -4897,22 +4901,21 @@ void RegexTest::FindProgressCallbacks() {
cbInfo.reset(s1.length() - 5); // Bail early somewhere near the end of input string cbInfo.reset(s1.length() - 5); // Bail early somewhere near the end of input string
matcher.reset(s1); matcher.reset(s1);
REGEX_ASSERT(matcher.find(0, status)==FALSE); REGEX_ASSERT(matcher.find(0, status)==FALSE);
REGEX_CHECK_STATUS; REGEX_ASSERT(status == U_REGEX_STOPPED_BY_CALLER);
REGEX_ASSERT(cbInfo.numCalls == s1.length() - 5); REGEX_ASSERT(cbInfo.numCalls == s1.length() - 5);
#if 0
// Now a match that will succeed, but after an interruption // Now a match that will succeed, but after an interruption
status = U_ZERO_ERROR; status = U_ZERO_ERROR;
UnicodeString s2 = "aaaaaaaaaaaaaa aaaaaaaaab xxx"; UnicodeString s2 = "aaaaaaaaaaaaaa aaaaaaaaab xxx";
cbInfo.reset(s2.length() - 10); // Bail early somewhere near the end of input string cbInfo.reset(s2.length() - 10); // Bail early somewhere near the end of input string
matcher.reset(s2); matcher.reset(s2);
REGEX_ASSERT(matcher.find(0, status)==FALSE); REGEX_ASSERT(matcher.find(0, status)==FALSE);
REGEX_CHECK_STATUS; REGEX_ASSERT(status == U_REGEX_STOPPED_BY_CALLER);
// Now retry the match from where left off // Now retry the match from where left off
cbInfo.maxCalls = 100; // No callback limit cbInfo.maxCalls = 100; // No callback limit
status = U_ZERO_ERROR;
REGEX_ASSERT(matcher.find(cbInfo.lastIndex, status)); REGEX_ASSERT(matcher.find(cbInfo.lastIndex, status));
REGEX_CHECK_STATUS; REGEX_CHECK_STATUS;
#endif
} }
@ -5317,7 +5320,7 @@ void RegexTest::TestBug11049() {
TestCase11049("A|B|C", "a string \\ud800\\udc00", FALSE, __LINE__); TestCase11049("A|B|C", "a string \\ud800\\udc00", FALSE, __LINE__);
TestCase11049("A|B|C", "string matches at end C", TRUE, __LINE__); TestCase11049("A|B|C", "string matches at end C", TRUE, __LINE__);
// Test again with a pattern starting with a single character, // Test again with a pattern starting with a single character,
// which takes a different code path than starting with an OR expression, // which takes a different code path than starting with an OR expression,
// but with similar logic. // but with similar logic.
TestCase11049("C", "a string \\ud800\\udc00", FALSE, __LINE__); TestCase11049("C", "a string \\ud800\\udc00", FALSE, __LINE__);