ICU-2421 Redo Regex C API changes lost in OSS crash
X-SVN-Rev: 14721
This commit is contained in:
parent
33c376bd78
commit
efeb3b480d
@ -1943,6 +1943,57 @@ SOURCE=.\rematch.cpp
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\repattrn.cpp
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\uregex.cpp
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\unicode\uregex.h
|
||||
|
||||
!IF "$(CFG)" == "i18n - Win32 Release"
|
||||
|
||||
# Begin Custom Build
|
||||
InputPath=.\unicode\uregex.h
|
||||
|
||||
"..\..\include\unicode\uregex.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy $(InputPath) ..\..\include\unicode
|
||||
|
||||
# End Custom Build
|
||||
|
||||
!ELSEIF "$(CFG)" == "i18n - Win32 Debug"
|
||||
|
||||
# Begin Custom Build
|
||||
InputPath=.\unicode\uregex.h
|
||||
|
||||
"..\..\include\unicode\uregex.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy $(InputPath) ..\..\include\unicode
|
||||
|
||||
# End Custom Build
|
||||
|
||||
!ELSEIF "$(CFG)" == "i18n - Win64 Release"
|
||||
|
||||
# Begin Custom Build
|
||||
InputPath=.\unicode\uregex.h
|
||||
|
||||
"..\..\include\unicode\uregex.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy $(InputPath) ..\..\include\unicode
|
||||
|
||||
# End Custom Build
|
||||
|
||||
!ELSEIF "$(CFG)" == "i18n - Win64 Debug"
|
||||
|
||||
# Begin Custom Build
|
||||
InputPath=.\unicode\uregex.h
|
||||
|
||||
"..\..\include\unicode\uregex.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy $(InputPath) ..\..\include\unicode
|
||||
|
||||
# End Custom Build
|
||||
|
||||
!ENDIF
|
||||
|
||||
# End Source File
|
||||
# End Group
|
||||
# Begin Group "transforms"
|
||||
|
@ -243,7 +243,7 @@ RegexPattern *RegexPattern::compile(
|
||||
}
|
||||
|
||||
const uint32_t allFlags = UREGEX_CANON_EQ | UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS |
|
||||
UREGEX_DOTALL | UREGEX_MULTILINE;
|
||||
UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD;
|
||||
|
||||
if ((flags & ~allFlags) != 0) {
|
||||
status = U_REGEX_INVALID_FLAG;
|
||||
|
@ -60,6 +60,7 @@ class UnicodeSet;
|
||||
struct REStackFrame;
|
||||
struct Regex8BitSet;
|
||||
class RuleBasedBreakIterator;
|
||||
class RegexCImpl;
|
||||
|
||||
|
||||
|
||||
@ -383,6 +384,7 @@ private:
|
||||
|
||||
friend class RegexCompile;
|
||||
friend class RegexMatcher;
|
||||
friend class RegexCImpl;
|
||||
|
||||
//
|
||||
// Implementation Methods
|
||||
@ -817,6 +819,7 @@ private:
|
||||
RegexMatcher(const RegexMatcher &other);
|
||||
RegexMatcher &operator =(const RegexMatcher &rhs);
|
||||
friend class RegexPattern;
|
||||
friend class RegexCImpl;
|
||||
|
||||
|
||||
//
|
||||
@ -857,6 +860,7 @@ private:
|
||||
|
||||
RuleBasedBreakIterator *fWordBreakItr;
|
||||
|
||||
|
||||
};
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
@ -20,6 +20,9 @@
|
||||
|
||||
#if !UCONFIG_NO_REGULAR_EXPRESSIONS
|
||||
|
||||
#include "unicode/parseerr.h"
|
||||
|
||||
|
||||
struct URegularExpression;
|
||||
/**
|
||||
* Structure representing a compiled regular expression, plus the results
|
||||
@ -132,10 +135,14 @@ U_CAPI void U_EXPORT2
|
||||
uregex_close(URegularExpression *regexp);
|
||||
|
||||
/**
|
||||
* Make an exact copy of a compiled regular expression. Cloning a regular
|
||||
* Make a copy of a compiled regular expression. Cloning a regular
|
||||
* expression is faster than opening a second instance from the source
|
||||
* form of the expression, and requires less memory.
|
||||
* <p>
|
||||
* Note that the current input string and the position of any matched text
|
||||
* within it are not cloned; only the pattern itself and the
|
||||
* match mode flags are copied.
|
||||
* <p>
|
||||
* Cloning can be particularly useful to threaded applications that perform
|
||||
* multiple match operations in parallel. Each concurrent RE
|
||||
* operation requires its own instance of a URegularExpression.
|
||||
@ -152,7 +159,10 @@ uregex_clone(const URegularExpression *regexp, UErrorCode *status);
|
||||
*
|
||||
* @param regexp The compiled regular expression.
|
||||
* @param patLength This output parameter will be set to the length of the
|
||||
* pattern string.
|
||||
* pattern string. A NULL pointer may be used here if the
|
||||
* pattern length is not needed, as would be the case if
|
||||
* the pattern is known in advance to be a NUL terminated
|
||||
* string.
|
||||
* @param status Receives errors detected by this function.
|
||||
* @return a pointer to the pattern string. The storage for the string is
|
||||
* owned by the regular expression object, and must not be
|
||||
@ -160,19 +170,21 @@ uregex_clone(const URegularExpression *regexp, UErrorCode *status);
|
||||
* will remain valid until the regular expression is closed.
|
||||
*/
|
||||
U_CAPI const UChar * U_EXPORT2
|
||||
uregex_pattern(const URegularExpression *regexp,
|
||||
int32_t **patLength,
|
||||
UErrorCode *status);
|
||||
uregex_pattern(const URegularExpression *regexp,
|
||||
int32_t *patLength,
|
||||
UErrorCode *status);
|
||||
|
||||
|
||||
/**
|
||||
* Get the match mode flags that were specified when compiling this regular expression.
|
||||
* @param status Receives errors detected by this function.
|
||||
* @param regexp The compiled regular expression.
|
||||
* @return The match mode flags
|
||||
* @see URegexpFlag
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
uregex_flags(UErrorCode *status);
|
||||
uregex_flags(const URegularExpression *regexp,
|
||||
UErrorCode *status);
|
||||
|
||||
|
||||
/**
|
||||
@ -206,14 +218,18 @@ uregex_setText(URegularExpression *regexp,
|
||||
* pointer was previously supplied via uregex_setText().
|
||||
*
|
||||
* @param regexp The compiled regular expression.
|
||||
* @param textLength The length of the string is returned in this output parameter.
|
||||
* @param textLength The length of the string is returned in this output parameter.
|
||||
* A NULL pointer may be used here if the
|
||||
* text length is not needed, as would be the case if
|
||||
* the text is known in advance to be a NUL terminated
|
||||
* string.
|
||||
* @param status Receives errors detected by this function.
|
||||
* @return Pointer to the subject text string currently associated with
|
||||
* this regular expression.
|
||||
*/
|
||||
U_CAPI const UChar * U_EXPORT2
|
||||
uregex_getText(URegularExpression *regexp,
|
||||
int32_t **textLength,
|
||||
int32_t *textLength,
|
||||
UErrorCode *status);
|
||||
|
||||
/**
|
||||
@ -449,6 +465,17 @@ uregex_replaceFirst(URegularExpression *regexp,
|
||||
* replacement string is appended to the output string,
|
||||
* including handling any substitutions of captured text.</p>
|
||||
*
|
||||
* <p>A note on preflight computation of buffersize and error handling:
|
||||
* Calls to uregex_appendReplacement() and uregex_appendTail() are
|
||||
* designed to be chained, one after another, with the destination
|
||||
* buffer pointer and buffer capacity updated after each in preparation
|
||||
* for the next. If the destination buffer is exhausted partway through such a
|
||||
* sequence, a U_BUFFER_OVERFLOW_ERROR status will be returned. Normal
|
||||
* ICU conventions are for a function to perform no action if it is
|
||||
* called with an error status, but for this one case, uregex_appendReplacement()
|
||||
* will operate normally so that buffer size computations will complete
|
||||
* correctly.
|
||||
*
|
||||
* <p>For simple, prepackaged, non-incremental find-and-replace
|
||||
* operations, see replaceFirst() or replaceAll().</p>
|
||||
*
|
||||
@ -535,7 +562,7 @@ uregex_appendTail(URegularExpression *regexp,
|
||||
* extra positions within the destFields array will be
|
||||
* set to NULL.
|
||||
* @param destCapacity The capacity of the destBuf.
|
||||
* @param requiredCapacty The actual capacity required of the destBuf.
|
||||
* @param requiredCapacity The actual capacity required of the destBuf.
|
||||
* If destCapacity is too small, requiredCapacity is the
|
||||
* total capacity required to hold all of the output.
|
||||
* @param destFields An array to be filled with the position of each
|
||||
@ -572,7 +599,7 @@ U_CAPI int32_t U_EXPORT2
|
||||
uregex_split( URegularExpression *regexp,
|
||||
UChar *destBuf,
|
||||
int32_t destCapacity,
|
||||
int32_t **requiredCapacity,
|
||||
int32_t *requiredCapacity,
|
||||
UChar *destFields[],
|
||||
int32_t destFieldsCapacity,
|
||||
UErrorCode *status);
|
||||
|
1092
icu4c/source/i18n/uregex.cpp
Normal file
1092
icu4c/source/i18n/uregex.cpp
Normal file
File diff suppressed because it is too large
Load Diff
176
icu4c/source/test/cintltst/reapits.c
Normal file
176
icu4c/source/test/cintltst/reapits.c
Normal file
@ -0,0 +1,176 @@
|
||||
/********************************************************************
|
||||
* COPYRIGHT:
|
||||
* Copyright (c) 2004, International Business Machines Corporation and
|
||||
* others. All Rights Reserved.
|
||||
********************************************************************/
|
||||
/********************************************************************************
|
||||
*
|
||||
* File reapits.c
|
||||
*
|
||||
*********************************************************************************/
|
||||
/*C API TEST FOR Regular Expressions */
|
||||
/**
|
||||
* This is an API test for ICU regular expressions in C. It doesn't test very many cases, and doesn't
|
||||
* try to test the full functionality. It just calls each function and verifies that it
|
||||
* works on a basic level.
|
||||
*
|
||||
* More complete testing of regular expression functionality is done with the C++ tests.
|
||||
**/
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if !UCONFIG_NO_REGULAR_EXPRESSIONS
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include "unicode/uloc.h"
|
||||
#include "unicode/uregex.h"
|
||||
#include "unicode/ustring.h"
|
||||
#include "cintltst.h"
|
||||
|
||||
/*
 * Log a failure, with source file, line number and the name of the error
 * code, when `status` is a failure code.
 * Wrapped in do { } while (0) so the macro expands to exactly one statement
 * and stays correct inside an unbraced if/else.
 */
#define TEST_ASSERT_SUCCESS(status) do { \
    if (U_FAILURE(status)) { \
        log_err("Failure at file %s, line %d, error = %s\n", \
                __FILE__, __LINE__, u_errorName(status)); \
    } \
} while (0)
|
||||
|
||||
/*
 * Log a test failure, with source file and line number, when `expr`
 * evaluates to false (zero).
 * Wrapped in do { } while (0) so the macro expands to exactly one statement
 * and stays correct inside an unbraced if/else.
 */
#define TEST_ASSERT(expr) do { \
    if (!(expr)) { \
        log_err("Test Failure at file %s, line %d\n", __FILE__, __LINE__); \
    } \
} while (0)
|
||||
|
||||
static void TestRegexCAPI(void);
|
||||
|
||||
void addURegexTest(TestNode** root);
|
||||
|
||||
/*
 * Register TestRegexCAPI under `root` with the test path
 * "regex/TestRegexCAPI".
 */
void addURegexTest(TestNode** root)
{
    addTest(root, &TestRegexCAPI, "regex/TestRegexCAPI");
}
|
||||
|
||||
|
||||
void TestRegexCAPI(void) {
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
URegularExpression *re;
|
||||
UChar pat[200];
|
||||
|
||||
/* Mimimalist open/close */
|
||||
u_uastrncpy(pat, "abc*", sizeof(pat)/2);
|
||||
re = uregex_open(pat, -1, 0, 0, &status);
|
||||
TEST_ASSERT_SUCCESS(status);
|
||||
uregex_close(re);
|
||||
|
||||
/* Open with all flag values set */
|
||||
status = U_ZERO_ERROR;
|
||||
re = uregex_open(pat, -1,
|
||||
UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD,
|
||||
0, &status);
|
||||
TEST_ASSERT_SUCCESS(status);
|
||||
uregex_close(re);
|
||||
|
||||
/* Open with an invalid flag */
|
||||
status = U_ZERO_ERROR;
|
||||
re = uregex_open(pat, -1, 0x40000000, 0, &status);
|
||||
TEST_ASSERT(status == U_REGEX_INVALID_FLAG);
|
||||
uregex_close(re);
|
||||
|
||||
|
||||
/* openC open from a C string */
|
||||
{
|
||||
const UChar *p;
|
||||
int32_t len;
|
||||
status = U_ZERO_ERROR;
|
||||
re = uregex_openC("abc*", 0, 0, &status);
|
||||
TEST_ASSERT_SUCCESS(status);
|
||||
p = uregex_pattern(re, &len, &status);
|
||||
TEST_ASSERT_SUCCESS(status);
|
||||
u_uastrncpy(pat, "abc*", sizeof(pat)/2);
|
||||
TEST_ASSERT(u_strcmp(pat, p) == 0);
|
||||
TEST_ASSERT(len==(int32_t)strlen("abc*"));
|
||||
|
||||
uregex_close(re);
|
||||
}
|
||||
|
||||
/*
|
||||
* clone
|
||||
*/
|
||||
{
|
||||
URegularExpression *clone1;
|
||||
URegularExpression *clone2;
|
||||
URegularExpression *clone3;
|
||||
UChar testString1[30];
|
||||
UChar testString2[30];
|
||||
UBool result;
|
||||
|
||||
|
||||
status = U_ZERO_ERROR;
|
||||
re = uregex_openC("abc*", 0, 0, &status);
|
||||
TEST_ASSERT_SUCCESS(status);
|
||||
clone1 = uregex_clone(re, &status);
|
||||
TEST_ASSERT_SUCCESS(status);
|
||||
TEST_ASSERT(clone1 != NULL);
|
||||
|
||||
status = U_ZERO_ERROR;
|
||||
clone2 = uregex_clone(re, &status);
|
||||
TEST_ASSERT_SUCCESS(status);
|
||||
TEST_ASSERT(clone2 != NULL);
|
||||
uregex_close(re);
|
||||
|
||||
status = U_ZERO_ERROR;
|
||||
clone3 = uregex_clone(clone2, &status);
|
||||
TEST_ASSERT_SUCCESS(status);
|
||||
TEST_ASSERT(clone3 != NULL);
|
||||
|
||||
u_uastrncpy(testString1, "abcccd", sizeof(pat)/2);
|
||||
u_uastrncpy(testString2, "xxxabcccd", sizeof(pat)/2);
|
||||
|
||||
status = U_ZERO_ERROR;
|
||||
uregex_setText(clone1, testString1, -1, &status);
|
||||
TEST_ASSERT_SUCCESS(status);
|
||||
result = uregex_lookingAt(clone1, 0, &status);
|
||||
TEST_ASSERT_SUCCESS(status);
|
||||
TEST_ASSERT(result==TRUE);
|
||||
|
||||
status = U_ZERO_ERROR;
|
||||
uregex_setText(clone2, testString2, -1, &status);
|
||||
TEST_ASSERT_SUCCESS(status);
|
||||
result = uregex_lookingAt(clone2, 0, &status);
|
||||
TEST_ASSERT_SUCCESS(status);
|
||||
TEST_ASSERT(result==FALSE);
|
||||
result = uregex_find(clone2, 0, &status);
|
||||
TEST_ASSERT_SUCCESS(status);
|
||||
TEST_ASSERT(result==TRUE);
|
||||
|
||||
uregex_close(clone1);
|
||||
uregex_close(clone2);
|
||||
uregex_close(clone3);
|
||||
|
||||
}
|
||||
|
||||
/*
|
||||
* pattern()
|
||||
*/
|
||||
{
|
||||
const UChar *resultPat;
|
||||
int32_t resultLen;
|
||||
u_uastrncpy(pat, "hello", sizeof(pat)/2);
|
||||
status = U_ZERO_ERROR;
|
||||
re = uregex_open(pat, -1, 0, NULL, &status);
|
||||
resultPat = uregex_pattern(re, &resultLen, &status);
|
||||
TEST_ASSERT_SUCCESS(status);
|
||||
TEST_ASSERT(resultLen == -1);
|
||||
TEST_ASSERT(u_strcmp(resultPat, pat) == 0);
|
||||
uregex_close(re);
|
||||
|
||||
status = U_ZERO_ERROR;
|
||||
re = uregex_open(pat, 3, 0, NULL, &status);
|
||||
resultPat = uregex_pattern(re, &resultLen, &status);
|
||||
TEST_ASSERT_SUCCESS(status);
|
||||
TEST_ASSERT(resultLen == 3);
|
||||
TEST_ASSERT(u_strncmp(resultPat, pat, 3) == 0);
|
||||
TEST_ASSERT(u_strlen(resultPat) == 3);
|
||||
uregex_close(re);
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
#endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS */
|
Loading…
Reference in New Issue
Block a user