ICU-2421 Redo Regex C API changes lost in OSS crash

X-SVN-Rev: 14721
This commit is contained in:
Andy Heninger 2004-03-22 22:16:21 +00:00
parent 33c376bd78
commit efeb3b480d
6 changed files with 1361 additions and 11 deletions

View File

@ -1943,6 +1943,57 @@ SOURCE=.\rematch.cpp
# Begin Source File
SOURCE=.\repattrn.cpp
# End Source File
# Begin Source File
SOURCE=.\uregex.cpp
# End Source File
# Begin Source File
SOURCE=.\unicode\uregex.h
!IF "$(CFG)" == "i18n - Win32 Release"
# Begin Custom Build
InputPath=.\unicode\uregex.h
"..\..\include\unicode\uregex.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy $(InputPath) ..\..\include\unicode
# End Custom Build
!ELSEIF "$(CFG)" == "i18n - Win32 Debug"
# Begin Custom Build
InputPath=.\unicode\uregex.h
"..\..\include\unicode\uregex.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy $(InputPath) ..\..\include\unicode
# End Custom Build
!ELSEIF "$(CFG)" == "i18n - Win64 Release"
# Begin Custom Build
InputPath=.\unicode\uregex.h
"..\..\include\unicode\uregex.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy $(InputPath) ..\..\include\unicode
# End Custom Build
!ELSEIF "$(CFG)" == "i18n - Win64 Debug"
# Begin Custom Build
InputPath=.\unicode\uregex.h
"..\..\include\unicode\uregex.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy $(InputPath) ..\..\include\unicode
# End Custom Build
!ENDIF
# End Source File
# End Group
# Begin Group "transforms"

View File

@ -243,7 +243,7 @@ RegexPattern *RegexPattern::compile(
}
const uint32_t allFlags = UREGEX_CANON_EQ | UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS |
UREGEX_DOTALL | UREGEX_MULTILINE;
UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD;
if ((flags & ~allFlags) != 0) {
status = U_REGEX_INVALID_FLAG;

View File

@ -60,6 +60,7 @@ class UnicodeSet;
struct REStackFrame;
struct Regex8BitSet;
class RuleBasedBreakIterator;
class RegexCImpl;
@ -383,6 +384,7 @@ private:
friend class RegexCompile;
friend class RegexMatcher;
friend class RegexCImpl;
//
// Implementation Methods
@ -817,6 +819,7 @@ private:
RegexMatcher(const RegexMatcher &other);
RegexMatcher &operator =(const RegexMatcher &rhs);
friend class RegexPattern;
friend class RegexCImpl;
//
@ -857,6 +860,7 @@ private:
RuleBasedBreakIterator *fWordBreakItr;
};
U_NAMESPACE_END

View File

@ -20,6 +20,9 @@
#if !UCONFIG_NO_REGULAR_EXPRESSIONS
#include "unicode/parseerr.h"
struct URegularExpression;
/**
* Structure representing a compiled regular expression, plus the results
@ -132,10 +135,14 @@ U_CAPI void U_EXPORT2
uregex_close(URegularExpression *regexp);
/**
* Make an exact copy of a compiled regular expression. Cloning a regular
* Make a copy of a compiled regular expression. Cloning a regular
* expression is faster than opening a second instance from the source
* form of the expression, and requires less memory.
* <p>
* Note that the current input string and the position of any matched text
* within it are not cloned; only the pattern itself and the
* match mode flags are copied.
* <p>
* Cloning can be particularly useful to threaded applications that perform
* multiple match operations in parallel. Each concurrent RE
* operation requires its own instance of a URegularExpression.
@ -152,7 +159,10 @@ uregex_clone(const URegularExpression *regexp, UErrorCode *status);
*
* @param regexp The compiled regular expression.
* @param patLength This output parameter will be set to the length of the
* pattern string.
* pattern string. A NULL pointer may be used here if the
* pattern length is not needed, as would be the case if
* the pattern is known in advance to be a NUL terminated
* string.
* @param status Receives errors detected by this function.
* @return a pointer to the pattern string. The storage for the string is
* owned by the regular expression object, and must not be
@ -160,19 +170,21 @@ uregex_clone(const URegularExpression *regexp, UErrorCode *status);
* will remain valid until the regular expression is closed.
*/
U_CAPI const UChar * U_EXPORT2
uregex_pattern(const URegularExpression *regexp,
int32_t **patLength,
UErrorCode *status);
uregex_pattern(const URegularExpression *regexp,
int32_t *patLength,
UErrorCode *status);
/**
* Get the match mode flags that were specified when compiling this regular expression.
* @param status Receives errors detected by this function.
* @param regexp The compiled regular expression.
* @return The match mode flags
* @see URegexpFlag
*/
U_CAPI int32_t U_EXPORT2
uregex_flags(UErrorCode *status);
uregex_flags(const URegularExpression *regexp,
UErrorCode *status);
/**
@ -206,14 +218,18 @@ uregex_setText(URegularExpression *regexp,
* pointer was previously supplied via uregex_setText().
*
* @param regexp The compiled regular expression.
* @param textLength The length of the string is returned in this output parameter.
* @param textLength The length of the string is returned in this output parameter.
* A NULL pointer may be used here if the
* text length is not needed, as would be the case if
* the text is known in advance to be a NUL terminated
* string.
* @param status Receives errors detected by this function.
* @return Pointer to the subject text string currently associated with
* this regular expression.
*/
U_CAPI const UChar * U_EXPORT2
uregex_getText(URegularExpression *regexp,
int32_t **textLength,
int32_t *textLength,
UErrorCode *status);
/**
@ -449,6 +465,17 @@ uregex_replaceFirst(URegularExpression *regexp,
* replacement string is appended to the output string,
* including handling any substitutions of captured text.</p>
*
* <p>A note on preflight computation of buffer size and error handling:
* Calls to uregex_appendReplacement() and uregex_appendTail() are
* designed to be chained, one after another, with the destination
* buffer pointer and buffer capacity updated after each in preparation
* for the next. If the destination buffer is exhausted partway through such a
* sequence, a U_BUFFER_OVERFLOW_ERROR status will be returned. Normal
* ICU conventions are for a function to perform no action if it is
* called with an error status, but for this one case, uregex_appendReplacement()
* will operate normally so that buffer size computations will complete
* correctly.</p>
*
* <p>For simple, prepackaged, non-incremental find-and-replace
* operations, see replaceFirst() or replaceAll().</p>
*
@ -535,7 +562,7 @@ uregex_appendTail(URegularExpression *regexp,
* extra positions within the destFields array will be
* set to NULL.
* @param destCapacity The capacity of the destBuf.
* @param requiredCapacty The actual capacity required of the destBuf.
* @param requiredCapacity The actual capacity required of the destBuf.
* If destCapacity is too small, requiredCapacity is the
* total capacity required to hold all of the output.
* @param destFields An array to be filled with the position of each
@ -572,7 +599,7 @@ U_CAPI int32_t U_EXPORT2
uregex_split( URegularExpression *regexp,
UChar *destBuf,
int32_t destCapacity,
int32_t **requiredCapacity,
int32_t *requiredCapacity,
UChar *destFields[],
int32_t destFieldsCapacity,
UErrorCode *status);

1092
icu4c/source/i18n/uregex.cpp Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,176 @@
/********************************************************************
* COPYRIGHT:
* Copyright (c) 2004, International Business Machines Corporation and
* others. All Rights Reserved.
********************************************************************/
/********************************************************************************
*
* File reapits.c
*
*********************************************************************************/
/*C API TEST FOR Regular Expressions */
/**
* This is an API test for ICU regular expressions in C. It doesn't test very many cases, and doesn't
* try to test the full functionality. It just calls each function and verifies that it
* works on a basic level.
*
* More complete testing of regular expression functionality is done with the C++ tests.
**/
#include "unicode/utypes.h"
#if !UCONFIG_NO_REGULAR_EXPRESSIONS
#include <stdlib.h>
#include <string.h>
#include "unicode/uloc.h"
#include "unicode/uregex.h"
#include "unicode/ustring.h"
#include "cintltst.h"
/*
 * Failure-reporting helpers for this test file.
 *
 * TEST_ASSERT_SUCCESS(status) logs the file, line and ICU error name through
 *     log_err() when U_FAILURE(status) is true.
 * TEST_ASSERT(expr) logs the file and line through log_err() when expr
 *     evaluates to false (zero).
 *
 * Neither macro aborts the test; execution continues after a failure is logged.
 * Both are wrapped in do { ... } while (0) so that each expands to a single
 * statement and can be used safely in an unbraced if/else.
 */
#define TEST_ASSERT_SUCCESS(status) do { if (U_FAILURE(status)) { \
    log_err("Failure at file %s, line %d, error = %s\n", __FILE__, __LINE__, u_errorName(status)); } } while (0)
#define TEST_ASSERT(expr) do { if (!(expr)) { \
    log_err("Test Failure at file %s, line %d\n", __FILE__, __LINE__); } } while (0)
/* The test body is local to this file; only the registration hook is exported. */
static void TestRegexCAPI(void);
void addURegexTest(TestNode** root);

/**
 * Adds the regular expression C API test to the test tree.
 *
 * @param root  Test tree handle, passed through unchanged to addTest().
 *              TestRegexCAPI is added under the name "regex/TestRegexCAPI".
 */
void addURegexTest(TestNode** root)
{
    addTest(root, TestRegexCAPI, "regex/TestRegexCAPI");
}
void TestRegexCAPI(void) {
UErrorCode status = U_ZERO_ERROR;
URegularExpression *re;
UChar pat[200];
/* Mimimalist open/close */
u_uastrncpy(pat, "abc*", sizeof(pat)/2);
re = uregex_open(pat, -1, 0, 0, &status);
TEST_ASSERT_SUCCESS(status);
uregex_close(re);
/* Open with all flag values set */
status = U_ZERO_ERROR;
re = uregex_open(pat, -1,
UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD,
0, &status);
TEST_ASSERT_SUCCESS(status);
uregex_close(re);
/* Open with an invalid flag */
status = U_ZERO_ERROR;
re = uregex_open(pat, -1, 0x40000000, 0, &status);
TEST_ASSERT(status == U_REGEX_INVALID_FLAG);
uregex_close(re);
/* openC open from a C string */
{
const UChar *p;
int32_t len;
status = U_ZERO_ERROR;
re = uregex_openC("abc*", 0, 0, &status);
TEST_ASSERT_SUCCESS(status);
p = uregex_pattern(re, &len, &status);
TEST_ASSERT_SUCCESS(status);
u_uastrncpy(pat, "abc*", sizeof(pat)/2);
TEST_ASSERT(u_strcmp(pat, p) == 0);
TEST_ASSERT(len==(int32_t)strlen("abc*"));
uregex_close(re);
}
/*
* clone
*/
{
URegularExpression *clone1;
URegularExpression *clone2;
URegularExpression *clone3;
UChar testString1[30];
UChar testString2[30];
UBool result;
status = U_ZERO_ERROR;
re = uregex_openC("abc*", 0, 0, &status);
TEST_ASSERT_SUCCESS(status);
clone1 = uregex_clone(re, &status);
TEST_ASSERT_SUCCESS(status);
TEST_ASSERT(clone1 != NULL);
status = U_ZERO_ERROR;
clone2 = uregex_clone(re, &status);
TEST_ASSERT_SUCCESS(status);
TEST_ASSERT(clone2 != NULL);
uregex_close(re);
status = U_ZERO_ERROR;
clone3 = uregex_clone(clone2, &status);
TEST_ASSERT_SUCCESS(status);
TEST_ASSERT(clone3 != NULL);
u_uastrncpy(testString1, "abcccd", sizeof(pat)/2);
u_uastrncpy(testString2, "xxxabcccd", sizeof(pat)/2);
status = U_ZERO_ERROR;
uregex_setText(clone1, testString1, -1, &status);
TEST_ASSERT_SUCCESS(status);
result = uregex_lookingAt(clone1, 0, &status);
TEST_ASSERT_SUCCESS(status);
TEST_ASSERT(result==TRUE);
status = U_ZERO_ERROR;
uregex_setText(clone2, testString2, -1, &status);
TEST_ASSERT_SUCCESS(status);
result = uregex_lookingAt(clone2, 0, &status);
TEST_ASSERT_SUCCESS(status);
TEST_ASSERT(result==FALSE);
result = uregex_find(clone2, 0, &status);
TEST_ASSERT_SUCCESS(status);
TEST_ASSERT(result==TRUE);
uregex_close(clone1);
uregex_close(clone2);
uregex_close(clone3);
}
/*
* pattern()
*/
{
const UChar *resultPat;
int32_t resultLen;
u_uastrncpy(pat, "hello", sizeof(pat)/2);
status = U_ZERO_ERROR;
re = uregex_open(pat, -1, 0, NULL, &status);
resultPat = uregex_pattern(re, &resultLen, &status);
TEST_ASSERT_SUCCESS(status);
TEST_ASSERT(resultLen == -1);
TEST_ASSERT(u_strcmp(resultPat, pat) == 0);
uregex_close(re);
status = U_ZERO_ERROR;
re = uregex_open(pat, 3, 0, NULL, &status);
resultPat = uregex_pattern(re, &resultLen, &status);
TEST_ASSERT_SUCCESS(status);
TEST_ASSERT(resultLen == 3);
TEST_ASSERT(u_strncmp(resultPat, pat, 3) == 0);
TEST_ASSERT(u_strlen(resultPat) == 3);
uregex_close(re);
}
}
#endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS */