ICU-7795 Regular Expression refreshInputText added.

X-SVN-Rev: 29363
This commit is contained in:
Andy Heninger 2011-01-27 01:12:12 +00:00
parent a658867100
commit 4a932a4dfc
5 changed files with 163 additions and 5 deletions

View File

@ -1,6 +1,6 @@
/*
**************************************************************************
* Copyright (C) 2002-2010 International Business Machines Corporation *
* Copyright (C) 2002-2011 International Business Machines Corporation *
* and others. All rights reserved. *
**************************************************************************
*/
@ -1963,6 +1963,41 @@ RegexMatcher &RegexMatcher::reset(int64_t position, UErrorCode &status) {
}
//--------------------------------------------------------------------------------
//
// refreshInputText
//
//--------------------------------------------------------------------------------
// Re-point this matcher at a relocated copy of its input text while leaving
// the rest of the match state alone.
//
//   input   UText giving access to the text at its new location. Must be
//           non-NULL and have the same native length as the current input.
//   status  In/out error code. Nothing is done if it already indicates a
//           failure on entry; set to U_ILLEGAL_ARGUMENT_ERROR for a NULL
//           input or a length mismatch.
//
// Returns *this in every case.
RegexMatcher &RegexMatcher::refreshInputText(UText *input, UErrorCode &status) {
    if (U_FAILURE(status)) {
        return *this;
    }

    // Reject a missing replacement, or one whose native length differs from
    // the text currently being matched. The NULL test short-circuits, so the
    // length of a NULL input is never queried.
    if (input == NULL ||
            utext_nativeLength(fInputText) != utext_nativeLength(input)) {
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return *this;
    }

    // Remember the iteration position, replace the existing input UText with
    // a shallow, read-only clone of the new one, then restore the position.
    int64_t savedIndex = utext_getNativeIndex(fInputText);
    fInputText = utext_clone(fInputText, input, FALSE, TRUE, &status);
    if (!U_FAILURE(status)) {
        utext_setNativeIndex(fInputText, savedIndex);

        // The alternate input UText, when present, gets the same treatment.
        if (fAltInputText != NULL) {
            savedIndex = utext_getNativeIndex(fAltInputText);
            fAltInputText = utext_clone(fAltInputText, input, FALSE, TRUE, &status);
            if (!U_FAILURE(status)) {
                utext_setNativeIndex(fAltInputText, savedIndex);
            }
        }
    }
    return *this;
}

View File

@ -1,6 +1,6 @@
/*
**********************************************************************
* Copyright (C) 2002-2010, International Business Machines
* Copyright (C) 2002-2011, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* file name: regex.h
@ -1055,6 +1055,33 @@ public:
*/
virtual RegexMatcher &reset(UText *input);
/**
* Set the subject text string upon which the regular expression is looking for matches
* without changing any other aspect of the matching state.
* The new and previous text strings must have the same content.
*
* This function is intended for use in environments where ICU is operating on
* strings that may move around in memory. It provides a mechanism for notifying
* ICU that the string has been relocated, and providing a new UText to access the
* string in its new position.
*
* Note that the regular expression implementation never copies the underlying text
* of a string being matched, but always operates directly on the original text
* provided by the user. Refreshing simply drops the references to the old text
* and replaces them with references to the new.
*
* Caution: this function is normally used only by very specialized,
* system-level code. One example use case is with garbage collection that moves
* the text in memory.
*
* @param input The new (moved) text string. Must not be NULL, and must have
* the same native length as the text currently being matched.
* @param status Receives errors detected by this function. Set to
* U_ILLEGAL_ARGUMENT_ERROR if input is NULL or its length
* differs from that of the current input text.
* @return This RegexMatcher.
*
* @draft ICU 4.8
*/
virtual RegexMatcher &refreshInputText(UText *input, UErrorCode &status);
private:
/**
* Cause a compilation error if an application accidentally attempts to

View File

@ -1,6 +1,6 @@
/*
**********************************************************************
* Copyright (C) 2004-2010, International Business Machines
* Copyright (C) 2004-2011, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* file name: uregex.h
@ -395,6 +395,36 @@ uregex_getUText(URegularExpression *regexp,
UText *dest,
UErrorCode *status);
/**
* Set the subject text string upon which the regular expression is looking for matches
* without changing any other aspect of the matching state.
* The new and previous text strings must have the same content.
*
* This function is intended for use in environments where ICU is operating on
* strings that may move around in memory. It provides a mechanism for notifying
* ICU that the string has been relocated, and providing a new UText to access the
* string in its new position.
*
* Note that the regular expression implementation never copies the underlying text
* of a string being matched, but always operates directly on the original text
* provided by the user. Refreshing simply drops the references to the old text
* and replaces them with references to the new.
*
* Caution: this function is normally used only by very specialized
* system-level code. One example use case is with garbage collection
* that moves the text in memory.
*
* @param regexp The compiled regular expression.
* @param text The new (moved) text string. Must not be NULL, and must have
* the same native length as the text currently being matched.
* @param status Receives errors detected by this function. Set to
* U_ILLEGAL_ARGUMENT_ERROR if text is NULL or its length
* differs from that of the current input text.
*
* @draft ICU 4.8
*/
U_DRAFT void U_EXPORT2
uregex_refreshUText(URegularExpression *regexp,
UText *text,
UErrorCode *status);
/**
* Attempts to match the input string against the pattern.
* To succeed, the match must extend to the end of the string,

View File

@ -1,6 +1,6 @@
/*
*******************************************************************************
* Copyright (C) 2004-2010, International Business Machines
* Copyright (C) 2004-2011, International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
* file name: regex.cpp
@ -482,6 +482,23 @@ uregex_getUText(URegularExpression *regexp2,
}
//------------------------------------------------------------------------------
//
// uregex_refreshUText
//
//------------------------------------------------------------------------------
// C API entry point: validate the regular expression handle and, when it is
// usable, forward the relocated text to the underlying matcher's
// refreshInputText(). Errors from validation or from the matcher are reported
// through *status.
U_CAPI void U_EXPORT2
uregex_refreshUText(URegularExpression *regexp2,
                    UText *text,
                    UErrorCode *status) {
    RegularExpression *re = (RegularExpression*)regexp2;
    if (validateRE(re, status, FALSE)) {
        re->fMatcher->refreshInputText(text, *status);
    }
}
//------------------------------------------------------------------------------
//
// uregex_matches

View File

@ -1,6 +1,6 @@
/********************************************************************
* COPYRIGHT:
* Copyright (c) 2004-2010, International Business Machines Corporation and
* Copyright (c) 2004-2011, International Business Machines Corporation and
* others. All Rights Reserved.
********************************************************************/
/********************************************************************************
@ -119,6 +119,7 @@ static void test_assert_utext(const char *expected, UText *actual, const char *f
static void TestRegexCAPI(void);
static void TestBug4315(void);
static void TestUTextAPI(void);
static void TestRefreshInput(void);
void addURegexTest(TestNode** root);
@ -127,6 +128,7 @@ void addURegexTest(TestNode** root)
addTest(root, &TestRegexCAPI, "regex/TestRegexCAPI");
addTest(root, &TestBug4315, "regex/TestBug4315");
addTest(root, &TestUTextAPI, "regex/TestUTextAPI");
addTest(root, &TestRefreshInput, "regex/TestRefreshInput");
}
/*
@ -2150,4 +2152,51 @@ static void TestUTextAPI(void) {
utext_close(&patternText);
}
static void TestRefreshInput(void) {
    /*
     *  RefreshInput changes out the input of a URegularExpression without
     *    changing anything else in the match state.  Used with Java JNI,
     *    when Java moves the underlying string storage.   This test
     *    runs a find() loop, moving the text after the first match.
     *    The right number of matches should still be found.
     */
    UChar testStr[]  = {0x41, 0x20, 0x42, 0x20, 0x43, 0x0};  /* = "A B C"  */
    UChar movedStr[] = {   0,    0,    0,    0,    0,   0};
    UErrorCode status = U_ZERO_ERROR;
    URegularExpression *re;
    UText ut1 = UTEXT_INITIALIZER;
    UText ut2 = UTEXT_INITIALIZER;

    re = uregex_openC("[ABC]", 0, 0, &status);
    TEST_ASSERT_SUCCESS(status);

    utext_openUChars(&ut1, testStr, -1, &status);
    TEST_ASSERT_SUCCESS(status);
    uregex_setUText(re, &ut1, &status);
    TEST_ASSERT_SUCCESS(status);

    /* Find the first match "A" in the original string */
    TEST_ASSERT(uregex_findNext(re, &status));
    TEST_ASSERT(uregex_start(re, 0, &status) == 0);

    /* Move the string, kill the original string.  */
    u_strcpy(movedStr, testStr);
    u_memset(testStr, 0, u_strlen(testStr));
    utext_openUChars(&ut2, movedStr, -1, &status);
    TEST_ASSERT_SUCCESS(status);
    uregex_refreshUText(re, &ut2, &status);
    TEST_ASSERT_SUCCESS(status);

    /* Find the following two matches, now working in the moved string. */
    TEST_ASSERT(uregex_findNext(re, &status));
    TEST_ASSERT(uregex_start(re, 0, &status) == 2);
    TEST_ASSERT(uregex_findNext(re, &status));
    TEST_ASSERT(uregex_start(re, 0, &status) == 4);
    TEST_ASSERT(FALSE == uregex_findNext(re, &status));

    uregex_close(re);

    /* Close both UTexts opened above so that every open is paired with a close. */
    utext_close(&ut1);
    utext_close(&ut2);
}
#endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS */