ICU-7795 Regular Expression refreshInputText added.
X-SVN-Rev: 29363
This commit is contained in:
parent
a658867100
commit
4a932a4dfc
@ -1,6 +1,6 @@
|
||||
/*
|
||||
**************************************************************************
|
||||
* Copyright (C) 2002-2010 International Business Machines Corporation *
|
||||
* Copyright (C) 2002-2011 International Business Machines Corporation *
|
||||
* and others. All rights reserved. *
|
||||
**************************************************************************
|
||||
*/
|
||||
@ -1963,6 +1963,41 @@ RegexMatcher &RegexMatcher::reset(int64_t position, UErrorCode &status) {
|
||||
}
|
||||
|
||||
|
||||
//--------------------------------------------------------------------------------
|
||||
//
|
||||
// refreshInputText
|
||||
//
|
||||
//--------------------------------------------------------------------------------
|
||||
// Swap the UText(s) this matcher reads from for a shallow, read-only clone of
// `input`, putting each UText's native index back where it was beforehand.
//
//   input   UText over the replacement text; must not be NULL and must have
//           the same native length as the text currently held.
//   status  In/out error code. Nothing is done if it already indicates
//           failure. Set to U_ILLEGAL_ARGUMENT_ERROR when `input` is NULL or
//           the native lengths differ; may also carry a utext_clone() failure.
//
// Returns *this in every case.
RegexMatcher &RegexMatcher::refreshInputText(UText *input, UErrorCode &status) {
    if (U_FAILURE(status)) {
        return *this;
    }

    // A missing UText and a length mismatch are reported identically.
    // Short-circuit evaluation keeps the length query away from a NULL input.
    if (input == NULL ||
        utext_nativeLength(fInputText) != utext_nativeLength(input)) {
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return *this;
    }

    // Primary input: remember the iteration index, re-clone in place
    // (deep = FALSE, readOnly = TRUE), then restore the index.
    const int64_t savedIndex = utext_getNativeIndex(fInputText);
    fInputText = utext_clone(fInputText, input, FALSE, TRUE, &status);
    if (U_FAILURE(status)) {
        return *this;
    }
    utext_setNativeIndex(fInputText, savedIndex);

    // Alternate input UText, when one exists, gets the same treatment.
    if (fAltInputText == NULL) {
        return *this;
    }
    const int64_t savedAltIndex = utext_getNativeIndex(fAltInputText);
    fAltInputText = utext_clone(fAltInputText, input, FALSE, TRUE, &status);
    if (U_FAILURE(status)) {
        return *this;
    }
    utext_setNativeIndex(fAltInputText, savedAltIndex);
    return *this;
}
|
||||
|
||||
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 2002-2010, International Business Machines
|
||||
* Copyright (C) 2002-2011, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
* file name: regex.h
|
||||
@ -1055,6 +1055,33 @@ public:
|
||||
*/
|
||||
virtual RegexMatcher &reset(UText *input);
|
||||
|
||||
|
||||
/**
|
||||
* Set the subject text string upon which the regular expression is looking for matches
|
||||
* without changing any other aspect of the matching state.
|
||||
* The new and previous text strings must have the same content.
|
||||
*
|
||||
* This function is intended for use in environments where ICU is operating on
|
||||
* strings that may move around in memory. It provides a mechanism for notifying
|
||||
* ICU that the string has been relocated, and providing a new UText to access the
|
||||
* string in its new position.
|
||||
*
|
||||
* Note that the regular expression implementation never copies the underlying text
|
||||
* of a string being matched, but always operates directly on the original text
|
||||
* provided by the user. Refreshing simply drops the references to the old text
|
||||
* and replaces them with references to the new.
|
||||
*
|
||||
* Caution: this function is normally used only by very specialized,
|
||||
* system-level code. One example use case is with garbage collection that moves
|
||||
* the text in memory.
|
||||
*
|
||||
* @param input The new (moved) text string.
|
||||
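* @param status Receives errors detected by this function. Set to U_ILLEGAL_ARGUMENT_ERROR if input is NULL or if its native length differs from that of the current text.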
|
||||
*
|
||||
* @draft ICU 4.8
|
||||
*/
|
||||
virtual RegexMatcher &refreshInputText(UText *input, UErrorCode &status);
|
||||
|
||||
private:
|
||||
/**
|
||||
* Cause a compilation error if an application accidentally attempts to
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 2004-2010, International Business Machines
|
||||
* Copyright (C) 2004-2011, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
* file name: uregex.h
|
||||
@ -395,6 +395,36 @@ uregex_getUText(URegularExpression *regexp,
|
||||
UText *dest,
|
||||
UErrorCode *status);
|
||||
|
||||
/**
|
||||
* Set the subject text string upon which the regular expression is looking for matches
|
||||
* without changing any other aspect of the matching state.
|
||||
* The new and previous text strings must have the same content.
|
||||
*
|
||||
* This function is intended for use in environments where ICU is operating on
|
||||
* strings that may move around in memory. It provides a mechanism for notifying
|
||||
* ICU that the string has been relocated, and providing a new UText to access the
|
||||
* string in its new position.
|
||||
*
|
||||
* Note that the regular expression implementation never copies the underlying text
|
||||
* of a string being matched, but always operates directly on the original text
|
||||
* provided by the user. Refreshing simply drops the references to the old text
|
||||
* and replaces them with references to the new.
|
||||
*
|
||||
* Caution: this function is normally used only by very specialized
|
||||
* system-level code. One example use case is with garbage collection
|
||||
* that moves the text in memory.
|
||||
*
|
||||
* @param regexp The compiled regular expression.
|
||||
* @param text The new (moved) text string.
|
||||
* @param status Receives errors detected by this function.
|
||||
*
|
||||
* @draft ICU 4.8
|
||||
*/
|
||||
U_INTERNAL void U_EXPORT2
|
||||
uregex_refreshUText(URegularExpression *regexp,
|
||||
UText *text,
|
||||
UErrorCode *status);
|
||||
|
||||
/**
|
||||
* Attempts to match the input string against the pattern.
|
||||
* To succeed, the match must extend to the end of the string,
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2004-2010, International Business Machines
|
||||
* Copyright (C) 2004-2011, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*******************************************************************************
|
||||
* file name: uregex.cpp
|
||||
@ -482,6 +482,23 @@ uregex_getUText(URegularExpression *regexp2,
|
||||
}
|
||||
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
//
|
||||
// uregex_refreshUText
|
||||
//
|
||||
//------------------------------------------------------------------------------
|
||||
U_CAPI void U_EXPORT2
uregex_refreshUText(URegularExpression *regexp2,
                    UText              *text,
                    UErrorCode         *status) {
    // C API shim: recover the implementation object behind the opaque handle
    // and, if it passes validation, forward to the matcher's refreshInputText().
    // Argument checking of `text` is done by the matcher.
    RegularExpression *impl = (RegularExpression*)regexp2;
    if (validateRE(impl, status, FALSE)) {
        impl->fMatcher->refreshInputText(text, *status);
    }
}
|
||||
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
//
|
||||
// uregex_matches
|
||||
|
@ -1,6 +1,6 @@
|
||||
/********************************************************************
|
||||
* COPYRIGHT:
|
||||
* Copyright (c) 2004-2010, International Business Machines Corporation and
|
||||
* Copyright (c) 2004-2011, International Business Machines Corporation and
|
||||
* others. All Rights Reserved.
|
||||
********************************************************************/
|
||||
/********************************************************************************
|
||||
@ -119,6 +119,7 @@ static void test_assert_utext(const char *expected, UText *actual, const char *f
|
||||
static void TestRegexCAPI(void);
|
||||
static void TestBug4315(void);
|
||||
static void TestUTextAPI(void);
|
||||
static void TestRefreshInput(void);
|
||||
|
||||
void addURegexTest(TestNode** root);
|
||||
|
||||
@ -127,6 +128,7 @@ void addURegexTest(TestNode** root)
|
||||
addTest(root, &TestRegexCAPI, "regex/TestRegexCAPI");
|
||||
addTest(root, &TestBug4315, "regex/TestBug4315");
|
||||
addTest(root, &TestUTextAPI, "regex/TestUTextAPI");
|
||||
addTest(root, &TestRefreshInput, "regex/TestRefreshInput");
|
||||
}
|
||||
|
||||
/*
|
||||
@ -2150,4 +2152,51 @@ static void TestUTextAPI(void) {
|
||||
utext_close(&patternText);
|
||||
}
|
||||
|
||||
|
||||
static void TestRefreshInput(void) {
|
||||
/*
|
||||
* RefreshInput changes out the input of a URegularExpression without
|
||||
* changing anything else in the match state. Used with Java JNI,
|
||||
* when Java moves the underlying string storage. This test
|
||||
* runs a find() loop, moving the text after the first match.
|
||||
* The right number of matches should still be found.
|
||||
*/
|
||||
UChar testStr[] = {0x41, 0x20, 0x42, 0x20, 0x43, 0x0}; /* = "A B C" */
|
||||
UChar movedStr[] = { 0, 0, 0, 0, 0, 0};
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
URegularExpression *re;
|
||||
UText ut1 = UTEXT_INITIALIZER;
|
||||
UText ut2 = UTEXT_INITIALIZER;
|
||||
|
||||
re = uregex_openC("[ABC]", 0, 0, &status);
|
||||
TEST_ASSERT_SUCCESS(status);
|
||||
|
||||
utext_openUChars(&ut1, testStr, -1, &status);
|
||||
TEST_ASSERT_SUCCESS(status);
|
||||
uregex_setUText(re, &ut1, &status);
|
||||
TEST_ASSERT_SUCCESS(status);
|
||||
|
||||
/* Find the first match "A" in the original string */
|
||||
TEST_ASSERT(uregex_findNext(re, &status));
|
||||
TEST_ASSERT(uregex_start(re, 0, &status) == 0);
|
||||
|
||||
/* Move the string, kill the original string. */
|
||||
u_strcpy(movedStr, testStr);
|
||||
u_memset(testStr, 0, u_strlen(testStr));
|
||||
utext_openUChars(&ut2, movedStr, -1, &status);
|
||||
TEST_ASSERT_SUCCESS(status);
|
||||
uregex_refreshUText(re, &ut2, &status);
|
||||
TEST_ASSERT_SUCCESS(status);
|
||||
|
||||
/* Find the following two matches, now working in the moved string. */
|
||||
TEST_ASSERT(uregex_findNext(re, &status));
|
||||
TEST_ASSERT(uregex_start(re, 0, &status) == 2);
|
||||
TEST_ASSERT(uregex_findNext(re, &status));
|
||||
TEST_ASSERT(uregex_start(re, 0, &status) == 4);
|
||||
TEST_ASSERT(FALSE == uregex_findNext(re, &status));
|
||||
|
||||
uregex_close(re);
|
||||
}
|
||||
|
||||
|
||||
#endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS */
|
||||
|
Loading…
Reference in New Issue
Block a user