ICU-8490 Add BreakIterator::refreshInputText()
X-SVN-Rev: 30203
This commit is contained in:
parent
b90dc9ad48
commit
b8d330e9a7
@ -486,6 +486,37 @@ RuleBasedBreakIterator::setText(const UnicodeString& newText) {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Provide a new UText for the input text. Must reference text with contents identical
|
||||
* to the original.
|
||||
* Intended for use with text data originating in Java (garbage collected) environments
|
||||
* where the data may be moved in memory at arbitrary times.
|
||||
*/
|
||||
RuleBasedBreakIterator &RuleBasedBreakIterator::refreshInputText(UText *input, UErrorCode &status) {
|
||||
if (U_FAILURE(status)) {
|
||||
return *this;
|
||||
}
|
||||
if (input == NULL) {
|
||||
status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return *this;
|
||||
}
|
||||
int64_t pos = utext_getNativeIndex(fText);
|
||||
// Shallow read-only clone of the new UText into the existing input UText
|
||||
fText = utext_clone(fText, input, FALSE, TRUE, &status);
|
||||
if (U_FAILURE(status)) {
|
||||
return *this;
|
||||
}
|
||||
utext_setNativeIndex(fText, pos);
|
||||
if (utext_getNativeIndex(fText) != pos) {
|
||||
// Sanity check. The new input utext is supposed to have the exact same
|
||||
// contents as the old. If we can't set to the same position, it doesn't.
|
||||
// The contents underlying the old utext might be invalid at this point,
|
||||
// so it's not safe to check directly.
|
||||
status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Sets the current iteration position to the beginning of the text.
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
********************************************************************************
|
||||
* Copyright (C) 1996-2008, International Business Machines
|
||||
* Copyright (C) 1996-2011, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
********************************************************************************
|
||||
*/
|
||||
@ -290,4 +290,14 @@ ubrk_getLocaleByType(const UBreakIterator *bi,
|
||||
}
|
||||
|
||||
|
||||
void ubrk_refreshUText(UBreakIterator *bi,
|
||||
UText *text,
|
||||
UErrorCode *status)
|
||||
{
|
||||
BreakIterator *bii = reinterpret_cast<BreakIterator *>(bi);
|
||||
bii->refreshInputText(text, *status);
|
||||
}
|
||||
|
||||
|
||||
|
||||
#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
********************************************************************************
|
||||
* Copyright (C) 1997-2010, International Business Machines
|
||||
* Copyright (C) 1997-2011, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
********************************************************************************
|
||||
*
|
||||
@ -514,6 +514,33 @@ public:
|
||||
*/
|
||||
const char *getLocaleID(ULocDataLocaleType type, UErrorCode& status) const;
|
||||
|
||||
/**
|
||||
* Set the subject text string upon which the break iterator is operating
|
||||
* without changing any other aspect of the iteration state.
|
||||
* The new and previous text strings must have the same content.
|
||||
*
|
||||
* This function is intended for use in environments where ICU is operating on
|
||||
* strings that may move around in memory. It provides a mechanism for notifying
|
||||
* ICU that the string has been relocated, and providing a new UText to access the
|
||||
* string in its new position.
|
||||
*
|
||||
* Note that the break iterator implementation never copies the underlying text
|
||||
* of a string being processed, but always operates directly on the original text
|
||||
* provided by the user. Refreshing simply drops the references to the old text
|
||||
* and replaces them with references to the new.
|
||||
*
|
||||
* Caution: this function is normally used only by very specialized,
|
||||
* system-level code. One example use case is with garbage collection that moves
|
||||
* the text in memory.
|
||||
*
|
||||
* @param input The new (moved) text string.
|
||||
* @param status Receives errors detected by this function.
|
||||
* @return *this
|
||||
*
|
||||
* @draft ICU 5.0
|
||||
*/
|
||||
virtual BreakIterator &refreshInputText(UText *input, UErrorCode &status) = 0;
|
||||
|
||||
private:
|
||||
static BreakIterator* buildInstance(const Locale& loc, const char *type, int32_t kind, UErrorCode& status);
|
||||
static BreakIterator* createInstance(const Locale& loc, int32_t kind, UErrorCode& status);
|
||||
|
@ -633,6 +633,33 @@ public:
|
||||
*/
|
||||
virtual const uint8_t *getBinaryRules(uint32_t &length);
|
||||
|
||||
/**
|
||||
* Set the subject text string upon which the break iterator is operating
|
||||
* without changing any other aspect of the iteration state.
|
||||
* The new and previous text strings must have the same content.
|
||||
*
|
||||
* This function is intended for use in environments where ICU is operating on
|
||||
* strings that may move around in memory. It provides a mechanism for notifying
|
||||
* ICU that the string has been relocated, and providing a new UText to access the
|
||||
* string in its new position.
|
||||
*
|
||||
* Note that the break iterator implementation never copies the underlying text
|
||||
* of a string being processed, but always operates directly on the original text
|
||||
* provided by the user. Refreshing simply drops the references to the old text
|
||||
* and replaces them with references to the new.
|
||||
*
|
||||
* Caution: this function is normally used only by very specialized,
|
||||
* system-level code. One example use case is with garbage collection that moves
|
||||
* the text in memory.
|
||||
*
|
||||
* @param input The new (moved) text string.
|
||||
* @param status Receives errors detected by this function.
|
||||
* @return *this
|
||||
*
|
||||
* @draft ICU 5.0
|
||||
*/
|
||||
virtual RuleBasedBreakIterator &refreshInputText(UText *input, UErrorCode &status);
|
||||
|
||||
|
||||
protected:
|
||||
//=======================================================================
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
******************************************************************************
|
||||
* Copyright (C) 1996-2010, International Business Machines Corporation and others.
|
||||
* Copyright (C) 1996-2011, International Business Machines Corporation and others.
|
||||
* All Rights Reserved.
|
||||
******************************************************************************
|
||||
*/
|
||||
@ -496,6 +496,37 @@ U_STABLE const char* U_EXPORT2
|
||||
ubrk_getLocaleByType(const UBreakIterator *bi, ULocDataLocaleType type, UErrorCode* status);
|
||||
|
||||
|
||||
/**
|
||||
* Set the subject text string upon which the break iterator is operating
|
||||
* without changing any other aspect of the state.
|
||||
* The new and previous text strings must have the same content.
|
||||
*
|
||||
* This function is intended for use in environments where ICU is operating on
|
||||
* strings that may move around in memory. It provides a mechanism for notifying
|
||||
* ICU that the string has been relocated, and providing a new UText to access the
|
||||
* string in its new position.
|
||||
*
|
||||
* Note that the break iterator never copies the underlying text
|
||||
* of a string being processed, but always operates directly on the original text
|
||||
* provided by the user. Refreshing simply drops the references to the old text
|
||||
* and replaces them with references to the new.
|
||||
*
|
||||
* Caution: this function is normally used only by very specialized
|
||||
* system-level code. One example use case is with garbage collection
|
||||
* that moves the text in memory.
|
||||
*
|
||||
* @param bi The break iterator.
|
||||
* @param text The new (moved) text string.
|
||||
* @param status Receives errors detected by this function.
|
||||
*
|
||||
* @draft ICU 5.0
|
||||
*/
|
||||
U_DRAFT void U_EXPORT2
|
||||
ubrk_refreshUText(UBreakIterator *bi,
|
||||
UText *text,
|
||||
UErrorCode *status);
|
||||
|
||||
|
||||
#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
|
||||
|
||||
#endif
|
||||
|
@ -44,6 +44,7 @@ static void TestBreakIteratorRuleError(void);
|
||||
static void TestBreakIteratorStatusVec(void);
|
||||
static void TestBreakIteratorUText(void);
|
||||
static void TestBreakIteratorTailoring(void);
|
||||
static void TestBreakIteratorRefresh(void);
|
||||
|
||||
void addBrkIterAPITest(TestNode** root);
|
||||
|
||||
@ -58,6 +59,7 @@ void addBrkIterAPITest(TestNode** root)
|
||||
addTest(root, &TestBreakIteratorRuleError, "tstxtbd/cbiapts/TestBreakIteratorRuleError");
|
||||
addTest(root, &TestBreakIteratorStatusVec, "tstxtbd/cbiapts/TestBreakIteratorStatusVec");
|
||||
addTest(root, &TestBreakIteratorTailoring, "tstxtbd/cbiapts/TestBreakIteratorTailoring");
|
||||
addTest(root, &TestBreakIteratorRefresh, "tstxtbd/cbiapts/TestBreakIteratorRefresh");
|
||||
}
|
||||
|
||||
#define CLONETEST_ITERATOR_COUNT 2
|
||||
@ -823,4 +825,52 @@ static void TestBreakIteratorTailoring(void) {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static void TestBreakIteratorRefresh(void) {
|
||||
/*
|
||||
* RefreshInput changes out the input of a Break Iterator without
|
||||
* changing anything else in the iterator's state. Used with Java JNI,
|
||||
* when Java moves the underlying string storage. This test
|
||||
* runs a ubrk_next() repeatedly, moving the text in the middle of the sequence.
|
||||
* The right set of boundaries should still be found.
|
||||
*/
|
||||
UChar testStr[] = {0x20, 0x41, 0x20, 0x42, 0x20, 0x43, 0x20, 0x44, 0x0}; /* = " A B C D" */
|
||||
UChar movedStr[] = {0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0};
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
UBreakIterator *bi;
|
||||
UText ut1 = UTEXT_INITIALIZER;
|
||||
UText ut2 = UTEXT_INITIALIZER;
|
||||
|
||||
bi = ubrk_open(UBRK_LINE, "en_US", NULL, 0, &status);
|
||||
TEST_ASSERT_SUCCESS(status);
|
||||
|
||||
utext_openUChars(&ut1, testStr, -1, &status);
|
||||
TEST_ASSERT_SUCCESS(status);
|
||||
ubrk_setUText(bi, &ut1, &status);
|
||||
TEST_ASSERT_SUCCESS(status);
|
||||
|
||||
/* Line boundaries will occur before each letter in the original string */
|
||||
TEST_ASSERT(1 == ubrk_next(bi));
|
||||
TEST_ASSERT(3 == ubrk_next(bi));
|
||||
|
||||
/* Move the string, kill the original string. */
|
||||
u_strcpy(movedStr, testStr);
|
||||
u_memset(testStr, 0x20, u_strlen(testStr));
|
||||
utext_openUChars(&ut2, movedStr, -1, &status);
|
||||
TEST_ASSERT_SUCCESS(status);
|
||||
ubrk_refreshUText(bi, &ut2, &status);
|
||||
TEST_ASSERT_SUCCESS(status);
|
||||
|
||||
/* Find the following matches, now working in the moved string. */
|
||||
TEST_ASSERT(5 == ubrk_next(bi));
|
||||
TEST_ASSERT(7 == ubrk_next(bi));
|
||||
TEST_ASSERT(8 == ubrk_next(bi));
|
||||
TEST_ASSERT(UBRK_DONE == ubrk_next(bi));
|
||||
TEST_ASSERT_SUCCESS(status);
|
||||
|
||||
ubrk_close(bi);
|
||||
utext_close(&ut1);
|
||||
utext_close(&ut2);
|
||||
}
|
||||
|
||||
#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
|
||||
|
@ -1122,6 +1122,54 @@ void RBBIAPITest::TestCreateFromRBBIData() {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void RBBIAPITest::TestRefreshInputText() {
|
||||
/*
|
||||
* RefreshInput changes out the input of a Break Iterator without
|
||||
* changing anything else in the iterator's state. Used with Java JNI,
|
||||
* when Java moves the underlying string storage. This test
|
||||
* runs BreakIterator::next() repeatedly, moving the text in the middle of the sequence.
|
||||
* The right set of boundaries should still be found.
|
||||
*/
|
||||
UChar testStr[] = {0x20, 0x41, 0x20, 0x42, 0x20, 0x43, 0x20, 0x44, 0x0}; /* = " A B C D" */
|
||||
UChar movedStr[] = {0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0};
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
UText ut1 = UTEXT_INITIALIZER;
|
||||
UText ut2 = UTEXT_INITIALIZER;
|
||||
RuleBasedBreakIterator *bi = (RuleBasedBreakIterator *)BreakIterator::createLineInstance(Locale::getEnglish(), status);
|
||||
TEST_ASSERT_SUCCESS(status);
|
||||
|
||||
utext_openUChars(&ut1, testStr, -1, &status);
|
||||
TEST_ASSERT_SUCCESS(status);
|
||||
bi->setText(&ut1, status);
|
||||
TEST_ASSERT_SUCCESS(status);
|
||||
|
||||
/* Line boundaries will occur before each letter in the original string */
|
||||
TEST_ASSERT(1 == bi->next());
|
||||
TEST_ASSERT(3 == bi->next());
|
||||
|
||||
/* Move the string, kill the original string. */
|
||||
u_strcpy(movedStr, testStr);
|
||||
u_memset(testStr, 0x20, u_strlen(testStr));
|
||||
utext_openUChars(&ut2, movedStr, -1, &status);
|
||||
TEST_ASSERT_SUCCESS(status);
|
||||
RuleBasedBreakIterator *returnedBI = &bi->refreshInputText(&ut2, status);
|
||||
TEST_ASSERT_SUCCESS(status);
|
||||
TEST_ASSERT(bi == returnedBI);
|
||||
|
||||
/* Find the following matches, now working in the moved string. */
|
||||
TEST_ASSERT(5 == bi->next());
|
||||
TEST_ASSERT(7 == bi->next());
|
||||
TEST_ASSERT(8 == bi->next());
|
||||
TEST_ASSERT(UBRK_DONE == bi->next());
|
||||
|
||||
delete bi;
|
||||
utext_close(&ut1);
|
||||
utext_close(&ut2);
|
||||
|
||||
}
|
||||
|
||||
|
||||
//---------------------------------------------
|
||||
// runIndexedTest
|
||||
//---------------------------------------------
|
||||
@ -1153,6 +1201,7 @@ void RBBIAPITest::runIndexedTest( int32_t index, UBool exec, const char* &name,
|
||||
#else
|
||||
case 9: case 10: case 11: case 12: case 13: name = "skip"; break;
|
||||
#endif
|
||||
case 14: name = "TestRefreshInputText"; if (exec) TestRefreshInputText(); break;
|
||||
|
||||
default: name = ""; break; // needed to end loop
|
||||
}
|
||||
|
@ -1,6 +1,6 @@
|
||||
/********************************************************************
|
||||
* COPYRIGHT:
|
||||
* Copyright (c) 1999-2004,2008 International Business Machines Corporation and
|
||||
* Copyright (c) 1999-2011 International Business Machines Corporation and
|
||||
* others. All Rights Reserved.
|
||||
********************************************************************/
|
||||
/************************************************************************
|
||||
@ -86,6 +86,8 @@ public:
|
||||
|
||||
void TestRegistration();
|
||||
|
||||
void TestRefreshInputText();
|
||||
|
||||
/**
|
||||
*Internal subroutines
|
||||
**/
|
||||
|
Loading…
Reference in New Issue
Block a user