ICU-1076 allow unlimited @ operator; make @ increment by code points forward or backward; pin cursor to begining of antecontext and limit

X-SVN-Rev: 5440
This commit is contained in:
Alan Liu 2001-08-04 00:15:12 +00:00
parent c427ceba1c
commit 264c1f3bcc
3 changed files with 70 additions and 110 deletions

View File

@ -1091,8 +1091,12 @@ int32_t TransliteratorParser::parseRule(int32_t pos, int32_t limit) {
if (right->ante >= 0 || right->post >= 0 || left->cursor >= 0 ||
right->segments != NULL || left->maxRef >= 0 ||
(right->cursorOffset != 0 && right->cursor < 0) ||
(right->cursorOffset > (left->text.length() - left->post)) ||
(-right->cursorOffset > left->ante) ||
// - The following two checks were used to ensure that the
// - the cursor offset stayed within the ante- or postcontext.
// - However, with the addition of quantifiers, we have to
// - allow arbitrary cursor offsets and do runtime checking.
//(right->cursorOffset > (left->text.length() - left->post)) ||
//(-right->cursorOffset > left->ante) ||
right->anchorStart || right->anchorEnd) {
return syntaxError(RuleBasedTransliterator::MALFORMED_RULE, rule, start);

View File

@ -68,67 +68,7 @@ TransliterationRule::TransliterationRule(const UnicodeString& input,
const TransliterationRuleData& theData,
UErrorCode& status) :
data(theData) {
init(input, anteContextPos, postContextPos,
outputStr, cursorPosition, cursorOffset, adoptedSegs,
anchorStart, anchorEnd, status);
}
/**
* Construct a new rule with the given input, output text, and other
* attributes. A cursor position may be specified for the output text.
* @param input input string, including key and optional ante and
* post context
* @param anteContextPos offset into input to end of ante context, or -1 if
* none. Must be <= input.length() if not -1.
* @param postContextPos offset into input to start of post context, or -1
* if none. Must be <= input.length() if not -1, and must be >=
* anteContextPos.
* @param output output string
* @param cursorPosition offset into output at which cursor is located, or -1 if
* none. If less than zero, then the cursor is placed after the
* <code>output</code>; that is, -1 is equivalent to
* <code>output.length()</code>. If greater than
* <code>output.length()</code> then an exception is thrown.
*/
TransliterationRule::TransliterationRule(const UnicodeString& input,
int32_t anteContextPos, int32_t postContextPos,
const UnicodeString& outputStr,
int32_t cursorPosition,
const TransliterationRuleData& theData,
UErrorCode& status) :
data(theData) {
init(input, anteContextPos, postContextPos,
outputStr, cursorPosition, 0, NULL, FALSE, FALSE, status);
}
/**
* Copy constructor.
*/
TransliterationRule::TransliterationRule(TransliterationRule& other) :
pattern(other.pattern),
output(other.output),
anteContextLength(other.anteContextLength),
keyLength(other.keyLength),
cursorPos(other.cursorPos),
flags(other.flags),
firstKeySeg(other.firstKeySeg),
data(other.data) {
segments = 0;
if (other.segments != 0) {
int32_t len = SEGMENTS_LEN;
segments = new int32_t[len];
uprv_memcpy(segments, other.segments, len*sizeof(segments[0]));
}
}
void TransliterationRule::init(const UnicodeString& input,
int32_t anteContextPos, int32_t postContextPos,
const UnicodeString& outputStr,
int32_t cursorPosition, int32_t cursorOffset,
int32_t* adoptedSegs,
UBool anchorStart, UBool anchorEnd,
UErrorCode& status) {
if (U_FAILURE(status)) {
return;
}
@ -193,6 +133,27 @@ void TransliterationRule::init(const UnicodeString& input,
}
}
/**
* Copy constructor.
*/
TransliterationRule::TransliterationRule(TransliterationRule& other) :
pattern(other.pattern),
output(other.output),
anteContextLength(other.anteContextLength),
keyLength(other.keyLength),
cursorPos(other.cursorPos),
flags(other.flags),
firstKeySeg(other.firstKeySeg),
data(other.data) {
segments = 0;
if (other.segments != 0) {
int32_t len = SEGMENTS_LEN;
segments = new int32_t[len];
uprv_memcpy(segments, other.segments, len*sizeof(segments[0]));
}
}
TransliterationRule::~TransliterationRule() {
delete[] segments;
}
@ -326,6 +287,18 @@ UBool TransliterationRule::masks(const TransliterationRule& r2) const {
0 == r2.pattern.compare(left2 - left, len, pattern);
}
inline int32_t posBefore(const Replaceable& str, int32_t pos) {
return (pos > 0) ?
pos - UTF_CHAR_LENGTH(str.char32At(pos-1)) :
pos - 1;
}
inline int32_t posAfter(const Replaceable& str, int32_t pos) {
return (pos < str.length()) ?
pos + UTF_CHAR_LENGTH(str.char32At(pos)) :
pos + 1;
}
/**
* Attempt a match and replacement at the given position. Return
* the degree of match between this rule and the given text. The
@ -385,16 +358,13 @@ UMatchDegree TransliterationRule::matchAndReplace(Replaceable& text,
// A mismatch in the ante context, or with the start anchor,
// is an outright U_MISMATCH regardless of whether we are
// incremental or not.
int32_t cursor = pos.start;
int32_t cursor;
int32_t newStart = 0;
int32_t minCursor;
int32_t i;
// Backup cursor by one
if (cursor > 0) {
cursor -= UTF_CHAR_LENGTH(text.char32At(cursor-1));
} else {
--cursor;
}
cursor = posBefore(text, pos.start);
for (i=anteContextLength-1; i>=0; --i) {
UChar keyChar = pattern.charAt(i);
@ -415,10 +385,6 @@ UMatchDegree TransliterationRule::matchAndReplace(Replaceable& text,
goto exit;
}
}
if (cursorPos == (i - anteContextLength)) {
// Record the position of the cursor
newStart = cursor;
}
while (nextSegPos == i) {
segPos[iSeg] = cursor;
if (cursor >= 0) {
@ -430,9 +396,11 @@ UMatchDegree TransliterationRule::matchAndReplace(Replaceable& text,
}
}
minCursor = posAfter(text, cursor);
// ------------------------ Start Anchor ------------------------
if ((flags & ANCHOR_START) && cursor != (pos.contextStart-1)) {
if ((flags & ANCHOR_START) && cursor != posBefore(text, pos.contextStart)) {
m = U_MISMATCH;
goto exit;
}
@ -513,8 +481,18 @@ UMatchDegree TransliterationRule::matchAndReplace(Replaceable& text,
if (segments == NULL) {
text.handleReplaceBetween(pos.start, keyLimit, output);
lenDelta = output.length() - (keyLimit - pos.start);
if (cursorPos >= 0) {
newStart = pos.start + cursorPos;
newStart = pos.start;
int32_t n = cursorPos;
// cursorPos counts 16-bit code units
while (n > 0) {
int32_t l = UTF_CHAR_LENGTH(text.char32At(newStart));
n -= l;
newStart += l;
}
while (n < 0) {
int32_t l = UTF_CHAR_LENGTH(text.char32At(newStart-1));
n += l;
newStart -= l;
}
} else {
/* When there are segments to be copied, use the Replaceable.copy()
@ -567,11 +545,23 @@ UMatchDegree TransliterationRule::matchAndReplace(Replaceable& text,
buf.remove();
text.handleReplaceBetween(pos.start, keyLimit, buf);
lenDelta = dest - keyLimit - (keyLimit - pos.start);
// Handle cursor in postContext
if (cursorPos > output.length()) {
newStart = pos.start + (dest - keyLimit);
int32_t n = cursorPos - output.length();
// cursorPos counts 16-bit code units
while (n > 0) {
int32_t l = UTF_CHAR_LENGTH(text.char32At(newStart));
n -= l;
newStart += l;
}
}
}
pos.limit += lenDelta;
pos.contextLimit += lenDelta;
pos.start = newStart;
// Restrict new value of start to [minCursor, pos.limit].
pos.start = uprv_max(minCursor, uprv_min(pos.limit, newStart));
m = U_MATCH;
exit:

View File

@ -158,30 +158,6 @@ public:
const TransliterationRuleData& data,
UErrorCode& status);
/**
* Construct a new rule with the given input, output text, and other
* attributes. A cursor position may be specified for the output text.
* @param input input string, including key and optional ante and
* post context
* @param anteContextPos offset into input to end of ante context, or -1 if
* none. Must be <= input.length() if not -1.
* @param postContextPos offset into input to start of post context, or -1
* if none. Must be <= input.length() if not -1, and must be >=
* anteContextPos.
* @param output output string
* @param cursorPosition offset into output at which cursor is located, or -1 if
* none. If less than zero, then the cursor is placed after the
* <code>output</code>; that is, -1 is equivalent to
* <code>output.length()</code>. If greater than
* <code>output.length()</code> then an exception is thrown.
*/
TransliterationRule(const UnicodeString& input,
int32_t anteContextPos, int32_t postContextPos,
const UnicodeString& outputStr,
int32_t cursorPosition,
const TransliterationRuleData& data,
UErrorCode& status);
/**
* Copy constructor.
*/
@ -268,16 +244,6 @@ public:
*/
virtual UnicodeString& toRule(UnicodeString& pat,
UBool escapeUnprintable) const;
private:
void init(const UnicodeString& input,
int32_t anteContextPos, int32_t postContextPos,
const UnicodeString& output,
int32_t cursorPos, int32_t cursorOffset,
int32_t* adoptedSegs,
UBool anchorStart, UBool anchorEnd,
UErrorCode& status);
private:
friend class StringMatcher;