2017-01-20 00:20:31 +00:00
|
|
|
// © 2016 and later: Unicode, Inc. and others.
|
2016-06-15 18:58:17 +00:00
|
|
|
// License & terms of use: http://www.unicode.org/copyright.html
|
2001-07-27 00:18:53 +00:00
|
|
|
/*
|
2007-06-14 20:30:52 +00:00
|
|
|
**********************************************************************
|
2016-05-31 21:45:07 +00:00
|
|
|
* Copyright (C) 2001-2012, International Business Machines
|
|
|
|
* Corporation and others. All Rights Reserved.
|
2001-07-27 00:18:53 +00:00
|
|
|
**********************************************************************
|
|
|
|
* Date Name Description
|
|
|
|
* 07/26/01 aliu Creation.
|
|
|
|
**********************************************************************
|
|
|
|
*/
|
|
|
|
|
2002-09-20 01:54:48 +00:00
|
|
|
#include "unicode/utypes.h"
|
|
|
|
|
|
|
|
#if !UCONFIG_NO_TRANSLITERATION
|
|
|
|
|
2001-07-27 00:18:53 +00:00
|
|
|
#include "quant.h"
|
2001-07-30 23:23:16 +00:00
|
|
|
#include "unicode/unistr.h"
|
2001-11-20 00:07:11 +00:00
|
|
|
#include "util.h"
|
2001-07-27 00:18:53 +00:00
|
|
|
|
2001-10-08 23:26:58 +00:00
|
|
|
U_NAMESPACE_BEGIN
|
|
|
|
|
2003-08-31 20:53:46 +00:00
|
|
|
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(Quantifier)
|
2002-06-29 00:04:16 +00:00
|
|
|
|
2002-02-07 01:07:55 +00:00
|
|
|
/**
 * Construct a quantifier around the given matcher.
 *
 * The matcher pointer is stored as-is; this object deletes it in its
 * destructor. Neither argument is validated here: callers are expected
 * to pass a non-null matcher and to keep _minCount <= _maxCount.
 *
 * @param adoptedMatcher the functor whose matcher is applied repeatedly
 * @param _minCount      value stored as the minimum repetition count
 * @param _maxCount      value stored as the maximum repetition count
 */
Quantifier::Quantifier(UnicodeFunctor *adoptedMatcher,
                       uint32_t _minCount, uint32_t _maxCount) :
    matcher(adoptedMatcher),
    minCount(_minCount),
    maxCount(_maxCount)
{
}
|
|
|
|
|
|
|
|
/**
 * Copy constructor.
 *
 * Copy-constructs both base subobjects, obtains a separate matcher
 * instance by calling clone() on the source's matcher, and copies the
 * two repetition bounds. The source's matcher is dereferenced without
 * a null check.
 */
Quantifier::Quantifier(const Quantifier& o) :
    UnicodeFunctor(o), UnicodeMatcher(o),
    matcher(o.matcher->clone()),
    minCount(o.minCount), maxCount(o.maxCount)
{
}
|
|
|
|
|
|
|
|
/**
 * Destructor. Deletes the matcher held by this quantifier.
 */
Quantifier::~Quantifier()
{
    delete matcher;
}
|
|
|
|
|
|
|
|
/**
|
2002-02-07 01:07:55 +00:00
|
|
|
* Implement UnicodeFunctor
|
2001-07-27 00:18:53 +00:00
|
|
|
*/
|
2002-02-07 01:07:55 +00:00
|
|
|
UnicodeFunctor* Quantifier::clone() const {
|
2001-07-27 00:18:53 +00:00
|
|
|
return new Quantifier(*this);
|
|
|
|
}
|
|
|
|
|
2002-02-07 01:07:55 +00:00
|
|
|
/**
|
|
|
|
* UnicodeFunctor API. Cast 'this' to a UnicodeMatcher* pointer
|
|
|
|
* and return the pointer.
|
|
|
|
*/
|
|
|
|
UnicodeMatcher* Quantifier::toMatcher() const {
|
2012-08-31 19:12:19 +00:00
|
|
|
Quantifier *nonconst_this = const_cast<Quantifier *>(this);
|
|
|
|
UnicodeMatcher *nonconst_base = static_cast<UnicodeMatcher *>(nonconst_this);
|
|
|
|
|
|
|
|
return nonconst_base;
|
2002-02-07 01:07:55 +00:00
|
|
|
}
|
|
|
|
|
2001-07-27 00:18:53 +00:00
|
|
|
/**
 * Apply the wrapped matcher repeatedly, up to maxCount times.
 *
 * @param text        text passed through to the wrapped matcher
 * @param offset      in/out position; advanced by the wrapped matcher
 *                    and restored to its entry value when U_MISMATCH
 *                    is returned
 * @param limit       limit passed through to the wrapped matcher
 * @param incremental passed through to the wrapped matcher; when true,
 *                    partial matches are reported to the caller
 * @return U_PARTIAL_MATCH if incremental is set and either the wrapped
 *         matcher reported a partial match or offset ended at limit;
 *         otherwise U_MATCH if at least minCount repetitions matched;
 *         otherwise U_MISMATCH.
 */
UMatchDegree Quantifier::matches(const Replaceable& text,
                                 int32_t& offset,
                                 int32_t limit,
                                 UBool incremental) {
    const int32_t entryOffset = offset;
    uint32_t repetitions = 0;

    while (repetitions < maxCount) {
        const int32_t before = offset;
        const UMatchDegree degree =
            matcher->toMatcher()->matches(text, offset, limit, incremental);

        if (degree != U_MATCH) {
            if (incremental && degree == U_PARTIAL_MATCH) {
                return U_PARTIAL_MATCH;
            }
            break;
        }

        ++repetitions;
        if (offset == before) {
            // A match that consumed nothing would repeat forever;
            // count it once and stop.
            break;
        }
    }

    if (incremental && offset == limit) {
        return U_PARTIAL_MATCH;
    }

    if (repetitions < minCount) {
        // Too few repetitions: undo any progress before reporting failure.
        offset = entryOffset;
        return U_MISMATCH;
    }
    return U_MATCH;
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Implement UnicodeMatcher
|
|
|
|
*/
|
|
|
|
UnicodeString& Quantifier::toPattern(UnicodeString& result,
|
|
|
|
UBool escapeUnprintable) const {
|
2008-01-14 18:38:36 +00:00
|
|
|
result.truncate(0);
|
2002-02-07 01:07:55 +00:00
|
|
|
matcher->toMatcher()->toPattern(result, escapeUnprintable);
|
2001-07-30 23:23:16 +00:00
|
|
|
if (minCount == 0) {
|
|
|
|
if (maxCount == 1) {
|
|
|
|
return result.append((UChar)63); /*?*/
|
|
|
|
} else if (maxCount == MAX) {
|
|
|
|
return result.append((UChar)42); /***/
|
|
|
|
}
|
|
|
|
// else fall through
|
|
|
|
} else if (minCount == 1 && maxCount == MAX) {
|
|
|
|
return result.append((UChar)43); /*+*/
|
|
|
|
}
|
|
|
|
result.append((UChar)123); /*{*/
|
2001-12-03 20:19:08 +00:00
|
|
|
ICU_Utility::appendNumber(result, minCount);
|
2001-07-30 23:23:16 +00:00
|
|
|
result.append((UChar)44); /*,*/
|
|
|
|
if (maxCount != MAX) {
|
2001-12-03 20:19:08 +00:00
|
|
|
ICU_Utility::appendNumber(result, maxCount);
|
2001-07-30 23:23:16 +00:00
|
|
|
}
|
|
|
|
result.append((UChar)125); /*}*/
|
2001-07-27 00:18:53 +00:00
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Implement UnicodeMatcher
|
|
|
|
*/
|
|
|
|
UBool Quantifier::matchesIndexValue(uint8_t v) const {
|
2008-02-23 19:15:18 +00:00
|
|
|
return (minCount == 0) || matcher->toMatcher()->matchesIndexValue(v);
|
2001-07-27 00:18:53 +00:00
|
|
|
}
|
|
|
|
|
2002-06-28 21:13:54 +00:00
|
|
|
/**
|
|
|
|
* Implement UnicodeMatcher
|
|
|
|
*/
|
|
|
|
void Quantifier::addMatchSetTo(UnicodeSet& toUnionTo) const {
|
2008-02-23 19:15:18 +00:00
|
|
|
if (maxCount > 0) {
|
2007-06-14 20:30:52 +00:00
|
|
|
matcher->toMatcher()->addMatchSetTo(toUnionTo);
|
2002-06-28 21:13:54 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2002-03-20 00:42:02 +00:00
|
|
|
/**
|
|
|
|
* Implement UnicodeFunctor
|
|
|
|
*/
|
|
|
|
void Quantifier::setData(const TransliterationRuleData* d) {
|
2008-01-14 18:38:36 +00:00
|
|
|
matcher->setData(d);
|
2002-03-20 00:42:02 +00:00
|
|
|
}
|
|
|
|
|
2001-10-08 23:26:58 +00:00
|
|
|
U_NAMESPACE_END
|
|
|
|
|
2002-09-20 01:54:48 +00:00
|
|
|
#endif /* #if !UCONFIG_NO_TRANSLITERATION */
|
|
|
|
|
2001-07-27 00:18:53 +00:00
|
|
|
//eof
|