2001-07-25 18:50:09 +00:00
|
|
|
/*
|
2003-08-27 01:01:42 +00:00
|
|
|
**********************************************************************
|
2004-10-29 22:50:01 +00:00
|
|
|
* Copyright (c) 2001-2004, International Business Machines
|
2003-08-27 01:01:42 +00:00
|
|
|
* Corporation and others. All Rights Reserved.
|
2001-07-25 18:50:09 +00:00
|
|
|
**********************************************************************
|
|
|
|
* Date Name Description
|
|
|
|
* 07/18/01 aliu Creation.
|
|
|
|
**********************************************************************
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include "unicode/unifilt.h"
|
|
|
|
#include "unicode/rep.h"
|
|
|
|
|
2001-10-08 23:26:58 +00:00
|
|
|
U_NAMESPACE_BEGIN
|
2004-06-04 01:16:05 +00:00
|
|
|
// Invokes the RTTI-implementation macro for the abstract class UnicodeFilter.
// NOTE(review): the macro is declared outside this file; judging by its name it
// emits the class-ID support for an abstract class -- confirm against its
// definition.
UOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(UnicodeFilter)
|
2004-05-28 20:13:11 +00:00
|
|
|
|
2001-10-08 23:26:58 +00:00
|
|
|
|
2003-08-27 01:01:42 +00:00
|
|
|
/* Define this here due to the lack of another file.
|
|
|
|
It can't be defined in the header */
|
|
|
|
UnicodeMatcher::~UnicodeMatcher() {}
|
|
|
|
|
|
|
|
/* Destructor. The body is empty: there is nothing to release here. */
UnicodeFilter::~UnicodeFilter()
{
}
|
2002-06-29 00:04:16 +00:00
|
|
|
|
2002-02-07 01:07:55 +00:00
|
|
|
/**
|
|
|
|
* UnicodeFunctor API. Cast 'this' to a UnicodeMatcher* pointer
|
|
|
|
* and return the pointer.
|
|
|
|
*/
|
|
|
|
UnicodeMatcher* UnicodeFilter::toMatcher() const {
|
|
|
|
return (UnicodeMatcher*) this;
|
|
|
|
}
|
|
|
|
|
2003-08-27 01:01:42 +00:00
|
|
|
/**
 * Accepts a TransliterationRuleData pointer and ignores it: this
 * implementation is a no-op.
 */
void UnicodeFilter::setData(const TransliterationRuleData* /*unused*/)
{
}
|
|
|
|
|
2001-07-25 18:50:09 +00:00
|
|
|
/**
|
|
|
|
* Default implementation of UnicodeMatcher::matches() for Unicode
|
2001-11-20 00:41:01 +00:00
|
|
|
* filters. Matches a single code point at offset (either one or
|
|
|
|
* two 16-bit code units).
|
2001-07-25 18:50:09 +00:00
|
|
|
*/
|
|
|
|
UMatchDegree UnicodeFilter::matches(const Replaceable& text,
|
|
|
|
int32_t& offset,
|
|
|
|
int32_t limit,
|
2001-10-30 23:55:09 +00:00
|
|
|
UBool incremental) {
|
2001-07-25 18:50:09 +00:00
|
|
|
UChar32 c;
|
|
|
|
if (offset < limit &&
|
|
|
|
contains(c = text.char32At(offset))) {
|
|
|
|
offset += UTF_CHAR_LENGTH(c);
|
|
|
|
return U_MATCH;
|
|
|
|
}
|
|
|
|
if (offset > limit &&
|
2001-07-25 21:15:53 +00:00
|
|
|
contains(c = text.char32At(offset))) {
|
|
|
|
// Backup offset by 1, unless the preceding character is a
|
|
|
|
// surrogate pair -- then backup by 2 (keep offset pointing at
|
|
|
|
// the lead surrogate).
|
|
|
|
--offset;
|
|
|
|
if (offset >= 0) {
|
|
|
|
offset -= UTF_CHAR_LENGTH(text.char32At(offset)) - 1;
|
|
|
|
}
|
2001-07-25 18:50:09 +00:00
|
|
|
return U_MATCH;
|
|
|
|
}
|
|
|
|
if (incremental && offset == limit) {
|
|
|
|
return U_PARTIAL_MATCH;
|
|
|
|
}
|
|
|
|
return U_MISMATCH;
|
|
|
|
}
|
2001-07-27 00:18:53 +00:00
|
|
|
|
2001-10-08 23:26:58 +00:00
|
|
|
U_NAMESPACE_END
|
|
|
|
|
2001-07-27 00:18:53 +00:00
|
|
|
//eof
|