/*
**********************************************************************
*   Copyright (C) 2001 IBM and others. All rights reserved.
**********************************************************************
*  Date        Name        Description
*  03/22/2000  helena      Creation.
**********************************************************************
*/

#include "unicode/brkiter.h"
#include "unicode/schriter.h"
#include "unicode/search.h"
#include "usrchimp.h"
#include "cmemory.h"

// file-local helpers ------------------------------------------------------

// Allocates a USearch record with uprv_malloc and fills in the defaults that
// every non-copy SearchIterator constructor starts from: no C break iterator,
// overlap and canonical matching off, forward searching, "reset" state, no
// current match, and no text (NULL / length 0). Callers attach their text
// afterwards.
// NOTE(review): the allocation result is not checked; the constructors have
// no status parameter through which a failure could be reported.
static USearch * createSearchData()
{
    USearch *search = (USearch *)uprv_malloc(sizeof(USearch));
    search->breakIter          = NULL;
    search->isOverlap          = FALSE;
    search->isCanonicalMatch   = FALSE;
    search->isForwardSearching = TRUE;
    search->reset              = TRUE;
    search->matchedIndex       = USEARCH_DONE;
    search->matchedLength      = 0;
    search->text               = NULL;
    search->textLength         = 0;
    return search;
}

// public constructors and destructors -----------------------------------

// Copy constructor. Copies the break iterator pointer, the text and the
// complete search state of "other" into a freshly allocated USearch record.
//
// The copy is unconditional: comparing "other" with the object under
// construction would read members that have not been initialized yet.
// The whole USearch record is copied so that the direction and reset flags
// read by next() and previous() are defined in the new object.
SearchIterator::SearchIterator(const SearchIterator &other)
    : m_breakiterator_(other.m_breakiterator_), m_text_(other.m_text_)
{
    m_search_  = (USearch *)uprv_malloc(sizeof(USearch));
    *m_search_ = *other.m_search_;
    // When the source iterator was searching its own string, search our own
    // copy of that string instead, so that this object does not keep a
    // pointer into a buffer owned by "other".
    if (other.m_search_->text == other.m_text_.fArray) {
        m_search_->text = m_text_.fArray;
    }
}

// Releases the USearch record owned by this iterator.
SearchIterator::~SearchIterator()
{
    if (m_search_ != NULL) {
        uprv_free(m_search_);
    }
}

// public get and set methods ----------------------------------------

// Switches a search attribute on or off.
// attribute  USEARCH_OVERLAP or USEARCH_CANONICAL_MATCH; anything else sets
//            status to U_ILLEGAL_ARGUMENT_ERROR.
// value      USEARCH_ON enables the attribute, any other accepted value
//            disables it. USEARCH_ATTRIBUTE_VALUE_COUNT is rejected with
//            U_ILLEGAL_ARGUMENT_ERROR.
// status     in/out error code. If it already holds a failure on entry the
//            call does nothing and leaves the code untouched.
void SearchIterator::setAttribute(USearchAttribute       attribute,
                                  USearchAttributeValue  value,
                                  UErrorCode            &status)
{
    if (U_SUCCESS(status)) {
        // validate the value first so that an illegal argument never
        // modifies the iterator
        if (value == USEARCH_ATTRIBUTE_VALUE_COUNT) {
            status = U_ILLEGAL_ARGUMENT_ERROR;
            return;
        }
        switch (attribute)
        {
        case USEARCH_OVERLAP :
            m_search_->isOverlap = (value == USEARCH_ON ? TRUE : FALSE);
            break;
        case USEARCH_CANONICAL_MATCH :
            m_search_->isCanonicalMatch = (value == USEARCH_ON ? TRUE : FALSE);
            break;
        default:
            status = U_ILLEGAL_ARGUMENT_ERROR;
        }
    }
}

// Returns USEARCH_ON or USEARCH_OFF for USEARCH_OVERLAP and
// USEARCH_CANONICAL_MATCH, and USEARCH_DEFAULT for USEARCH_ATTRIBUTE_COUNT
// or any other attribute value.
USearchAttributeValue SearchIterator::getAttribute(
                                          USearchAttribute attribute) const
{
    switch (attribute) {
    case USEARCH_ATTRIBUTE_COUNT :
        return USEARCH_DEFAULT;
    case USEARCH_OVERLAP :
        return (m_search_->isOverlap == TRUE ? USEARCH_ON : USEARCH_OFF);
    case USEARCH_CANONICAL_MATCH :
        return (m_search_->isCanonicalMatch == TRUE ? USEARCH_ON :
                                                      USEARCH_OFF);
    }
    return USEARCH_DEFAULT;
}

// Returns the stored start index of the current match (USEARCH_DONE when no
// match is recorded).
UTextOffset SearchIterator::getMatchedStart() const
{
    return m_search_->matchedIndex;
}

// Returns the stored length of the current match.
int32_t SearchIterator::getMatchedLength() const
{
    return m_search_->matchedLength;
}

// Copies the currently matched text into "result". If there is no match
// (index is USEARCH_DONE) or the match is empty, "result" is emptied.
void SearchIterator::getMatchedText(UnicodeString &result) const
{
    UTextOffset matchedindex  = m_search_->matchedIndex;
    int32_t     matchedlength = m_search_->matchedLength;
    if (matchedindex != USEARCH_DONE && matchedlength != 0) {
        result.setTo(m_search_->text + matchedindex, matchedlength);
    }
    else {
        result.remove();
    }
}

// Installs the break iterator pointer returned by getBreakIterator().
// Does nothing if status already holds a failure.
void SearchIterator::setBreakIterator(BreakIterator *breakiter,
                                      UErrorCode &status)
{
    if (U_SUCCESS(status)) {
        m_search_->breakIter = NULL;
        // the c++ breakiterator may not make use of ubreakiterator.
        // so we'll have to keep track of it ourselves.
        m_breakiterator_ = breakiter;
    }
}

// Returns the break iterator pointer stored by the constructor or by
// setBreakIterator(); may be NULL.
const BreakIterator * SearchIterator::getBreakIterator(void) const
{
    return m_breakiterator_;
}

// Replaces the text to be searched.
// text    new text; an empty string is rejected with
//         U_ILLEGAL_ARGUMENT_ERROR and leaves the iterator unchanged.
// status  in/out error code; nothing happens if it already holds a failure.
void SearchIterator::setText(const UnicodeString &text, UErrorCode &status)
{
    if (U_SUCCESS(status)) {
        if (text.length() == 0) {
            status = U_ILLEGAL_ARGUMENT_ERROR;
        }
        else {
            m_text_ = text;
            m_search_->text = m_text_.fArray;
            // keep the cached length in step with the new buffer; last(),
            // next(), operator== and setMatchNotFound() all rely on it
            m_search_->textLength = m_text_.length();
        }
    }
}

// Replaces the text to be searched with the text of a CharacterIterator.
// The text is extracted into a temporary first, so that a rejected (empty)
// text leaves both m_text_ and the search buffer pointer untouched.
void SearchIterator::setText(CharacterIterator &text, UErrorCode &status)
{
    if (U_SUCCESS(status)) {
        UnicodeString extracted;
        text.getText(extracted);
        setText(extracted, status);
    }
}

// Returns the string currently held as search text.
const UnicodeString & SearchIterator::getText(void) const
{
    return m_text_;
}

// operator overloading ----------------------------------------------

// Two iterators are equal when they are the same object, or when they hold
// the same break iterator pointer, the same attribute flags, the same
// current match, the same offset and text of identical length and content.
UBool SearchIterator::operator==(const SearchIterator &that) const
{
    if (this == &that) {
        return TRUE;
    }
    return (m_breakiterator_            == that.m_breakiterator_ &&
            m_search_->isCanonicalMatch == that.m_search_->isCanonicalMatch &&
            m_search_->isOverlap        == that.m_search_->isOverlap &&
            m_search_->matchedIndex     == that.m_search_->matchedIndex &&
            m_search_->matchedLength    == that.m_search_->matchedLength &&
            m_search_->textLength       == that.m_search_->textLength &&
            getOffset() == that.getOffset() &&
            (uprv_memcmp(m_search_->text, that.m_search_->text,
                         m_search_->textLength * sizeof(UChar)) == 0));
}

// public methods ----------------------------------------------------

// Moves to offset 0 and returns the result of handleNext() from there.
UTextOffset SearchIterator::first(UErrorCode &status)
{
    setOffset(0, status);
    return handleNext(0, status);
}

// Moves to "position" and returns the result of handleNext() from there.
UTextOffset SearchIterator::following(UTextOffset position,
                                      UErrorCode &status)
{
    setOffset(position, status);
    return handleNext(position, status);
}

// Moves to the end of the text and returns the result of handlePrev() from
// there.
UTextOffset SearchIterator::last(UErrorCode &status)
{
    setOffset(m_search_->textLength, status);
    return handlePrev(m_search_->textLength, status);
}

// Moves to "position" and returns the result of handlePrev() from there.
UTextOffset SearchIterator::preceding(UTextOffset position,
                                      UErrorCode &status)
{
    setOffset(position, status);
    return handlePrev(position, status);
}

// Advances to the next match in the forward direction.
// Returns USEARCH_DONE if status holds a failure or if the end of the text
// has been reached; returns the current match index unchanged when the
// previous call was a backward search that found a match (direction
// switch); otherwise returns the result of handleNext().
UTextOffset SearchIterator::next(UErrorCode &status)
{
    if (U_SUCCESS(status)) {
        UTextOffset offset      = getOffset();
        UTextOffset matchindex  = m_search_->matchedIndex;
        int32_t     matchlength = m_search_->matchedLength;
        m_search_->reset = FALSE;
        if (m_search_->isForwardSearching == TRUE) {
            int32_t textlength = m_search_->textLength;
            if (offset == textlength || matchindex == textlength ||
                (matchindex != USEARCH_DONE &&
                 matchindex + matchlength >= textlength)) {
                // not enough characters to match
                setMatchNotFound();
                return USEARCH_DONE;
            }
        }
        else {
            // switching direction.
            // if matchedIndex == USEARCH_DONE, it means that either a
            // setOffset has been called or that previous ran off the text
            // string. the iterator would have been set to offset 0 if a
            // match is not found.
            m_search_->isForwardSearching = TRUE;
            if (m_search_->matchedIndex != USEARCH_DONE) {
                // there's no need to set the collation element iterator
                // the next call to next will set the offset.
                return matchindex;
            }
        }

        if (matchindex != USEARCH_DONE) {
            // continue right after the current match
            return handleNext(matchindex + matchlength, status);
        }
        return handleNext(offset, status);
    }
    return USEARCH_DONE;
}

// Moves to the previous match in the backward direction.
// Returns USEARCH_DONE if status holds a failure or if the start of the
// text has been reached; returns the current match index unchanged when the
// previous call was a forward search that found a match (direction switch);
// otherwise returns the result of handlePrev().
UTextOffset SearchIterator::previous(UErrorCode &status)
{
    if (U_SUCCESS(status)) {
        UTextOffset offset;
        if (m_search_->reset) {
            // first call after a reset: start from the end of the text
            offset                        = m_search_->textLength;
            m_search_->isForwardSearching = FALSE;
            m_search_->reset              = FALSE;
        }
        else {
            offset = getOffset();
        }

        UTextOffset matchindex = m_search_->matchedIndex;
        if (m_search_->isForwardSearching == TRUE) {
            // switching direction.
            // if matchedIndex == USEARCH_DONE, it means that either a
            // setOffset has been called or that next ran off the text
            // string. the iterator would have been set to offset textLength if
            // a match is not found.
            m_search_->isForwardSearching = FALSE;
            if (matchindex != USEARCH_DONE) {
                return matchindex;
            }
        }
        else {
            if (offset == 0 || matchindex == 0) {
                // not enough characters to match
                setMatchNotFound();
                return USEARCH_DONE;
            }
        }

        if (matchindex != USEARCH_DONE) {
            return handlePrev(matchindex, status);
        }
        return handlePrev(offset, status);
    }
    return USEARCH_DONE;
}

// Clears the current match, switches overlap and canonical matching off,
// and restores forward searching and the "reset" state.
void SearchIterator::reset()
{
    setMatchNotFound();
    m_search_->isOverlap          = FALSE;
    m_search_->isCanonicalMatch   = FALSE;
    m_search_->isForwardSearching = TRUE;
    m_search_->reset              = TRUE;
}

// protected constructors and destructors -----------------------------

// Creates an iterator with no break iterator and no text.
SearchIterator::SearchIterator() : m_breakiterator_(NULL)
{
    m_search_ = createSearchData();
}

// Creates an iterator over a copy of "text" using the given break iterator
// pointer (stored as is).
SearchIterator::SearchIterator(const UnicodeString &text,
                                     BreakIterator *breakiter) :
                               m_breakiterator_(breakiter), m_text_(text)
{
    m_search_             = createSearchData();
    m_search_->text       = m_text_.fArray;
    m_search_->textLength = text.length();
}

// Creates an iterator over the text extracted from a CharacterIterator,
// using the given break iterator pointer (stored as is).
SearchIterator::SearchIterator(CharacterIterator &text,
                               BreakIterator     *breakiter) :
                               m_breakiterator_(breakiter)
{
    m_search_ = createSearchData();
    text.getText(m_text_);
    m_search_->text       = m_text_.fArray;
    m_search_->textLength = m_text_.length();
}

// protected methods ------------------------------------------------------

// Stores the length of the current match.
void SearchIterator::setMatchLength(int32_t length)
{
    m_search_->matchedLength = length;
}

// Stores the start index of the current match.
void SearchIterator::setMatchStart(UTextOffset position)
{
    m_search_->matchedIndex = position;
}

// Records "no match" (index USEARCH_DONE, length 0) and parks the offset at
// the end of the text when searching forward, or at 0 when searching
// backward.
void SearchIterator::setMatchNotFound()
{
    setMatchStart(USEARCH_DONE);
    setMatchLength(0);
    UErrorCode status = U_ZERO_ERROR;
    // by default no errors should be returned here since offsets are within
    // range.
    if (m_search_->isForwardSearching) {
        setOffset(m_search_->textLength, status);
    }
    else {
        setOffset(0, status);
    }
}