/* ********************************************************************** * Copyright (C) 1999-2000 IBM Corp. All rights reserved. ********************************************************************** * Date Name Description * 12/1/99 rgillam Complete port from Java. * 01/13/2000 helena Added UErrorCode to ctors. ********************************************************************** */ #include "ucmp8.h" #include "dbbi_tbl.h" #include "unicode/dbbi.h" U_NAMESPACE_BEGIN //======================================================================= // constructor //======================================================================= DictionaryBasedBreakIteratorTables::DictionaryBasedBreakIteratorTables( UDataMemory* tablesMemory, const char* dictionaryFilename, UErrorCode &status) : RuleBasedBreakIteratorTables(tablesMemory), dictionary(dictionaryFilename, status) { if(tablesMemory != 0) { const void* tablesImage = udata_getMemory(tablesMemory); if(tablesImage != 0) { if (U_FAILURE(status)) return; const int32_t* tablesIdx = (int32_t*) tablesImage; const int8_t* dbbiImage = ((const int8_t*)tablesImage + tablesIdx[8]); // we know the offset into the memory image where the DBBI stuff // starts is stored in element 8 of the array. There should be // a way for the RBBI constructor to give us this, but there's // isn't a good one. const int32_t* dbbiIdx = (const int32_t*)dbbiImage; categoryFlags = (int8_t*)((const int8_t*)dbbiImage + (int32_t)dbbiIdx[0]); } } } //======================================================================= // boilerplate //======================================================================= /** * Destructor */ DictionaryBasedBreakIteratorTables::~DictionaryBasedBreakIteratorTables() { if (ownTables) delete [] categoryFlags; } int32_t DictionaryBasedBreakIteratorTables::lookupCategory(UChar c, BreakIterator* bi) const { // this override of lookupCategory() exists only to keep track of whether we've // passed over any dictionary characters. It calls the inherited lookupCategory() // to do the real work, and then checks whether its return value is one of the // categories represented in the dictionary. If it is, bump the dictionary- // character count. int32_t result = RuleBasedBreakIteratorTables::lookupCategory(c, bi); if (result != RuleBasedBreakIterator::UBRK_IGNORE && categoryFlags[result]) { ((DictionaryBasedBreakIterator*)bi)->bumpDictionaryCharCount(); } return result; } U_NAMESPACE_END /* eof */