2000-01-08 02:05:05 +00:00
|
|
|
/*
|
|
|
|
**********************************************************************
|
2000-01-14 00:13:59 +00:00
|
|
|
* Copyright (C) 1999-2000 IBM Corp. All rights reserved.
|
2000-01-08 02:05:05 +00:00
|
|
|
**********************************************************************
|
|
|
|
* Date Name Description
|
|
|
|
* 12/1/99 rgillam Complete port from Java.
|
2000-01-14 00:13:59 +00:00
|
|
|
* 01/13/2000 helena Added UErrorCode to ctors.
|
2000-01-08 02:05:05 +00:00
|
|
|
**********************************************************************
|
|
|
|
*/
|
|
|
|
|
2000-01-11 00:46:58 +00:00
|
|
|
#include "ucmp8.h"
|
2000-01-08 02:05:05 +00:00
|
|
|
#include "dbbi_tbl.h"
|
2000-01-10 21:21:52 +00:00
|
|
|
#include "unicode/dbbi.h"
|
2000-01-08 02:05:05 +00:00
|
|
|
|
|
|
|
//=======================================================================
|
|
|
|
// constructor
|
|
|
|
//=======================================================================
|
|
|
|
|
|
|
|
/**
 * Builds the dictionary-based tables from a memory image and a dictionary
 * file.
 *
 * @param tablesMemory       Handle to the memory image; forwarded to the
 *                           RuleBasedBreakIteratorTables constructor and
 *                           queried with udata_getMemory(). May be 0, in
 *                           which case no category flags are located.
 * @param dictionaryFilename Forwarded to the constructor of the
 *                           "dictionary" member.
 * @param status             Forwarded to the constructor of the
 *                           "dictionary" member. If it indicates failure
 *                           afterwards, no category flags are located.
 *
 * On every path where the category flags cannot be located,
 * categoryFlags is left as a null pointer rather than uninitialized.
 */
DictionaryBasedBreakIteratorTables::DictionaryBasedBreakIteratorTables(
        UDataMemory* tablesMemory,
        char* dictionaryFilename,
        UErrorCode &status)
: RuleBasedBreakIteratorTables(tablesMemory),
  dictionary(dictionaryFilename, status)
{
    // Give categoryFlags a well-defined value up front.  It used to be
    // assigned only on the fully successful path, so every early exit left
    // it indeterminate even though the destructor may delete [] it and
    // lookupCategory() indexes through it.
    categoryFlags = 0;

    if (tablesMemory == 0 || U_FAILURE(status)) {
        return;
    }

    const void* tablesImage = udata_getMemory(tablesMemory);
    if (tablesImage == 0) {
        return;
    }

    // We know the offset into the memory image where the DBBI stuff
    // starts is stored in element 8 of the index array at the front of
    // the image.  There should be a way for the RBBI constructor to give
    // us this, but there isn't a good one.
    const int32_t dbbiOffsetSlot = 8;
    const int32_t* tablesIdx = (const int32_t*)tablesImage;
    const int8_t* dbbiImage = (const int8_t*)tablesImage + tablesIdx[dbbiOffsetSlot];

    // The DBBI section begins with its own index array; element 0 holds the
    // offset, relative to the start of the DBBI section, of the category
    // flags.  The resulting pointer refers into the memory image itself.
    const int32_t* dbbiIdx = (const int32_t*)dbbiImage;
    categoryFlags = (int8_t*)(dbbiImage + (int32_t)dbbiIdx[0]);
}
|
|
|
|
|
|
|
|
//=======================================================================
|
|
|
|
// boilerplate
|
|
|
|
//=======================================================================
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Destructor
|
|
|
|
*/
|
|
|
|
DictionaryBasedBreakIteratorTables::~DictionaryBasedBreakIteratorTables() {
|
|
|
|
if (ownTables)
|
|
|
|
delete [] categoryFlags;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
 * Looks up the category of a character and, as a side effect, tells the
 * iterator when that category is flagged in categoryFlags.
 *
 * The actual lookup is delegated to
 * RuleBasedBreakIteratorTables::lookupCategory(); this override only adds
 * the bookkeeping.
 *
 * @param c  The character to classify.
 * @param bi The iterator performing the lookup.  It is cast to
 *           DictionaryBasedBreakIterator* without any check when the
 *           dictionary-character count has to be bumped.
 * @return   The category returned by the inherited lookupCategory().
 */
int32_t
DictionaryBasedBreakIteratorTables::lookupCategory(UChar c,
                                                   BreakIterator* bi) const {
    const int32_t category = RuleBasedBreakIteratorTables::lookupCategory(c, bi);

    // IGNORE must be tested first: categoryFlags is only indexed with a
    // category other than IGNORE.
    if (category == RuleBasedBreakIterator::IGNORE) {
        return category;
    }

    // A nonzero flag marks a category represented in the dictionary, so
    // record that another dictionary character has been passed over.
    if (categoryFlags[category]) {
        DictionaryBasedBreakIterator* dictIterator = (DictionaryBasedBreakIterator*)bi;
        dictIterator->bumpDictionaryCharCount();
    }

    return category;
}
|