1999-08-16 21:50:52 +00:00
|
|
|
/*
|
1999-11-22 20:25:35 +00:00
|
|
|
*******************************************************************************
|
2001-03-21 20:31:13 +00:00
|
|
|
* Copyright (C) 1997-2001, International Business Machines Corporation and *
|
1999-11-22 20:25:35 +00:00
|
|
|
* others. All Rights Reserved. *
|
|
|
|
*******************************************************************************
|
1999-08-16 21:50:52 +00:00
|
|
|
*
|
|
|
|
* File TXTBDRY.CPP
|
|
|
|
*
|
|
|
|
* Modification History:
|
|
|
|
*
|
|
|
|
* Date Name Description
|
|
|
|
* 02/18/97 aliu Converted from OpenClass. Added DONE.
|
2000-01-14 00:13:59 +00:00
|
|
|
* 01/13/2000 helena Added UErrorCode parameter to createXXXInstance methods.
|
1999-08-16 21:50:52 +00:00
|
|
|
*****************************************************************************************
|
|
|
|
*/
|
|
|
|
|
|
|
|
// *****************************************************************************
|
|
|
|
// This file was generated from the java source file BreakIterator.java
|
|
|
|
// *****************************************************************************
|
|
|
|
|
2000-01-10 21:21:52 +00:00
|
|
|
#include "unicode/dbbi.h"
|
1999-12-28 23:57:50 +00:00
|
|
|
#include "unicode/brkiter.h"
|
2000-01-08 02:05:05 +00:00
|
|
|
#include "unicode/udata.h"
|
2000-01-10 21:21:52 +00:00
|
|
|
#include "unicode/resbund.h"
|
2000-04-15 21:23:28 +00:00
|
|
|
#include "cstring.h"
|
1999-08-16 21:50:52 +00:00
|
|
|
#include <string.h>
|
|
|
|
|
|
|
|
// *****************************************************************************
|
|
|
|
// class BreakIterator
|
|
|
|
// This class implements methods for finding the location of boundaries in text.
|
|
|
|
// Instances of BreakIterator maintain a current position and scan over text
|
|
|
|
// returning the index of characters where boundaries occur.
|
|
|
|
// *****************************************************************************
|
|
|
|
|
|
|
|
// Definition of the class constant DONE; its value is -1.
// NOTE(review): presumably the sentinel returned by the iteration methods when
// no further boundary exists -- confirm against the declaration in brkiter.h.
const UTextOffset BreakIterator::DONE = (int32_t)-1;
|
|
|
|
|
|
|
|
// -------------------------------------
|
|
|
|
|
|
|
|
// Creates a simple text boundary for word breaks.
|
|
|
|
BreakIterator*
|
2000-01-14 00:13:59 +00:00
|
|
|
BreakIterator::createWordInstance(const Locale& key, UErrorCode& status)
|
1999-08-16 21:50:52 +00:00
|
|
|
{
|
2000-01-08 02:05:05 +00:00
|
|
|
// WARNING: This routine is currently written specifically to handle only the
|
|
|
|
// default rules files and the alternate rules files for Thai. This function
|
|
|
|
// will have to be made fully general at some time in the future!
|
|
|
|
BreakIterator* result = NULL;
|
|
|
|
const char* filename = "word";
|
|
|
|
|
|
|
|
UnicodeString temp;
|
2000-01-14 00:13:59 +00:00
|
|
|
if (U_FAILURE(status)) return NULL;
|
2000-04-15 21:23:28 +00:00
|
|
|
if (!uprv_strcmp(key.getLanguage(), "th"))
|
|
|
|
{
|
2000-01-08 02:05:05 +00:00
|
|
|
filename = "word_th";
|
|
|
|
}
|
|
|
|
|
2000-01-14 00:13:59 +00:00
|
|
|
UDataMemory* file = udata_open(NULL, "brk", filename, &status);
|
2000-01-08 02:05:05 +00:00
|
|
|
|
2000-01-14 00:13:59 +00:00
|
|
|
if (!U_FAILURE(status)) {
|
2000-01-08 02:05:05 +00:00
|
|
|
|
2000-04-15 21:23:28 +00:00
|
|
|
if(!uprv_strcmp(filename, "word_th")) {
|
2000-01-08 02:05:05 +00:00
|
|
|
filename = "thaidict.brk";
|
2000-07-10 20:16:27 +00:00
|
|
|
|
2000-07-12 05:01:53 +00:00
|
|
|
result = new DictionaryBasedBreakIterator(file, (char *)filename, status);
|
2000-01-08 02:05:05 +00:00
|
|
|
}
|
|
|
|
else {
|
2000-07-12 05:01:53 +00:00
|
|
|
result = new RuleBasedBreakIterator(file);
|
2000-01-08 02:05:05 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return result;
|
1999-08-16 21:50:52 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// -------------------------------------
|
|
|
|
|
|
|
|
// Creates a simple text boundary for line breaks.
|
|
|
|
BreakIterator*
|
2000-01-14 00:13:59 +00:00
|
|
|
BreakIterator::createLineInstance(const Locale& key, UErrorCode& status)
|
1999-08-16 21:50:52 +00:00
|
|
|
{
|
2000-01-08 02:05:05 +00:00
|
|
|
// WARNING: This routine is currently written specifically to handle only the
|
|
|
|
// default rules files and the alternate rules files for Thai. This function
|
|
|
|
// will have to be made fully general at some time in the future!
|
|
|
|
BreakIterator* result = NULL;
|
|
|
|
const char* filename = "line";
|
|
|
|
|
|
|
|
UnicodeString temp;
|
2000-01-14 00:13:59 +00:00
|
|
|
if (U_FAILURE(status)) return NULL;
|
2000-04-15 21:23:28 +00:00
|
|
|
if (!uprv_strcmp(key.getLanguage(), "th"))
|
|
|
|
{
|
2000-01-08 02:05:05 +00:00
|
|
|
filename = "line_th";
|
|
|
|
}
|
|
|
|
|
2000-01-14 00:13:59 +00:00
|
|
|
UDataMemory* file = udata_open(NULL, "brk", filename, &status);
|
2000-01-08 02:05:05 +00:00
|
|
|
|
2000-01-14 00:13:59 +00:00
|
|
|
if (!U_FAILURE(status)) {
|
2000-01-08 02:05:05 +00:00
|
|
|
|
2000-04-15 21:23:28 +00:00
|
|
|
if (!uprv_strcmp(key.getLanguage(), "th")) {
|
2000-01-08 02:05:05 +00:00
|
|
|
const char* dataDir = u_getDataDirectory();
|
|
|
|
filename = "thaidict.brk";
|
2000-07-12 05:01:53 +00:00
|
|
|
result = new DictionaryBasedBreakIterator(file, (char *)filename, status);
|
2000-01-08 02:05:05 +00:00
|
|
|
}
|
|
|
|
else {
|
2000-07-12 05:01:53 +00:00
|
|
|
result = new RuleBasedBreakIterator(file);
|
2000-01-08 02:05:05 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2000-06-26 22:46:15 +00:00
|
|
|
|
2000-01-08 02:05:05 +00:00
|
|
|
return result;
|
1999-08-16 21:50:52 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// -------------------------------------
|
|
|
|
|
|
|
|
// Creates a simple text boundary for character breaks.
|
|
|
|
BreakIterator*
|
2000-01-14 00:13:59 +00:00
|
|
|
BreakIterator::createCharacterInstance(const Locale& key, UErrorCode& status)
|
1999-08-16 21:50:52 +00:00
|
|
|
{
|
2000-01-08 02:05:05 +00:00
|
|
|
// WARNING: This routine is currently written specifically to handle only the
|
|
|
|
// default rules files and the alternate rules files for Thai. This function
|
|
|
|
// will have to be made fully general at some time in the future!
|
|
|
|
BreakIterator* result = NULL;
|
|
|
|
const char* filename = "char";
|
|
|
|
|
2000-01-14 00:13:59 +00:00
|
|
|
if (U_FAILURE(status)) return NULL;
|
|
|
|
UDataMemory* file = udata_open(NULL, "brk", filename, &status);
|
2000-01-08 02:05:05 +00:00
|
|
|
|
2000-01-14 00:13:59 +00:00
|
|
|
if (!U_FAILURE(status)) {
|
2000-07-12 05:01:53 +00:00
|
|
|
result = new RuleBasedBreakIterator(file);
|
2000-01-08 02:05:05 +00:00
|
|
|
}
|
|
|
|
|
2000-06-26 22:46:15 +00:00
|
|
|
|
2000-01-08 02:05:05 +00:00
|
|
|
return result;
|
1999-08-16 21:50:52 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// -------------------------------------
|
|
|
|
|
|
|
|
// Creates a simple text boundary for sentence breaks.
|
|
|
|
BreakIterator*
|
2000-01-14 00:13:59 +00:00
|
|
|
BreakIterator::createSentenceInstance(const Locale& key, UErrorCode& status)
|
1999-08-16 21:50:52 +00:00
|
|
|
{
|
2000-01-08 02:05:05 +00:00
|
|
|
// WARNING: This routine is currently written specifically to handle only the
|
|
|
|
// default rules files and the alternate rules files for Thai. This function
|
|
|
|
// will have to be made fully general at some time in the future!
|
|
|
|
BreakIterator* result = NULL;
|
|
|
|
const char* filename = "sent";
|
|
|
|
|
2000-01-14 00:13:59 +00:00
|
|
|
if (U_FAILURE(status)) return NULL;
|
|
|
|
UDataMemory* file = udata_open(NULL, "brk", filename, &status);
|
2000-01-08 02:05:05 +00:00
|
|
|
|
2000-01-14 00:13:59 +00:00
|
|
|
if (!U_FAILURE(status)) {
|
2000-07-12 05:01:53 +00:00
|
|
|
result = new RuleBasedBreakIterator(file);
|
2000-01-08 02:05:05 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return result;
|
1999-08-16 21:50:52 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// -------------------------------------
|
|
|
|
|
|
|
|
// Gets all the available locales that has localized text boundary data.
|
|
|
|
const Locale*
|
|
|
|
BreakIterator::getAvailableLocales(int32_t& count)
|
|
|
|
{
|
|
|
|
return Locale::getAvailableLocales(count);
|
|
|
|
}
|
|
|
|
|
|
|
|
// -------------------------------------
|
|
|
|
// Gets the objectLocale display name in the default locale language.
|
|
|
|
UnicodeString&
|
|
|
|
BreakIterator::getDisplayName(const Locale& objectLocale,
|
|
|
|
UnicodeString& name)
|
|
|
|
{
|
|
|
|
return objectLocale.getDisplayName(name);
|
|
|
|
}
|
|
|
|
|
|
|
|
// -------------------------------------
|
|
|
|
// Gets the objectLocale display name in the displayLocale language.
|
|
|
|
UnicodeString&
|
|
|
|
BreakIterator::getDisplayName(const Locale& objectLocale,
|
|
|
|
const Locale& displayLocale,
|
|
|
|
UnicodeString& name)
|
|
|
|
{
|
|
|
|
return objectLocale.getDisplayName(displayLocale, name);
|
|
|
|
}
|
|
|
|
|
|
|
|
// -------------------------------------
|
|
|
|
|
|
|
|
// Needed because we declare the copy constructor (in order to prevent synthesizing one) and
|
|
|
|
// so the default constructor is no longer synthesized.
|
|
|
|
|
|
|
|
BreakIterator::BreakIterator()
|
|
|
|
{
|
2001-02-21 23:40:41 +00:00
|
|
|
fBufferClone = FALSE;
|
1999-08-16 21:50:52 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Out-of-line destructor definition; the body is empty.
BreakIterator::~BreakIterator() {}
|
|
|
|
|
|
|
|
//eof
|