1999-08-16 21:50:52 +00:00
|
|
|
/*
|
1999-11-22 20:25:35 +00:00
|
|
|
*******************************************************************************
|
2001-03-21 20:31:13 +00:00
|
|
|
* Copyright (C) 1997-2001, International Business Machines Corporation and *
|
1999-11-22 20:25:35 +00:00
|
|
|
* others. All Rights Reserved. *
|
|
|
|
*******************************************************************************
|
1999-08-16 21:50:52 +00:00
|
|
|
*
|
|
|
|
* File TXTBDRY.CPP
|
|
|
|
*
|
|
|
|
* Modification History:
|
|
|
|
*
|
|
|
|
* Date Name Description
|
|
|
|
* 02/18/97 aliu Converted from OpenClass. Added DONE.
|
2000-01-14 00:13:59 +00:00
|
|
|
* 01/13/2000 helena Added UErrorCode parameter to createXXXInstance methods.
|
1999-08-16 21:50:52 +00:00
|
|
|
*****************************************************************************************
|
|
|
|
*/
|
|
|
|
|
|
|
|
// *****************************************************************************
|
|
|
|
// This file was generated from the java source file BreakIterator.java
|
|
|
|
// *****************************************************************************
|
|
|
|
|
2000-01-10 21:21:52 +00:00
|
|
|
#include "unicode/dbbi.h"
|
1999-12-28 23:57:50 +00:00
|
|
|
#include "unicode/brkiter.h"
|
2000-01-08 02:05:05 +00:00
|
|
|
#include "unicode/udata.h"
|
2000-01-10 21:21:52 +00:00
|
|
|
#include "unicode/resbund.h"
|
2000-04-15 21:23:28 +00:00
|
|
|
#include "cstring.h"
|
1999-08-16 21:50:52 +00:00
|
|
|
|
|
|
|
// *****************************************************************************
|
|
|
|
// class BreakIterator
|
2002-08-01 16:17:41 +00:00
|
|
|
// This class implements methods for finding the location of boundaries in text.
|
1999-08-16 21:50:52 +00:00
|
|
|
// Instances of BreakIterator maintain a current position and scan over text
|
|
|
|
// returning the index of characters where boundaries occur.
|
|
|
|
// *****************************************************************************
|
|
|
|
|
2001-10-08 23:26:58 +00:00
|
|
|
U_NAMESPACE_BEGIN
|
|
|
|
|
2002-03-12 01:32:42 +00:00
|
|
|
// Definition of the class constant DONE; its value is -1.
// NOTE(review): presumably the sentinel the iteration methods hand back when
// no further boundary exists -- confirm against the class declaration.
const int32_t BreakIterator::DONE = static_cast<int32_t>(-1);
|
1999-08-16 21:50:52 +00:00
|
|
|
|
|
|
|
// -------------------------------------
|
|
|
|
|
2002-08-08 00:39:13 +00:00
|
|
|
// Creates a break iterator for word breaks.
|
1999-08-16 21:50:52 +00:00
|
|
|
BreakIterator*
|
2000-01-14 00:13:59 +00:00
|
|
|
BreakIterator::createWordInstance(const Locale& key, UErrorCode& status)
|
1999-08-16 21:50:52 +00:00
|
|
|
{
|
2000-01-08 02:05:05 +00:00
|
|
|
// WARNING: This routine is currently written specifically to handle only the
|
|
|
|
// default rules files and the alternate rules files for Thai. This function
|
|
|
|
// will have to be made fully general at some time in the future!
|
|
|
|
BreakIterator* result = NULL;
|
|
|
|
const char* filename = "word";
|
|
|
|
|
2001-10-09 22:57:29 +00:00
|
|
|
if (U_FAILURE(status))
|
|
|
|
return NULL;
|
2002-08-08 00:39:13 +00:00
|
|
|
|
2000-04-15 21:23:28 +00:00
|
|
|
if (!uprv_strcmp(key.getLanguage(), "th"))
|
|
|
|
{
|
2000-01-08 02:05:05 +00:00
|
|
|
filename = "word_th";
|
|
|
|
}
|
|
|
|
|
2000-01-14 00:13:59 +00:00
|
|
|
UDataMemory* file = udata_open(NULL, "brk", filename, &status);
|
2002-08-08 00:39:13 +00:00
|
|
|
if (U_FAILURE(status)) {
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
// The UDataMemory is adopted by the break iterator.
|
2000-01-08 02:05:05 +00:00
|
|
|
|
2002-08-08 00:39:13 +00:00
|
|
|
if(!uprv_strcmp(filename, "word_th")) {
|
|
|
|
filename = "thaidict.brk";
|
|
|
|
result = new DictionaryBasedBreakIterator(file, filename, status);
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
result = new RuleBasedBreakIterator(file, status);
|
|
|
|
}
|
|
|
|
if (result == NULL) {
|
|
|
|
udata_close(file);
|
|
|
|
status = U_MEMORY_ALLOCATION_ERROR;
|
|
|
|
}
|
|
|
|
if (U_FAILURE(status)) { // Sometimes redundant check, but simple.
|
|
|
|
delete result;
|
|
|
|
result = NULL;
|
2000-01-08 02:05:05 +00:00
|
|
|
}
|
2001-10-09 22:57:29 +00:00
|
|
|
|
2000-01-08 02:05:05 +00:00
|
|
|
return result;
|
1999-08-16 21:50:52 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// -------------------------------------
|
|
|
|
|
2002-08-08 00:39:13 +00:00
|
|
|
// Creates a break iterator for line breaks.
|
1999-08-16 21:50:52 +00:00
|
|
|
BreakIterator*
|
2000-01-14 00:13:59 +00:00
|
|
|
BreakIterator::createLineInstance(const Locale& key, UErrorCode& status)
|
1999-08-16 21:50:52 +00:00
|
|
|
{
|
2000-01-08 02:05:05 +00:00
|
|
|
// WARNING: This routine is currently written specifically to handle only the
|
|
|
|
// default rules files and the alternate rules files for Thai. This function
|
|
|
|
// will have to be made fully general at some time in the future!
|
|
|
|
BreakIterator* result = NULL;
|
|
|
|
const char* filename = "line";
|
|
|
|
|
2001-10-09 22:57:29 +00:00
|
|
|
if (U_FAILURE(status))
|
|
|
|
return NULL;
|
2002-08-08 00:39:13 +00:00
|
|
|
|
2000-04-15 21:23:28 +00:00
|
|
|
if (!uprv_strcmp(key.getLanguage(), "th"))
|
|
|
|
{
|
2000-01-08 02:05:05 +00:00
|
|
|
filename = "line_th";
|
|
|
|
}
|
|
|
|
|
2000-01-14 00:13:59 +00:00
|
|
|
UDataMemory* file = udata_open(NULL, "brk", filename, &status);
|
2002-08-08 00:39:13 +00:00
|
|
|
if (U_FAILURE(status)) {
|
|
|
|
return NULL;
|
2000-01-08 02:05:05 +00:00
|
|
|
}
|
2002-08-08 00:39:13 +00:00
|
|
|
// The UDataMemory is adopted by the break iterator.
|
2000-06-26 22:46:15 +00:00
|
|
|
|
2002-08-08 00:39:13 +00:00
|
|
|
if (!uprv_strcmp(key.getLanguage(), "th")) {
|
|
|
|
filename = "thaidict.brk";
|
|
|
|
result = new DictionaryBasedBreakIterator(file, filename, status);
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
result = new RuleBasedBreakIterator(file, status);
|
|
|
|
}
|
|
|
|
if (result == NULL) {
|
|
|
|
udata_close(file);
|
|
|
|
status = U_MEMORY_ALLOCATION_ERROR;
|
|
|
|
}
|
|
|
|
if (U_FAILURE(status)) { // Sometimes redundant check, but simple.
|
|
|
|
delete result;
|
|
|
|
result = NULL;
|
|
|
|
}
|
2000-01-08 02:05:05 +00:00
|
|
|
return result;
|
1999-08-16 21:50:52 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// -------------------------------------
|
|
|
|
|
2002-08-08 00:39:13 +00:00
|
|
|
// Creates a break iterator for character breaks.
|
1999-08-16 21:50:52 +00:00
|
|
|
BreakIterator*
|
2002-07-24 19:07:37 +00:00
|
|
|
BreakIterator::createCharacterInstance(const Locale& /* key */, UErrorCode& status)
|
1999-08-16 21:50:52 +00:00
|
|
|
{
|
2000-01-08 02:05:05 +00:00
|
|
|
// WARNING: This routine is currently written specifically to handle only the
|
|
|
|
// default rules files and the alternate rules files for Thai. This function
|
|
|
|
// will have to be made fully general at some time in the future!
|
|
|
|
BreakIterator* result = NULL;
|
2001-10-09 22:57:29 +00:00
|
|
|
static const char filename[] = "char";
|
2000-01-08 02:05:05 +00:00
|
|
|
|
2001-10-09 22:57:29 +00:00
|
|
|
if (U_FAILURE(status))
|
|
|
|
return NULL;
|
2000-01-14 00:13:59 +00:00
|
|
|
UDataMemory* file = udata_open(NULL, "brk", filename, &status);
|
2002-08-08 00:39:13 +00:00
|
|
|
if (U_FAILURE(status)) {
|
|
|
|
return NULL;
|
2000-01-08 02:05:05 +00:00
|
|
|
}
|
2002-08-08 00:39:13 +00:00
|
|
|
// The UDataMemory is adopted by the break iterator.
|
2000-06-26 22:46:15 +00:00
|
|
|
|
2002-08-08 00:39:13 +00:00
|
|
|
result = new RuleBasedBreakIterator(file, status);
|
|
|
|
if (result == NULL) {
|
|
|
|
udata_close(file);
|
|
|
|
status = U_MEMORY_ALLOCATION_ERROR;
|
|
|
|
}
|
|
|
|
if (U_FAILURE(status)) { // Sometimes redundant check, but simple.
|
|
|
|
delete result;
|
|
|
|
result = NULL;
|
|
|
|
}
|
2000-01-08 02:05:05 +00:00
|
|
|
return result;
|
1999-08-16 21:50:52 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// -------------------------------------
|
|
|
|
|
2002-08-08 00:39:13 +00:00
|
|
|
// Creates a break iterator for sentence breaks.
|
1999-08-16 21:50:52 +00:00
|
|
|
BreakIterator*
|
2002-07-24 19:07:37 +00:00
|
|
|
BreakIterator::createSentenceInstance(const Locale& /*key */, UErrorCode& status)
|
1999-08-16 21:50:52 +00:00
|
|
|
{
|
2000-01-08 02:05:05 +00:00
|
|
|
// WARNING: This routine is currently written specifically to handle only the
|
|
|
|
// default rules files and the alternate rules files for Thai. This function
|
|
|
|
// will have to be made fully general at some time in the future!
|
|
|
|
BreakIterator* result = NULL;
|
2001-10-09 22:57:29 +00:00
|
|
|
static const char filename[] = "sent";
|
2000-01-08 02:05:05 +00:00
|
|
|
|
2001-10-09 22:57:29 +00:00
|
|
|
if (U_FAILURE(status))
|
|
|
|
return NULL;
|
2000-01-14 00:13:59 +00:00
|
|
|
UDataMemory* file = udata_open(NULL, "brk", filename, &status);
|
2002-08-08 00:39:13 +00:00
|
|
|
if (U_FAILURE(status)) {
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
// The UDataMemory is adopted by the break iterator.
|
2000-01-08 02:05:05 +00:00
|
|
|
|
2002-08-08 00:39:13 +00:00
|
|
|
result = new RuleBasedBreakIterator(file, status);
|
|
|
|
if (result == NULL) {
|
|
|
|
udata_close(file);
|
|
|
|
status = U_MEMORY_ALLOCATION_ERROR;
|
|
|
|
}
|
|
|
|
if (U_FAILURE(status)) { // Sometimes redundant check, but simple.
|
|
|
|
delete result;
|
|
|
|
result = NULL;
|
2000-01-08 02:05:05 +00:00
|
|
|
}
|
2001-10-09 22:57:29 +00:00
|
|
|
|
2000-01-08 02:05:05 +00:00
|
|
|
return result;
|
1999-08-16 21:50:52 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// -------------------------------------
|
|
|
|
|
2002-08-08 00:39:13 +00:00
|
|
|
// Creates a break iterator for title casing breaks.
|
2002-02-28 01:28:04 +00:00
|
|
|
BreakIterator*
|
2002-07-24 19:07:37 +00:00
|
|
|
BreakIterator::createTitleInstance(const Locale& /* key */, UErrorCode& status)
|
2002-02-28 01:28:04 +00:00
|
|
|
{
|
|
|
|
// WARNING: This routine is currently written specifically to handle only the
|
2002-08-01 16:17:41 +00:00
|
|
|
// default rules files. This function will have to be made fully general
|
2002-02-28 01:28:04 +00:00
|
|
|
// at some time in the future!
|
|
|
|
BreakIterator* result = NULL;
|
|
|
|
static const char filename[] = "title";
|
|
|
|
|
|
|
|
if (U_FAILURE(status))
|
|
|
|
return NULL;
|
|
|
|
UDataMemory* file = udata_open(NULL, "brk", filename, &status);
|
2002-08-08 00:39:13 +00:00
|
|
|
if (U_FAILURE(status)) {
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
// The UDataMemory is adopted by the break iterator.
|
2002-02-28 01:28:04 +00:00
|
|
|
|
2002-08-08 00:39:13 +00:00
|
|
|
result = new RuleBasedBreakIterator(file, status);
|
|
|
|
if (result == NULL) {
|
|
|
|
udata_close(file);
|
|
|
|
status = U_MEMORY_ALLOCATION_ERROR;
|
|
|
|
}
|
|
|
|
if (U_FAILURE(status)) { // Sometimes redundant check, but simple.
|
|
|
|
delete result;
|
|
|
|
result = NULL;
|
2002-02-28 01:28:04 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
// -------------------------------------
|
|
|
|
|
1999-08-16 21:50:52 +00:00
|
|
|
// Gets all the available locales that has localized text boundary data.
|
|
|
|
const Locale*
|
|
|
|
BreakIterator::getAvailableLocales(int32_t& count)
|
|
|
|
{
|
|
|
|
return Locale::getAvailableLocales(count);
|
|
|
|
}
|
|
|
|
|
|
|
|
// -------------------------------------
|
|
|
|
// Gets the objectLocale display name in the default locale language.
|
|
|
|
UnicodeString&
|
|
|
|
BreakIterator::getDisplayName(const Locale& objectLocale,
|
|
|
|
UnicodeString& name)
|
|
|
|
{
|
|
|
|
return objectLocale.getDisplayName(name);
|
|
|
|
}
|
|
|
|
|
|
|
|
// -------------------------------------
|
|
|
|
// Gets the objectLocale display name in the displayLocale language.
|
|
|
|
UnicodeString&
|
|
|
|
BreakIterator::getDisplayName(const Locale& objectLocale,
|
|
|
|
const Locale& displayLocale,
|
|
|
|
UnicodeString& name)
|
|
|
|
{
|
|
|
|
return objectLocale.getDisplayName(displayLocale, name);
|
|
|
|
}
|
|
|
|
|
2002-08-08 00:39:13 +00:00
|
|
|
// ------------------------------------------
|
|
|
|
//
|
|
|
|
// Default constructor and destructor
|
|
|
|
//
|
|
|
|
//-------------------------------------------
|
1999-08-16 21:50:52 +00:00
|
|
|
// Default constructor: starts the object with fBufferClone set to FALSE.
BreakIterator::BreakIterator()
    : fBufferClone(FALSE)
{
}
|
|
|
|
|
|
|
|
// Destructor: the body is intentionally empty.
BreakIterator::~BreakIterator()
{
    // Nothing to do here.
}
|
|
|
|
|
2001-10-08 23:26:58 +00:00
|
|
|
U_NAMESPACE_END
|
|
|
|
|
1999-08-16 21:50:52 +00:00
|
|
|
//eof
|