ICU-5766 Extended Grapheme Clusters for ICU4C
X-SVN-Rev: 21933
This commit is contained in:
parent
abbc83a287
commit
ca5d005978
@ -189,6 +189,15 @@ BreakIterator::createTitleInstance(const Locale& key, UErrorCode& status)
|
||||
|
||||
// -------------------------------------
|
||||
|
||||
// Creates a break iterator for Extended Grapheme Cluster breaks.
// Thin factory wrapper: forwards the locale and status unchanged to
// createInstance() with the kind UBRK_X_GRAPHEME_CLUSTER and returns
// whatever that call returns.
|
||||
BreakIterator* U_EXPORT2
|
||||
BreakIterator::createXGraphemeClusterInstance(const Locale& key, UErrorCode& status)
|
||||
{
|
||||
return createInstance(key, UBRK_X_GRAPHEME_CLUSTER, status);
|
||||
}
|
||||
|
||||
// -------------------------------------
|
||||
|
||||
// Gets all the available locales that have localized text boundary data.
|
||||
const Locale* U_EXPORT2
|
||||
BreakIterator::getAvailableLocales(int32_t& count)
|
||||
@ -424,6 +433,9 @@ BreakIterator::makeInstance(const Locale& loc, int32_t kind, UErrorCode& status)
|
||||
case UBRK_TITLE:
|
||||
result = BreakIterator::buildInstance(loc, "title", kind, status);
|
||||
break;
|
||||
case UBRK_X_GRAPHEME_CLUSTER:
|
||||
result = BreakIterator::buildInstance(loc, "xgc", kind, status);
|
||||
break;
|
||||
default:
|
||||
status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
}
|
||||
|
@ -397,6 +397,22 @@ public:
|
||||
static BreakIterator* U_EXPORT2
|
||||
createTitleInstance(const Locale& where, UErrorCode& status);
|
||||
|
||||
/**
|
||||
* Create BreakIterator for Extended Grapheme Clusters using specified locale
|
||||
* Returns an instance of a BreakIterator for locating XGC boundaries
|
||||
* Extended Grapheme Clusters are combining character sequences and other
|
||||
* sequences that should remain unbroken when iterating over
|
||||
* "characters" from a user perspective.
|
||||
* @param loc the locale.
|
||||
* @param status Receive information regarding any errors or warnings that
|
||||
* occurred in creating the break iterator.
|
||||
* @return A BreakIterator for Extended Grapheme Clusters.
|
||||
* The caller owns the returned object and is responsible for deleting it.
|
||||
* @draft ICU 3.8
|
||||
*/
|
||||
static BreakIterator* U_EXPORT2
|
||||
createXGraphemeClusterInstance(const Locale& loc, UErrorCode& status);
|
||||
|
||||
/**
|
||||
* Get the set of Locales for which TextBoundaries are installed.
|
||||
* <p><b>Note:</b> this will not return locales added through the register
|
||||
|
@ -105,7 +105,9 @@ typedef enum UBreakIteratorType {
|
||||
*/
|
||||
UBRK_TITLE = 4,
|
||||
#endif /* U_HIDE_DEPRECATED_API */
|
||||
UBRK_COUNT = 5
|
||||
/** Extended Grapheme Cluster breaks @draft ICU 3.8 */
|
||||
UBRK_X_GRAPHEME_CLUSTER=6,
|
||||
UBRK_COUNT = 6
|
||||
} UBreakIteratorType;
|
||||
|
||||
/** Value indicating all text boundaries have been returned.
|
||||
|
@ -1,4 +1,4 @@
|
||||
// ***************************************************************************
|
||||
// ***************************************************************************
|
||||
// *
|
||||
// * Copyright (C) 2007 International Business Machines
|
||||
// * Corporation and others. All Rights Reserved.
|
||||
@ -14,6 +14,7 @@ root{
|
||||
sentence:process(dependency){"sent.brk"}
|
||||
title:process(dependency){"title.brk"}
|
||||
word:process(dependency){"word.brk"}
|
||||
xgc:process(dependency){"xgc.brk"}
|
||||
}
|
||||
dictionaries{
|
||||
Thai:process(dependency){"thaidict.ctd"}
|
||||
|
@ -1,6 +1,6 @@
|
||||
<?xml version="1.0" encoding="UTF-8" ?>
|
||||
<!--
|
||||
Copyright (c) 2006 International Business Machines Corporation and others. All rights reserved.
|
||||
Copyright (c) 2007 International Business Machines Corporation and others. All rights reserved.
|
||||
-->
|
||||
<!DOCTYPE ldml SYSTEM "http://www.unicode.org/cldr/dtd/1.4/ldml.dtd"
|
||||
[
|
||||
@ -22,6 +22,7 @@
|
||||
<icu:line icu:dependency="line.brk"/>
|
||||
<icu:sentence icu:dependency="sent.brk"/>
|
||||
<icu:title icu:dependency="title.brk"/>
|
||||
<icu:xgc icu:dependency="xgc.brk"/>
|
||||
</icu:boundaries>
|
||||
<icu:dictionaries>
|
||||
<icu:dictionary type="Thai" icu:dependency="thaidict.ctd"/>
|
||||
|
@ -411,6 +411,13 @@ void RBBIAPITest::TestIteration()
|
||||
}
|
||||
delete bi;
|
||||
|
||||
status=U_ZERO_ERROR;
|
||||
bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::
|
||||
createXGraphemeClusterInstance(Locale::getDefault(), status);
|
||||
TEST_ASSERT_SUCCESS(status);
|
||||
TEST_ASSERT(bi != NULL);
|
||||
delete bi;
|
||||
|
||||
status=U_ZERO_ERROR;
|
||||
bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
|
||||
if (U_FAILURE(status) || bi == NULL) {
|
||||
|
@ -1502,6 +1502,13 @@ void RBBITest::TestExtended() {
|
||||
charIdx += 6;
|
||||
break;
|
||||
}
|
||||
if (testString.compare(charIdx-1, 5, "<xgc>") == 0) {
|
||||
delete tp.bi;
|
||||
tp.bi = BreakIterator::createXGraphemeClusterInstance(locale, status);
|
||||
charIdx += 4;
|
||||
break;
|
||||
}
|
||||
|
||||
// <locale loc_name>
|
||||
localeMatcher.reset(testString);
|
||||
if (localeMatcher.lookingAt(charIdx-1, status)) {
|
||||
|
24
icu4c/source/test/testdata/rbbitst.txt
vendored
24
icu4c/source/test/testdata/rbbitst.txt
vendored
@ -1,4 +1,4 @@
|
||||
# Copyright (c) 2001-2006 International Business Machines
|
||||
# Copyright (c) 2001-2006 International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#
|
||||
# RBBI Test Data
|
||||
@ -91,6 +91,28 @@
|
||||
# Treat Japanese Half Width voicing marks as combining
|
||||
<data>•A\uff9e•B\uff9f\uff9e\uff9f•C•</data>
|
||||
|
||||
########################################################################################
|
||||
#
|
||||
#
|
||||
# Extended G r a p h e m e C l u s t e r T e s t s
|
||||
#
|
||||
#
|
||||
##########################################################################################
|
||||
<xgc>
|
||||
|
||||
# Plain Vanilla grapheme clusters
|
||||
<data>•a•b•c•</data>
|
||||
<data>•a\u0301\u0302• •b\u0303\u0304•</data>
|
||||
|
||||
# Assorted Hindi combining marks
|
||||
<data>•\u0904\u0903• •\u0937\u093E• •\u0904\u093F• •\u0937\u0940• •\u0937\u0949• •\u0937\u094A• •\u0937\u094B• •\u0937\u094C•</data>
|
||||
|
||||
# Thai Clusters
|
||||
# $Prepend $Extend* $PrependBase $Extend*;
|
||||
#
|
||||
<data>•\u0e40\u0e01•\u0e44\u0301\u0e23\u0302\u0303•\u0e40•\u0e40\u0e02•\u0e02• •</data>
|
||||
|
||||
|
||||
########################################################################################
|
||||
#
|
||||
#
|
||||
|
Loading…
Reference in New Issue
Block a user