ICU-20991 Trace BreakIterator/BreakEngine creation

See #1014
This commit is contained in:
Frank Tang 2020-03-06 19:17:02 +00:00 committed by Frank Yung-Fong Tang
parent 01523b4da6
commit 94c9ff2089
7 changed files with 319 additions and 9 deletions

View File

@ -38,6 +38,7 @@
#include "uresimp.h"
#include "uassert.h"
#include "ubrkimpl.h"
#include "utracimp.h"
#include "charstr.h"
// *****************************************************************************
@ -412,14 +413,23 @@ BreakIterator::makeInstance(const Locale& loc, int32_t kind, UErrorCode& status)
BreakIterator *result = NULL;
switch (kind) {
case UBRK_CHARACTER:
result = BreakIterator::buildInstance(loc, "grapheme", status);
{
UTRACE_ENTRY(UTRACE_UBRK_CREATE_CHARACTER);
result = BreakIterator::buildInstance(loc, "grapheme", status);
UTRACE_EXIT_STATUS(status);
}
break;
case UBRK_WORD:
result = BreakIterator::buildInstance(loc, "word", status);
{
UTRACE_ENTRY(UTRACE_UBRK_CREATE_WORD);
result = BreakIterator::buildInstance(loc, "word", status);
UTRACE_EXIT_STATUS(status);
}
break;
case UBRK_LINE:
uprv_strcpy(lbType, "line");
{
UTRACE_ENTRY(UTRACE_UBRK_CREATE_LINE);
uprv_strcpy(lbType, "line");
char lbKeyValue[kKeyValueLenMax] = {0};
UErrorCode kvStatus = U_ZERO_ERROR;
int32_t kLen = loc.getKeywordValue("lb", lbKeyValue, kKeyValueLenMax, kvStatus);
@ -427,13 +437,17 @@ BreakIterator::makeInstance(const Locale& loc, int32_t kind, UErrorCode& status)
uprv_strcat(lbType, "_");
uprv_strcat(lbType, lbKeyValue);
}
result = BreakIterator::buildInstance(loc, lbType, status);
UTRACE_DATA1(UTRACE_INFO, "lb=%s", lbKeyValue);
UTRACE_EXIT_STATUS(status);
}
result = BreakIterator::buildInstance(loc, lbType, status);
break;
case UBRK_SENTENCE:
result = BreakIterator::buildInstance(loc, "sentence", status);
#if !UCONFIG_NO_FILTERED_BREAK_ITERATION
{
UTRACE_ENTRY(UTRACE_UBRK_CREATE_SENTENCE);
result = BreakIterator::buildInstance(loc, "sentence", status);
#if !UCONFIG_NO_FILTERED_BREAK_ITERATION
char ssKeyValue[kKeyValueLenMax] = {0};
UErrorCode kvStatus = U_ZERO_ERROR;
int32_t kLen = loc.getKeywordValue("ss", ssKeyValue, kKeyValueLenMax, kvStatus);
@ -444,11 +458,16 @@ BreakIterator::makeInstance(const Locale& loc, int32_t kind, UErrorCode& status)
delete fbiBuilder;
}
}
}
#endif
UTRACE_EXIT_STATUS(status);
}
break;
case UBRK_TITLE:
result = BreakIterator::buildInstance(loc, "title", status);
{
UTRACE_ENTRY(UTRACE_UBRK_CREATE_TITLE);
result = BreakIterator::buildInstance(loc, "title", status);
UTRACE_EXIT_STATUS(status);
}
break;
default:
status = U_ILLEGAL_ARGUMENT_ERROR;

View File

@ -18,6 +18,7 @@
#include "unicode/uniset.h"
#include "unicode/chariter.h"
#include "unicode/ubrk.h"
#include "utracimp.h"
#include "uvectr32.h"
#include "uvector.h"
#include "uassert.h"
@ -194,6 +195,8 @@ ThaiBreakEngine::ThaiBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode
: DictionaryBreakEngine(),
fDictionary(adoptDictionary)
{
UTRACE_ENTRY(UTRACE_UBRK_CREATE_BREAK_ENGINE);
UTRACE_DATA1(UTRACE_INFO, "dictbe=%s", "Thai");
fThaiWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Thai:]&[:LineBreak=SA:]]"), status);
if (U_SUCCESS(status)) {
setCharacters(fThaiWordSet);
@ -213,6 +216,7 @@ ThaiBreakEngine::ThaiBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode
fEndWordSet.compact();
fBeginWordSet.compact();
fSuffixSet.compact();
UTRACE_EXIT_STATUS(status);
}
ThaiBreakEngine::~ThaiBreakEngine() {
@ -436,6 +440,8 @@ LaoBreakEngine::LaoBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &s
: DictionaryBreakEngine(),
fDictionary(adoptDictionary)
{
UTRACE_ENTRY(UTRACE_UBRK_CREATE_BREAK_ENGINE);
UTRACE_DATA1(UTRACE_INFO, "dictbe=%s", "Laoo");
fLaoWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Laoo:]&[:LineBreak=SA:]]"), status);
if (U_SUCCESS(status)) {
setCharacters(fLaoWordSet);
@ -452,6 +458,7 @@ LaoBreakEngine::LaoBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &s
fMarkSet.compact();
fEndWordSet.compact();
fBeginWordSet.compact();
UTRACE_EXIT_STATUS(status);
}
LaoBreakEngine::~LaoBreakEngine() {
@ -632,6 +639,8 @@ BurmeseBreakEngine::BurmeseBreakEngine(DictionaryMatcher *adoptDictionary, UErro
: DictionaryBreakEngine(),
fDictionary(adoptDictionary)
{
UTRACE_ENTRY(UTRACE_UBRK_CREATE_BREAK_ENGINE);
UTRACE_DATA1(UTRACE_INFO, "dictbe=%s", "Mymr");
fBurmeseWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Mymr:]&[:LineBreak=SA:]]"), status);
if (U_SUCCESS(status)) {
setCharacters(fBurmeseWordSet);
@ -645,6 +654,7 @@ BurmeseBreakEngine::BurmeseBreakEngine(DictionaryMatcher *adoptDictionary, UErro
fMarkSet.compact();
fEndWordSet.compact();
fBeginWordSet.compact();
UTRACE_EXIT_STATUS(status);
}
BurmeseBreakEngine::~BurmeseBreakEngine() {
@ -825,6 +835,8 @@ KhmerBreakEngine::KhmerBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCod
: DictionaryBreakEngine(),
fDictionary(adoptDictionary)
{
UTRACE_ENTRY(UTRACE_UBRK_CREATE_BREAK_ENGINE);
UTRACE_DATA1(UTRACE_INFO, "dictbe=%s", "Khmr");
fKhmerWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Khmr:]&[:LineBreak=SA:]]"), status);
if (U_SUCCESS(status)) {
setCharacters(fKhmerWordSet);
@ -850,6 +862,7 @@ KhmerBreakEngine::KhmerBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCod
fEndWordSet.compact();
fBeginWordSet.compact();
// fSuffixSet.compact();
UTRACE_EXIT_STATUS(status);
}
KhmerBreakEngine::~KhmerBreakEngine() {
@ -1045,6 +1058,8 @@ foundBest:
static const uint32_t kuint32max = 0xFFFFFFFF;
CjkBreakEngine::CjkBreakEngine(DictionaryMatcher *adoptDictionary, LanguageType type, UErrorCode &status)
: DictionaryBreakEngine(), fDictionary(adoptDictionary) {
UTRACE_ENTRY(UTRACE_UBRK_CREATE_BREAK_ENGINE);
UTRACE_DATA1(UTRACE_INFO, "dictbe=%s", "Hani");
// Korean dictionary only includes Hangul syllables
fHangulWordSet.applyPattern(UNICODE_STRING_SIMPLE("[\\uac00-\\ud7a3]"), status);
fHanWordSet.applyPattern(UNICODE_STRING_SIMPLE("[:Han:]"), status);
@ -1066,6 +1081,7 @@ CjkBreakEngine::CjkBreakEngine(DictionaryMatcher *adoptDictionary, LanguageType
setCharacters(cjSet);
}
}
UTRACE_EXIT_STATUS(status);
}
CjkBreakEngine::~CjkBreakEngine(){

View File

@ -1117,7 +1117,7 @@ static icu::UInitOnce gRBBIInitOnce = U_INITONCE_INITIALIZER;
* Release all static memory held by breakiterator.
*/
U_CDECL_BEGIN
static UBool U_CALLCONV rbbi_cleanup(void) {
UBool U_CALLCONV rbbi_cleanup(void) {
delete gLanguageBreakFactories;
gLanguageBreakFactories = nullptr;
delete gEmptyString;

View File

@ -192,6 +192,8 @@ private:
U_NAMESPACE_END
U_CFUNC UBool rbbi_cleanup(void);
#endif /* C++ */
#endif

View File

@ -177,6 +177,71 @@ typedef enum UTraceFunctionNumber {
UTRACE_RES_DATA_LIMIT,
#endif // U_HIDE_INTERNAL_API
#ifndef U_HIDE_DRAFT_API
/**
* The lowest break iterator location.
* @draft ICU 67
*/
UTRACE_UBRK_START=0x4000,
/**
* Indicates that a character instance of break iterator was created.
*
* @draft ICU 67
*/
UTRACE_UBRK_CREATE_CHARACTER = UTRACE_UBRK_START,
/**
* Indicates that a word instance of break iterator was created.
*
* @draft ICU 67
*/
UTRACE_UBRK_CREATE_WORD,
/**
* Indicates that a line instance of break iterator was created.
*
* Provides one C-style string to UTraceData: the lb value ("",
* "loose", "strict", or "normal").
*
* @draft ICU 67
*/
UTRACE_UBRK_CREATE_LINE,
/**
* Indicates that a sentence instance of break iterator was created.
*
* @draft ICU 67
*/
UTRACE_UBRK_CREATE_SENTENCE,
/**
* Indicates that a title instance of break iterator was created.
*
* @draft ICU 67
*/
UTRACE_UBRK_CREATE_TITLE,
/**
* Indicates that an internal dictionary break engine was created.
*
* Provides one C-style string to UTraceData: the script code that
* the break engine covers ("Hani", "Khmr", "Laoo", "Mymr", or "Thai").
*
* @draft ICU 67
*/
UTRACE_UBRK_CREATE_BREAK_ENGINE,
#endif // U_HIDE_DRAFT_API
#ifndef U_HIDE_INTERNAL_API
/**
* One more than the highest normal break iterator trace location.
* @internal The numeric value may change over time, see ICU ticket #12420.
*/
UTRACE_UBRK_LIMIT,
#endif // U_HIDE_INTERNAL_API
} UTraceFunctionNumber;
/**

View File

@ -14,6 +14,7 @@
#include "unicode/utypes.h"
#if !UCONFIG_NO_BREAK_ITERATION
#include <sstream>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@ -35,6 +36,7 @@
#include "unicode/uscript.h"
#include "unicode/ustring.h"
#include "unicode/utext.h"
#include "unicode/utrace.h"
#include "charstr.h"
#include "cmemory.h"
@ -126,6 +128,19 @@ void RBBITest::runIndexedTest( int32_t index, UBool exec, const char* &name, cha
TESTCASE_AUTO(TestReverse);
TESTCASE_AUTO(TestBug13692);
TESTCASE_AUTO(TestDebugRules);
#if U_ENABLE_TRACING
TESTCASE_AUTO(TestTraceCreateCharacter);
TESTCASE_AUTO(TestTraceCreateWord);
TESTCASE_AUTO(TestTraceCreateSentence);
TESTCASE_AUTO(TestTraceCreateTitle);
TESTCASE_AUTO(TestTraceCreateLine);
TESTCASE_AUTO(TestTraceCreateLineNormal);
TESTCASE_AUTO(TestTraceCreateLineLoose);
TESTCASE_AUTO(TestTraceCreateLineStrict);
TESTCASE_AUTO(TestTraceCreateBreakEngine);
#endif
TESTCASE_AUTO_END;
}
@ -4865,6 +4880,182 @@ void RBBITest::TestDebugRules() {
#endif
}
#if U_ENABLE_TRACING
// State recorded by the trace callbacks below; cleared by SetupTestTrace().
// String argument passed with each recorded utrace data call.
static std::vector<std::string> gData;
// Function numbers seen by the entry callback, in call order.
static std::vector<int32_t> gEntryFn;
// Function numbers seen by the exit callback, in call order.
static std::vector<int32_t> gExitFn;
// Function numbers seen by the data callback; parallel to gData.
static std::vector<int32_t> gDataFn;
/**
 * utrace data callback used by the trace tests.
 *
 * Records the function number and the first variadic argument (read as a
 * C string) for every call whose function number lies in the break
 * iterator range. The level and format arguments are ignored.
 *
 * @param fnNumber  trace function number of the reporting location
 * @param args      variadic arguments; the first one is read as const char*
 */
static void U_CALLCONV traceData(
        const void*,
        int32_t fnNumber,
        int32_t,
        const char *,
        va_list args) {
    // UTRACE_UBRK_LIMIT is one more than the highest break iterator
    // location, so the accepted range is half-open.
    if (UTRACE_UBRK_START <= fnNumber && fnNumber < UTRACE_UBRK_LIMIT) {
        const char* data = va_arg(args, const char*);
        gDataFn.push_back(fnNumber);
        // Constructing std::string from a null pointer is undefined
        // behaviour; record a visible placeholder instead.
        gData.push_back(data != nullptr ? data : "(null)");
    }
}
static void traceEntry(const void *, int32_t fnNumber) {
if (UTRACE_UBRK_START <= fnNumber && fnNumber <= UTRACE_UBRK_LIMIT) {
gEntryFn.push_back(fnNumber);
}
}
static void traceExit(const void *, int32_t fnNumber, const char *, va_list) {
if (UTRACE_UBRK_START <= fnNumber && fnNumber <= UTRACE_UBRK_LIMIT) {
gExitFn.push_back(fnNumber);
}
}
/**
 * Checks the trace calls recorded since the last SetupTestTrace().
 *
 * Expects exactly one entry and one exit, both for fnNumber. When
 * expectedData is nullptr no data call may have been recorded; otherwise
 * exactly one data call for fnNumber carrying expectedData is expected.
 *
 * @param fnNumber      the trace function number expected in every record
 * @param expectedData  expected data string, or nullptr for "no data call"
 */
void RBBITest::assertTestTraceResult(int32_t fnNumber, const char* expectedData) {
    // Element checks are guarded so that a failed count assertion is
    // reported as a test failure rather than reading an empty vector.
    assertEquals("utrace_entry should be called ", 1, static_cast<int32_t>(gEntryFn.size()));
    if (!gEntryFn.empty()) {
        assertEquals("utrace_entry should be called with ", fnNumber, gEntryFn[0]);
    }
    assertEquals("utrace_exit should be called ", 1, static_cast<int32_t>(gExitFn.size()));
    if (!gExitFn.empty()) {
        assertEquals("utrace_exit should be called with ", fnNumber, gExitFn[0]);
    }

    if (expectedData == nullptr) {
        assertEquals("utrace_data should not be called ", 0, static_cast<int32_t>(gDataFn.size()));
        assertEquals("utrace_data should not be called ", 0, static_cast<int32_t>(gData.size()));
        return;
    }

    assertEquals("utrace_data should be called ", 1, static_cast<int32_t>(gDataFn.size()));
    if (!gDataFn.empty()) {
        assertEquals("utrace_data should be called with ", fnNumber, gDataFn[0]);
    }
    assertEquals("utrace_data should be called ", 1, static_cast<int32_t>(gData.size()));
    if (!gData.empty()) {
        assertEquals("utrace_data should pass in ", expectedData, gData[0].c_str());
    }
}
// Discards everything recorded so far, then installs the recording
// callbacks (with a null context) and sets the trace level to UTRACE_INFO.
void SetupTestTrace() {
    gData.clear();
    gDataFn.clear();
    gExitFn.clear();
    gEntryFn.clear();

    utrace_setFunctions(/* context= */ nullptr, traceEntry, traceExit, traceData);
    utrace_setLevel(UTRACE_INFO);
}
void RBBITest::TestTraceCreateCharacter(void) {
SetupTestTrace();
IcuTestErrorCode status(*this, "TestTraceCreateCharacter");
LocalPointer<BreakIterator> brkitr(
BreakIterator::createCharacterInstance("zh-CN", status));
status.errIfFailureAndReset();
assertTestTraceResult(UTRACE_UBRK_CREATE_CHARACTER, nullptr);
}
void RBBITest::TestTraceCreateTitle(void) {
SetupTestTrace();
IcuTestErrorCode status(*this, "TestTraceCreateTitle");
LocalPointer<BreakIterator> brkitr(
BreakIterator::createTitleInstance("zh-CN", status));
status.errIfFailureAndReset();
assertTestTraceResult(UTRACE_UBRK_CREATE_TITLE, nullptr);
}
void RBBITest::TestTraceCreateSentence(void) {
SetupTestTrace();
IcuTestErrorCode status(*this, "TestTraceCreateSentence");
LocalPointer<BreakIterator> brkitr(
BreakIterator::createSentenceInstance("zh-CN", status));
status.errIfFailureAndReset();
assertTestTraceResult(UTRACE_UBRK_CREATE_SENTENCE, nullptr);
}
void RBBITest::TestTraceCreateWord(void) {
SetupTestTrace();
IcuTestErrorCode status(*this, "TestTraceCreateWord");
LocalPointer<BreakIterator> brkitr(
BreakIterator::createWordInstance("zh-CN", status));
status.errIfFailureAndReset();
assertTestTraceResult(UTRACE_UBRK_CREATE_WORD, nullptr);
}
void RBBITest::TestTraceCreateLine(void) {
SetupTestTrace();
IcuTestErrorCode status(*this, "TestTraceCreateLine");
LocalPointer<BreakIterator> brkitr(
BreakIterator::createLineInstance("zh-CN", status));
status.errIfFailureAndReset();
assertTestTraceResult(UTRACE_UBRK_CREATE_LINE, "");
}
void RBBITest::TestTraceCreateLineStrict(void) {
SetupTestTrace();
IcuTestErrorCode status(*this, "TestTraceCreateLineStrict");
LocalPointer<BreakIterator> brkitr(
BreakIterator::createLineInstance("zh-CN-u-lb-strict", status));
status.errIfFailureAndReset();
assertTestTraceResult(UTRACE_UBRK_CREATE_LINE, "strict");
}
void RBBITest::TestTraceCreateLineNormal(void) {
SetupTestTrace();
IcuTestErrorCode status(*this, "TestTraceCreateLineNormal");
LocalPointer<BreakIterator> brkitr(
BreakIterator::createLineInstance("zh-CN-u-lb-normal", status));
status.errIfFailureAndReset();
assertTestTraceResult(UTRACE_UBRK_CREATE_LINE, "normal");
}
void RBBITest::TestTraceCreateLineLoose(void) {
SetupTestTrace();
IcuTestErrorCode status(*this, "TestTraceCreateLineLoose");
LocalPointer<BreakIterator> brkitr(
BreakIterator::createLineInstance("zh-CN-u-lb-loose", status));
status.errIfFailureAndReset();
assertTestTraceResult(UTRACE_UBRK_CREATE_LINE, "loose");
}
/**
 * Verifies that each dictionary break engine created while iterating over
 * mixed-script text is traced as UTRACE_UBRK_CREATE_BREAK_ENGINE together
 * with its script code.
 *
 * Expected trace after iteration: one entry/exit for the word iterator
 * creation followed by five entry/exit pairs for break engines, and five
 * data calls carrying "Hani", "Khmr", "Laoo", "Mymr" and "Thai" in order.
 */
void RBBITest::TestTraceCreateBreakEngine(void) {
    // Release the break iterator's static state before tracing starts.
    // NOTE(review): presumably this forces the engines to be created anew.
    rbbi_cleanup();
    SetupTestTrace();
    IcuTestErrorCode status(*this, "TestTraceCreateBreakEngine");
    LocalPointer<BreakIterator> brkitr(
        BreakIterator::createWordInstance("zh-CN", status));
    status.errIfFailureAndReset();
    assertTestTraceResult(UTRACE_UBRK_CREATE_WORD, nullptr);
    if (brkitr.isNull()) {
        // Creation failed (already reported above); nothing to iterate.
        return;
    }

    // To word break the following text, BreakIterator will create 5 dictionary
    // break engines internally.
    brkitr->setText(
        u"test "
        u"測試 " // Hani
        u"សាកល្បង " // Khmr
        u"ທົດສອບ " // Laoo
        u"စမ်းသပ်မှု " // Mymr
        u"ทดสอบ " // Thai
        u"test "
    );

    // Loop through all the text.
    while (brkitr->next() > 0) ;

    assertEquals("utrace_entry should be called ", 6, static_cast<int32_t>(gEntryFn.size()));
    assertEquals("utrace_exit should be called ", 6, static_cast<int32_t>(gExitFn.size()));
    assertEquals("utrace_data should be called ", 5, static_cast<int32_t>(gDataFn.size()));

    // The element checks below index the recorded vectors directly, so stop
    // here when any count is off (the mismatch was reported just above).
    if (gEntryFn.size() != 6 || gExitFn.size() != 6 ||
            gDataFn.size() != 5 || gData.size() != 5) {
        return;
    }

    // Index 0 of the entry/exit records belongs to the word iterator
    // creation, hence the +1 offset relative to the data records.
    for (std::vector<int32_t>::size_type i = 0; i < gDataFn.size(); i++) {
        assertEquals("utrace_entry should be called ",
                     UTRACE_UBRK_CREATE_BREAK_ENGINE, gEntryFn[i+1]);
        assertEquals("utrace_exit should be called ",
                     UTRACE_UBRK_CREATE_BREAK_ENGINE, gExitFn[i+1]);
        assertEquals("utrace_data should be called ",
                     UTRACE_UBRK_CREATE_BREAK_ENGINE, gDataFn[i]);
    }

    assertEquals("utrace_data should pass ", "Hani", gData[0].c_str());
    assertEquals("utrace_data should pass ", "Khmr", gData[1].c_str());
    assertEquals("utrace_data should pass ", "Laoo", gData[2].c_str());
    assertEquals("utrace_data should pass ", "Mymr", gData[3].c_str());
    assertEquals("utrace_data should pass ", "Thai", gData[4].c_str());
}
#endif
#endif // #if !UCONFIG_NO_BREAK_ITERATION

View File

@ -87,6 +87,18 @@ public:
void TestDebug();
void TestProperties();
#if U_ENABLE_TRACING
// Tests for the utrace instrumentation of break iterator and dictionary
// break engine creation; compiled only when tracing is enabled.
void TestTraceCreateCharacter();
void TestTraceCreateWord();
void TestTraceCreateSentence();
void TestTraceCreateTitle();
void TestTraceCreateLine();
void TestTraceCreateLineNormal();
void TestTraceCreateLineStrict();
void TestTraceCreateLineLoose();
void TestTraceCreateBreakEngine();
#endif
/***********************/
private:
/**
@ -120,6 +132,11 @@ private:
// Test parameters, from the test framework and test invocation.
const char* fTestParams;
#if U_ENABLE_TRACING
// Checks the recorded trace calls: one entry/exit for fnNumber and, unless
// expectedData is nullptr, one data call carrying expectedData.
void assertTestTraceResult(int32_t fnNumber, const char* expectedData);
#endif
};
#endif /* #if !UCONFIG_NO_BREAK_ITERATION */