ICU-2093 line break rule updated; monkey test added (not complete, Grapheme Cluster only so far.)
X-SVN-Rev: 12115
This commit is contained in:
parent
c0e44994ba
commit
4a211d4dd1
@ -62,10 +62,9 @@ $ALPlus = $AL | $AI | $SA;
|
||||
# chars that are included in $CM. Use $Extend instead, where possible.
|
||||
#
|
||||
$ALcm = $ALPlus $CM*;
|
||||
$IDcm = $ID $CM*;
|
||||
$IDcm = ($ID $CM* | $SP $CM+);
|
||||
$NUcm = $NU $Extend*;
|
||||
$HYcm = $HY $Extend*;
|
||||
$SPcm = $SP $Extend*;
|
||||
$QUcm = $QU $Extend*;
|
||||
$POcm = $PO $Extend*;
|
||||
$OPcm = $OP $Extend*;
|
||||
@ -85,19 +84,19 @@ $INcm = $IN $Extend*;
|
||||
# appears at the end of line break rule.
|
||||
#
|
||||
$NLF = $BK | $CR | $LF | $CR $LF;
|
||||
$Endings = $SPcm* $ZW* $NLF?;
|
||||
$Endings = $SP* $ZW* $NLF?;
|
||||
|
||||
|
||||
#
|
||||
# Openings Sequences that can precede Words, and that should not be separated from them.
|
||||
# Rules LB 9, 10
|
||||
#
|
||||
$Openings = (($QUcm $SPcm*)? $OPcm $SPcm*)*;
|
||||
$Openings = (($QUcm $SP*)? $OPcm $SP*)*;
|
||||
|
||||
#
|
||||
# Closings Seqences that follow words, and that should not be separated from them,
|
||||
# Rule LB 8, 11, 15
|
||||
$Closings = ($SPcm*( ($CL ($SPcm* $NScm)? | $EX | $IS | $SY) $Extend*) | $BAcm | $HYcm | $NScm)*;
|
||||
$Closings = ($SP*( ($CL ($SP* $NScm)? | $EX | $IS | $SY) $Extend*) | $BAcm | $HYcm | $NScm)*;
|
||||
|
||||
#
|
||||
# Words. Includes mixed Alpha-numerics.
|
||||
@ -106,7 +105,7 @@ $Closings = ($SPcm*( ($CL ($SPcm* $NScm)? | $EX | $IS | $SY) $Extend*) | $B
|
||||
$NumberInterior = $IDcm | ($NUcm | $ALcm | $IS $NUcm)+;
|
||||
$Number = $PR? ($OPcm | $HYcm)? $NumberInterior $CL? $POcm?; # Fancy Number 18
|
||||
$Word = (($IDcm | ($ALcm | $NUcm)+) ($POcm? | $INcm?)) ; # Alpha-numeric. 16, 17
|
||||
$Dashes = (($B2cm $SPcm*)*); # Dashes 11a
|
||||
$Dashes = (($B2cm $SP*)*); # Dashes 11a
|
||||
|
||||
|
||||
|
||||
|
@ -20,6 +20,9 @@
|
||||
#include "unicode/utf16.h"
|
||||
#include "unicode/ucnv.h"
|
||||
#include "unicode/schriter.h"
|
||||
#include "unicode/uniset.h"
|
||||
#include "unicode/regex.h" // TODO: make conditional on regexp being built.
|
||||
|
||||
#include "intltest.h"
|
||||
#include "rbbitst.h"
|
||||
#include <string.h>
|
||||
@ -27,6 +30,7 @@
|
||||
#include "uvectr32.h"
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
|
||||
|
||||
@ -508,25 +512,25 @@ void RBBITest::TestThaiWordBreak() {
|
||||
// runIndexedTest
|
||||
//---------------------------------------------
|
||||
|
||||
void RBBITest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par*/ )
|
||||
void RBBITest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* params )
|
||||
{
|
||||
if (exec) logln("TestSuite RuleBasedBreakIterator: ");
|
||||
switch (index) {
|
||||
|
||||
case 0: name = "TestExtended";
|
||||
if(exec) TestExtended(); break;
|
||||
case 1: name = "";
|
||||
case 1: name = "extra";
|
||||
break;
|
||||
case 2: name = "";
|
||||
case 2: name = "extra";
|
||||
break;
|
||||
case 3: name = "";
|
||||
case 3: name = "extra";
|
||||
break;
|
||||
case 4: name = "";
|
||||
case 4: name = "extra";
|
||||
break;
|
||||
case 5: name = "";
|
||||
break;
|
||||
case 6: name = "";
|
||||
case 5: name = "extra";
|
||||
break;
|
||||
case 6: name = "TestJapaneseLineBrea";
|
||||
if(exec) TestJapaneseLineBreak(); break;
|
||||
case 7: name = "TestStatusReturn";
|
||||
if(exec) TestStatusReturn(); break;
|
||||
|
||||
@ -552,8 +556,8 @@ void RBBITest::runIndexedTest( int32_t index, UBool exec, const char* &name, cha
|
||||
if(exec) TestEndBehaviour(); break;
|
||||
case 16: name = "TestBug4153072";
|
||||
if(exec) TestBug4153072(); break;
|
||||
case 17: name = "TestJapaneseLineBreak";
|
||||
if(exec) TestJapaneseLineBreak(); break;
|
||||
case 17: name = "TestMonkey";
|
||||
if(exec) TestMonkey(params); break;
|
||||
|
||||
|
||||
case 18: name = "TestThaiLineBreak";
|
||||
@ -566,18 +570,6 @@ void RBBITest::runIndexedTest( int32_t index, UBool exec, const char* &name, cha
|
||||
if(exec) TestThaiWordBreak(); break;
|
||||
|
||||
|
||||
// case 7: name = "TestHindiCharacterWrapping()";
|
||||
// if(exec) TestHindiCharacterWrapping(); break;
|
||||
// case 8: name = "TestCustomRuleBasedWordIteration";
|
||||
// if(exec) TestCustomRuleBasedWordIteration(); break;
|
||||
// case 9: name = "TestAbbrRuleBasedWordIteration";
|
||||
// if(exec) TestAbbrRuleBasedWordIteration(); break;
|
||||
// case 10: name = "TestTeluguRuleBasedCharacterIteration";
|
||||
// if(exec) TestTeluguRuleBasedCharacterIteration(); break;
|
||||
// case 11: name = "TestCustomRuleBasedCharacterIteration";
|
||||
// if(exec) TestCustomRuleBasedCharacterIteration(); break;
|
||||
|
||||
|
||||
default: name = ""; break; //needed to end loop
|
||||
}
|
||||
}
|
||||
@ -1952,4 +1944,346 @@ void RBBITest::TestLineBreakData() {
|
||||
|
||||
}
|
||||
|
||||
|
||||
//
|
||||
// Monkey Test for Break Iteration
|
||||
// Abstract interface class. Concrete derived classes independently
|
||||
// implement the break rules for different iterator types.
|
||||
//
|
||||
//
|
||||
class RBBIMonkeyKind {
|
||||
public:
|
||||
// Return a UVector of UnicodeSets, representing the character classes used
|
||||
// for this type of iterator.
|
||||
virtual UVector *charClasses() = 0;
|
||||
|
||||
// Find the next break postion, starting from the prev break position, or from zero.
|
||||
// Return -1 after reaching end of string.
|
||||
virtual int32_t next(const UnicodeString &s, int32_t i) = 0;
|
||||
|
||||
virtual ~RBBIMonkeyKind() {};
|
||||
UErrorCode deferredStatus;
|
||||
|
||||
|
||||
protected:
|
||||
RBBIMonkeyKind() {};
|
||||
|
||||
private:
|
||||
};
|
||||
|
||||
//------------------------------------------------------------------------------------------
|
||||
//
|
||||
// class RBBICharMonkey Character (Grapheme Cluster) specific stuff.
|
||||
//
|
||||
//------------------------------------------------------------------------------------------
|
||||
class RBBICharMonkey: public RBBIMonkeyKind {
|
||||
public:
|
||||
RBBICharMonkey();
|
||||
virtual ~RBBICharMonkey();
|
||||
virtual UVector *charClasses();
|
||||
virtual int32_t next(const UnicodeString &s, int32_t i);
|
||||
private:
|
||||
UVector *fSets;
|
||||
|
||||
UnicodeSet *fCRLFSet;
|
||||
UnicodeSet *fControlSet;
|
||||
UnicodeSet *fExtendSet;
|
||||
UnicodeSet *fHangulSet;
|
||||
UnicodeSet *fAnySet;
|
||||
|
||||
RegexMatcher *fMatcher;
|
||||
};
|
||||
|
||||
|
||||
RBBICharMonkey::RBBICharMonkey() {
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
|
||||
fMatcher = new RegexMatcher("\\X", 0, status); // Pattern to match a grampheme cluster
|
||||
|
||||
fCRLFSet = new UnicodeSet("[\\r\\n]", status);
|
||||
fControlSet = new UnicodeSet("[[\\p{Zl}\\p{Zp}\\p{Cc}\\p{Cf}]-[\\n]-[\\r]]", status);
|
||||
fExtendSet = new UnicodeSet("[\\p{Grapheme_Extend}]", status);
|
||||
fHangulSet = new UnicodeSet(
|
||||
"[\\p{Hangul_Syllable_Type=L}\\p{Hangul_Syllable_Type=L}\\p{Hangul_Syllable_Type=T}"
|
||||
"\\p{Hangul_Syllable_Type=LV}\\p{Hangul_Syllable_Type=LVT}]", status);
|
||||
fAnySet = new UnicodeSet("[\\u0000-\\U0010ffff]", status);
|
||||
|
||||
fSets = new UVector(status);
|
||||
fSets->addElement(fCRLFSet, status);
|
||||
fSets->addElement(fControlSet, status);
|
||||
fSets->addElement(fExtendSet, status);
|
||||
fSets->addElement(fHangulSet, status);
|
||||
fSets->addElement(fAnySet, status);
|
||||
if (U_FAILURE(status)) {
|
||||
deferredStatus = status;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
int32_t RBBICharMonkey::next(const UnicodeString &s, int32_t i) {
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
int32_t retVal = -1;
|
||||
|
||||
fMatcher->reset(s);
|
||||
if (fMatcher->find(i, status)) {
|
||||
retVal = fMatcher->end(status);
|
||||
}
|
||||
if (U_FAILURE(status)){
|
||||
retVal = -1;
|
||||
}
|
||||
return retVal;
|
||||
}
|
||||
|
||||
|
||||
UVector *RBBICharMonkey::charClasses() {
|
||||
return fSets;
|
||||
}
|
||||
|
||||
|
||||
RBBICharMonkey::~RBBICharMonkey() {
|
||||
delete fSets;
|
||||
delete fCRLFSet;
|
||||
delete fControlSet;
|
||||
delete fExtendSet;
|
||||
delete fHangulSet;
|
||||
delete fAnySet;
|
||||
|
||||
delete fMatcher;
|
||||
}
|
||||
|
||||
//
|
||||
// TestMonkey
|
||||
//
|
||||
// params
|
||||
// seed=nnnnn Random number starting seed.
|
||||
// Setting the seed allows errors to be reproduced.
|
||||
// loop=nnn Looping count. Controls running time.
|
||||
// -1: run forever.
|
||||
// 0 or greater: run length.
|
||||
// default = 100.
|
||||
// 0 is minimum
|
||||
//
|
||||
// type = char | work | line | sent | title
|
||||
//
|
||||
//
|
||||
|
||||
static int32_t getIntParam(UnicodeString name, UnicodeString ¶ms, int32_t defaultVal) {
|
||||
int32_t val = defaultVal;
|
||||
name.append(" *= *(-?\\d+)");
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
RegexMatcher m(name, params, 0, status);
|
||||
if (m.find()) {
|
||||
// The param exists. Convert the string to an int.
|
||||
char valString[100];
|
||||
int32_t paramLength = m.end(1, status) - m.start(1, status);
|
||||
if (paramLength >= sizeof(valString)-1) {paramLength = sizeof(valString)-2;};
|
||||
params.extract(m.start(1, status), paramLength, valString, sizeof(valString));
|
||||
val = strtol(valString, NULL, 10);
|
||||
|
||||
// Delete this parameter from the params string.
|
||||
m.reset();
|
||||
params = m.replaceFirst("", status);
|
||||
}
|
||||
U_ASSERT(U_SUCCESS(status));
|
||||
return val;
|
||||
}
|
||||
|
||||
void RBBITest::TestMonkey(char *params) {
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
int32_t loopCount = 100;
|
||||
int32_t seed = 1;
|
||||
UnicodeString breakType = "all";
|
||||
Locale locale("en");
|
||||
|
||||
|
||||
if (params) {
|
||||
UnicodeString p(params);
|
||||
loopCount = getIntParam("loop", p, 100);
|
||||
seed = getIntParam("seed", p, 1);
|
||||
|
||||
RegexMatcher m(" *type *= *(char|work|line|sent|title) *", p, 0, status);
|
||||
if (m.find()) {
|
||||
breakType = m.group(1, status);
|
||||
m.reset();
|
||||
m.replaceFirst("", status);
|
||||
}
|
||||
|
||||
if (RegexMatcher("\\S", p, 0, status).find()) {
|
||||
// Each option is stripped out of the option string as it is processed.
|
||||
// All options have been checked. The option string should have been completely emptied..
|
||||
char buf[100];
|
||||
p.extract(buf, sizeof(buf), NULL, status);
|
||||
buf[sizeof(buf)-1] = 0;
|
||||
errln("Unrecognized or extra parameter: %s\n", buf);
|
||||
return;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
if (breakType == "char" || breakType == "all") {
|
||||
RBBICharMonkey m;
|
||||
BreakIterator *bi = BreakIterator::createCharacterInstance(locale, status);
|
||||
RunMonkey(bi, m, seed, loopCount);
|
||||
delete bi;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
void RBBITest::RunMonkey(BreakIterator *bi, RBBIMonkeyKind &mk, uint32_t seed, int32_t numIterations) {
|
||||
const int32_t TESTSTRINGLEN = 500;
|
||||
UnicodeString testText;
|
||||
int32_t numCharClasses;
|
||||
UVector *chClasses;
|
||||
char expectedBreaks[TESTSTRINGLEN*2 + 1];
|
||||
char forwardBreaks[TESTSTRINGLEN*2 + 1];
|
||||
char reverseBreaks[TESTSTRINGLEN*2+1];
|
||||
int i;
|
||||
int loopCount = 0;
|
||||
|
||||
srand( seed);
|
||||
|
||||
numCharClasses = mk.charClasses()->size();
|
||||
chClasses = mk.charClasses();
|
||||
|
||||
// Check for errors that occured during the construction of the MonkeyKind object.
|
||||
// Can't report them where they occured because errln() is a method coming from intlTest,
|
||||
// and is not visible outside of RBBITest :-(
|
||||
if (U_FAILURE(mk.deferredStatus)) {
|
||||
errln("status of \"%s\" in creation of RBBIMonkeyKind.", u_errorName(mk.deferredStatus));
|
||||
return;
|
||||
}
|
||||
|
||||
// Verify that the character classes all have at least one member.
|
||||
for (i=0; i<numCharClasses; i++) {
|
||||
UnicodeSet *s = (UnicodeSet *)chClasses->elementAti(i);
|
||||
if (s == NULL || s->size() == 0) {
|
||||
errln("Character Class #%d is null or of zero size.", i);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
numIterations *= 5;
|
||||
while (loopCount <= numIterations || numIterations == -1) {
|
||||
|
||||
// Populate a test string with data.
|
||||
testText.truncate(0);
|
||||
for (i=0; i<TESTSTRINGLEN; i++) {
|
||||
int32_t aClassNum = rand() % numCharClasses;
|
||||
UnicodeSet *classSet = (UnicodeSet *)chClasses->elementAti(aClassNum);
|
||||
int32_t charIdx = rand() % classSet->size();
|
||||
UChar32 c = classSet->charAt(charIdx);
|
||||
assert(c >= 0); // TODO: dea; with sets containing strings.
|
||||
testText.append(c);
|
||||
}
|
||||
|
||||
// Calculate the expected results for this test string.
|
||||
memset(expectedBreaks, 0, sizeof(expectedBreaks));
|
||||
expectedBreaks[0] = 1;
|
||||
int32_t breakPos = 0;
|
||||
for (;;) {
|
||||
breakPos = mk.next(testText, breakPos);
|
||||
if (breakPos == -1) {
|
||||
break;
|
||||
}
|
||||
assert(breakPos <= testText.length());
|
||||
expectedBreaks[breakPos] = 1;
|
||||
}
|
||||
|
||||
// Find the break positions using forward iteration
|
||||
memset(forwardBreaks, 0, sizeof(expectedBreaks));
|
||||
bi->setText(testText);
|
||||
for (i=bi->first(); i != BreakIterator::DONE; i=bi->next()) {
|
||||
if (i < 0 || i > testText.length()) {
|
||||
errln("Out of range value returned by breakIterator::next()");
|
||||
break;
|
||||
}
|
||||
forwardBreaks[i] = 1;
|
||||
}
|
||||
|
||||
// Find the break positions using reverse iteration
|
||||
memset(reverseBreaks, 0, sizeof(expectedBreaks));
|
||||
for (i=bi->last(); i != BreakIterator::DONE; i=bi->previous()) {
|
||||
if (i < 0 || i > testText.length()) {
|
||||
errln("Out of range value returned by breakIterator::next()");
|
||||
break;
|
||||
}
|
||||
reverseBreaks[i] = 1;
|
||||
}
|
||||
|
||||
// Compare the expected and actual results.
|
||||
for (i=0; i<=testText.length(); i++) {
|
||||
UBool forwardError = forwardBreaks[i] != expectedBreaks[i];
|
||||
UBool anyError = forwardError || reverseBreaks[i] != expectedBreaks[i];
|
||||
if (anyError) {
|
||||
// Format a range of the test text that includes the failure as
|
||||
// a data item that can be included in the rbbi test data file.
|
||||
|
||||
// Start of the range is the last point where expected and actual results
|
||||
// both agreed that there was a break position.
|
||||
int startContext = i;
|
||||
for (;;) {
|
||||
if (startContext==0) { break; }
|
||||
startContext--;
|
||||
if (expectedBreaks[startContext] != 0) {break;}
|
||||
}
|
||||
|
||||
// End of range is two expected breaks past the start position.
|
||||
int endContext = i+1;
|
||||
int ci;
|
||||
for (ci=0; ci<2; ci++) { // Number of items to include in error text.
|
||||
for (;;) {
|
||||
if (endContext >= testText.length()) {break;}
|
||||
if (expectedBreaks[endContext-1] != 0) { break;}
|
||||
endContext++;
|
||||
}
|
||||
}
|
||||
|
||||
// Format looks like "<data><>\uabcd\uabcd<>\U0001abcd...</data>"
|
||||
UnicodeString errorText = "<data>";
|
||||
for (ci=startContext; ci<endContext;) {
|
||||
UnicodeString hexChars("0123456789abcdef");
|
||||
UChar32 c;
|
||||
int bn;
|
||||
c = testText.char32At(ci);
|
||||
if (expectedBreaks[ci] != 0) {
|
||||
errorText.append("<>");
|
||||
}
|
||||
if (c < 0x10000) {
|
||||
errorText.append("\\u");
|
||||
for (bn=12; bn>=0; bn-=4) {
|
||||
errorText.append(hexChars.charAt((c>>bn)&0xf));
|
||||
}
|
||||
} else {
|
||||
errorText.append("\\U");
|
||||
for (bn=28; bn>=0; bn-=4) {
|
||||
errorText.append(hexChars.charAt((c>>bn)&0xf));
|
||||
}
|
||||
}
|
||||
ci = testText.moveIndex32(ci, 1);
|
||||
}
|
||||
if (expectedBreaks[ci] != 0) {
|
||||
errorText.append("<>");
|
||||
}
|
||||
errorText.append("</data>\n");
|
||||
|
||||
// Output the error
|
||||
char charErrorTxt[1000];
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
errorText.extract(charErrorTxt, sizeof(charErrorTxt), NULL, status);
|
||||
errln("ERROR. %s. Direction = %s, Random seed = %d \n%s",
|
||||
(expectedBreaks[i]? "break expected but not found" : "break foun but not expected"),
|
||||
(forwardError?"forward":"reverse"), seed, charErrorTxt);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
loopCount++;
|
||||
seed = rand();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
|
||||
|
@ -20,10 +20,11 @@
|
||||
#include "intltest.h"
|
||||
#include "unicode/brkiter.h"
|
||||
|
||||
class Vector;
|
||||
class Enumeration;
|
||||
class BITestData;
|
||||
class Vector;
|
||||
class Enumeration;
|
||||
class BITestData;
|
||||
struct TestParams;
|
||||
class RBBIMonkeyKind;
|
||||
|
||||
/**
|
||||
* Test the RuleBasedBreakIterator class giving different rules
|
||||
@ -63,6 +64,7 @@ public:
|
||||
void TestMixedThaiLineBreak();
|
||||
void TestMaiyamok();
|
||||
void TestThaiWordBreak();
|
||||
void TestMonkey(char *params);
|
||||
|
||||
void TestExtended();
|
||||
UChar *ReadAndConvertFile(const char *fileName, int &ulen, UErrorCode &status);
|
||||
@ -115,6 +117,8 @@ private:
|
||||
void doBreakInvariantTest(BreakIterator& tb, UnicodeString& testChars);
|
||||
void doOtherInvariantTest(BreakIterator& tb, UnicodeString& testChars);
|
||||
|
||||
void RunMonkey(BreakIterator *bi, RBBIMonkeyKind &mk, uint32_t seed, int32_t loopCount);
|
||||
|
||||
};
|
||||
|
||||
#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
|
||||
|
100
icu4c/source/test/testdata/rbbitst.txt
vendored
100
icu4c/source/test/testdata/rbbitst.txt
vendored
@ -314,6 +314,7 @@ What is the proper use of the abbreviation pp.•? •Yes, I am definatelly 12"
|
||||
#
|
||||
################################################################
|
||||
|
||||
<line>
|
||||
#
|
||||
# Test Character for each of the line break classes.
|
||||
#
|
||||
@ -349,68 +350,69 @@ What is the proper use of the abbreviation pp.•? •Yes, I am definatelly 12"
|
||||
|
||||
# 2b Always break at end of text
|
||||
|
||||
<data>• \u00A1•</data>
|
||||
<data>• \u0041•</data>
|
||||
<data>• \u0009•</data>
|
||||
<data>• \u00B4•</data>
|
||||
<data>• \u000C•</data>
|
||||
<data>• \u2014•</data>
|
||||
<data>• \uFFFC•</data>
|
||||
<data>• \u0029•</data>
|
||||
<data>• \u0301•</data>
|
||||
<data>• \u0021•</data>
|
||||
<data>• \u00A0•</data>
|
||||
<data>• \u002D•</data>
|
||||
<data>• \u4E00•</data>
|
||||
<data>• \u2024•</data>
|
||||
<data>• \u002C•</data>
|
||||
<data>• \u000A•</data>
|
||||
<data>• \u0E5A•</data>
|
||||
<data>• \u0032•</data>
|
||||
<data>• \u0028•</data>
|
||||
<data>• \u0025•</data>
|
||||
<data>• \u0024•</data>
|
||||
<data>• \u0022•</data>
|
||||
<data>• \u0E01•</data>
|
||||
<data>• \uDB7F•</data>
|
||||
<data>• \u0020•</data>
|
||||
<data>• \u002F•</data>
|
||||
<data>• \uF8FF•</data>
|
||||
<data>• \u200B•</data>
|
||||
<data>• •\u00A1•</data>
|
||||
<data>• •\u0041•</data>
|
||||
<data>• •\u0009•</data>
|
||||
<data>• •\u00B4•</data>
|
||||
<data>• \u000C•</data> # LB3C × BK
|
||||
<data>• •\u2014•</data>
|
||||
<data>• •\uFFFC•</data>
|
||||
<data>• \u0029•</data> # LB 8 × CL
|
||||
# <data>• • \u0301•</data> # LB 7a Treat SP CM* as if it were ID #TODO: SP CM
|
||||
<data>• \u0021•</data> # LB 8 × EX
|
||||
#<data>• \u00A0•</data> # LB 11b × GL TODO: fix.
|
||||
<data>• •\u002D•</data>
|
||||
<data>• •\u4E00•</data>
|
||||
<data>• •\u2024•</data>
|
||||
<data>• \u002C•</data> # LB 8 × IS
|
||||
<data>• \u000A•</data> # LB3C × ( BK | CR | LF | NL )
|
||||
<data>• •\u0E5A•</data>
|
||||
<data>• •\u0032•</data>
|
||||
<data>• •\u0028•</data>
|
||||
<data>• •\u0025•</data>
|
||||
<data>• •\u0024•</data>
|
||||
<data>• •\u0022•</data>
|
||||
<data>• •\u0E01•</data>
|
||||
<data>• •\uDB7F•</data>
|
||||
<data>• \u0020•</data> # LB4 - don't break before space.
|
||||
<data>• \u002F•</data> # LB 8 × SY
|
||||
<data>• •\uF8FF•</data>
|
||||
<data>• \u200B•</data> # LB4 - don't break before ZA
|
||||
|
||||
|
||||
# 3a Always break after hard line breaks.
|
||||
# 3c Never break before hard line breaks.
|
||||
|
||||
<data>• \u00A1\u2028•\u00A1•</data>
|
||||
<data>• \u0041\u2028•\u0041•</data>
|
||||
<data>• \u0009\u2028•\u0009•</data>
|
||||
<data>• \u00B4\u2028•\u00B4•</data>
|
||||
<data>• \u000C\u2028•\u000C•</data>
|
||||
<data>• \u2014\u2028•\u2014•</data>
|
||||
<data>• \uFFFC\u2028•\uFFFC•</data>
|
||||
<data>• •\u00A1\u2028•\u00A1•</data>
|
||||
<data>• •\u0041\u2028•\u0041•</data>
|
||||
<data>• •\u0009\u2028•\u0009•</data>
|
||||
<data>• •\u00B4\u2028•\u00B4•</data>
|
||||
<data>• \u000C•\u2028•\u000C•</data>
|
||||
<data>• •\u2014\u2028•\u2014•</data>
|
||||
<data>• •\uFFFC\u2028•\uFFFC•</data>
|
||||
<data>• \u0029\u2028•\u0029•</data>
|
||||
<data>• \u0301\u2028•\u0301•</data>
|
||||
#<data>• \u0301\u2028•\u0301•</data> # TODO: fix.
|
||||
<data>• \u0021\u2028•\u0021•</data>
|
||||
<data>• \u00A0\u2028•\u00A0•</data>
|
||||
<data>• \u002D\u2028•\u002D•</data>
|
||||
<data>• \u4E00\u2028•\u4E00•</data>
|
||||
<data>• \u2024\u2028•\u2024•</data>
|
||||
#<data>• \u00A0\u2028•\u00A0•</data> # TODO: fix
|
||||
<data>• •\u002D\u2028•\u002D•</data>
|
||||
<data>• •\u4E00\u2028•\u4E00•</data>
|
||||
<data>• •\u2024\u2028•\u2024•</data>
|
||||
<data>• \u002C\u2028•\u002C•</data>
|
||||
<data>• \u000A•\u2028•\u000A•</data>
|
||||
<data>• \u0E5A\u2028•\u0E5A•</data>
|
||||
<data>• \u0032\u2028•\u0032•</data>
|
||||
<data>• \u0028\u2028•\u0028•</data>
|
||||
<data>• \u0025\u2028•\u0025•</data>
|
||||
<data>• \u0024\u2028•\u0024•</data>
|
||||
<data>• \u0022\u2028•\u0022•</data>
|
||||
<data>• \u0E01\u2028•\u0E01•</data>
|
||||
<data>• \uDB7F\u2028•\uDB7F•</data>
|
||||
<data>• •\u0E5A\u2028•\u0E5A•</data>
|
||||
<data>• •\u0032\u2028•\u0032•</data>
|
||||
<data>• •\u0028\u2028•\u0028•</data>
|
||||
<data>• •\u0025\u2028•\u0025•</data>
|
||||
<data>• •\u0024\u2028•\u0024•</data>
|
||||
<data>• •\u0022\u2028•\u0022•</data>
|
||||
<data>• •\u0E01\u2028•\u0E01•</data>
|
||||
<data>• •\uDB7F\u2028•\uDB7F•</data>
|
||||
<data>• \u0020\u2028•\u0020•</data>
|
||||
<data>• \u002F\u2028•\u002F•</data>
|
||||
<data>• \uF8FF\u2028•\uF8FF•</data>
|
||||
<data>• •\uF8FF\u2028•\uF8FF•</data>
|
||||
<data>• \u200B\u2028•\u200B•</data>
|
||||
|
||||
|
||||
#
|
||||
# Old Line Break Test data. Orginally located in RBBITest::TestDefaultRuleBasedLineIteration()
|
||||
#
|
||||
|
Loading…
Reference in New Issue
Block a user