ICU-3700 updated rules for Unicode 4.0.1.
X-SVN-Rev: 15286
This commit is contained in:
parent
7f012c20cb
commit
0a217cf782
@ -212,10 +212,13 @@ $CM+ $NUcm; # Rule 7c
|
||||
$NUcm $ALcm+;
|
||||
|
||||
# LB 18
|
||||
$PRcm? ($OPcm | $HYcm)? $NUcm ($NUcm | $IScm)* $CLcm? $POcm?;
|
||||
$PRcm? ($OPcm | $HYcm)? $NUcm ($NUcm | $IScm | $SYcm)* $CLcm? $POcm?;
|
||||
$PRcm $ALcm;
|
||||
$PRcm $IDcm;
|
||||
|
||||
# LB 19
|
||||
$CM* $ALcm+; # The $CM* is from rule 7C, and unattached CM is treated as AL
|
||||
$IScm $ALcm;
|
||||
|
||||
#
|
||||
# Reverse Rules.
|
||||
@ -326,11 +329,14 @@ $CM* $NU $CM+ / $LB5Breaks; # Rule 7c
|
||||
$CM* $ALPlus $CM* $NU;
|
||||
|
||||
# LB 18
|
||||
($CM* $PO)? ($CM* $CL)? ($CM* ($NU | $IS))* $CM* $NU ($CM* ($OP | $HY))? ($CM* $PR)?;
|
||||
($CM* $PO)? ($CM* $CL)? ($CM* ($NU | $IS | $SY))* $CM* $NU ($CM* ($OP | $HY))? ($CM* $PR)?;
|
||||
$CM* $ALPlus $CM* $PR;
|
||||
$CM* ($ID | $BackHangulSyllable) $CM* $PR;
|
||||
|
||||
# LB 19
|
||||
$CM* $ALPlus $CM* $ALPlus;
|
||||
# The $CM* is from rule 7C, and unattached CM is treated as AL
|
||||
$CM* $ALPlus $CM* $IS;
|
||||
$CM* $ALPlus $CM+ / $LB5Breaks;
|
||||
|
||||
## problem state table can't handle lookahead when it is at the
|
||||
@ -358,8 +364,8 @@ $SP+ $CM* $QU;
|
||||
$SP+ $CM* $CL;
|
||||
|
||||
# LB 18
|
||||
($CM* $IS)+ $CM* $NU;
|
||||
$CL $CM* ($NU | $IS);
|
||||
($CM* ($IS | $SY))+ $CM* $NU;
|
||||
$CL $CM* ($NU | $IS | $SY);
|
||||
|
||||
## -------------------------------------------------
|
||||
|
||||
@ -384,3 +390,4 @@ $CL $CM* $SP+;
|
||||
# LB 18
|
||||
$HY $CM* $NU;
|
||||
$IS $CM* $CL;
|
||||
$SY $CM* $CL;
|
||||
|
@ -2538,7 +2538,7 @@ RBBILineMonkey::RBBILineMonkey()
|
||||
"(\\p{Line_Break=PR}\\p{Line_Break=CM}*)?"
|
||||
"((\\p{Line_Break=OP}|\\p{Line_Break=HY})\\p{Line_Break=CM}*)?"
|
||||
"\\p{Line_Break=NU}\\p{Line_Break=CM}*"
|
||||
"((\\p{Line_Break=NU}|\\p{Line_Break=IS})\\p{Line_Break=CM}*)*"
|
||||
"((\\p{Line_Break=NU}|\\p{Line_Break=IS}|\\p{Line_Break=SY})\\p{Line_Break=CM}*)*"
|
||||
"(\\p{Line_Break=CL}\\p{Line_Break=CM}*)?"
|
||||
"(\\p{Line_Break=PO}\\p{Line_Break=CM}*)?",
|
||||
0, status);
|
||||
@ -2799,7 +2799,7 @@ int32_t RBBILineMonkey::next(int32_t startPos) {
|
||||
if (!fNU->contains(prevChar) && fCL->contains(thisChar) ||
|
||||
fEX->contains(thisChar) ||
|
||||
!fNU->contains(prevChar) && fIS->contains(thisChar) ||
|
||||
fSY->contains(thisChar)) {
|
||||
!fNU->contains(prevChar) && fSY->contains(thisChar)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
@ -2910,6 +2910,14 @@ int32_t RBBILineMonkey::next(int32_t startPos) {
|
||||
}
|
||||
}
|
||||
|
||||
if (fPR->contains(prevChar) && fAL->contains(thisChar)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (fPR->contains(prevChar) && fID->contains(thisChar)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// LB 18b
|
||||
if (fHY->contains(prevChar) || fBB->contains(thisChar)) {
|
||||
break;
|
||||
@ -2920,6 +2928,11 @@ int32_t RBBILineMonkey::next(int32_t startPos) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// LB 19b
|
||||
if (fIS->contains(prevChar) && fAL->contains(thisChar)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// LB 20 Break everywhere else
|
||||
break;
|
||||
|
||||
@ -3238,6 +3251,7 @@ void RBBITest::TestLineBreaks(void)
|
||||
UChar str[50];
|
||||
static const char *strlist[] =
|
||||
{
|
||||
"\\u0668\\u192b\\u002f\\u2034\\ufe39\\u00b4\\u0cc8\\u2571\\u200b\\u003f",
|
||||
"\\ufeff\\ufffc\\u3289\\u0085\\u2772\\u0020\\U000e010a\\u0020\\u2025\\u000a\\U000e0123",
|
||||
"\\ufe3c\\u201c\\u000d\\u2025\\u2007\\u201c\\u002d\\u20a0\\u002d\\u30a7\\u17a4",
|
||||
"\\u2772\\u0020\\U000e010a\\u0020\\u2025\\u000a\\U000e0123",
|
||||
@ -3317,7 +3331,7 @@ void RBBITest::TestSentBreaks(void)
|
||||
};
|
||||
int loop;
|
||||
int forward[100];
|
||||
for (loop = 0; loop < (int)(sizeof(strlist) / sizeof(char *)); loop ++) {
|
||||
for (loop = 0; loop < (int)(sizeof(strlist) / sizeof(char *)); loop ++) {
|
||||
u_unescape(strlist[loop], str, 100);
|
||||
UnicodeString ustr(str);
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user