ICU-10176 No line break in $SY $HL; update tests accordingly
X-SVN-Rev: 34142
This commit is contained in:
parent
45561757cc
commit
b6dcdfcd25
@ -1,4 +1,4 @@
|
||||
# Copyright (c) 2002-2012 International Business Machines Corporation and
|
||||
# Copyright (c) 2002-2013 International Business Machines Corporation and
|
||||
# others. All Rights Reserved.
|
||||
#
|
||||
# file: line.txt
|
||||
@ -342,6 +342,10 @@ $BBcm $LB20NonBreaks $CM*;
|
||||
#
|
||||
$HLcm ($HYcm | $BAcm) [^$CB]?;
|
||||
|
||||
# LB 21b (forward) Don't break between SY and HL
|
||||
# (break between HL and SY already disallowed by LB 13 above)
|
||||
$SYcm $HLcm;
|
||||
|
||||
# LB 22
|
||||
($ALcm | $HLcm) $INcm;
|
||||
$CM+ $INcm; # by rule 10, any otherwise unattached CM behaves as AL
|
||||
@ -575,6 +579,8 @@ $CM* [$LB20NonBreaks-$CM] $CM* $BB; # BB x .
|
||||
# LB21a
|
||||
[^$CB] $CM* ($HY | $BA) $CM* $HL;
|
||||
|
||||
# LB21b (reverse)
|
||||
$CM* $HL $CM* $SY;
|
||||
|
||||
# LB 22
|
||||
$CM* $IN $CM* ($ALPlus | $HL);
|
||||
|
@ -1,4 +1,4 @@
|
||||
# Copyright (c) 2002-2012 International Business Machines Corporation and
|
||||
# Copyright (c) 2002-2013 International Business Machines Corporation and
|
||||
# others. All Rights Reserved.
|
||||
#
|
||||
# file: line_fi.txt
|
||||
@ -348,6 +348,10 @@ $BBcm $LB20NonBreaks $CM*;
|
||||
#
|
||||
$HLcm ($HYcm | $BAcm | $HHcm) [^$CB]?;
|
||||
|
||||
# LB 21b (forward) Don't break between SY and HL
|
||||
# (break between HL and SY already disallowed by LB 13 above)
|
||||
$SYcm $HLcm;
|
||||
|
||||
# LB 22
|
||||
($ALcm | $HLcm) $INcm;
|
||||
$CM+ $INcm; # by rule 10, any otherwise unattached CM behaves as AL
|
||||
@ -585,6 +589,8 @@ $CM* [$LB20NonBreaks-$CM] $CM* $BB; # BB x .
|
||||
# LB21a
|
||||
[^$CB] $CM* ($HY | $BA | $HH) $CM* $HL;
|
||||
|
||||
# LB21b (reverse)
|
||||
$CM* $HL $CM* $SY;
|
||||
|
||||
# LB 22
|
||||
$CM* $IN $CM* ($ALPlus | $HL);
|
||||
|
@ -1,4 +1,4 @@
|
||||
# Copyright (c) 2002-2012 International Business Machines Corporation and
|
||||
# Copyright (c) 2002-2013 International Business Machines Corporation and
|
||||
# others. All Rights Reserved.
|
||||
#
|
||||
# file: line_ja.txt
|
||||
@ -342,6 +342,10 @@ $BBcm $LB20NonBreaks $CM*;
|
||||
#
|
||||
$HLcm ($HYcm | $BAcm) [^$CB]?;
|
||||
|
||||
# LB 21b (forward) Don't break between SY and HL
|
||||
# (break between HL and SY already disallowed by LB 13 above)
|
||||
$SYcm $HLcm;
|
||||
|
||||
# LB 22
|
||||
($ALcm | $HLcm) $INcm;
|
||||
$CM+ $INcm; # by rule 10, any otherwise unattached CM behaves as AL
|
||||
@ -575,6 +579,8 @@ $CM* [$LB20NonBreaks-$CM] $CM* $BB; # BB x .
|
||||
# LB21a
|
||||
[^$CB] $CM* ($HY | $BA) $CM* $HL;
|
||||
|
||||
# LB21b (reverse)
|
||||
$CM* $HL $CM* $SY;
|
||||
|
||||
# LB 22
|
||||
$CM* $IN $CM* ($ALPlus | $HL);
|
||||
|
@ -3189,6 +3189,12 @@ int32_t RBBILineMonkey::next(int32_t startPos) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// LB 21b
|
||||
// SY x HL
|
||||
if (fSY->contains(prevChar) && fHL->contains(thisChar)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// LB 22
|
||||
if ((fAL->contains(prevChar) && fIN->contains(thisChar)) ||
|
||||
(fHL->contains(prevChar) && fIN->contains(thisChar)) ||
|
||||
|
4
icu4c/source/test/testdata/LineBreakTest.txt
vendored
4
icu4c/source/test/testdata/LineBreakTest.txt
vendored
@ -4914,9 +4914,9 @@
|
||||
× 002F × 0020 ÷ AC01 ÷ # × [0.3] SOLIDUS (SY) × [7.01] SPACE (SP) ÷ [18.0] HANGUL SYLLABLE GAG (H3) ÷ [0.3]
|
||||
× 002F × 0308 ÷ AC01 ÷ # × [0.3] SOLIDUS (SY) × [9.0] COMBINING DIAERESIS (CM) ÷ [999.0] HANGUL SYLLABLE GAG (H3) ÷ [0.3]
|
||||
× 002F × 0308 × 0020 ÷ AC01 ÷ # × [0.3] SOLIDUS (SY) × [9.0] COMBINING DIAERESIS (CM) × [7.01] SPACE (SP) ÷ [18.0] HANGUL SYLLABLE GAG (H3) ÷ [0.3]
|
||||
× 002F ÷ 05D0 ÷ # × [0.3] SOLIDUS (SY) ÷ [999.0] HEBREW LETTER ALEF (HL) ÷ [0.3]
|
||||
× 002F × 05D0 ÷	#	× [0.3] SOLIDUS (SY) × [21.05] HEBREW LETTER ALEF (HL) ÷ [0.3]
|
||||
× 002F × 0020 ÷ 05D0 ÷ # × [0.3] SOLIDUS (SY) × [7.01] SPACE (SP) ÷ [18.0] HEBREW LETTER ALEF (HL) ÷ [0.3]
|
||||
× 002F × 0308 ÷ 05D0 ÷ # × [0.3] SOLIDUS (SY) × [9.0] COMBINING DIAERESIS (CM) ÷ [999.0] HEBREW LETTER ALEF (HL) ÷ [0.3]
|
||||
× 002F × 0308 × 05D0 ÷	#	× [0.3] SOLIDUS (SY) × [9.0] COMBINING DIAERESIS (CM) × [21.05] HEBREW LETTER ALEF (HL) ÷ [0.3]
|
||||
× 002F × 0308 × 0020 ÷ 05D0 ÷ # × [0.3] SOLIDUS (SY) × [9.0] COMBINING DIAERESIS (CM) × [7.01] SPACE (SP) ÷ [18.0] HEBREW LETTER ALEF (HL) ÷ [0.3]
|
||||
× 002F × 002D ÷ # × [0.3] SOLIDUS (SY) × [21.02] HYPHEN-MINUS (HY) ÷ [0.3]
|
||||
× 002F × 0020 ÷ 002D ÷ # × [0.3] SOLIDUS (SY) × [7.01] SPACE (SP) ÷ [18.0] HYPHEN-MINUS (HY) ÷ [0.3]
|
||||
|
23
icu4c/source/test/testdata/rbbitst.txt
vendored
23
icu4c/source/test/testdata/rbbitst.txt
vendored
@ -567,6 +567,14 @@ What is the proper use of the abbreviation pp.? •Yes, I am definatelly 12" tal
|
||||
<data>•\u114d\u31f3•\ube44\u002d•\u0362\u24e2\u276e\u2014\u205f\ufe16•\uc877•\u0fd0\u000a<100>\u20a3•</data>
|
||||
<data>•\u080a\u215b\U0001d7d3\u002c•\u2025\U000e012e•\u02df\u118d\u0029\ua8d6\u0085<100>\u6cc4\u2024\u202f\ufffc•</data>
|
||||
|
||||
# Test for #10176 (in root)
|
||||
<line>
|
||||
<data>•abc/•s •def•</data>
|
||||
<data>•abc/\u05D9 •def•</data>
|
||||
<data>•\u05E7\u05D7/\u05D9 •\u05DE\u05E2\u05D9\u05DC•</data>
|
||||
<data>•\u05D3\u05E8\u05D5\u05E9\u05D9\u05DD •\u05E9\u05D7\u05E7\u05E0\u05D9\u05DD/\u05D9\u05D5\u05EA•</data>
|
||||
|
||||
|
||||
|
||||
########################################################################################
|
||||
#
|
||||
@ -762,6 +770,14 @@ Bangkok)•</data>
|
||||
<word>
|
||||
<data>•私<400>達<400>に<400>一<400>〇<400>〇〇<400>の<400>コンピュータ<400>が<400>ある<400>。<0>奈々<400>は<400>ワード<400>で<400>ある<400>。•</data>
|
||||
|
||||
# Test for #10176 (in ja)
|
||||
<line>
|
||||
<data>•abc/•s •def•</data>
|
||||
<data>•abc/\u05D9 •def•</data>
|
||||
<data>•\u05E7\u05D7/\u05D9 •\u05DE\u05E2\u05D9\u05DC•</data>
|
||||
<data>•\u05D3\u05E8\u05D5\u05E9\u05D9\u05DD •\u05E9\u05D7\u05E7\u05E0\u05D9\u05DD/\u05D9\u05D5\u05EA•</data>
|
||||
|
||||
|
||||
<locale root>
|
||||
<word>
|
||||
<data>•私<400>達<400>に<400>一<400>〇<400>〇〇<400>の<400>コンピュータ<400>が<400>ある<400>。<0>奈々<400>は<400>ワード<400>で<400>ある<400>。•</data>
|
||||
@ -834,3 +850,10 @@ Bangkok)•</data>
|
||||
|
||||
<data>•abc •- •def •abc •-def •abc- •def •</data> # With ASCII hyphen
|
||||
<data>•abc •‐ •def •abc •‐def •abc‐ •def •</data> # With Unicode u2010 hyphen
|
||||
|
||||
# Test for #10176 (in fi)
|
||||
<line>
|
||||
<data>•abc/•s •def•</data>
|
||||
<data>•abc/\u05D9 •def•</data>
|
||||
<data>•\u05E7\u05D7/\u05D9 •\u05DE\u05E2\u05D9\u05DC•</data>
|
||||
<data>•\u05D3\u05E8\u05D5\u05E9\u05D9\u05DD •\u05E9\u05D7\u05E7\u05E0\u05D9\u05DD/\u05D9\u05D5\u05EA•</data>
|
||||
|
Loading…
Reference in New Issue
Block a user