ICU-4855 rbbi Line Break, handle unpaired surrogates as AL
X-SVN-Rev: 18596
This commit is contained in:
parent
9a954ce8d6
commit
6390a76fe2
@ -93,10 +93,11 @@ $ZW = [:LineBreak = ZWSpace:];
|
||||
#
|
||||
# Rule LB1. By default, treat AI (characters with ambiguous East Asian width),
|
||||
# SA (South East Asian: Thai, Lao, Khmer)
|
||||
# SG (Unpaired Surrogates)
|
||||
# XX (Unknown, unassigned)
|
||||
# as $AL (Alphabetic)
|
||||
#
|
||||
$ALPlus = [$AL $AI $SA $XX];
|
||||
$ALPlus = [$AL $AI $SA $SG $XX];
|
||||
|
||||
#
|
||||
# Combining Marks. X $CM* behaves as if it were X. Rule LB6.
|
||||
|
@ -2662,6 +2662,7 @@ RBBILineMonkey::RBBILineMonkey()
|
||||
fAL = new UnicodeSet("[\\p{Line_break=AL}]", status);
|
||||
fID = new UnicodeSet("[\\p{Line_break=ID}]", status);
|
||||
fSA = new UnicodeSet("[\\p{Line_break=SA}]", status);
|
||||
fSG = new UnicodeSet("[\\ud800-\\udfff]", status);
|
||||
fXX = new UnicodeSet("[\\p{Line_break=XX}]", status);
|
||||
|
||||
if (U_FAILURE(status)) {
|
||||
@ -2674,6 +2675,7 @@ RBBILineMonkey::RBBILineMonkey()
|
||||
fAL->addAll(*fXX); // Default behavior for XX is identical to AL
|
||||
fAL->addAll(*fAI); // Default behavior for AI is identical to AL
|
||||
fAL->addAll(*fSA); // Default behavior for SA is XX, which defaults to AL
|
||||
fAL->addAll(*fSG); // Default behavior for SG is identical to AL.
|
||||
|
||||
fSets->addElement(fBK, status);
|
||||
fSets->addElement(fCR, status);
|
||||
@ -2710,6 +2712,7 @@ RBBILineMonkey::RBBILineMonkey()
|
||||
fSets->addElement(fID, status);
|
||||
fSets->addElement(fWJ, status);
|
||||
fSets->addElement(fSA, status);
|
||||
fSets->addElement(fSG, status);
|
||||
|
||||
fNumberMatcher = new RegexMatcher(
|
||||
"(\\p{Line_Break=PR}\\p{Line_Break=CM}*)?"
|
||||
@ -3159,6 +3162,7 @@ RBBILineMonkey::~RBBILineMonkey() {
|
||||
delete fAL;
|
||||
delete fID;
|
||||
delete fSA;
|
||||
delete fSG;
|
||||
delete fXX;
|
||||
|
||||
delete fCharBI;
|
||||
|
Loading…
Reference in New Issue
Block a user