ICU-4855 rbbi Line Break, handle unpaired surrogates as AL

X-SVN-Rev: 18596
This commit is contained in:
Andy Heninger 2005-09-28 04:57:25 +00:00
parent 9a954ce8d6
commit 6390a76fe2
2 changed files with 6 additions and 1 deletions

View File

@ -93,10 +93,11 @@ $ZW = [:LineBreak = ZWSpace:];
#
# Rule LB1. By default, treat AI (characters with ambiguous East Asian width),
# SA (South East Asian: Thai, Lao, Khmer),
# SG (Unpaired Surrogates), and
# XX (Unknown, unassigned)
# as $AL (Alphabetic).
#
$ALPlus = [$AL $AI $SA $XX];
$ALPlus = [$AL $AI $SA $SG $XX];
#
# Combining Marks. X $CM* behaves as if it were X. Rule LB6.

View File

@ -2662,6 +2662,7 @@ RBBILineMonkey::RBBILineMonkey()
fAL = new UnicodeSet("[\\p{Line_break=AL}]", status);
fID = new UnicodeSet("[\\p{Line_break=ID}]", status);
fSA = new UnicodeSet("[\\p{Line_break=SA}]", status);
fSG = new UnicodeSet("[\\ud800-\\udfff]", status);
fXX = new UnicodeSet("[\\p{Line_break=XX}]", status);
if (U_FAILURE(status)) {
@ -2674,6 +2675,7 @@ RBBILineMonkey::RBBILineMonkey()
fAL->addAll(*fXX); // Default behavior for XX is identical to AL
fAL->addAll(*fAI); // Default behavior for AI is identical to AL
fAL->addAll(*fSA); // Default behavior for SA is XX, which defaults to AL
fAL->addAll(*fSG); // Default behavior for SG is identical to AL.
fSets->addElement(fBK, status);
fSets->addElement(fCR, status);
@ -2710,6 +2712,7 @@ RBBILineMonkey::RBBILineMonkey()
fSets->addElement(fID, status);
fSets->addElement(fWJ, status);
fSets->addElement(fSA, status);
fSets->addElement(fSG, status);
fNumberMatcher = new RegexMatcher(
"(\\p{Line_Break=PR}\\p{Line_Break=CM}*)?"
@ -3159,6 +3162,7 @@ RBBILineMonkey::~RBBILineMonkey() {
delete fAL;
delete fID;
delete fSA;
delete fSG;
delete fXX;
delete fCharBI;