ICU-11556 Line Break rules update for L2/16-043R, don't break CA$; also LB rules refactored for reduced memory consumption. ICU4J Data refreshed from ICU4C.
X-SVN-Rev: 38645
This commit is contained in:
parent
7265eeae4c
commit
c1422845ac
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:1420edbb9a70020f48c545a75738981907657c79f8c8543dbee9cbbb75ada655
|
||||
size 11767515
|
||||
oid sha256:9e60171048ccda76c8c9c0ced344822e21543ef8608d188f0029edfc5a5a87ea
|
||||
size 11718381
|
||||
|
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:75953512893b452eabe5d0b7721ca2c15473e11c14c51526d25d6aa1051b3bc1
|
||||
size 91108
|
||||
oid sha256:6451e003b77fcc7cf03b1e0a0eebdcc112a41209d3a41837964370a893198f24
|
||||
size 91105
|
||||
|
@ -1057,25 +1057,36 @@ public class RBBITestMonkey extends TestFmwk {
|
||||
continue;
|
||||
}
|
||||
|
||||
// LB 23 (AL | HL) x NU
|
||||
// NU x (AL | HL)
|
||||
if ((fAL.contains(prevChar) || fHL.contains(prevChar)) && fNU.contains(thisChar)) {
|
||||
continue;
|
||||
}
|
||||
if (fNU.contains(prevChar) && (fAL.contains(thisChar) || fHL.contains(thisChar))) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// LB 23 ID x PO (Note: Leading CM behaves like ID)
|
||||
// AL x NU
|
||||
// NU x AL
|
||||
if (fID.contains(prevChar) && fPO.contains(thisChar) ||
|
||||
fAL.contains(prevChar) && fNU.contains(thisChar) ||
|
||||
fHL.contains(prevChar) && fNU.contains(thisChar) ||
|
||||
fNU.contains(prevChar) && fAL.contains(thisChar) ||
|
||||
fNU.contains(prevChar) && fHL.contains(thisChar) ) {
|
||||
continue;
|
||||
// LB 23a Do not break between numeric prefixes and ideographs, or between ideographs and numeric postfixes.
|
||||
// PR x (ID | EB | EM)
|
||||
// (ID | EB | EM) x PO
|
||||
if (fPR.contains(prevChar) &&
|
||||
(fID.contains(thisChar) || fEB.contains(thisChar) || fEM.contains(thisChar))) {
|
||||
continue;
|
||||
}
|
||||
if ((fID.contains(prevChar) || fEB.contains(prevChar) || fEM.contains(prevChar)) &&
|
||||
fPO.contains(thisChar)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// LB 24 Do not break between prefix and letters or ideographs.
|
||||
// PR x ID
|
||||
// PR x AL
|
||||
// PO x AL
|
||||
if (fPR.contains(prevChar) && fID.contains(thisChar) ||
|
||||
fPR.contains(prevChar) && (fAL.contains(thisChar) || fHL.contains(thisChar)) ||
|
||||
fPO.contains(prevChar) && (fAL.contains(thisChar) || fHL.contains(thisChar))) {
|
||||
// (PR | PO) x (AL | HL)
|
||||
// (AL | HL) x (PR | PO)
|
||||
if ((fPR.contains(prevChar) || fPO.contains(prevChar)) &&
|
||||
(fAL.contains(thisChar) || fHL.contains(thisChar))) {
|
||||
continue;
|
||||
}
|
||||
if ((fAL.contains(prevChar) || fHL.contains(prevChar)) &&
|
||||
(fPR.contains(thisChar) || fPO.contains(thisChar))) {
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@ -1,4 +1,4 @@
|
||||
# Copyright (c) 2001-2015 International Business Machines
|
||||
# Copyright (c) 2001-2016 International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#
|
||||
# RBBI Test Data
|
||||
@ -32,6 +32,23 @@
|
||||
# TODO: figure out how to have a single copy of the file for use by both C and Java.
|
||||
|
||||
|
||||
## FILTERED BREAK TESTS
|
||||
|
||||
# (William Bradford, public domain. http://catalog.hathitrust.org/Record/008651224 ) - edited.
|
||||
#<locale en>
|
||||
#<sent>
|
||||
#<data>\
|
||||
#•In the meantime Mr. •Weston arrived with his small ship, which he had now recovered. •Capt. •Gorges, who informed the Sgt. here that one purpose of his going east was to meet with Mr. •Weston, took this opportunity to call him to account for some abuses he had to lay to his charge.•</data>
|
||||
#
|
||||
#<locale en@ss=standard>
|
||||
#<sent>
|
||||
#<data>\
|
||||
#•In the meantime Mr. Weston arrived with his small ship, which he had now recovered. •Capt. Gorges, who informed the Sgt. here that one purpose of his going east was to meet with Mr. Weston, took this opportunity to call him to account for some abuses he had to lay to his charge.•</data>
|
||||
#
|
||||
## END FILTERED BREAK TESTS
|
||||
|
||||
<locale en>
|
||||
|
||||
# Temp debugging tests
|
||||
<sent>
|
||||
<data>•\u00c0.•</data>
|
||||
@ -496,6 +513,18 @@ What is the proper use of the abbreviation pp.? •Yes, I am definatelly 12" tal
|
||||
<data>• •\uF8FF\u2028<100>\uF8FF•</data>
|
||||
<data>• \u200B\u2028<100>\u200B•</data>
|
||||
|
||||
# Regional Indicator sequences. They group in pairs. The reverse rules are tricky.
|
||||
# Sequences are long enough that the non-exhaustive monkey test won't reliably pick up problems.
|
||||
|
||||
<data>•\U0001F1E6\U0001F1E6•\U0001F1E6\U0001F1E6•\U0001F1E6\U0001F1E6•\U0001F1E6\U0001F1E6•</data>
|
||||
<data>•\U0001F1E6\U0001F1E6•\U0001F1E6\U0001F1E6•\U0001F1E6\U0001F1E6•\U0001F1E6\U0001F1E6•\U0001F1E6•</data>
|
||||
|
||||
<data>•\U0001F1E6\U0001F1E6•\U0001F1E6\U0001F1E6\u00a0\U0001F1E6\U0001F1E6•\U0001F1E6\U0001F1E6•</data>
|
||||
<data>•\U0001F1E6\U0001F1E6•\U0001F1E6\U0001F1E6\u00a0\U0001F1E6\U0001F1E6•\U0001F1E6\U0001F1E6•\U0001F1E6•</data>
|
||||
<data>•\U0001F1E6\U0001F1E6•\U0001F1E6\u00a0\U0001F1E6\U0001F1E6•\U0001F1E6\U0001F1E6•</data>
|
||||
<data>•\U0001F1E6\U0001F1E6•\U0001F1E6\u00a0\U0001F1E6\U0001F1E6•\U0001F1E6\U0001F1E6•\U0001F1E6•</data>
|
||||
|
||||
|
||||
# User Guide example
|
||||
|
||||
<data>•Parlez-•vous •français ?•</data>
|
||||
@ -578,6 +607,11 @@ What is the proper use of the abbreviation pp.? •Yes, I am definatelly 12" tal
|
||||
<data>•\u05E7\u05D7/\u05D9 •\u05DE\u05E2\u05D9\u05DC•</data>
|
||||
<data>•\u05D3\u05E8\u05D5\u05E9\u05D9\u05DD •\u05E9\u05D7\u05E7\u05E0\u05D9\u05DD/\u05D9\u05D5\u05EA•</data>
|
||||
|
||||
# Ticket #11556 don't break "R$" or "JP¥"
|
||||
<locale en>
|
||||
<line>
|
||||
<data>•R$ •JP¥ •a9 •3a •H% •CA$ •Travi$ •Scott •Ke$ha •Curren$y •A$AP •Rocky•</data>
|
||||
|
||||
|
||||
|
||||
########################################################################################
|
||||
|
Loading…
Reference in New Issue
Block a user